Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41use spg_sql::parser::{self, ParseError};
42use spg_storage::{
43    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
44    TableSchema, Value, VecEncoding,
45};
46
47use crate::eval::{EvalContext, EvalError};
48
/// Result of executing one statement.
///
/// Marked `#[non_exhaustive]`, so a `match` outside this crate needs a
/// `_` arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Number of rows the statement affected (see above).
        affected: usize,
        /// Whether the committed catalog changed (see above).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Schema of the result columns (presumably one entry per value
        /// in each row — TODO confirm against the SELECT executor).
        columns: Vec<ColumnSchema>,
        /// The result rows; may be empty.
        rows: Vec<Row>,
    },
}
71
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Wraps a `ParseError` from the SQL front-end (see the
    /// `From<ParseError>` impl in this file).
    Parse(ParseError),
    /// Wraps a `StorageError` from the storage layer (see the
    /// `From<StorageError>` impl in this file).
    Storage(StorageError),
    /// Wraps an `EvalError` from expression evaluation (see the
    /// `From<EvalError>` impl in this file).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
104
105impl fmt::Display for EngineError {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        match self {
108            Self::Parse(e) => write!(f, "parse: {e}"),
109            Self::Storage(e) => write!(f, "storage: {e}"),
110            Self::Eval(e) => write!(f, "eval: {e}"),
111            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
112            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
113            Self::NoActiveTransaction => f.write_str("no active transaction"),
114            Self::WriteRequired => {
115                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
116            }
117            Self::RowLimitExceeded(n) => {
118                write!(f, "query exceeded max_query_rows={n}")
119            }
120            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
121        }
122    }
123}
124
125impl From<ParseError> for EngineError {
126    fn from(e: ParseError) -> Self {
127        Self::Parse(e)
128    }
129}
130impl From<StorageError> for EngineError {
131    fn from(e: StorageError) -> Self {
132        Self::Storage(e)
133    }
134}
135impl From<EvalError> for EngineError {
136    fn from(e: EvalError) -> Self {
137        Self::Eval(e)
138    }
139}
140
// NOTE: the paragraph below describes `Engine` (declared further down in
// this file), not `ClockFn`. It is kept as a plain comment so rustdoc does
// not attach it to the type alias that follows.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. `None` means `NOW()` / `CURRENT_*` raise
/// `Unsupported`.
pub type ClockFn = fn() -> i64;
150
/// Function pointer that produces 16 cryptographically random bytes.
///
/// Like `ClockFn`, this exists because the engine is `no_std` and can't
/// reach for /dev/urandom itself — the host (`spg-server`) injects an
/// OS-backed source. `None` means SQL-driven `CREATE USER` falls back to
/// a deterministic salt derived from the username (acceptable in tests;
/// the server always installs a real RNG so production paths never see
/// this).
pub type SaltFn = fn() -> [u8; 16];
158
/// v4.5 cooperative cancellation token. A long-running SELECT /
/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
/// and bails with `EngineError::Cancelled`. The host
/// (`spg-server`) creates an `AtomicBool` per query, spawns a
/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
///
/// `CancelToken::none()` is a no-op — used by the legacy `execute`
/// and `execute_readonly` entry points so existing callers don't
/// change.
///
/// The token is `Copy`: it only holds an optional shared reference.
#[derive(Debug, Clone, Copy)]
pub struct CancelToken<'a> {
    /// `None` for the no-op token built by `none()`; otherwise the
    /// borrowed flag that `is_cancelled` loads.
    flag: Option<&'a core::sync::atomic::AtomicBool>,
}
173
174impl<'a> CancelToken<'a> {
175    #[must_use]
176    pub const fn none() -> Self {
177        Self { flag: None }
178    }
179
180    #[must_use]
181    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
182        Self { flag: Some(f) }
183    }
184
185    #[must_use]
186    pub fn is_cancelled(self) -> bool {
187        self.flag
188            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
189    }
190
191    /// Returns `Err(Cancelled)` if the token has been tripped.
192    /// Used at row-loop checkpoints to bail cooperatively without
193    /// scattering raw `is_cancelled` checks across the executor.
194    #[inline]
195    pub fn check(self) -> Result<(), EngineError> {
196        if self.is_cancelled() {
197            Err(EngineError::Cancelled)
198        } else {
199            Ok(())
200        }
201    }
202}
203
// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37 v2, ----
// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4,  ----
// ----   statistics in v6.2.0 v5)                                ----
206//
207// Wraps a catalog blob + a user blob behind a small header so the
208// server can persist both atomically without inventing a new file.
209// Bare catalog blobs (v3.x) still load via `restore_envelope` since
210// the magic check fails fast and the function falls back to
211// `Catalog::deserialize`.
212//
213// Layout — v1 (v4.1, no CRC):
214//   [8 bytes magic "SPGENV01"]
215//   [u8 version = 1]
216//   [u32 catalog_len][catalog bytes]
217//   [u32 users_len][users bytes]
218//
219// Layout — v2 (v4.37, CRC32 of body):
220//   [8 bytes magic "SPGENV01"]
221//   [u8 version = 2]
222//   [u32 catalog_len][catalog bytes]
223//   [u32 users_len][users bytes]
224//   [u32 crc32]                      ← CRC32 of every byte before it.
225//
226// Layout — v3 (v6.1.2, publications trailer):
227//   [8 bytes magic "SPGENV01"]
228//   [u8 version = 3]
229//   [u32 catalog_len][catalog bytes]
230//   [u32 users_len][users bytes]
231//   [u32 pubs_len][publications bytes]
232//   [u32 crc32]
233//
234// Layout — v4 (v6.1.4, subscriptions trailer):
235//   [8 bytes magic "SPGENV01"]
236//   [u8 version = 4]
237//   [u32 catalog_len][catalog bytes]
238//   [u32 users_len][users bytes]
239//   [u32 pubs_len][publications bytes]
240//   [u32 subs_len][subscriptions bytes]
241//   [u32 crc32]
242//
243// Layout — v5 (v6.2.0, statistics trailer):
244//   [8 bytes magic "SPGENV01"]
245//   [u8 version = 5]
246//   [u32 catalog_len][catalog bytes]
247//   [u32 users_len][users bytes]
248//   [u32 pubs_len][publications bytes]
249//   [u32 subs_len][subscriptions bytes]
250//   [u32 stats_len][statistics bytes]      ← NEW
251//   [u32 crc32]
252//
253// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
254// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
255// statistics; v3 loads with empty subscriptions + statistics; v4
256// loads with empty statistics; v5 deserialises all three. Older
257// SPG versions reading a v5 envelope fall through the version
258// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
259// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
260
/// Eight-byte tag that opens every snapshot envelope. `split_envelope`
/// treats a buffer that does not start with it as a bare catalog blob.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// v4.1 layout: catalog + users sections, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// v4.37 layout: v1 plus a trailing CRC32 of every preceding byte.
const ENVELOPE_VERSION_V2: u8 = 2;
/// v6.1.2 layout: v2 plus a publications section before the CRC.
const ENVELOPE_VERSION_V3: u8 = 3;
/// v6.1.4 layout: v3 plus a subscriptions section before the CRC.
const ENVELOPE_VERSION_V4: u8 = 4;
/// v6.2.0 layout: v4 plus a statistics section before the CRC. This is
/// the version `build_envelope` writes.
const ENVELOPE_VERSION_V5: u8 = 5;
267
268fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
269    let mut out = Vec::with_capacity(
270        8 + 1
271            + 4
272            + catalog.len()
273            + 4
274            + users.len()
275            + 4
276            + pubs.len()
277            + 4
278            + subs.len()
279            + 4
280            + stats.len()
281            + 4,
282    );
283    out.extend_from_slice(ENVELOPE_MAGIC);
284    out.push(ENVELOPE_VERSION_V5);
285    out.extend_from_slice(
286        &u32::try_from(catalog.len())
287            .expect("≤ 4G catalog")
288            .to_le_bytes(),
289    );
290    out.extend_from_slice(catalog);
291    out.extend_from_slice(
292        &u32::try_from(users.len())
293            .expect("≤ 4G users")
294            .to_le_bytes(),
295    );
296    out.extend_from_slice(users);
297    out.extend_from_slice(
298        &u32::try_from(pubs.len())
299            .expect("≤ 4G publications")
300            .to_le_bytes(),
301    );
302    out.extend_from_slice(pubs);
303    out.extend_from_slice(
304        &u32::try_from(subs.len())
305            .expect("≤ 4G subscriptions")
306            .to_le_bytes(),
307    );
308    out.extend_from_slice(subs);
309    out.extend_from_slice(
310        &u32::try_from(stats.len())
311            .expect("≤ 4G statistics")
312            .to_le_bytes(),
313    );
314    out.extend_from_slice(stats);
315    let crc = spg_crypto::crc32::crc32(&out);
316    out.extend_from_slice(&crc.to_le_bytes());
317    out
318}
319
/// Outcome of `split_envelope`.
///
/// * `Bare` — the buffer is not a well-formed envelope (wrong magic,
///   unknown version, or inconsistent lengths). Kept so v3.x bare
///   catalog blobs stay readable through the catalog-only fallback.
/// * `Pair` — the sections split out of a valid v1–v5 envelope.
/// * `CrcMismatch` — a v2+ envelope whose trailing CRC32 does not
///   match the body: explicit corruption.
enum EnvelopeParse<'a> {
    Bare,
    Pair {
        /// Catalog section; present in every envelope version.
        catalog: &'a [u8],
        /// Users section; present in every envelope version.
        users: &'a [u8],
        /// `Some` for v3 / v4 / v5 envelopes, `None` for v1 / v2.
        publications: Option<&'a [u8]>,
        /// `Some` for v4 / v5 envelopes, `None` for v1 / v2 / v3.
        subscriptions: Option<&'a [u8]>,
        /// `Some` for v5 envelopes only.
        statistics: Option<&'a [u8]>,
    },
    CrcMismatch {
        /// CRC32 stored in the envelope's last four bytes.
        expected: u32,
        /// CRC32 computed over every byte before the stored checksum.
        computed: u32,
    },
}
340
341/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
342/// `Bare` for a buffer that doesn't look like an envelope (v3.x
343/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
344/// whose trailing CRC32 doesn't match the body.
345fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
346    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
347        return EnvelopeParse::Bare;
348    }
349    let version = buf[8];
350    if !matches!(
351        version,
352        ENVELOPE_VERSION_V1
353            | ENVELOPE_VERSION_V2
354            | ENVELOPE_VERSION_V3
355            | ENVELOPE_VERSION_V4
356            | ENVELOPE_VERSION_V5
357    ) {
358        return EnvelopeParse::Bare;
359    }
360    let mut p = 9usize;
361    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
362        return EnvelopeParse::Bare;
363    };
364    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
365        return EnvelopeParse::Bare;
366    };
367    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
368    p += 4;
369    if p + cat_len + 4 > buf.len() {
370        return EnvelopeParse::Bare;
371    }
372    let catalog = &buf[p..p + cat_len];
373    p += cat_len;
374    let Some(user_len_bytes) = buf.get(p..p + 4) else {
375        return EnvelopeParse::Bare;
376    };
377    let Ok(user_len_arr) = user_len_bytes.try_into() else {
378        return EnvelopeParse::Bare;
379    };
380    let user_len = u32::from_le_bytes(user_len_arr) as usize;
381    p += 4;
382    if p + user_len > buf.len() {
383        return EnvelopeParse::Bare;
384    }
385    let users = &buf[p..p + user_len];
386    p += user_len;
387    let publications = if matches!(
388        version,
389        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
390    ) {
391        // [u32 pubs_len][publications bytes]
392        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
393            return EnvelopeParse::Bare;
394        };
395        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
396            return EnvelopeParse::Bare;
397        };
398        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
399        p += 4;
400        if p + pubs_len > buf.len() {
401            return EnvelopeParse::Bare;
402        }
403        let pubs_slice = &buf[p..p + pubs_len];
404        p += pubs_len;
405        Some(pubs_slice)
406    } else {
407        None
408    };
409    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
410        // [u32 subs_len][subscriptions bytes]
411        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
412            return EnvelopeParse::Bare;
413        };
414        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
415            return EnvelopeParse::Bare;
416        };
417        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
418        p += 4;
419        if p + subs_len > buf.len() {
420            return EnvelopeParse::Bare;
421        }
422        let subs_slice = &buf[p..p + subs_len];
423        p += subs_len;
424        Some(subs_slice)
425    } else {
426        None
427    };
428    let statistics = if version == ENVELOPE_VERSION_V5 {
429        // [u32 stats_len][statistics bytes]
430        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
431            return EnvelopeParse::Bare;
432        };
433        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
434            return EnvelopeParse::Bare;
435        };
436        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
437        p += 4;
438        if p + stats_len > buf.len() {
439            return EnvelopeParse::Bare;
440        }
441        let stats_slice = &buf[p..p + stats_len];
442        p += stats_len;
443        Some(stats_slice)
444    } else {
445        None
446    };
447    if matches!(
448        version,
449        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
450    ) {
451        if p + 4 != buf.len() {
452            return EnvelopeParse::Bare;
453        }
454        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
455            return EnvelopeParse::Bare;
456        };
457        let expected = u32::from_le_bytes(crc_arr);
458        let computed = spg_crypto::crc32::crc32(&buf[..p]);
459        if expected != computed {
460            return EnvelopeParse::CrcMismatch { expected, computed };
461        }
462    } else if p != buf.len() {
463        // v1: must end exactly at the users section.
464        return EnvelopeParse::Bare;
465    }
466    EnvelopeParse::Pair {
467        catalog,
468        users,
469        publications,
470        subscriptions,
471        statistics,
472    }
473}
474
/// v4.41.1 opaque transaction handle. Returned by `Engine::alloc_tx_id`,
/// threaded through `Engine::execute_in` so dispatch can identify which
/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
/// startup replay — implicitly uses through the unchanged
/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
/// runtime (dispatch holds `engine.write()` across the wrap, same as
/// v4.34); the map shape is here to let v4.42 turn on N in-flight
/// implicit TXs without reshuffling the engine internals.
///
/// Derives `Ord`, which is what lets it key the `BTreeMap` in
/// `Engine::tx_catalogs`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);

/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
/// global-shadow path. New `alloc_tx_id` handles start at 1, so they
/// never compare equal to this value.
pub const IMPLICIT_TX: TxId = TxId(0);
490
/// v6.7.3 — default segment-size threshold (4 MiB) used by `COMPACT COLD
/// SEGMENTS` when no explicit target is supplied. Segments whose
/// `OwnedSegment::bytes().len()` is **strictly** less than this
/// value are eligible to merge. spg-server reads
/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
497
/// Per-slot transaction state.
///
/// Held inside `tx_catalogs[tx_id]` for the lifetime of a BEGIN..COMMIT
/// (or BEGIN..ROLLBACK) window. Drops when the TX commits (its `catalog`
/// is moved over `Engine.catalog`) or rolls back (slot removed, catalog
/// discarded).
#[derive(Debug, Default, Clone)]
struct TxState {
    /// The TX's shadow copy of the catalog. Started as a clone of
    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
    catalog: Catalog,
    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
    /// `ROLLBACK TO <name>` restores from the entry and pops everything
    /// after it; `RELEASE <name>` discards the entry and everything
    /// after; COMMIT/ROLLBACK clears the whole stack.
    ///
    /// Presumably ordered oldest-first (entries pushed as savepoints
    /// are created) — TODO confirm against the `SAVEPOINT` handler.
    savepoints: Vec<(String, Catalog)>,
}
516
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    /// Clone of the catalog taken by `clone_snapshot`.
    catalog: Catalog,
    /// Clone of the engine's statistics at snapshot time.
    statistics: statistics::Statistics,
    /// Copy of the engine's optional clock function.
    clock: Option<ClockFn>,
    /// Copy of the engine's per-query row cap; `None` = unlimited.
    max_query_rows: Option<usize>,
}
537
538#[derive(Debug, Default)]
539pub struct Engine {
540    /// Committed catalog — what survives `Engine::snapshot()` and what
541    /// outside-TX `SELECT`s read.
542    catalog: Catalog,
543    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
544    /// v4.41.1 runtime invariant: at most one entry (single-writer
545    /// model unchanged). v4.42 will let dispatch hold multiple entries
546    /// concurrently for group commit + engine MVCC.
547    tx_catalogs: BTreeMap<TxId, TxState>,
548    /// Which slot the next exec_* call should mutate. Set by
549    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
550    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
551    /// write goes straight against `catalog`).
552    current_tx: Option<TxId>,
553    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
554    /// reserved for `IMPLICIT_TX`.
555    next_tx_id: u64,
556    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
557    /// / `CURRENT_DATE`. Set by the host environment.
558    clock: Option<ClockFn>,
559    /// v4.1 cryptographic RNG for per-user password salt. Set by the
560    /// host. `None` means SQL-driven `CREATE USER` uses a
561    /// deterministic fallback — see `SaltFn`.
562    salt_fn: Option<SaltFn>,
563    /// v4.2 per-query row cap. `None` = unlimited. When set, a
564    /// SELECT that materialises more than `n` rows returns
565    /// `EngineError::RowLimitExceeded`. Enforced before the result
566    /// is shaped into wire frames so a runaway scan can't blow the
567    /// server's heap.
568    max_query_rows: Option<usize>,
569    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
570    /// the server decides what that means at the auth boundary
571    /// (open mode vs legacy single-password mode). User CRUD goes
572    /// through `create_user`/`drop_user`/`verify_user`; persistence
573    /// rides the snapshot envelope alongside the catalog.
574    users: UserStore,
575    /// v6.1.2 logical-replication publication catalog. Empty until
576    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
577    /// trailer (see `build_envelope`).
578    publications: publications::Publications,
579    /// v6.1.4 logical-replication subscription catalog. Empty until
580    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
581    /// trailer.
582    subscriptions: subscriptions::Subscriptions,
583    /// v6.2.0 — per-column statistics for the cost-based optimizer.
584    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
585    /// table. Persistence rides the v5 envelope trailer.
586    statistics: statistics::Statistics,
587    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
588    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
589    /// the snapshot envelope (rebuilt on demand after restart).
590    plan_cache: plan_cache::PlanCache,
591    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
592    /// surfaced via `spg_stat_query` virtual table. Updated by the
593    /// `execute_*` paths after a successful execute.
594    query_stats: query_stats::QueryStats,
595    /// v6.5.2 — connection-state provider callback. spg-server
596    /// registers a function at startup that snapshots its
597    /// per-pgwire-connection registry into `ActivityRow`s; engine
598    /// reads through it on every `SELECT * FROM spg_stat_activity`.
599    /// `None` ⇒ no-data (returns empty rows; matches the no_std
600    /// embedded callers that don't run pgwire).
601    activity_provider: Option<ActivityProvider>,
602    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
603    /// activity_provider: spg-server registers both at startup;
604    /// engine reads through on `SELECT * FROM spg_audit_chain` and
605    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
606    audit_chain_provider: Option<AuditChainProvider>,
607    audit_verifier: Option<AuditVerifier>,
608    /// v6.5.6 — slow-query log threshold in microseconds. When set,
609    /// every successful execute whose elapsed exceeds the threshold
610    /// gets fed to the registered slow-query log callback (so
611    /// spg-server can emit a structured log line). Default `None`
612    /// = no slow-query logging.
613    slow_query_threshold_us: Option<u64>,
614    slow_query_logger: Option<SlowQueryLogger>,
615    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
616    /// Only `default_text_search_config` is consumed by the engine
617    /// today (the FTS function dispatcher reads it when
618    /// `to_tsvector(text)` is called without an explicit config).
619    /// All other names are accepted + recorded so PG-dump output
620    /// loads, but have no behavioural effect.
621    session_params: BTreeMap<String, String>,
622    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
623    /// Each time the engine executes a `DeferredEmbeddedStmt` it
624    /// increments this; the recursive `execute_stmt_with_cancel`
625    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
626    /// to bound runaway cascades (trigger A's UPDATE on table B
627    /// fires trigger B which UPDATEs table A which fires trigger
628    /// A again…). Reset to 0 once the original DML returns.
629    trigger_recursion_depth: u32,
630    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
631    /// (mysqldump preamble), the FK existence + arity check at
632    /// CREATE TABLE time is deferred. FKs referencing a
633    /// not-yet-existing parent land in `pending_foreign_keys`
634    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
635    /// the queue and resolves each FK against the now-complete
636    /// catalog. Empty by default; the queue is drained on every
637    /// `RESET ALL` too.
638    foreign_key_checks: bool,
639    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
640}
641
/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
/// fires. 16 deep is well past anything a normal trigger graph
/// uses while still preventing infinite-loop wedging. Compared
/// against `Engine::trigger_recursion_depth` (see that field's doc).
const MAX_TRIGGER_RECURSION: u32 = 16;
646
/// v6.5.6 — callback signature for slow-query log emission. Called
/// with `(sql, elapsed_us)` once per successful execute that crosses
/// the threshold. `elapsed_us` is in microseconds, the same unit as
/// `Engine::slow_query_threshold_us`.
pub type SlowQueryLogger = fn(&str, u64);
651
652/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
653/// state. Round-trips through `Engine::execute` to recreate the
654/// same schema (sans data + indexes — indexes are emitted as a
655/// separate `CREATE INDEX` chain in `spg_database_ddl`).
656fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
657    let mut out = alloc::format!("CREATE TABLE {name} (");
658    for (i, col) in columns.iter().enumerate() {
659        if i > 0 {
660            out.push_str(", ");
661        }
662        out.push_str(&col.name);
663        out.push(' ');
664        out.push_str(&render_data_type(col.ty));
665        if !col.nullable {
666            out.push_str(" NOT NULL");
667        }
668        if col.auto_increment {
669            out.push_str(" AUTO_INCREMENT");
670        }
671    }
672    out.push(')');
673    out
674}
675
676fn render_data_type(ty: DataType) -> String {
677    match ty {
678        DataType::SmallInt => "SMALLINT".into(),
679        DataType::Int => "INT".into(),
680        DataType::BigInt => "BIGINT".into(),
681        DataType::Float => "FLOAT".into(),
682        DataType::Text => "TEXT".into(),
683        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
684        DataType::Char(n) => alloc::format!("CHAR({n})"),
685        DataType::Bool => "BOOL".into(),
686        DataType::Vector { dim, encoding } => match encoding {
687            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
688            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
689            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
690        },
691        DataType::Numeric { precision, scale } => {
692            alloc::format!("NUMERIC({precision},{scale})")
693        }
694        DataType::Date => "DATE".into(),
695        DataType::Timestamp => "TIMESTAMP".into(),
696        DataType::Interval => "INTERVAL".into(),
697        DataType::Json => "JSON".into(),
698        DataType::Jsonb => "JSONB".into(),
699        DataType::Timestamptz => "TIMESTAMPTZ".into(),
700        DataType::Bytes => "BYTEA".into(),
701        DataType::TextArray => "TEXT[]".into(),
702        DataType::IntArray => "INT[]".into(),
703        DataType::BigIntArray => "BIGINT[]".into(),
704        DataType::TsVector => "TSVECTOR".into(),
705        DataType::TsQuery => "TSQUERY".into(),
706    }
707}
708
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
///
/// NOTE(review): the field meanings below are read off the field
/// names; the producer lives in spg-server, outside this file —
/// confirm units and semantics there.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    /// Connection identifier (presumably the pgwire backend pid).
    pub pid: u32,
    /// Name of the connected user.
    pub user: String,
    /// Start time; the `_us` suffix suggests microseconds, presumably
    /// since the Unix epoch like `ClockFn`.
    pub started_at_us: i64,
    /// SQL text the connection is currently running.
    pub current_sql: String,
    /// What the connection is waiting on, if anything.
    pub wait_event: String,
    /// Elapsed time; the `_us` suffix suggests microseconds.
    pub elapsed_us: i64,
    /// Whether the connection has a transaction open.
    pub in_transaction: bool,
}
722
/// v6.5.2 — provider callback type. Fresh snapshot returned each
/// call; engine doesn't cache the slice. Stored in
/// `Engine::activity_provider`.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
726
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
///
/// NOTE(review): the field meanings below are read off the field
/// names; `AuditEntry` is defined outside this file — confirm there.
#[derive(Debug, Clone)]
pub struct AuditRow {
    /// Sequence number of the entry within the chain.
    pub seq: i64,
    /// Entry timestamp; the `_ms` suffix suggests milliseconds.
    pub ts_ms: i64,
    /// Hex-encoded hash of the previous chain entry.
    pub prev_hash_hex: String,
    /// Hex-encoded hash of this entry.
    pub entry_hash_hex: String,
    /// SQL text recorded for this entry.
    pub sql: String,
}
737
/// v6.5.3 — chain-table provider. spg-server registers a fn pointer
/// that snapshots the audit log into `AuditRow`s; stored in
/// `Engine::audit_chain_provider`.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log and returns `(verified_count, broken_at_seq)`
/// — `broken_at_seq` is `-1` on a clean chain. Stored in
/// `Engine::audit_verifier`.
pub type AuditVerifier = fn() -> (i64, i64);
744
745impl Engine {
    /// Builds an empty engine: new catalog, user store, publication /
    /// subscription / statistics stores, plan cache and query stats; no
    /// open transaction; no host callbacks (clock, salt, activity,
    /// audit, slow-query logger); no row cap and no session parameters.
    ///
    /// `next_tx_id` starts at 1 (slot 0 is `IMPLICIT_TX`) and
    /// `foreign_key_checks` starts enabled.
    pub fn new() -> Self {
        Self {
            catalog: Catalog::new(),
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            next_tx_id: 1,
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
            foreign_key_checks: true,
            pending_foreign_keys: Vec::new(),
        }
    }
772
773    /// v7.11.0 — clone the engine's committed catalog + read-time
774    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
775    /// backed by `PersistentVec`; cloning is O(log n) per table).
776    /// Subsequent writes to this engine are invisible to the
777    /// snapshot; the snapshot is self-contained and can be moved
778    /// to another thread for concurrent `execute_readonly_on_snapshot`
779    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
780    /// and any other read-fanout pattern.
781    #[must_use]
782    pub fn clone_snapshot(&self) -> CatalogSnapshot {
783        CatalogSnapshot {
784            catalog: self.active_catalog().clone(),
785            statistics: self.statistics.clone(),
786            clock: self.clock,
787            max_query_rows: self.max_query_rows,
788        }
789    }
790
791    /// v7.11.1 — execute a read-only SQL statement against a
792    /// `CatalogSnapshot` without touching this engine. Same
793    /// semantics as `execute_readonly` but parameterised on the
794    /// snapshot's catalog. Reject DDL/DML the same way
795    /// `execute_readonly` does. Static-on-Self so the caller can
796    /// dispatch without holding an `Engine` borrow alongside the
797    /// snapshot.
798    pub fn execute_readonly_on_snapshot(
799        snapshot: &CatalogSnapshot,
800        sql: &str,
801    ) -> Result<QueryResult, EngineError> {
802        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
803    }
804
805    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
806    /// cancellation. Builds a transient `Engine` over the snapshot
807    /// state, runs `execute_readonly_with_cancel`, drops. The
808    /// transient engine is cheap to construct (no I/O; everything
809    /// is just struct moves) and lets the existing read path stay
810    /// untouched.
811    pub fn execute_readonly_on_snapshot_with_cancel(
812        snapshot: &CatalogSnapshot,
813        sql: &str,
814        cancel: CancelToken<'_>,
815    ) -> Result<QueryResult, EngineError> {
816        let transient = Engine {
817            catalog: snapshot.catalog.clone(),
818            statistics: snapshot.statistics.clone(),
819            clock: snapshot.clock,
820            max_query_rows: snapshot.max_query_rows,
821            ..Engine::default()
822        };
823        transient.execute_readonly_with_cancel(sql, cancel)
824    }
825
826    /// Construct an engine restored from a previously-snapshotted catalog
827    /// (see `snapshot()`).
828    pub fn restore(catalog: Catalog) -> Self {
829        Self {
830            catalog,
831            tx_catalogs: BTreeMap::new(),
832            current_tx: None,
833            next_tx_id: 1,
834            clock: None,
835            salt_fn: None,
836            max_query_rows: None,
837            users: UserStore::new(),
838            publications: publications::Publications::new(),
839            subscriptions: subscriptions::Subscriptions::new(),
840            statistics: statistics::Statistics::new(),
841            plan_cache: plan_cache::PlanCache::new(),
842            query_stats: query_stats::QueryStats::new(),
843            activity_provider: None,
844            audit_chain_provider: None,
845            audit_verifier: None,
846            slow_query_threshold_us: None,
847            slow_query_logger: None,
848            session_params: BTreeMap::new(),
849            trigger_recursion_depth: 0,
850            foreign_key_checks: true,
851            pending_foreign_keys: Vec::new(),
852        }
853    }
854
855    /// Restore an engine + user table from a v4.1 envelope produced
856    /// by `snapshot_with_users()`. Falls back to plain catalog-only
857    /// restore if the envelope magic isn't present (so v3.x snapshot
858    /// files still load). v6.1.2 adds the optional publications
859    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
860    /// empty publication table.
861    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
862        match split_envelope(buf) {
863            EnvelopeParse::Pair {
864                catalog: catalog_bytes,
865                users: user_bytes,
866                publications: pub_bytes,
867                subscriptions: sub_bytes,
868                statistics: stats_bytes,
869            } => {
870                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
871                let users = users::deserialize_users(user_bytes)
872                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
873                let publications = match pub_bytes {
874                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
875                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
876                    })?,
877                    None => publications::Publications::new(),
878                };
879                let subscriptions = match sub_bytes {
880                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
881                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
882                    })?,
883                    None => subscriptions::Subscriptions::new(),
884                };
885                let statistics = match stats_bytes {
886                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
887                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
888                    })?,
889                    None => statistics::Statistics::new(),
890                };
891                Ok(Self {
892                    catalog,
893                    tx_catalogs: BTreeMap::new(),
894                    current_tx: None,
895                    next_tx_id: 1,
896                    clock: None,
897                    salt_fn: None,
898                    max_query_rows: None,
899                    users,
900                    publications,
901                    subscriptions,
902                    statistics,
903                    plan_cache: plan_cache::PlanCache::new(),
904                    query_stats: query_stats::QueryStats::new(),
905                    activity_provider: None,
906                    audit_chain_provider: None,
907                    audit_verifier: None,
908                    slow_query_threshold_us: None,
909                    slow_query_logger: None,
910                    session_params: BTreeMap::new(),
911                    trigger_recursion_depth: 0,
912            foreign_key_checks: true,
913            pending_foreign_keys: Vec::new(),
914                })
915            }
916            EnvelopeParse::CrcMismatch { expected, computed } => {
917                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
918                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
919                ))))
920            }
921            EnvelopeParse::Bare => {
922                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
923                Ok(Self::restore(catalog))
924            }
925        }
926    }
927
928    pub const fn users(&self) -> &UserStore {
929        &self.users
930    }
931
932    /// `salt` is supplied by the caller (the host has a random
933    /// source; the engine is `no_std`). Caller should pass a fresh
934    /// 16-byte random value per user.
935    pub fn create_user(
936        &mut self,
937        name: &str,
938        password: &str,
939        role: Role,
940        salt: [u8; 16],
941    ) -> Result<(), UserError> {
942        self.users.create(name, password, role, salt)?;
943        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
944        // auth can verify without re-running PBKDF2 per attempt.
945        // Uses a fresh salt from the host RNG (falls back to a
946        // deterministic per-username salt when no RNG is wired, same
947        // as the legacy hash path).
948        let scram_salt = self.salt_fn.map_or_else(
949            || {
950                let mut s = [0u8; users::SCRAM_SALT_LEN];
951                let digest = spg_crypto::hash(name.as_bytes());
952                // Use bytes 16..32 of BLAKE3 so we don't reuse the
953                // exact same fallback salt as the BLAKE3 hash path.
954                s.copy_from_slice(&digest[16..32]);
955                s
956            },
957            |f| f(),
958        );
959        self.users
960            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
961        Ok(())
962    }
963
964    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
965        self.users.drop(name)
966    }
967
968    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
969        self.users.verify(name, password)
970    }
971
972    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
973    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
974    #[must_use]
975    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
976        self.clock = Some(clock);
977        self
978    }
979
980    /// Builder: attach an OS-backed RNG for per-user password salts.
981    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
982    #[must_use]
983    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
984        self.salt_fn = Some(f);
985        self
986    }
987
988    /// Builder: cap the number of rows a single SELECT may return.
989    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
990    /// the bound is checked inside the executor so a runaway
991    /// catalog scan can't allocate millions of rows before the
992    /// server gets a chance to reject the result.
993    #[must_use]
994    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
995        self.max_query_rows = Some(n);
996        self
997    }
998
999    /// The *committed* catalog. Note: during a transaction this returns the
1000    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
1001    /// the shadow. Tests that inspect outside-TX state should use this.
1002    pub const fn catalog(&self) -> &Catalog {
1003        &self.catalog
1004    }
1005
1006    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1007    /// adds the rule that an open TX's shadow is never snapshotted — only the
1008    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1009    /// when there are users to persist; an empty user table snapshots as the
1010    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1011    /// adds publications to the envelope condition: either non-empty
1012    /// users OR non-empty publications now triggers the envelope path.
1013    pub fn snapshot(&self) -> Vec<u8> {
1014        if self.users.is_empty()
1015            && self.publications.is_empty()
1016            && self.subscriptions.is_empty()
1017            && self.statistics.is_empty()
1018        {
1019            self.catalog.serialize()
1020        } else {
1021            build_envelope(
1022                &self.catalog.serialize(),
1023                &users::serialize_users(&self.users),
1024                &self.publications.serialize(),
1025                &self.subscriptions.serialize(),
1026                &self.statistics.serialize(),
1027            )
1028        }
1029    }
1030
1031    /// True when at least one TX slot is in flight. v4.41.1 runtime
1032    /// invariant: at most one slot active at a time (dispatch holds
1033    /// `engine.write()` across the entire wrap). v4.42 will let this
1034    /// return true with multiple slots concurrently.
1035    pub fn in_transaction(&self) -> bool {
1036        !self.tx_catalogs.is_empty()
1037    }
1038
1039    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1040    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1041    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1042    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1043    /// sequentially under a single `engine.write()` so each task's
1044    /// mutations accumulate into shared state, then either keeps the
1045    /// accumulated state (fsync OK) or restores the pre-image via
1046    /// `replace_catalog` (fsync err).
1047    pub fn alloc_tx_id(&mut self) -> TxId {
1048        let id = TxId(self.next_tx_id);
1049        self.next_tx_id = self.next_tx_id.saturating_add(1);
1050        id
1051    }
1052
1053    /// v4.42 — atomically replace the live catalog. Used by the
1054    /// commit-barrier leader to roll back a group whose batched
1055    /// fsync failed: the leader snapshots `engine.catalog().clone()`
1056    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
1057    /// at group start, sequentially applies each task's BEGIN+sql+
1058    /// COMMIT under the same write lock to accumulate mutations
1059    /// into shared state, batches the WAL bytes, fsyncs once, and
1060    /// on failure calls this with the pre-image to undo every
1061    /// task in the group at once.
1062    ///
1063    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
1064    /// explicit-TX slot from a concurrent client (created via the
1065    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
1066    /// MVCC-readers v5+ work) has its own snapshot baked into the
1067    /// slot — restoring `self.catalog` to the pre-image leaves
1068    /// those slots untouched, exactly as they were when the leader
1069    /// took the lock. The leader's own implicit-TX slots are all
1070    /// already discarded (`exec_commit` removed them as each
1071    /// task's COMMIT ran) by the time this is reached.
1072    pub fn replace_catalog(&mut self, catalog: Catalog) {
1073        self.catalog = catalog;
1074    }
1075
1076    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1077    /// so tests + the spg-server freezer can drive a freeze without
1078    /// reaching into the private `active_catalog_mut`. v6.7.4
1079    /// parallel freezer will build on this surface.
1080    ///
1081    /// Marks the table's cached `cold_row_count` stale because the
1082    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1083    pub fn freeze_oldest_to_cold(
1084        &mut self,
1085        table_name: &str,
1086        index_name: &str,
1087        max_rows: usize,
1088    ) -> Result<spg_storage::FreezeReport, EngineError> {
1089        let report = self
1090            .active_catalog_mut()
1091            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1092            .map_err(EngineError::Storage)?;
1093        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1094            t.mark_cold_row_count_stale();
1095        }
1096        Ok(report)
1097    }
1098
1099    /// v6.7.5 — public shim used by the spg-server follower's
1100    /// segment-forwarding receiver. Registers a cold-tier segment
1101    /// at a specific id (the master's id, as transmitted on the
1102    /// wire) so the follower's BTree-Cold locators stay byte-
1103    /// identical with the master's. Wraps
1104    /// `Catalog::load_segment_bytes_at` under the standard
1105    /// clone-mutate-replace pattern.
1106    ///
1107    /// Returns `Ok(())` on success **and** on the "slot already
1108    /// occupied" case — a follower mid-reconnect may receive a
1109    /// segment chunk for a segment_id it already has on disk
1110    /// (forwarded last session); the caller should treat that
1111    /// path as a no-op rather than a fatal error.
1112    pub fn receive_cold_segment(
1113        &mut self,
1114        segment_id: u32,
1115        bytes: Vec<u8>,
1116    ) -> Result<(), EngineError> {
1117        let mut new_cat = self.catalog.clone();
1118        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1119            Ok(()) => {
1120                self.replace_catalog(new_cat);
1121                Ok(())
1122            }
1123            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1124            Err(e) => Err(EngineError::Storage(e)),
1125        }
1126    }
1127
1128    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1129    /// driving every BTree index on every user table. Returns one
1130    /// `(table, index, report)` triple for each merge that
1131    /// actually happened (no-op (table, index) pairs are filtered
1132    /// out so callers can size persist-side work to the live
1133    /// merges). Caller is responsible for persisting each
1134    /// `report.merged_segment_bytes` and updating the on-disk
1135    /// segment registry; engine layer is no_std and never
1136    /// touches disk.
1137    ///
1138    /// Marks every touched table's cached `cold_row_count` stale
1139    /// — compaction GC'd some shadowed rows, so the count must be
1140    /// re-derived on the next ANALYZE.
1141    pub fn compact_cold_segments_with_target(
1142        &mut self,
1143        target_segment_bytes: u64,
1144    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1145        let table_names = self.active_catalog().table_names();
1146        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1147        for tname in table_names {
1148            if is_internal_table_name(&tname) {
1149                continue;
1150            }
1151            let idx_names: Vec<String> = {
1152                let Some(t) = self.active_catalog().get(&tname) else {
1153                    continue;
1154                };
1155                t.indices()
1156                    .iter()
1157                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1158                    .map(|i| i.name.clone())
1159                    .collect()
1160            };
1161            for iname in idx_names {
1162                let report = self
1163                    .active_catalog_mut()
1164                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1165                    .map_err(EngineError::Storage)?;
1166                if report.merged_segment_id.is_some() {
1167                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1168                        t.mark_cold_row_count_stale();
1169                    }
1170                    reports.push((tname.clone(), iname, report));
1171                }
1172            }
1173        }
1174        Ok(reports)
1175    }
1176
1177    fn active_catalog(&self) -> &Catalog {
1178        match self.current_tx {
1179            Some(t) => self
1180                .tx_catalogs
1181                .get(&t)
1182                .map_or(&self.catalog, |s| &s.catalog),
1183            None => &self.catalog,
1184        }
1185    }
1186
1187    /// v7.12.4 — snapshot every row-level trigger on `table` that
1188    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1189    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1190    /// referenced function definition. Returned as a vec of owned
1191    /// `FunctionDef` so the row-write loop can fire them without
1192    /// holding a borrow on the catalog (which would conflict with
1193    /// the table.insert / update_row / delete mutable borrows).
1194    fn snapshot_row_triggers(
1195        &self,
1196        table: &str,
1197        event: &str,
1198        timing: &str,
1199    ) -> Vec<spg_storage::FunctionDef> {
1200        let cat = self.active_catalog();
1201        cat.triggers()
1202            .iter()
1203            .filter(|t| {
1204                t.table == table
1205                    && t.timing.eq_ignore_ascii_case(timing)
1206                    && t.for_each.eq_ignore_ascii_case("row")
1207                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1208            })
1209            .filter_map(|t| cat.functions().get(&t.function).cloned())
1210            .collect()
1211    }
1212
1213    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1214    /// function with its `UPDATE OF cols` filter (mailrs round-5
1215    /// G7). Empty filter Vec means "fire unconditionally", matching
1216    /// the v7.12 behaviour.
1217    fn snapshot_update_row_triggers(
1218        &self,
1219        table: &str,
1220        timing: &str,
1221    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1222        let cat = self.active_catalog();
1223        cat.triggers()
1224            .iter()
1225            .filter(|t| {
1226                t.table == table
1227                    && t.timing.eq_ignore_ascii_case(timing)
1228                    && t.for_each.eq_ignore_ascii_case("row")
1229                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1230            })
1231            .filter_map(|t| {
1232                cat.functions()
1233                    .get(&t.function)
1234                    .cloned()
1235                    .map(|fd| (fd, t.update_columns.clone()))
1236            })
1237            .collect()
1238    }
1239
1240    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1241    /// Called by the INSERT / UPDATE / DELETE executors after
1242    /// their main row-write loop returns. Each statement runs
1243    /// inside the same cancel scope as the firing DML and bumps
1244    /// the recursion counter; nested embedded SQL beyond
1245    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1246    /// a trigger-graph cycle surfaces as a query failure instead
1247    /// of stack-blowing the engine.
1248    fn execute_deferred_trigger_stmts(
1249        &mut self,
1250        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1251        cancel: CancelToken<'_>,
1252    ) -> Result<(), EngineError> {
1253        for d in deferred {
1254            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1255                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1256                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1257                     {:?} would push past the {} cap — check for trigger cycles)",
1258                    self.trigger_recursion_depth,
1259                    d.function,
1260                    MAX_TRIGGER_RECURSION,
1261                ))));
1262            }
1263            self.trigger_recursion_depth += 1;
1264            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1265            self.trigger_recursion_depth -= 1;
1266            res?;
1267        }
1268        Ok(())
1269    }
1270
1271    fn active_catalog_mut(&mut self) -> &mut Catalog {
1272        let tx = self.current_tx;
1273        match tx {
1274            Some(t) => match self.tx_catalogs.get_mut(&t) {
1275                Some(s) => &mut s.catalog,
1276                None => &mut self.catalog,
1277            },
1278            None => &mut self.catalog,
1279        }
1280    }
1281
1282    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1283    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1284    /// every other statement, so the caller can fall through to the
1285    /// `&mut self` `execute` path under a write lock. Engine state is
1286    /// not mutated even on the success path (`rewrite_clock_calls`
1287    /// and `resolve_order_by_position` both mutate the locally-owned
1288    /// AST, not `self`).
1289    ///
1290    /// **v4.0 concurrency**: this is the entry point the server takes
1291    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1292    /// parallel without serialising on a single mutex.
1293    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1294        self.execute_readonly_with_cancel(sql, CancelToken::none())
1295    }
1296
1297    /// v4.5 — read path with cooperative cancellation. Token's
1298    /// `is_cancelled` is checked at the start (so a watchdog that
1299    /// already fired returns Cancelled immediately) and at row-loop
1300    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1301    /// don't bother checking.
1302    pub fn execute_readonly_with_cancel(
1303        &self,
1304        sql: &str,
1305        cancel: CancelToken<'_>,
1306    ) -> Result<QueryResult, EngineError> {
1307        cancel.check()?;
1308        let mut stmt = parser::parse_statement(sql)?;
1309        let now_micros = self.clock.map(|f| f());
1310        rewrite_clock_calls(&mut stmt, now_micros);
1311        if let Statement::Select(s) = &mut stmt {
1312            resolve_order_by_position(s);
1313            // v6.2.3 — cost-based JOIN reorder (read path).
1314            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1315        }
1316        let result = match stmt {
1317            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1318            Statement::ShowTables => Ok(self.exec_show_tables()),
1319            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1320            Statement::ShowUsers => Ok(self.exec_show_users()),
1321            Statement::ShowPublications => Ok(self.exec_show_publications()),
1322            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1323            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1324                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1325            )),
1326            Statement::Explain(e) => self.exec_explain(&e, cancel),
1327            _ => Err(EngineError::WriteRequired),
1328        };
1329        self.enforce_row_limit(result)
1330    }
1331
1332    /// v4.2: cap result-set size. Applied after the executor
1333    /// materialises rows but before they leave the engine — wrapping
1334    /// every Rows-returning exec_* function would scatter the check.
1335    fn enforce_row_limit(
1336        &self,
1337        result: Result<QueryResult, EngineError>,
1338    ) -> Result<QueryResult, EngineError> {
1339        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1340            && rows.len() > cap
1341        {
1342            return Err(EngineError::RowLimitExceeded(cap));
1343        }
1344        result
1345    }
1346
1347    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1348        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1349    }
1350
1351    /// v4.5 — write path with cooperative cancellation. Same dispatch
1352    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1353    /// a separate entry point for backward-compat with the v4.5
1354    /// public API.
1355    pub fn execute_with_cancel(
1356        &mut self,
1357        sql: &str,
1358        cancel: CancelToken<'_>,
1359    ) -> Result<QueryResult, EngineError> {
1360        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1361    }
1362
1363    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1364    /// slot identified by `tx_id` so spg-server dispatch can scope
1365    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1366    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1367    /// every other caller (engine self-tests, replay, spg-embedded)
1368    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1369    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1370        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1371    }
1372
1373    /// v4.41.1 write path with cooperative cancellation + explicit TX
1374    /// scope. Sets `self.current_tx` for the duration of the call so
1375    /// every `exec_*` helper transparently sees its TX's shadow
1376    /// catalog and savepoint stack; restores on exit so the field is
1377    /// only valid mid-call (no leakage across calls).
1378    pub fn execute_in_with_cancel(
1379        &mut self,
1380        sql: &str,
1381        tx_id: TxId,
1382        cancel: CancelToken<'_>,
1383    ) -> Result<QueryResult, EngineError> {
1384        let saved = self.current_tx;
1385        self.current_tx = Some(tx_id);
1386        let result = self.execute_inner_with_cancel(sql, cancel);
1387        self.current_tx = saved;
1388        result
1389    }
1390
1391    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1392    /// resulting [`Statement`] can be cached and re-executed via
1393    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1394    /// the simple-query path would synthesise internally (clock
1395    /// rewrites + ORDER BY position-ref resolution applied at
1396    /// prepare time, since both are session-independent). The
1397    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1398    /// nodes; they're resolved to concrete values per-call by
1399    /// `execute_prepared`'s substitution walk.
1400    ///
1401    /// Pgwire's `Parse` (P) message lands here.
1402    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1403        let mut stmt = parser::parse_statement(sql)?;
1404        let now_micros = self.clock.map(|f| f());
1405        rewrite_clock_calls(&mut stmt, now_micros);
1406        if let Statement::Select(s) = &mut stmt {
1407            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1408            // SELECT-list item BEFORE position / alias resolution so
1409            // downstream passes see the explicit list.
1410            expand_group_by_all(s);
1411            resolve_order_by_position(s);
1412            // v6.2.3 — cost-based JOIN reorder. No-op for
1413            // single-table FROMs or any non-INNER join shape.
1414            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1415        }
1416        Ok(stmt)
1417    }
1418
1419    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1420    /// the plan cache on hit, runs the full `prepare()` path on miss
1421    /// and inserts the resulting plan before returning. Skipping the
1422    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1423    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1424    ///
1425    /// Returns a cloned `Statement` (not a borrow) because the
1426    /// pgwire layer owns its `PreparedStmt` map per-session and the
1427    /// engine-level cache must stay available for other sessions.
1428    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1429    /// it replaces.
1430    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1431        // v6.3.1 — version-aware lookup. If the cached plan was
1432        // prepared before the most recent ANALYZE, evict and replan.
1433        let current_version = self.statistics.version();
1434        if let Some(plan) = self.plan_cache.get(sql) {
1435            if plan.statistics_version == current_version {
1436                return Ok(plan.stmt.clone());
1437            }
1438            // Stale entry — fall through to evict + re-prepare.
1439        }
1440        self.plan_cache.evict(sql);
1441        let stmt = self.prepare(sql)?;
1442        let source_tables = plan_cache::collect_source_tables(&stmt);
1443        let plan = plan_cache::PreparedPlan {
1444            stmt: stmt.clone(),
1445            statistics_version: current_version,
1446            source_tables,
1447            describe_columns: alloc::vec::Vec::new(),
1448        };
1449        self.plan_cache.insert(String::from(sql), plan);
1450        Ok(stmt)
1451    }
1452
1453    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1454    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1455        &self.plan_cache
1456    }
1457
1458    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1459    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1460        &mut self.plan_cache
1461    }
1462
1463    /// v6.3.3 — Describe a prepared `Statement` without executing.
1464    /// Returns `(parameter_oids, output_columns)`. Empty
1465    /// `output_columns` means the statement has no row-producing
1466    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1467    /// — pgwire layer maps that to a `NoData` reply.
1468    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1469        describe::describe_prepared(stmt, self.active_catalog())
1470    }
1471
1472    /// v6.1.1 — execute a [`Statement`] previously returned by
1473    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1474    /// nodes for the corresponding [`Value`] in `params` (1-based
1475    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1476    /// are decoded into typed `Value`s by the pgwire layer before
1477    /// this call so the resulting AST hits the same execution
1478    /// path as a simple query — no SQL re-parse.
1479    ///
1480    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1481    pub fn execute_prepared(
1482        &mut self,
1483        mut stmt: Statement,
1484        params: &[Value],
1485    ) -> Result<QueryResult, EngineError> {
1486        substitute_placeholders(&mut stmt, params)?;
1487        self.execute_stmt_with_cancel(stmt, CancelToken::none())
1488    }
1489
1490    fn execute_inner_with_cancel(
1491        &mut self,
1492        sql: &str,
1493        cancel: CancelToken<'_>,
1494    ) -> Result<QueryResult, EngineError> {
1495        cancel.check()?;
1496        let stmt = self.prepare(sql)?;
1497        // v6.5.1 — wrap the executor with a wall-clock window so we
1498        // can record into spg_stat_query. Skip when the engine has
1499        // no clock attached (no_std embedded callers).
1500        let start_us = self.clock.map(|f| f());
1501        let result = self.execute_stmt_with_cancel(stmt, cancel);
1502        if let (Some(t0), Ok(_)) = (start_us, &result) {
1503            let now = self.clock.map_or(t0, |f| f());
1504            let elapsed = now.saturating_sub(t0).max(0) as u64;
1505            self.query_stats.record(sql, elapsed, now as u64);
1506            // v6.5.6 — slow-query log: fire callback when elapsed
1507            // exceeds the configured floor.
1508            if let (Some(threshold), Some(logger)) =
1509                (self.slow_query_threshold_us, self.slow_query_logger)
1510                && elapsed >= threshold
1511            {
1512                logger(sql, elapsed);
1513            }
1514        }
1515        result
1516    }
1517
1518    fn execute_stmt_with_cancel(
1519        &mut self,
1520        stmt: Statement,
1521        cancel: CancelToken<'_>,
1522    ) -> Result<QueryResult, EngineError> {
1523        cancel.check()?;
1524        let result = match stmt {
1525            Statement::CreateTable(s) => self.exec_create_table(s),
1526            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1527            // CommandOk with affected=0; modified_catalog=false so
1528            // the WAL doesn't grow a useless entry. mailrs F3.
1529            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1530                affected: 0,
1531                modified_catalog: false,
1532            }),
1533            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1534            // PL/pgSQL). mailrs H1 + pg_dump compat.
1535            Statement::DoBlock => Ok(QueryResult::CommandOk {
1536                affected: 0,
1537                modified_catalog: false,
1538            }),
1539            // v7.14.0 — empty-statement no-op for pg_dump /
1540            // mysqldump preamble lines that collapse to nothing
1541            // after comment-stripping.
1542            Statement::Empty => Ok(QueryResult::CommandOk {
1543                affected: 0,
1544                modified_catalog: false,
1545            }),
1546            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
1547            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
1548            Statement::CreateIndex(s) => self.exec_create_index(s),
1549            Statement::Insert(s) => self.exec_insert(s),
1550            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1551            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1552            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1553            Statement::Begin => self.exec_begin(),
1554            Statement::Commit => self.exec_commit(),
1555            Statement::Rollback => self.exec_rollback(),
1556            Statement::Savepoint(name) => self.exec_savepoint(name),
1557            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1558            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1559            Statement::ShowTables => Ok(self.exec_show_tables()),
1560            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1561            Statement::ShowUsers => Ok(self.exec_show_users()),
1562            Statement::ShowPublications => Ok(self.exec_show_publications()),
1563            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1564            Statement::CreateUser(s) => self.exec_create_user(&s),
1565            Statement::DropUser(name) => self.exec_drop_user(&name),
1566            Statement::Explain(e) => self.exec_explain(&e, cancel),
1567            Statement::AlterIndex(s) => self.exec_alter_index(s),
1568            Statement::AlterTable(s) => self.exec_alter_table(s),
1569            Statement::CreatePublication(s) => self.exec_create_publication(s),
1570            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1571            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1572            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1573            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1574            // which lives in spg-server's ServerState. The engine
1575            // surfaces a clear error; the server-layer dispatch
1576            // intercepts the SQL before it reaches the engine on
1577            // a server build, so this arm only fires for
1578            // engine-only callers (spg-embedded, lib tests).
1579            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1580                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1581            )),
1582            // v6.2.0 — ANALYZE recomputes per-column histograms.
1583            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1584            // v6.7.3 — COMPACT COLD SEGMENTS.
1585            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1586            // v7.12.1 — SET / RESET session parameter. Engine
1587            // tracks the value in `session_params`; FTS dispatcher
1588            // reads `default_text_search_config`. Everything else
1589            // is a recorded no-op (PG dump compat).
1590            Statement::SetParameter { name, value } => {
1591                self.set_session_param(name, value);
1592                Ok(QueryResult::CommandOk {
1593                    affected: 0,
1594                    modified_catalog: false,
1595                })
1596            }
1597            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
1598            // through `set_session_param` so engine-known params
1599            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
1600            // effect; unknown pairs (including `@VAR` LHS from the
1601            // mysqldump preamble) are recorded then ignored.
1602            Statement::SetParameterList(pairs) => {
1603                for (name, value) in pairs {
1604                    self.set_session_param(name, value);
1605                }
1606                Ok(QueryResult::CommandOk {
1607                    affected: 0,
1608                    modified_catalog: false,
1609                })
1610            }
1611            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1612            // for the PL/pgSQL trigger surface. exec_* methods are
1613            // defined alongside the existing CREATE handlers below.
1614            Statement::CreateFunction(s) => self.exec_create_function(s),
1615            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1616            Statement::DropTrigger {
1617                name,
1618                table,
1619                if_exists,
1620            } => self.exec_drop_trigger(&name, &table, if_exists),
1621            Statement::DropFunction { name, if_exists } => {
1622                self.exec_drop_function(&name, if_exists)
1623            }
1624            Statement::ResetParameter(target) => {
1625                match target {
1626                    None => self.session_params.clear(),
1627                    Some(name) => {
1628                        self.session_params.remove(&name.to_ascii_lowercase());
1629                    }
1630                }
1631                Ok(QueryResult::CommandOk {
1632                    affected: 0,
1633                    modified_catalog: false,
1634                })
1635            }
1636        };
1637        self.enforce_row_limit(result)
1638    }
1639
1640    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1641    /// surface as `EngineError::Unsupported` so the existing PG-wire
1642    /// error mapping stays uniform; the message carries the name so
1643    /// operators can grep replication-log noise. Inside-transaction
1644    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1645    /// stance) — replication-catalog mutation is a connection-level
1646    /// administrative op, not a transactional one.
1647    fn exec_create_publication(
1648        &mut self,
1649        s: CreatePublicationStatement,
1650    ) -> Result<QueryResult, EngineError> {
1651        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1652        // was over-cautious: it also blocked the auto-commit wrap
1653        // path (which begins an internal TX around every WAL-
1654        // logged statement). PG itself allows CREATE PUBLICATION
1655        // inside a transaction (it rolls back with the TX).
1656        self.publications
1657            .create(s.name, s.scope)
1658            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1659        Ok(QueryResult::CommandOk {
1660            affected: 1,
1661            modified_catalog: true,
1662        })
1663    }
1664
1665    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1666    /// no-op when the publication doesn't exist (returns `affected=0`
1667    /// in that case so the wire-level command tag distinguishes
1668    /// "dropped" from "no-op", though both succeed).
1669    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1670        let removed = self.publications.drop(name);
1671        Ok(QueryResult::CommandOk {
1672            affected: usize::from(removed),
1673            modified_catalog: removed,
1674        })
1675    }
1676
1677    /// v6.1.2 — read access to the publication catalog. Used by
1678    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1679    /// (v6.1.3+), and by e2e tests that need to assert state without
1680    /// going through the wire.
1681    pub const fn publications(&self) -> &publications::Publications {
1682        &self.publications
1683    }
1684
1685    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1686    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1687    /// created subscription. The actual worker thread is spawned
1688    /// by spg-server once the engine returns success.
1689    fn exec_create_subscription(
1690        &mut self,
1691        s: CreateSubscriptionStatement,
1692    ) -> Result<QueryResult, EngineError> {
1693        // See exec_create_publication — the in_transaction gate
1694        // was over-cautious; the auto-commit wrap path holds an
1695        // internal TX that this check was incorrectly blocking.
1696        let sub = subscriptions::Subscription {
1697            conn_str: s.conn_str,
1698            publications: s.publications,
1699            enabled: true,
1700            last_received_pos: 0,
1701        };
1702        self.subscriptions
1703            .create(s.name, sub)
1704            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1705        Ok(QueryResult::CommandOk {
1706            affected: 1,
1707            modified_catalog: true,
1708        })
1709    }
1710
1711    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1712    /// doesn't exist (PG-compatible). The associated worker is
1713    /// torn down by spg-server when it observes the catalog
1714    /// change at the next snapshot or via the engine's
1715    /// subscriptions accessor (the worker polls the catalog on
1716    /// reconnect; v6.1.5's filter-side will tighten this to an
1717    /// explicit signal).
1718    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1719        let removed = self.subscriptions.drop(name);
1720        Ok(QueryResult::CommandOk {
1721            affected: usize::from(removed),
1722            modified_catalog: removed,
1723        })
1724    }
1725
1726    /// v6.1.4 — read access to the subscription catalog. Used by
1727    /// the subscription worker (read its own row to find its
1728    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1729    /// and by e2e tests asserting state directly.
1730    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1731        &self.subscriptions
1732    }
1733
1734    /// v6.1.4 — write access to `last_received_pos`. Worker
1735    /// calls this after each apply batch (under the engine's
1736    /// write-lock). Returns `false` when the subscription was
1737    /// dropped between when the worker received the record and
1738    /// when this call landed.
1739    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1740        self.subscriptions.update_last_received_pos(name, pos)
1741    }
1742
1743    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1744    /// `(name, conn_str, publications, enabled, last_received_pos)`
1745    /// ordered by subscription name. The `publications` column is
1746    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1747    /// callers wanting structured access read `Engine::subscriptions`.
1748    fn exec_show_subscriptions(&self) -> QueryResult {
1749        let columns = alloc::vec![
1750            ColumnSchema::new("name", DataType::Text, false),
1751            ColumnSchema::new("conn_str", DataType::Text, false),
1752            ColumnSchema::new("publications", DataType::Text, false),
1753            ColumnSchema::new("enabled", DataType::Bool, false),
1754            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1755        ];
1756        let rows: Vec<Row> = self
1757            .subscriptions
1758            .iter()
1759            .map(|(name, sub)| {
1760                Row::new(alloc::vec![
1761                    Value::Text(name.clone()),
1762                    Value::Text(sub.conn_str.clone()),
1763                    Value::Text(sub.publications.join(", ")),
1764                    Value::Bool(sub.enabled),
1765                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1766                ])
1767            })
1768            .collect();
1769        QueryResult::Rows { columns, rows }
1770    }
1771
1772    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1773    /// `(table, column)` pair tracked in `Statistics`, with
1774    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1775    /// the same canonical form vector literals use for round-trip.
1776    fn exec_spg_statistic(&self) -> QueryResult {
1777        let columns = alloc::vec![
1778            ColumnSchema::new("table_name", DataType::Text, false),
1779            ColumnSchema::new("column_name", DataType::Text, false),
1780            ColumnSchema::new("null_frac", DataType::Float, false),
1781            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1782            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1783            // v6.7.0 — appended column (v6.2.0 stability contract
1784            // allows APPEND to spg_statistic, not reorder/rename).
1785            // Reports the cached per-table cold-row count; same
1786            // value across every column row of the same table.
1787            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1788        ];
1789        let rows: Vec<Row> = self
1790            .statistics
1791            .iter()
1792            .map(|((t, c), s)| {
1793                let cold = self
1794                    .catalog
1795                    .get(t)
1796                    .map_or(0, |table| table.cold_row_count());
1797                Row::new(alloc::vec![
1798                    Value::Text(t.clone()),
1799                    Value::Text(c.clone()),
1800                    Value::Float(f64::from(s.null_frac)),
1801                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1802                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1803                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1804                ])
1805            })
1806            .collect();
1807        QueryResult::Rows { columns, rows }
1808    }
1809
1810    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1811    /// per subscription with `(name, conn_str, publications,
1812    /// last_received_pos, enabled)`. Surface mirrors
1813    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1814    /// shape so it composes with SELECT clauses (WHERE, projection
1815    /// onto specific columns, etc).
1816    fn exec_spg_stat_replication(&self) -> QueryResult {
1817        let columns = alloc::vec![
1818            ColumnSchema::new("name", DataType::Text, false),
1819            ColumnSchema::new("conn_str", DataType::Text, false),
1820            ColumnSchema::new("publications", DataType::Text, false),
1821            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1822            ColumnSchema::new("enabled", DataType::Bool, false),
1823        ];
1824        let rows: Vec<Row> = self
1825            .subscriptions
1826            .iter()
1827            .map(|(name, sub)| {
1828                Row::new(alloc::vec![
1829                    Value::Text(name.clone()),
1830                    Value::Text(sub.conn_str.clone()),
1831                    Value::Text(sub.publications.join(",")),
1832                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1833                    Value::Bool(sub.enabled),
1834                ])
1835            })
1836            .collect();
1837        QueryResult::Rows { columns, rows }
1838    }
1839
1840    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1841    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1842    /// total_bytes)`.
1843    ///
1844    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1845    /// carve-out. Walks every user table's BTree indices to find
1846    /// which table's Cold locators point at each segment. Empty
1847    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1848    /// before any index registered a locator). The walk is
1849    /// O(tables × indices × keys); cached per call, not across
1850    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1851    fn exec_spg_stat_segment(&self) -> QueryResult {
1852        let columns = alloc::vec![
1853            ColumnSchema::new("segment_id", DataType::BigInt, false),
1854            ColumnSchema::new("table_name", DataType::Text, false),
1855            ColumnSchema::new("num_rows", DataType::BigInt, false),
1856            ColumnSchema::new("num_pages", DataType::BigInt, false),
1857            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1858        ];
1859        // v6.7.0 — build a segment_id → table_name map by walking
1860        // every user table's BTree indices once. O(tables × indices
1861        // × keys) for the v6.5.0 carve-out resolution; acceptable
1862        // because spg_stat_segment is operator-facing (not on a
1863        // hot-loop path).
1864        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1865        for tname in self.catalog.table_names() {
1866            if is_internal_table_name(&tname) {
1867                continue;
1868            }
1869            let Some(t) = self.catalog.get(&tname) else {
1870                continue;
1871            };
1872            for idx in t.indices() {
1873                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1874                    for (_, locs) in map.iter() {
1875                        for loc in locs {
1876                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1877                                segment_owners
1878                                    .entry(*segment_id)
1879                                    .or_insert_with(|| tname.clone());
1880                            }
1881                        }
1882                    }
1883                }
1884            }
1885        }
1886        let rows: Vec<Row> = self
1887            .catalog
1888            .cold_segment_ids_global()
1889            .iter()
1890            .filter_map(|&id| {
1891                let seg = self.catalog.cold_segment(id)?;
1892                let meta = seg.meta();
1893                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1894                Some(Row::new(alloc::vec![
1895                    Value::BigInt(i64::from(id)),
1896                    Value::Text(owner),
1897                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1898                    Value::BigInt(i64::from(meta.num_pages)),
1899                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1900                ]))
1901            })
1902            .collect();
1903        QueryResult::Rows { columns, rows }
1904    }
1905
1906    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1907    /// distinct SQL text recorded since the engine booted, capped
1908    /// at `QUERY_STATS_MAX` (1024). Columns:
1909    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1910    /// mean_us = total_us / exec_count (saturating).
1911    fn exec_spg_stat_query(&self) -> QueryResult {
1912        let columns = alloc::vec![
1913            ColumnSchema::new("sql", DataType::Text, false),
1914            ColumnSchema::new("exec_count", DataType::BigInt, false),
1915            ColumnSchema::new("total_us", DataType::BigInt, false),
1916            ColumnSchema::new("mean_us", DataType::BigInt, false),
1917            ColumnSchema::new("max_us", DataType::BigInt, false),
1918            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1919        ];
1920        let rows: Vec<Row> = self
1921            .query_stats
1922            .snapshot()
1923            .into_iter()
1924            .map(|(sql, s)| {
1925                let mean = if s.exec_count == 0 {
1926                    0
1927                } else {
1928                    s.total_us / s.exec_count
1929                };
1930                Row::new(alloc::vec![
1931                    Value::Text(sql),
1932                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1933                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1934                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1935                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1936                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1937                ])
1938            })
1939            .collect();
1940        QueryResult::Rows { columns, rows }
1941    }
1942
1943    /// v6.5.2 — register a connection-state provider. spg-server
1944    /// calls this at startup with a function that snapshots its
1945    /// per-pgwire-connection registry. Engine reads through the
1946    /// callback on `SELECT * FROM spg_stat_activity`.
1947    #[must_use]
1948    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
1949        self.activity_provider = Some(f);
1950        self
1951    }
1952
1953    /// v6.5.3 — register audit chain provider + verifier.
1954    #[must_use]
1955    pub const fn with_audit_providers(
1956        mut self,
1957        chain: AuditChainProvider,
1958        verify: AuditVerifier,
1959    ) -> Self {
1960        self.audit_chain_provider = Some(chain);
1961        self.audit_verifier = Some(verify);
1962        self
1963    }
1964
1965    /// v6.5.6 — register a slow-query log callback. `threshold_us`
1966    /// is the floor (in microseconds); only executes above the floor
1967    /// fire the callback. spg-server wires this from
1968    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
1969    #[must_use]
1970    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
1971        self.slow_query_threshold_us = Some(threshold_us);
1972        self.slow_query_logger = Some(logger);
1973        self
1974    }
1975
1976    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
1977    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
1978    /// the compile-time default of 256.
1979    pub fn set_plan_cache_max(&mut self, n: usize) {
1980        self.plan_cache.set_max_entries(n);
1981    }
1982
1983    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
1984    /// snapshot from the registered `ActivityProvider`. Returns an
1985    /// empty result set when no provider is registered (the no_std
1986    /// embedded path with no pgwire layer).
1987    fn exec_spg_stat_activity(&self) -> QueryResult {
1988        let columns = alloc::vec![
1989            ColumnSchema::new("pid", DataType::Int, false),
1990            ColumnSchema::new("user", DataType::Text, false),
1991            ColumnSchema::new("started_at_us", DataType::BigInt, false),
1992            ColumnSchema::new("current_sql", DataType::Text, false),
1993            ColumnSchema::new("wait_event", DataType::Text, false),
1994            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1995            ColumnSchema::new("in_transaction", DataType::Bool, false),
1996        ];
1997        let rows: Vec<Row> = self
1998            .activity_provider
1999            .map(|f| f())
2000            .unwrap_or_default()
2001            .into_iter()
2002            .map(|r| {
2003                Row::new(alloc::vec![
2004                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2005                    Value::Text(r.user),
2006                    Value::BigInt(r.started_at_us),
2007                    Value::Text(r.current_sql),
2008                    Value::Text(r.wait_event),
2009                    Value::BigInt(r.elapsed_us),
2010                    Value::Bool(r.in_transaction),
2011                ])
2012            })
2013            .collect();
2014        QueryResult::Rows { columns, rows }
2015    }
2016
2017    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2018    /// table with `(table_name, ddl)`. Reconstructed from catalog
2019    /// state on demand.
2020    fn exec_spg_table_ddl(&self) -> QueryResult {
2021        let columns = alloc::vec![
2022            ColumnSchema::new("table_name", DataType::Text, false),
2023            ColumnSchema::new("ddl", DataType::Text, false),
2024        ];
2025        let rows: Vec<Row> = self
2026            .catalog
2027            .table_names()
2028            .into_iter()
2029            .filter(|n| !is_internal_table_name(n))
2030            .filter_map(|name| {
2031                let table = self.catalog.get(&name)?;
2032                let ddl = render_create_table(&name, &table.schema().columns);
2033                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2034            })
2035            .collect();
2036        QueryResult::Rows { columns, rows }
2037    }
2038
2039    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2040    /// with `(role_name, ddl)`. Password is redacted (matches the
2041    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2042    fn exec_spg_role_ddl(&self) -> QueryResult {
2043        let columns = alloc::vec![
2044            ColumnSchema::new("role_name", DataType::Text, false),
2045            ColumnSchema::new("ddl", DataType::Text, false),
2046        ];
2047        let rows: Vec<Row> = self
2048            .users
2049            .iter()
2050            .map(|(name, rec)| {
2051                let ddl = alloc::format!(
2052                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2053                    rec.role.as_str(),
2054                );
2055                Row::new(alloc::vec![
2056                    Value::Text(String::from(name)),
2057                    Value::Text(ddl)
2058                ])
2059            })
2060            .collect();
2061        QueryResult::Rows { columns, rows }
2062    }
2063
2064    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2065    /// `ddl` column concatenates every user table's CREATE +
2066    /// every role's CREATE in deterministic catalog order. Suitable
2067    /// for piping back through `Engine::execute` to recreate a
2068    /// schema-equivalent database.
2069    fn exec_spg_database_ddl(&self) -> QueryResult {
2070        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2071        let mut out = String::new();
2072        for (name, rec) in self.users.iter() {
2073            out.push_str(&alloc::format!(
2074                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2075                rec.role.as_str(),
2076            ));
2077        }
2078        for name in self.catalog.table_names() {
2079            if is_internal_table_name(&name) {
2080                continue;
2081            }
2082            if let Some(table) = self.catalog.get(&name) {
2083                out.push_str(&render_create_table(&name, &table.schema().columns));
2084                out.push_str(";\n");
2085            }
2086        }
2087        QueryResult::Rows {
2088            columns,
2089            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2090        }
2091    }
2092
2093    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2094    /// snapshot from the registered provider; empty when no
2095    /// provider is set.
2096    fn exec_spg_audit_chain(&self) -> QueryResult {
2097        let columns = alloc::vec![
2098            ColumnSchema::new("seq", DataType::BigInt, false),
2099            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2100            ColumnSchema::new("prev_hash", DataType::Text, false),
2101            ColumnSchema::new("entry_hash", DataType::Text, false),
2102            ColumnSchema::new("sql", DataType::Text, false),
2103        ];
2104        let rows: Vec<Row> = self
2105            .audit_chain_provider
2106            .map(|f| f())
2107            .unwrap_or_default()
2108            .into_iter()
2109            .map(|r| {
2110                Row::new(alloc::vec![
2111                    Value::BigInt(r.seq),
2112                    Value::BigInt(r.ts_ms),
2113                    Value::Text(r.prev_hash_hex),
2114                    Value::Text(r.entry_hash_hex),
2115                    Value::Text(r.sql),
2116                ])
2117            })
2118            .collect();
2119        QueryResult::Rows { columns, rows }
2120    }
2121
2122    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2123    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2124    /// on a clean chain. Returns one row with both values 0 when
2125    /// no verifier is registered (no-data fallback for embedded
2126    /// callers).
2127    fn exec_spg_audit_verify(&self) -> QueryResult {
2128        let columns = alloc::vec![
2129            ColumnSchema::new("verified_count", DataType::BigInt, false),
2130            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2131        ];
2132        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2133        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2134        QueryResult::Rows {
2135            columns,
2136            rows: alloc::vec![row],
2137        }
2138    }
2139
    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
    ///
    /// Returns a shared borrow of the engine's `query_stats` field;
    /// nothing is copied or recomputed.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
2144
    /// v6.5.1 — mutable accessor (clear, etc).
    ///
    /// Returns an exclusive borrow of the engine's `query_stats`
    /// field so callers can mutate it in place.
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
2149
    /// v6.2.0 — read access to the per-column statistics table.
    /// Used by the planner (v6.2.2 selectivity functions read this),
    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
    ///
    /// Returns a shared borrow of the engine's `statistics` field.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
2156
2157    /// v6.2.1 — return tables whose modified-row count crossed the
2158    /// auto-analyze threshold since the last ANALYZE on that table.
2159    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2160    /// ANALYZE)` — combines PG-style fractional + absolute lower
2161    /// bound so a fresh / tiny table doesn't get hammered on every
2162    /// INSERT.
2163    ///
2164    /// Designed to be cheap: walks every user table's
2165    /// `Catalog::table_names()` + reads `statistics::modified_
2166    /// since_last_analyze()` (BTreeMap lookup). The background
2167    /// worker calls this under `engine.read()` then drops the lock
2168    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2169    pub fn tables_needing_analyze(&self) -> Vec<String> {
2170        const MIN_ROWS: u64 = 100;
2171        let mut out = Vec::new();
2172        for name in self.catalog.table_names() {
2173            if is_internal_table_name(&name) {
2174                continue;
2175            }
2176            let Some(table) = self.catalog.get(&name) else {
2177                continue;
2178            };
2179            let row_count = table.rows().len() as u64;
2180            let modified = self.statistics.modified_since_last_analyze(&name);
2181            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2182            // computed in integer arithmetic so spg-engine stays
2183            // no_std without pulling in libm. `(n + 9) / 10` is
2184            // `ceil(n / 10)` for non-negative `n`.
2185            let base = row_count.max(MIN_ROWS);
2186            let threshold = base.saturating_add(9) / 10;
2187            if modified >= threshold {
2188                out.push(name);
2189            }
2190        }
2191        out
2192    }
2193
2194    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2195    /// every user table; `ANALYZE <name>` re-stats one. For each
2196    /// target table, single-pass scan + per-column histogram +
2197    /// `null_frac` + `n_distinct`. Replaces the table's prior
2198    /// stats; resets the modified-row counter.
2199    ///
2200    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2201    /// can add reservoir sampling at the > 100 K-row mark; not a
2202    /// scope blocker for the current commit since rows ≤ 100 K
2203    /// analyse in milliseconds.
2204    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2205        let names: Vec<String> = if let Some(name) = target {
2206            // Verify the table exists; surface a clear error if not.
2207            if self.catalog.get(name).is_none() {
2208                return Err(EngineError::Storage(StorageError::TableNotFound {
2209                    name: name.to_string(),
2210                }));
2211            }
2212            alloc::vec![name.to_string()]
2213        } else {
2214            self.catalog
2215                .table_names()
2216                .into_iter()
2217                .filter(|n| !is_internal_table_name(n))
2218                .collect()
2219        };
2220        let mut analysed = 0usize;
2221        for table_name in &names {
2222            self.analyze_one_table(table_name)?;
2223            analysed += 1;
2224        }
2225        // v6.3.1 — plan cache invalidation. Bump stats version so
2226        // future lookups see the new generation, and selectively
2227        // evict every plan whose `source_tables` overlap with the
2228        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2229        // whole cache.
2230        if analysed > 0 {
2231            self.statistics.bump_version();
2232            if target.is_some() {
2233                for t in &names {
2234                    self.plan_cache.evict_referencing(t);
2235                }
2236            } else {
2237                self.plan_cache.clear();
2238            }
2239        }
2240        Ok(QueryResult::CommandOk {
2241            affected: analysed,
2242            modified_catalog: true,
2243        })
2244    }
2245
2246    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2247    /// engine-layer compaction shim with the default
2248    /// 4 MiB segment-size threshold. spg-server intercepts the
2249    /// SQL before it reaches the engine on a server build —
2250    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2251    /// `Engine::compact_cold_segments_with_target` directly with
2252    /// the env value, and persists every merged segment to
2253    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2254    /// are case-folded to lowercase to match PG; values keep their
2255    /// caller-supplied form so observability paths see what was
2256    /// requested. Only `default_text_search_config` is consulted by
2257    /// the engine today.
2258    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2259        let normalised = match value {
2260            spg_sql::ast::SetValue::String(s) => s,
2261            spg_sql::ast::SetValue::Ident(s) => s,
2262            spg_sql::ast::SetValue::Number(s) => s,
2263            spg_sql::ast::SetValue::Default => String::new(),
2264        };
2265        let key = name.to_ascii_lowercase();
2266        // v7.14.0 — mysqldump preamble emits
2267        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2268        // order despite cross-table FK references; the closing
2269        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2270        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2271        // session-variable-aware path). Match both shapes.
2272        // Also accept PG's `session_replication_role = 'replica'`
2273        // which suppresses trigger + FK enforcement during a
2274        // logical replication apply (pg_dump preserves this for
2275        // schema-only mode but it shows up in some restores).
2276        let value_off = matches!(
2277            normalised.to_ascii_lowercase().as_str(),
2278            "0" | "off" | "false"
2279        );
2280        let value_on = matches!(
2281            normalised.to_ascii_lowercase().as_str(),
2282            "1" | "on" | "true"
2283        );
2284        if key == "foreign_key_checks"
2285            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2286        {
2287            if value_off || key == "session_replication_role" {
2288                self.foreign_key_checks = false;
2289            } else if value_on
2290                || (key == "session_replication_role"
2291                    && normalised.eq_ignore_ascii_case("origin"))
2292            {
2293                self.foreign_key_checks = true;
2294                // Drain pending FK queue against the now-complete
2295                // catalog. Errors here surface as the SET reply —
2296                // caller knows enabling checks revealed orphans.
2297                let _ = self.drain_pending_foreign_keys();
2298            }
2299        }
2300        self.session_params.insert(key, normalised);
2301    }
2302
2303    /// v7.14.0 — resolve every queued FK whose installation was
2304    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2305    /// `set_session_param` when checks flip back on and by the
2306    /// drop-import release gate. Each FK is resolved against the
2307    /// current catalog; remaining missing-parent errors propagate
2308    /// up so the caller knows the import was incomplete.
2309    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2310        let pending = core::mem::take(&mut self.pending_foreign_keys);
2311        for (child, fk) in pending {
2312            // Resolve against the current catalog. Skip silently
2313            // when the child table itself was dropped between
2314            // queue + drain.
2315            let cols_snapshot = match self.active_catalog().get(&child) {
2316                Some(t) => t.schema().columns.clone(),
2317                None => continue,
2318            };
2319            let storage_fk = resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2320            let table = self
2321                .active_catalog_mut()
2322                .get_mut(&child)
2323                .expect("checked above");
2324            table.schema_mut().foreign_keys.push(storage_fk);
2325        }
2326        Ok(())
2327    }
2328
2329    /// v7.12.1 — read a session parameter set via `SET`. Used by
2330    /// the FTS function dispatcher to resolve the default config
2331    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2332    #[must_use]
2333    pub fn session_param(&self, name: &str) -> Option<&str> {
2334        self.session_params
2335            .get(&name.to_ascii_lowercase())
2336            .map(String::as_str)
2337    }
2338
2339    /// v7.12.1 — build an `EvalContext` chained with the session's
2340    /// `default_text_search_config`. Engine-internal callers use
2341    /// this instead of `EvalContext::new` so the FTS function
2342    /// dispatcher sees the SET configuration.
2343    fn ev_ctx<'a>(
2344        &'a self,
2345        columns: &'a [ColumnSchema],
2346        alias: Option<&'a str>,
2347    ) -> EvalContext<'a> {
2348        EvalContext::new(columns, alias)
2349            .with_default_text_search_config(self.session_param("default_text_search_config"))
2350    }
2351
2352    /// `<db>.spg/segments/`. This arm only fires for engine-only
2353    /// callers (spg-embedded, lib tests); in that mode merged
2354    /// segments live in memory and are dropped at process exit.
2355    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2356        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2357        let reports = self.compact_cold_segments_with_target(target)?;
2358        let columns = alloc::vec![
2359            ColumnSchema::new("table_name", DataType::Text, false),
2360            ColumnSchema::new("index_name", DataType::Text, false),
2361            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2362            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2363            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2364            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2365            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2366        ];
2367        let rows: Vec<Row> = reports
2368            .into_iter()
2369            .map(|(tname, iname, report)| {
2370                Row::new(alloc::vec![
2371                    Value::Text(tname),
2372                    Value::Text(iname),
2373                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2374                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2375                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2376                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2377                    Value::BigInt(
2378                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2379                    ),
2380                ])
2381            })
2382            .collect();
2383        Ok(QueryResult::Rows { columns, rows })
2384    }
2385
2386    /// Walk a single table's rows once and (re-)populate per-column
2387    /// stats. Drops the existing stats for `table` first so columns
2388    /// that have been DROP-ed between ANALYZEs don't leave stale
2389    /// rows.
2390    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2391        let table = self.catalog.get(table_name).ok_or_else(|| {
2392            EngineError::Storage(StorageError::TableNotFound {
2393                name: table_name.to_string(),
2394            })
2395        })?;
2396        let schema = table.schema().clone();
2397        let row_count = table.rows().len();
2398        // For each column, collect (sorted) non-NULL textual values
2399        // + count NULLs; then ask `statistics::build_histogram` to
2400        // produce the 101 bounds and `estimate_n_distinct` the
2401        // distinct count.
2402        self.statistics.clear_table(table_name);
2403        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2404            // v6.2.0 skip: vector columns have their own stats
2405            // shape (HNSW graph topology). v6.2 deliberation #1.
2406            if matches!(col_schema.ty, DataType::Vector { .. }) {
2407                continue;
2408            }
2409            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2410            let mut nulls: u64 = 0;
2411            for row in table.rows() {
2412                match row.values.get(col_pos) {
2413                    Some(Value::Null) | None => nulls += 1,
2414                    Some(v) => non_null_values.push(v.clone()),
2415                }
2416            }
2417            // Sort by type-aware ordering (Int as int, Text as
2418            // lex, etc.) so histogram bounds reflect the column's
2419            // natural order — not lexicographic on the string
2420            // representation, which would put "9" after "49".
2421            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2422            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2423            let null_frac = if row_count == 0 {
2424                0.0
2425            } else {
2426                #[allow(clippy::cast_precision_loss)]
2427                let f = nulls as f32 / row_count as f32;
2428                f
2429            };
2430            let n_distinct = statistics::estimate_n_distinct(&non_null);
2431            let histogram_bounds = statistics::build_histogram(&non_null);
2432            self.statistics.set(
2433                table_name.to_string(),
2434                col_schema.name.clone(),
2435                statistics::ColumnStats {
2436                    null_frac,
2437                    n_distinct,
2438                    histogram_bounds,
2439                },
2440            );
2441        }
2442        self.statistics.reset_modified(table_name);
2443        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2444        // BTree indices and count Cold locators (MAX across
2445        // indices); store the result on the table. Surfaced via
2446        // `spg_statistic.cold_row_count` (new column) and
2447        // `spg_stat_segment.table_name` (new column).
2448        let cold_count = {
2449            let table = self
2450                .active_catalog()
2451                .get(table_name)
2452                .expect("table still present");
2453            table.count_cold_locators()
2454        };
2455        let table_mut = self
2456            .active_catalog_mut()
2457            .get_mut(table_name)
2458            .expect("table still present");
2459        table_mut.set_cold_row_count(cold_count);
2460        Ok(())
2461    }
2462
2463    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2464    /// `(name, scope, table_count)` ordered by publication name.
2465    ///   - `scope` is the human-readable string:
2466    ///       `"FOR ALL TABLES"` /
2467    ///       `"FOR TABLE t1, t2"` /
2468    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2469    ///   - `table_count` is NULL for `AllTables`, the list length
2470    ///     otherwise. NULLability lets clients distinguish "publish
2471    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2472    ///     parser forbids the empty list, but the column shape is
2473    ///     ready for the v6.1.5 publisher-side semantics).
2474    fn exec_show_publications(&self) -> QueryResult {
2475        let columns = alloc::vec![
2476            ColumnSchema::new("name", DataType::Text, false),
2477            ColumnSchema::new("scope", DataType::Text, false),
2478            ColumnSchema::new("table_count", DataType::Int, true),
2479        ];
2480        let rows: Vec<Row> = self
2481            .publications
2482            .iter()
2483            .map(|(name, scope)| {
2484                let (scope_str, count_val) = match scope {
2485                    spg_sql::ast::PublicationScope::AllTables => {
2486                        ("FOR ALL TABLES".to_string(), Value::Null)
2487                    }
2488                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2489                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2490                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2491                    ),
2492                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2493                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2494                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2495                    ),
2496                };
2497                Row::new(alloc::vec![
2498                    Value::Text(name.clone()),
2499                    Value::Text(scope_str),
2500                    count_val,
2501                ])
2502            })
2503            .collect();
2504        QueryResult::Rows { columns, rows }
2505    }
2506
2507    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2508    fn exec_show_users(&self) -> QueryResult {
2509        let columns = alloc::vec![
2510            ColumnSchema::new("name", DataType::Text, false),
2511            ColumnSchema::new("role", DataType::Text, false),
2512        ];
2513        let rows: Vec<Row> = self
2514            .users
2515            .iter()
2516            .map(|(name, rec)| {
2517                Row::new(alloc::vec![
2518                    Value::Text(name.to_string()),
2519                    Value::Text(rec.role.as_str().to_string()),
2520                ])
2521            })
2522            .collect();
2523        QueryResult::Rows { columns, rows }
2524    }
2525
2526    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2527        if self.in_transaction() {
2528            return Err(EngineError::Unsupported(
2529                "CREATE USER is not allowed inside a transaction".into(),
2530            ));
2531        }
2532        let role = users::Role::parse(&s.role).ok_or_else(|| {
2533            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2534        })?;
2535        // Prefer the host-injected RNG. Falls back to a deterministic
2536        // salt derived from the username only when no RNG is wired —
2537        // acceptable for tests; the server always installs one.
2538        let salt = self.salt_fn.map_or_else(
2539            || {
2540                let mut s_bytes = [0u8; 16];
2541                let digest = spg_crypto::hash(s.name.as_bytes());
2542                s_bytes.copy_from_slice(&digest[..16]);
2543                s_bytes
2544            },
2545            |f| f(),
2546        );
2547        self.users
2548            .create(&s.name, &s.password, role, salt)
2549            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2550        Ok(QueryResult::CommandOk {
2551            affected: 1,
2552            modified_catalog: true,
2553        })
2554    }
2555
2556    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2557        if self.in_transaction() {
2558            return Err(EngineError::Unsupported(
2559                "DROP USER is not allowed inside a transaction".into(),
2560            ));
2561        }
2562        self.users
2563            .drop(name)
2564            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2565        Ok(QueryResult::CommandOk {
2566            affected: 1,
2567            modified_catalog: true,
2568        })
2569    }
2570
2571    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2572    /// function metadata in the catalog. PL/pgSQL bodies are
2573    /// already parsed by the SQL parser; we re-canonicalise the
2574    /// body to source text for storage (the executor re-parses
2575    /// it at trigger fire time — see the trigger fire path).
2576    fn exec_create_function(
2577        &mut self,
2578        s: spg_sql::ast::CreateFunctionStatement,
2579    ) -> Result<QueryResult, EngineError> {
2580        let args_repr = render_function_args(&s.args);
2581        let returns = match &s.returns {
2582            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2583            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2584            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2585            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2586        };
2587        let body_text = match &s.body {
2588            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2589            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2590        };
2591        let def = spg_storage::FunctionDef {
2592            name: s.name.clone(),
2593            args_repr,
2594            returns,
2595            language: s.language.clone(),
2596            body: body_text,
2597        };
2598        self.active_catalog_mut()
2599            .create_function(def, s.or_replace)
2600            .map_err(EngineError::Storage)?;
2601        Ok(QueryResult::CommandOk {
2602            affected: 0,
2603            modified_catalog: true,
2604        })
2605    }
2606
2607    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2608    /// function must already exist in the catalog (forward
2609    /// references defer to a later release). Persists the
2610    /// trigger metadata for the row-write hooks below to consult.
2611    fn exec_create_trigger(
2612        &mut self,
2613        s: spg_sql::ast::CreateTriggerStatement,
2614    ) -> Result<QueryResult, EngineError> {
2615        let timing = match s.timing {
2616            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2617            spg_sql::ast::TriggerTiming::After => "AFTER",
2618            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2619        };
2620        let events: Vec<alloc::string::String> = s
2621            .events
2622            .iter()
2623            .map(|e| match e {
2624                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2625                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2626                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2627                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2628            })
2629            .collect();
2630        let for_each = match s.for_each {
2631            spg_sql::ast::TriggerForEach::Row => "ROW",
2632            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2633        };
2634        let def = spg_storage::TriggerDef {
2635            name: s.name.clone(),
2636            table: s.table.clone(),
2637            timing: alloc::string::String::from(timing),
2638            events,
2639            for_each: alloc::string::String::from(for_each),
2640            function: s.function.clone(),
2641            update_columns: s.update_columns.clone(),
2642        };
2643        self.active_catalog_mut()
2644            .create_trigger(def, s.or_replace)
2645            .map_err(EngineError::Storage)?;
2646        Ok(QueryResult::CommandOk {
2647            affected: 0,
2648            modified_catalog: true,
2649        })
2650    }
2651
2652    fn exec_drop_trigger(
2653        &mut self,
2654        name: &str,
2655        table: &str,
2656        if_exists: bool,
2657    ) -> Result<QueryResult, EngineError> {
2658        let removed = self.active_catalog_mut().drop_trigger(name, table);
2659        if !removed && !if_exists {
2660            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2661                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2662            )));
2663        }
2664        Ok(QueryResult::CommandOk {
2665            affected: usize::from(removed),
2666            modified_catalog: removed,
2667        })
2668    }
2669
2670    fn exec_drop_function(
2671        &mut self,
2672        name: &str,
2673        if_exists: bool,
2674    ) -> Result<QueryResult, EngineError> {
2675        let removed = self.active_catalog_mut().drop_function(name);
2676        if !removed && !if_exists {
2677            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2678                alloc::format!("function {name:?} does not exist"),
2679            )));
2680        }
2681        Ok(QueryResult::CommandOk {
2682            affected: usize::from(removed),
2683            modified_catalog: removed,
2684        })
2685    }
2686
2687    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2688    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2689    /// matched row, evaluate each RHS expression against the *old*
2690    /// row, then call `Table::update_row` which rebuilds indices.
2691    /// Indexed columns are correctly reflected because rebuild
2692    /// happens after the cell rewrite.
2693    fn exec_update_cancel(
2694        &mut self,
2695        stmt: &spg_sql::ast::UpdateStatement,
2696        cancel: CancelToken<'_>,
2697    ) -> Result<QueryResult, EngineError> {
2698        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2699        // session FTS config before the table mut-borrow opens (the
2700        // INSERT path uses the same pattern). Empty vecs are the
2701        // common "no triggers on this table" fast path.
2702        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2703        // cols` filter. The filter is paired with each function so
2704        // the per-row fire loop can skip when no listed column
2705        // actually differs between OLD and NEW.
2706        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2707        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2708        let trigger_session_cfg: Option<String> = self
2709            .session_params
2710            .get("default_text_search_config")
2711            .cloned();
2712        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2713        // tier row, promote it back to the hot tier *before* the
2714        // hot-row walk. The promote pushes the row to the end of
2715        // `table.rows`, where the upcoming SET-evaluation loop will
2716        // pick it up and apply the assignments. Lookups for the key
2717        // never observe a gap because `promote_cold_row` inserts the
2718        // hot row before retiring the cold locator.
2719        if let Some(w) = &stmt.where_ {
2720            let schema_cols = self
2721                .active_catalog()
2722                .get(&stmt.table)
2723                .ok_or_else(|| {
2724                    EngineError::Storage(StorageError::TableNotFound {
2725                        name: stmt.table.clone(),
2726                    })
2727                })?
2728                .schema()
2729                .columns
2730                .clone();
2731            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2732                && let Some(idx_name) = self
2733                    .active_catalog()
2734                    .get(&stmt.table)
2735                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2736            {
2737                // Promote may be a no-op (key is hot-only or absent);
2738                // we don't care about the return value here — the
2739                // subsequent hot walk will either match or not.
2740                let _ = self
2741                    .active_catalog_mut()
2742                    .promote_cold_row(&stmt.table, &idx_name, &key);
2743            }
2744        }
2745
2746        // v7.12.1 — cache session FTS config before the table
2747        // mut-borrow (same reason as exec_delete).
2748        let ts_cfg: Option<String> = self
2749            .session_param("default_text_search_config")
2750            .map(String::from);
2751        let table = self
2752            .active_catalog_mut()
2753            .get_mut(&stmt.table)
2754            .ok_or_else(|| {
2755                EngineError::Storage(StorageError::TableNotFound {
2756                    name: stmt.table.clone(),
2757                })
2758            })?;
2759        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2760        // Resolve each SET target to a column position once, validate
2761        // up front so a typo'd column doesn't leave a partial mutation
2762        // behind.
2763        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2764        for (col, expr) in &stmt.assignments {
2765            let pos = schema_cols
2766                .iter()
2767                .position(|c| c.name == *col)
2768                .ok_or_else(|| {
2769                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2770                })?;
2771            targets.push((pos, expr));
2772        }
2773        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2774            .with_default_text_search_config(ts_cfg.as_deref());
2775        // Walk every row, evaluate WHERE then SET expressions. We
2776        // gather (position, new_values) tuples first and apply them
2777        // afterwards so the WHERE/RHS evaluation reads the original
2778        // row state — matches PG semantics (UPDATE doesn't see its
2779        // own writes).
2780        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2781        for (i, row) in table.rows().iter().enumerate() {
2782            // v4.5: cooperative cancel checkpoint every 256 rows so
2783            // a runaway UPDATE without WHERE doesn't drag past the
2784            // server's query-timeout watchdog.
2785            if i.is_multiple_of(256) {
2786                cancel.check()?;
2787            }
2788            if let Some(w) = &stmt.where_ {
2789                let cond = eval::eval_expr(w, row, &ctx)?;
2790                if !matches!(cond, Value::Bool(true)) {
2791                    continue;
2792                }
2793            }
2794            let mut new_vals = row.values.clone();
2795            for (pos, expr) in &targets {
2796                let v = eval::eval_expr(expr, row, &ctx)?;
2797                new_vals[*pos] =
2798                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2799            }
2800            planned.push((i, new_vals));
2801        }
2802        // v7.6.6 — capture pre-update row values for the FK
2803        // enforcement passes below. `planned` carries new values
2804        // only; pair them with the old row.
2805        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2806            .iter()
2807            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2808            .collect();
2809        let self_fks = table.schema().foreign_keys.clone();
2810        // v7.12.5 — `affected` is computed post-BEFORE-trigger
2811        // below (triggers may RETURN NULL to skip individual
2812        // rows). The pre-trigger len shape is no longer accurate.
2813        // Release mutable borrow on `table` for the FK passes.
2814        let _ = table;
2815        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2816        // local FK columns changed, the new value must exist in the
2817        // parent.
2818        if !self_fks.is_empty() {
2819            let new_rows: Vec<Vec<Value>> = planned
2820                .iter()
2821                .map(|(_pos, new_vals)| new_vals.clone())
2822                .collect();
2823            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2824        }
2825        // v7.13.0 — CHECK constraint enforcement on UPDATE
2826        // (mailrs round-5 G3). Predicates evaluated against the
2827        // candidate post-UPDATE row; false rejects the UPDATE.
2828        {
2829            let new_rows: Vec<Vec<Value>> = planned
2830                .iter()
2831                .map(|(_pos, new_vals)| new_vals.clone())
2832                .collect();
2833            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
2834        }
2835        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2836        // changed value in a column that *some other table* uses as
2837        // a FK parent column, react per `on_update` action.
2838        let child_plan =
2839            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2840        // Stage 3a — apply each child-side action.
2841        for step in &child_plan {
2842            apply_fk_child_step(self.active_catalog_mut(), step)?;
2843        }
2844        // Stage 3b — apply the original UPDATE.
2845        let table = self
2846            .active_catalog_mut()
2847            .get_mut(&stmt.table)
2848            .ok_or_else(|| {
2849                EngineError::Storage(StorageError::TableNotFound {
2850                    name: stmt.table.clone(),
2851                })
2852            })?;
2853        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
2854        // around the apply loop. BEFORE sees NEW=candidate +
2855        // OLD=current; may rewrite NEW or RETURN NULL to skip.
2856        // AFTER sees NEW=post-write + OLD=pre-write (both read-
2857        // only).
2858        //
2859        // Filter `planned` through the BEFORE pass first so the
2860        // RETURNING snapshot reflects what actually got written
2861        // (triggers may rewrite cells, including a cancellation).
2862        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
2863        // v7.12.7 — embedded SQL queue.
2864        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2865        for (pos, new_vals) in &planned {
2866            let old_row = table.rows()[*pos].clone();
2867            let mut new_row = Row::new(new_vals.clone());
2868            let mut skip = false;
2869            for (fd, filter) in &before_update_triggers {
2870                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
2871                // G7). Skip this trigger when the filter is set and
2872                // no listed column actually differs between OLD and
2873                // NEW for this row.
2874                if !filter.is_empty()
2875                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2876                {
2877                    continue;
2878                }
2879                let (outcome, deferred) = triggers::fire_row_trigger(
2880                    fd,
2881                    Some(new_row.clone()),
2882                    Some(&old_row),
2883                    &stmt.table,
2884                    &schema_cols,
2885                    &[],
2886                    trigger_session_cfg.as_deref(),
2887                    false,
2888                )
2889                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2890                deferred_embedded.extend(deferred);
2891                match outcome {
2892                    triggers::TriggerOutcome::Row(r) => new_row = r,
2893                    triggers::TriggerOutcome::Skip => {
2894                        skip = true;
2895                        break;
2896                    }
2897                }
2898            }
2899            if !skip {
2900                applied_after_before.push((*pos, new_row, old_row));
2901            }
2902        }
2903        // v7.9.4 — snapshot post-update values for RETURNING (post-
2904        // BEFORE-trigger because triggers can rewrite cells).
2905        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2906            applied_after_before
2907                .iter()
2908                .map(|(_pos, new_row, _old)| new_row.values.clone())
2909                .collect()
2910        } else {
2911            Vec::new()
2912        };
2913        let affected = applied_after_before.len();
2914        // Apply, then fire AFTER triggers per row. AFTER runs read-
2915        // only against the freshly-written row; v7.12.4-shape
2916        // assignment errors with a clear message.
2917        for (pos, new_row, old_row) in applied_after_before {
2918            table.update_row(pos, new_row.values.clone())?;
2919            for (fd, filter) in &after_update_triggers {
2920                if !filter.is_empty()
2921                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2922                {
2923                    continue;
2924                }
2925                let (_outcome, deferred) = triggers::fire_row_trigger(
2926                    fd,
2927                    Some(new_row.clone()),
2928                    Some(&old_row),
2929                    &stmt.table,
2930                    &schema_cols,
2931                    &[],
2932                    trigger_session_cfg.as_deref(),
2933                    true,
2934                )
2935                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2936                deferred_embedded.extend(deferred);
2937            }
2938        }
2939        let _ = table;
2940        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
2941        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
2942        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2943        if !self.in_transaction() && affected > 0 {
2944            self.statistics
2945                .record_modifications(&stmt.table, affected as u64);
2946        }
2947        // v7.9.4 — RETURNING projection.
2948        if let Some(items) = &stmt.returning {
2949            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2950        }
2951        Ok(QueryResult::CommandOk {
2952            affected,
2953            modified_catalog: !self.in_transaction(),
2954        })
2955    }
2956
2957    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2958    /// positions then delegates to `Table::delete_rows` (single index
2959    /// rebuild for the batch).
2960    fn exec_delete_cancel(
2961        &mut self,
2962        stmt: &spg_sql::ast::DeleteStatement,
2963        cancel: CancelToken<'_>,
2964    ) -> Result<QueryResult, EngineError> {
2965        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
2966        // session FTS config before the mut borrow (same shape as
2967        // INSERT / UPDATE).
2968        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
2969        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
2970        let trigger_session_cfg: Option<String> = self
2971            .session_params
2972            .get("default_text_search_config")
2973            .cloned();
2974        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
2975        // locator for the key. The cold row body stays in the
2976        // segment (becoming shadowed garbage that a future
2977        // compaction pass reclaims) but the index no longer
2978        // resolves it. The shadow count contributes to the
2979        // affected total; the subsequent hot walk handles any hot
2980        // rows for the same key.
2981        let mut cold_shadow_count: usize = 0;
2982        if let Some(w) = &stmt.where_ {
2983            let schema_cols = self
2984                .active_catalog()
2985                .get(&stmt.table)
2986                .ok_or_else(|| {
2987                    EngineError::Storage(StorageError::TableNotFound {
2988                        name: stmt.table.clone(),
2989                    })
2990                })?
2991                .schema()
2992                .columns
2993                .clone();
2994            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2995                && let Some(idx_name) = self
2996                    .active_catalog()
2997                    .get(&stmt.table)
2998                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2999            {
3000                cold_shadow_count = self
3001                    .active_catalog_mut()
3002                    .shadow_cold_row(&stmt.table, &idx_name, &key)
3003                    .unwrap_or(0);
3004            }
3005        }
3006
3007        // v7.12.1 — cache the session FTS config as an owned
3008        // String before the mutable table borrow below; the
3009        // ctx-builder then references it via `as_deref` so the
3010        // immutable read of `session_params` doesn't conflict
3011        // with the mut borrow chain.
3012        let ts_cfg: Option<String> = self
3013            .session_param("default_text_search_config")
3014            .map(String::from);
3015        let table = self
3016            .active_catalog_mut()
3017            .get_mut(&stmt.table)
3018            .ok_or_else(|| {
3019                EngineError::Storage(StorageError::TableNotFound {
3020                    name: stmt.table.clone(),
3021                })
3022            })?;
3023        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
3024        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
3025            .with_default_text_search_config(ts_cfg.as_deref());
3026        let mut positions: Vec<usize> = Vec::new();
3027        // v7.6.3 — collect every to-delete row's full Value tuple
3028        // alongside its position, so the FK enforcement pass can
3029        // run after the mut borrow drops.
3030        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
3031        for (i, row) in table.rows().iter().enumerate() {
3032            if i.is_multiple_of(256) {
3033                cancel.check()?;
3034            }
3035            let keep = if let Some(w) = &stmt.where_ {
3036                let cond = eval::eval_expr(w, row, &ctx)?;
3037                !matches!(cond, Value::Bool(true))
3038            } else {
3039                false
3040            };
3041            if !keep {
3042                positions.push(i);
3043                to_delete_rows.push(row.values.clone());
3044            }
3045        }
3046        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
3047        // catalog. Release the mut borrow and run reverse-scan
3048        // against every child table whose FK targets this table.
3049        // RESTRICT / NoAction raise an error; CASCADE returns a
3050        // cascade plan that stage 3 applies after the primary delete.
3051        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
3052        let _ = table;
3053        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
3054        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
3055        // = proceed, RETURN NULL = skip the row entirely. The
3056        // filter must run BEFORE the FK cascade plan so cascaded
3057        // child rows track the trigger's skip-decision on the
3058        // parent.
3059        // v7.12.7 — embedded SQL queue.
3060        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3061        if !before_delete_triggers.is_empty() {
3062            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
3063            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
3064            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
3065                let old_row = Row::new(old_vals.clone());
3066                let mut cancel_this = false;
3067                for fd in &before_delete_triggers {
3068                    let (outcome, deferred) = triggers::fire_row_trigger(
3069                        fd,
3070                        None,
3071                        Some(&old_row),
3072                        &stmt.table,
3073                        &schema_cols,
3074                        &[],
3075                        trigger_session_cfg.as_deref(),
3076                        false,
3077                    )
3078                    .map_err(|e| {
3079                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3080                    })?;
3081                    deferred_embedded.extend(deferred);
3082                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
3083                        cancel_this = true;
3084                        break;
3085                    }
3086                }
3087                if !cancel_this {
3088                    filtered_positions.push(*pos);
3089                    filtered_old_rows.push(old_vals.clone());
3090                }
3091            }
3092            positions = filtered_positions;
3093            to_delete_rows = filtered_old_rows;
3094        }
3095        let cascade_plan = plan_fk_parent_deletions(
3096            self.active_catalog(),
3097            &stmt.table,
3098            &positions,
3099            &to_delete_rows,
3100        )?;
3101        // Stage 3a — apply each FK child step (SET NULL / SET
3102        // DEFAULT / CASCADE delete) before deleting the parent.
3103        // The plan is already ordered: nulls/defaults first, then
3104        // cascade deletes (so a row mutated and later deleted
3105        // surfaces as deleted — though v7.6.5 doesn't produce
3106        // that overlap today).
3107        for step in &cascade_plan {
3108            apply_fk_child_step(self.active_catalog_mut(), step)?;
3109        }
3110        // Stage 3b — actually delete the original target rows.
3111        let table = self
3112            .active_catalog_mut()
3113            .get_mut(&stmt.table)
3114            .ok_or_else(|| {
3115                EngineError::Storage(StorageError::TableNotFound {
3116                    name: stmt.table.clone(),
3117                })
3118            })?;
3119        let affected = table.delete_rows(&positions) + cold_shadow_count;
3120        let _ = table;
3121        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3122        // with NEW=None / OLD=pre-delete row (each from the
3123        // already-snapshotted to_delete_rows). Return value is
3124        // ignored (matches PG AFTER semantics).
3125        if !after_delete_triggers.is_empty() {
3126            for old_vals in &to_delete_rows {
3127                let old_row = Row::new(old_vals.clone());
3128                for fd in &after_delete_triggers {
3129                    let (_outcome, deferred) = triggers::fire_row_trigger(
3130                        fd,
3131                        None,
3132                        Some(&old_row),
3133                        &stmt.table,
3134                        &schema_cols,
3135                        &[],
3136                        trigger_session_cfg.as_deref(),
3137                        true,
3138                    )
3139                    .map_err(|e| {
3140                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3141                    })?;
3142                    deferred_embedded.extend(deferred);
3143                }
3144            }
3145        }
3146        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3147        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3148        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3149        if !self.in_transaction() && affected > 0 {
3150            self.statistics
3151                .record_modifications(&stmt.table, affected as u64);
3152        }
3153        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3154        // rows. `to_delete_rows` was snapshotted in stage 1 before
3155        // mutation, so the projection sees the pre-delete state
3156        // (matches PG semantics: DELETE RETURNING returns the row
3157        // as it was just before removal).
3158        if let Some(items) = &stmt.returning {
3159            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3160        }
3161        Ok(QueryResult::CommandOk {
3162            affected,
3163            modified_catalog: !self.in_transaction(),
3164        })
3165    }
3166
3167    /// `SHOW TABLES` — one row per table in the active catalog.
3168    /// Column name is `name` so result-set consumers can downstream
3169    /// `SELECT name FROM ...` style logic if needed.
3170    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3171    /// `QUERY PLAN` text table — first line names the top operator
3172    /// (Scan / Aggregate / Window / etc.), indented children list
3173    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3174    /// shape, and any active index hits. `ANALYZE` execs the inner
3175    /// SELECT and appends actual-row + elapsed-micros annotations.
3176    #[allow(clippy::format_push_string)]
3177    fn exec_explain(
3178        &self,
3179        e: &spg_sql::ast::ExplainStatement,
3180        cancel: CancelToken<'_>,
3181    ) -> Result<QueryResult, EngineError> {
3182        let mut lines = Vec::<String>::new();
3183        explain_select(&e.inner, self, 0, &mut lines);
3184        if e.suggest {
3185            // v6.8.3 — index advisor. Walks the SELECT's FROM
3186            // tables + WHERE column refs; for each (table, column)
3187            // pair that lacks an index, append a SUGGEST line with
3188            // a copy-pastable `CREATE INDEX` statement. This is a
3189            // pure-syntax heuristic — no cardinality estimation —
3190            // matching the v6.8.3 design intent of "tell the
3191            // operator where indexes are missing", not "give the
3192            // mathematically optimal index set".
3193            let suggestions = build_index_suggestions(&e.inner, self);
3194            for s in suggestions {
3195                lines.push(s);
3196            }
3197        } else if e.analyze {
3198            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3199            // with `(rows=N)` where the row count is computable
3200            // without re-executing the full query:
3201            //   - Top-level operator (first non-indented line):
3202            //     rows = final result.len()
3203            //   - "From: <table> [full scan]" lines: rows =
3204            //     table.rows().len() (catalog read; no execution)
3205            //   - "From: <table> [index seek]": indeterminate —
3206            //     the index step would need re-execution; v6.2.5
3207            //     adds per-operator wall-clock + hot/cold rows
3208            //     instrumentation that makes this concrete.
3209            //   - Everything else: marked `(—)` so the surface
3210            //     stays well-defined without silently dropping
3211            //     stats. v6.2.5 fills in via inline executor
3212            //     instrumentation.
3213            // Total elapsed lands on a trailing `Total: …` line.
3214            let started = self.clock.map(|f| f());
3215            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3216            let elapsed_micros = match (self.clock, started) {
3217                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3218                _ => None,
3219            };
3220            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3221                rows.len()
3222            } else {
3223                0
3224            };
3225            annotate_explain_lines(&mut lines, row_count, self);
3226            let mut total = alloc::format!("Total: rows={row_count}");
3227            if let Some(us) = elapsed_micros {
3228                total.push_str(&alloc::format!(" elapsed={us}us"));
3229            }
3230            lines.push(total);
3231        }
3232        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3233        let rows: Vec<Row> = lines
3234            .into_iter()
3235            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3236            .collect();
3237        Ok(QueryResult::Rows { columns, rows })
3238    }
3239
3240    fn exec_show_tables(&self) -> QueryResult {
3241        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3242        let rows: Vec<Row> = self
3243            .active_catalog()
3244            .table_names()
3245            .into_iter()
3246            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3247            .collect();
3248        QueryResult::Rows { columns, rows }
3249    }
3250
3251    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3252    /// declared name, SQL type rendering, and nullability flag.
3253    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3254        let table =
3255            self.active_catalog()
3256                .get(table_name)
3257                .ok_or_else(|| StorageError::TableNotFound {
3258                    name: table_name.into(),
3259                })?;
3260        let columns = alloc::vec![
3261            ColumnSchema::new("name", DataType::Text, false),
3262            ColumnSchema::new("type", DataType::Text, false),
3263            ColumnSchema::new("nullable", DataType::Bool, false),
3264        ];
3265        let rows: Vec<Row> = table
3266            .schema()
3267            .columns
3268            .iter()
3269            .map(|c| {
3270                Row::new(alloc::vec![
3271                    Value::Text(c.name.clone()),
3272                    Value::Text(alloc::format!("{}", c.ty)),
3273                    Value::Bool(c.nullable),
3274                ])
3275            })
3276            .collect();
3277        Ok(QueryResult::Rows { columns, rows })
3278    }
3279
3280    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3281        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3282        if self.tx_catalogs.contains_key(&tx_id) {
3283            return Err(EngineError::TransactionAlreadyOpen);
3284        }
3285        self.tx_catalogs.insert(
3286            tx_id,
3287            TxState {
3288                catalog: self.catalog.clone(),
3289                savepoints: Vec::new(),
3290            },
3291        );
3292        Ok(QueryResult::CommandOk {
3293            affected: 0,
3294            modified_catalog: false,
3295        })
3296    }
3297
3298    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3299        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3300        let state = self
3301            .tx_catalogs
3302            .remove(&tx_id)
3303            .ok_or(EngineError::NoActiveTransaction)?;
3304        self.catalog = state.catalog;
3305        // All savepoints become permanent at COMMIT and the stack
3306        // resets for the next TX (`state.savepoints` is discarded with
3307        // `state`).
3308        Ok(QueryResult::CommandOk {
3309            affected: 0,
3310            modified_catalog: true,
3311        })
3312    }
3313
3314    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3315        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3316        if self.tx_catalogs.remove(&tx_id).is_none() {
3317            return Err(EngineError::NoActiveTransaction);
3318        }
3319        // savepoints discarded with the TxState
3320        Ok(QueryResult::CommandOk {
3321            affected: 0,
3322            modified_catalog: false,
3323        })
3324    }
3325
3326    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3327        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3328        let state = self
3329            .tx_catalogs
3330            .get_mut(&tx_id)
3331            .ok_or(EngineError::NoActiveTransaction)?;
3332        // PG re-uses an existing savepoint name by dropping the older
3333        // entry and pushing a fresh one — match that behaviour so
3334        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3335        state.savepoints.retain(|(n, _)| n != &name);
3336        let snapshot = state.catalog.clone();
3337        state.savepoints.push((name, snapshot));
3338        Ok(QueryResult::CommandOk {
3339            affected: 0,
3340            modified_catalog: false,
3341        })
3342    }
3343
3344    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3345        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3346        let state = self
3347            .tx_catalogs
3348            .get_mut(&tx_id)
3349            .ok_or(EngineError::NoActiveTransaction)?;
3350        let pos = state
3351            .savepoints
3352            .iter()
3353            .rposition(|(n, _)| n == name)
3354            .ok_or_else(|| {
3355                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3356            })?;
3357        // The savepoint stays on the stack (PG semantics): a later
3358        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3359        // after it is discarded.
3360        let snapshot = state.savepoints[pos].1.clone();
3361        state.savepoints.truncate(pos + 1);
3362        state.catalog = snapshot;
3363        Ok(QueryResult::CommandOk {
3364            affected: 0,
3365            modified_catalog: false,
3366        })
3367    }
3368
3369    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3370        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3371        let state = self
3372            .tx_catalogs
3373            .get_mut(&tx_id)
3374            .ok_or(EngineError::NoActiveTransaction)?;
3375        let pos = state
3376            .savepoints
3377            .iter()
3378            .rposition(|(n, _)| n == name)
3379            .ok_or_else(|| {
3380                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3381            })?;
3382        // RELEASE keeps the work since the savepoint, just discards the
3383        // bookmark plus everything nested under it.
3384        state.savepoints.truncate(pos);
3385        Ok(QueryResult::CommandOk {
3386            affected: 0,
3387            modified_catalog: false,
3388        })
3389    }
3390
3391    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3392    /// (encoding = …)]`. Walks every table in the active catalog
3393    /// looking for an index matching `stmt.name`, then delegates the
3394    /// rebuild (including any encoding switch) to
3395    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3396    /// optimisation is v6.0.4.1 / v6.1.x territory.
3397    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3398    /// arm. Currently the only setting is `hot_tier_bytes`; later
3399    /// v6.7.x can extend `AlterTableTarget` without touching this
3400    /// arm structure.
3401    fn exec_alter_table(
3402        &mut self,
3403        s: spg_sql::ast::AlterTableStatement,
3404    ) -> Result<QueryResult, EngineError> {
3405        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3406        // On first error the statement aborts; subactions already
3407        // applied stay (no transactional rollback in v7.13 — wrap in
3408        // BEGIN/COMMIT if atomicity matters).
3409        let table_name = s.name.clone();
3410        for target in s.targets {
3411            self.exec_alter_table_subaction(&table_name, target)?;
3412        }
3413        Ok(QueryResult::CommandOk {
3414            affected: 0,
3415            modified_catalog: !self.in_transaction(),
3416        })
3417    }
3418
3419    fn exec_alter_table_subaction(
3420        &mut self,
3421        table_name_outer: &str,
3422        target: spg_sql::ast::AlterTableTarget,
3423    ) -> Result<(), EngineError> {
3424        // Inner helper retains the s.name closure shape; alias to `s`
3425        // for minimal diff against the v7.13.0 body.
3426        struct S<'a> {
3427            name: &'a str,
3428        }
3429        let s = S {
3430            name: table_name_outer,
3431        };
3432        match target {
3433            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3434                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3435                    EngineError::Storage(StorageError::TableNotFound {
3436                        name: s.name.into(),
3437                    })
3438                })?;
3439                table.schema_mut().hot_tier_bytes = Some(n);
3440            }
3441            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3442                // v7.6.8 — resolve FK against the live catalog first
3443                // (validates parent table, columns, indices). Then
3444                // verify every existing row in the child table
3445                // satisfies the new constraint. Then install it.
3446                let cols_snapshot = self
3447                    .active_catalog()
3448                    .get(s.name)
3449                    .ok_or_else(|| {
3450                        EngineError::Storage(StorageError::TableNotFound {
3451                            name: s.name.into(),
3452                        })
3453                    })?
3454                    .schema()
3455                    .columns
3456                    .clone();
3457                let storage_fk =
3458                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3459                // Verify existing rows. Treat them as a virtual
3460                // INSERT batch — reusing the v7.6.2 enforce helper.
3461                let existing_rows: Vec<Vec<Value>> = self
3462                    .active_catalog()
3463                    .get(&s.name)
3464                    .expect("checked above")
3465                    .rows()
3466                    .iter()
3467                    .map(|r| r.values.clone())
3468                    .collect();
3469                enforce_fk_inserts(
3470                    self.active_catalog(),
3471                    s.name,
3472                    core::slice::from_ref(&storage_fk),
3473                    &existing_rows,
3474                )?;
3475                // Reject duplicate constraint name.
3476                let table = self
3477                    .active_catalog_mut()
3478                    .get_mut(s.name)
3479                    .expect("checked above");
3480                if let Some(name) = &storage_fk.name
3481                    && table
3482                        .schema()
3483                        .foreign_keys
3484                        .iter()
3485                        .any(|f| f.name.as_ref() == Some(name))
3486                {
3487                    return Err(EngineError::Unsupported(alloc::format!(
3488                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3489                    )));
3490                }
3491                table.schema_mut().foreign_keys.push(storage_fk);
3492            }
3493            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3494                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3495                    EngineError::Storage(StorageError::TableNotFound {
3496                        name: s.name.into(),
3497                    })
3498                })?;
3499                let fks = &mut table.schema_mut().foreign_keys;
3500                let before = fks.len();
3501                fks.retain(|f| f.name.as_ref() != Some(&name));
3502                if fks.len() == before && !if_exists {
3503                    return Err(EngineError::Unsupported(alloc::format!(
3504                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3505                        s.name
3506                    )));
3507                }
3508                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3509            }
3510            spg_sql::ast::AlterTableTarget::AddColumn {
3511                column,
3512                if_not_exists,
3513            } => {
3514                // v7.13.0 — mailrs round-5 G1. Append-only column add
3515                // with back-fill of the DEFAULT (or NULL) into every
3516                // existing row. Column positions don't shift, so we
3517                // skip index rebuild.
3518                let clock = self.clock;
3519                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3520                    EngineError::Storage(StorageError::TableNotFound {
3521                        name: s.name.into(),
3522                    })
3523                })?;
3524                if table
3525                    .schema()
3526                    .columns
3527                    .iter()
3528                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3529                {
3530                    if if_not_exists {
3531                        return Ok(());
3532                    }
3533                    return Err(EngineError::Unsupported(alloc::format!(
3534                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3535                        column.name,
3536                        s.name
3537                    )));
3538                }
3539                let col_name = column.name.clone();
3540                let nullable = column.nullable;
3541                let has_default =
3542                    column.default.is_some() || column.auto_increment;
3543                let col_schema = column_def_to_schema(column)?;
3544                let row_count = table.row_count();
3545                // Compute the back-fill value. Literal / runtime DEFAULT
3546                // funnels through the same resolver that INSERT uses
3547                // (v7.9.21 `resolve_column_default_free`). NULL when
3548                // the column is nullable and has no DEFAULT. NOT NULL
3549                // without DEFAULT errors when the table has existing
3550                // rows — same as PG.
3551                let fill_value: Value = if has_default
3552                    || col_schema.runtime_default.is_some()
3553                {
3554                    resolve_column_default_free(&col_schema, clock)?
3555                } else if nullable || row_count == 0 {
3556                    Value::Null
3557                } else {
3558                    return Err(EngineError::Unsupported(alloc::format!(
3559                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3560                         when the table has existing rows"
3561                    )));
3562                };
3563                table.add_column(col_schema, fill_value);
3564            }
3565            spg_sql::ast::AlterTableTarget::AlterColumnType {
3566                column,
3567                new_type,
3568                using,
3569            } => {
3570                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3571                // row's column value (either through the USING
3572                // expression if supplied, or as a direct CAST of
3573                // the existing value) and re-coerce to the new
3574                // type. Indices on the column get rebuilt.
3575                let new_data_type = column_type_to_data_type(new_type);
3576                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3577                    EngineError::Storage(StorageError::TableNotFound {
3578                        name: s.name.into(),
3579                    })
3580                })?;
3581                let col_pos = table
3582                    .schema()
3583                    .columns
3584                    .iter()
3585                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3586                    .ok_or_else(|| {
3587                        EngineError::Unsupported(alloc::format!(
3588                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3589                            s.name
3590                        ))
3591                    })?;
3592                let schema_cols = table.schema().columns.clone();
3593                let ctx = eval::EvalContext::new(&schema_cols, None);
3594                let mut new_values: alloc::vec::Vec<Value> =
3595                    alloc::vec::Vec::with_capacity(table.row_count());
3596                for row in table.rows().iter() {
3597                    let raw = match &using {
3598                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3599                            EngineError::Unsupported(alloc::format!(
3600                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3601                            ))
3602                        })?,
3603                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3604                    };
3605                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3606                    new_values.push(coerced);
3607                }
3608                table.schema_mut().columns[col_pos].ty = new_data_type;
3609                for (i, v) in new_values.into_iter().enumerate() {
3610                    let mut row_values = table
3611                        .rows()
3612                        .get(i)
3613                        .expect("bounds-checked above")
3614                        .values
3615                        .clone();
3616                    row_values[col_pos] = v;
3617                    table.update_row(i, row_values)?;
3618                }
3619            }
3620            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
3621                // v7.14.0 — pg_dump emits PKs as a separate
3622                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
3623                // For PRIMARY KEY / UNIQUE, install a UC entry
3624                // and the implicit BTree index on the leading
3625                // column. CHECK: append predicate to schema.
3626                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3627                    EngineError::Storage(StorageError::TableNotFound {
3628                        name: s.name.into(),
3629                    })
3630                })?;
3631                let is_pk = matches!(
3632                    tc,
3633                    spg_sql::ast::TableConstraint::PrimaryKey { .. }
3634                );
3635                match tc {
3636                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
3637                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3638                        let positions: Vec<usize> = columns
3639                            .iter()
3640                            .map(|c| {
3641                                table
3642                                    .schema()
3643                                    .columns
3644                                    .iter()
3645                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
3646                                    .ok_or_else(|| {
3647                                        EngineError::Unsupported(alloc::format!(
3648                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
3649                                            s.name
3650                                        ))
3651                                    })
3652                            })
3653                            .collect::<Result<Vec<_>, _>>()?;
3654                        // Skip if an equivalent UC is already there
3655                        // (idempotent — pg_dump's PK + a prior inline
3656                        // PK shouldn't double-install).
3657                        let already = table
3658                            .schema()
3659                            .uniqueness_constraints
3660                            .iter()
3661                            .any(|u| u.columns == positions);
3662                        if !already {
3663                            table.schema_mut().uniqueness_constraints.push(
3664                                spg_storage::UniquenessConstraint {
3665                                    is_primary_key: is_pk,
3666                                    columns: positions.clone(),
3667                                    nulls_not_distinct: false,
3668                                },
3669                            );
3670                            // PK implies NOT NULL on referenced cols.
3671                            if is_pk {
3672                                for p in &positions {
3673                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
3674                                        c.nullable = false;
3675                                    }
3676                                }
3677                            }
3678                            // Add a BTree index on the leading
3679                            // column for INSERT-side enforcement.
3680                            let leading = &columns[0];
3681                            let already_idx = table.indices().iter().any(|idx| {
3682                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3683                                    && table.schema().columns[idx.column_position].name
3684                                        == *leading
3685                            });
3686                            if !already_idx {
3687                                let suffix = if is_pk { "pkey" } else { "key" };
3688                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
3689                                let _ = table.add_index(idx_name, leading);
3690                            }
3691                        }
3692                    }
3693                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
3694                        table.schema_mut().checks.push(alloc::format!("{expr}"));
3695                    }
3696                    spg_sql::ast::TableConstraint::Index { name, columns } => {
3697                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
3698                        // mysqldump occasionally emits this
3699                        // post-CREATE-TABLE shape; build a BTree
3700                        // on the leading column using the
3701                        // user-supplied or synthesised name.
3702                        let leading = &columns[0];
3703                        let already_idx = table.indices().iter().any(|idx| {
3704                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3705                                && table.schema().columns[idx.column_position].name == *leading
3706                        });
3707                        if !already_idx {
3708                            let idx_name = name
3709                                .clone()
3710                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
3711                            let _ = table.add_index(idx_name, leading);
3712                        }
3713                    }
3714                }
3715            }
3716            spg_sql::ast::AlterTableTarget::DropColumn {
3717                column,
3718                if_exists,
3719                cascade,
3720            } => {
3721                // v7.13.3 — mailrs round-7 S8. Remove the column +
3722                // every row's value at that position; drop any index
3723                // on the column. RESTRICT (default) rejects when an
3724                // FK on this table or partial-index predicate
3725                // references the column; CASCADE removes those
3726                // dependents first.
3727                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3728                    EngineError::Storage(StorageError::TableNotFound {
3729                        name: s.name.into(),
3730                    })
3731                })?;
3732                let col_pos = match table
3733                    .schema()
3734                    .columns
3735                    .iter()
3736                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3737                {
3738                    Some(p) => p,
3739                    None => {
3740                        if if_exists {
3741                            return Ok(());
3742                        }
3743                        return Err(EngineError::Unsupported(alloc::format!(
3744                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3745                            s.name
3746                        )));
3747                    }
3748                };
3749                // Dependent check: FKs whose local columns include
3750                // col_pos. CASCADE drops them; otherwise reject.
3751                let dependent_fks: Vec<usize> = table
3752                    .schema()
3753                    .foreign_keys
3754                    .iter()
3755                    .enumerate()
3756                    .filter_map(|(i, fk)| {
3757                        if fk.local_columns.contains(&col_pos) {
3758                            Some(i)
3759                        } else {
3760                            None
3761                        }
3762                    })
3763                    .collect();
3764                if !dependent_fks.is_empty() && !cascade {
3765                    return Err(EngineError::Unsupported(alloc::format!(
3766                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3767                         use DROP COLUMN ... CASCADE to remove them"
3768                    )));
3769                }
3770                // CASCADE the FK removals first.
3771                if cascade {
3772                    // Drop in reverse so indices stay valid.
3773                    let mut sorted = dependent_fks.clone();
3774                    sorted.sort();
3775                    sorted.reverse();
3776                    let fks = &mut table.schema_mut().foreign_keys;
3777                    for i in sorted {
3778                        fks.remove(i);
3779                    }
3780                }
3781                // Drop the column. New helper on Table does the
3782                // row + schema + index shift atomically.
3783                table.drop_column(col_pos);
3784            }
3785            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
3786                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
3787                // new`. Rename the column in the schema; rewrite
3788                // every stored source string on this table that
3789                // references it as a (potentially-qualified)
3790                // column identifier: CHECK predicates, partial-
3791                // index predicates, runtime DEFAULT expressions.
3792                // Then walk catalog triggers on this table and
3793                // patch any `UPDATE OF` column list. Function and
3794                // trigger bodies are NOT auto-rewritten — that
3795                // surface is dynamic SQL territory; users update
3796                // those separately (matches PG plpgsql behavior:
3797                // a column rename invalidates name-referencing
3798                // plpgsql at call time, not rename time).
3799                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3800                    EngineError::Storage(StorageError::TableNotFound {
3801                        name: s.name.into(),
3802                    })
3803                })?;
3804                let col_pos = table
3805                    .schema()
3806                    .columns
3807                    .iter()
3808                    .position(|c| c.name.eq_ignore_ascii_case(&old))
3809                    .ok_or_else(|| {
3810                        EngineError::Unsupported(alloc::format!(
3811                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
3812                            s.name
3813                        ))
3814                    })?;
3815                // Reject same-name (case-insensitive) collision.
3816                if table
3817                    .schema()
3818                    .columns
3819                    .iter()
3820                    .enumerate()
3821                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
3822                {
3823                    return Err(EngineError::Unsupported(alloc::format!(
3824                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
3825                        s.name
3826                    )));
3827                }
3828                // Schema rename first — even idempotent same-name
3829                // rename (`ALTER TABLE t RENAME a TO a`) needs to
3830                // be a no-op, not an error.
3831                if old.eq_ignore_ascii_case(&new) {
3832                    return Ok(());
3833                }
3834                table.rename_column(col_pos, &new);
3835                // Rewrite per-column runtime_default sources on
3836                // every column of this table — a DEFAULT expression
3837                // on column X may reference column Y by name (rare,
3838                // but legal in PG when the value is supplied via a
3839                // function that takes the row).
3840                let n_cols = table.schema().columns.len();
3841                for i in 0..n_cols {
3842                    let rt = table.schema().columns[i].runtime_default.clone();
3843                    if let Some(src) = rt {
3844                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3845                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
3846                    }
3847                }
3848                // Rewrite table-level CHECK predicates.
3849                let checks = table.schema().checks.clone();
3850                let mut new_checks = Vec::with_capacity(checks.len());
3851                for chk in checks {
3852                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
3853                }
3854                table.schema_mut().checks = new_checks;
3855                // Rewrite per-index partial_predicate sources.
3856                let n_idx = table.indices().len();
3857                for i in 0..n_idx {
3858                    let pred = table.indices()[i].partial_predicate.clone();
3859                    if let Some(src) = pred {
3860                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
3861                        // SAFETY: indices_mut would be cleanest, but
3862                        // partial_predicate is the only mutable field
3863                        // here; reach in via the public mut accessor.
3864                        table.set_partial_predicate(i, Some(rewritten));
3865                    }
3866                }
3867                // Walk catalog triggers; patch `update_columns` on
3868                // triggers attached to this table.
3869                let table_name = s.name.to_string();
3870                for trig in self.active_catalog_mut().triggers_mut() {
3871                    if !trig.table.eq_ignore_ascii_case(&table_name) {
3872                        continue;
3873                    }
3874                    for c in &mut trig.update_columns {
3875                        if c.eq_ignore_ascii_case(&old) {
3876                            *c = new.clone();
3877                        }
3878                    }
3879                }
3880            }
3881        }
3882        Ok(())
3883    }
3884
3885    fn exec_alter_index(
3886        &mut self,
3887        stmt: spg_sql::ast::AlterIndexStatement,
3888    ) -> Result<QueryResult, EngineError> {
3889        // Translate the optional SQL-side encoding choice into the
3890        // storage-side enum; the same SqlVecEncoding -> VecEncoding
3891        // bridge `column_type_to_data_type` uses.
3892        let spg_sql::ast::AlterIndexStatement {
3893            name: idx_name,
3894            target,
3895        } = stmt;
3896        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
3897        let target = encoding.map(|e| match e {
3898            SqlVecEncoding::F32 => VecEncoding::F32,
3899            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
3900            SqlVecEncoding::F16 => VecEncoding::F16,
3901        });
3902        // Linear scan: index names are globally unique within a
3903        // catalog (enforced by add_nsw_index_inner) so the first
3904        // match is the only one. Save the table name to avoid
3905        // borrowing while we then take a mut borrow.
3906        let table_name = {
3907            let cat = self.active_catalog();
3908            let mut found: Option<String> = None;
3909            for tname in cat.table_names() {
3910                if let Some(t) = cat.get(&tname)
3911                    && t.indices().iter().any(|i| i.name == idx_name)
3912                {
3913                    found = Some(tname);
3914                    break;
3915                }
3916            }
3917            found.ok_or_else(|| {
3918                EngineError::Storage(StorageError::IndexNotFound {
3919                    name: idx_name.clone(),
3920                })
3921            })?
3922        };
3923        let table = self
3924            .active_catalog_mut()
3925            .get_mut(&table_name)
3926            .expect("table found above");
3927        table.rebuild_nsw_index(&idx_name, target)?;
3928        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
3929        // changes cost characteristics; evict any cached plans.
3930        self.plan_cache.evict_referencing(&table_name);
3931        Ok(QueryResult::CommandOk {
3932            affected: 0,
3933            modified_catalog: !self.in_transaction(),
3934        })
3935    }
3936
3937    fn exec_create_index(
3938        &mut self,
3939        stmt: CreateIndexStatement,
3940    ) -> Result<QueryResult, EngineError> {
3941        let table = self
3942            .active_catalog_mut()
3943            .get_mut(&stmt.table)
3944            .ok_or_else(|| {
3945                EngineError::Storage(StorageError::TableNotFound {
3946                    name: stmt.table.clone(),
3947                })
3948            })?;
3949        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
3950        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
3951            return Ok(QueryResult::CommandOk {
3952                affected: 0,
3953                modified_catalog: false,
3954            });
3955        }
3956        // v7.9.14 — multi-column index parses through; engine
3957        // builds a single-column BTree on the leading column only.
3958        // The extras live on the AST so spg-server's dispatcher
3959        // can emit a PG-wire NoticeResponse / log line. Composite
3960        // BTree keys land in v7.10.
3961        let _ = &stmt.extra_columns; // intentional drop on engine side
3962        let table_name = stmt.table.clone();
3963        // v6.8.0 — resolve INCLUDE column names to positions. Done
3964        // before `add_index` so a typo error surfaces before any
3965        // catalog mutation lands.
3966        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
3967            Vec::new()
3968        } else {
3969            let schema = table.schema();
3970            stmt.included_columns
3971                .iter()
3972                .map(|c| {
3973                    schema.column_position(c).ok_or_else(|| {
3974                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
3975                    })
3976                })
3977                .collect::<Result<Vec<_>, _>>()?
3978        };
3979        match stmt.method {
3980            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
3981            IndexMethod::Hnsw => {
3982                if !included_positions.is_empty() {
3983                    return Err(EngineError::Unsupported(
3984                        "INCLUDE columns are not supported on HNSW indexes".into(),
3985                    ));
3986                }
3987                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
3988            }
3989            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
3990            IndexMethod::Brin => {
3991                if !included_positions.is_empty() {
3992                    return Err(EngineError::Unsupported(
3993                        "INCLUDE columns are not supported on BRIN indexes".into(),
3994                    ));
3995                }
3996                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
3997            }
3998            // v7.12.3 — GIN inverted index. Real posting-list-backed
3999            // GIN when the indexed column is `tsvector`; falls back
4000            // to a BTree on the leading column for any other column
4001            // type so v7.9.26b's `pg_dump` compatibility (GIN on
4002            // JSONB etc. silently loading as BTree) is preserved.
4003            // Operators see the real GIN only where it matters; old
4004            // schemas keep loading.
4005            IndexMethod::Gin => {
4006                if !included_positions.is_empty() {
4007                    return Err(EngineError::Unsupported(
4008                        "INCLUDE columns are not supported on GIN indexes".into(),
4009                    ));
4010                }
4011                let col_pos = table
4012                    .schema()
4013                    .column_position(&stmt.column)
4014                    .ok_or_else(|| {
4015                        EngineError::Storage(StorageError::ColumnNotFound {
4016                            column: stmt.column.clone(),
4017                        })
4018                    })?;
4019                let col_ty = table.schema().columns[col_pos].ty;
4020                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
4021                // column dispatches to the real trigram-shingle
4022                // GIN build (LIKE / similarity acceleration).
4023                // Other GIN opclasses fall through to the regular
4024                // tsvector-vs-BTree split below.
4025                let is_trgm = stmt
4026                    .opclass
4027                    .as_deref()
4028                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
4029                if is_trgm
4030                    && matches!(
4031                        col_ty,
4032                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
4033                    )
4034                {
4035                    table
4036                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
4037                        .map_err(EngineError::Storage)?;
4038                } else if col_ty == spg_storage::DataType::TsVector {
4039                    table
4040                        .add_gin_index(stmt.name.clone(), &stmt.column)
4041                        .map_err(EngineError::Storage)?;
4042                } else {
4043                    // v7.9.26b BTree fallback — the catalog still
4044                    // gets an index entry on the leading column so
4045                    // pg_dump scripts that name GIN on JSONB / etc.
4046                    // load clean; query-time gain stays opt-in for
4047                    // tsvector callers.
4048                    table.add_index(stmt.name.clone(), &stmt.column)?;
4049                }
4050            }
4051        }
4052        if !included_positions.is_empty()
4053            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
4054        {
4055            idx.included_columns = included_positions;
4056        }
4057        // v6.8.1 — persist partial-index predicate. Stored as the
4058        // expression's Display form so the catalog snapshot stays
4059        // pure (storage has no spg-sql dependency). The runtime
4060        // maintenance path treats partial indexes identically to
4061        // full indexes for v6.8.1 (over-maintenance is safe; the
4062        // planner-side "use partial when query WHERE implies the
4063        // predicate" pass is STABILITY carve-out).
4064        if let Some(pred_expr) = &stmt.partial_predicate {
4065            let canonical = pred_expr.to_string();
4066            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
4067            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
4068            // routinely to slim trigram indexes. SPG now persists
4069            // the predicate for GIN / BRIN / HNSW the same way it
4070            // already does for BTree — same v6.8.1 "over-maintain
4071            // is safe; planner-side partial routing is STABILITY
4072            // carve-out" semantics. HNSW carries an additional
4073            // caveat: the predicate isn't applied at index build
4074            // time (would require per-row eval inside the NSW
4075            // construction loop), so the index oversamples; query
4076            // time the WHERE clause still filters correctly.
4077            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4078                idx.partial_predicate = Some(canonical);
4079            }
4080        }
4081        // v6.8.2 — persist expression index key. Same Display-form
4082        // storage; the runtime maintenance pass evaluates each
4083        // row's expression to derive the index key, but for v6.8.2
4084        // the engine falls through to the bare-column-reference
4085        // path and the expression is preserved for format-layer
4086        // round-trip + future planner work. Carved-out in
4087        // STABILITY § "Out of v6.8".
4088        if let Some(key_expr) = &stmt.expression {
4089            if matches!(
4090                stmt.method,
4091                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
4092            ) {
4093                return Err(EngineError::Unsupported(
4094                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
4095                ));
4096            }
4097            let canonical = key_expr.to_string();
4098            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4099                idx.expression = Some(canonical);
4100            }
4101        }
4102        // v7.9.29 — persist `is_unique` flag on the storage Index.
4103        // Combined with `partial_predicate`, INSERT enforcement
4104        // checks that no other row whose predicate evaluates true
4105        // shares the same indexed key. Parser already rejected
4106        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
4107        // For multi-column UNIQUE INDEX the extras matter (the
4108        // full tuple is the uniqueness key), so resolve them to
4109        // column positions and persist on the index too.
4110        if stmt.is_unique {
4111            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
4112            for col_name in &stmt.extra_columns {
4113                let pos = table
4114                    .schema()
4115                    .columns
4116                    .iter()
4117                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
4118                    .ok_or_else(|| {
4119                        EngineError::Unsupported(alloc::format!(
4120                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
4121                            stmt.name,
4122                            stmt.table
4123                        ))
4124                    })?;
4125                extra_positions.push(pos);
4126            }
4127            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
4128                idx.is_unique = true;
4129                idx.extra_column_positions = extra_positions;
4130            }
4131            // At index-creation time, check the existing rows for
4132            // pre-existing duplicates that would have violated the
4133            // new constraint — otherwise CREATE UNIQUE INDEX would
4134            // silently leave duplicates in place.
4135            let snapshot_indices = table.indices().to_vec();
4136            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
4137                table.rows().iter().cloned().collect();
4138            let snapshot_schema = table.schema().clone();
4139            let idx_ref = snapshot_indices
4140                .iter()
4141                .find(|i| i.name == stmt.name)
4142                .expect("just-added index");
4143            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
4144        }
4145        // v6.3.1 — adding an index can change the optimal plan for
4146        // any cached query that references this table.
4147        self.plan_cache.evict_referencing(&table_name);
4148        Ok(QueryResult::CommandOk {
4149            affected: 0,
4150            modified_catalog: !self.in_transaction(),
4151        })
4152    }
4153
4154    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
4155    /// for `CREATE TABLE IF NOT EXISTS` when the table already
4156    /// exists. Adds missing columns + inline FKs from the new
4157    /// definition; existing columns / constraints stay untouched.
4158    /// New columns with a `NOT NULL` declaration without a
4159    /// `DEFAULT` are reported as a clear error rather than
4160    /// silently dropped — this is the "fail loud on real
4161    /// incompatibility, fail silent on schema-superset" tradeoff.
4162    fn reconcile_table_if_not_exists(
4163        &mut self,
4164        stmt: CreateTableStatement,
4165    ) -> Result<QueryResult, EngineError> {
4166        let table_name = stmt.name.clone();
4167        let clock = self.clock;
4168        let existing_col_names: alloc::collections::BTreeSet<String> = self
4169            .active_catalog()
4170            .get(&table_name)
4171            .expect("checked above")
4172            .schema()
4173            .columns
4174            .iter()
4175            .map(|c| c.name.to_ascii_lowercase())
4176            .collect();
4177        let row_count = self
4178            .active_catalog()
4179            .get(&table_name)
4180            .expect("checked above")
4181            .row_count();
4182        // Collect missing column defs in source order.
4183        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
4184            .columns
4185            .iter()
4186            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
4187            .cloned()
4188            .collect();
4189        for col_def in new_columns {
4190            let col_name = col_def.name.clone();
4191            let nullable = col_def.nullable;
4192            let has_default = col_def.default.is_some() || col_def.auto_increment;
4193            let col_schema = column_def_to_schema(col_def)?;
4194            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
4195                resolve_column_default_free(&col_schema, clock)?
4196            } else if nullable || row_count == 0 {
4197                Value::Null
4198            } else {
4199                return Err(EngineError::Unsupported(alloc::format!(
4200                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
4201                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
4202                )));
4203            };
4204            let table = self
4205                .active_catalog_mut()
4206                .get_mut(&table_name)
4207                .expect("checked above");
4208            table.add_column(col_schema, fill_value);
4209        }
4210        // Resolve any newly-added inline FKs (column-level
4211        // REFERENCES forms) and install. Skip FKs whose local
4212        // columns we didn't have in the existing table.
4213        let table_cols_now = self
4214            .active_catalog()
4215            .get(&table_name)
4216            .expect("checked above")
4217            .schema()
4218            .columns
4219            .clone();
4220        for fk in stmt.foreign_keys {
4221            // Only install FKs whose every local column resolves
4222            // — older catalogs may have a column the new FK
4223            // references but not the column the new FK declares.
4224            let all_resolved = fk
4225                .columns
4226                .iter()
4227                .all(|c| table_cols_now.iter().any(|sc| sc.name.eq_ignore_ascii_case(c)));
4228            if !all_resolved {
4229                continue;
4230            }
4231            let already_present = {
4232                let table = self
4233                    .active_catalog()
4234                    .get(&table_name)
4235                    .expect("checked above");
4236                table.schema().foreign_keys.iter().any(|f| {
4237                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
4238                        && f.local_columns.len() == fk.columns.len()
4239                })
4240            };
4241            if already_present {
4242                continue;
4243            }
4244            let storage_fk =
4245                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
4246            let table = self
4247                .active_catalog_mut()
4248                .get_mut(&table_name)
4249                .expect("checked above");
4250            table.schema_mut().foreign_keys.push(storage_fk);
4251        }
4252        Ok(QueryResult::CommandOk {
4253            affected: 0,
4254            modified_catalog: !self.in_transaction(),
4255        })
4256    }
4257
4258    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
4259    fn exec_drop_table(
4260        &mut self,
4261        names: Vec<String>,
4262        if_exists: bool,
4263    ) -> Result<QueryResult, EngineError> {
4264        for name in names {
4265            let dropped = self.active_catalog_mut().drop_table(&name);
4266            if !dropped && !if_exists {
4267                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
4268            }
4269        }
4270        Ok(QueryResult::CommandOk {
4271            affected: 0,
4272            modified_catalog: !self.in_transaction(),
4273        })
4274    }
4275
4276    /// v7.14.0 — DROP INDEX handler.
4277    fn exec_drop_index(
4278        &mut self,
4279        name: String,
4280        if_exists: bool,
4281    ) -> Result<QueryResult, EngineError> {
4282        let dropped = self.active_catalog_mut().drop_named_index(&name);
4283        if !dropped && !if_exists {
4284            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
4285        }
4286        Ok(QueryResult::CommandOk {
4287            affected: 0,
4288            modified_catalog: !self.in_transaction(),
4289        })
4290    }
4291
4292    fn exec_create_table(
4293        &mut self,
4294        stmt: CreateTableStatement,
4295    ) -> Result<QueryResult, EngineError> {
4296        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
4297            // v7.13.3 — mailrs round-7 S9 reconciliation. PG's
4298            // semantics for `CREATE TABLE IF NOT EXISTS` is a
4299            // silent no-op when the table exists, even if the new
4300            // definition adds columns or constraints. SPG extends
4301            // this: any column in the new definition that's
4302            // missing from the existing table is added (with
4303            // DEFAULT back-fill / NULL); inline FKs likewise.
4304            // Existing columns are NOT modified. This makes
4305            // mailrs's schema layering (init-schema's `contacts`
4306            // sender-tracking table + migrate-023's CardDAV
4307            // `contacts` extension) converge correctly without
4308            // mailrs-side edits. PG users who want PG-strict
4309            // silent-no-op behaviour can use SPG's `--strict-pg`
4310            // flag (deferred to v7.14).
4311            return self.reconcile_table_if_not_exists(stmt);
4312        }
4313        let table_name = stmt.name.clone();
4314        // v7.9.13 — pluck the names of any columns marked
4315        // `PRIMARY KEY` inline so the post-create-table pass can
4316        // build an implicit BTree index. mailrs F1.
4317        let inline_pk_columns: Vec<String> = stmt
4318            .columns
4319            .iter()
4320            .filter(|c| c.is_primary_key)
4321            .map(|c| c.name.clone())
4322            .collect();
4323        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
4324        // and UNIQUE (a, b, ...). Each builds a BTree index on the
4325        // leading column (the existing single-column storage tier)
4326        // and registers a UniquenessConstraint on the schema for
4327        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
4328        let cols = stmt
4329            .columns
4330            .into_iter()
4331            .map(column_def_to_schema)
4332            .collect::<Result<Vec<_>, _>>()?;
4333        // Composite NOT-NULL implication for PRIMARY KEY columns.
4334        let mut cols = cols;
4335        for tc in &stmt.table_constraints {
4336            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
4337                for col_name in columns {
4338                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
4339                        col.nullable = false;
4340                    }
4341                }
4342            }
4343        }
4344        // v7.6.1 — resolve every FK in the statement against the
4345        // already-known catalog. Validates: parent table exists,
4346        // parent column names exist, arity matches, parent columns
4347        // have a PK / UNIQUE index. Self-referencing FKs (parent
4348        // table == this table) resolve against the column list we
4349        // just built — they don't need the catalog yet.
4350        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4351            Vec::with_capacity(stmt.foreign_keys.len());
4352        for fk in stmt.foreign_keys {
4353            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
4354            // (mysqldump preamble + bulk imports), defer FK
4355            // resolution if the parent table isn't in the catalog
4356            // yet. The FK is queued and resolved when checks flip
4357            // back on. Self-references stay in-band (the parent is
4358            // the same as the child we're building).
4359            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
4360            if !self.foreign_key_checks
4361                && needs_parent
4362                && self.active_catalog().get(&fk.parent_table).is_none()
4363            {
4364                self.pending_foreign_keys
4365                    .push((table_name.clone(), fk));
4366                continue;
4367            }
4368            fks.push(resolve_foreign_key(
4369                &table_name,
4370                &cols,
4371                fk,
4372                self.active_catalog(),
4373            )?);
4374        }
4375        let mut schema = TableSchema::new(table_name.clone(), cols);
4376        schema.foreign_keys = fks;
4377        // v7.9.19 — translate AST table_constraints to storage
4378        // UniquenessConstraints (column name → position) so the
4379        // INSERT enforcement helper sees positions directly.
4380        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4381        let mut check_exprs: Vec<String> = Vec::new();
4382        for tc in &stmt.table_constraints {
4383            let (is_pk, names, nnd) = match tc {
4384                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4385                    (true, columns.clone(), false)
4386                }
4387                spg_sql::ast::TableConstraint::Unique {
4388                    columns,
4389                    nulls_not_distinct,
4390                    ..
4391                } => (false, columns.clone(), *nulls_not_distinct),
4392                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4393                    // v7.13.0 — collect CHECK predicate sources;
4394                    // they get attached to the schema below.
4395                    check_exprs.push(alloc::format!("{expr}"));
4396                    continue;
4397                }
4398                // v7.15.0 — plain `KEY (cols)` from MySQL inline
4399                // is NOT a uniqueness constraint; skip the UC
4400                // build path entirely. The BTree index lands in
4401                // the post-create loop below alongside the PK/UQ
4402                // implicit indexes.
4403                spg_sql::ast::TableConstraint::Index { .. } => continue,
4404            };
4405            let mut positions = Vec::with_capacity(names.len());
4406            for n in &names {
4407                let pos = schema
4408                    .columns
4409                    .iter()
4410                    .position(|c| c.name == *n)
4411                    .ok_or_else(|| {
4412                        EngineError::Unsupported(alloc::format!(
4413                            "table constraint references unknown column {n:?}"
4414                        ))
4415                    })?;
4416                positions.push(pos);
4417            }
4418            uc_storage.push(spg_storage::UniquenessConstraint {
4419                is_primary_key: is_pk,
4420                columns: positions,
4421                nulls_not_distinct: nnd,
4422            });
4423        }
4424        schema.uniqueness_constraints = uc_storage.clone();
4425        schema.checks = check_exprs;
4426        self.active_catalog_mut().create_table(schema)?;
4427        // v7.9.13 — implicit BTree per inline PK column +
4428        // v7.9.19 — implicit BTree on the leading column of every
4429        // table-level PRIMARY KEY / UNIQUE constraint.
4430        let table = self
4431            .active_catalog_mut()
4432            .get_mut(&table_name)
4433            .expect("just created");
4434        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4435            let idx_name = if inline_pk_columns.len() == 1 {
4436                alloc::format!("{table_name}_pkey")
4437            } else {
4438                alloc::format!("{table_name}_pkey_{i}")
4439            };
4440            if let Err(e) = table.add_index(idx_name, col_name) {
4441                return Err(EngineError::Storage(e));
4442            }
4443        }
4444        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4445            // v7.15.0 — plain KEY/INDEX rides this same loop so
4446            // the implicit BTree gets built. It carries its own
4447            // user-supplied name; PK/UQ still synthesise.
4448            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
4449                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4450                    ("pkey", columns, None)
4451                }
4452                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
4453                spg_sql::ast::TableConstraint::Index { name, columns } => {
4454                    ("idx", columns, name.as_ref())
4455                }
4456                spg_sql::ast::TableConstraint::Check { .. } => continue,
4457            };
4458            let leading = &names[0];
4459            // Skip if a same-column BTree already exists (e.g.
4460            // inline PK on the leading column).
4461            let already = table.indices().iter().any(|idx| {
4462                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4463                    && table.schema().columns[idx.column_position].name == *leading
4464            });
4465            if already {
4466                continue;
4467            }
4468            let idx_name = if let Some(n) = explicit_name {
4469                n.clone()
4470            } else if names.len() == 1 {
4471                alloc::format!("{table_name}_{leading}_{suffix}")
4472            } else {
4473                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4474            };
4475            if let Err(e) = table.add_index(idx_name, leading) {
4476                return Err(EngineError::Storage(e));
4477            }
4478        }
4479        Ok(QueryResult::CommandOk {
4480            affected: 0,
4481            modified_catalog: !self.in_transaction(),
4482        })
4483    }
4484
4485    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4486        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4487        // round-5 G4). Execute the inner SELECT first, then route
4488        // back through the regular VALUES code path with the
4489        // materialised rows.
4490        if let Some(select) = stmt.select_source.clone() {
4491            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4492            let rows = match select_result {
4493                QueryResult::Rows { rows, .. } => rows,
4494                other => {
4495                    return Err(EngineError::Unsupported(alloc::format!(
4496                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4497                    )));
4498                }
4499            };
4500            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4501            for row in rows {
4502                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4503                for v in row.values {
4504                    tuple.push(value_to_literal_expr_permissive(v)?);
4505                }
4506                materialised.push(tuple);
4507            }
4508            let recurse = InsertStatement {
4509                table: stmt.table,
4510                columns: stmt.columns,
4511                rows: materialised,
4512                select_source: None,
4513                on_conflict: stmt.on_conflict,
4514                returning: stmt.returning,
4515            };
4516            return self.exec_insert(recurse);
4517        }
4518        // v7.9.21 — snapshot the clock fn pointer before the mut
4519        // borrow on the catalog opens; runtime DEFAULT eval needs
4520        // it inside the row hot loop.
4521        let clock = self.clock;
4522        // v7.12.4 — snapshot row-level triggers + their referenced
4523        // functions before the mut borrow on the catalog opens.
4524        // Cloned out so the row hot loop can fire them without
4525        // re-borrowing the catalog (which would conflict with
4526        // table.insert's mutable borrow).
4527        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4528        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4529        let trigger_session_cfg: Option<alloc::string::String> = self
4530            .session_params
4531            .get("default_text_search_config")
4532            .cloned();
4533        let table = self
4534            .active_catalog_mut()
4535            .get_mut(&stmt.table)
4536            .ok_or_else(|| {
4537                EngineError::Storage(StorageError::TableNotFound {
4538                    name: stmt.table.clone(),
4539                })
4540            })?;
4541        // v3.1.5: clone the columns vector only (not the whole
4542        // TableSchema — saves one String alloc for the table name).
4543        // We need an owned snapshot because we'll call `table.insert`
4544        // (mutable borrow on `table`) inside the row loop while
4545        // reading schema fields.
4546        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4547        let schema_cols_len = column_meta.len();
4548        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4549        // column `c` is filled from the `j`-th tuple slot; `None` means
4550        // "fill with NULL". Validated once and reused for every row.
4551        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4552            None => None, // 1-1 mapping, fast path
4553            Some(cols) => {
4554                let mut map = alloc::vec![None; schema_cols_len];
4555                for (j, name) in cols.iter().enumerate() {
4556                    let idx = column_meta
4557                        .iter()
4558                        .position(|c| c.name == *name)
4559                        .ok_or_else(|| {
4560                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4561                        })?;
4562                    if map[idx].is_some() {
4563                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4564                            expected: schema_cols_len,
4565                            actual: cols.len(),
4566                        }));
4567                    }
4568                    map[idx] = Some(j);
4569                }
4570                // Omitted columns must either be nullable, carry a
4571                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4572                // omissions up front so the WAL stays clean.
4573                for (i, col) in column_meta.iter().enumerate() {
4574                    if map[i].is_none()
4575                        && !col.nullable
4576                        && col.default.is_none()
4577                        && col.runtime_default.is_none()
4578                        && !col.auto_increment
4579                    {
4580                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4581                            column: col.name.clone(),
4582                        }));
4583                    }
4584                }
4585                Some(map)
4586            }
4587        };
4588        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4589        // v7.6.2 — snapshot this table's FK list before the
4590        // mutable-borrow window so we can run parent lookups
4591        // against the immutable catalog after parsing. Empty vec is
4592        // the no-FK fast path; clone cost is O(fks * arity) which
4593        // is < 100 ns for typical schemas.
4594        let fks = table.schema().foreign_keys.clone();
4595        let mut affected = 0usize;
4596        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4597        // single mutable borrow.
4598        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4599        for tuple in stmt.rows {
4600            if tuple.len() != expected_tuple_len {
4601                return Err(EngineError::Storage(StorageError::ArityMismatch {
4602                    expected: expected_tuple_len,
4603                    actual: tuple.len(),
4604                }));
4605            }
4606            // Fast path: no column-list permutation → tuple slot j
4607            // maps to schema column j. We can zip schema with tuple
4608            // and skip the `raw_tuple` staging allocation entirely.
4609            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4610                // Permuted path: still need raw_tuple to index by `map[i]`.
4611                let raw_tuple: Vec<Value> = tuple
4612                    .into_iter()
4613                    .map(literal_expr_to_value)
4614                    .collect::<Result<_, _>>()?;
4615                let mut out = Vec::with_capacity(schema_cols_len);
4616                for (i, col) in column_meta.iter().enumerate() {
4617                    let mut raw = match map[i] {
4618                        Some(j) => raw_tuple[j].clone(),
4619                        None => resolve_column_default_free(col, clock)?,
4620                    };
4621                    if col.auto_increment && raw.is_null() {
4622                        let next = table.next_auto_value(i).ok_or_else(|| {
4623                            EngineError::Unsupported(alloc::format!(
4624                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4625                                col.name
4626                            ))
4627                        })?;
4628                        raw = Value::BigInt(next);
4629                    }
4630                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4631                }
4632                out
4633            } else {
4634                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4635                let mut out = Vec::with_capacity(schema_cols_len);
4636                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4637                    let mut raw = literal_expr_to_value(expr)?;
4638                    if col.auto_increment && raw.is_null() {
4639                        let next = table.next_auto_value(i).ok_or_else(|| {
4640                            EngineError::Unsupported(alloc::format!(
4641                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4642                                col.name
4643                            ))
4644                        })?;
4645                        raw = Value::BigInt(next);
4646                    }
4647                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4648                }
4649                out
4650            };
4651            all_values.push(values);
4652        }
4653        // Stage 2 — FK enforcement on the immutable catalog.
4654        // Non-lexical lifetimes release the mutable borrow on
4655        // `table` here since stage 1 was the last use. The
4656        // parent-table lookup runs before any row is committed.
4657        let uniqueness = table.schema().uniqueness_constraints.clone();
4658        let _ = table;
4659        if !fks.is_empty() {
4660            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4661        }
4662        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4663        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4664        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4665        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4666        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4667        // Independent of table-level UniquenessConstraint (which
4668        // can't carry a predicate). Walks the table's indexes;
4669        // for each `is_unique` index, only rows whose
4670        // partial_predicate evaluates truthy are checked for
4671        // collision. mailrs K1.
4672        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4673        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4674        //   - `DO NOTHING` filters `all_values` to non-conflicting
4675        //     rows + drops within-batch duplicates.
4676        //   - `DO UPDATE SET …` ALSO filters, but for each
4677        //     conflicting row it queues an UPDATE on the existing
4678        //     row using the incoming row's values as `EXCLUDED.*`.
4679        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4680        let mut skipped_count = 0usize;
4681        if let Some(clause) = &stmt.on_conflict {
4682            let conflict_cols = resolve_on_conflict_columns(
4683                self.active_catalog(),
4684                &stmt.table,
4685                clause.target_columns.as_slice(),
4686            )?;
4687            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4688            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4689            for values in all_values {
4690                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4691                // SQL spec: NULL in any conflict column means "no
4692                // conflict possible" (NULL ≠ NULL for uniqueness).
4693                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4694                let collides_with_table = !has_null_key
4695                    && on_conflict_keys_exist(
4696                        self.active_catalog(),
4697                        &stmt.table,
4698                        &conflict_cols,
4699                        &key_tuple,
4700                    );
4701                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4702                let collides_with_batch =
4703                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4704                let collides = collides_with_table || collides_with_batch;
4705                match (&clause.action, collides) {
4706                    (_, false) => {
4707                        seen_keys.push(key_tuple_owned);
4708                        kept.push(values);
4709                    }
4710                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4711                        skipped_count += 1;
4712                    }
4713                    (
4714                        spg_sql::ast::OnConflictAction::Update {
4715                            assignments,
4716                            where_,
4717                        },
4718                        true,
4719                    ) => {
4720                        if !collides_with_table {
4721                            skipped_count += 1;
4722                            continue;
4723                        }
4724                        let target_pos = lookup_row_position_by_keys(
4725                            self.active_catalog(),
4726                            &stmt.table,
4727                            &conflict_cols,
4728                            &key_tuple,
4729                        )
4730                        .ok_or_else(|| {
4731                            EngineError::Unsupported(
4732                                "ON CONFLICT DO UPDATE: conflict detected but row \
4733                                 position could not be resolved (cold-tier row?)"
4734                                    .into(),
4735                            )
4736                        })?;
4737                        let updated = apply_on_conflict_assignments(
4738                            self.active_catalog(),
4739                            &stmt.table,
4740                            target_pos,
4741                            &values,
4742                            assignments,
4743                            where_.as_ref(),
4744                        )?;
4745                        if let Some(new_row) = updated {
4746                            pending_updates.push((target_pos, new_row));
4747                        } else {
4748                            skipped_count += 1;
4749                        }
4750                    }
4751                }
4752            }
4753            all_values = kept;
4754        }
4755        // Stage 3 — insert all rows under a fresh mutable borrow.
4756        let table = self
4757            .active_catalog_mut()
4758            .get_mut(&stmt.table)
4759            .ok_or_else(|| {
4760                EngineError::Storage(StorageError::TableNotFound {
4761                    name: stmt.table.clone(),
4762                })
4763            })?;
4764        // v7.9.4 — keep RETURNING projection rows separate per
4765        // INSERT and per UPDATE branch so DO UPDATE pushes the new
4766        // post-update state, not the incoming-only values.
4767        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
4768        // v7.12.7 — collect embedded SQL emitted by any trigger
4769        // fire across the row loop; engine drains the queue after
4770        // the table mut borrow drops.
4771        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4772        'rowloop: for values in all_values {
4773            let mut row = Row::new(values);
4774            // v7.12.4 — BEFORE INSERT row-level triggers. Each
4775            // trigger may rewrite NEW cells (e.g. populate
4776            // `search_vector := to_tsvector(...)`) and may return
4777            // NULL to skip the row entirely.
4778            for fd in &before_insert_triggers {
4779                let (outcome, deferred) = triggers::fire_row_trigger(
4780                    fd,
4781                    Some(row.clone()),
4782                    None,
4783                    &stmt.table,
4784                    &column_meta,
4785                    &[],
4786                    trigger_session_cfg.as_deref(),
4787                    false,
4788                )
4789                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4790                deferred_embedded.extend(deferred);
4791                match outcome {
4792                    triggers::TriggerOutcome::Row(r) => row = r,
4793                    triggers::TriggerOutcome::Skip => continue 'rowloop,
4794                }
4795            }
4796            if stmt.returning.is_some() {
4797                returning_rows.push(row.values.clone());
4798            }
4799            // v7.12.4 — clone for the AFTER trigger view; insert
4800            // moves the row into the table.
4801            let inserted = row.clone();
4802            table.insert(row)?;
4803            affected += 1;
4804            // v7.12.4 — AFTER INSERT row-level triggers fire post-
4805            // write. Return value is ignored (PG semantics); we
4806            // surface any error from the body up to the caller.
4807            for fd in &after_insert_triggers {
4808                let (_outcome, deferred) = triggers::fire_row_trigger(
4809                    fd,
4810                    Some(inserted.clone()),
4811                    None,
4812                    &stmt.table,
4813                    &column_meta,
4814                    &[],
4815                    trigger_session_cfg.as_deref(),
4816                    true,
4817                )
4818                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4819                deferred_embedded.extend(deferred);
4820            }
4821        }
4822        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
4823        // in the conflict-resolution pass. update_row handles
4824        // index maintenance + body re-encoding.
4825        for (pos, new_row) in pending_updates {
4826            if stmt.returning.is_some() {
4827                returning_rows.push(new_row.clone());
4828            }
4829            table.update_row(pos, new_row)?;
4830            affected += 1;
4831        }
4832        let _ = skipped_count;
4833        // v7.12.7 — drop the table mut borrow and drain any
4834        // trigger-emitted embedded SQL queued during this INSERT.
4835        // The borrow has to release first because each deferred
4836        // stmt may UPDATE / INSERT / DELETE the same (or another)
4837        // table — including, in principle, this one.
4838        let _ = table;
4839        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
4840        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
4841        // up in the table after this statement (insert or
4842        // post-update on conflict).
4843        if let Some(items) = &stmt.returning {
4844            return self.build_returning_rows(&stmt.table, items, returning_rows);
4845        }
4846        // v6.2.1 — auto-analyze: track per-table modified-row
4847        // counter so the background sweep can decide when to
4848        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
4849        // — one BTreeMap entry update per INSERT batch.
4850        if !self.in_transaction() && affected > 0 {
4851            self.statistics
4852                .record_modifications(&stmt.table, affected as u64);
4853        }
4854        Ok(QueryResult::CommandOk {
4855            affected,
4856            modified_catalog: !self.in_transaction(),
4857        })
4858    }
4859
4860    /// v4.5: SELECT with cooperative cancellation. The token is
4861    /// honoured between UNION peers and inside the bare-SELECT row
4862    /// loop; HNSW kNN graph walks and the aggregate executor don't
4863    /// honour it yet (deferred — those paths bound their work
4864    /// internally by `LIMIT k` and `GROUP BY` cardinality).
4865    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
4866    /// by id, decodes each row body against the table's current
4867    /// schema, applies the SELECT's projection + optional WHERE +
4868    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
4869    /// / ORDER BY are unsupported on this path (STABILITY carve-
4870    /// out); operators wanting them should restore the segment
4871    /// into a regular table first.
4872    fn exec_select_as_of_segment(
4873        &self,
4874        stmt: &SelectStatement,
4875        from: &spg_sql::ast::FromClause,
4876        segment_id: u32,
4877    ) -> Result<QueryResult, EngineError> {
4878        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
4879        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
4880        if !from.joins.is_empty()
4881            || stmt.group_by.is_some()
4882            || stmt.having.is_some()
4883            || !stmt.unions.is_empty()
4884            || !stmt.order_by.is_empty()
4885            || stmt.offset.is_some()
4886            || stmt.distinct
4887            || aggregate::uses_aggregate(stmt)
4888        {
4889            return Err(EngineError::Unsupported(
4890                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
4891                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
4892                    .into(),
4893            ));
4894        }
4895        let table = self
4896            .active_catalog()
4897            .get(&from.primary.name)
4898            .ok_or_else(|| StorageError::TableNotFound {
4899                name: from.primary.name.clone(),
4900            })?;
4901        let schema = table.schema().clone();
4902        let schema_cols = &schema.columns;
4903        let alias = from
4904            .primary
4905            .alias
4906            .as_deref()
4907            .unwrap_or(from.primary.name.as_str());
4908        let ctx = EvalContext::new(schema_cols, Some(alias));
4909        let seg = self
4910            .active_catalog()
4911            .cold_segment(segment_id)
4912            .ok_or_else(|| {
4913                EngineError::Unsupported(alloc::format!(
4914                    "AS OF SEGMENT: cold segment {segment_id} not registered"
4915                ))
4916            })?;
4917        let mut out_rows: Vec<Row> = Vec::new();
4918        let mut limit_remaining: Option<usize> =
4919            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
4920        for (_key, body) in seg.scan() {
4921            let (row, _consumed) =
4922                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
4923            if let Some(where_expr) = &stmt.where_ {
4924                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
4925                if !matches!(cond, Value::Bool(true)) {
4926                    continue;
4927                }
4928            }
4929            // Projection.
4930            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
4931            out_rows.push(projected);
4932            if let Some(rem) = limit_remaining.as_mut() {
4933                if *rem == 0 {
4934                    out_rows.pop();
4935                    break;
4936                }
4937                *rem -= 1;
4938            }
4939        }
4940        // Output column schema: derive from SELECT items.
4941        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
4942        Ok(QueryResult::Rows {
4943            columns,
4944            rows: out_rows,
4945        })
4946    }
4947
4948    /// v6.10.2 — simple-path WHERE eval that doesn't go through
4949    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
4950    /// scan paths predicate against a snapshot frozen segment, no
4951    /// cross-row state.
4952    fn eval_expr_simple(
4953        &self,
4954        expr: &Expr,
4955        row: &Row,
4956        ctx: &EvalContext,
4957    ) -> Result<Value, EngineError> {
4958        let cancel = CancelToken::none();
4959        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
4960    }
4961
4962    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
4963    /// Given the table name, the user-supplied projection items,
4964    /// and the mutated rows (post-insert / post-update values, or
4965    /// pre-delete snapshot), build a `QueryResult::Rows` whose
4966    /// schema describes the projected columns. Mailrs migration
4967    /// blocker #1.
4968    fn build_returning_rows(
4969        &self,
4970        table_name: &str,
4971        items: &[SelectItem],
4972        mutated_rows: Vec<Vec<Value>>,
4973    ) -> Result<QueryResult, EngineError> {
4974        let table = self.active_catalog().get(table_name).ok_or_else(|| {
4975            EngineError::Storage(StorageError::TableNotFound {
4976                name: table_name.into(),
4977            })
4978        })?;
4979        let schema_cols = table.schema().columns.clone();
4980        let columns = self.derive_output_columns(items, &schema_cols, table_name);
4981        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
4982        for values in mutated_rows {
4983            let row = Row::new(values);
4984            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
4985            out_rows.push(projected);
4986        }
4987        Ok(QueryResult::Rows {
4988            columns,
4989            rows: out_rows,
4990        })
4991    }
4992
4993    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
4994    /// `SelectItem::Wildcard` to all schema columns and
4995    /// `SelectItem::Expr` via the regular eval path.
4996    fn project_row_simple(
4997        &self,
4998        row: &Row,
4999        items: &[SelectItem],
5000        schema_cols: &[ColumnSchema],
5001        alias: &str,
5002    ) -> Result<Row, EngineError> {
5003        let ctx = EvalContext::new(schema_cols, Some(alias));
5004        let cancel = CancelToken::none();
5005        let mut out_vals = Vec::new();
5006        for item in items {
5007            match item {
5008                SelectItem::Wildcard => {
5009                    out_vals.extend(row.values.iter().cloned());
5010                }
5011                SelectItem::Expr { expr, .. } => {
5012                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
5013                    out_vals.push(v);
5014                }
5015            }
5016        }
5017        Ok(Row::new(out_vals))
5018    }
5019
5020    /// v6.10.2 — derive the output `ColumnSchema` list for an
5021    /// AS OF SEGMENT projection. Wildcards take the full schema;
5022    /// expressions take the alias if present or a synthetic
5023    /// `?column?` (PG convention) otherwise.
5024    fn derive_output_columns(
5025        &self,
5026        items: &[SelectItem],
5027        schema_cols: &[ColumnSchema],
5028        _alias: &str,
5029    ) -> Vec<ColumnSchema> {
5030        let mut out = Vec::new();
5031        for item in items {
5032            match item {
5033                SelectItem::Wildcard => {
5034                    out.extend(schema_cols.iter().cloned());
5035                }
5036                SelectItem::Expr { alias, .. } => {
5037                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
5038                    // Default to Text; the caller's row values
5039                    // carry the actual type. v6.10.2 scope.
5040                    out.push(ColumnSchema::new(name, DataType::Text, true));
5041                }
5042            }
5043        }
5044        out
5045    }
5046
5047    fn exec_select_cancel(
5048        &self,
5049        stmt: &SelectStatement,
5050        cancel: CancelToken<'_>,
5051    ) -> Result<QueryResult, EngineError> {
5052        cancel.check()?;
5053        // v6.10.2 — cold-tier time-travel short-circuit. When the
5054        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
5055        // dedicated cold-segment scan instead of the regular
5056        // hot+index path. The scope is intentionally narrow for
5057        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
5058        // optionally with a single-column-equality WHERE. JOINs /
5059        // aggregates / ORDER BY / subqueries on top of a time-
5060        // travelled scan are STABILITY § "Out of v6.10".
5061        if let Some(from) = &stmt.from
5062            && let Some(seg_id) = from.primary.as_of_segment
5063        {
5064            return self.exec_select_as_of_segment(stmt, from, seg_id);
5065        }
5066        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
5067        // pre-CTE because they don't read from the catalog and
5068        // shouldn't participate in regular FROM resolution.
5069        if let Some(from) = &stmt.from
5070            && from.joins.is_empty()
5071            && stmt.where_.is_none()
5072            && stmt.group_by.is_none()
5073            && stmt.having.is_none()
5074            && stmt.unions.is_empty()
5075            && stmt.order_by.is_empty()
5076            && stmt.limit.is_none()
5077            && stmt.offset.is_none()
5078            && !stmt.distinct
5079            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
5080        {
5081            let lower = from.primary.name.to_ascii_lowercase();
5082            match lower.as_str() {
5083                "spg_statistic" => return Ok(self.exec_spg_statistic()),
5084                // v6.5.0 — observability v2 virtual tables.
5085                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
5086                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
5087                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
5088                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
5089                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
5090                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
5091                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
5092                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
5093                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
5094                _ => {}
5095            }
5096        }
5097        // v4.11: CTEs materialise into a temporary enriched catalog
5098        // *before* anything else — the body SELECT can then refer
5099        // to CTE names via the regular FROM-clause resolution.
5100        // Uncorrelated only: each CTE body runs once against the
5101        // current catalog, not against later CTEs' results (left-
5102        // to-right materialisation would relax this, but we keep
5103        // it simple for v4.11 MVP).
5104        if !stmt.ctes.is_empty() {
5105            return self.exec_with_ctes(stmt, cancel);
5106        }
5107        // v4.10: subqueries (uncorrelated) are resolved here, before
5108        // the executor sees the row loop. We clone the statement so
5109        // we can mutate without disturbing the caller's AST — most
5110        // queries pass through with no subquery nodes and the clone
5111        // is cheap; with subqueries the materialisation cost
5112        // dominates anyway.
5113        let mut stmt_owned;
5114        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
5115            stmt_owned = stmt.clone();
5116            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
5117            &stmt_owned
5118        } else {
5119            stmt
5120        };
5121        if stmt_ref.unions.is_empty() {
5122            return self.exec_bare_select_cancel(stmt_ref, cancel);
5123        }
5124        // UNION path: clone-strip the head into a bare block (its own
5125        // DISTINCT and any inner ORDER BY are dropped by parser rule —
5126        // the wrapper SelectStatement carries them), execute, then chain
5127        // peers with left-associative dedup semantics.
5128        let mut head = stmt_ref.clone();
5129        head.unions = Vec::new();
5130        head.order_by = Vec::new();
5131        head.limit = None;
5132        let QueryResult::Rows { columns, mut rows } =
5133            self.exec_bare_select_cancel(&head, cancel)?
5134        else {
5135            unreachable!("bare SELECT cannot return CommandOk")
5136        };
5137        for (kind, peer) in &stmt_ref.unions {
5138            let QueryResult::Rows {
5139                columns: peer_cols,
5140                rows: peer_rows,
5141            } = self.exec_bare_select_cancel(peer, cancel)?
5142            else {
5143                unreachable!("bare SELECT cannot return CommandOk")
5144            };
5145            if peer_cols.len() != columns.len() {
5146                return Err(EngineError::Unsupported(alloc::format!(
5147                    "UNION arity mismatch: head has {} columns, peer has {}",
5148                    columns.len(),
5149                    peer_cols.len()
5150                )));
5151            }
5152            rows.extend(peer_rows);
5153            if matches!(kind, UnionKind::Distinct) {
5154                rows = dedup_rows(rows);
5155            }
5156        }
5157        // ORDER BY at the top of a UNION applies to the combined result.
5158        // Eval against the projected schema (NOT the source table).
5159        if !stmt.order_by.is_empty() {
5160            let synth_ctx = EvalContext::new(&columns, None);
5161            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5162            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
5163            for r in rows {
5164                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
5165                tagged.push((keys, r));
5166            }
5167            sort_by_keys(&mut tagged, &descs);
5168            rows = tagged.into_iter().map(|(_, r)| r).collect();
5169        }
5170        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
5171        Ok(QueryResult::Rows { columns, rows })
5172    }
5173
5174    #[allow(clippy::too_many_lines)]
5175    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
5176    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
5177    /// Synthesises a single-column virtual table whose column type
5178    /// is TEXT and whose rows are the array elements. Routes
5179    /// through the regular projection / WHERE / ORDER BY / LIMIT
5180    /// machinery so set-returning UNNEST composes naturally with
5181    /// the rest of the SELECT surface.
5182    fn exec_select_unnest(
5183        &self,
5184        stmt: &SelectStatement,
5185        primary: &TableRef,
5186        cancel: CancelToken<'_>,
5187    ) -> Result<QueryResult, EngineError> {
5188        let expr = primary
5189            .unnest_expr
5190            .as_deref()
5191            .expect("caller guards unnest_expr.is_some()");
5192        // Evaluate the array expression once. Empty schema / empty
5193        // row — uncorrelated UNNEST cannot reference outer columns.
5194        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
5195        let ctx = EvalContext::new(&empty_schema, None);
5196        let dummy_row = Row::new(alloc::vec::Vec::new());
5197        // v7.11.13 — unnest dispatches per array element type so
5198        // INT[] / BIGINT[] surface their PG types in projection.
5199        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
5200            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5201                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
5202                Value::TextArray(items) => {
5203                    let rows = items
5204                        .into_iter()
5205                        .map(|item| {
5206                            Row::new(alloc::vec![match item {
5207                                Some(s) => Value::Text(s),
5208                                None => Value::Null,
5209                            }])
5210                        })
5211                        .collect();
5212                    (DataType::Text, rows)
5213                }
5214                Value::IntArray(items) => {
5215                    let rows = items
5216                        .into_iter()
5217                        .map(|item| {
5218                            Row::new(alloc::vec![match item {
5219                                Some(n) => Value::Int(n),
5220                                None => Value::Null,
5221                            }])
5222                        })
5223                        .collect();
5224                    (DataType::Int, rows)
5225                }
5226                Value::BigIntArray(items) => {
5227                    let rows = items
5228                        .into_iter()
5229                        .map(|item| {
5230                            Row::new(alloc::vec![match item {
5231                                Some(n) => Value::BigInt(n),
5232                                None => Value::Null,
5233                            }])
5234                        })
5235                        .collect();
5236                    (DataType::BigInt, rows)
5237                }
5238                other => {
5239                    return Err(EngineError::Unsupported(alloc::format!(
5240                        "unnest() expects an array argument, got {:?}",
5241                        other.data_type()
5242                    )));
5243                }
5244            };
5245        let alias = primary
5246            .alias
5247            .clone()
5248            .unwrap_or_else(|| "unnest".to_string());
5249        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
5250        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
5251        // first entry overrides the projected column's name.
5252        // Without the column list, fall back to the table alias
5253        // (pre-v7.13.2 behaviour).
5254        let col_name = primary
5255            .unnest_column_aliases
5256            .first()
5257            .cloned()
5258            .unwrap_or_else(|| alias.clone());
5259        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
5260        let schema_cols = alloc::vec![col_schema.clone()];
5261        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
5262        // Apply WHERE.
5263        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
5264            let mut out = alloc::vec::Vec::with_capacity(rows.len());
5265            for row in rows {
5266                cancel.check()?;
5267                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
5268                if matches!(v, Value::Bool(true)) {
5269                    out.push(row);
5270                }
5271            }
5272            out
5273        } else {
5274            rows
5275        };
5276        // Projection.
5277        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
5278        let mut projected_rows: alloc::vec::Vec<Row> =
5279            alloc::vec::Vec::with_capacity(filtered.len());
5280        for row in &filtered {
5281            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
5282            for p in &projection {
5283                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
5284            }
5285            projected_rows.push(Row::new(vals));
5286        }
5287        // ORDER BY / LIMIT — apply on the projected rows (cheap;
5288        // unnest result sets are small by design).
5289        let columns: alloc::vec::Vec<ColumnSchema> = projection
5290            .iter()
5291            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
5292            .collect();
5293        // Re-evaluate ORDER BY against the source schema (pre-projection
5294        // so col refs by name still resolve through `scan_ctx`).
5295        if !stmt.order_by.is_empty() {
5296            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
5297                .iter()
5298                .enumerate()
5299                .map(|(i, r)| -> Result<_, EngineError> {
5300                    let keys: Result<Vec<Value>, EngineError> = stmt
5301                        .order_by
5302                        .iter()
5303                        .map(|ob| {
5304                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
5305                        })
5306                        .collect();
5307                    Ok((i, keys?))
5308                })
5309                .collect::<Result<_, _>>()?;
5310            indexed.sort_by(|a, b| {
5311                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
5312                    let mut cmp = value_cmp(ka, kb);
5313                    if stmt.order_by[idx].desc {
5314                        cmp = cmp.reverse();
5315                    }
5316                    if cmp != core::cmp::Ordering::Equal {
5317                        return cmp;
5318                    }
5319                }
5320                core::cmp::Ordering::Equal
5321            });
5322            projected_rows = indexed
5323                .into_iter()
5324                .map(|(i, _)| projected_rows[i].clone())
5325                .collect();
5326        }
5327        // LIMIT / OFFSET — apply at the tail.
5328        if let Some(offset) = stmt.offset_literal() {
5329            let off = (offset as usize).min(projected_rows.len());
5330            projected_rows.drain(..off);
5331        }
5332        if let Some(limit) = stmt.limit_literal() {
5333            projected_rows.truncate(limit as usize);
5334        }
5335        Ok(QueryResult::Rows {
5336            columns,
5337            rows: projected_rows,
5338        })
5339    }
5340
5341    fn exec_bare_select_cancel(
5342        &self,
5343        stmt: &SelectStatement,
5344        cancel: CancelToken<'_>,
5345    ) -> Result<QueryResult, EngineError> {
5346        // v4.12: window-function path. When the projection contains
5347        // any `name(args) OVER (...)` we route to the dedicated
5348        // executor — partition + sort + per-row window value before
5349        // the regular projection.
5350        if select_has_window(stmt) {
5351            return self.exec_select_with_window(stmt, cancel);
5352        }
5353        // Constant SELECT (no FROM) — evaluate each item once against an
5354        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
5355        // `SELECT '7'::INT`. Column references will surface as
5356        // ColumnNotFound on eval since the schema is empty.
5357        let Some(from) = &stmt.from else {
5358            let empty_schema: Vec<ColumnSchema> = Vec::new();
5359            let ctx = self.ev_ctx(&empty_schema, None);
5360            let projection = build_projection(&stmt.items, &empty_schema, "")?;
5361            let dummy_row = Row::new(Vec::new());
5362            let mut values = Vec::with_capacity(projection.len());
5363            for p in &projection {
5364                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
5365            }
5366            let columns: Vec<ColumnSchema> = projection
5367                .into_iter()
5368                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5369                .collect();
5370            return Ok(QueryResult::Rows {
5371                columns,
5372                rows: alloc::vec![Row::new(values)],
5373            });
5374        };
5375        // Multi-table FROM (one or more joined peers) goes through the
5376        // nested-loop join executor. Single-table FROM stays on the
5377        // existing scan + index-seek path.
5378        if !from.joins.is_empty() {
5379            return self.exec_joined_select(stmt, from);
5380        }
5381        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5382        // single-column table at SELECT entry by evaluating the
5383        // expression once against the empty row (UNNEST is
5384        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5385        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5386        // catalog, then route to the regular scan path.
5387        if from.primary.unnest_expr.is_some() {
5388            return self.exec_select_unnest(stmt, &from.primary, cancel);
5389        }
5390        let primary = &from.primary;
5391        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5392            StorageError::TableNotFound {
5393                name: primary.name.clone(),
5394            }
5395        })?;
5396        let schema_cols = &table.schema().columns;
5397        // The qualifier accepted on column refs is the alias (if any) else the
5398        // bare table name.
5399        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5400        let ctx = self.ev_ctx(schema_cols, Some(alias));
5401
5402        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5403        // WHERE and an NSW index on `col` skips the full scan. The
5404        // walk returns rows already in ascending-distance order, so
5405        // ORDER BY / LIMIT are honoured implicitly.
5406        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5407            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5408        }
5409
5410        // Index seek: if WHERE is `col = literal` (or commuted) and the
5411        // referenced column has an index, dispatch each locator through
5412        // the catalog (hot tier → borrow, cold tier → page-read +
5413        // decode) and iterate just those rows. Otherwise fall back to a
5414        // full scan over the hot tier (cold-tier rows are only reached
5415        // via index seek in v5.1 — full table scans against cold-tier
5416        // data ship in v5.2 with the freezer's per-segment scan API).
5417        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5418            // BTree / col=literal seek first — covers the v7.11.3 multi-
5419            // column AND case and the leading-column equality lookup.
5420            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
5421                .or_else(|| {
5422                    // v7.12.3 — GIN-accelerated `WHERE col @@
5423                    // tsquery` when the column has a `USING gin`
5424                    // index. Returns an over-approximate candidate
5425                    // set; the WHERE re-eval loop below verifies
5426                    // the full `@@` predicate per row.
5427                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5428                })
5429                .or_else(|| {
5430                    // v7.15.0 — trigram-GIN-accelerated
5431                    // `WHERE col LIKE / ILIKE '<pat>'` when the
5432                    // column has a `gin_trgm_ops` GIN index.
5433                    // Over-approximate candidate set; the WHERE
5434                    // re-eval verifies the LIKE per row.
5435                    try_trgm_seek(w, schema_cols, table, alias)
5436                })
5437        });
5438
5439        // Aggregate path: filter rows first, then hand off to the
5440        // aggregate executor which does its own projection + ORDER BY.
5441        if aggregate::uses_aggregate(stmt) {
5442            let mut filtered: Vec<&Row> = Vec::new();
5443            // v6.2.6 — Memoize: per-query LRU cache for correlated
5444            // scalar subqueries. Fresh per row-loop entry so each
5445            // SELECT execution gets an isolated cache.
5446            let mut memo = memoize::MemoizeCache::new();
5447            if let Some(rows) = &indexed_rows {
5448                for cow in rows {
5449                    let row = cow.as_ref();
5450                    if let Some(where_expr) = &stmt.where_ {
5451                        let cond = self.eval_expr_with_correlated(
5452                            where_expr,
5453                            row,
5454                            &ctx,
5455                            cancel,
5456                            Some(&mut memo),
5457                        )?;
5458                        if !matches!(cond, Value::Bool(true)) {
5459                            continue;
5460                        }
5461                    }
5462                    filtered.push(row);
5463                }
5464            } else {
5465                for i in 0..table.row_count() {
5466                    let row = &table.rows()[i];
5467                    if let Some(where_expr) = &stmt.where_ {
5468                        let cond = self.eval_expr_with_correlated(
5469                            where_expr,
5470                            row,
5471                            &ctx,
5472                            cancel,
5473                            Some(&mut memo),
5474                        )?;
5475                        if !matches!(cond, Value::Bool(true)) {
5476                            continue;
5477                        }
5478                    }
5479                    filtered.push(row);
5480                }
5481            }
5482            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5483            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5484            return Ok(QueryResult::Rows {
5485                columns: agg.columns,
5486                rows: agg.rows,
5487            });
5488        }
5489
5490        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5491
5492        // Materialise the filter pass into `(order_key, projected_row)`
5493        // tuples. The order key is `None` when there's no ORDER BY clause.
5494        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5495        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5496        let mut memo = memoize::MemoizeCache::new();
5497        // Inline the per-row work in a closure so the indexed and full-
5498        // scan branches share the body.
5499        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5500            if loop_idx.is_multiple_of(256) {
5501                cancel.check()?;
5502            }
5503            if let Some(where_expr) = &stmt.where_ {
5504                let cond =
5505                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5506                if !matches!(cond, Value::Bool(true)) {
5507                    return Ok(());
5508                }
5509            }
5510            let mut values = Vec::with_capacity(projection.len());
5511            for p in &projection {
5512                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5513            }
5514            let order_keys = if stmt.order_by.is_empty() {
5515                Vec::new()
5516            } else {
5517                build_order_keys(&stmt.order_by, row, &ctx)?
5518            };
5519            tagged.push((order_keys, Row::new(values)));
5520            Ok(())
5521        };
5522        if let Some(rows) = &indexed_rows {
5523            for (loop_idx, cow) in rows.iter().enumerate() {
5524                process_row(cow.as_ref(), loop_idx)?;
5525            }
5526        } else {
5527            for i in 0..table.row_count() {
5528                process_row(&table.rows()[i], i)?;
5529            }
5530        }
5531
5532        if !stmt.order_by.is_empty() {
5533            // Partial-sort fast path: when LIMIT is small relative to
5534            // the row count, select_nth_unstable + sort just the
5535            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5536            // requires the full sort because de-dup happens after.
5537            let keep = if stmt.distinct {
5538                None
5539            } else {
5540                stmt.limit_literal()
5541                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5542            };
5543            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5544            partial_sort_tagged(&mut tagged, keep, &descs);
5545        }
5546
5547        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5548        if stmt.distinct {
5549            output_rows = dedup_rows(output_rows);
5550        }
5551        apply_offset_and_limit(
5552            &mut output_rows,
5553            stmt.offset_literal(),
5554            stmt.limit_literal(),
5555        );
5556
5557        let columns: Vec<ColumnSchema> = projection
5558            .into_iter()
5559            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5560            .collect();
5561
5562        Ok(QueryResult::Rows {
5563            columns,
5564            rows: output_rows,
5565        })
5566    }
5567
5568    /// Multi-table SELECT executor (one or more JOIN peers).
5569    ///
5570    /// v1.10 builds the joined row set up-front via nested-loop joins,
5571    /// then runs WHERE + projection + ORDER BY against the combined
5572    /// rows. No index seek. Aggregates and DISTINCT still work because
5573    /// the executor delegates projection through the same shared paths.
5574    #[allow(clippy::too_many_lines)]
5575    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5576    /// owned (rows, schema) pair. Catalog tables clone their hot
5577    /// rows + schema; UNNEST table refs evaluate their array
5578    /// expression once and synthesise a single-column row set
5579    /// using the same dispatch as `exec_select_unnest`. Used by
5580    /// the joined-select path so UNNEST can appear in any FROM
5581    /// position, not just as the primary.
5582    fn materialise_table_ref(
5583        &self,
5584        tref: &TableRef,
5585    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5586        if let Some(expr) = tref.unnest_expr.as_deref() {
5587            let empty_schema: Vec<ColumnSchema> = Vec::new();
5588            let ctx = EvalContext::new(&empty_schema, None);
5589            let dummy_row = Row::new(Vec::new());
5590            let (elem_dtype, rows) =
5591                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5592                    Value::Null => (DataType::Text, Vec::new()),
5593                    Value::TextArray(items) => (
5594                        DataType::Text,
5595                        items
5596                            .into_iter()
5597                            .map(|item| {
5598                                Row::new(alloc::vec![match item {
5599                                    Some(s) => Value::Text(s),
5600                                    None => Value::Null,
5601                                }])
5602                            })
5603                            .collect(),
5604                    ),
5605                    Value::IntArray(items) => (
5606                        DataType::Int,
5607                        items
5608                            .into_iter()
5609                            .map(|item| {
5610                                Row::new(alloc::vec![match item {
5611                                    Some(n) => Value::Int(n),
5612                                    None => Value::Null,
5613                                }])
5614                            })
5615                            .collect(),
5616                    ),
5617                    Value::BigIntArray(items) => (
5618                        DataType::BigInt,
5619                        items
5620                            .into_iter()
5621                            .map(|item| {
5622                                Row::new(alloc::vec![match item {
5623                                    Some(n) => Value::BigInt(n),
5624                                    None => Value::Null,
5625                                }])
5626                            })
5627                            .collect(),
5628                    ),
5629                    other => {
5630                        return Err(EngineError::Unsupported(alloc::format!(
5631                            "unnest() expects an array argument, got {:?}",
5632                            other.data_type()
5633                        )));
5634                    }
5635                };
5636            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5637            let col_name = tref
5638                .unnest_column_aliases
5639                .first()
5640                .cloned()
5641                .unwrap_or(alias);
5642            return Ok((rows, alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)]));
5643        }
5644        let table = self
5645            .active_catalog()
5646            .get(&tref.name)
5647            .ok_or_else(|| StorageError::TableNotFound {
5648                name: tref.name.clone(),
5649            })?;
5650        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5651        let cols = table.schema().columns.clone();
5652        Ok((rows, cols))
5653    }
5654
5655    fn exec_joined_select(
5656        &self,
5657        stmt: &SelectStatement,
5658        from: &FromClause,
5659    ) -> Result<QueryResult, EngineError> {
5660        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5661        // into virtual (rows, schema) sources alongside catalog
5662        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5663        // any join-list position. The lookup helper handles both
5664        // shapes uniformly.
5665        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5666        let primary_alias = from
5667            .primary
5668            .alias
5669            .as_deref()
5670            .unwrap_or(from.primary.name.as_str())
5671            .to_string();
5672        // Owned (rows, schema) per peer — borrows from the catalog
5673        // would not survive UNNEST-side materialisation.
5674        let mut joined: Vec<(Vec<Row>, Vec<ColumnSchema>, String, JoinKind, Option<&Expr>)> =
5675            Vec::new();
5676        for j in &from.joins {
5677            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5678            let a = j
5679                .table
5680                .alias
5681                .as_deref()
5682                .unwrap_or(j.table.name.as_str())
5683                .to_string();
5684            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5685        }
5686
5687        // Build the combined schema: composite "alias.col" names so the
5688        // qualified-column resolver can find anything by exact match.
5689        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5690        for col in &primary_cols {
5691            combined_schema.push(ColumnSchema::new(
5692                alloc::format!("{primary_alias}.{}", col.name),
5693                col.ty,
5694                col.nullable,
5695            ));
5696        }
5697        for (_, cols, a, _, _) in &joined {
5698            for col in cols {
5699                combined_schema.push(ColumnSchema::new(
5700                    alloc::format!("{a}.{}", col.name),
5701                    col.ty,
5702                    col.nullable,
5703                ));
5704            }
5705        }
5706        let ctx = EvalContext::new(&combined_schema, None);
5707
5708        // Nested-loop join.
5709        let mut working: Vec<Row> = primary_rows;
5710        let mut produced_len = primary_cols.len();
5711        for (rrows, rcols, _, kind, on) in &joined {
5712            let right_arity = rcols.len();
5713            let mut next: Vec<Row> = Vec::new();
5714            for left in &working {
5715                let mut left_matched = false;
5716                for right in rrows {
5717                    let mut combined_vals = left.values.clone();
5718                    combined_vals.extend(right.values.iter().cloned());
5719                    // Pad combined to the eventual full width so the
5720                    // partial schema still matches positions used by ON.
5721                    let combined = Row::new(combined_vals);
5722                    let keep = if let Some(on_expr) = on {
5723                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
5724                        matches!(cond, Value::Bool(true))
5725                    } else {
5726                        // CROSS / comma-list: every pair survives.
5727                        true
5728                    };
5729                    if keep {
5730                        next.push(combined);
5731                        left_matched = true;
5732                    }
5733                }
5734                if !left_matched && matches!(kind, JoinKind::Left) {
5735                    // LEFT OUTER JOIN: emit the left row with NULLs on
5736                    // the right side when no peer matched.
5737                    let mut combined_vals = left.values.clone();
5738                    for _ in 0..right_arity {
5739                        combined_vals.push(Value::Null);
5740                    }
5741                    next.push(Row::new(combined_vals));
5742                }
5743            }
5744            working = next;
5745            produced_len += right_arity;
5746            debug_assert!(produced_len <= combined_schema.len());
5747        }
5748
5749        // WHERE filter against combined rows.
5750        let mut filtered: Vec<Row> = Vec::new();
5751        for row in working {
5752            if let Some(where_expr) = &stmt.where_ {
5753                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
5754                if !matches!(cond, Value::Bool(true)) {
5755                    continue;
5756                }
5757            }
5758            filtered.push(row);
5759        }
5760
5761        // Aggregate path: handle GROUP BY / aggregate calls over the
5762        // joined+filtered rows.
5763        if aggregate::uses_aggregate(stmt) {
5764            let refs: Vec<&Row> = filtered.iter().collect();
5765            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
5766            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5767            return Ok(QueryResult::Rows {
5768                columns: agg.columns,
5769                rows: agg.rows,
5770            });
5771        }
5772
5773        let projection = build_projection(&stmt.items, &combined_schema, "")?;
5774        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5775        for row in &filtered {
5776            let mut values = Vec::with_capacity(projection.len());
5777            for p in &projection {
5778                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5779            }
5780            let order_keys = if stmt.order_by.is_empty() {
5781                Vec::new()
5782            } else {
5783                build_order_keys(&stmt.order_by, row, &ctx)?
5784            };
5785            tagged.push((order_keys, Row::new(values)));
5786        }
5787        if !stmt.order_by.is_empty() {
5788            let keep = if stmt.distinct {
5789                None
5790            } else {
5791                stmt.limit_literal()
5792                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5793            };
5794            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5795            partial_sort_tagged(&mut tagged, keep, &descs);
5796        }
5797        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5798        if stmt.distinct {
5799            output_rows = dedup_rows(output_rows);
5800        }
5801        apply_offset_and_limit(
5802            &mut output_rows,
5803            stmt.offset_literal(),
5804            stmt.limit_literal(),
5805        );
5806        let columns: Vec<ColumnSchema> = projection
5807            .into_iter()
5808            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5809            .collect();
5810        Ok(QueryResult::Rows {
5811            columns,
5812            rows: output_rows,
5813        })
5814    }
5815}
5816
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// The executors in this file evaluate `expr` once per surviving row and
/// pass the other three fields to `ColumnSchema::new` to describe the
/// result column.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each input row to produce the cell.
    expr: Expr,
    /// Column name reported in the result set.
    output_name: String,
    /// Data type reported for the output column.
    ty: DataType,
    /// Whether the output column is reported as nullable.
    nullable: bool,
}
5826
5827/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
5828/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
5829/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
5830/// the spec's "two NULLs are not distinct"; the second is a tolerated
5831/// quirk for v1 (no NaN literals are reachable from the SQL surface).
5832fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
5833    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
5834    for r in rows {
5835        if !out.iter().any(|seen| seen == &r) {
5836            out.push(r);
5837        }
5838    }
5839    out
5840}
5841
5842/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
5843/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
5844/// order via the byte values; vectors are not sortable.
5845fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
5846    match v {
5847        Value::Null => Ok(f64::INFINITY),
5848        Value::SmallInt(n) => Ok(f64::from(*n)),
5849        Value::Int(n) => Ok(f64::from(*n)),
5850        Value::Date(d) => Ok(f64::from(*d)),
5851        #[allow(clippy::cast_precision_loss)]
5852        Value::Timestamp(t) => Ok(*t as f64),
5853        #[allow(clippy::cast_precision_loss)]
5854        Value::Numeric { scaled, scale } => {
5855            // Scaled integer / 10^scale, computed via f64 for sort
5856            // ordering only. Precision losses here only matter for
5857            // ORDER BY tie-breaks well past 15 significant digits.
5858            // `f64::powi` lives in std; we hand-roll the loop so the
5859            // no_std engine crate doesn't need it.
5860            let mut divisor = 1.0_f64;
5861            for _ in 0..*scale {
5862                divisor *= 10.0;
5863            }
5864            Ok((*scaled as f64) / divisor)
5865        }
5866        #[allow(clippy::cast_precision_loss)]
5867        Value::BigInt(n) => Ok(*n as f64),
5868        Value::Float(x) => Ok(*x),
5869        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
5870        Value::Text(s) => {
5871            // Lex order by codepoints — good enough for ORDER BY name.
5872            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
5873            // partial_cmp Equal. v1.x can swap in a real string comparator.
5874            let mut key: u64 = 0;
5875            for &b in s.as_bytes().iter().take(8) {
5876                key = (key << 8) | u64::from(b);
5877            }
5878            #[allow(clippy::cast_precision_loss)]
5879            Ok(key as f64)
5880        }
5881        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
5882            Err(EngineError::Unsupported(
5883                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
5884            ))
5885        }
5886        Value::Interval { .. } => Err(EngineError::Unsupported(
5887            "ORDER BY of an INTERVAL is not supported in v2.11 \
5888             (months vs micros has no single canonical ordering)"
5889                .into(),
5890        )),
5891        Value::Json(_) => Err(EngineError::Unsupported(
5892            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
5893        )),
5894        // v7.5.0 — Value is #[non_exhaustive]; future variants need
5895        // an explicit ORDER BY mapping. Surface as Unsupported until
5896        // engine support is added.
5897        _ => Err(EngineError::Unsupported(
5898            "ORDER BY of this value type is not supported".into(),
5899        )),
5900    }
5901}
5902
5903/// Try to plan a WHERE clause as an equality lookup against an existing
5904/// index. Returns the candidate row indices on success; `None` means the
5905/// caller should fall back to a full scan.
5906///
5907/// v0.8 recognises a single top-level `col = literal` (in either operand
5908/// order). AND chains and range scans land in later milestones.
5909/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
5910/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
5911/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
5912/// present, the planner does an "over-fetch and filter" pass — it
5913/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
5914/// against each, and trims back to `k`. Returns the row indices in
5915/// ascending-distance order when the plan applies.
5916fn try_nsw_knn(
5917    stmt: &SelectStatement,
5918    table: &Table,
5919    schema_cols: &[ColumnSchema],
5920    table_alias: &str,
5921) -> Option<Vec<usize>> {
5922    if stmt.distinct {
5923        return None;
5924    }
5925    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
5926    if limit == 0 {
5927        return None;
5928    }
5929    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
5930    // distance metric. Multi-key ORDER BY falls through to the
5931    // generic sort path.
5932    if stmt.order_by.len() != 1 {
5933        return None;
5934    }
5935    let order = &stmt.order_by[0];
5936    // NSW kNN returns rows ascending by distance — DESC inverts the
5937    // natural order, so the planner can't handle it without a sort
5938    // pass. Fall back to the generic ORDER BY path.
5939    if order.desc {
5940        return None;
5941    }
5942    let Expr::Binary { lhs, op, rhs } = &order.expr else {
5943        return None;
5944    };
5945    let metric = match op {
5946        BinOp::L2Distance => spg_storage::NswMetric::L2,
5947        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
5948        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
5949        _ => return None,
5950    };
5951    // Accept both `col <op> literal` and `literal <op> col`.
5952    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
5953        (lhs.as_ref(), rhs.as_ref())
5954    else {
5955        return None;
5956    };
5957    if let Some(q) = &col.qualifier
5958        && q != table_alias
5959    {
5960        return None;
5961    }
5962    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
5963    let query = literal_to_vector(literal)?;
5964    let idx = spg_storage::nsw_index_on(table, col_pos)?;
5965    if let Some(where_expr) = &stmt.where_ {
5966        // Over-fetch and filter. The factor (10×) is a heuristic that
5967        // covers typical selectivity for the corpus tests; v2.x will
5968        // make it configurable.
5969        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
5970        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
5971        let ctx = EvalContext::new(schema_cols, Some(table_alias));
5972        let mut kept: Vec<usize> = Vec::with_capacity(limit);
5973        for i in candidates {
5974            let row = &table.rows()[i];
5975            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
5976            if matches!(cond, Value::Bool(true)) {
5977                kept.push(i);
5978                if kept.len() >= limit {
5979                    break;
5980                }
5981            }
5982        }
5983        Some(kept)
5984    } else {
5985        Some(spg_storage::nsw_query(
5986            table, &idx.name, &query, limit, metric,
5987        ))
5988    }
5989}
5990
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` requests the larger of its multiplied row budget and
/// this floor from the NSW graph.
const NSW_OVER_FETCH_FLOOR: usize = 32;
5995
5996/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
5997/// `None` for anything we can't fold at plan time.
5998fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
5999    match e {
6000        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
6001        Expr::Cast { expr, .. } => literal_to_vector(expr),
6002        _ => None,
6003    }
6004}
6005
6006/// Materialise rows in a planner-supplied order (used by the NSW path)
6007/// without re-running ORDER BY. The projection + LIMIT slot mirror the
6008/// equivalent block in `exec_bare_select`.
6009fn materialise_in_order(
6010    stmt: &SelectStatement,
6011    table: &Table,
6012    schema_cols: &[ColumnSchema],
6013    table_alias: &str,
6014    ordered_rows: &[usize],
6015) -> Result<QueryResult, EngineError> {
6016    let ctx = EvalContext::new(schema_cols, Some(table_alias));
6017    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
6018    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
6019    for &i in ordered_rows {
6020        let row = &table.rows()[i];
6021        let mut values = Vec::with_capacity(projection.len());
6022        for p in &projection {
6023            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
6024        }
6025        output_rows.push(Row::new(values));
6026    }
6027    apply_offset_and_limit(
6028        &mut output_rows,
6029        stmt.offset_literal(),
6030        stmt.limit_literal(),
6031    );
6032    let columns: Vec<ColumnSchema> = projection
6033        .into_iter()
6034        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6035        .collect();
6036    Ok(QueryResult::Rows {
6037        columns,
6038        rows: output_rows,
6039    })
6040}
6041
6042fn try_index_seek<'a>(
6043    where_expr: &Expr,
6044    schema_cols: &[ColumnSchema],
6045    catalog: &'a Catalog,
6046    table: &'a Table,
6047    table_alias: &str,
6048) -> Option<Vec<Cow<'a, Row>>> {
6049    // v7.11.3 — recurse through top-level `AND` so a PG-style
6050    // composite predicate like `WHERE id = 1 AND created_at > $1`
6051    // still hits the index on `id`. The caller re-applies the
6052    // full WHERE expression to each returned row, so dropping the
6053    // residual conjuncts here is correct — the index just narrows
6054    // the candidate set.
6055    if let Expr::Binary {
6056        lhs,
6057        op: BinOp::And,
6058        rhs,
6059    } = where_expr
6060    {
6061        // Try LHS first (typical convention: leading equality on
6062        // the indexed column comes first in user-written SQL).
6063        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
6064            return Some(rows);
6065        }
6066        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
6067    }
6068    let Expr::Binary {
6069        lhs,
6070        op: BinOp::Eq,
6071        rhs,
6072    } = where_expr
6073    else {
6074        return None;
6075    };
6076    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6077        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6078    let idx = table.index_on(col_pos)?;
6079    let key = IndexKey::from_value(&value)?;
6080    let locators = idx.lookup_eq(&key);
6081    let table_name = table.schema().name.as_str();
6082    // v5.1: each locator dispatches to either the hot tier (zero-
6083    // copy borrow of `table.rows()[i]`) or a cold-tier segment
6084    // (one page read + dense row decode, ~µs scale). Cold rows are
6085    // returned as `Cow::Owned` so the caller's `&Row` iteration
6086    // doesn't see a tier distinction; pre-freezer (no cold
6087    // segments loaded) every locator is `Hot` and every entry is
6088    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
6089    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
6090    for loc in locators {
6091        match *loc {
6092            spg_storage::RowLocator::Hot(i) => {
6093                if let Some(row) = table.rows().get(i) {
6094                    out.push(Cow::Borrowed(row));
6095                }
6096            }
6097            spg_storage::RowLocator::Cold { segment_id, .. } => {
6098                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
6099                    out.push(Cow::Owned(row));
6100                }
6101            }
6102        }
6103    }
6104    Some(out)
6105}
6106
6107/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
6108///
6109/// Recurses through top-level `AND` like [`try_index_seek`] so a
6110/// composite predicate `WHERE search_vector @@ q AND id > $1` still
6111/// hits the GIN index on `search_vector` — the caller re-applies the
6112/// full WHERE expression to each returned candidate, so dropping the
6113/// `id > $1` residual here stays semantically correct.
6114///
6115/// Returns `None` when:
6116///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
6117///   - the RHS can't be const-evaluated to a `Value::TsQuery`
6118///     (typically because it references row columns);
6119///   - the resolved `TsQuery` uses query shapes the MVP doesn't
6120///     accelerate (`Not`, `Phrase` — those fall through to full scan).
6121///
6122/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
6123/// the full `@@` predicate per row, so an over-approximate candidate
6124/// set is safe.
6125fn try_gin_seek<'a>(
6126    where_expr: &Expr,
6127    schema_cols: &[ColumnSchema],
6128    catalog: &'a Catalog,
6129    table: &'a Table,
6130    table_alias: &str,
6131    ctx: &eval::EvalContext<'_>,
6132) -> Option<Vec<Cow<'a, Row>>> {
6133    if let Expr::Binary {
6134        lhs,
6135        op: BinOp::And,
6136        rhs,
6137    } = where_expr
6138    {
6139        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
6140            return Some(rows);
6141        }
6142        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
6143    }
6144    let Expr::Binary {
6145        lhs,
6146        op: BinOp::TsMatch,
6147        rhs,
6148    } = where_expr
6149    else {
6150        return None;
6151    };
6152    // Either side can be the column; pgvector idiom (`vec @@ q`)
6153    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
6154    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
6155    // tables resolve `q` to a Column too.
6156    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
6157        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
6158    let idx = table
6159        .indices()
6160        .iter()
6161        .find(|i| i.column_position == col_pos && i.is_gin())?;
6162    let candidates = gin_query_candidates(idx, &query)?;
6163    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
6164    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
6165    for loc in candidates {
6166        match loc {
6167            spg_storage::RowLocator::Hot(i) => {
6168                if let Some(row) = table.rows().get(i) {
6169                    out.push(Cow::Borrowed(row));
6170                }
6171            }
6172            // GIN cold-tier rows in the MVP: skipped, matching the
6173            // full-scan `@@` path which itself only iterates
6174            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
6175            // scan-time materialisation for `@@`, the parallel
6176            // resolution lands here; until then both paths see the
6177            // same hot-only candidate set so correctness is preserved.
6178            spg_storage::RowLocator::Cold { .. } => {}
6179        }
6180    }
6181    Some(out)
6182}
6183
6184/// v7.15.0 — trigram-GIN-accelerated candidate seek for
6185/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
6186/// the column has a `gin_trgm_ops` GIN index.
6187///
6188/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
6189/// 'foo%' AND id > 1`) still hit the trigram index; the caller
6190/// re-evaluates the full WHERE per candidate row, so dropping
6191/// non-LIKE conjuncts here stays semantically correct.
6192///
6193/// Returns `None` when:
6194///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
6195///     indexed column;
6196///   - the pattern's literal runs are too short to constrain
6197///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
6198///   - the pattern doesn't const-evaluate to a TEXT.
6199fn try_trgm_seek<'a>(
6200    where_expr: &Expr,
6201    schema_cols: &[ColumnSchema],
6202    table: &'a Table,
6203    table_alias: &str,
6204) -> Option<Vec<Cow<'a, Row>>> {
6205    if let Expr::Binary {
6206        lhs,
6207        op: BinOp::And,
6208        rhs,
6209    } = where_expr
6210    {
6211        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
6212            return Some(rows);
6213        }
6214        return try_trgm_seek(rhs, schema_cols, table, table_alias);
6215    }
6216    // LIKE node is what carries the column reference + pattern.
6217    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
6218    // through `Expr::Like { expr, pattern, negated }`. The trigram
6219    // index posting-list keys are already lower-cased and
6220    // case-folded, so we only need the pattern's literal runs.
6221    let Expr::Like {
6222        expr, pattern, ..
6223    } = where_expr
6224    else {
6225        return None;
6226    };
6227    // Column side.
6228    let Expr::Column(c) = expr.as_ref() else {
6229        return None;
6230    };
6231    if let Some(q) = &c.qualifier
6232        && q != table_alias
6233    {
6234        return None;
6235    }
6236    let col_pos = schema_cols
6237        .iter()
6238        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
6239    // Index must exist on that column AND be a trigram-GIN.
6240    let idx = table
6241        .indices()
6242        .iter()
6243        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
6244    // Pattern side must be a literal TEXT — anything else (column
6245    // ref, function call, parameter that hasn't been bound yet)
6246    // falls through to full scan.
6247    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
6248        return None;
6249    };
6250    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
6251    // Intersect every trigram's posting list. Empty intersection
6252    // → empty candidate set (caller short-circuits its row loop).
6253    let mut iter = trigrams.iter();
6254    let first = iter.next()?;
6255    let mut acc: Vec<spg_storage::RowLocator> = {
6256        let mut v = idx.gin_trgm_lookup(first).to_vec();
6257        v.sort_by_key(locator_sort_key);
6258        v.dedup_by_key(|l| locator_sort_key(l));
6259        v
6260    };
6261    for tri in iter {
6262        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
6263        next.sort_by_key(locator_sort_key);
6264        next.dedup_by_key(|l| locator_sort_key(l));
6265        // Sorted-merge intersection.
6266        let mut merged: Vec<spg_storage::RowLocator> = Vec::with_capacity(acc.len().min(next.len()));
6267        let (mut i, mut j) = (0usize, 0usize);
6268        while i < acc.len() && j < next.len() {
6269            let lk = locator_sort_key(&acc[i]);
6270            let rk = locator_sort_key(&next[j]);
6271            match lk.cmp(&rk) {
6272                core::cmp::Ordering::Less => i += 1,
6273                core::cmp::Ordering::Greater => j += 1,
6274                core::cmp::Ordering::Equal => {
6275                    merged.push(acc[i]);
6276                    i += 1;
6277                    j += 1;
6278                }
6279            }
6280        }
6281        acc = merged;
6282        if acc.is_empty() {
6283            break;
6284        }
6285    }
6286    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
6287    for loc in acc {
6288        if let spg_storage::RowLocator::Hot(i) = loc
6289            && let Some(row) = table.rows().get(i)
6290        {
6291            out.push(Cow::Borrowed(row));
6292        }
6293        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
6294    }
6295    Some(out)
6296}
6297
6298/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
6299/// the binary is a column reference to a GIN-indexed tsvector column
6300/// and the other side const-evaluates to a `Value::TsQuery`. Returns
6301/// `None` if the column reference is for the wrong table alias, or if
6302/// the RHS expression depends on row data.
6303fn resolve_gin_col_query(
6304    col_side: &Expr,
6305    query_side: &Expr,
6306    schema_cols: &[ColumnSchema],
6307    table_alias: &str,
6308    ctx: &eval::EvalContext<'_>,
6309) -> Option<(usize, spg_storage::TsQueryAst)> {
6310    let Expr::Column(c) = col_side else {
6311        return None;
6312    };
6313    if let Some(q) = &c.qualifier
6314        && q != table_alias
6315    {
6316        return None;
6317    }
6318    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6319    // Const-evaluate the query side with an empty row — fails fast
6320    // (with a `ColumnNotFound` / similar) if the expression actually
6321    // depends on row data, which is exactly the bail signal we want.
6322    let empty_row = Row::new(Vec::new());
6323    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
6324    let Value::TsQuery(q) = v else { return None };
6325    Some((pos, q))
6326}
6327
6328/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
6329/// to produce a candidate row-locator set. Returns `None` for query
6330/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
6331/// full scan since their semantics need either complementation across
6332/// the whole row set or positional verification beyond what the
6333/// posting list carries).
6334///
6335/// Candidate sets are over-approximate — the caller re-applies the
6336/// full `@@` predicate per row, so reporting "row was in some
6337/// posting list" without verifying positions / weights stays correct.
6338fn gin_query_candidates(
6339    idx: &spg_storage::Index,
6340    query: &spg_storage::TsQueryAst,
6341) -> Option<Vec<spg_storage::RowLocator>> {
6342    use spg_storage::TsQueryAst;
6343    match query {
6344        TsQueryAst::Term { word, .. } => {
6345            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
6346            v.sort_by_key(locator_sort_key);
6347            v.dedup_by_key(|l| locator_sort_key(l));
6348            Some(v)
6349        }
6350        TsQueryAst::And(l, r) => {
6351            let mut left = gin_query_candidates(idx, l)?;
6352            let mut right = gin_query_candidates(idx, r)?;
6353            left.sort_by_key(locator_sort_key);
6354            right.sort_by_key(locator_sort_key);
6355            // Sorted-merge intersection.
6356            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
6357            let (mut i, mut j) = (0usize, 0usize);
6358            while i < left.len() && j < right.len() {
6359                let lk = locator_sort_key(&left[i]);
6360                let rk = locator_sort_key(&right[j]);
6361                match lk.cmp(&rk) {
6362                    core::cmp::Ordering::Less => i += 1,
6363                    core::cmp::Ordering::Greater => j += 1,
6364                    core::cmp::Ordering::Equal => {
6365                        out.push(left[i]);
6366                        i += 1;
6367                        j += 1;
6368                    }
6369                }
6370            }
6371            Some(out)
6372        }
6373        TsQueryAst::Or(l, r) => {
6374            let mut out = gin_query_candidates(idx, l)?;
6375            out.extend(gin_query_candidates(idx, r)?);
6376            out.sort_by_key(locator_sort_key);
6377            out.dedup_by_key(|l| locator_sort_key(l));
6378            Some(out)
6379        }
6380        // Not / Phrase bail to full scan in the MVP. Not needs
6381        // complementation against the whole row set (not represented
6382        // in the posting-list view); Phrase needs positional
6383        // verification beyond what `word → rows` carries.
6384        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
6385    }
6386}
6387
6388/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
6389/// inside the GIN intersection / union loops. Hot rows order by their
6390/// row index; Cold rows order after all Hot rows, then by
6391/// `(segment_id, the cold sub-key)`.
6392fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
6393    match *l {
6394        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
6395        spg_storage::RowLocator::Cold {
6396            segment_id,
6397            page_offset,
6398        } => (1, u64::from(segment_id), u64::from(page_offset)),
6399    }
6400}
6401
6402/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
6403/// is a simple `col = literal` predicate suitable for a `BTree` index
6404/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
6405/// decide whether a write touches a cold-tier row (which requires
6406/// promote-on-write / shadow-on-delete) before falling through to
6407/// the hot-tier row walk.
6408///
6409/// Returns `None` for any predicate shape the planner can't push
6410/// down to an index seek — complex WHERE clauses always take the
6411/// hot-only path (cold rows are immutable to non-indexed writes
6412/// until a future scan-fanout sub-version).
6413fn try_pk_predicate(
6414    where_expr: &Expr,
6415    schema_cols: &[ColumnSchema],
6416    table_alias: &str,
6417) -> Option<(usize, IndexKey)> {
6418    let Expr::Binary {
6419        lhs,
6420        op: BinOp::Eq,
6421        rhs,
6422    } = where_expr
6423    else {
6424        return None;
6425    };
6426    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6427        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6428    let key = IndexKey::from_value(&value)?;
6429    Some((col_pos, key))
6430}
6431
6432fn resolve_col_literal_pair(
6433    col_side: &Expr,
6434    lit_side: &Expr,
6435    schema_cols: &[ColumnSchema],
6436    table_alias: &str,
6437) -> Option<(usize, Value)> {
6438    let Expr::Column(c) = col_side else {
6439        return None;
6440    };
6441    if let Some(q) = &c.qualifier
6442        && q != table_alias
6443    {
6444        return None;
6445    }
6446    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6447    let Expr::Literal(l) = lit_side else {
6448        return None;
6449    };
6450    let v = match l {
6451        Literal::Integer(n) => {
6452            if let Ok(small) = i32::try_from(*n) {
6453                Value::Int(small)
6454            } else {
6455                Value::BigInt(*n)
6456            }
6457        }
6458        Literal::Float(x) => Value::Float(*x),
6459        Literal::String(s) => Value::Text(s.clone()),
6460        Literal::Bool(b) => Value::Bool(*b),
6461        Literal::Null => Value::Null,
6462        // Vector and Interval literals can't be used as B-tree index keys.
6463        // Tell the planner to fall back to full-scan.
6464        Literal::Vector(_) | Literal::Interval { .. } => return None,
6465    };
6466    Some((pos, v))
6467}
6468
6469/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
6470/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
6471/// `EngineError` so the projection-build path keeps `UnknownQualifier`
6472/// vs `ColumnNotFound` distinct.
6473fn resolve_projection_column<'a>(
6474    c: &ColumnName,
6475    schema_cols: &'a [ColumnSchema],
6476    table_alias: &str,
6477) -> Result<&'a ColumnSchema, EngineError> {
6478    if let Some(q) = &c.qualifier {
6479        let composite = alloc::format!("{q}.{name}", name = c.name);
6480        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
6481            return Ok(s);
6482        }
6483        // Single-table case: the qualifier may equal the active alias —
6484        // then look for the bare column name.
6485        if q == table_alias
6486            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
6487        {
6488            return Ok(s);
6489        }
6490        // For multi-table schemas the qualifier is unknown only if no
6491        // column bears the "<q>." prefix. For single-table, the alias
6492        // mismatch alone is enough.
6493        let prefix = alloc::format!("{q}.");
6494        let qualifier_known =
6495            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
6496        if !qualifier_known {
6497            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6498                qualifier: q.clone(),
6499            }));
6500        }
6501        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6502            name: c.name.clone(),
6503        }));
6504    }
6505    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6506        return Ok(s);
6507    }
6508    let suffix = alloc::format!(".{name}", name = c.name);
6509    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6510    let first = matches.next();
6511    let extra = matches.next();
6512    match (first, extra) {
6513        (Some(s), None) => Ok(s),
6514        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6515            detail: alloc::format!("ambiguous column reference: {}", c.name),
6516        })),
6517        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6518            name: c.name.clone(),
6519        })),
6520    }
6521}
6522
6523fn build_projection(
6524    items: &[SelectItem],
6525    schema_cols: &[ColumnSchema],
6526    table_alias: &str,
6527) -> Result<Vec<ProjectedItem>, EngineError> {
6528    let mut out = Vec::new();
6529    for item in items {
6530        match item {
6531            SelectItem::Wildcard => {
6532                for col in schema_cols {
6533                    out.push(ProjectedItem {
6534                        expr: Expr::Column(ColumnName {
6535                            qualifier: None,
6536                            name: col.name.clone(),
6537                        }),
6538                        output_name: col.name.clone(),
6539                        ty: col.ty,
6540                        nullable: col.nullable,
6541                    });
6542                }
6543            }
6544            SelectItem::Expr { expr, alias } => {
6545                // Plain column ref keeps full schema info (real type +
6546                // nullability). Compound expressions evaluate fine but have
6547                // no static type — surface them as nullable TEXT, which is
6548                // what most clients render anyway.
6549                if let Expr::Column(c) = expr {
6550                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6551                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6552                    out.push(ProjectedItem {
6553                        expr: expr.clone(),
6554                        output_name,
6555                        ty: sch.ty,
6556                        nullable: sch.nullable,
6557                    });
6558                } else {
6559                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6560                    out.push(ProjectedItem {
6561                        expr: expr.clone(),
6562                        output_name,
6563                        ty: DataType::Text,
6564                        nullable: true,
6565                    });
6566                }
6567            }
6568        }
6569    }
6570    Ok(out)
6571}
6572
6573/// Promote an integer to a NUMERIC value at the requested scale.
6574/// Rejects values that, after scaling, would overflow the column's
6575/// precision budget.
6576fn numeric_from_integer(
6577    n: i128,
6578    precision: u8,
6579    scale: u8,
6580    col_name: &str,
6581) -> Result<Value, EngineError> {
6582    let factor = pow10_i128(scale);
6583    let scaled = n.checked_mul(factor).ok_or_else(|| {
6584        EngineError::Unsupported(alloc::format!(
6585            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6586        ))
6587    })?;
6588    check_precision(scaled, precision, col_name)?;
6589    Ok(Value::Numeric { scaled, scale })
6590}
6591
6592/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6593/// then verifies the result fits the column's precision.
6594#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6595fn numeric_from_float(
6596    x: f64,
6597    precision: u8,
6598    scale: u8,
6599    col_name: &str,
6600) -> Result<Value, EngineError> {
6601    if !x.is_finite() {
6602        return Err(EngineError::Unsupported(alloc::format!(
6603            "cannot store non-finite float in NUMERIC column `{col_name}`"
6604        )));
6605    }
6606    let mut factor = 1.0_f64;
6607    for _ in 0..scale {
6608        factor *= 10.0;
6609    }
6610    // Round half-away-from-zero by biasing then casting (`as i128`
6611    // truncates toward zero, so the bias + truncation gives the
6612    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6613    // need them — the cast handles the truncation step.
6614    let shifted = x * factor;
6615    let biased = if shifted >= 0.0 {
6616        shifted + 0.5
6617    } else {
6618        shifted - 0.5
6619    };
6620    // Range-check before casting back to i128 — the cast itself is
6621    // saturating in Rust, which would silently truncate huge inputs.
6622    if !(-1e38..=1e38).contains(&biased) {
6623        return Err(EngineError::Unsupported(alloc::format!(
6624            "value {x} overflows NUMERIC range for column `{col_name}`"
6625        )));
6626    }
6627    let scaled = biased as i128;
6628    check_precision(scaled, precision, col_name)?;
6629    Ok(Value::Numeric { scaled, scale })
6630}
6631
6632/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6633/// multiplies by 10; going down rounds half-away-from-zero.
6634fn numeric_rescale(
6635    scaled: i128,
6636    src_scale: u8,
6637    precision: u8,
6638    dst_scale: u8,
6639    col_name: &str,
6640) -> Result<Value, EngineError> {
6641    let new_scaled = if dst_scale >= src_scale {
6642        let bump = pow10_i128(dst_scale - src_scale);
6643        scaled.checked_mul(bump).ok_or_else(|| {
6644            EngineError::Unsupported(alloc::format!(
6645                "overflow rescaling NUMERIC for column `{col_name}`"
6646            ))
6647        })?
6648    } else {
6649        let drop = pow10_i128(src_scale - dst_scale);
6650        let half = drop / 2;
6651        if scaled >= 0 {
6652            (scaled + half) / drop
6653        } else {
6654            (scaled - half) / drop
6655        }
6656    };
6657    check_precision(new_scaled, precision, col_name)?;
6658    Ok(Value::Numeric {
6659        scaled: new_scaled,
6660        scale: dst_scale,
6661    })
6662}
6663
/// Drop the fractional part of a scaled integer, returning the integer
/// portion (toward zero). Used for NUMERIC → INT casts.
///
/// `scaled` is interpreted as `value * 10^scale`. When `10^scale` is
/// not representable in `i128` (`scale > 38`) the divisor is larger
/// than any `i128` magnitude, so the integer portion is `0`.
const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening; `u32::from` is not callable
    // in a `const fn`.
    match 10_i128.checked_pow(scale as u32) {
        Some(factor) => scaled / factor,
        None => 0,
    }
}
6673
6674/// Verify a scaled NUMERIC value fits the column's declared precision.
6675/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6676/// skip the check there.
6677fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6678    if precision == 0 {
6679        return Ok(());
6680    }
6681    let limit = pow10_i128(precision);
6682    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6683        return Err(EngineError::Unsupported(alloc::format!(
6684            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6685        )));
6686    }
6687    Ok(())
6688}
6689
/// `10^p` as an `i128`, usable in const contexts.
///
/// Only exponents up to 38 are representable; larger ones overflow
/// the multiplication (a panic when overflow checks are enabled, a
/// wrapped value otherwise).
const fn pow10_i128_const(p: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening; `u32::from` is not callable
    // in a `const fn`.
    10_i128.pow(p as u32)
}
6699
/// Runtime entry point for `10^p`; delegates to [`pow10_i128_const`]
/// and therefore shares its behaviour for every exponent.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
6703
6704/// Walk a parsed `Statement`, swapping any `NOW()` /
6705/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
6706/// literal cast that wraps the engine's per-statement clock reading.
6707/// When `now_micros` is `None`, calls stay as-is and surface as
6708/// `unknown function` at eval time — keeps the error path explicit.
6709/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
6710/// replace every subquery node with a materialised literal. SPG
6711/// only supports uncorrelated subqueries — the inner SELECT does
6712/// not see outer-row columns, so the result is the same for every
6713/// outer row and can be evaluated once.
6714///
6715/// Returns the rewritten statement; the caller passes this to the
6716/// regular row-loop executor which no longer sees Subquery nodes
6717/// in its tree.
6718impl Engine {
6719    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
6720    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
6721    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
6722    /// 1. Apply the WHERE filter.
6723    /// 2. For each unique `WindowFunction` node in the projection,
6724    ///    partition + sort, compute the per-row value.
6725    /// 3. Append the window values as synthetic columns (`__win_N`)
6726    ///    to the row schema.
6727    /// 4. Rewrite the projection to read those columns.
6728    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
6729    #[allow(
6730        clippy::too_many_lines,
6731        clippy::type_complexity,
6732        clippy::needless_range_loop
6733    )] // window-eval is one cohesive pipe; splitting fragments
6734    fn exec_select_with_window(
6735        &self,
6736        stmt: &SelectStatement,
6737        cancel: CancelToken<'_>,
6738    ) -> Result<QueryResult, EngineError> {
6739        let from = stmt.from.as_ref().ok_or_else(|| {
6740            EngineError::Unsupported("window functions require a FROM clause".into())
6741        })?;
6742        // For v4.12 we only support a single-table FROM. Joins +
6743        // windows is queued for v5.x.
6744        if !from.joins.is_empty() {
6745            return Err(EngineError::Unsupported(
6746                "JOIN with window functions not yet supported".into(),
6747            ));
6748        }
6749        let primary = &from.primary;
6750        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
6751            StorageError::TableNotFound {
6752                name: primary.name.clone(),
6753            }
6754        })?;
6755        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
6756        let schema_cols = &table.schema().columns;
6757        let ctx = self.ev_ctx(schema_cols, Some(alias));
6758
6759        // 1) Filter pass.
6760        let mut filtered: Vec<&Row> = Vec::new();
6761        for (i, row) in table.rows().iter().enumerate() {
6762            if i.is_multiple_of(256) {
6763                cancel.check()?;
6764            }
6765            if let Some(w) = &stmt.where_ {
6766                let cond = eval::eval_expr(w, row, &ctx)?;
6767                if !matches!(cond, Value::Bool(true)) {
6768                    continue;
6769                }
6770            }
6771            filtered.push(row);
6772        }
6773        let n_rows = filtered.len();
6774
6775        // 2) Collect unique window function nodes from projection.
6776        let mut window_nodes: Vec<Expr> = Vec::new();
6777        for item in &stmt.items {
6778            if let SelectItem::Expr { expr, .. } = item {
6779                collect_window_nodes(expr, &mut window_nodes);
6780            }
6781        }
6782
6783        // 3) For each window, compute per-row value.
6784        // Index: same order as window_nodes; for row i, win_vals[w][i].
6785        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
6786        for wnode in &window_nodes {
6787            let Expr::WindowFunction {
6788                name,
6789                args,
6790                partition_by,
6791                order_by,
6792                frame,
6793                null_treatment,
6794            } = wnode
6795            else {
6796                unreachable!("collect_window_nodes pushes only WindowFunction");
6797            };
6798            // Compute (partition_key, order_key, original_index) for each row.
6799            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
6800                Vec::with_capacity(n_rows);
6801            for (i, row) in filtered.iter().enumerate() {
6802                let pkey: Vec<Value> = partition_by
6803                    .iter()
6804                    .map(|p| eval::eval_expr(p, row, &ctx))
6805                    .collect::<Result<_, _>>()?;
6806                let okey: Vec<(Value, bool)> = order_by
6807                    .iter()
6808                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
6809                    .collect::<Result<_, _>>()?;
6810                indexed.push((pkey, okey, i));
6811            }
6812            // Sort by (partition_key, order_key). Partition key uses
6813            // a stable encoded form; order key respects ASC/DESC.
6814            indexed.sort_by(|a, b| {
6815                let p_cmp = partition_key_cmp(&a.0, &b.0);
6816                if p_cmp != core::cmp::Ordering::Equal {
6817                    return p_cmp;
6818                }
6819                order_key_cmp(&a.1, &b.1)
6820            });
6821            // Per-partition compute.
6822            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
6823            let mut p_start = 0;
6824            while p_start < indexed.len() {
6825                let mut p_end = p_start + 1;
6826                while p_end < indexed.len()
6827                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
6828                        == core::cmp::Ordering::Equal
6829                {
6830                    p_end += 1;
6831                }
6832                // Compute the function within this partition slice.
6833                compute_window_partition(
6834                    name,
6835                    args,
6836                    !order_by.is_empty(),
6837                    frame.as_ref(),
6838                    *null_treatment,
6839                    &indexed[p_start..p_end],
6840                    &filtered,
6841                    &ctx,
6842                    &mut out_vals,
6843                )?;
6844                p_start = p_end;
6845            }
6846            win_vals.push(out_vals);
6847        }
6848
6849        // 4) Build extended schema: original columns + synthetic.
6850        let mut ext_cols = schema_cols.clone();
6851        for i in 0..window_nodes.len() {
6852            ext_cols.push(ColumnSchema::new(
6853                alloc::format!("__win_{i}"),
6854                DataType::Text, // type doesn't matter for projection eval
6855                true,
6856            ));
6857        }
6858        // 5) Build extended rows: each row gets its window values appended.
6859        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
6860        for i in 0..n_rows {
6861            let mut values = filtered[i].values.clone();
6862            for w in 0..window_nodes.len() {
6863                values.push(win_vals[w][i].clone());
6864            }
6865            ext_rows.push(Row::new(values));
6866        }
6867        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
6868        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
6869        for item in &stmt.items {
6870            let new_item = match item {
6871                SelectItem::Wildcard => SelectItem::Wildcard,
6872                SelectItem::Expr { expr, alias } => {
6873                    let mut e = expr.clone();
6874                    rewrite_window_to_columns(&mut e, &window_nodes);
6875                    SelectItem::Expr {
6876                        expr: e,
6877                        alias: alias.clone(),
6878                    }
6879                }
6880            };
6881            rewritten_items.push(new_item);
6882        }
6883
6884        // 7) Project into final rows.
6885        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
6886        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
6887        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
6888        for (i, row) in ext_rows.iter().enumerate() {
6889            if i.is_multiple_of(256) {
6890                cancel.check()?;
6891            }
6892            let mut values = Vec::with_capacity(projection.len());
6893            for p in &projection {
6894                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
6895            }
6896            let order_keys = if stmt.order_by.is_empty() {
6897                Vec::new()
6898            } else {
6899                let mut keys = Vec::with_capacity(stmt.order_by.len());
6900                for o in &stmt.order_by {
6901                    let mut e = o.expr.clone();
6902                    rewrite_window_to_columns(&mut e, &window_nodes);
6903                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
6904                    keys.push(value_to_order_key(&key)?);
6905                }
6906                keys
6907            };
6908            tagged.push((order_keys, Row::new(values)));
6909        }
6910        // ORDER BY + LIMIT/OFFSET on the projected rows.
6911        if !stmt.order_by.is_empty() {
6912            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6913            sort_by_keys(&mut tagged, &descs);
6914        }
6915        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6916        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
6917        let final_cols: Vec<ColumnSchema> = projection
6918            .into_iter()
6919            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6920            .collect();
6921        Ok(QueryResult::Rows {
6922            columns: final_cols,
6923            rows: out_rows,
6924        })
6925    }
6926
6927    /// v4.11: materialise each CTE into a temp table inside a
6928    /// cloned catalog, then run the body SELECT against a fresh
6929    /// engine instance that owns the enriched catalog. The clone
6930    /// is moderately expensive — only paid by CTE-bearing queries.
6931    /// Subqueries inside CTE bodies / the main body resolve as
6932    /// usual; `clock_fn` is propagated so `NOW()` lines up.
6933    fn exec_with_ctes(
6934        &self,
6935        stmt: &SelectStatement,
6936        cancel: CancelToken<'_>,
6937    ) -> Result<QueryResult, EngineError> {
6938        cancel.check()?;
6939        let mut catalog = self.active_catalog().clone();
6940        for cte in &stmt.ctes {
6941            if catalog.get(&cte.name).is_some() {
6942                return Err(EngineError::Unsupported(alloc::format!(
6943                    "CTE name {:?} shadows an existing table; rename the CTE",
6944                    cte.name
6945                )));
6946            }
6947            let (columns, rows) = if cte.recursive {
6948                self.materialise_recursive_cte(cte, &catalog, cancel)?
6949            } else {
6950                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
6951                let QueryResult::Rows { columns, rows } = body_result else {
6952                    return Err(EngineError::Unsupported(alloc::format!(
6953                        "CTE {:?} body did not return rows",
6954                        cte.name
6955                    )));
6956                };
6957                (columns, rows)
6958            };
6959            // v4.22: the projection builder labels any non-column
6960            // expression as Text — including literal SELECT 1.
6961            // Promote each column's type to whatever the rows
6962            // actually carry so the CTE storage table accepts them.
6963            let inferred = infer_column_types(&columns, &rows);
6964            let mut columns = inferred;
6965            // v4.22: apply optional `WITH name(a, b, c)` overrides.
6966            if !cte.column_overrides.is_empty() {
6967                if cte.column_overrides.len() != columns.len() {
6968                    return Err(EngineError::Unsupported(alloc::format!(
6969                        "CTE {:?} column list has {} names but body returns {} columns",
6970                        cte.name,
6971                        cte.column_overrides.len(),
6972                        columns.len()
6973                    )));
6974                }
6975                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6976                    col.name.clone_from(name);
6977                }
6978            }
6979            let schema = TableSchema::new(cte.name.clone(), columns);
6980            catalog.create_table(schema).map_err(EngineError::Storage)?;
6981            let table = catalog
6982                .get_mut(&cte.name)
6983                .expect("just-created CTE table must exist");
6984            for row in rows {
6985                table.insert(row).map_err(EngineError::Storage)?;
6986            }
6987        }
6988        // Strip CTEs from the body before running on the temp engine
6989        // so we don't recurse forever.
6990        let mut body = stmt.clone();
6991        body.ctes = Vec::new();
6992        let mut temp = Engine::restore(catalog);
6993        if let Some(c) = self.clock {
6994            temp = temp.with_clock(c);
6995        }
6996        if let Some(f) = self.salt_fn {
6997            temp = temp.with_salt_fn(f);
6998        }
6999        temp.exec_select_cancel(&body, cancel)
7000    }
7001
7002    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
7003    /// UNION (or UNION ALL) of an anchor that does not reference
7004    /// the CTE name, and one or more recursive terms that do. The
7005    /// anchor runs first; each subsequent iteration runs the
7006    /// recursive term against a temp catalog where the CTE name is
7007    /// bound to the *previous* iteration's output. Iteration stops
7008    /// when the recursive term yields no rows; UNION (DISTINCT)
7009    /// deduplicates against the accumulated result, UNION ALL does
7010    /// not. A hard cap on total rows prevents runaway queries.
7011    #[allow(clippy::too_many_lines)]
7012    fn materialise_recursive_cte(
7013        &self,
7014        cte: &spg_sql::ast::Cte,
7015        base_catalog: &Catalog,
7016        cancel: CancelToken<'_>,
7017    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
7018        const MAX_TOTAL_ROWS: usize = 1_000_000;
7019        const MAX_ITERATIONS: usize = 100_000;
7020        cancel.check()?;
7021        if cte.body.unions.is_empty() {
7022            return Err(EngineError::Unsupported(alloc::format!(
7023                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
7024                cte.name
7025            )));
7026        }
7027        // Anchor: the body's leading SELECT, with unions stripped.
7028        let mut anchor = cte.body.clone();
7029        let union_terms = core::mem::take(&mut anchor.unions);
7030        anchor.ctes = Vec::new();
7031        // Anchor must not reference the CTE name.
7032        if select_refers_to(&anchor, &cte.name) {
7033            return Err(EngineError::Unsupported(alloc::format!(
7034                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
7035                cte.name
7036            )));
7037        }
7038        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
7039        let QueryResult::Rows {
7040            columns: anchor_cols,
7041            rows: anchor_rows,
7042        } = anchor_result
7043        else {
7044            return Err(EngineError::Unsupported(alloc::format!(
7045                "WITH RECURSIVE {:?}: anchor did not return rows",
7046                cte.name
7047            )));
7048        };
7049        // The projection builder labels non-column expressions Text;
7050        // refine column types from the anchor's actual values so the
7051        // intermediate iter-catalog tables accept them.
7052        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
7053        if !cte.column_overrides.is_empty() {
7054            if cte.column_overrides.len() != columns.len() {
7055                return Err(EngineError::Unsupported(alloc::format!(
7056                    "CTE {:?} column list has {} names but anchor returns {} columns",
7057                    cte.name,
7058                    cte.column_overrides.len(),
7059                    columns.len()
7060                )));
7061            }
7062            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
7063                col.name.clone_from(name);
7064            }
7065        }
7066        let mut all_rows: Vec<Row> = anchor_rows.clone();
7067        let mut working_set: Vec<Row> = anchor_rows;
7068        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
7069        // Track at least one "all UNION ALL" flag — if every union
7070        // kind is ALL we skip the dedup step (faster + matches PG).
7071        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
7072        if !all_union_all {
7073            for r in &all_rows {
7074                seen.insert(encode_row_key(r));
7075            }
7076        }
7077        for iter in 0..MAX_ITERATIONS {
7078            cancel.check()?;
7079            if working_set.is_empty() {
7080                break;
7081            }
7082            // Build a fresh catalog: base + CTE bound to working_set.
7083            let mut iter_catalog = base_catalog.clone();
7084            let schema = TableSchema::new(cte.name.clone(), columns.clone());
7085            iter_catalog
7086                .create_table(schema)
7087                .map_err(EngineError::Storage)?;
7088            {
7089                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
7090                for row in &working_set {
7091                    table.insert(row.clone()).map_err(EngineError::Storage)?;
7092                }
7093            }
7094            let mut iter_engine = Engine::restore(iter_catalog);
7095            if let Some(c) = self.clock {
7096                iter_engine = iter_engine.with_clock(c);
7097            }
7098            if let Some(f) = self.salt_fn {
7099                iter_engine = iter_engine.with_salt_fn(f);
7100            }
7101            // Run each recursive term in sequence and collect new rows.
7102            let mut next_set: Vec<Row> = Vec::new();
7103            for (_, term) in &union_terms {
7104                let mut term = term.clone();
7105                term.ctes = Vec::new();
7106                let r = iter_engine.exec_select_cancel(&term, cancel)?;
7107                let QueryResult::Rows {
7108                    columns: rc,
7109                    rows: rs,
7110                } = r
7111                else {
7112                    return Err(EngineError::Unsupported(alloc::format!(
7113                        "WITH RECURSIVE {:?}: recursive term did not return rows",
7114                        cte.name
7115                    )));
7116                };
7117                if rc.len() != columns.len() {
7118                    return Err(EngineError::Unsupported(alloc::format!(
7119                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
7120                        cte.name,
7121                        rc.len(),
7122                        columns.len()
7123                    )));
7124                }
7125                for row in rs {
7126                    if !all_union_all {
7127                        let key = encode_row_key(&row);
7128                        if !seen.insert(key) {
7129                            continue;
7130                        }
7131                    }
7132                    next_set.push(row);
7133                }
7134            }
7135            if next_set.is_empty() {
7136                break;
7137            }
7138            all_rows.extend(next_set.iter().cloned());
7139            working_set = next_set;
7140            if all_rows.len() > MAX_TOTAL_ROWS {
7141                return Err(EngineError::Unsupported(alloc::format!(
7142                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
7143                    cte.name
7144                )));
7145            }
7146            if iter + 1 == MAX_ITERATIONS {
7147                return Err(EngineError::Unsupported(alloc::format!(
7148                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
7149                    cte.name
7150                )));
7151            }
7152        }
7153        Ok((columns, all_rows))
7154    }
7155
7156    fn resolve_select_subqueries(
7157        &self,
7158        stmt: &mut SelectStatement,
7159        cancel: CancelToken<'_>,
7160    ) -> Result<(), EngineError> {
7161        for item in &mut stmt.items {
7162            if let SelectItem::Expr { expr, .. } = item {
7163                self.resolve_expr_subqueries(expr, cancel)?;
7164            }
7165        }
7166        if let Some(w) = &mut stmt.where_ {
7167            self.resolve_expr_subqueries(w, cancel)?;
7168        }
7169        if let Some(gs) = &mut stmt.group_by {
7170            for g in gs {
7171                self.resolve_expr_subqueries(g, cancel)?;
7172            }
7173        }
7174        if let Some(h) = &mut stmt.having {
7175            self.resolve_expr_subqueries(h, cancel)?;
7176        }
7177        for o in &mut stmt.order_by {
7178            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
7179        }
7180        for (_, peer) in &mut stmt.unions {
7181            self.resolve_select_subqueries(peer, cancel)?;
7182        }
7183        Ok(())
7184    }
7185
7186    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
7187    fn resolve_expr_subqueries(
7188        &self,
7189        e: &mut Expr,
7190        cancel: CancelToken<'_>,
7191    ) -> Result<(), EngineError> {
7192        // Replace-on-this-node cases first.
7193        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
7194            *e = replacement;
7195            return Ok(());
7196        }
7197        match e {
7198            Expr::Binary { lhs, rhs, .. } => {
7199                self.resolve_expr_subqueries(lhs, cancel)?;
7200                self.resolve_expr_subqueries(rhs, cancel)?;
7201            }
7202            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7203                self.resolve_expr_subqueries(expr, cancel)?;
7204            }
7205            Expr::FunctionCall { args, .. } => {
7206                for a in args {
7207                    self.resolve_expr_subqueries(a, cancel)?;
7208                }
7209            }
7210            Expr::Like { expr, pattern, .. } => {
7211                self.resolve_expr_subqueries(expr, cancel)?;
7212                self.resolve_expr_subqueries(pattern, cancel)?;
7213            }
7214            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
7215            // v4.12 window functions — recurse into args + ORDER BY
7216            // + PARTITION BY in case they carry inner subqueries.
7217            Expr::WindowFunction {
7218                args,
7219                partition_by,
7220                order_by,
7221                ..
7222            } => {
7223                for a in args {
7224                    self.resolve_expr_subqueries(a, cancel)?;
7225                }
7226                for p in partition_by {
7227                    self.resolve_expr_subqueries(p, cancel)?;
7228                }
7229                for (e, _) in order_by {
7230                    self.resolve_expr_subqueries(e, cancel)?;
7231                }
7232            }
7233            // Subquery nodes are handled in subquery_replacement
7234            // (which returned None — defensive no-op); Literal /
7235            // Column are leaves.
7236            Expr::ScalarSubquery(_)
7237            | Expr::Exists { .. }
7238            | Expr::InSubquery { .. }
7239            | Expr::Literal(_)
7240            | Expr::Placeholder(_)
7241            | Expr::Column(_) => {}
7242            // v7.10.10 — recurse children.
7243            Expr::Array(items) => {
7244                for elem in items {
7245                    self.resolve_expr_subqueries(elem, cancel)?;
7246                }
7247            }
7248            Expr::ArraySubscript { target, index } => {
7249                self.resolve_expr_subqueries(target, cancel)?;
7250                self.resolve_expr_subqueries(index, cancel)?;
7251            }
7252            Expr::AnyAll { expr, array, .. } => {
7253                self.resolve_expr_subqueries(expr, cancel)?;
7254                self.resolve_expr_subqueries(array, cancel)?;
7255            }
7256            Expr::Case {
7257                operand,
7258                branches,
7259                else_branch,
7260            } => {
7261                if let Some(o) = operand {
7262                    self.resolve_expr_subqueries(o, cancel)?;
7263                }
7264                for (w, t) in branches {
7265                    self.resolve_expr_subqueries(w, cancel)?;
7266                    self.resolve_expr_subqueries(t, cancel)?;
7267                }
7268                if let Some(e) = else_branch {
7269                    self.resolve_expr_subqueries(e, cancel)?;
7270                }
7271            }
7272        }
7273        Ok(())
7274    }
7275
7276    /// v4.23: per-row eval that handles correlated subqueries.
7277    /// Equivalent to `eval::eval_expr` when the expression has no
7278    /// subqueries; otherwise clones the expression, substitutes
7279    /// outer-row columns into each surviving subquery node, runs
7280    /// the inner SELECT, and replaces the node with the literal
7281    /// result. Only the WHERE-filter call sites use this path so
7282    /// the uncorrelated fast path is preserved everywhere else.
7283    fn eval_expr_with_correlated(
7284        &self,
7285        expr: &Expr,
7286        row: &Row,
7287        ctx: &EvalContext<'_>,
7288        cancel: CancelToken<'_>,
7289        memo: Option<&mut memoize::MemoizeCache>,
7290    ) -> Result<Value, EngineError> {
7291        if !expr_has_subquery(expr) {
7292            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
7293        }
7294        let mut e = expr.clone();
7295        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
7296        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
7297    }
7298
7299    fn resolve_correlated_in_expr(
7300        &self,
7301        e: &mut Expr,
7302        row: &Row,
7303        ctx: &EvalContext<'_>,
7304        cancel: CancelToken<'_>,
7305        mut memo: Option<&mut memoize::MemoizeCache>,
7306    ) -> Result<(), EngineError> {
7307        match e {
7308            Expr::ScalarSubquery(inner) => {
7309                // v6.2.6 — Memoize: build the cache key from the
7310                // pre-substitution subquery repr + the outer row's
7311                // values. Two outer rows with identical correlated
7312                // values hit the same entry.
7313                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
7314                    subquery_repr: alloc::format!("{}", **inner),
7315                    outer_values: row.values.clone(),
7316                });
7317                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
7318                    && let Some(cached) = cache.get(k)
7319                {
7320                    *e = value_to_literal_expr(cached)?;
7321                    return Ok(());
7322                }
7323                let mut s = (**inner).clone();
7324                substitute_outer_columns(&mut s, row, ctx);
7325                let r = self.exec_select_cancel(&s, cancel)?;
7326                let QueryResult::Rows { rows, .. } = r else {
7327                    return Err(EngineError::Unsupported(
7328                        "scalar subquery: inner did not return rows".into(),
7329                    ));
7330                };
7331                let value = match rows.as_slice() {
7332                    [] => Value::Null,
7333                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
7334                    _ => {
7335                        return Err(EngineError::Unsupported(alloc::format!(
7336                            "scalar subquery returned {} rows; expected 0 or 1",
7337                            rows.len()
7338                        )));
7339                    }
7340                };
7341                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
7342                    cache.insert(k, value.clone());
7343                }
7344                *e = value_to_literal_expr(value)?;
7345            }
7346            Expr::Exists { subquery, negated } => {
7347                let mut s = (**subquery).clone();
7348                substitute_outer_columns(&mut s, row, ctx);
7349                let r = self.exec_select_cancel(&s, cancel)?;
7350                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
7351                let bit = if *negated { !exists } else { exists };
7352                *e = Expr::Literal(Literal::Bool(bit));
7353            }
7354            Expr::InSubquery {
7355                expr: lhs,
7356                subquery,
7357                negated,
7358            } => {
7359                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7360                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
7361                let mut s = (**subquery).clone();
7362                substitute_outer_columns(&mut s, row, ctx);
7363                let r = self.exec_select_cancel(&s, cancel)?;
7364                let QueryResult::Rows { columns, rows, .. } = r else {
7365                    return Err(EngineError::Unsupported(
7366                        "IN-subquery: inner did not return rows".into(),
7367                    ));
7368                };
7369                if columns.len() != 1 {
7370                    return Err(EngineError::Unsupported(alloc::format!(
7371                        "IN-subquery must project exactly one column; got {}",
7372                        columns.len()
7373                    )));
7374                }
7375                let mut found = false;
7376                let mut any_null = false;
7377                for r0 in rows {
7378                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
7379                    if v.is_null() {
7380                        any_null = true;
7381                        continue;
7382                    }
7383                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
7384                        found = true;
7385                        break;
7386                    }
7387                }
7388                let bit = if found {
7389                    !*negated
7390                } else if any_null {
7391                    return Err(EngineError::Unsupported(
7392                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
7393                    ));
7394                } else {
7395                    *negated
7396                };
7397                *e = Expr::Literal(Literal::Bool(bit));
7398            }
7399            Expr::Binary { lhs, rhs, .. } => {
7400                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7401                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
7402            }
7403            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7404                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7405            }
7406            Expr::Like { expr, pattern, .. } => {
7407                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7408                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
7409            }
7410            Expr::FunctionCall { args, .. } => {
7411                for a in args {
7412                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
7413                }
7414            }
7415            Expr::Extract { source, .. } => {
7416                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
7417            }
7418            Expr::WindowFunction { .. }
7419            | Expr::Literal(_)
7420            | Expr::Placeholder(_)
7421            | Expr::Column(_) => {}
7422            // v7.10.10 — recurse children.
7423            Expr::Array(items) => {
7424                for elem in items {
7425                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
7426                }
7427            }
7428            Expr::ArraySubscript { target, index } => {
7429                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
7430                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
7431            }
7432            Expr::AnyAll { expr, array, .. } => {
7433                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7434                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
7435            }
7436            Expr::Case {
7437                operand,
7438                branches,
7439                else_branch,
7440            } => {
7441                if let Some(o) = operand {
7442                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
7443                }
7444                for (w, t) in branches {
7445                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
7446                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
7447                }
7448                if let Some(e) = else_branch {
7449                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
7450                }
7451            }
7452        }
7453        Ok(())
7454    }
7455
7456    fn subquery_replacement(
7457        &self,
7458        e: &Expr,
7459        cancel: CancelToken<'_>,
7460    ) -> Result<Option<Expr>, EngineError> {
7461        match e {
7462            Expr::ScalarSubquery(inner) => {
7463                let mut s = (**inner).clone();
7464                // Recurse into the inner SELECT first so nested
7465                // subqueries materialise bottom-up.
7466                self.resolve_select_subqueries(&mut s, cancel)?;
7467                let r = match self.exec_bare_select_cancel(&s, cancel) {
7468                    Ok(r) => r,
7469                    Err(e) if is_correlation_error(&e) => return Ok(None),
7470                    Err(e) => return Err(e),
7471                };
7472                let QueryResult::Rows { rows, .. } = r else {
7473                    return Err(EngineError::Unsupported(
7474                        "scalar subquery: inner statement did not return rows".into(),
7475                    ));
7476                };
7477                let value = match rows.as_slice() {
7478                    [] => Value::Null,
7479                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
7480                    _ => {
7481                        return Err(EngineError::Unsupported(alloc::format!(
7482                            "scalar subquery returned {} rows; expected 0 or 1",
7483                            rows.len()
7484                        )));
7485                    }
7486                };
7487                Ok(Some(value_to_literal_expr(value)?))
7488            }
7489            Expr::Exists { subquery, negated } => {
7490                let mut s = (**subquery).clone();
7491                self.resolve_select_subqueries(&mut s, cancel)?;
7492                let r = match self.exec_bare_select_cancel(&s, cancel) {
7493                    Ok(r) => r,
7494                    Err(e) if is_correlation_error(&e) => return Ok(None),
7495                    Err(e) => return Err(e),
7496                };
7497                let exists = match r {
7498                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7499                    QueryResult::CommandOk { .. } => false,
7500                };
7501                let bit = if *negated { !exists } else { exists };
7502                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7503            }
7504            Expr::InSubquery {
7505                expr,
7506                subquery,
7507                negated,
7508            } => {
7509                let mut s = (**subquery).clone();
7510                self.resolve_select_subqueries(&mut s, cancel)?;
7511                let r = match self.exec_bare_select_cancel(&s, cancel) {
7512                    Ok(r) => r,
7513                    Err(e) if is_correlation_error(&e) => return Ok(None),
7514                    Err(e) => return Err(e),
7515                };
7516                let QueryResult::Rows { columns, rows, .. } = r else {
7517                    return Err(EngineError::Unsupported(
7518                        "IN-subquery: inner statement did not return rows".into(),
7519                    ));
7520                };
7521                if columns.len() != 1 {
7522                    return Err(EngineError::Unsupported(alloc::format!(
7523                        "IN-subquery must project exactly one column; got {}",
7524                        columns.len()
7525                    )));
7526                }
7527                // Build the same OR-Eq chain the parse-time literal-list
7528                // path constructs, with each value lifted into a Literal.
7529                let mut acc: Option<Expr> = None;
7530                for row in rows {
7531                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7532                    let lit = value_to_literal_expr(v)?;
7533                    let cmp = Expr::Binary {
7534                        lhs: expr.clone(),
7535                        op: BinOp::Eq,
7536                        rhs: Box::new(lit),
7537                    };
7538                    acc = Some(match acc {
7539                        None => cmp,
7540                        Some(prev) => Expr::Binary {
7541                            lhs: Box::new(prev),
7542                            op: BinOp::Or,
7543                            rhs: Box::new(cmp),
7544                        },
7545                    });
7546                }
7547                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7548                let final_expr = if *negated {
7549                    Expr::Unary {
7550                        op: UnOp::Not,
7551                        expr: Box::new(combined),
7552                    }
7553                } else {
7554                    combined
7555                };
7556                Ok(Some(final_expr))
7557            }
7558            _ => Ok(None),
7559        }
7560    }
7561}
7562
7563// ---- v4.12 window-function helpers ----
7564// The (partition-key, order-key, original-index) tuple shape used
7565// across these helpers is intrinsic to the planner. Factoring it
7566// into a typedef adds indirection without making the code clearer,
7567// so several lints are allowed inline on the affected functions
7568// rather than module-wide.
7569
7570/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7571/// not) inside a SELECT — used to verify the anchor of a WITH
7572/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7573/// FROM joins, subqueries, and unions.
7574fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7575    if let Some(from) = &stmt.from
7576        && from_refers_to(from, target)
7577    {
7578        return true;
7579    }
7580    for (_, peer) in &stmt.unions {
7581        if select_refers_to(peer, target) {
7582            return true;
7583        }
7584    }
7585    for item in &stmt.items {
7586        if let SelectItem::Expr { expr, .. } = item
7587            && expr_refers_to(expr, target)
7588        {
7589            return true;
7590        }
7591    }
7592    if let Some(w) = &stmt.where_
7593        && expr_refers_to(w, target)
7594    {
7595        return true;
7596    }
7597    false
7598}
7599
7600fn from_refers_to(from: &FromClause, target: &str) -> bool {
7601    if from.primary.name.eq_ignore_ascii_case(target) {
7602        return true;
7603    }
7604    from.joins
7605        .iter()
7606        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7607}
7608
7609fn expr_refers_to(e: &Expr, target: &str) -> bool {
7610    match e {
7611        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7612        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7613            select_refers_to(subquery, target)
7614        }
7615        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7616        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7617            expr_refers_to(expr, target)
7618        }
7619        Expr::Like { expr, pattern, .. } => {
7620            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7621        }
7622        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7623        Expr::Extract { source, .. } => expr_refers_to(source, target),
7624        Expr::WindowFunction {
7625            args,
7626            partition_by,
7627            order_by,
7628            ..
7629        } => {
7630            args.iter().any(|a| expr_refers_to(a, target))
7631                || partition_by.iter().any(|p| expr_refers_to(p, target))
7632                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7633        }
7634        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7635        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7636        Expr::ArraySubscript { target: t, index } => {
7637            expr_refers_to(t, target) || expr_refers_to(index, target)
7638        }
7639        Expr::AnyAll { expr, array, .. } => {
7640            expr_refers_to(expr, target) || expr_refers_to(array, target)
7641        }
7642        Expr::Case {
7643            operand,
7644            branches,
7645            else_branch,
7646        } => {
7647            operand.as_deref().is_some_and(|o| expr_refers_to(o, target))
7648                || branches
7649                    .iter()
7650                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
7651                || else_branch
7652                    .as_deref()
7653                    .is_some_and(|e| expr_refers_to(e, target))
7654        }
7655    }
7656}
7657
7658/// v4.22: pick more specific column types from observed rows when
7659/// the projection builder defaulted to Text (the v1.x behavior for
7660/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
7661/// land an Int column in the CTE storage table rather than failing
7662/// the insert with "expected TEXT, got INT".
7663fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
7664    let mut out = columns.to_vec();
7665    for (col_idx, col) in out.iter_mut().enumerate() {
7666        if col.ty != DataType::Text {
7667            continue;
7668        }
7669        let mut inferred: Option<DataType> = None;
7670        let mut all_null = true;
7671        for row in rows {
7672            let Some(v) = row.values.get(col_idx) else {
7673                continue;
7674            };
7675            let ty = match v {
7676                Value::Null => continue,
7677                Value::SmallInt(_) => DataType::SmallInt,
7678                Value::Int(_) => DataType::Int,
7679                Value::BigInt(_) => DataType::BigInt,
7680                Value::Float(_) => DataType::Float,
7681                Value::Bool(_) => DataType::Bool,
7682                Value::Vector(_) => DataType::Vector {
7683                    dim: 0,
7684                    encoding: VecEncoding::F32,
7685                },
7686                _ => DataType::Text,
7687            };
7688            all_null = false;
7689            inferred = Some(match inferred {
7690                None => ty,
7691                Some(prev) if prev == ty => prev,
7692                Some(_) => DataType::Text,
7693            });
7694        }
7695        if let Some(t) = inferred {
7696            col.ty = t;
7697            col.nullable = true;
7698        } else if all_null {
7699            col.nullable = true;
7700        }
7701    }
7702    out
7703}
7704
7705/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
7706/// Lines are pushed into `out`; `depth` controls indentation. We
7707/// describe the rewritten SELECT — what the executor *would* do —
7708/// using the engine handle to spot indexed lookups and table shapes.
7709#[allow(clippy::too_many_lines, clippy::format_push_string)]
7710/// v6.2.4 — Walk every line of the rendered plan tree and append
7711/// per-operator stats. Lines that name a known operator get
7712/// `(rows=N)` (`actual_rows` of the top-level operator equals the
7713/// final result row count; scans report their catalog row count
7714/// as the rows-considered metric). Other lines — Filter / Join /
7715/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
7716/// complete-by-construction; v6.2.5 fills these in via inline
7717/// executor counters.
7718/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
7719/// `(table, column)` pair the query touches via WHERE / JOIN
7720/// that doesn't already have an index on the owning table.
7721/// Walks the SELECT's FROM clauses + WHERE expression tree;
7722/// returns one line per missing index. Deterministic order:
7723/// FROM-clause iteration order, then column-reference walk
7724/// order inside each WHERE. Each suggestion is a copy-pastable
7725/// DDL string.
7726fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
7727    use alloc::collections::BTreeSet;
7728    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
7729    let mut out: Vec<String> = Vec::new();
7730    let cat = engine.active_catalog();
7731    // Build a (table, qualifier-or-alias) list from the FROM clause
7732    // so unqualified column refs in WHERE resolve to the correct
7733    // table.
7734    let Some(from) = &stmt.from else {
7735        return out;
7736    };
7737    let mut tables: Vec<String> = Vec::new();
7738    tables.push(from.primary.name.clone());
7739    for j in &from.joins {
7740        tables.push(j.table.name.clone());
7741    }
7742    // Collect column refs from the WHERE expression. JOIN ON
7743    // predicates also feed in.
7744    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
7745    if let Some(w) = &stmt.where_ {
7746        collect_column_refs(w, &mut col_refs);
7747    }
7748    for j in &from.joins {
7749        if let Some(on) = &j.on {
7750            collect_column_refs(on, &mut col_refs);
7751        }
7752    }
7753    for cn in &col_refs {
7754        // Resolve owner table: explicit qualifier first, else
7755        // first table in FROM that has a column of this name.
7756        let owner: Option<String> = if let Some(q) = &cn.qualifier {
7757            tables.iter().find(|t| t == &q).cloned()
7758        } else {
7759            tables.iter().find_map(|t| {
7760                cat.get(t).and_then(|tbl| {
7761                    if tbl.schema().column_position(&cn.name).is_some() {
7762                        Some(t.clone())
7763                    } else {
7764                        None
7765                    }
7766                })
7767            })
7768        };
7769        let Some(owner) = owner else {
7770            continue;
7771        };
7772        let Some(tbl) = cat.get(&owner) else {
7773            continue;
7774        };
7775        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
7776            continue;
7777        };
7778        // Skip if any BTree index already covers this column as
7779        // its key.
7780        let already_indexed = tbl.indices().iter().any(|i| {
7781            matches!(i.kind, spg_storage::IndexKind::BTree(_))
7782                && i.column_position == col_pos
7783                && i.expression.is_none()
7784                && i.partial_predicate.is_none()
7785        });
7786        if already_indexed {
7787            continue;
7788        }
7789        if seen.insert((owner.clone(), cn.name.clone())) {
7790            out.push(alloc::format!(
7791                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
7792                owner,
7793                cn.name,
7794                owner,
7795                cn.name
7796            ));
7797        }
7798    }
7799    out
7800}
7801
7802/// Walks an `Expr` and pushes every `ColumnName` it references.
7803/// Order is depth-first, left-to-right.
7804fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
7805    match expr {
7806        Expr::Column(cn) => out.push(cn.clone()),
7807        Expr::FunctionCall { args, .. } => {
7808            for a in args {
7809                collect_column_refs(a, out);
7810            }
7811        }
7812        Expr::Binary { lhs, rhs, .. } => {
7813            collect_column_refs(lhs, out);
7814            collect_column_refs(rhs, out);
7815        }
7816        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
7817        _ => {}
7818    }
7819}
7820
7821fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
7822    let catalog = engine.active_catalog();
7823    let cold_ids = catalog.cold_segment_ids_global();
7824    let any_cold = !cold_ids.is_empty();
7825    let cold_ids_repr = if any_cold {
7826        let mut s = alloc::string::String::from("[");
7827        for (i, id) in cold_ids.iter().enumerate() {
7828            if i > 0 {
7829                s.push(',');
7830            }
7831            s.push_str(&alloc::format!("{id}"));
7832        }
7833        s.push(']');
7834        s
7835    } else {
7836        alloc::string::String::new()
7837    };
7838    for (idx, line) in lines.iter_mut().enumerate() {
7839        let trimmed = line.trim_start();
7840        let is_top_level = idx == 0;
7841        if is_top_level {
7842            line.push_str(&alloc::format!(" (rows={total_rows})"));
7843            continue;
7844        }
7845        if let Some(rest) = trimmed.strip_prefix("From: ") {
7846            let (name, scan_kind) = match rest.split_once(" [") {
7847                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
7848                None => (rest.trim(), ""),
7849            };
7850            let bare = name.split_whitespace().next().unwrap_or(name);
7851            let hot = catalog.get(bare).map(|t| t.rows().len());
7852            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
7853            // cold-tier segment the scan COULD have walked. v6.2.x
7854            // can tighten to per-table by walking the table's
7855            // BTree-index cold locators.
7856            let annot = match (hot, scan_kind) {
7857                (Some(h), "full scan") => {
7858                    let mut s = alloc::format!(" (hot_rows={h}");
7859                    if any_cold {
7860                        s.push_str(&alloc::format!(
7861                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7862                        ));
7863                    }
7864                    s.push(')');
7865                    s
7866                }
7867                (Some(h), "index seek") => {
7868                    let mut s = alloc::format!(" (hot_rows≤{h}");
7869                    if any_cold {
7870                        s.push_str(&alloc::format!(
7871                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7872                        ));
7873                    }
7874                    s.push(')');
7875                    s
7876                }
7877                _ => " (rows=—)".to_string(),
7878            };
7879            line.push_str(&annot);
7880            continue;
7881        }
7882        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
7883        line.push_str(" (rows=—)");
7884    }
7885}
7886
7887fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
7888    let pad = "  ".repeat(depth);
7889    // 1) Top-level operator label.
7890    let top = if !stmt.ctes.is_empty() {
7891        if stmt.ctes.iter().any(|c| c.recursive) {
7892            "CTEScan (WITH RECURSIVE)"
7893        } else {
7894            "CTEScan (WITH)"
7895        }
7896    } else if !stmt.unions.is_empty() {
7897        "UnionScan"
7898    } else if select_has_window(stmt) {
7899        "WindowAgg"
7900    } else if aggregate::uses_aggregate(stmt) {
7901        "Aggregate"
7902    } else if stmt.distinct {
7903        "Distinct"
7904    } else if stmt.from.is_some() {
7905        "TableScan"
7906    } else {
7907        "Result"
7908    };
7909    out.push(alloc::format!("{pad}{top}"));
7910    let child = "  ".repeat(depth + 1);
7911    // 2) CTE bodies.
7912    for cte in &stmt.ctes {
7913        let head = if cte.recursive {
7914            alloc::format!("{child}CTE (recursive): {}", cte.name)
7915        } else {
7916            alloc::format!("{child}CTE: {}", cte.name)
7917        };
7918        out.push(head);
7919        explain_select(&cte.body, engine, depth + 2, out);
7920    }
7921    // 3) FROM details — primary table + joins, index hits.
7922    if let Some(from) = &stmt.from {
7923        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
7924        if let Some(alias) = &from.primary.alias {
7925            tag.push_str(&alloc::format!(" AS {alias}"));
7926        }
7927        // Try to detect an index-seek opportunity on WHERE against
7928        // the primary table — same heuristic the executor uses.
7929        if let Some(w) = &stmt.where_
7930            && let Some(table) = engine.active_catalog().get(&from.primary.name)
7931        {
7932            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
7933            let cols = &table.schema().columns;
7934            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
7935                tag.push_str(" [index seek]");
7936            } else {
7937                tag.push_str(" [full scan]");
7938            }
7939        } else {
7940            tag.push_str(" [full scan]");
7941        }
7942        out.push(tag);
7943        for j in &from.joins {
7944            let kind = match j.kind {
7945                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
7946                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
7947                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
7948            };
7949            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
7950            if let Some(alias) = &j.table.alias {
7951                s.push_str(&alloc::format!(" AS {alias}"));
7952            }
7953            if j.on.is_some() {
7954                s.push_str(" (ON …)");
7955            }
7956            out.push(s);
7957        }
7958    }
7959    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
7960    if let Some(w) = &stmt.where_ {
7961        let mut s = alloc::format!("{child}Filter: {w}");
7962        if expr_has_subquery(w) {
7963            s.push_str(" [subquery]");
7964        }
7965        out.push(s);
7966    }
7967    if let Some(gs) = &stmt.group_by {
7968        let mut parts = Vec::new();
7969        for g in gs {
7970            parts.push(alloc::format!("{g}"));
7971        }
7972        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
7973    }
7974    if let Some(h) = &stmt.having {
7975        out.push(alloc::format!("{child}Having: {h}"));
7976    }
7977    for o in &stmt.order_by {
7978        let dir = if o.desc { "DESC" } else { "ASC" };
7979        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
7980    }
7981    if let Some(lim) = stmt.limit {
7982        out.push(alloc::format!("{child}Limit: {lim}"));
7983    }
7984    if let Some(off) = stmt.offset {
7985        out.push(alloc::format!("{child}Offset: {off}"));
7986    }
7987    // 5) Projection — collapse Wildcard or render N items.
7988    if stmt
7989        .items
7990        .iter()
7991        .any(|it| matches!(it, SelectItem::Wildcard))
7992    {
7993        out.push(alloc::format!("{child}Project: *"));
7994    } else {
7995        out.push(alloc::format!(
7996            "{child}Project: {} item(s)",
7997            stmt.items.len()
7998        ));
7999    }
8000    // 6) Recurse into UNION peers.
8001    for (kind, peer) in &stmt.unions {
8002        let label = match kind {
8003            UnionKind::All => "UNION ALL",
8004            UnionKind::Distinct => "UNION",
8005        };
8006        out.push(alloc::format!("{child}{label}"));
8007        explain_select(peer, engine, depth + 2, out);
8008    }
8009}
8010
8011/// v4.23: recognise the engine errors that indicate the inner
8012/// SELECT couldn't be evaluated in isolation because it references
8013/// an outer column — used by `subquery_replacement` to skip
8014/// materialisation and let row-eval handle it instead.
8015fn is_correlation_error(e: &EngineError) -> bool {
8016    matches!(
8017        e,
8018        EngineError::Eval(
8019            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
8020        )
8021    )
8022}
8023
8024/// v4.23: walk every Expr in `stmt` and replace each Column ref
8025/// that targets the outer scope (qualifier matches the outer
8026/// table alias) with a Literal carrying the outer row's value.
8027/// Conservative: only qualified refs are substituted, so the user
8028/// must write `outer_alias.col` to reference an outer column. This
8029/// matches PG's lexical scoping for correlated subqueries and
8030/// avoids accidentally rebinding inner columns of the same name.
8031fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
8032    let Some(outer_alias) = ctx.table_alias else {
8033        return;
8034    };
8035    substitute_in_select(stmt, row, ctx, outer_alias);
8036}
8037
8038fn substitute_in_select(
8039    stmt: &mut SelectStatement,
8040    row: &Row,
8041    ctx: &EvalContext<'_>,
8042    outer_alias: &str,
8043) {
8044    for item in &mut stmt.items {
8045        if let SelectItem::Expr { expr, .. } = item {
8046            substitute_in_expr(expr, row, ctx, outer_alias);
8047        }
8048    }
8049    if let Some(w) = &mut stmt.where_ {
8050        substitute_in_expr(w, row, ctx, outer_alias);
8051    }
8052    if let Some(gs) = &mut stmt.group_by {
8053        for g in gs {
8054            substitute_in_expr(g, row, ctx, outer_alias);
8055        }
8056    }
8057    if let Some(h) = &mut stmt.having {
8058        substitute_in_expr(h, row, ctx, outer_alias);
8059    }
8060    for o in &mut stmt.order_by {
8061        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
8062    }
8063    for (_, peer) in &mut stmt.unions {
8064        substitute_in_select(peer, row, ctx, outer_alias);
8065    }
8066}
8067
8068fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
8069    if let Expr::Column(c) = e
8070        && let Some(qual) = &c.qualifier
8071        && qual.eq_ignore_ascii_case(outer_alias)
8072    {
8073        // Look up the column's index in the outer schema.
8074        if let Some(idx) = ctx
8075            .columns
8076            .iter()
8077            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
8078        {
8079            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
8080            if let Ok(lit) = value_to_literal_expr(v) {
8081                *e = lit;
8082                return;
8083            }
8084        }
8085    }
8086    match e {
8087        Expr::Binary { lhs, rhs, .. } => {
8088            substitute_in_expr(lhs, row, ctx, outer_alias);
8089            substitute_in_expr(rhs, row, ctx, outer_alias);
8090        }
8091        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8092            substitute_in_expr(expr, row, ctx, outer_alias);
8093        }
8094        Expr::Like { expr, pattern, .. } => {
8095            substitute_in_expr(expr, row, ctx, outer_alias);
8096            substitute_in_expr(pattern, row, ctx, outer_alias);
8097        }
8098        Expr::FunctionCall { args, .. } => {
8099            for a in args {
8100                substitute_in_expr(a, row, ctx, outer_alias);
8101            }
8102        }
8103        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
8104        Expr::WindowFunction {
8105            args,
8106            partition_by,
8107            order_by,
8108            ..
8109        } => {
8110            for a in args {
8111                substitute_in_expr(a, row, ctx, outer_alias);
8112            }
8113            for p in partition_by {
8114                substitute_in_expr(p, row, ctx, outer_alias);
8115            }
8116            for (o, _) in order_by {
8117                substitute_in_expr(o, row, ctx, outer_alias);
8118            }
8119        }
8120        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
8121        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
8122            substitute_in_select(subquery, row, ctx, outer_alias);
8123        }
8124        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
8125        Expr::Array(items) => {
8126            for elem in items {
8127                substitute_in_expr(elem, row, ctx, outer_alias);
8128            }
8129        }
8130        Expr::ArraySubscript { target, index } => {
8131            substitute_in_expr(target, row, ctx, outer_alias);
8132            substitute_in_expr(index, row, ctx, outer_alias);
8133        }
8134        Expr::AnyAll { expr, array, .. } => {
8135            substitute_in_expr(expr, row, ctx, outer_alias);
8136            substitute_in_expr(array, row, ctx, outer_alias);
8137        }
8138        Expr::Case {
8139            operand,
8140            branches,
8141            else_branch,
8142        } => {
8143            if let Some(o) = operand {
8144                substitute_in_expr(o, row, ctx, outer_alias);
8145            }
8146            for (w, t) in branches {
8147                substitute_in_expr(w, row, ctx, outer_alias);
8148                substitute_in_expr(t, row, ctx, outer_alias);
8149            }
8150            if let Some(e) = else_branch {
8151                substitute_in_expr(e, row, ctx, outer_alias);
8152            }
8153        }
8154    }
8155}
8156
8157/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
8158/// dedup inside the recursive iteration. Crude but deterministic
8159/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
8160fn encode_row_key(row: &Row) -> Vec<u8> {
8161    let mut out = Vec::new();
8162    for v in &row.values {
8163        let s = alloc::format!("{v:?}|");
8164        out.extend_from_slice(s.as_bytes());
8165    }
8166    out
8167}
8168
8169fn select_has_window(stmt: &SelectStatement) -> bool {
8170    for item in &stmt.items {
8171        if let SelectItem::Expr { expr, .. } = item
8172            && expr_has_window(expr)
8173        {
8174            return true;
8175        }
8176    }
8177    false
8178}
8179
8180fn expr_has_window(e: &Expr) -> bool {
8181    match e {
8182        Expr::WindowFunction { .. } => true,
8183        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
8184        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8185            expr_has_window(expr)
8186        }
8187        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
8188        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
8189        Expr::Extract { source, .. } => expr_has_window(source),
8190        Expr::ScalarSubquery(_)
8191        | Expr::Exists { .. }
8192        | Expr::InSubquery { .. }
8193        | Expr::Literal(_)
8194        | Expr::Placeholder(_)
8195        | Expr::Column(_) => false,
8196        Expr::Array(items) => items.iter().any(expr_has_window),
8197        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
8198        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
8199        Expr::Case {
8200            operand,
8201            branches,
8202            else_branch,
8203        } => {
8204            operand.as_deref().is_some_and(expr_has_window)
8205                || branches
8206                    .iter()
8207                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
8208                || else_branch.as_deref().is_some_and(expr_has_window)
8209        }
8210    }
8211}
8212
8213fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
8214    if let Expr::WindowFunction { .. } = e {
8215        // Deduplicate by structural equality on the expression
8216        // (cheap because window args + partition + order are
8217        // small). Without dedup we'd recompute identical windows
8218        // once per occurrence in the projection.
8219        if !out.iter().any(|x| x == e) {
8220            out.push(e.clone());
8221        }
8222        return;
8223    }
8224    match e {
8225        // Already handled by the early-return at the top.
8226        Expr::WindowFunction { .. } => unreachable!(),
8227        Expr::Binary { lhs, rhs, .. } => {
8228            collect_window_nodes(lhs, out);
8229            collect_window_nodes(rhs, out);
8230        }
8231        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8232            collect_window_nodes(expr, out);
8233        }
8234        Expr::FunctionCall { args, .. } => {
8235            for a in args {
8236                collect_window_nodes(a, out);
8237            }
8238        }
8239        Expr::Like { expr, pattern, .. } => {
8240            collect_window_nodes(expr, out);
8241            collect_window_nodes(pattern, out);
8242        }
8243        Expr::Extract { source, .. } => collect_window_nodes(source, out),
8244        _ => {}
8245    }
8246}
8247
8248fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
8249    if let Expr::WindowFunction { .. } = e
8250        && let Some(idx) = window_nodes.iter().position(|w| w == e)
8251    {
8252        *e = Expr::Column(spg_sql::ast::ColumnName {
8253            qualifier: None,
8254            name: alloc::format!("__win_{idx}"),
8255        });
8256        return;
8257    }
8258    match e {
8259        Expr::Binary { lhs, rhs, .. } => {
8260            rewrite_window_to_columns(lhs, window_nodes);
8261            rewrite_window_to_columns(rhs, window_nodes);
8262        }
8263        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8264            rewrite_window_to_columns(expr, window_nodes);
8265        }
8266        Expr::FunctionCall { args, .. } => {
8267            for a in args {
8268                rewrite_window_to_columns(a, window_nodes);
8269            }
8270        }
8271        Expr::Like { expr, pattern, .. } => {
8272            rewrite_window_to_columns(expr, window_nodes);
8273            rewrite_window_to_columns(pattern, window_nodes);
8274        }
8275        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
8276        _ => {}
8277    }
8278}
8279
8280/// Total order over partition-key tuples. NULL sorts as the
8281/// lowest value (matches the `<` partial order's NULL-last
8282/// behaviour with `INFINITY` flipped).
8283fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
8284    for (x, y) in a.iter().zip(b.iter()) {
8285        let c = value_cmp(x, y);
8286        if c != core::cmp::Ordering::Equal {
8287            return c;
8288        }
8289    }
8290    a.len().cmp(&b.len())
8291}
8292
8293fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
8294    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
8295        let c = value_cmp(va, vb);
8296        let c = if *desc { c.reverse() } else { c };
8297        if c != core::cmp::Ordering::Equal {
8298            return c;
8299        }
8300    }
8301    a.len().cmp(&b.len())
8302}
8303
8304#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
8305fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
8306    use core::cmp::Ordering;
8307    match (a, b) {
8308        (Value::Null, Value::Null) => Ordering::Equal,
8309        (Value::Null, _) => Ordering::Less,
8310        (_, Value::Null) => Ordering::Greater,
8311        (Value::Int(x), Value::Int(y)) => x.cmp(y),
8312        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
8313        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
8314        (Value::Text(x), Value::Text(y)) => x.cmp(y),
8315        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
8316        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
8317        (Value::Date(x), Value::Date(y)) => x.cmp(y),
8318        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
8319        // Cross-type compare: fall back to the debug rendering —
8320        // same-partition is the goal, exact order is irrelevant.
8321        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
8322    }
8323}
8324
8325/// Compute the window function's per-row output for one partition.
8326/// `slice` has (partition key, order key, original-row-index)
8327/// tuples already sorted by order key. `filtered_rows` is the
8328/// full row list indexed by original-row-index. `out_vals` is
8329/// the destination, also indexed by original-row-index.
8330#[allow(
8331    clippy::too_many_arguments,
8332    clippy::cast_possible_truncation,
8333    clippy::cast_possible_wrap,
8334    clippy::cast_precision_loss,
8335    clippy::cast_sign_loss,
8336    clippy::doc_markdown,
8337    clippy::too_many_lines,
8338    clippy::type_complexity,
8339    clippy::match_same_arms
8340)]
8341fn compute_window_partition(
8342    name: &str,
8343    args: &[Expr],
8344    ordered: bool,
8345    frame: Option<&WindowFrame>,
8346    null_treatment: spg_sql::ast::NullTreatment,
8347    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8348    filtered_rows: &[&Row],
8349    ctx: &EvalContext<'_>,
8350    out_vals: &mut [Value],
8351) -> Result<(), EngineError> {
8352    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
8353    let lower = name.to_ascii_lowercase();
8354    match lower.as_str() {
8355        "row_number" => {
8356            for (rank, (_, _, idx)) in slice.iter().enumerate() {
8357                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
8358            }
8359            Ok(())
8360        }
8361        "rank" => {
8362            let mut prev_key: Option<&[(Value, bool)]> = None;
8363            let mut current_rank: i64 = 1;
8364            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8365                if let Some(p) = prev_key
8366                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8367                {
8368                    current_rank = (i + 1) as i64;
8369                }
8370                if prev_key.is_none() {
8371                    current_rank = 1;
8372                }
8373                out_vals[*idx] = Value::BigInt(current_rank);
8374                prev_key = Some(okey.as_slice());
8375            }
8376            Ok(())
8377        }
8378        "dense_rank" => {
8379            let mut prev_key: Option<&[(Value, bool)]> = None;
8380            let mut current_rank: i64 = 0;
8381            for (_, okey, idx) in slice {
8382                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
8383                    current_rank += 1;
8384                }
8385                out_vals[*idx] = Value::BigInt(current_rank);
8386                prev_key = Some(okey.as_slice());
8387            }
8388            Ok(())
8389        }
8390        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
8391            // Pre-evaluate the function arg per row in the slice
8392            // (count_star has no arg).
8393            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
8394                slice.iter().map(|_| Value::Null).collect()
8395            } else {
8396                slice
8397                    .iter()
8398                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8399                    .collect::<Result<_, _>>()
8400                    .map_err(EngineError::Eval)?
8401            };
8402            // v4.20: pick the effective frame. Explicit frame
8403            // overrides the implicit default (running for ordered,
8404            // whole-partition for unordered).
8405            let eff = effective_frame(frame, ordered)?;
8406            #[allow(clippy::needless_range_loop)]
8407            for i in 0..slice.len() {
8408                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8409                let mut sum: f64 = 0.0;
8410                let mut count: i64 = 0;
8411                let mut min_v: Option<f64> = None;
8412                let mut max_v: Option<f64> = None;
8413                let mut row_count: i64 = 0;
8414                if lo <= hi {
8415                    for j in lo..=hi {
8416                        let v = &arg_values[j];
8417                        match lower.as_str() {
8418                            "count_star" => row_count += 1,
8419                            "count" => {
8420                                if !v.is_null() {
8421                                    count += 1;
8422                                }
8423                            }
8424                            _ => {
8425                                if let Some(x) = value_to_f64(v) {
8426                                    sum += x;
8427                                    count += 1;
8428                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
8429                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
8430                                }
8431                            }
8432                        }
8433                    }
8434                }
8435                let value = match lower.as_str() {
8436                    "count_star" => Value::BigInt(row_count),
8437                    "count" => Value::BigInt(count),
8438                    "sum" => Value::Float(sum),
8439                    "avg" => {
8440                        if count == 0 {
8441                            Value::Null
8442                        } else {
8443                            Value::Float(sum / count as f64)
8444                        }
8445                    }
8446                    "min" => min_v.map_or(Value::Null, Value::Float),
8447                    "max" => max_v.map_or(Value::Null, Value::Float),
8448                    _ => unreachable!(),
8449                };
8450                let (_, _, idx) = &slice[i];
8451                out_vals[*idx] = value;
8452            }
8453            Ok(())
8454        }
8455        "lag" | "lead" => {
8456            // lag(expr [, offset [, default]])
8457            // lead(expr [, offset [, default]])
8458            if args.is_empty() {
8459                return Err(EngineError::Unsupported(alloc::format!(
8460                    "{lower}() requires at least one argument"
8461                )));
8462            }
8463            let offset: i64 = if args.len() >= 2 {
8464                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8465                    .map_err(EngineError::Eval)?;
8466                match v {
8467                    Value::SmallInt(n) => i64::from(n),
8468                    Value::Int(n) => i64::from(n),
8469                    Value::BigInt(n) => n,
8470                    _ => {
8471                        return Err(EngineError::Unsupported(alloc::format!(
8472                            "{lower}() offset must be integer"
8473                        )));
8474                    }
8475                }
8476            } else {
8477                1
8478            };
8479            let default: Value = if args.len() >= 3 {
8480                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
8481                    .map_err(EngineError::Eval)?
8482            } else {
8483                Value::Null
8484            };
8485            let values: Vec<Value> = slice
8486                .iter()
8487                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8488                .collect::<Result<_, _>>()
8489                .map_err(EngineError::Eval)?;
8490            let n = slice.len();
8491            for (i, (_, _, idx)) in slice.iter().enumerate() {
8492                let signed_offset = if lower == "lag" { -offset } else { offset };
8493                let v = if ignore_nulls {
8494                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
8495                    // skipping NULL values; the `offset`-th non-NULL
8496                    // encountered is the result.
8497                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
8498                    let needed: i64 = signed_offset.abs();
8499                    if needed == 0 {
8500                        values[i].clone()
8501                    } else {
8502                        let mut j: i64 = i as i64;
8503                        let mut hits: i64 = 0;
8504                        let mut found: Option<Value> = None;
8505                        loop {
8506                            j += step;
8507                            if j < 0 || j >= n as i64 {
8508                                break;
8509                            }
8510                            #[allow(clippy::cast_sign_loss)]
8511                            let v = &values[j as usize];
8512                            if !v.is_null() {
8513                                hits += 1;
8514                                if hits == needed {
8515                                    found = Some(v.clone());
8516                                    break;
8517                                }
8518                            }
8519                        }
8520                        found.unwrap_or_else(|| default.clone())
8521                    }
8522                } else {
8523                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
8524                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
8525                        default.clone()
8526                    } else {
8527                        #[allow(clippy::cast_sign_loss)]
8528                        {
8529                            values[target_signed as usize].clone()
8530                        }
8531                    }
8532                };
8533                out_vals[*idx] = v;
8534            }
8535            Ok(())
8536        }
8537        "first_value" | "last_value" | "nth_value" => {
8538            if args.is_empty() {
8539                return Err(EngineError::Unsupported(alloc::format!(
8540                    "{lower}() requires at least one argument"
8541                )));
8542            }
8543            let values: Vec<Value> = slice
8544                .iter()
8545                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8546                .collect::<Result<_, _>>()
8547                .map_err(EngineError::Eval)?;
8548            let nth: usize = if lower == "nth_value" {
8549                if args.len() < 2 {
8550                    return Err(EngineError::Unsupported(
8551                        "nth_value() requires (expr, n)".into(),
8552                    ));
8553                }
8554                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8555                    .map_err(EngineError::Eval)?;
8556                let raw = match v {
8557                    Value::SmallInt(n) => i64::from(n),
8558                    Value::Int(n) => i64::from(n),
8559                    Value::BigInt(n) => n,
8560                    _ => {
8561                        return Err(EngineError::Unsupported(
8562                            "nth_value() n must be integer".into(),
8563                        ));
8564                    }
8565                };
8566                if raw < 1 {
8567                    return Err(EngineError::Unsupported(
8568                        "nth_value() n must be >= 1".into(),
8569                    ));
8570                }
8571                #[allow(clippy::cast_sign_loss)]
8572                {
8573                    raw as usize
8574                }
8575            } else {
8576                0
8577            };
8578            let eff = effective_frame(frame, ordered)?;
8579            for i in 0..slice.len() {
8580                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8581                let (_, _, idx) = &slice[i];
8582                let v = if lo > hi {
8583                    Value::Null
8584                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
8585                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
8586                    // selecting the boundary value within the frame.
8587                    if lower == "first_value" {
8588                        (lo..=hi)
8589                            .find_map(|j| {
8590                                let v = &values[j];
8591                                (!v.is_null()).then(|| v.clone())
8592                            })
8593                            .unwrap_or(Value::Null)
8594                    } else {
8595                        (lo..=hi)
8596                            .rev()
8597                            .find_map(|j| {
8598                                let v = &values[j];
8599                                (!v.is_null()).then(|| v.clone())
8600                            })
8601                            .unwrap_or(Value::Null)
8602                    }
8603                } else {
8604                    match lower.as_str() {
8605                        "first_value" => values[lo].clone(),
8606                        "last_value" => values[hi].clone(),
8607                        "nth_value" => {
8608                            let pos = lo + nth - 1;
8609                            if pos > hi {
8610                                Value::Null
8611                            } else {
8612                                values[pos].clone()
8613                            }
8614                        }
8615                        _ => unreachable!(),
8616                    }
8617                };
8618                out_vals[*idx] = v;
8619            }
8620            Ok(())
8621        }
8622        "ntile" => {
8623            if args.is_empty() {
8624                return Err(EngineError::Unsupported(
8625                    "ntile(n) requires an integer argument".into(),
8626                ));
8627            }
8628            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
8629                .map_err(EngineError::Eval)?;
8630            let bucket_count: i64 = match v {
8631                Value::SmallInt(n) => i64::from(n),
8632                Value::Int(n) => i64::from(n),
8633                Value::BigInt(n) => n,
8634                _ => {
8635                    return Err(EngineError::Unsupported(
8636                        "ntile() argument must be integer".into(),
8637                    ));
8638                }
8639            };
8640            if bucket_count < 1 {
8641                return Err(EngineError::Unsupported(
8642                    "ntile() argument must be >= 1".into(),
8643                ));
8644            }
8645            #[allow(clippy::cast_sign_loss)]
8646            let buckets = bucket_count as usize;
8647            let n = slice.len();
8648            // Each bucket gets `base` rows; the first `extras` buckets
8649            // get one extra. PG semantics.
8650            let base = n / buckets;
8651            let extras = n % buckets;
8652            let mut bucket: usize = 1;
8653            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
8654            let mut buckets_with_extra_remaining = extras;
8655            for (_, _, idx) in slice {
8656                if remaining_in_bucket == 0 {
8657                    bucket += 1;
8658                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
8659                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
8660                        base + 1
8661                    } else {
8662                        base
8663                    };
8664                    // Edge: if base==0 and extras==0, all rows fit;
8665                    // shouldn't reach here, but guard anyway.
8666                    if remaining_in_bucket == 0 {
8667                        remaining_in_bucket = 1;
8668                    }
8669                }
8670                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
8671                remaining_in_bucket -= 1;
8672            }
8673            Ok(())
8674        }
8675        "percent_rank" => {
8676            // (rank - 1) / (n - 1) where rank is the standard RANK().
8677            // Single-row partitions get 0.
8678            let n = slice.len();
8679            let mut prev_key: Option<&[(Value, bool)]> = None;
8680            let mut current_rank: i64 = 1;
8681            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8682                if let Some(p) = prev_key
8683                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8684                {
8685                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
8686                }
8687                if prev_key.is_none() {
8688                    current_rank = 1;
8689                }
8690                #[allow(clippy::cast_precision_loss)]
8691                let pr = if n <= 1 {
8692                    0.0
8693                } else {
8694                    (current_rank - 1) as f64 / (n - 1) as f64
8695                };
8696                out_vals[*idx] = Value::Float(pr);
8697                prev_key = Some(okey.as_slice());
8698            }
8699            Ok(())
8700        }
8701        "cume_dist" => {
8702            // # rows up to and including this row's peer group / n.
8703            let n = slice.len();
8704            // First pass: find peer-group-end rank for each row.
8705            for i in 0..slice.len() {
8706                let peer_end = peer_group_end(slice, i);
8707                #[allow(clippy::cast_precision_loss)]
8708                let cd = (peer_end + 1) as f64 / n as f64;
8709                let (_, _, idx) = &slice[i];
8710                out_vals[*idx] = Value::Float(cd);
8711            }
8712            Ok(())
8713        }
8714        other => Err(EngineError::Unsupported(alloc::format!(
8715            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
8716        ))),
8717    }
8718}
8719
8720/// v4.20: resolve the user-provided frame down to a normalised
8721/// `(kind, start, end)`. `None` means default — derive from
8722/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
8723/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
8724/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
8725/// end → CURRENT ROW per the PG spec.
8726fn effective_frame(
8727    frame: Option<&WindowFrame>,
8728    ordered: bool,
8729) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
8730    match frame {
8731        None => {
8732            if ordered {
8733                Ok((
8734                    FrameKind::Range,
8735                    FrameBound::UnboundedPreceding,
8736                    FrameBound::CurrentRow,
8737                ))
8738            } else {
8739                Ok((
8740                    FrameKind::Rows,
8741                    FrameBound::UnboundedPreceding,
8742                    FrameBound::UnboundedFollowing,
8743                ))
8744            }
8745        }
8746        Some(fr) => {
8747            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
8748            // Reject start > end (a few impossible combinations).
8749            if matches!(fr.start, FrameBound::UnboundedFollowing)
8750                || matches!(end, FrameBound::UnboundedPreceding)
8751            {
8752                return Err(EngineError::Unsupported(alloc::format!(
8753                    "invalid frame: start={:?} end={:?}",
8754                    fr.start,
8755                    end
8756                )));
8757            }
8758            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
8759            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
8760            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
8761            // implemented in v4.20.
8762            if fr.kind == FrameKind::Range
8763                && (matches!(
8764                    fr.start,
8765                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8766                ) || matches!(
8767                    end,
8768                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8769                ))
8770            {
8771                return Err(EngineError::Unsupported(
8772                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
8773                ));
8774            }
8775            Ok((fr.kind, fr.start.clone(), end))
8776        }
8777    }
8778}
8779
8780/// Compute `(lo, hi)` row-index bounds inside the partition slice
8781/// for the row at position `i`. Inclusive, clamped to
8782/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
8783#[allow(clippy::type_complexity)]
8784fn frame_bounds_for_row(
8785    eff: &(FrameKind, FrameBound, FrameBound),
8786    i: usize,
8787    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8788) -> (usize, usize) {
8789    let (kind, start, end) = eff;
8790    let n = slice.len();
8791    let last = n.saturating_sub(1);
8792    let (mut lo, mut hi) = match kind {
8793        FrameKind::Rows => {
8794            let lo = match start {
8795                FrameBound::UnboundedPreceding => 0,
8796                FrameBound::OffsetPreceding(k) => {
8797                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8798                    i.saturating_sub(k)
8799                }
8800                FrameBound::CurrentRow => i,
8801                FrameBound::OffsetFollowing(k) => {
8802                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8803                    i.saturating_add(k).min(last)
8804                }
8805                FrameBound::UnboundedFollowing => last,
8806            };
8807            let hi = match end {
8808                FrameBound::UnboundedPreceding => 0,
8809                FrameBound::OffsetPreceding(k) => {
8810                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8811                    i.saturating_sub(k)
8812                }
8813                FrameBound::CurrentRow => i,
8814                FrameBound::OffsetFollowing(k) => {
8815                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8816                    i.saturating_add(k).min(last)
8817                }
8818                FrameBound::UnboundedFollowing => last,
8819            };
8820            (lo, hi)
8821        }
8822        FrameKind::Range => {
8823            // RANGE bounds are peer-aware. With only UNBOUNDED and
8824            // CURRENT ROW supported (rejected at effective_frame for
8825            // explicit offsets), the start/end map to the
8826            // partition's full extent at the same-order-key peer
8827            // group boundary.
8828            let lo = match start {
8829                FrameBound::UnboundedPreceding => 0,
8830                FrameBound::CurrentRow => peer_group_start(slice, i),
8831                FrameBound::UnboundedFollowing => last,
8832                _ => unreachable!("offset bounds rejected for RANGE"),
8833            };
8834            let hi = match end {
8835                FrameBound::UnboundedPreceding => 0,
8836                FrameBound::CurrentRow => peer_group_end(slice, i),
8837                FrameBound::UnboundedFollowing => last,
8838                _ => unreachable!("offset bounds rejected for RANGE"),
8839            };
8840            (lo, hi)
8841        }
8842    };
8843    if hi >= n {
8844        hi = last;
8845    }
8846    if lo >= n {
8847        lo = last;
8848    }
8849    (lo, hi)
8850}
8851
8852/// Find the inclusive index of the first row with the same ORDER
8853/// BY key as `slice[i]`. Slice is already sorted by partition then
8854/// order, so peers are contiguous.
8855#[allow(clippy::type_complexity)]
8856fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8857    let key = &slice[i].1;
8858    let mut j = i;
8859    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
8860        j -= 1;
8861    }
8862    j
8863}
8864
8865/// Find the inclusive index of the last row with the same ORDER
8866/// BY key as `slice[i]`.
8867#[allow(clippy::type_complexity)]
8868fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8869    let key = &slice[i].1;
8870    let mut j = i;
8871    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
8872        j += 1;
8873    }
8874    j
8875}
8876
8877fn value_to_f64(v: &Value) -> Option<f64> {
8878    match v {
8879        Value::SmallInt(n) => Some(f64::from(*n)),
8880        Value::Int(n) => Some(f64::from(*n)),
8881        #[allow(clippy::cast_precision_loss)]
8882        Value::BigInt(n) => Some(*n as f64),
8883        Value::Float(x) => Some(*x),
8884        _ => None,
8885    }
8886}
8887
8888/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
8889/// projection / `order_by` — saves cloning the AST when there are
8890/// none (the common case).
8891fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
8892    let mut any = false;
8893    for item in &stmt.items {
8894        if let SelectItem::Expr { expr, .. } = item {
8895            any = any || expr_has_subquery(expr);
8896        }
8897    }
8898    if let Some(w) = &stmt.where_ {
8899        any = any || expr_has_subquery(w);
8900    }
8901    if let Some(h) = &stmt.having {
8902        any = any || expr_has_subquery(h);
8903    }
8904    for o in &stmt.order_by {
8905        any = any || expr_has_subquery(&o.expr);
8906    }
8907    for (_, peer) in &stmt.unions {
8908        any = any || expr_tree_has_subquery(peer);
8909    }
8910    any
8911}
8912
8913fn expr_has_subquery(e: &Expr) -> bool {
8914    match e {
8915        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
8916        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
8917        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8918            expr_has_subquery(expr)
8919        }
8920        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
8921        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
8922        Expr::Extract { source, .. } => expr_has_subquery(source),
8923        Expr::WindowFunction {
8924            args,
8925            partition_by,
8926            order_by,
8927            ..
8928        } => {
8929            args.iter().any(expr_has_subquery)
8930                || partition_by.iter().any(expr_has_subquery)
8931                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
8932        }
8933        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
8934        Expr::Array(items) => items.iter().any(expr_has_subquery),
8935        Expr::ArraySubscript { target, index } => {
8936            expr_has_subquery(target) || expr_has_subquery(index)
8937        }
8938        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
8939        Expr::Case {
8940            operand,
8941            branches,
8942            else_branch,
8943        } => {
8944            operand.as_deref().is_some_and(expr_has_subquery)
8945                || branches
8946                    .iter()
8947                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
8948                || else_branch.as_deref().is_some_and(expr_has_subquery)
8949        }
8950    }
8951}
8952
8953/// v4.10 helper: materialise a runtime `Value` back into an AST
8954/// `Expr::Literal` for the subquery-rewrite path. Supports the
8955/// types `Literal` can represent (Integer / Float / Text / Bool /
8956/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
8957/// would lose precision through Literal and aren't supported in
8958/// uncorrelated-subquery results; they error with a clear hint.
8959fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
8960    let lit = match v {
8961        Value::Null => Literal::Null,
8962        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8963        Value::Int(n) => Literal::Integer(i64::from(n)),
8964        Value::BigInt(n) => Literal::Integer(n),
8965        Value::Float(x) => Literal::Float(x),
8966        Value::Text(s) | Value::Json(s) => Literal::String(s),
8967        Value::Bool(b) => Literal::Bool(b),
8968        other => {
8969            return Err(EngineError::Unsupported(alloc::format!(
8970                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
8971                other.data_type()
8972            )));
8973        }
8974    };
8975    Ok(Expr::Literal(lit))
8976}
8977
8978/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
8979/// round-5 G4). Covers the most common `Value` variants. Types
8980/// that need lossy textual round-trip (BYTEA, arrays, ts*)
8981/// surface as an Unsupported error so the caller can add a cast
8982/// in the inner SELECT.
8983fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
8984    let lit = match v {
8985        Value::Null => Literal::Null,
8986        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8987        Value::Int(n) => Literal::Integer(i64::from(n)),
8988        Value::BigInt(n) => Literal::Integer(n),
8989        Value::Float(x) => Literal::Float(x),
8990        Value::Text(s) | Value::Json(s) => Literal::String(s),
8991        Value::Bool(b) => Literal::Bool(b),
8992        Value::Vector(xs) => Literal::Vector(xs),
8993        // Date / Timestamp / Timestamptz / Numeric round-trip
8994        // through a TEXT literal that `coerce_value` re-parses
8995        // against the target column type.
8996        Value::Date(days) => {
8997            let micros = (i64::from(days)) * 86_400_000_000;
8998            Literal::String(format_timestamp_micros_as_date(micros))
8999        }
9000        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
9001        Value::Numeric { scaled, scale } => {
9002            Literal::String(format_numeric(scaled, scale))
9003        }
9004        other => {
9005            return Err(EngineError::Unsupported(alloc::format!(
9006                "INSERT … SELECT cannot materialise value of type {:?}; \
9007                 add an explicit CAST in the inner SELECT",
9008                other.data_type()
9009            )));
9010        }
9011    };
9012    Ok(Expr::Literal(lit))
9013}
9014
9015fn format_timestamp_micros(us: i64) -> String {
9016    // Same Y/M/D split used by the wire layer; epoch-relative.
9017    let days = us.div_euclid(86_400_000_000);
9018    let intra_day = us.rem_euclid(86_400_000_000);
9019    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
9020    let secs = intra_day / 1_000_000;
9021    let us_rem = intra_day % 1_000_000;
9022    let h = (secs / 3600) % 24;
9023    let m = (secs / 60) % 60;
9024    let s = secs % 60;
9025    if us_rem == 0 {
9026        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
9027    } else {
9028        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
9029    }
9030}
9031
9032fn format_timestamp_micros_as_date(us: i64) -> String {
9033    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
9034    // Gregorian conversion used by spg-engine's date helpers.
9035    let days = us.div_euclid(86_400_000_000);
9036    // 1970-01-01 = JDN 2440588.
9037    let jdn = days + 2_440_588;
9038    let (y, mo, d) = jdn_to_ymd(jdn);
9039    alloc::format!("{y:04}-{mo:02}-{d:02}")
9040}
9041
/// Converts a Julian Day Number into a `(year, month, day)` triple
/// using the Fliegel & Van Flandern (1968) integer algorithm.
///
/// The arithmetic relies on truncating integer division, so it is
/// only meant for positive JDNs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    let shifted = jdn + 68569;
    let century = (4 * shifted) / 146_097;
    let within_century = shifted - (146_097 * century + 3) / 4;
    let year_of_century = (4000 * (within_century + 1)) / 1_461_001;
    let within_year = within_century - (1461 * year_of_century) / 4 + 31;
    let month_index = (80 * within_year) / 2447;
    // `carry` is 1 when the month index runs past the 12-month wrap
    // and the year must be bumped, otherwise 0.
    let carry = month_index / 11;

    let day = (within_year - (2447 * month_index) / 80) as u32;
    let month = (month_index + 2 - 12 * carry) as u32;
    let year = 100 * (century - 49) + year_of_century + carry;
    (year, month, day)
}
9056
/// Renders a fixed-point NUMERIC (`scaled` × 10^-`scale`) as decimal
/// text.
///
/// * `scale == 0` prints the integer as-is.
/// * Otherwise the output is `[-]<whole>.<frac>` with the fraction
///   zero-padded to exactly `scale` digits.
///
/// `10^scale` no longer fits in a `u128` once `scale >= 39`. Every
/// `i128` magnitude is below `10^39`, so in that case the whole part
/// is necessarily zero and the entire magnitude is the fraction. This
/// is handled explicitly instead of letting `pow` overflow.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let magnitude = scaled.unsigned_abs();
    let (whole, frac) = match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => (magnitude / divisor, magnitude % divisor),
        // Divisor exceeds u128::MAX >= magnitude: nothing left of the point.
        None => (0, magnitude),
    };
    // The sign is emitted separately so values in (-1, 0) keep their
    // minus in front of the zero whole part.
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!(
        "{sign}{whole}.{frac:0width$}",
        width = usize::from(scale)
    )
}
9071
// v6.1.1 — the notes below describe `substitute_placeholders`, which
// is defined further down in this file. They are kept as a plain
// comment so rustdoc does not merge them into the docs of
// `rewrite_column_in_source`.
//
// Walk the prepared `Statement` AST and replace every
// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
// params[n-1]))`. The dispatch downstream sees a `Statement`
// indistinguishable from a simple-query parse, so the exec path
// stays unchanged.
//
// Errors fall into one shape: a `$N` references past the bound
// `params.len()`. Out-of-range happens when the Bind didn't
// supply enough values; pgwire surfaces this as a protocol error
// to the client.
9082/// v7.15.0 — rewrite every (potentially-qualified) column
9083/// identifier matching `old` to `new` in a stored SQL source
9084/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
9085/// CHECK predicate sources, partial-index predicate sources,
9086/// and runtime DEFAULT expression sources before they get
9087/// re-parsed on the next INSERT/UPDATE.
9088///
9089/// Round-trips through the parser, so the rewritten output is
9090/// the canonical Display form (matches what the engine stores
9091/// for fresh predicates). If the source doesn't parse, surfaces
9092/// the parse error — the invariant that stored predicates are
9093/// in canonical Display form means a parse failure here is a
9094/// real bug, not a user mistake to swallow.
9095fn rewrite_column_in_source(
9096    src: &str,
9097    old: &str,
9098    new: &str,
9099) -> Result<alloc::string::String, EngineError> {
9100    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
9101        EngineError::Unsupported(alloc::format!(
9102            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
9103             failed to parse for rewrite ({e})"
9104        ))
9105    })?;
9106    rewrite_column_in_expr(&mut expr, old, new);
9107    Ok(alloc::format!("{expr}"))
9108}
9109
9110/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
9111/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
9112/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
9113/// gets rewritten because the AST has no way to tell us this
9114/// predicate is on table T versus table T2 — predicate sources
9115/// in SPG are always scoped to the owning table, so any
9116/// qualifier present is either redundant or wrong.
9117fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
9118    match e {
9119        Expr::Column(c) => {
9120            if c.name.eq_ignore_ascii_case(old) {
9121                c.name = new.to_string();
9122            }
9123        }
9124        Expr::Binary { lhs, rhs, .. } => {
9125            rewrite_column_in_expr(lhs, old, new);
9126            rewrite_column_in_expr(rhs, old, new);
9127        }
9128        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9129            rewrite_column_in_expr(expr, old, new);
9130        }
9131        Expr::FunctionCall { args, .. } => {
9132            for a in args {
9133                rewrite_column_in_expr(a, old, new);
9134            }
9135        }
9136        Expr::Like { expr, pattern, .. } => {
9137            rewrite_column_in_expr(expr, old, new);
9138            rewrite_column_in_expr(pattern, old, new);
9139        }
9140        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
9141        Expr::WindowFunction {
9142            args,
9143            partition_by,
9144            order_by,
9145            ..
9146        } => {
9147            for a in args {
9148                rewrite_column_in_expr(a, old, new);
9149            }
9150            for p in partition_by {
9151                rewrite_column_in_expr(p, old, new);
9152            }
9153            for (o, _) in order_by {
9154                rewrite_column_in_expr(o, old, new);
9155            }
9156        }
9157        Expr::Array(items) => {
9158            for elem in items {
9159                rewrite_column_in_expr(elem, old, new);
9160            }
9161        }
9162        Expr::ArraySubscript { target, index } => {
9163            rewrite_column_in_expr(target, old, new);
9164            rewrite_column_in_expr(index, old, new);
9165        }
9166        Expr::AnyAll { expr, array, .. } => {
9167            rewrite_column_in_expr(expr, old, new);
9168            rewrite_column_in_expr(array, old, new);
9169        }
9170        Expr::Case {
9171            operand,
9172            branches,
9173            else_branch,
9174        } => {
9175            if let Some(o) = operand {
9176                rewrite_column_in_expr(o, old, new);
9177            }
9178            for (w, t) in branches {
9179                rewrite_column_in_expr(w, old, new);
9180                rewrite_column_in_expr(t, old, new);
9181            }
9182            if let Some(e) = else_branch {
9183                rewrite_column_in_expr(e, old, new);
9184            }
9185        }
9186        // Stored predicate sources never contain subqueries —
9187        // CHECK / partial-index / runtime_default are all scalar.
9188        // If a future feature changes that, recurse here.
9189        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
9190        Expr::Literal(_) | Expr::Placeholder(_) => {}
9191    }
9192}
9193
9194fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
9195    match stmt {
9196        Statement::Select(s) => substitute_select(s, params)?,
9197        Statement::Insert(ins) => {
9198            for row in &mut ins.rows {
9199                for e in row {
9200                    substitute_expr(e, params)?;
9201                }
9202            }
9203        }
9204        Statement::Update(u) => {
9205            for (_, e) in &mut u.assignments {
9206                substitute_expr(e, params)?;
9207            }
9208            if let Some(w) = &mut u.where_ {
9209                substitute_expr(w, params)?;
9210            }
9211        }
9212        Statement::Delete(d) => {
9213            if let Some(w) = &mut d.where_ {
9214                substitute_expr(w, params)?;
9215            }
9216        }
9217        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
9218        // Other statements (CREATE / BEGIN / SHOW / …) have no
9219        // expression slots; no walk needed.
9220        _ => {}
9221    }
9222    Ok(())
9223}
9224
9225fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
9226    for item in &mut s.items {
9227        if let SelectItem::Expr { expr, .. } = item {
9228            substitute_expr(expr, params)?;
9229        }
9230    }
9231    if let Some(w) = &mut s.where_ {
9232        substitute_expr(w, params)?;
9233    }
9234    if let Some(gs) = &mut s.group_by {
9235        for g in gs {
9236            substitute_expr(g, params)?;
9237        }
9238    }
9239    if let Some(h) = &mut s.having {
9240        substitute_expr(h, params)?;
9241    }
9242    for o in &mut s.order_by {
9243        substitute_expr(&mut o.expr, params)?;
9244    }
9245    for (_, peer) in &mut s.unions {
9246        substitute_select(peer, params)?;
9247    }
9248    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
9249    // mailrs H2. After this pass each LIMIT/OFFSET that was a
9250    // Placeholder is rewritten to Literal so the existing
9251    // `LimitExpr::as_literal` path consumes a concrete u32.
9252    if let Some(le) = s.limit {
9253        s.limit = Some(resolve_limit_placeholder(le, params)?);
9254    }
9255    if let Some(le) = s.offset {
9256        s.offset = Some(resolve_limit_placeholder(le, params)?);
9257    }
9258    Ok(())
9259}
9260
9261fn resolve_limit_placeholder(
9262    le: spg_sql::ast::LimitExpr,
9263    params: &[Value],
9264) -> Result<spg_sql::ast::LimitExpr, EngineError> {
9265    use spg_sql::ast::LimitExpr;
9266    match le {
9267        LimitExpr::Literal(_) => Ok(le),
9268        LimitExpr::Placeholder(n) => {
9269            let idx = usize::from(n).saturating_sub(1);
9270            let v = params.get(idx).ok_or_else(|| {
9271                EngineError::Eval(EvalError::PlaceholderOutOfRange {
9272                    n,
9273                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9274                })
9275            })?;
9276            let int = match v {
9277                Value::SmallInt(x) => Some(i64::from(*x)),
9278                Value::Int(x) => Some(i64::from(*x)),
9279                Value::BigInt(x) => Some(*x),
9280                _ => None,
9281            }
9282            .ok_or_else(|| {
9283                EngineError::Unsupported(alloc::format!(
9284                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
9285                ))
9286            })?;
9287            if int < 0 {
9288                return Err(EngineError::Unsupported(alloc::format!(
9289                    "LIMIT/OFFSET ${n} bound to negative value {int}"
9290                )));
9291            }
9292            let bounded = u32::try_from(int).map_err(|_| {
9293                EngineError::Unsupported(alloc::format!(
9294                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
9295                ))
9296            })?;
9297            Ok(LimitExpr::Literal(bounded))
9298        }
9299    }
9300}
9301
9302fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
9303    if let Expr::Placeholder(n) = e {
9304        let idx = usize::from(*n).saturating_sub(1);
9305        let v = params.get(idx).ok_or_else(|| {
9306            EngineError::Eval(EvalError::PlaceholderOutOfRange {
9307                n: *n,
9308                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
9309            })
9310        })?;
9311        *e = Expr::Literal(value_to_literal(v.clone()));
9312        return Ok(());
9313    }
9314    match e {
9315        Expr::Binary { lhs, rhs, .. } => {
9316            substitute_expr(lhs, params)?;
9317            substitute_expr(rhs, params)?;
9318        }
9319        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9320            substitute_expr(expr, params)?;
9321        }
9322        Expr::FunctionCall { args, .. } => {
9323            for a in args {
9324                substitute_expr(a, params)?;
9325            }
9326        }
9327        Expr::Like { expr, pattern, .. } => {
9328            substitute_expr(expr, params)?;
9329            substitute_expr(pattern, params)?;
9330        }
9331        Expr::Extract { source, .. } => substitute_expr(source, params)?,
9332        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
9333        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
9334        Expr::InSubquery { expr, subquery, .. } => {
9335            substitute_expr(expr, params)?;
9336            substitute_select(subquery, params)?;
9337        }
9338        Expr::WindowFunction {
9339            args,
9340            partition_by,
9341            order_by,
9342            ..
9343        } => {
9344            for a in args {
9345                substitute_expr(a, params)?;
9346            }
9347            for p in partition_by {
9348                substitute_expr(p, params)?;
9349            }
9350            for (e, _) in order_by {
9351                substitute_expr(e, params)?;
9352            }
9353        }
9354        Expr::Literal(_) | Expr::Column(_) => {}
9355        // Already handled above.
9356        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
9357        Expr::Array(items) => {
9358            for elem in items {
9359                substitute_expr(elem, params)?;
9360            }
9361        }
9362        Expr::ArraySubscript { target, index } => {
9363            substitute_expr(target, params)?;
9364            substitute_expr(index, params)?;
9365        }
9366        Expr::AnyAll { expr, array, .. } => {
9367            substitute_expr(expr, params)?;
9368            substitute_expr(array, params)?;
9369        }
9370        Expr::Case {
9371            operand,
9372            branches,
9373            else_branch,
9374        } => {
9375            if let Some(o) = operand {
9376                substitute_expr(o, params)?;
9377            }
9378            for (w, t) in branches {
9379                substitute_expr(w, params)?;
9380                substitute_expr(t, params)?;
9381            }
9382            if let Some(e) = else_branch {
9383                substitute_expr(e, params)?;
9384            }
9385        }
9386    }
9387    Ok(())
9388}
9389
// v6.1.1 — the notes below describe `value_to_literal`, which is
// defined further down in this file. They are kept as a plain comment
// so rustdoc does not merge them into the docs of
// `sort_values_for_histogram`.
//
// Convert a runtime `Value` into the closest matching `Literal` for
// the substitute walker. Lossless for the simple scalars (Int /
// Float / Text / Bool); Numeric / Date / Timestamp / Json / Interval
// render as their canonical text form so the downstream
// `coerce_value` can re-parse against the target column type. SQ8 /
// HalfVector cells are NOT expected as bind params; pgwire's Bind
// decodes vector params to the f32 representation before they reach
// this helper.
9398/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
9399/// column's non-NULL sample before histogram building. Cross-type
9400/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
9401/// same widening the eval-side `compare` operator uses; everything
9402/// else (the genuinely-incompatible pairs) falls back to ordering
9403/// by canonical string form so the sort is still total + stable.
9404/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
9405/// here only via the string-fallback path because vector columns
9406/// are filtered out upstream.
9407fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
9408    use core::cmp::Ordering;
9409    match (a, b) {
9410        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
9411        (Value::Int(a), Value::Int(b)) => a.cmp(b),
9412        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
9413        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
9414        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
9415        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9416        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
9417        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9418        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
9419        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
9420        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
9421        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
9422        (Value::Date(a), Value::Date(b)) => a.cmp(b),
9423        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
9424        // Mixed numeric/float — widen to f64 and compare.
9425        (Value::SmallInt(n), Value::Float(x)) => {
9426            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9427        }
9428        (Value::Float(x), Value::SmallInt(n)) => {
9429            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9430        }
9431        (Value::Int(n), Value::Float(x)) => {
9432            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9433        }
9434        (Value::Float(x), Value::Int(n)) => {
9435            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9436        }
9437        (Value::BigInt(n), Value::Float(x)) => {
9438            #[allow(clippy::cast_precision_loss)]
9439            let nf = *n as f64;
9440            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
9441        }
9442        (Value::Float(x), Value::BigInt(n)) => {
9443            #[allow(clippy::cast_precision_loss)]
9444            let nf = *n as f64;
9445            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
9446        }
9447        // Cross-type fallback: lexicographic on canonical form.
9448        // Total + stable so the sort is well-defined.
9449        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
9450    }
9451}
9452
/// v6.2.0 — formats histogram bounds as a `[v0, v1, ...]` string for
/// the `spg_statistic.histogram_bounds` column.
///
/// A bound is emitted verbatim unless it is empty or contains `,`,
/// `[`, `]` or `"`. Such bounds are wrapped in double quotes, with
/// every `"` and `\` inside them backslash-escaped. In v6.2.0 the
/// rendered form is only read by humans, so the escaping is
/// deliberately conservative.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    let mut first = true;
    for bound in bounds {
        if !first {
            rendered.push_str(", ");
        }
        first = false;

        let plain = !bound.is_empty()
            && !bound.chars().any(|c| matches!(c, ',' | '[' | ']' | '"'));
        if plain {
            rendered.push_str(bound);
            continue;
        }

        rendered.push('"');
        for c in bound.chars() {
            if matches!(c, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(c);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
9483
9484/// v6.2.0 — canonical textual form of a `Value` for histogram
9485/// bound storage. Strings used by ANALYZE for sort + bound output.
9486/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
9487/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
9488/// the same form `format_date` / `format_timestamp` produce for
9489/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
9490/// reach this only via a non-Vector column (vector columns are
9491/// skipped upstream); they fall back to a Debug-derived form so
9492/// stats still serialise without crashing.
9493pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
9494    match v {
9495        Value::Null => "NULL".to_string(),
9496        Value::SmallInt(n) => alloc::format!("{n}"),
9497        Value::Int(n) => alloc::format!("{n}"),
9498        Value::BigInt(n) => alloc::format!("{n}"),
9499        Value::Float(x) => alloc::format!("{x:?}"),
9500        Value::Text(s) | Value::Json(s) => s.clone(),
9501        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
9502        Value::Date(d) => eval::format_date(*d),
9503        Value::Timestamp(t) => eval::format_timestamp(*t),
9504        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
9505        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
9506        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
9507            // Unreachable in practice (vector columns are filtered
9508            // out before this). Defensive fallback so a future
9509            // vector-stats path doesn't crash.
9510            alloc::format!("{v:?}")
9511        }
9512        // v7.5.0 — Value is #[non_exhaustive] for downstream
9513        // forward-compat. Future variants fall through to Debug
9514        // form here (same shape as the vector fallback above).
9515        _ => alloc::format!("{v:?}"),
9516    }
9517}
9518
/// v6.2.0 — reports whether a table is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently always `false`: publications, subscriptions, users and
/// statistics live as engine fields rather than catalog tables, so
/// there is nothing internal to exclude and every user table is
/// analysed. The hook is reserved for when internal tables appear.
const fn is_internal_table_name(_name: &str) -> bool {
    // No internal tables exist yet, whatever the name.
    false
}
9528
9529fn value_to_literal(v: Value) -> Literal {
9530    match v {
9531        Value::Null => Literal::Null,
9532        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9533        Value::Int(n) => Literal::Integer(i64::from(n)),
9534        Value::BigInt(n) => Literal::Integer(n),
9535        Value::Float(x) => Literal::Float(x),
9536        Value::Text(s) | Value::Json(s) => Literal::String(s),
9537        Value::Bool(b) => Literal::Bool(b),
9538        Value::Vector(v) => Literal::Vector(v),
9539        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
9540        Value::Date(d) => Literal::String(eval::format_date(d)),
9541        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
9542        Value::Interval { months, micros } => Literal::Interval {
9543            months,
9544            micros,
9545            text: eval::format_interval(months, micros),
9546        },
9547        // SQ8 / halfvec cells dequantise to f32 before reaching the
9548        // substitute walker; pgwire's Bind path handles that.
9549        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
9550        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
9551        // v7.5.0 — Value is #[non_exhaustive]; future variants
9552        // render as Debug-form String literal until explicit
9553        // mapping is added.
9554        v => Literal::String(alloc::format!("{v:?}")),
9555    }
9556}
9557
9558fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
9559    let Some(now) = now_micros else {
9560        return;
9561    };
9562    match stmt {
9563        Statement::Select(s) => rewrite_select_clock(s, now),
9564        Statement::Insert(ins) => {
9565            for row in &mut ins.rows {
9566                for e in row {
9567                    rewrite_expr_clock(e, now);
9568                }
9569            }
9570        }
9571        _ => {}
9572    }
9573}
9574
9575fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
9576    for item in &mut s.items {
9577        if let SelectItem::Expr { expr, .. } = item {
9578            rewrite_expr_clock(expr, now);
9579        }
9580    }
9581    if let Some(w) = &mut s.where_ {
9582        rewrite_expr_clock(w, now);
9583    }
9584    if let Some(gs) = &mut s.group_by {
9585        for g in gs {
9586            rewrite_expr_clock(g, now);
9587        }
9588    }
9589    if let Some(h) = &mut s.having {
9590        rewrite_expr_clock(h, now);
9591    }
9592    for o in &mut s.order_by {
9593        rewrite_expr_clock(&mut o.expr, now);
9594    }
9595    for (_, peer) in &mut s.unions {
9596        rewrite_select_clock(peer, now);
9597    }
9598}
9599
9600/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
9601/// Literal / Column-with-qualifier (the dominant cases on a typical
9602/// AST) take a single pattern dispatch and exit. The clock-rewrite
9603/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
9604/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
9605/// refs) sit on their own arms with match guards so the fall-through
9606/// to the recursive arms is unambiguous.
9607fn rewrite_expr_clock(e: &mut Expr, now: i64) {
9608    // Fast-path test on the no-recursion shapes first. We can't fold
9609    // them into the big match below because they need to *replace* `e`
9610    // outright; the recursive arms below match on its sub-fields.
9611    if let Some(replacement) = clock_replacement_for(e, now) {
9612        *e = replacement;
9613        return;
9614    }
9615    match e {
9616        Expr::Binary { lhs, rhs, .. } => {
9617            rewrite_expr_clock(lhs, now);
9618            rewrite_expr_clock(rhs, now);
9619        }
9620        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9621            rewrite_expr_clock(expr, now);
9622        }
9623        Expr::FunctionCall { args, .. } => {
9624            for a in args {
9625                rewrite_expr_clock(a, now);
9626            }
9627        }
9628        Expr::Like { expr, pattern, .. } => {
9629            rewrite_expr_clock(expr, now);
9630            rewrite_expr_clock(pattern, now);
9631        }
9632        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
9633        // v4.10 subquery nodes — recurse into the inner SELECT's
9634        // expression slots so e.g. SELECT NOW() in a scalar
9635        // subquery picks up the same instant as the outer query.
9636        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
9637        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
9638        Expr::InSubquery { expr, subquery, .. } => {
9639            rewrite_expr_clock(expr, now);
9640            rewrite_select_clock(subquery, now);
9641        }
9642        // v4.12 window functions — args + PARTITION BY + ORDER BY
9643        // may all reference clock literals.
9644        Expr::WindowFunction {
9645            args,
9646            partition_by,
9647            order_by,
9648            ..
9649        } => {
9650            for a in args {
9651                rewrite_expr_clock(a, now);
9652            }
9653            for p in partition_by {
9654                rewrite_expr_clock(p, now);
9655            }
9656            for (e, _) in order_by {
9657                rewrite_expr_clock(e, now);
9658            }
9659        }
9660        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
9661        Expr::Array(items) => {
9662            for elem in items {
9663                rewrite_expr_clock(elem, now);
9664            }
9665        }
9666        Expr::ArraySubscript { target, index } => {
9667            rewrite_expr_clock(target, now);
9668            rewrite_expr_clock(index, now);
9669        }
9670        Expr::AnyAll { expr, array, .. } => {
9671            rewrite_expr_clock(expr, now);
9672            rewrite_expr_clock(array, now);
9673        }
9674        Expr::Case {
9675            operand,
9676            branches,
9677            else_branch,
9678        } => {
9679            if let Some(o) = operand {
9680                rewrite_expr_clock(o, now);
9681            }
9682            for (w, t) in branches {
9683                rewrite_expr_clock(w, now);
9684                rewrite_expr_clock(t, now);
9685            }
9686            if let Some(e) = else_branch {
9687                rewrite_expr_clock(e, now);
9688            }
9689        }
9690    }
9691}
9692
9693/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
9694/// must be rewritten; otherwise `None` so the caller falls through to
9695/// the recursive walk. Identifies both function-call forms (`NOW()` /
9696/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
9697/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
9698/// which is how PG accepts them without parens).
9699fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
9700    let (kind, name) = match e {
9701        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
9702        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
9703        _ => return None,
9704    };
9705    // ASCII case-insensitive name match. Limited to the three keywords
9706    // that actually need rewriting.
9707    let matched = match name.len() {
9708        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
9709        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
9710        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
9711        _ => None,
9712    };
9713    let is_timestamp = matched?;
9714    let payload = if is_timestamp {
9715        now
9716    } else {
9717        now.div_euclid(86_400_000_000)
9718    };
9719    let target = if is_timestamp {
9720        spg_sql::ast::CastTarget::Timestamp
9721    } else {
9722        spg_sql::ast::CastTarget::Date
9723    };
9724    Some(Expr::Cast {
9725        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
9726        target,
9727    })
9728}
9729
/// Syntactic position in which a candidate clock keyword was found;
/// used by `clock_replacement_for` to tell call sites from bare
/// identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A zero-argument function call, e.g. `NOW()`.
    Fn,
    /// An unqualified column reference, e.g. `CURRENT_DATE`.
    BareIdent,
}
9735
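// The notes below describe `resolve_order_by_position`, which is
// defined further down in this file. They are kept as a plain comment
// so rustdoc does not merge them into the docs of
// `expand_group_by_all`.
//
// `ORDER BY <integer>` references the N-th SELECT item (1-based).
// Swap the integer literal for the matching item's expression so the
// executor doesn't need a special-case branch. Recurses into UNION
// peers because each peer keeps its own SELECT list.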
9740/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
9741/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
9742/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
9743/// and groups by whatever explicit non-aggregates remain — none in
9744/// the wildcard-only case, which still works for non-aggregate
9745/// queries).
9746fn expand_group_by_all(s: &mut SelectStatement) {
9747    if !s.group_by_all {
9748        for (_, peer) in &mut s.unions {
9749            expand_group_by_all(peer);
9750        }
9751        return;
9752    }
9753    let mut groups: Vec<Expr> = Vec::new();
9754    for item in &s.items {
9755        if let SelectItem::Expr { expr, .. } = item
9756            && !aggregate::contains_aggregate(expr)
9757        {
9758            groups.push(expr.clone());
9759        }
9760    }
9761    s.group_by = Some(groups);
9762    s.group_by_all = false;
9763    for (_, peer) in &mut s.unions {
9764        expand_group_by_all(peer);
9765    }
9766}
9767
9768fn resolve_order_by_position(s: &mut SelectStatement) {
9769    // v6.4.0 — iterate every ORDER BY key. Position references
9770    // (`ORDER BY 2`) bind to the 1-based projection index;
9771    // identifier references that match a SELECT-list alias bind to
9772    // the projected expression (Step 4 of L3a).
9773    for order in &mut s.order_by {
9774        match &order.expr {
9775            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
9776                if let Ok(idx_one_based) = usize::try_from(*n) {
9777                    let idx = idx_one_based - 1;
9778                    if idx < s.items.len()
9779                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
9780                    {
9781                        order.expr = expr.clone();
9782                    }
9783                }
9784            }
9785            Expr::Column(c) if c.qualifier.is_none() => {
9786                // Alias-in-ORDER-BY lookup.
9787                for item in &s.items {
9788                    if let SelectItem::Expr {
9789                        expr,
9790                        alias: Some(a),
9791                    } = item
9792                        && a == &c.name
9793                    {
9794                        order.expr = expr.clone();
9795                        break;
9796                    }
9797                }
9798            }
9799            _ => {}
9800        }
9801    }
9802    for (_, peer) in &mut s.unions {
9803        resolve_order_by_position(peer);
9804    }
9805}
9806
9807/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
9808/// Used by the UNION ORDER BY path; per-block paths inline the same
9809/// comparator because they already hold `&OrderBy` directly.
9810/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
9811/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
9812/// partition the prefix in O(n), then sort just that prefix in O(k
9813/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
9814/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
9815/// full-sort behaviour.
9816///
9817/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
9818/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
9819fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
9820    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
9821    match keep {
9822        Some(k) if k < tagged.len() && k > 0 => {
9823            let pivot = k - 1;
9824            tagged.select_nth_unstable_by(pivot, cmp);
9825            tagged[..k].sort_by(cmp);
9826            tagged.truncate(k);
9827        }
9828        _ => {
9829            tagged.sort_by(cmp);
9830        }
9831    }
9832}
9833
9834fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
9835    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
9836}
9837
/// v6.4.0 — multi-key ORDER BY comparator. Each key's per-key DESC
/// flag is honored independently. NULL is encoded as `f64::INFINITY`
/// so it sorts last in ASC and first in DESC (matches PG default).
///
/// Keys are compared pairwise, left to right; the first non-equal
/// pair decides. If one slice is shorter, only the common prefix is
/// compared. A missing entry in `descs` means ascending.
///
/// NaN handling: a NaN key compares equal to another NaN and greater
/// than every non-NaN key (PostgreSQL's rule). This keeps the
/// comparator a total order, which `sort_by` /
/// `select_nth_unstable_by` require — treating NaN as "equal to
/// everything" is not transitive and may make those routines panic or
/// produce an unspecified order.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        let ord = match ka.partial_cmp(kb) {
            Some(ord) => ord,
            // `partial_cmp` is `None` only when at least one side is
            // NaN: `false < true` puts the NaN side last, and two
            // NaNs tie.
            None => ka.is_nan().cmp(&kb.is_nan()),
        };
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
9856
9857/// v6.4.0 — eval every ORDER BY expression for a row and pack the
9858/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
9859fn build_order_keys(
9860    order_by: &[OrderBy],
9861    row: &Row,
9862    ctx: &EvalContext,
9863) -> Result<Vec<f64>, EngineError> {
9864    let mut keys = Vec::with_capacity(order_by.len());
9865    for o in order_by {
9866        let v = eval::eval_expr(&o.expr, row, ctx)?;
9867        keys.push(value_to_order_key(&v)?);
9868    }
9869    Ok(keys)
9870}
9871
9872/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
9873/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
9874/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
9875fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
9876    if let Some(off) = offset {
9877        let off = off as usize;
9878        if off >= rows.len() {
9879            rows.clear();
9880        } else {
9881            rows.drain(..off);
9882        }
9883    }
9884    if let Some(n) = limit {
9885        rows.truncate(n as usize);
9886    }
9887}
9888
9889/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
9890/// names + parent table name) into the storage-layer shape (column
9891/// indices + same parent table). Validates everything the engine
9892/// needs to know about the FK at CREATE TABLE time:
9893///
9894///   - parent table exists (catalog lookup, unless self-referencing)
9895///   - parent columns exist on the parent table
9896///   - parent column list matches the local arity (defaults to the
9897///     parent's primary index column when omitted)
9898///   - parent columns are covered by a `BTree` UNIQUE-class index
9899///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
9900///     the v7.6.2 INSERT path can do an O(log n) parent lookup
9901///   - local columns exist on the table being created
9902fn resolve_foreign_key(
9903    local_table_name: &str,
9904    local_cols: &[ColumnSchema],
9905    fk: spg_sql::ast::ForeignKeyConstraint,
9906    catalog: &Catalog,
9907) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
9908    // Resolve local columns.
9909    let mut local_columns = Vec::with_capacity(fk.columns.len());
9910    for name in &fk.columns {
9911        let pos = local_cols
9912            .iter()
9913            .position(|c| c.name == *name)
9914            .ok_or_else(|| {
9915                EngineError::Unsupported(alloc::format!(
9916                    "FOREIGN KEY references unknown local column {name:?}"
9917                ))
9918            })?;
9919        local_columns.push(pos);
9920    }
9921    // Self-referencing FK: parent table is the one we're creating.
9922    // The parent column resolution uses the local column list since
9923    // the catalog doesn't have this table yet.
9924    let is_self_ref = fk.parent_table == local_table_name;
9925    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
9926        (local_cols, local_table_name)
9927    } else {
9928        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
9929            EngineError::Storage(StorageError::TableNotFound {
9930                name: fk.parent_table.clone(),
9931            })
9932        })?;
9933        (
9934            parent_table.schema().columns.as_slice(),
9935            fk.parent_table.as_str(),
9936        )
9937    };
9938    // Resolve parent column names → positions. If the FK omitted the
9939    // parent column list, fall back to the parent's primary index
9940    // column (single-column only — composite default is rejected
9941    // because there's no unambiguous "PK" in SPG's index list).
9942    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
9943        if fk.columns.len() != 1 {
9944            return Err(EngineError::Unsupported(
9945                "composite FOREIGN KEY without explicit parent column list is not supported \
9946                 — list the parent columns explicitly"
9947                    .into(),
9948            ));
9949        }
9950        // Find a single BTree index on the parent and use its column.
9951        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
9952            .ok_or_else(|| {
9953                EngineError::Unsupported(alloc::format!(
9954                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
9955                     to default the FOREIGN KEY against"
9956                ))
9957            })?;
9958        alloc::vec![pos]
9959    } else {
9960        let mut out = Vec::with_capacity(fk.parent_columns.len());
9961        for name in &fk.parent_columns {
9962            let pos = parent_cols_for_lookup
9963                .iter()
9964                .position(|c| c.name == *name)
9965                .ok_or_else(|| {
9966                    EngineError::Unsupported(alloc::format!(
9967                        "FOREIGN KEY references unknown parent column \
9968                         {name:?} on table {parent_table_str:?}"
9969                    ))
9970                })?;
9971            out.push(pos);
9972        }
9973        out
9974    };
9975    if parent_columns.len() != local_columns.len() {
9976        return Err(EngineError::Unsupported(alloc::format!(
9977            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
9978            local_columns.len(),
9979            parent_columns.len()
9980        )));
9981    }
9982    // For non-self-referencing FKs, verify the parent column set is
9983    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
9984    // declaration; the convention is "the parent column for FK
9985    // purposes must have a BTree index" — which the user creates via
9986    // `CREATE INDEX ... USING btree (col)` (the default). We accept
9987    // any single-column BTree index that covers a parent column;
9988    // composite parent column lists require an index whose `column_position`
9989    // matches the first parent column (multi-column BTree indices
9990    // are not in the v7.x roadmap).
9991    if !is_self_ref {
9992        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
9993        let primary_parent_col = parent_columns[0];
9994        let has_btree = parent_table
9995            .schema()
9996            .columns
9997            .get(primary_parent_col)
9998            .is_some()
9999            && parent_table.indices().iter().any(|idx| {
10000                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10001                    && idx.column_position == primary_parent_col
10002                    && idx.partial_predicate.is_none()
10003            });
10004        if !has_btree {
10005            return Err(EngineError::Unsupported(alloc::format!(
10006                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
10007                 index — create one with `CREATE INDEX ... ON {} ({})` first",
10008                parent_table_str,
10009                parent_table_str,
10010                parent_table.schema().columns[primary_parent_col].name,
10011            )));
10012        }
10013    }
10014    let on_delete = fk_action_sql_to_storage(fk.on_delete);
10015    let on_update = fk_action_sql_to_storage(fk.on_update);
10016    Ok(spg_storage::ForeignKeyConstraint {
10017        name: fk.name,
10018        local_columns,
10019        parent_table: fk.parent_table,
10020        parent_columns,
10021        on_delete,
10022        on_update,
10023    })
10024}
10025
10026/// v7.6.1 — pick a sentinel "primary key" column from the parent
10027/// table when the FK didn't name parent columns. Picks the first
10028/// single-column unconditional BTree index — that's the closest
10029/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
10030/// `local_cols` as the column source.
10031fn pick_pk_index_column(
10032    catalog: &Catalog,
10033    parent_name: &str,
10034    is_self_ref: bool,
10035    local_cols: &[ColumnSchema],
10036) -> Option<usize> {
10037    if is_self_ref {
10038        // Self-ref FK omitted parent columns: pick column 0 by
10039        // convention (no catalog entry yet). Engine will widen this
10040        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
10041        let _ = local_cols;
10042        return Some(0);
10043    }
10044    let parent = catalog.get(parent_name)?;
10045    parent.indices().iter().find_map(|idx| {
10046        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10047            && idx.partial_predicate.is_none()
10048            && idx.included_columns.is_empty()
10049            && idx.expression.is_none()
10050        {
10051            Some(idx.column_position)
10052        } else {
10053            None
10054        }
10055    })
10056}
10057
10058/// v7.9.8 / v7.9.10 — resolve the column positions that
10059/// identify a conflict for ON CONFLICT. Returns a Vec of
10060/// column positions (1 element for single-column form, N for
10061/// composite). When the user wrote bare `ON CONFLICT DO …`,
10062/// falls back to the table's first unconditional BTree index
10063/// (always single-column today).
10064fn resolve_on_conflict_columns(
10065    catalog: &Catalog,
10066    table_name: &str,
10067    target: &[String],
10068) -> Result<Vec<usize>, EngineError> {
10069    let table = catalog.get(table_name).ok_or_else(|| {
10070        EngineError::Storage(StorageError::TableNotFound {
10071            name: table_name.into(),
10072        })
10073    })?;
10074    if target.is_empty() {
10075        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
10076        // constraints carry a multi-column tuple; the prior code
10077        // path picked only the leading column of the first BTree
10078        // index, which caused `ON CONFLICT DO NOTHING` to dedup
10079        // by leading column alone (3 rows with same group_id but
10080        // different permission collapsed to 1). PG semantics use
10081        // the full tuple. Prefer a UniquenessConstraint's full
10082        // column list when one exists; fall back to the leading
10083        // BTree column for legacy single-column UNIQUE.
10084        if let Some(uc) = table.schema().uniqueness_constraints.first() {
10085            return Ok(uc.columns.clone());
10086        }
10087        let pos = table
10088            .indices()
10089            .iter()
10090            .find_map(|idx| {
10091                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10092                    && idx.partial_predicate.is_none()
10093                    && idx.included_columns.is_empty()
10094                    && idx.expression.is_none()
10095                {
10096                    Some(idx.column_position)
10097                } else {
10098                    None
10099                }
10100            })
10101            .ok_or_else(|| {
10102                EngineError::Unsupported(alloc::format!(
10103                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
10104                ))
10105            })?;
10106        return Ok(alloc::vec![pos]);
10107    }
10108    let mut out = Vec::with_capacity(target.len());
10109    for name in target {
10110        let pos = table
10111            .schema()
10112            .columns
10113            .iter()
10114            .position(|c| c.name == *name)
10115            .ok_or_else(|| {
10116                EngineError::Unsupported(alloc::format!(
10117                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
10118                ))
10119            })?;
10120        out.push(pos);
10121    }
10122    Ok(out)
10123}
10124
10125/// v7.9.8 — check whether the BTree index on `column_pos` of
10126/// `table_name` already has a row with this key.
10127fn on_conflict_key_exists(
10128    catalog: &Catalog,
10129    table_name: &str,
10130    column_pos: usize,
10131    key: &Value,
10132) -> bool {
10133    let Some(table) = catalog.get(table_name) else {
10134        return false;
10135    };
10136    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
10137        return false;
10138    };
10139    table.indices().iter().any(|idx| {
10140        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10141            && idx.column_position == column_pos
10142            && idx.partial_predicate.is_none()
10143            && !idx.lookup_eq(&idx_key).is_empty()
10144    })
10145}
10146
10147/// v7.9.9 / v7.9.10 — look up an existing row's position by
10148/// matching all `column_positions` against the incoming `key`
10149/// tuple. Single-column shape (one column) reduces to the
10150/// canonical PK lookup; composite shapes scan linearly until
10151/// every position matches.
10152fn lookup_row_position_by_keys(
10153    catalog: &Catalog,
10154    table_name: &str,
10155    column_positions: &[usize],
10156    key: &[&Value],
10157) -> Option<usize> {
10158    let table = catalog.get(table_name)?;
10159    table.rows().iter().position(|r| {
10160        column_positions
10161            .iter()
10162            .enumerate()
10163            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10164    })
10165}
10166
10167/// v7.9.10 — does the table already contain a row whose
10168/// `column_positions` tuple equals `key`? Single-column shape
10169/// uses the existing BTree fast path; composite shapes fall
10170/// back to a row scan.
10171fn on_conflict_keys_exist(
10172    catalog: &Catalog,
10173    table_name: &str,
10174    column_positions: &[usize],
10175    key: &[&Value],
10176) -> bool {
10177    if column_positions.len() == 1 {
10178        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
10179    }
10180    let Some(table) = catalog.get(table_name) else {
10181        return false;
10182    };
10183    table.rows().iter().any(|r| {
10184        column_positions
10185            .iter()
10186            .enumerate()
10187            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
10188    })
10189}
10190
10191/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
10192/// existing row.
10193///
10194/// `incoming` is the rejected INSERT row (used to resolve
10195/// `EXCLUDED.col` references in the assignment exprs);
10196/// `target_pos` is the position of the existing row in the table.
10197/// Each assignment substitutes `EXCLUDED.col` with the matching
10198/// incoming value, evaluates the resulting expression against
10199/// the existing row, and writes the new value into the
10200/// corresponding column of the returned `Vec<Value>`. If
10201/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
10202/// the conflicting row is silently kept unchanged.
10203fn apply_on_conflict_assignments(
10204    catalog: &Catalog,
10205    table_name: &str,
10206    target_pos: usize,
10207    incoming: &[Value],
10208    assignments: &[(String, Expr)],
10209    where_: Option<&Expr>,
10210) -> Result<Option<Vec<Value>>, EngineError> {
10211    let table = catalog.get(table_name).ok_or_else(|| {
10212        EngineError::Storage(StorageError::TableNotFound {
10213            name: table_name.into(),
10214        })
10215    })?;
10216    let schema_cols = table.schema().columns.clone();
10217    let existing = table
10218        .rows()
10219        .get(target_pos)
10220        .ok_or_else(|| {
10221            EngineError::Unsupported(alloc::format!(
10222                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
10223            ))
10224        })?
10225        .clone();
10226    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
10227    // Optional WHERE filter on the conflict row.
10228    if let Some(w) = where_ {
10229        let pred = w.clone();
10230        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
10231        let v = eval::eval_expr(&pred, &existing, &ctx)?;
10232        if !matches!(v, Value::Bool(true)) {
10233            return Ok(None);
10234        }
10235    }
10236    let mut new_values = existing.values.clone();
10237    for (col_name, expr) in assignments {
10238        let target_idx = schema_cols
10239            .iter()
10240            .position(|c| c.name == *col_name)
10241            .ok_or_else(|| {
10242                EngineError::Eval(EvalError::ColumnNotFound {
10243                    name: col_name.clone(),
10244                })
10245            })?;
10246        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
10247        let v = eval::eval_expr(&sub, &existing, &ctx)?;
10248        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
10249    }
10250    Ok(Some(new_values))
10251}
10252
10253/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
10254/// "EXCLUDED", name }` reference with a `Literal` of the matching
10255/// value from the incoming-row vec. Resolution against the
10256/// child-table column list (by name).
10257fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
10258    use spg_sql::ast::ColumnName;
10259    match expr {
10260        Expr::Column(ColumnName { qualifier, name })
10261            if qualifier
10262                .as_deref()
10263                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
10264        {
10265            let pos = schema_cols.iter().position(|c| c.name == name);
10266            match pos {
10267                Some(p) => {
10268                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
10269                    value_to_literal_expr(v)
10270                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
10271                }
10272                None => Expr::Column(ColumnName { qualifier, name }),
10273            }
10274        }
10275        Expr::Binary { op, lhs, rhs } => Expr::Binary {
10276            op,
10277            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
10278            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
10279        },
10280        Expr::Unary { op, expr } => Expr::Unary {
10281            op,
10282            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
10283        },
10284        Expr::FunctionCall { name, args } => Expr::FunctionCall {
10285            name,
10286            args: args
10287                .into_iter()
10288                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
10289                .collect(),
10290        },
10291        other => other,
10292    }
10293}
10294
10295/// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
10296/// about to be inserted into `child_table`, every FK declared on
10297/// that table is checked: the row's FK columns must either be
10298/// NULL (SQL spec skip) or match an existing parent row via the
10299/// parent's BTree PK / UNIQUE index.
10300///
10301/// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
10302/// payload on first failure.
10303///
10304/// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
10305/// == child_table`, the parent rows visible to this check are
10306///  (a) rows already committed to the table, plus
10307///  (b) earlier rows from the *same* `rows` batch.
10308/// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
10309/// work in a single statement — common pattern for bulk-loading
10310/// hierarchies.
10311/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
10312/// constraints at INSERT time. For each constraint declared on
10313/// the target table, check that no existing row + no earlier row
10314/// in the same batch has the same full-column tuple. NULL in
10315/// any column lifts the row out of the check (SQL spec: NULL
10316/// ≠ NULL for uniqueness). mailrs G1 + G6.
10317fn enforce_uniqueness_inserts(
10318    catalog: &Catalog,
10319    child_table: &str,
10320    constraints: &[spg_storage::UniquenessConstraint],
10321    rows: &[Vec<Value>],
10322) -> Result<(), EngineError> {
10323    if constraints.is_empty() {
10324        return Ok(());
10325    }
10326    let table = catalog.get(child_table).ok_or_else(|| {
10327        EngineError::Storage(StorageError::TableNotFound {
10328            name: child_table.into(),
10329        })
10330    })?;
10331    for uc in constraints {
10332        for (batch_idx, row_values) in rows.iter().enumerate() {
10333            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
10334            let has_null = key.iter().any(|v| matches!(v, Value::Null));
10335            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
10336            // PG 15+): two rows whose constrained columns are all
10337            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
10338            // NULL skip the check.
10339            if has_null && !uc.nulls_not_distinct {
10340                continue;
10341            }
10342            // Table-side collision: scan existing rows.
10343            let collides_in_table = table.rows().iter().any(|prow| {
10344                uc.columns
10345                    .iter()
10346                    .enumerate()
10347                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
10348            });
10349            // Batch-side collision: earlier rows in the same INSERT.
10350            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
10351                uc.columns
10352                    .iter()
10353                    .enumerate()
10354                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
10355            });
10356            if collides_in_table || collides_in_batch {
10357                let kind = if uc.is_primary_key {
10358                    "PRIMARY KEY"
10359                } else {
10360                    "UNIQUE"
10361                };
10362                let col_names: Vec<String> = uc
10363                    .columns
10364                    .iter()
10365                    .map(|&i| table.schema().columns[i].name.clone())
10366                    .collect();
10367                return Err(EngineError::Unsupported(alloc::format!(
10368                    "{kind} violation on {child_table:?} columns {col_names:?}: \
10369                     row #{batch_idx} duplicates an existing key"
10370                )));
10371            }
10372        }
10373    }
10374    Ok(())
10375}
10376
10377/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
10378/// WHERE-style predicate. NULL → false (three-valued logic
10379/// collapses to "skip this row" for index inclusion). Numeric
10380/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
10381/// Everything else (strings, vectors, JSON, …) is not a valid
10382/// predicate result and surfaces as `false` so a malformed
10383/// predicate degrades to "row not in index" rather than panicking.
10384fn predicate_truthy(v: &spg_storage::Value) -> bool {
10385    use spg_storage::Value as V;
10386    match v {
10387        V::Bool(b) => *b,
10388        V::Int(n) => *n != 0,
10389        V::BigInt(n) => *n != 0,
10390        V::SmallInt(n) => *n != 0,
10391        _ => false,
10392    }
10393}
10394
10395/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
10396/// committed rows for pre-existing duplicates. If any pair of rows
10397/// matches the predicate AND has the same index key, refuse to
10398/// create the index so the user fixes the data before retrying.
10399fn check_existing_unique_violation(
10400    idx: &spg_storage::Index,
10401    schema: &spg_storage::TableSchema,
10402    rows: &[spg_storage::Row],
10403) -> Result<(), EngineError> {
10404    let predicate_expr = match idx.partial_predicate.as_deref() {
10405        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10406            EngineError::Unsupported(alloc::format!(
10407                "stored partial predicate {s:?} failed to re-parse: {e:?}"
10408            ))
10409        })?),
10410        None => None,
10411    };
10412    let ctx = eval::EvalContext::new(&schema.columns, None);
10413    let key_positions = unique_key_positions(idx);
10414    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
10415    for row in rows {
10416        if let Some(expr) = &predicate_expr {
10417            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
10418                EngineError::Unsupported(alloc::format!(
10419                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
10420                ))
10421            })?;
10422            if !predicate_truthy(&v) {
10423                continue;
10424            }
10425        }
10426        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
10427            .iter()
10428            .map(|&p| {
10429                row.values
10430                    .get(p)
10431                    .cloned()
10432                    .unwrap_or(spg_storage::Value::Null)
10433            })
10434            .collect();
10435        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10436            continue;
10437        }
10438        if seen.iter().any(|other| *other == key) {
10439            return Err(EngineError::Unsupported(alloc::format!(
10440                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
10441                idx.name
10442            )));
10443        }
10444        seen.push(key);
10445    }
10446    Ok(())
10447}
10448
10449/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
10450/// extra positions). For single-column indexes this is just
10451/// `[column_position]`.
10452fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
10453    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
10454    out.push(idx.column_position);
10455    out.extend_from_slice(&idx.extra_column_positions);
10456    out
10457}
10458
10459/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
10460/// target table. For each, eval the index's optional predicate
10461/// against (a) the candidate row and (b) every committed row plus
10462/// earlier batch rows; only rows where the predicate is truthy
10463/// participate. A duplicate key among predicate-matching rows is a
10464/// uniqueness violation. NULL keys lift the row out of the check
10465/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
10466fn enforce_unique_index_inserts(
10467    catalog: &Catalog,
10468    table_name: &str,
10469    rows: &[alloc::vec::Vec<spg_storage::Value>],
10470) -> Result<(), EngineError> {
10471    let table = catalog.get(table_name).ok_or_else(|| {
10472        EngineError::Storage(StorageError::TableNotFound {
10473            name: table_name.into(),
10474        })
10475    })?;
10476    let schema = table.schema();
10477    let ctx = eval::EvalContext::new(&schema.columns, None);
10478    for idx in table.indices() {
10479        if !idx.is_unique {
10480            continue;
10481        }
10482        // Re-parse the predicate once per index per batch.
10483        let predicate_expr = match idx.partial_predicate.as_deref() {
10484            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10485                EngineError::Unsupported(alloc::format!(
10486                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
10487                    idx.name
10488                ))
10489            })?),
10490            None => None,
10491        };
10492        let key_positions = unique_key_positions(idx);
10493        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
10494            key_positions
10495                .iter()
10496                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
10497                .collect()
10498        };
10499        // Helper: does `values` participate in this index? (predicate
10500        // truthy when present.) Wraps `values` into a transient Row
10501        // because eval_expr requires &Row.
10502        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
10503            let Some(expr) = &predicate_expr else {
10504                return Ok(true);
10505            };
10506            let tmp_row = spg_storage::Row {
10507                values: values.to_vec(),
10508            };
10509            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10510                EngineError::Unsupported(alloc::format!(
10511                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
10512                    idx.name
10513                ))
10514            })?;
10515            Ok(predicate_truthy(&v))
10516        };
10517        for (batch_idx, row_values) in rows.iter().enumerate() {
10518            if !participates(row_values)? {
10519                continue;
10520            }
10521            let key = key_of(row_values);
10522            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10523                continue;
10524            }
10525            // Committed-table collision.
10526            for prow in table.rows() {
10527                if !participates(&prow.values)? {
10528                    continue;
10529                }
10530                if key_of(&prow.values) == key {
10531                    return Err(EngineError::Unsupported(alloc::format!(
10532                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10533                         row #{batch_idx} duplicates an existing key",
10534                        idx.name
10535                    )));
10536                }
10537            }
10538            // Within-batch collision: earlier rows in the same INSERT.
10539            for earlier in &rows[..batch_idx] {
10540                if !participates(earlier)? {
10541                    continue;
10542                }
10543                if key_of(earlier) == key {
10544                    return Err(EngineError::Unsupported(alloc::format!(
10545                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10546                         row #{batch_idx} duplicates an earlier row in the same batch",
10547                        idx.name
10548                    )));
10549                }
10550            }
10551        }
10552    }
10553    Ok(())
10554}
10555
10556/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
10557/// Returns `true` when at least one of `filter_cols` has a
10558/// different value in `new_row` vs `old_row`. Column lookup is
10559/// case-insensitive against `schema_cols`; unknown filter columns
10560/// are treated as "not changed" (the trigger therefore won't
10561/// fire on them — surfacing a parse-time error would be too
10562/// strict for catalog reloads where the schema may have drifted).
10563fn any_column_changed(
10564    filter_cols: &[String],
10565    schema_cols: &[ColumnSchema],
10566    old_row: &Row,
10567    new_row: &Row,
10568) -> bool {
10569    for col_name in filter_cols {
10570        let Some(pos) = schema_cols
10571            .iter()
10572            .position(|c| c.name.eq_ignore_ascii_case(col_name))
10573        else {
10574            continue;
10575        };
10576        let old_v = old_row.values.get(pos);
10577        let new_v = new_row.values.get(pos);
10578        if old_v != new_v {
10579            return true;
10580        }
10581    }
10582    false
10583}
10584
10585/// v7.13.0 — evaluate every CHECK predicate on the schema against
10586/// each candidate row. Mirrors PG semantics: a `false` result
10587/// rejects the mutation; a NULL result *passes* (CHECK rejects
10588/// only on definite-false, not on unknown). mailrs round-5 G3.
10589fn enforce_check_constraints(
10590    catalog: &Catalog,
10591    table_name: &str,
10592    rows: &[alloc::vec::Vec<spg_storage::Value>],
10593) -> Result<(), EngineError> {
10594    let table = catalog.get(table_name).ok_or_else(|| {
10595        EngineError::Storage(StorageError::TableNotFound {
10596            name: table_name.into(),
10597        })
10598    })?;
10599    let schema = table.schema();
10600    if schema.checks.is_empty() {
10601        return Ok(());
10602    }
10603    let ctx = eval::EvalContext::new(&schema.columns, None);
10604    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
10605    for (i, src) in schema.checks.iter().enumerate() {
10606        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
10607            EngineError::Unsupported(alloc::format!(
10608                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
10609            ))
10610        })?;
10611        parsed.push((i, expr));
10612    }
10613    for (batch_idx, row_values) in rows.iter().enumerate() {
10614        let tmp_row = spg_storage::Row {
10615            values: row_values.clone(),
10616        };
10617        for (i, expr) in &parsed {
10618            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10619                EngineError::Unsupported(alloc::format!(
10620                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
10621                ))
10622            })?;
10623            // PG: NULL passes (CHECK rejects on definite-false only).
10624            if matches!(v, spg_storage::Value::Bool(false)) {
10625                return Err(EngineError::Unsupported(alloc::format!(
10626                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
10627                    schema.checks[*i]
10628                )));
10629            }
10630        }
10631    }
10632    Ok(())
10633}
10634
10635fn enforce_fk_inserts(
10636    catalog: &Catalog,
10637    child_table: &str,
10638    fks: &[spg_storage::ForeignKeyConstraint],
10639    rows: &[Vec<Value>],
10640) -> Result<(), EngineError> {
10641    for fk in fks {
10642        let parent_is_self = fk.parent_table == child_table;
10643        let parent = if parent_is_self {
10644            // Self-ref: read the current state of the same table.
10645            // The mut borrow on child has been dropped by the caller.
10646            catalog.get(child_table).ok_or_else(|| {
10647                EngineError::Storage(StorageError::TableNotFound {
10648                    name: child_table.into(),
10649                })
10650            })?
10651        } else {
10652            catalog.get(&fk.parent_table).ok_or_else(|| {
10653                EngineError::Storage(StorageError::TableNotFound {
10654                    name: fk.parent_table.clone(),
10655                })
10656            })?
10657        };
10658        for (batch_idx, row_values) in rows.iter().enumerate() {
10659            // Single-column FK fast path: try the parent's BTree
10660            // index for an O(log n) lookup. Composite FKs fall back
10661            // to a parent-row scan.
10662            if fk.local_columns.len() == 1 {
10663                let v = &row_values[fk.local_columns[0]];
10664                if matches!(v, Value::Null) {
10665                    continue;
10666                }
10667                let parent_col = fk.parent_columns[0];
10668                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
10669                    EngineError::Unsupported(alloc::format!(
10670                        "FOREIGN KEY column value of type {:?} is not index-eligible",
10671                        v.data_type()
10672                    ))
10673                })?;
10674                let present_committed = parent.indices().iter().any(|idx| {
10675                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10676                        && idx.column_position == parent_col
10677                        && idx.partial_predicate.is_none()
10678                        && !idx.lookup_eq(&key).is_empty()
10679                });
10680                // v7.6.7 self-ref widening: also accept a match
10681                // against earlier rows in this same batch when the
10682                // FK points at the table being inserted into.
10683                let present_in_batch = parent_is_self
10684                    && rows[..batch_idx]
10685                        .iter()
10686                        .any(|earlier| earlier.get(parent_col) == Some(v));
10687                if !(present_committed || present_in_batch) {
10688                    return Err(EngineError::Unsupported(alloc::format!(
10689                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
10690                        fk.parent_table,
10691                        parent
10692                            .schema()
10693                            .columns
10694                            .get(parent_col)
10695                            .map_or("?", |c| c.name.as_str()),
10696                        v,
10697                    )));
10698                }
10699            } else {
10700                // Composite FK: scan parent rows. v7.6.7 also
10701                // accepts a match against earlier rows in the same
10702                // batch (self-ref bulk-loading of hierarchies).
10703                if fk
10704                    .local_columns
10705                    .iter()
10706                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
10707                {
10708                    continue;
10709                }
10710                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
10711                let parent_match_committed = parent.rows().iter().any(|prow| {
10712                    fk.parent_columns
10713                        .iter()
10714                        .enumerate()
10715                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
10716                });
10717                let parent_match_in_batch = parent_is_self
10718                    && rows[..batch_idx].iter().any(|earlier| {
10719                        fk.parent_columns
10720                            .iter()
10721                            .enumerate()
10722                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
10723                    });
10724                if !(parent_match_committed || parent_match_in_batch) {
10725                    return Err(EngineError::Unsupported(alloc::format!(
10726                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
10727                        fk.parent_table,
10728                    )));
10729                }
10730            }
10731        }
10732    }
10733    Ok(())
10734}
10735
10736/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
10737/// DELETE on a parent. The plan is a list of these steps, stacked
10738/// across the FK graph by `plan_fk_parent_deletions`.
10739#[derive(Debug, Clone)]
10740struct FkChildStep {
10741    child_table: String,
10742    action: FkChildAction,
10743}
10744
10745#[derive(Debug, Clone)]
10746enum FkChildAction {
10747    /// CASCADE — remove these rows. Sorted, deduplicated positions.
10748    Delete { positions: Vec<usize> },
10749    /// SET NULL — for each (row, column) in the flat list, write
10750    /// NULL into that child cell. Multiple FKs on the same row may
10751    /// produce overlapping entries (deduped at plan time).
10752    SetNull {
10753        positions: Vec<usize>,
10754        columns: Vec<usize>,
10755    },
10756    /// SET DEFAULT — same shape as SetNull but writes the column's
10757    /// declared DEFAULT value (resolved at plan time). Columns
10758    /// without a DEFAULT raise an error during planning.
10759    SetDefault {
10760        positions: Vec<usize>,
10761        columns: Vec<usize>,
10762        defaults: Vec<Value>,
10763    },
10764}
10765
10766/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
10767///
10768/// Walks every table in the catalog looking for FKs whose
10769/// `parent_table` is `parent_table_name`. For each such FK + each
10770/// to-be-deleted parent row:
10771///
10772///   - RESTRICT / NoAction → error, no plan returned
10773///   - CASCADE → child rows get scheduled for deletion; recursive
10774///   - SetNull → child FK column(s) scheduled to be NULL-ed.
10775///     Verified NULL-able at plan time.
10776///   - SetDefault → child FK column(s) scheduled to be reset to
10777///     their declared DEFAULT. Columns without a DEFAULT raise.
10778///
10779/// SET NULL / SET DEFAULT do NOT cascade further — the child row
10780/// stays; only one of its columns mutates.
10781fn plan_fk_parent_deletions(
10782    catalog: &Catalog,
10783    parent_table_name: &str,
10784    to_delete_positions: &[usize],
10785    to_delete_rows: &[Vec<Value>],
10786) -> Result<Vec<FkChildStep>, EngineError> {
10787    use alloc::collections::{BTreeMap, BTreeSet};
10788    if to_delete_rows.is_empty() {
10789        return Ok(Vec::new());
10790    }
10791    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
10792    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
10793    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
10794    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10795    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
10796    for &p in to_delete_positions {
10797        visited.insert((parent_table_name.to_string(), p));
10798    }
10799    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
10800        .iter()
10801        .map(|r| (parent_table_name.to_string(), r.clone()))
10802        .collect();
10803    while let Some((cur_parent, parent_row)) = work.pop() {
10804        for child_name in catalog.table_names() {
10805            let child = catalog
10806                .get(&child_name)
10807                .expect("table_names → catalog.get round-trip is total");
10808            for fk in &child.schema().foreign_keys {
10809                if fk.parent_table != cur_parent {
10810                    continue;
10811                }
10812                let parent_key: Vec<&Value> = fk
10813                    .parent_columns
10814                    .iter()
10815                    .map(|&pi| &parent_row[pi])
10816                    .collect();
10817                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
10818                    continue;
10819                }
10820                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10821                    if child_name == cur_parent
10822                        && visited.contains(&(child_name.clone(), child_row_idx))
10823                    {
10824                        continue;
10825                    }
10826                    let matches_key = fk
10827                        .local_columns
10828                        .iter()
10829                        .enumerate()
10830                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
10831                    if !matches_key {
10832                        continue;
10833                    }
10834                    match fk.on_delete {
10835                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10836                            return Err(EngineError::Unsupported(alloc::format!(
10837                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
10838                                 restricted by FK from {child_name:?}.{:?}",
10839                                fk.local_columns,
10840                            )));
10841                        }
10842                        spg_storage::FkAction::Cascade => {
10843                            if visited.insert((child_name.clone(), child_row_idx)) {
10844                                delete_plan
10845                                    .entry(child_name.clone())
10846                                    .or_default()
10847                                    .insert(child_row_idx);
10848                                work.push((child_name.clone(), child_row.values.clone()));
10849                            }
10850                        }
10851                        spg_storage::FkAction::SetNull => {
10852                            // Verify every local FK column is NULL-able.
10853                            for &li in &fk.local_columns {
10854                                let col = child.schema().columns.get(li).ok_or_else(|| {
10855                                    EngineError::Unsupported(alloc::format!(
10856                                        "FK local column {li} missing in {child_name:?}"
10857                                    ))
10858                                })?;
10859                                if !col.nullable {
10860                                    return Err(EngineError::Unsupported(alloc::format!(
10861                                        "FOREIGN KEY ON DELETE SET NULL: column \
10862                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
10863                                        col.name,
10864                                    )));
10865                                }
10866                            }
10867                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10868                            for &li in &fk.local_columns {
10869                                entry.insert((child_row_idx, li));
10870                            }
10871                        }
10872                        spg_storage::FkAction::SetDefault => {
10873                            // Resolve the DEFAULT for every local FK col.
10874                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10875                            for &li in &fk.local_columns {
10876                                let col = child.schema().columns.get(li).ok_or_else(|| {
10877                                    EngineError::Unsupported(alloc::format!(
10878                                        "FK local column {li} missing in {child_name:?}"
10879                                    ))
10880                                })?;
10881                                let default = col.default.clone().ok_or_else(|| {
10882                                    EngineError::Unsupported(alloc::format!(
10883                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
10884                                         {child_name:?}.{:?} has no DEFAULT declared",
10885                                        col.name,
10886                                    ))
10887                                })?;
10888                                entry.insert((child_row_idx, li), default);
10889                            }
10890                        }
10891                    }
10892                }
10893            }
10894        }
10895    }
10896    // Flatten the three plans into the ordered `FkChildStep` list.
10897    // Deletes are applied last per child (after any null/default
10898    // re-writes on the same child) so a child row that's both
10899    // re-written and then cascade-deleted only ends up deleted —
10900    // but in v7.6.5 SetNull/Cascade never overlap on the same row
10901    // (a single FK chooses exactly one action), so the order is
10902    // mostly a precaution.
10903    let mut steps: Vec<FkChildStep> = Vec::new();
10904    for (child_table, entries) in setnull_plan {
10905        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10906        steps.push(FkChildStep {
10907            child_table,
10908            action: FkChildAction::SetNull { positions, columns },
10909        });
10910    }
10911    for (child_table, entries) in setdefault_plan {
10912        let mut positions = Vec::with_capacity(entries.len());
10913        let mut columns = Vec::with_capacity(entries.len());
10914        let mut defaults = Vec::with_capacity(entries.len());
10915        for ((p, c), v) in entries {
10916            positions.push(p);
10917            columns.push(c);
10918            defaults.push(v);
10919        }
10920        steps.push(FkChildStep {
10921            child_table,
10922            action: FkChildAction::SetDefault {
10923                positions,
10924                columns,
10925                defaults,
10926            },
10927        });
10928    }
10929    for (child_table, positions) in delete_plan {
10930        steps.push(FkChildStep {
10931            child_table,
10932            action: FkChildAction::Delete {
10933                positions: positions.into_iter().collect(),
10934            },
10935        });
10936    }
10937    Ok(steps)
10938}
10939
10940/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
10941/// PK/UNIQUE columns. Walks every other table whose FK references
10942/// `parent_table_name`; for each FK whose parent_columns overlap a
10943/// mutated column, decides the action by `fk.on_update`.
10944///
10945///   - RESTRICT / NoAction → error if any child references the OLD
10946///     value
10947///   - CASCADE → child FK columns get rewritten to the NEW parent
10948///     value (a SetNull-style update step with the new value)
10949///   - SetNull → child FK columns set to NULL
10950///   - SetDefault → child FK columns set to declared default
10951///
10952/// `plan_with_old` is `(row_position, old_values, new_values)` so
10953/// the planner can detect "did this row's parent key actually
10954/// change?" — only rows where at least one referenced parent
10955/// column moved trigger inbound work.
10956fn plan_fk_parent_updates(
10957    catalog: &Catalog,
10958    parent_table_name: &str,
10959    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
10960) -> Result<Vec<FkChildStep>, EngineError> {
10961    use alloc::collections::BTreeMap;
10962    if plan_with_old.is_empty() {
10963        return Ok(Vec::new());
10964    }
10965    // For each child table we may touch, build per-child step
10966    // lists. UPDATE never deletes children — `delete_plan` stays
10967    // empty here but is kept structurally aligned with
10968    // `plan_fk_parent_deletions` for future use.
10969    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
10970    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
10971        BTreeMap::new();
10972    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10973    // Cascade-update plan: child_table → row_idx → col_idx → new_value
10974    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10975
10976    for child_name in catalog.table_names() {
10977        let child = catalog
10978            .get(&child_name)
10979            .expect("table_names → catalog.get total");
10980        for fk in &child.schema().foreign_keys {
10981            if fk.parent_table != parent_table_name {
10982                continue;
10983            }
10984            for (_pos, old_row, new_row) in plan_with_old {
10985                // Did any parent FK column change?
10986                let key_changed = fk
10987                    .parent_columns
10988                    .iter()
10989                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
10990                if !key_changed {
10991                    continue;
10992                }
10993                // The OLD parent key — used to find referring children.
10994                let old_key: Vec<&Value> =
10995                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
10996                if old_key.iter().any(|v| matches!(v, Value::Null)) {
10997                    // NULL parent has no children — skip.
10998                    continue;
10999                }
11000                let new_key: Vec<&Value> =
11001                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
11002                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
11003                    // Self-ref same-row updates: a row updating its
11004                    // own PK doesn't restrict itself.
11005                    if child_name == parent_table_name
11006                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
11007                    {
11008                        continue;
11009                    }
11010                    let matches_key = fk
11011                        .local_columns
11012                        .iter()
11013                        .enumerate()
11014                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
11015                    if !matches_key {
11016                        continue;
11017                    }
11018                    match fk.on_update {
11019                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
11020                            return Err(EngineError::Unsupported(alloc::format!(
11021                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
11022                                 restricted by FK from {child_name:?}.{:?}",
11023                                fk.local_columns,
11024                            )));
11025                        }
11026                        spg_storage::FkAction::Cascade => {
11027                            // Rewrite child FK columns to new key.
11028                            let entry = cascade_plan.entry(child_name.clone()).or_default();
11029                            for (i, &li) in fk.local_columns.iter().enumerate() {
11030                                entry.insert((child_row_idx, li), new_key[i].clone());
11031                            }
11032                        }
11033                        spg_storage::FkAction::SetNull => {
11034                            for &li in &fk.local_columns {
11035                                let col = child.schema().columns.get(li).ok_or_else(|| {
11036                                    EngineError::Unsupported(alloc::format!(
11037                                        "FK local column {li} missing in {child_name:?}"
11038                                    ))
11039                                })?;
11040                                if !col.nullable {
11041                                    return Err(EngineError::Unsupported(alloc::format!(
11042                                        "FOREIGN KEY ON UPDATE SET NULL: column \
11043                                         {child_name:?}.{:?} is NOT NULL",
11044                                        col.name,
11045                                    )));
11046                                }
11047                            }
11048                            let entry = setnull_plan.entry(child_name.clone()).or_default();
11049                            for &li in &fk.local_columns {
11050                                entry.insert((child_row_idx, li));
11051                            }
11052                        }
11053                        spg_storage::FkAction::SetDefault => {
11054                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
11055                            for &li in &fk.local_columns {
11056                                let col = child.schema().columns.get(li).ok_or_else(|| {
11057                                    EngineError::Unsupported(alloc::format!(
11058                                        "FK local column {li} missing in {child_name:?}"
11059                                    ))
11060                                })?;
11061                                let default = col.default.clone().ok_or_else(|| {
11062                                    EngineError::Unsupported(alloc::format!(
11063                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
11064                                         {child_name:?}.{:?} has no DEFAULT",
11065                                        col.name,
11066                                    ))
11067                                })?;
11068                                entry.insert((child_row_idx, li), default);
11069                            }
11070                        }
11071                    }
11072                }
11073            }
11074        }
11075    }
11076    // Flatten into FkChildStep list. UPDATE doesn't produce
11077    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
11078    let mut steps: Vec<FkChildStep> = Vec::new();
11079    for (child_table, entries) in cascade_plan {
11080        let mut positions = Vec::with_capacity(entries.len());
11081        let mut columns = Vec::with_capacity(entries.len());
11082        let mut defaults = Vec::with_capacity(entries.len());
11083        for ((p, c), v) in entries {
11084            positions.push(p);
11085            columns.push(c);
11086            defaults.push(v);
11087        }
11088        // We reuse `FkChildAction::SetDefault` for cascade-update:
11089        // both shapes are "write a known value into specific cells"
11090        // — `apply_per_cell_writes` doesn't care whether the value
11091        // came from a DEFAULT declaration or a new parent key.
11092        steps.push(FkChildStep {
11093            child_table,
11094            action: FkChildAction::SetDefault {
11095                positions,
11096                columns,
11097                defaults,
11098            },
11099        });
11100    }
11101    for (child_table, entries) in setnull_plan {
11102        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
11103        steps.push(FkChildStep {
11104            child_table,
11105            action: FkChildAction::SetNull { positions, columns },
11106        });
11107    }
11108    for (child_table, entries) in setdefault_plan {
11109        let mut positions = Vec::with_capacity(entries.len());
11110        let mut columns = Vec::with_capacity(entries.len());
11111        let mut defaults = Vec::with_capacity(entries.len());
11112        for ((p, c), v) in entries {
11113            positions.push(p);
11114            columns.push(c);
11115            defaults.push(v);
11116        }
11117        steps.push(FkChildStep {
11118            child_table,
11119            action: FkChildAction::SetDefault {
11120                positions,
11121                columns,
11122                defaults,
11123            },
11124        });
11125    }
11126    let _ = delete_plan; // UPDATE never deletes children.
11127    Ok(steps)
11128}
11129
11130/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
11131/// the three action variants so the DELETE executor stays a
11132/// simple loop over the planned steps.
11133fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
11134    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
11135        EngineError::Storage(StorageError::TableNotFound {
11136            name: step.child_table.clone(),
11137        })
11138    })?;
11139    match &step.action {
11140        FkChildAction::Delete { positions } => {
11141            let _ = child.delete_rows(positions);
11142        }
11143        FkChildAction::SetNull { positions, columns } => {
11144            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
11145        }
11146        FkChildAction::SetDefault {
11147            positions,
11148            columns,
11149            defaults,
11150        } => {
11151            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
11152        }
11153    }
11154    Ok(())
11155}
11156
11157/// v7.6.5 — write new values into selected child cells via
11158/// `Table::update_row` (the catalog's existing UPDATE entry).
11159/// Groups writes by row position so multi-column updates on the
11160/// same row only call `update_row` once. `value_for(i)` produces
11161/// the new value for the i-th (position, column) entry.
11162fn apply_per_cell_writes(
11163    child: &mut spg_storage::Table,
11164    positions: &[usize],
11165    columns: &[usize],
11166    mut value_for: impl FnMut(usize) -> Value,
11167) -> Result<(), EngineError> {
11168    use alloc::collections::BTreeMap;
11169    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
11170    for i in 0..positions.len() {
11171        by_row
11172            .entry(positions[i])
11173            .or_default()
11174            .push((columns[i], value_for(i)));
11175    }
11176    for (pos, mutations) in by_row {
11177        let mut new_values = child.rows()[pos].values.clone();
11178        for (col, v) in mutations {
11179            if let Some(slot) = new_values.get_mut(col) {
11180                *slot = v;
11181            }
11182        }
11183        child
11184            .update_row(pos, new_values)
11185            .map_err(EngineError::Storage)?;
11186    }
11187    Ok(())
11188}
11189
11190fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
11191    match a {
11192        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
11193        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
11194        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
11195        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
11196        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
11197    }
11198}
11199
11200/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
11201/// default-fill. Free fn (rather than `&self`) so callers
11202/// with an active `&mut Table` borrow can still use it.
11203/// Literal defaults take the cached path (`col.default`);
11204/// runtime defaults hit `clock_fn` at each call. mailrs G4.
11205fn resolve_column_default_free(
11206    col: &ColumnSchema,
11207    clock_fn: Option<ClockFn>,
11208) -> Result<Value, EngineError> {
11209    if let Some(rt) = &col.runtime_default {
11210        return eval_runtime_default_free(rt, col.ty, clock_fn);
11211    }
11212    Ok(col.default.clone().unwrap_or(Value::Null))
11213}
11214
11215fn eval_runtime_default_free(
11216    rt: &str,
11217    ty: DataType,
11218    clock_fn: Option<ClockFn>,
11219) -> Result<Value, EngineError> {
11220    let s = rt.trim().to_ascii_lowercase();
11221    let canonical = s.trim_end_matches("()");
11222    let now_us = match clock_fn {
11223        Some(f) => f(),
11224        None => 0,
11225    };
11226    let v = match canonical {
11227        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
11228        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
11229        "current_time" | "localtime" => Value::Timestamp(now_us),
11230        other => {
11231            return Err(EngineError::Unsupported(alloc::format!(
11232                "runtime DEFAULT expression {other:?} not supported \
11233                 (v7.9.21 whitelist: now() / current_timestamp / \
11234                 current_date / current_time / localtimestamp / \
11235                 localtime)"
11236            )));
11237        }
11238    };
11239    coerce_value(v, ty, "DEFAULT", 0)
11240}
11241
11242/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
11243/// evaluation rather than being cacheable as a literal Value.
11244/// FunctionCall is the immediate case (`now()`,
11245/// `current_timestamp`). Literal expressions and simple sign-
11246/// flipped numerics still take the static-cache path.
11247fn is_runtime_default_expr(expr: &Expr) -> bool {
11248    match expr {
11249        Expr::FunctionCall { .. } => true,
11250        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
11251        _ => false,
11252    }
11253}
11254
11255fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
11256    let ty = column_type_to_data_type(c.ty);
11257    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
11258    if let Some(default_expr) = c.default {
11259        // v7.9.21 — distinguish literal defaults (evaluated once
11260        // at CREATE TABLE) from expression defaults (deferred to
11261        // INSERT). Function calls (`now()`, `current_timestamp`
11262        // — see v7.9.20 keyword promotion) take the runtime path.
11263        // Literals continue to cache. mailrs G4.
11264        if is_runtime_default_expr(&default_expr) {
11265            let display = alloc::format!("{default_expr}");
11266            schema = schema.with_runtime_default(display);
11267        } else {
11268            let raw = literal_expr_to_value(default_expr)?;
11269            let coerced = coerce_value(raw, ty, &c.name, 0)?;
11270            schema = schema.with_default(coerced);
11271        }
11272    }
11273    if c.auto_increment {
11274        // AUTO_INCREMENT only makes sense on integer-shaped columns.
11275        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
11276            return Err(EngineError::Unsupported(alloc::format!(
11277                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
11278            )));
11279        }
11280        schema = schema.with_auto_increment();
11281    }
11282    Ok(schema)
11283}
11284
/// v7.10.4 — decode a BYTEA literal. Accepts:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
///   * `Hello\000world` (backslash escape form; `\\` for literal
///     backslash, `\NNN` for a byte given as three *octal* digits
///     with a value of at most 255)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// Surrounding whitespace is trimmed before decoding. A backslash
/// that does not start a valid `\\` or `\NNN` sequence (for example
/// `\089`, where `8`/`9` are not octal digits, or `\400`, which
/// exceeds one byte) is kept as a literal byte.
///
/// # Errors
/// Only the hex form can fail: an odd number of hex digits or a
/// non-hex character yields a static error message.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        let digits = cleaned.as_bytes();
        if digits.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(digits.len() / 2);
        for pair in digits.chunks_exact(2) {
            let hi = hex_nibble(pair[0])?;
            let lo = hex_nibble(pair[1])?;
            out.push((hi << 4) | lo);
        }
        return Ok(out);
    }
    // Escape form or raw. Walk byte-by-byte; `\\` and `\NNN` octal
    // sequences decode; anything else is a literal byte.
    let is_octal = |x: u8| matches!(x, b'0'..=b'7');
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
        if b == b'\\' && i + 1 < bytes.len() {
            let n = bytes[i + 1];
            if n == b'\\' {
                out.push(b'\\');
                i += 2;
                continue;
            }
            // Only genuine octal digits (0-7) form an escape; `8` and
            // `9` must not be folded into a byte value.
            if i + 3 < bytes.len() && is_octal(n) && is_octal(bytes[i + 2]) && is_octal(bytes[i + 3])
            {
                let oct = |x: u8| u32::from(x - b'0');
                let v = oct(n) * 64 + oct(bytes[i + 2]) * 8 + oct(bytes[i + 3]);
                if let Ok(byte) = u8::try_from(v) {
                    out.push(byte);
                    i += 4;
                    continue;
                }
            }
        }
        out.push(b);
        i += 1;
    }
    Ok(out)
}

/// Convert one ASCII hex digit (either case) to its 4-bit value.
///
/// # Errors
/// Returns `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
11348
11349/// v7.10.11 — decode a PG TEXT[] external array form
11350/// (`{a,b,NULL}` with optional double-quoted elements). The
11351/// engine takes a leading/trailing `{`/`}` and splits at commas.
11352/// Quoted elements (`"hello, world"`) preserve embedded commas;
11353/// `\\` and `\"` decode to literal backslash / quote. Plain
11354/// unquoted `NULL` (case-insensitive) maps to `None`.
11355/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
11356/// element values. Single-element-type rules:
11357///   - all NULL / all Text → TextArray
11358///   - all Int (or Int+NULL) → IntArray
11359///   - any BigInt without Text → BigIntArray (widening)
11360///   - any Text → TextArray (fallback; non-string elements
11361///     render as text)
11362fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
11363    let mut has_text = false;
11364    let mut has_bigint = false;
11365    let mut has_int = false;
11366    for v in &items {
11367        match v {
11368            Value::Null => {}
11369            Value::Text(_) | Value::Json(_) => has_text = true,
11370            Value::BigInt(_) => has_bigint = true,
11371            Value::Int(_) | Value::SmallInt(_) => has_int = true,
11372            _ => has_text = true,
11373        }
11374    }
11375    if has_text || (!has_bigint && !has_int) {
11376        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
11377            .into_iter()
11378            .map(|v| match v {
11379                Value::Null => None,
11380                Value::Text(s) | Value::Json(s) => Some(s),
11381                other => Some(alloc::format!("{other:?}")),
11382            })
11383            .collect();
11384        return Value::TextArray(out);
11385    }
11386    if has_bigint {
11387        let out: alloc::vec::Vec<Option<i64>> = items
11388            .into_iter()
11389            .map(|v| match v {
11390                Value::Null => None,
11391                Value::Int(n) => Some(i64::from(n)),
11392                Value::SmallInt(n) => Some(i64::from(n)),
11393                Value::BigInt(n) => Some(n),
11394                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
11395            })
11396            .collect();
11397        return Value::BigIntArray(out);
11398    }
11399    let out: alloc::vec::Vec<Option<i32>> = items
11400        .into_iter()
11401        .map(|v| match v {
11402            Value::Null => None,
11403            Value::Int(n) => Some(n),
11404            Value::SmallInt(n) => Some(i32::from(n)),
11405            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
11406        })
11407        .collect();
11408    Value::IntArray(out)
11409}
11410
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The input is trimmed and must be wrapped in `{`/`}`; the
/// interior is split at commas. Quoted elements
/// (`"hello, world"`) preserve embedded commas, and a backslash
/// inside quotes makes the following character literal (so `\\`
/// and `\"` decode to backslash / quote). Unquoted elements are
/// trimmed; an unquoted `NULL` (case-insensitive) maps to `None`.
/// An empty interior yields an empty vector.
///
/// Quoted elements are accumulated as raw bytes and validated as
/// UTF-8 afterwards, so multi-byte characters survive intact.
///
/// # Errors
/// Returns a static message when the braces are missing, a quoted
/// element is unterminated, or something other than `,` follows a
/// quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        // Quoted element.
        if i < bytes.len() && bytes[i] == b'"' {
            i += 1; // open quote
            // Collect bytes, not chars: casting each byte to `char`
            // would split multi-byte UTF-8 sequences into mojibake.
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII backslashes were dropped from valid UTF-8, so
            // this should always succeed; the check is defensive.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted TEXT[] element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end. Both
            // slice bounds sit on ASCII delimiters, so they are valid
            // char boundaries.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
11475
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the literal `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty,
/// spells `NULL` in any case, or contains a comma, brace, quote,
/// backslash, or any whitespace character.
///
/// Quoting on *all* whitespace (not just space and tab) matters
/// for round-tripping: an unquoted element has its surrounding
/// whitespace trimmed when decoded, so e.g. a trailing newline
/// would otherwise be lost.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars()
                        .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
11512
/// v7.10.4 — render BYTEA bytes in PG hex output format: a `\x`
/// prefix followed by two lowercase hex digits per byte. Used by
/// the Text-side round-trip and the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut text = alloc::string::String::with_capacity(2 + 2 * b.len());
    text.push_str("\\x");
    for &byte in b {
        // High nibble first, then low nibble.
        for nibble in [byte >> 4, byte & 0x0F] {
            text.push(hex_digit(nibble));
        }
    }
    text
}

/// Map a 4-bit value to its lowercase hex digit. Inputs above 15
/// yield `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
11535
11536const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
11537    match t {
11538        ColumnTypeName::SmallInt => DataType::SmallInt,
11539        ColumnTypeName::Int => DataType::Int,
11540        ColumnTypeName::BigInt => DataType::BigInt,
11541        ColumnTypeName::Float => DataType::Float,
11542        ColumnTypeName::Text => DataType::Text,
11543        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
11544        ColumnTypeName::Char(n) => DataType::Char(n),
11545        ColumnTypeName::Bool => DataType::Bool,
11546        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
11547            dim,
11548            encoding: match encoding {
11549                SqlVecEncoding::F32 => VecEncoding::F32,
11550                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
11551                SqlVecEncoding::F16 => VecEncoding::F16,
11552            },
11553        },
11554        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
11555        ColumnTypeName::Date => DataType::Date,
11556        ColumnTypeName::Timestamp => DataType::Timestamp,
11557        ColumnTypeName::Timestamptz => DataType::Timestamptz,
11558        ColumnTypeName::Json => DataType::Json,
11559        ColumnTypeName::Jsonb => DataType::Jsonb,
11560        ColumnTypeName::Bytes => DataType::Bytes,
11561        ColumnTypeName::TextArray => DataType::TextArray,
11562        ColumnTypeName::IntArray => DataType::IntArray,
11563        ColumnTypeName::BigIntArray => DataType::BigIntArray,
11564        ColumnTypeName::TsVector => DataType::TsVector,
11565        ColumnTypeName::TsQuery => DataType::TsQuery,
11566    }
11567}
11568
11569/// Convert an INSERT VALUES expression to a storage Value. Supports literal
11570/// expressions, unary-minus over numeric literals, and pgvector-style
11571/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
11572fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
11573    match expr {
11574        Expr::Literal(l) => Ok(literal_to_value(l)),
11575        Expr::Cast { expr, target } => {
11576            let inner_value = literal_expr_to_value(*expr)?;
11577            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
11578        }
11579        Expr::Unary {
11580            op: UnOp::Neg,
11581            expr,
11582        } => match *expr {
11583            Expr::Literal(Literal::Integer(n)) => {
11584                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
11585                // — overflow on negate of i64::MIN is the one edge case.
11586                let neg = n.checked_neg().ok_or_else(|| {
11587                    EngineError::Unsupported("integer literal overflow on negation".into())
11588                })?;
11589                Ok(int_value_for(neg))
11590            }
11591            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
11592            other => Err(EngineError::Unsupported(alloc::format!(
11593                "unary minus over non-literal expression: {other:?}"
11594            ))),
11595        },
11596        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
11597        // INSERT-time. Each element must reduce to a Value through
11598        // `literal_expr_to_value`; NULL elements become `None`.
11599        // v7.11.13 — deduce shape from element values: all Int →
11600        // IntArray; any BigInt → BigIntArray (widening); any Text
11601        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
11602        // the outer Cast arm before reaching here and re-coerce.
11603        Expr::Array(items) => {
11604            let mut materialised: alloc::vec::Vec<Value> =
11605                alloc::vec::Vec::with_capacity(items.len());
11606            for elem in items {
11607                materialised.push(literal_expr_to_value(elem)?);
11608            }
11609            Ok(array_literal_widen(materialised))
11610        }
11611        other => Err(EngineError::Unsupported(alloc::format!(
11612            "non-literal INSERT value expression: {other:?}"
11613        ))),
11614    }
11615}
11616
11617fn literal_to_value(l: Literal) -> Value {
11618    match l {
11619        Literal::Integer(n) => int_value_for(n),
11620        Literal::Float(x) => Value::Float(x),
11621        Literal::String(s) => Value::Text(s),
11622        Literal::Bool(b) => Value::Bool(b),
11623        Literal::Null => Value::Null,
11624        Literal::Vector(v) => Value::Vector(v),
11625        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
11626    }
11627}
11628
11629/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
11630/// columns will still enforce the right tag downstream — this is just the
11631/// default we synthesise from an unannotated integer literal.
11632fn int_value_for(n: i64) -> Value {
11633    if let Ok(small) = i32::try_from(n) {
11634        Value::Int(small)
11635    } else {
11636        Value::BigInt(n)
11637    }
11638}
11639
11640/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
11641/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
11642/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
11643/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
11644/// `NULL` is always permitted; the nullability check happens later in storage.
11645#[allow(clippy::too_many_lines)]
11646fn coerce_value(
11647    v: Value,
11648    expected: DataType,
11649    col_name: &str,
11650    position: usize,
11651) -> Result<Value, EngineError> {
11652    if v.is_null() {
11653        return Ok(Value::Null);
11654    }
11655    let actual = v.data_type().expect("non-null");
11656    if actual == expected {
11657        return Ok(v);
11658    }
11659    let coerced = match (v, expected) {
11660        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11661        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11662        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11663        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11664            i128::from(n),
11665            precision,
11666            scale,
11667            col_name,
11668        )?),
11669        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
11670        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11671        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11672        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11673            i128::from(n),
11674            precision,
11675            scale,
11676            col_name,
11677        )?),
11678        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
11679        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11680        #[allow(clippy::cast_precision_loss)]
11681        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
11682        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11683            i128::from(n),
11684            precision,
11685            scale,
11686            col_name,
11687        )?),
11688        (Value::Float(x), DataType::Numeric { precision, scale }) => {
11689            Some(numeric_from_float(x, precision, scale, col_name)?)
11690        }
11691        // Text → DATE / TIMESTAMP: parse canonical text forms.
11692        (Value::Text(s), DataType::Date) => {
11693            let d = eval::parse_date_literal(&s).ok_or_else(|| {
11694                EngineError::Eval(EvalError::TypeMismatch {
11695                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
11696                })
11697            })?;
11698            Some(Value::Date(d))
11699        }
11700        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
11701        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
11702        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
11703        // form to the column's numeric / bool type at DEFAULT-
11704        // installation time so the storage check sees a typed
11705        // value. Parse failures fall through to TypeMismatch.
11706        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
11707        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
11708        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
11709        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
11710        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
11711            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
11712            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
11713            _ => None,
11714        },
11715        // v4.9: Text ↔ JSON coercion. No structural validation —
11716        // any text literal is accepted; the responsibility for
11717        // valid JSON lies with the producer.
11718        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
11719        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
11720        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
11721        // both JSON and JSONB on-disk as `Value::Json(String)` —
11722        // they share the underlying text payload. The cast
11723        // `'<text>'::jsonb` produces a Value::Json that needs to
11724        // satisfy a DataType::Jsonb column. Identity coerce in
11725        // both directions so JSON ↔ JSONB assignments work at all
11726        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
11727        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
11728        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
11729        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
11730        //   - Escape: `Hello\\000world`  (backslash + octal triples)
11731        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
11732        // Errors surface as TypeMismatch so the operator gets a
11733        // clear "this literal isn't a bytea literal" hint.
11734        (Value::Text(s), DataType::Bytes) => {
11735            let bytes = decode_bytea_literal(&s).map_err(|e| {
11736                EngineError::Eval(EvalError::TypeMismatch {
11737                    detail: alloc::format!(
11738                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
11739                    ),
11740                })
11741            })?;
11742            Some(Value::Bytes(bytes))
11743        }
11744        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
11745        // output (lowercase, `\x` prefix). Important when a
11746        // SELECT pulls a bytea cell through a Text column path.
11747        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
11748        // v7.10.11 — Text → TEXT[]. Decode PG's external array
11749        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
11750        // is the literal `NULL`; everything else is a quoted or
11751        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
11752        (Value::Text(s), DataType::TextArray) => {
11753            let arr = decode_text_array_literal(&s).map_err(|e| {
11754                EngineError::Eval(EvalError::TypeMismatch {
11755                    detail: alloc::format!(
11756                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
11757                    ),
11758                })
11759            })?;
11760            Some(Value::TextArray(arr))
11761        }
11762        // v7.10.11 — TEXT[] → Text round-trip uses PG's
11763        // external array form (`{a,b,NULL}`). Lets a SELECT
11764        // pull an array column through any Text-side codepath.
11765        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
11766        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
11767            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
11768                EngineError::Eval(EvalError::TypeMismatch {
11769                    detail: alloc::format!(
11770                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
11771                    ),
11772                })
11773            })?;
11774            Some(Value::Timestamp(t))
11775        }
11776        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
11777        // TIMESTAMP → day truncation).
11778        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
11779            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
11780        }
11781        // v7.9.21 — Value::Timestamp lands in either Timestamp
11782        // or Timestamptz columns; the on-disk layout is the
11783        // same i64 microseconds UTC.
11784        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
11785        (Value::Timestamp(t), DataType::Date) => {
11786            let days = t.div_euclid(86_400_000_000);
11787            i32::try_from(days).ok().map(Value::Date)
11788        }
11789        (
11790            Value::Numeric {
11791                scaled,
11792                scale: src_scale,
11793            },
11794            DataType::Numeric { precision, scale },
11795        ) => Some(numeric_rescale(
11796            scaled, src_scale, precision, scale, col_name,
11797        )?),
11798        #[allow(clippy::cast_precision_loss)]
11799        (Value::Numeric { scaled, scale }, DataType::Float) => {
11800            let mut div = 1.0_f64;
11801            for _ in 0..scale {
11802                div *= 10.0;
11803            }
11804            Some(Value::Float((scaled as f64) / div))
11805        }
11806        (Value::Numeric { scaled, scale }, DataType::Int) => {
11807            let truncated = numeric_truncate_to_integer(scaled, scale);
11808            i32::try_from(truncated).ok().map(Value::Int)
11809        }
11810        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
11811            let truncated = numeric_truncate_to_integer(scaled, scale);
11812            i64::try_from(truncated).ok().map(Value::BigInt)
11813        }
11814        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
11815            let truncated = numeric_truncate_to_integer(scaled, scale);
11816            i16::try_from(truncated).ok().map(Value::SmallInt)
11817        }
11818        // VARCHAR(n) enforces an upper bound on character count.
11819        (Value::Text(s), DataType::Varchar(max)) => {
11820            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
11821                Some(Value::Text(s))
11822            } else {
11823                return Err(EngineError::Unsupported(alloc::format!(
11824                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
11825                     {} chars",
11826                    s.chars().count()
11827                )));
11828            }
11829        }
11830        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
11831        // when the column declares `VECTOR(N) USING SQ8` and
11832        // the INSERT VALUES expression yields a raw f32 vector
11833        // (the normal pgvector-shape literal). Dim mismatch
11834        // falls through the `_ => None` arm and surfaces as
11835        // `TypeMismatch` with the expected SQ8 column type —
11836        // matching the F32 path's existing error.
11837        (
11838            Value::Vector(v),
11839            DataType::Vector {
11840                dim,
11841                encoding: VecEncoding::Sq8,
11842            },
11843        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
11844        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
11845        // columns. Bit-exact at the storage layer (modulo
11846        // half-precision rounding); no rerank pass needed at
11847        // search time.
11848        (
11849            Value::Vector(v),
11850            DataType::Vector {
11851                dim,
11852                encoding: VecEncoding::F16,
11853            },
11854        ) if v.len() == dim as usize => Some(Value::HalfVector(
11855            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
11856        )),
11857        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
11858        // is already longer we reject (PG truncates trailing-space-only;
11859        // staying strict for v1).
11860        (Value::Text(s), DataType::Char(size)) => {
11861            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
11862            if len > size {
11863                return Err(EngineError::Unsupported(alloc::format!(
11864                    "value for CHAR({size}) column `{col_name}` exceeds length: \
11865                     {len} chars"
11866                )));
11867            }
11868            let need = (size - len) as usize;
11869            let mut padded = s;
11870            padded.reserve(need);
11871            for _ in 0..need {
11872                padded.push(' ');
11873            }
11874            Some(Value::Text(padded))
11875        }
11876        _ => None,
11877    };
11878    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
11879        column: col_name.into(),
11880        expected,
11881        actual,
11882        position,
11883    }))
11884}
11885
11886/// v7.12.4 — render a function arg list into the
11887/// canonical form the storage layer caches as
11888/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
11889/// this string for both display + as a coarse signature key
11890/// for the (deferred) overload resolution v7.12.5+ adds.
11891fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
11892    use core::fmt::Write;
11893    let mut out = alloc::string::String::from("(");
11894    for (i, a) in args.iter().enumerate() {
11895        if i > 0 {
11896            out.push_str(", ");
11897        }
11898        match a.mode {
11899            spg_sql::ast::FunctionArgMode::In => {}
11900            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
11901            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
11902        }
11903        if let Some(n) = &a.name {
11904            out.push_str(n);
11905            out.push(' ');
11906        }
11907        match &a.ty {
11908            spg_sql::ast::FunctionArgType::Typed(t) => {
11909                let _ = write!(out, "{t}");
11910            }
11911            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
11912        }
11913    }
11914    out.push(')');
11915    out
11916}
11917
11918#[cfg(test)]
11919mod tests {
11920    use super::*;
11921    use alloc::vec;
11922
11923    fn unwrap_command_ok(r: &QueryResult) -> usize {
11924        match r {
11925            QueryResult::CommandOk { affected, .. } => *affected,
11926            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
11927        }
11928    }
11929
11930    #[test]
11931    fn create_table_registers_schema() {
11932        let mut e = Engine::new();
11933        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
11934            .unwrap();
11935        assert_eq!(e.catalog().table_count(), 1);
11936        let t = e.catalog().get("foo").unwrap();
11937        assert_eq!(t.schema().columns.len(), 2);
11938        assert_eq!(t.schema().columns[0].ty, DataType::Int);
11939        assert!(!t.schema().columns[0].nullable);
11940        assert_eq!(t.schema().columns[1].ty, DataType::Text);
11941    }
11942
11943    #[test]
11944    fn create_table_vector_default_is_f32_encoded() {
11945        let mut e = Engine::new();
11946        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
11947        let t = e.catalog().get("t").unwrap();
11948        assert_eq!(
11949            t.schema().columns[0].ty,
11950            DataType::Vector {
11951                dim: 8,
11952                encoding: VecEncoding::F32,
11953            },
11954        );
11955    }
11956
11957    #[test]
11958    fn create_table_vector_using_sq8_succeeds() {
11959        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
11960        // is lifted. CREATE TABLE persists an SQ8 column type in
11961        // the catalog; INSERT (next test) quantises raw f32 input.
11962        let mut e = Engine::new();
11963        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
11964        let t = e.catalog().get("t").unwrap();
11965        assert_eq!(
11966            t.schema().columns[0].ty,
11967            DataType::Vector {
11968                dim: 8,
11969                encoding: VecEncoding::Sq8,
11970            },
11971        );
11972    }
11973
11974    #[test]
11975    fn insert_into_sq8_column_quantises_f32_payload() {
11976        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
11977        // `Value::Vector(Vec<f32>)` literal into the column's
11978        // quantised representation. The row that lands in the
11979        // catalog must therefore hold a `Value::Sq8Vector`, not the
11980        // original f32 buffer — that's the bit that delivers the
11981        // 4× compression target.
11982        let mut e = Engine::new();
11983        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
11984        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
11985            .unwrap();
11986        let t = e.catalog().get("t").unwrap();
11987        assert_eq!(t.rows().len(), 1);
11988        match &t.rows()[0].values[0] {
11989            Value::Sq8Vector(q) => {
11990                assert_eq!(q.bytes.len(), 4);
11991                // min/max are derived from the payload: min=0.0, max=1.0.
11992                assert!((q.min - 0.0).abs() < 1e-6);
11993                assert!((q.max - 1.0).abs() < 1e-6);
11994            }
11995            other => panic!("expected Sq8Vector cell, got {other:?}"),
11996        }
11997    }
11998
11999    #[test]
12000    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
12001        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
12002        // converts the incoming `Value::Vector(Vec<f32>)` cell
12003        // into `Value::HalfVector(HalfVector)` via the new
12004        // `coerce_value` arm. The dequantised round-trip is
12005        // bit-exact for f16-representable values, so 0.0 / 0.25
12006        // / 0.5 / 1.0 hit their grid points exactly.
12007        let mut e = Engine::new();
12008        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12009            .unwrap();
12010        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
12011            .unwrap();
12012        let t = e.catalog().get("t").unwrap();
12013        assert_eq!(t.rows().len(), 1);
12014        match &t.rows()[0].values[0] {
12015            Value::HalfVector(h) => {
12016                assert_eq!(h.dim(), 4);
12017                let back = h.to_f32_vec();
12018                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
12019                for (g, e) in back.iter().zip(expected.iter()) {
12020                    assert!(
12021                        (g - e).abs() < 1e-6,
12022                        "{g} vs {e} should be exact on f16 grid"
12023                    );
12024                }
12025            }
12026            other => panic!("expected HalfVector cell, got {other:?}"),
12027        }
12028    }
12029
12030    #[test]
12031    fn alter_index_rebuild_in_place_succeeds() {
12032        // v6.0.4: bare REBUILD (no encoding switch) walks every
12033        // row again to rebuild the NSW graph. Verifies the engine
12034        // dispatch + storage helper plumbing without changing any
12035        // cell encoding.
12036        let mut e = Engine::new();
12037        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
12038            .unwrap();
12039        for i in 0..8_i32 {
12040            #[allow(clippy::cast_precision_loss)]
12041            let base = (i as f32) * 0.1;
12042            e.execute(&alloc::format!(
12043                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
12044                b1 = base + 0.01,
12045                b2 = base + 0.02,
12046            ))
12047            .unwrap();
12048        }
12049        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12050        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
12051        // Schema encoding stays F32 (no encoding clause).
12052        assert_eq!(
12053            e.catalog().get("t").unwrap().schema().columns[1].ty,
12054            DataType::Vector {
12055                dim: 3,
12056                encoding: VecEncoding::F32,
12057            },
12058        );
12059    }
12060
12061    #[test]
12062    fn alter_index_rebuild_with_encoding_switches_cell_type() {
12063        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
12064        // stored cell from F32 → SQ8 + rebuilds the graph atop the
12065        // new encoding. Post-rebuild, cells must be Sq8Vector and
12066        // the schema must report encoding = Sq8.
12067        let mut e = Engine::new();
12068        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
12069            .unwrap();
12070        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
12071            .unwrap();
12072        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
12073        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
12074            .unwrap();
12075        let t = e.catalog().get("t").unwrap();
12076        assert_eq!(
12077            t.schema().columns[1].ty,
12078            DataType::Vector {
12079                dim: 4,
12080                encoding: VecEncoding::Sq8,
12081            },
12082        );
12083        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
12084    }
12085
12086    #[test]
12087    fn alter_index_rebuild_unknown_index_errors() {
12088        let mut e = Engine::new();
12089        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
12090        assert!(
12091            matches!(
12092                &err,
12093                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
12094            ),
12095            "got: {err}"
12096        );
12097    }
12098
12099    #[test]
12100    fn alter_index_rebuild_on_btree_index_errors() {
12101        // REBUILD on a B-tree index has no semantic meaning in
12102        // v6.0.4 — rejected at the storage layer with `Unsupported`.
12103        let mut e = Engine::new();
12104        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12105        e.execute("INSERT INTO t VALUES (1)").unwrap();
12106        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
12107        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
12108        assert!(
12109            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
12110            "got: {err}"
12111        );
12112    }
12113
12114    #[test]
12115    fn prepared_insert_substitutes_placeholders() {
12116        // v6.1.1: prepare() parses once; execute_prepared() walks the
12117        // AST and replaces $1/$2 with the param Values BEFORE the
12118        // dispatch sees them. Same logical result as a simple-query
12119        // INSERT, but parse happens once per *statement*, not per
12120        // execution.
12121        let mut e = Engine::new();
12122        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
12123            .unwrap();
12124        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
12125        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
12126            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
12127                .unwrap();
12128        }
12129        // Read back via simple-query SELECT.
12130        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
12131        let QueryResult::Rows { rows, .. } = rows_result else {
12132            panic!("expected Rows")
12133        };
12134        assert_eq!(rows.len(), 3);
12135    }
12136
12137    #[test]
12138    fn prepared_select_with_placeholder_filters_rows() {
12139        let mut e = Engine::new();
12140        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
12141            .unwrap();
12142        for i in 0..10_i32 {
12143            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
12144                .unwrap();
12145        }
12146        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
12147        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
12148        else {
12149            panic!("expected Rows")
12150        };
12151        // v = 35 means i*7 = 35 → i = 5.
12152        assert_eq!(rows.len(), 1);
12153        assert_eq!(rows[0].values[0], Value::Int(5));
12154    }
12155
12156    #[test]
12157    fn prepared_too_few_params_errors() {
12158        let mut e = Engine::new();
12159        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12160        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
12161        let err = e.execute_prepared(stmt, &[]).unwrap_err();
12162        assert!(
12163            matches!(
12164                &err,
12165                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
12166            ),
12167            "got: {err}"
12168        );
12169    }
12170
12171    #[test]
12172    fn insert_into_half_column_dim_mismatch_errors() {
12173        let mut e = Engine::new();
12174        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
12175            .unwrap();
12176        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12177        assert!(matches!(
12178            &err,
12179            EngineError::Storage(StorageError::TypeMismatch { .. })
12180        ));
12181    }
12182
12183    #[test]
12184    fn insert_into_sq8_column_dim_mismatch_errors() {
12185        // Dim mismatch falls through the `coerce_value` Vector→Sq8
12186        // arm's guard and surfaces as `TypeMismatch` — the same
12187        // error the F32 path produces today, so client error
12188        // handling stays uniform across encodings.
12189        let mut e = Engine::new();
12190        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
12191        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
12192        assert!(
12193            matches!(
12194                &err,
12195                EngineError::Storage(StorageError::TypeMismatch { .. })
12196            ),
12197            "got: {err}",
12198        );
12199    }
12200
12201    #[test]
12202    fn create_table_duplicate_errors() {
12203        let mut e = Engine::new();
12204        e.execute("CREATE TABLE foo (a INT)").unwrap();
12205        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
12206        assert!(matches!(
12207            err,
12208            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
12209        ));
12210    }
12211
12212    #[test]
12213    fn insert_into_unknown_table_errors() {
12214        let mut e = Engine::new();
12215        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
12216        assert!(matches!(
12217            err,
12218            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
12219        ));
12220    }
12221
12222    #[test]
12223    fn insert_happy_path_reports_one_affected() {
12224        let mut e = Engine::new();
12225        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12226        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
12227        assert_eq!(unwrap_command_ok(&r), 1);
12228        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
12229    }
12230
12231    #[test]
12232    fn insert_arity_mismatch_propagates() {
12233        let mut e = Engine::new();
12234        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
12235        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
12236        assert!(matches!(
12237            err,
12238            EngineError::Storage(StorageError::ArityMismatch { .. })
12239        ));
12240    }
12241
12242    #[test]
12243    fn insert_negative_integer_via_unary_minus() {
12244        let mut e = Engine::new();
12245        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12246        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
12247        let rows = e.catalog().get("foo").unwrap().rows();
12248        assert_eq!(rows[0].values[0], Value::Int(-7));
12249    }
12250
12251    #[test]
12252    fn insert_non_literal_expr_unsupported() {
12253        let mut e = Engine::new();
12254        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
12255        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
12256        assert!(matches!(err, EngineError::Unsupported(_)));
12257    }
12258
12259    #[test]
12260    fn select_star_returns_all_rows_in_insertion_order() {
12261        let mut e = Engine::new();
12262        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
12263            .unwrap();
12264        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
12265        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
12266        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
12267
12268        let r = e.execute("SELECT * FROM foo").unwrap();
12269        let QueryResult::Rows { columns, rows } = r else {
12270            panic!("expected Rows")
12271        };
12272        assert_eq!(columns.len(), 2);
12273        assert_eq!(columns[0].name, "a");
12274        assert_eq!(rows.len(), 3);
12275        assert_eq!(
12276            rows[1].values,
12277            vec![Value::Int(2), Value::Text("two".into())]
12278        );
12279    }
12280
12281    #[test]
12282    fn select_star_on_empty_table_returns_zero_rows() {
12283        let mut e = Engine::new();
12284        e.execute("CREATE TABLE foo (a INT)").unwrap();
12285        let r = e.execute("SELECT * FROM foo").unwrap();
12286        match r {
12287            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
12288            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12289        }
12290    }
12291
12292    // --- v0.4: WHERE + projection ------------------------------------------
12293
12294    fn make_three_row_users(e: &mut Engine) {
12295        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
12296            .unwrap();
12297        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
12298            .unwrap();
12299        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
12300            .unwrap();
12301        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
12302            .unwrap();
12303    }
12304
12305    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
12306        match r {
12307            QueryResult::Rows { columns, rows } => (columns, rows),
12308            QueryResult::CommandOk { .. } => panic!("expected Rows"),
12309        }
12310    }
12311
12312    #[test]
12313    fn where_filter_passes_only_true_rows() {
12314        let mut e = Engine::new();
12315        make_three_row_users(&mut e);
12316        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
12317        let (_, rows) = unwrap_rows(r);
12318        assert_eq!(rows.len(), 2);
12319        assert_eq!(rows[0].values[0], Value::Int(2));
12320        assert_eq!(rows[1].values[0], Value::Int(3));
12321    }
12322
12323    #[test]
12324    fn where_with_null_result_filters_out_row() {
12325        let mut e = Engine::new();
12326        make_three_row_users(&mut e);
12327        // score is NULL for bob → score > 80 is NULL → row excluded
12328        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
12329        let (_, rows) = unwrap_rows(r);
12330        assert_eq!(rows.len(), 1);
12331        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
12332    }
12333
12334    #[test]
12335    fn projection_named_columns() {
12336        let mut e = Engine::new();
12337        make_three_row_users(&mut e);
12338        let r = e.execute("SELECT name, score FROM users").unwrap();
12339        let (cols, rows) = unwrap_rows(r);
12340        assert_eq!(cols.len(), 2);
12341        assert_eq!(cols[0].name, "name");
12342        assert_eq!(cols[1].name, "score");
12343        assert_eq!(rows.len(), 3);
12344        assert_eq!(
12345            rows[0].values,
12346            vec![Value::Text("alice".into()), Value::Int(90)]
12347        );
12348    }
12349
12350    #[test]
12351    fn projection_with_column_alias() {
12352        let mut e = Engine::new();
12353        make_three_row_users(&mut e);
12354        let r = e
12355            .execute("SELECT name AS who FROM users WHERE id = 1")
12356            .unwrap();
12357        let (cols, rows) = unwrap_rows(r);
12358        assert_eq!(cols[0].name, "who");
12359        assert_eq!(rows.len(), 1);
12360        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
12361    }
12362
12363    #[test]
12364    fn qualified_column_with_table_alias_resolves() {
12365        let mut e = Engine::new();
12366        make_three_row_users(&mut e);
12367        let r = e
12368            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
12369            .unwrap();
12370        let (cols, rows) = unwrap_rows(r);
12371        assert_eq!(cols.len(), 2);
12372        assert_eq!(rows.len(), 2);
12373    }
12374
12375    #[test]
12376    fn qualified_column_with_wrong_alias_errors() {
12377        let mut e = Engine::new();
12378        make_three_row_users(&mut e);
12379        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
12380        assert!(matches!(
12381            err,
12382            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
12383        ));
12384    }
12385
12386    #[test]
12387    fn select_unknown_column_errors_in_projection() {
12388        let mut e = Engine::new();
12389        make_three_row_users(&mut e);
12390        let err = e.execute("SELECT ghost FROM users").unwrap_err();
12391        assert!(matches!(
12392            err,
12393            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
12394        ));
12395    }
12396
12397    #[test]
12398    fn where_unknown_column_errors() {
12399        let mut e = Engine::new();
12400        make_three_row_users(&mut e);
12401        let err = e
12402            .execute("SELECT * FROM users WHERE ghost = 1")
12403            .unwrap_err();
12404        assert!(matches!(
12405            err,
12406            EngineError::Eval(EvalError::ColumnNotFound { .. })
12407        ));
12408    }
12409
12410    #[test]
12411    fn expression_projection_evaluates_and_renders() {
12412        // Compound expressions in the SELECT list are evaluated per row;
12413        // the output column is typed TEXT, name defaults to the expression.
12414        let mut e = Engine::new();
12415        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
12416        e.execute("INSERT INTO t VALUES (3)").unwrap();
12417        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
12418        assert_eq!(rows.len(), 1);
12419        // The expression evaluates to integer 3; rendered as the cell value
12420        // (storage::Value::Int(3) since arithmetic kept ints).
12421        assert_eq!(rows[0].values[0], Value::Int(3));
12422    }
12423
12424    #[test]
12425    fn select_unknown_table_errors() {
12426        let mut e = Engine::new();
12427        let err = e.execute("SELECT * FROM ghost").unwrap_err();
12428        assert!(matches!(
12429            err,
12430            EngineError::Storage(StorageError::TableNotFound { .. })
12431        ));
12432    }
12433
12434    #[test]
12435    fn invalid_sql_returns_parse_error() {
12436        // v4.4: UPDATE is now real SQL, so use a true syntactic
12437        // garbage payload for the parse-error path.
12438        let mut e = Engine::new();
12439        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
12440        assert!(matches!(err, EngineError::Parse(_)));
12441    }
12442
12443    // --- v0.8 CREATE INDEX + index seek ------------------------------------
12444
12445    #[test]
12446    fn create_index_registers_on_table() {
12447        let mut e = Engine::new();
12448        make_three_row_users(&mut e);
12449        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
12450        let t = e.catalog().get("users").unwrap();
12451        assert_eq!(t.indices().len(), 1);
12452        assert_eq!(t.indices()[0].name, "by_name");
12453    }
12454
12455    #[test]
12456    fn create_index_on_unknown_table_errors() {
12457        let mut e = Engine::new();
12458        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
12459        assert!(matches!(
12460            err,
12461            EngineError::Storage(StorageError::TableNotFound { .. })
12462        ));
12463    }
12464
12465    #[test]
12466    fn create_index_on_unknown_column_errors() {
12467        let mut e = Engine::new();
12468        make_three_row_users(&mut e);
12469        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
12470        assert!(matches!(
12471            err,
12472            EngineError::Storage(StorageError::ColumnNotFound { .. })
12473        ));
12474    }
12475
12476    #[test]
12477    fn select_eq_uses_index_returns_same_rows_as_scan() {
12478        // Build two engines: one with an index, one without. Same query →
12479        // same row set (index is a planner optimisation, not a semantic
12480        // change).
12481        let mut without = Engine::new();
12482        make_three_row_users(&mut without);
12483        let mut with = Engine::new();
12484        make_three_row_users(&mut with);
12485        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
12486
12487        let q = "SELECT * FROM users WHERE id = 2";
12488        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
12489        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
12490        assert_eq!(no_idx_rows, idx_rows);
12491        assert_eq!(idx_rows.len(), 1);
12492    }
12493
12494    #[test]
12495    fn select_eq_with_no_matching_index_value_returns_empty() {
12496        let mut e = Engine::new();
12497        make_three_row_users(&mut e);
12498        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
12499        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
12500        assert_eq!(rows.len(), 0);
12501    }
12502
12503    // --- v0.9 transactions -------------------------------------------------
12504
12505    #[test]
12506    fn begin_sets_in_transaction_flag() {
12507        let mut e = Engine::new();
12508        assert!(!e.in_transaction());
12509        e.execute("BEGIN").unwrap();
12510        assert!(e.in_transaction());
12511    }
12512
12513    #[test]
12514    fn double_begin_errors() {
12515        let mut e = Engine::new();
12516        e.execute("BEGIN").unwrap();
12517        let err = e.execute("BEGIN").unwrap_err();
12518        assert_eq!(err, EngineError::TransactionAlreadyOpen);
12519    }
12520
12521    #[test]
12522    fn commit_without_begin_errors() {
12523        let mut e = Engine::new();
12524        let err = e.execute("COMMIT").unwrap_err();
12525        assert_eq!(err, EngineError::NoActiveTransaction);
12526    }
12527
12528    #[test]
12529    fn rollback_without_begin_errors() {
12530        let mut e = Engine::new();
12531        let err = e.execute("ROLLBACK").unwrap_err();
12532        assert_eq!(err, EngineError::NoActiveTransaction);
12533    }
12534
12535    #[test]
12536    fn commit_applies_shadow_to_committed_catalog() {
12537        let mut e = Engine::new();
12538        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12539        e.execute("BEGIN").unwrap();
12540        e.execute("INSERT INTO t VALUES (1)").unwrap();
12541        e.execute("INSERT INTO t VALUES (2)").unwrap();
12542        e.execute("COMMIT").unwrap();
12543        assert!(!e.in_transaction());
12544        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
12545    }
12546
12547    #[test]
12548    fn rollback_discards_shadow() {
12549        let mut e = Engine::new();
12550        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12551        e.execute("BEGIN").unwrap();
12552        e.execute("INSERT INTO t VALUES (1)").unwrap();
12553        e.execute("INSERT INTO t VALUES (2)").unwrap();
12554        e.execute("ROLLBACK").unwrap();
12555        assert!(!e.in_transaction());
12556        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
12557    }
12558
12559    #[test]
12560    fn select_during_tx_sees_uncommitted_writes_own_session() {
12561        // The shadow catalog is read by SELECTs while a TX is open — the
12562        // session can see its own pending writes.
12563        let mut e = Engine::new();
12564        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12565        e.execute("BEGIN").unwrap();
12566        e.execute("INSERT INTO t VALUES (42)").unwrap();
12567        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
12568        assert_eq!(rows.len(), 1);
12569        assert_eq!(rows[0].values[0], Value::Int(42));
12570    }
12571
12572    #[test]
12573    fn snapshot_with_no_users_is_bare_catalog_format() {
12574        let mut e = Engine::new();
12575        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12576        let bytes = e.snapshot();
12577        assert_eq!(
12578            &bytes[..8],
12579            b"SPGDB001",
12580            "must be the bare v3.x catalog magic"
12581        );
12582        let e2 = Engine::restore_envelope(&bytes).unwrap();
12583        assert!(e2.users().is_empty());
12584        assert_eq!(e2.catalog().table_count(), 1);
12585    }
12586
12587    #[test]
12588    fn snapshot_with_users_round_trips_both_via_envelope() {
12589        let mut e = Engine::new();
12590        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12591        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
12592        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
12593            .unwrap();
12594        let bytes = e.snapshot();
12595        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
12596        let e2 = Engine::restore_envelope(&bytes).unwrap();
12597        assert_eq!(e2.users().len(), 2);
12598        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
12599        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
12600        assert_eq!(e2.verify_user("alice", "wrong"), None);
12601        assert_eq!(e2.catalog().table_count(), 1);
12602    }
12603
#[test]
fn ddl_inside_tx_also_rolled_back() {
    let mut engine = Engine::new();
    for sql in ["BEGIN", "CREATE TABLE t (v INT)"] {
        engine.execute(sql).unwrap();
    }
    // While the transaction is open the new table can be queried.
    engine.execute("SELECT * FROM t").unwrap();

    engine.execute("ROLLBACK").unwrap();

    // After the rollback the table must no longer resolve.
    let err = engine.execute("SELECT * FROM t").unwrap_err();
    let table_missing = matches!(
        err,
        EngineError::Storage(StorageError::TableNotFound { .. })
    );
    assert!(table_missing);
}
12619
12620    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
12621
#[test]
fn create_publication_lands_in_catalog() {
    let mut engine = Engine::new();
    // A fresh engine starts without publications.
    assert!(engine.publications().is_empty());

    engine.execute("CREATE PUBLICATION pub_a").unwrap();

    let pubs = engine.publications();
    assert_eq!(pubs.len(), 1);
    assert!(pubs.contains("pub_a"));
}
12630
#[test]
fn create_publication_duplicate_errors() {
    let mut engine = Engine::new();
    engine.execute("CREATE PUBLICATION pub_a").unwrap();

    // Re-creating the same publication must fail; the error's Debug
    // rendering is expected to mention `DuplicateName`.
    let err = engine.execute("CREATE PUBLICATION pub_a").unwrap_err();
    let rendered = alloc::format!("{err:?}");
    assert!(rendered.contains("DuplicateName"), "got {err:?}");
}
12641
#[test]
fn drop_publication_silent_when_absent() {
    let mut engine = Engine::new();
    // Mirrors PG: dropping an unknown publication is a successful
    // no-op that reports zero affected rows.
    let affected = match engine.execute("DROP PUBLICATION nope").unwrap() {
        QueryResult::CommandOk { affected, .. } => affected,
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 0);
}
12653
#[test]
fn drop_publication_present_reports_one_affected() {
    let mut engine = Engine::new();
    engine.execute("CREATE PUBLICATION pub_a").unwrap();

    // Dropping an existing publication counts as one affected entry
    // and must flag the catalog as modified.
    let (affected, modified_catalog) = match engine.execute("DROP PUBLICATION pub_a").unwrap() {
        QueryResult::CommandOk {
            affected,
            modified_catalog,
        } => (affected, modified_catalog),
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 1);
    assert!(modified_catalog);
    assert!(engine.publications().is_empty());
}
12671
#[test]
fn publications_persist_across_snapshot_restore() {
    // Engine-level ship-gate for persistence: a snapshot followed by
    // `restore_envelope` has to carry the publication catalog over.
    // The process-restart flavour is covered by the spg-server e2e.
    let mut source = Engine::new();
    for sql in [
        "CREATE PUBLICATION pub_a",
        "CREATE PUBLICATION pub_b FOR ALL TABLES",
    ] {
        source.execute(sql).unwrap();
    }

    let snapshot = source.snapshot();
    let restored = Engine::restore_envelope(&snapshot).unwrap();

    let pubs = restored.publications();
    assert_eq!(pubs.len(), 2);
    for name in ["pub_a", "pub_b"] {
        assert!(pubs.contains(name));
    }
}
12688
#[test]
fn create_publication_allowed_inside_transaction() {
    // The in-TX guard from v6.1.2 was removed in v6.1.4: PG permits
    // CREATE PUBLICATION inside a transaction, and the auto-commit
    // wrap path relies on the same allowance.
    let mut engine = Engine::new();
    for sql in ["BEGIN", "CREATE PUBLICATION pub_a", "COMMIT"] {
        engine.execute(sql).unwrap();
    }
    assert!(engine.publications().contains("pub_a"));
}
12700
12701    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
12702
#[test]
fn create_publication_for_table_list_lands_with_scope() {
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t1 (id INT NOT NULL)",
        "CREATE TABLE t2 (id INT NOT NULL)",
        "CREATE PUBLICATION pub_a FOR TABLE t1, t2",
    ] {
        engine.execute(sql).unwrap();
    }

    // The stored scope must be the explicit table list, in statement order.
    let scope = engine.publications().get("pub_a").cloned();
    let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
        panic!("expected ForTables scope, got {scope:?}")
    };
    let expected = alloc::vec!["t1".to_string(), "t2".to_string()];
    assert_eq!(ts, expected);
}
12716
#[test]
fn create_publication_all_tables_except_lands_with_scope() {
    let mut engine = Engine::new();
    let sql = "CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3";
    engine.execute(sql).unwrap();

    // The exclusion list has to be recorded on the stored scope.
    let scope = engine.publications().get("pub_a").cloned();
    let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
        panic!("expected AllTablesExcept scope, got {scope:?}")
    };
    let expected = alloc::vec!["t3".to_string()];
    assert_eq!(ts, expected);
}
12728
#[test]
fn show_publications_empty_returns_zero_rows() {
    // With no publications defined, SHOW PUBLICATIONS must still
    // return the three-column header with an empty row set.
    let e = Engine::new();
    let (rows, columns) = match e.execute_readonly("SHOW PUBLICATIONS").unwrap() {
        QueryResult::Rows { rows, columns } => (rows, columns),
        // Report what actually came back, matching the diagnostics used
        // by the CommandOk tests in this module.
        other => panic!("expected Rows, got {other:?}"),
    };
    assert!(rows.is_empty());
    assert_eq!(columns.len(), 3);
    assert_eq!(columns[0].name, "name");
    assert_eq!(columns[1].name, "scope");
    assert_eq!(columns[2].name, "table_count");
}
12742
#[test]
fn show_publications_returns_one_row_per_publication_ordered_by_name() {
    // Create three publications out of alphabetical order, one per
    // scope variant, then check that SHOW PUBLICATIONS lists them
    // sorted by name with the right scope summary and table count.
    let mut e = Engine::new();
    e.execute("CREATE PUBLICATION z_pub").unwrap();
    e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
        .unwrap();
    e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
        .unwrap();
    let rows = match e.execute_readonly("SHOW PUBLICATIONS").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        other => panic!("expected Rows, got {other:?}"),
    };
    assert_eq!(rows.len(), 3);

    // Alphabetical order: a_pub, m_pub, z_pub.
    let names: Vec<&str> = rows
        .iter()
        .map(|row| match &row.values[0] {
            Value::Text(s) => s.as_str(),
            other => panic!("expected Text name, got {other:?}"),
        })
        .collect();
    assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);

    // Expected (scope summary, table_count) per row, in name order.
    // The bare `z_pub` is shown as "FOR ALL TABLES" with a NULL count.
    let expected = [
        ("FOR TABLE t1, t2", Value::Int(2)),
        ("FOR ALL TABLES EXCEPT bad", Value::Int(1)),
        ("FOR ALL TABLES", Value::Null),
    ];
    for (row, (scope, table_count)) in rows.iter().zip(expected) {
        match &row.values[1] {
            Value::Text(s) => assert_eq!(s, scope),
            other => panic!("expected Text, got {other:?}"),
        }
        assert_eq!(row.values[2], table_count);
    }
}
12787
#[test]
fn for_list_scopes_persist_across_snapshot() {
    // v6.1.2's envelope-v3 round trip only exercised AllTables; the
    // scope-1 / scope-2 tags introduced in v6.1.3 must survive too.
    let mut source = Engine::new();
    for sql in [
        "CREATE PUBLICATION p1 FOR TABLE t1, t2",
        "CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse",
    ] {
        source.execute(sql).unwrap();
    }

    let snapshot = source.snapshot();
    let restored = Engine::restore_envelope(&snapshot).unwrap();
    let pubs = restored.publications();
    assert_eq!(pubs.len(), 2);

    // p1 keeps its explicit table list.
    let p1 = pubs.get("p1").cloned();
    let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
        panic!("p1 scope lost: {p1:?}")
    };
    assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);

    // p2 keeps its exclusion list.
    let p2 = pubs.get("p2").cloned();
    let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
        panic!("p2 scope lost: {p2:?}")
    };
    assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
}
12810
12811    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
12812
#[test]
fn create_subscription_lands_in_catalog_with_defaults() {
    let mut engine = Engine::new();
    let sql =
        "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a";
    engine.execute(sql).unwrap();

    let sub = engine
        .subscriptions()
        .get("sub_a")
        .cloned()
        .expect("present");

    // Values taken from the statement.
    assert_eq!(sub.conn_str, "host=127.0.0.1 port=20002");
    assert_eq!(sub.publications, alloc::vec!["pub_a".to_string()]);
    // Defaults for a freshly created subscription.
    assert!(sub.enabled);
    assert_eq!(sub.last_received_pos, 0);
}
12826
#[test]
fn create_subscription_duplicate_name_errors() {
    let mut engine = Engine::new();
    engine
        .execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
        .unwrap();

    // Same name with a different connection string must still be rejected.
    let second = engine.execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p");
    let err = second.unwrap_err();
    let rendered = alloc::format!("{err:?}");
    assert!(rendered.contains("DuplicateName"), "got {err:?}");
}
12840
#[test]
fn drop_subscription_silent_when_absent() {
    let mut engine = Engine::new();
    // Dropping an unknown subscription succeeds and affects nothing.
    let affected = match engine.execute("DROP SUBSCRIPTION never").unwrap() {
        QueryResult::CommandOk { affected, .. } => affected,
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 0);
}
12850
#[test]
fn subscription_advance_updates_last_pos_monotone() {
    let mut engine = Engine::new();
    engine
        .execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
        .unwrap();

    // (position offered, position expected afterwards). The middle
    // step offers a stale position: the call still reports success
    // but the stored position must not move backwards.
    let steps = [(100, 100), (50, 100), (200, 200)];
    for (offered, expected) in steps {
        assert!(engine.subscription_advance("s", offered));
        let stored = engine.subscriptions().get("s").unwrap().last_received_pos;
        assert_eq!(stored, expected);
    }

    // An unknown subscription name is reported as a failure.
    assert!(!engine.subscription_advance("missing", 1));
}
12864
#[test]
fn show_subscriptions_returns_rows_ordered_by_name() {
    // Create two subscriptions out of alphabetical order and check
    // that SHOW SUBSCRIPTIONS lists them sorted by name, with the
    // five expected columns populated.
    let mut e = Engine::new();
    e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
        .unwrap();
    e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
        .unwrap();
    let (rows, columns) = match e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap() {
        QueryResult::Rows { rows, columns } => (rows, columns),
        // Report what actually came back, matching the diagnostics used
        // by the CommandOk tests in this module.
        other => panic!("expected Rows, got {other:?}"),
    };
    assert_eq!(rows.len(), 2);
    assert_eq!(columns.len(), 5);
    assert_eq!(columns[0].name, "name");
    assert_eq!(columns[4].name, "last_received_pos");

    // Alphabetical: a_sub, z_sub.
    let names: Vec<&str> = rows
        .iter()
        .map(|row| match &row.values[0] {
            Value::Text(s) => s.as_str(),
            other => panic!("expected Text name, got {other:?}"),
        })
        .collect();
    assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);

    // Row 0: a_sub — connection string, publication list, enabled
    // flag and last received position.
    assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
    assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
    assert_eq!(rows[0].values[3], Value::Bool(true));
    assert_eq!(rows[0].values[4], Value::BigInt(0));
    // Row 1: z_sub — multiple publications are joined with ", ".
    assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
}
12900
#[test]
fn subscriptions_persist_across_snapshot_envelope_v4() {
    // Two subscriptions, one of them advanced, must come back from a
    // snapshot → restore_envelope round trip with their connection
    // string, publication list and last received position intact.
    let mut e = Engine::new();
    e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
        .unwrap();
    e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
        .unwrap();
    // `subscription_advance` reports success as a bool; check it so a
    // broken setup fails here instead of as a confusing mismatch on
    // the restored engine below.
    assert!(
        e.subscription_advance("s2", 42),
        "s2 was just created, so advancing it must succeed"
    );

    let snap = e.snapshot();
    let e2 = Engine::restore_envelope(&snap).unwrap();
    assert_eq!(e2.subscriptions().len(), 2);

    // s1 was never advanced, so its position stays at the default.
    let s1 = e2.subscriptions().get("s1").unwrap();
    assert_eq!(s1.conn_str, "h=A");
    assert_eq!(
        s1.publications,
        alloc::vec!["p1".to_string(), "p2".to_string()]
    );
    assert_eq!(s1.last_received_pos, 0);

    // s2 keeps the position it was advanced to before the snapshot.
    let s2 = e2.subscriptions().get("s2").unwrap();
    assert_eq!(s2.last_received_pos, 42);
}
12922
#[test]
fn v3_envelope_loads_with_empty_subscriptions() {
    // Hand-build a v3 snapshot (publications only, no subscription
    // section) to prove the v6.1.4 reader neither panics nor loses
    // data: subscriptions come back empty, publications populated.
    let mut source = Engine::new();
    source.execute("CREATE PUBLICATION pub_legacy").unwrap();

    let catalog = source.catalog.serialize();
    let users = crate::users::serialize_users(&source.users);
    let pubs = source.publications.serialize();

    // Layout: magic, version byte, then each section as a
    // little-endian u32 length followed by its bytes, and finally a
    // CRC32 over everything written so far.
    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(3u8); // v3
    for section in [&catalog[..], &users[..], &pubs[..]] {
        let len = u32::try_from(section.len()).unwrap();
        envelope.extend_from_slice(&len.to_le_bytes());
        envelope.extend_from_slice(section);
    }
    let crc = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&crc.to_le_bytes());

    let restored =
        Engine::restore_envelope(&envelope).expect("v3 envelope restores under v4 reader");
    assert!(restored.subscriptions().is_empty());
    assert!(restored.publications().contains("pub_legacy"));
}
12949
#[test]
fn create_subscription_allowed_inside_transaction() {
    // CREATE SUBSCRIPTION issued between BEGIN and COMMIT must be
    // accepted and be visible once the transaction has committed.
    let mut engine = Engine::new();
    for sql in [
        "BEGIN",
        "CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p",
        "COMMIT",
    ] {
        engine.execute(sql).unwrap();
    }
    assert!(engine.subscriptions().contains("s"));
}
12959
12960    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
#[test]
fn analyze_populates_histogram_bounds() {
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
        .unwrap();
    // Fifty rows with distinct ids 0..=49 and no NULLs in `id`.
    for i in 0..50 {
        let sql = alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')");
        engine.execute(&sql).unwrap();
    }
    engine.execute("ANALYZE t").unwrap();

    let id_stats = engine.statistics().get("t", "id").unwrap();

    // The histogram must span the full id range, from "0" to "49".
    let bounds = &id_stats.histogram_bounds;
    assert!(bounds.len() >= 2);
    assert_eq!(bounds.first().unwrap(), "0");
    assert_eq!(bounds.last().unwrap(), "49");

    // No NULLs were inserted and every id is unique.
    assert!(id_stats.null_frac.abs() < 1e-6);
    assert_eq!(id_stats.n_distinct, 50);
}
12979
#[test]
fn reanalyze_overwrites_prior_stats() {
    /// Inserts one single-column row into `t` for every id in `ids`.
    fn insert_ids(engine: &mut Engine, ids: core::ops::Range<i32>) {
        for i in ids {
            engine
                .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
                .unwrap();
        }
    }

    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();

    // First pass: ten distinct ids.
    insert_ids(&mut engine, 0..10);
    engine.execute("ANALYZE t").unwrap();
    let n1 = engine.statistics().get("t", "id").unwrap().n_distinct;
    assert_eq!(n1, 10);

    // Second pass: twenty more ids; a fresh ANALYZE must replace the
    // earlier figure rather than keep it.
    insert_ids(&mut engine, 10..30);
    engine.execute("ANALYZE t").unwrap();
    let n2 = engine.statistics().get("t", "id").unwrap().n_distinct;
    assert_eq!(n2, 30);
}
12999
#[test]
fn analyze_unknown_table_errors() {
    let mut engine = Engine::new();
    // ANALYZE on a table that was never created must surface the
    // storage layer's "table not found" error.
    let err = engine.execute("ANALYZE nonexistent").unwrap_err();
    let table_missing = matches!(
        err,
        EngineError::Storage(StorageError::TableNotFound { .. })
    );
    assert!(table_missing);
}
13009
#[test]
fn bare_analyze_covers_all_user_tables() {
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t1 (id INT NOT NULL)",
        "CREATE TABLE t2 (name TEXT NOT NULL)",
        "INSERT INTO t1 VALUES (1)",
        "INSERT INTO t2 VALUES ('alice')",
    ] {
        engine.execute(sql).unwrap();
    }

    // ANALYZE without a table name: `affected` is expected to equal
    // the number of tables (two) and the catalog is flagged modified.
    let (affected, modified_catalog) = match engine.execute("ANALYZE").unwrap() {
        QueryResult::CommandOk {
            affected,
            modified_catalog,
        } => (affected, modified_catalog),
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 2);
    assert!(modified_catalog);

    // Both tables now have statistics for their single column.
    let stats = engine.statistics();
    assert!(stats.get("t1", "id").is_some());
    assert!(stats.get("t2", "name").is_some());
}
13031
#[test]
fn select_from_spg_statistic_returns_rows_per_column() {
    // After ANALYZE on a two-column table, `spg_statistic` must
    // expose one row per column, ordered by (table_name, column_name).
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    e.execute("ANALYZE t").unwrap();
    let (rows, columns) = match e.execute_readonly("SELECT * FROM spg_statistic").unwrap() {
        QueryResult::Rows { rows, columns } => (rows, columns),
        // Report what actually came back, matching the diagnostics used
        // by the CommandOk tests in this module.
        other => panic!("expected Rows, got {other:?}"),
    };
    // v6.7.0 — spg_statistic gained a `cold_row_count` column.
    assert_eq!(columns.len(), 6);
    assert_eq!(columns[0].name, "table_name");
    assert_eq!(columns[4].name, "histogram_bounds");
    assert_eq!(columns[5].name, "cold_row_count");
    assert_eq!(rows.len(), 2, "one row per column of t");

    // Sorted by (table_name, column_name); "id" < "label". Checking
    // both rows is what actually pins the ordering — row 0 alone
    // cannot demonstrate it.
    for (row, expected_column) in rows.iter().zip(["id", "label"]) {
        match (&row.values[0], &row.values[1]) {
            (Value::Text(t), Value::Text(c)) => {
                assert_eq!(t, "t");
                assert_eq!(c, expected_column);
            }
            other => panic!("expected (Text, Text) key columns, got {other:?}"),
        }
    }
}
13060
#[test]
fn analyze_skips_vector_columns() {
    // Vector columns carry their own statistics shape (the HNSW
    // graph), so ANALYZE keeps them out of spg_statistic.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)",
        "INSERT INTO t VALUES (1, [1, 2, 3])",
        "ANALYZE t",
    ] {
        engine.execute(sql).unwrap();
    }

    let stats = engine.statistics();
    // The scalar column is analysed, the vector column is not.
    assert!(stats.get("t", "id").is_some());
    assert!(stats.get("t", "v").is_none());
}
13073
#[test]
fn statistics_persist_across_envelope_v5_round_trip() {
    let mut source = Engine::new();
    source.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    // Twenty distinct ids, then collect statistics for every table.
    for i in 0..20 {
        let sql = alloc::format!("INSERT INTO t VALUES ({i})");
        source.execute(&sql).unwrap();
    }
    source.execute("ANALYZE").unwrap();

    // The collected statistics must survive snapshot + restore.
    let snapshot = source.snapshot();
    let restored = Engine::restore_envelope(&snapshot).unwrap();
    let id_stats = restored.statistics().get("t", "id").unwrap();
    assert_eq!(id_stats.n_distinct, 20);
}
13088
13089    // ── v6.2.1 auto-analyze threshold ───────────────────────────
13090
#[test]
fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
    // Starting from an empty table, ten inserts give modified = 10
    // and row_count = 10. With threshold = 0.1 × max(10, 100) = 10,
    // the tenth INSERT is the one that reaches it.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();

    // Nine inserts stay just under the threshold.
    (0..9).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    assert!(engine.tables_needing_analyze().is_empty(), "9 < threshold");

    // The tenth insert tips the table over.
    engine.execute("INSERT INTO t VALUES (9)").unwrap();
    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
13107
#[test]
fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
    /// Inserts one single-column row into `t` for every id in `ids`.
    fn insert_ids(engine: &mut Engine, ids: core::ops::Range<i32>) {
        for i in ids {
            engine
                .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
                .unwrap();
        }
    }

    // Once 1000 rows have been analysed, the threshold is 0.1 ×
    // row_count. Every further INSERT grows both the modification
    // counter and row_count, so firing from N = 1000 requires
    // M ≥ 0.1 × (1000 + M), i.e. M ≥ 112. The test adds 50 rows
    // (must not fire) and then 150 more (200 modifications against
    // row_count = 1200, threshold = 120 → must fire).
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    insert_ids(&mut engine, 0..1000);
    engine.execute("ANALYZE t").unwrap();
    assert!(engine.tables_needing_analyze().is_empty(), "fresh ANALYZE");

    insert_ids(&mut engine, 1000..1050);
    assert!(
        engine.tables_needing_analyze().is_empty(),
        "50 inserts < threshold of ~105"
    );

    insert_ids(&mut engine, 1050..1200);
    let flagged = engine.tables_needing_analyze();
    assert_eq!(
        flagged,
        alloc::vec!["t".to_string()],
        "200 inserts > 0.1 × 1200 threshold"
    );
}
13141
#[test]
fn auto_analyze_threshold_resets_after_analyze() {
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    // 200 un-analysed inserts are enough to flag the table.
    for i in 0..200 {
        let sql = alloc::format!("INSERT INTO t VALUES ({i})");
        engine.execute(&sql).unwrap();
    }
    assert!(!engine.tables_needing_analyze().is_empty());

    // A bare ANALYZE has to clear the pending-modification state.
    engine.execute("ANALYZE").unwrap();
    let still_flagged = engine.tables_needing_analyze();
    assert!(still_flagged.is_empty(), "ANALYZE must reset the counter");
}
13157
#[test]
fn auto_analyze_threshold_tracks_updates_and_deletes() {
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    for i in 0..50 {
        let sql = alloc::format!("INSERT INTO t VALUES ({i}, 'x')");
        engine.execute(&sql).unwrap();
    }
    engine.execute("ANALYZE t").unwrap();

    // Updating 20 rows and deleting 5 gives modified = 25. For a
    // table this small the threshold is 0.1 × max(rows, 100) = 10,
    // so 25 ≥ 10 must flag the table.
    for sql in [
        "UPDATE t SET label = 'y' WHERE id < 20",
        "DELETE FROM t WHERE id >= 45",
    ] {
        engine.execute(sql).unwrap();
    }
    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
13174
#[test]
fn v4_envelope_loads_with_empty_statistics() {
    // Hand-build a v4 envelope: catalog, users, publications and the
    // subscriptions trailer, but no statistics section. A v6.2.0
    // reader has to accept it and expose empty statistics.
    let mut source = Engine::new();
    source
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();

    let catalog = source.catalog.serialize();
    let users = crate::users::serialize_users(&source.users);
    let pubs = source.publications.serialize();
    let subs = source.subscriptions.serialize();

    // Layout: magic, version byte, then each section as a
    // little-endian u32 length followed by its bytes, and finally a
    // CRC32 over everything written so far.
    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(4u8);
    for section in [&catalog[..], &users[..], &pubs[..], &subs[..]] {
        let len = u32::try_from(section.len()).unwrap();
        envelope.extend_from_slice(&len.to_le_bytes());
        envelope.extend_from_slice(section);
    }
    let crc = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&crc.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v4 envelope restores");
    assert!(restored.statistics().is_empty());
}
13203
#[test]
fn v1_v2_envelope_loads_with_empty_publications() {
    // Snapshots written before v6.1.2 use envelope v2 and have no
    // publication trailer. They must still deserialise, and the
    // restored engine must report zero publications. This test locks
    // that back-compat path with a hand-built v2 envelope.
    let mut source = Engine::new();
    // One user is created so the forged envelope carries a non-empty
    // users section.
    source
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();

    let catalog = source.catalog.serialize();
    let users = crate::users::serialize_users(&source.users);

    // Layout: magic, version byte, catalog and users sections (each a
    // little-endian u32 length followed by its bytes), then a CRC32
    // over everything written so far.
    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(2u8); // v2
    for section in [&catalog[..], &users[..]] {
        let len = u32::try_from(section.len()).unwrap();
        envelope.extend_from_slice(&len.to_le_bytes());
        envelope.extend_from_slice(section);
    }
    let crc = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&crc.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v2 envelope restores");
    assert!(restored.publications().is_empty());
}
13234}