Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod json;
13pub mod memoize;
14pub mod plan_cache;
15pub mod publications;
16pub mod query_stats;
17pub mod reorder;
18pub mod selectivity;
19pub mod statistics;
20pub mod subscriptions;
21pub mod users;
22
23pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
24
25use alloc::borrow::Cow;
26use alloc::boxed::Box;
27use alloc::collections::BTreeMap;
28use alloc::string::{String, ToString};
29use alloc::vec::Vec;
30use core::fmt;
31
32use spg_sql::ast::{
33    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
34    CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement,
35    CreateUserStatement, Expr, FrameBound, FrameKind, FromClause, IndexMethod, InsertStatement,
36    JoinKind, Literal, OrderBy, SelectItem, SelectStatement, Statement, TableRef, UnOp, UnionKind,
37    VecEncoding as SqlVecEncoding, WindowFrame,
38};
39use spg_sql::parser::{self, ParseError};
40use spg_storage::{
41    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
42    TableSchema, Value, VecEncoding,
43};
44
45use crate::eval::{EvalContext, EvalError};
46
/// Result of executing one statement.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Number of rows the statement affected.
        affected: usize,
        /// Whether the committed catalog changed (see the variant docs).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Schema of the result columns.
        columns: Vec<ColumnSchema>,
        /// The result rows.
        /// NOTE(review): presumably each row's values line up positionally
        /// with `columns` — confirm against the executor.
        rows: Vec<Row>,
    },
}
69
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Wraps an error from the SQL parser (`spg_sql::parser::ParseError`).
    Parse(ParseError),
    /// Wraps an error from the storage layer (`spg_storage::StorageError`).
    Storage(StorageError),
    /// Wraps an error from expression evaluation (`crate::eval::EvalError`).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
102
103impl fmt::Display for EngineError {
104    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105        match self {
106            Self::Parse(e) => write!(f, "parse: {e}"),
107            Self::Storage(e) => write!(f, "storage: {e}"),
108            Self::Eval(e) => write!(f, "eval: {e}"),
109            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
110            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
111            Self::NoActiveTransaction => f.write_str("no active transaction"),
112            Self::WriteRequired => {
113                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
114            }
115            Self::RowLimitExceeded(n) => {
116                write!(f, "query exceeded max_query_rows={n}")
117            }
118            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
119        }
120    }
121}
122
123impl From<ParseError> for EngineError {
124    fn from(e: ParseError) -> Self {
125        Self::Parse(e)
126    }
127}
128impl From<StorageError> for EngineError {
129    fn from(e: StorageError) -> Self {
130        Self::Storage(e)
131    }
132}
133impl From<EvalError> for EngineError {
134    fn from(e: EvalError) -> Self {
135        Self::Eval(e)
136    }
137}
138
// NOTE: the summary below describes `Engine` (declared further down in this
// file), not `ClockFn`. It is kept as a plain comment so it no longer leaks
// into `ClockFn`'s rustdoc.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch.
///
/// The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. The engine stores it as `Option<ClockFn>`;
/// `None` means `NOW()` / `CURRENT_*` raise `Unsupported`.
pub type ClockFn = fn() -> i64;
148
/// Function pointer that produces 16 cryptographically random bytes.
///
/// Like `ClockFn`, the engine is `no_std` and can't reach for /dev/urandom
/// itself — the host (`spg-server`) injects an OS-backed source. The engine
/// stores it as `Option<SaltFn>`; `None` means SQL-driven `CREATE USER`
/// falls back to a deterministic salt derived from the username
/// (acceptable in tests; the server always installs a real RNG so
/// production paths never see this).
pub type SaltFn = fn() -> [u8; 16];
156
/// v4.5 cooperative cancellation token. A long-running SELECT /
/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
/// and bails with `EngineError::Cancelled`. The host
/// (`spg-server`) creates an `AtomicBool` per query, spawns a
/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
///
/// `CancelToken::none()` is a no-op — used by the legacy `execute`
/// and `execute_readonly` entry points so existing callers don't
/// change.
///
/// The token is `Copy`: it only borrows the host's flag for `'a`.
#[derive(Debug, Clone, Copy)]
pub struct CancelToken<'a> {
    /// Borrowed host-owned flag; `None` for the no-op token built by
    /// `CancelToken::none()`.
    flag: Option<&'a core::sync::atomic::AtomicBool>,
}
171
172impl<'a> CancelToken<'a> {
173    #[must_use]
174    pub const fn none() -> Self {
175        Self { flag: None }
176    }
177
178    #[must_use]
179    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
180        Self { flag: Some(f) }
181    }
182
183    #[must_use]
184    pub fn is_cancelled(self) -> bool {
185        self.flag
186            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
187    }
188
189    /// Returns `Err(Cancelled)` if the token has been tripped.
190    /// Used at row-loop checkpoints to bail cooperatively without
191    /// scattering raw `is_cancelled` checks across the executor.
192    #[inline]
193    pub fn check(self) -> Result<(), EngineError> {
194        if self.is_cancelled() {
195            Err(EngineError::Cancelled)
196        } else {
197            Ok(())
198        }
199    }
200}
201
// ---- snapshot envelope (v4.1; CRC32 added in v4.37 as envelope v2; ----
// ---- publications in v6.1.2 as v3; subscriptions in v6.1.4 as v4;  ----
// ---- statistics in v6.2.0 as v5)                                   ----
204//
205// Wraps a catalog blob + a user blob behind a small header so the
206// server can persist both atomically without inventing a new file.
207// Bare catalog blobs (v3.x) still load via `restore_envelope` since
208// the magic check fails fast and the function falls back to
209// `Catalog::deserialize`.
210//
211// Layout — v1 (v4.1, no CRC):
212//   [8 bytes magic "SPGENV01"]
213//   [u8 version = 1]
214//   [u32 catalog_len][catalog bytes]
215//   [u32 users_len][users bytes]
216//
217// Layout — v2 (v4.37, CRC32 of body):
218//   [8 bytes magic "SPGENV01"]
219//   [u8 version = 2]
220//   [u32 catalog_len][catalog bytes]
221//   [u32 users_len][users bytes]
222//   [u32 crc32]                      ← CRC32 of every byte before it.
223//
224// Layout — v3 (v6.1.2, publications trailer):
225//   [8 bytes magic "SPGENV01"]
226//   [u8 version = 3]
227//   [u32 catalog_len][catalog bytes]
228//   [u32 users_len][users bytes]
229//   [u32 pubs_len][publications bytes]
230//   [u32 crc32]
231//
232// Layout — v4 (v6.1.4, subscriptions trailer):
233//   [8 bytes magic "SPGENV01"]
234//   [u8 version = 4]
235//   [u32 catalog_len][catalog bytes]
236//   [u32 users_len][users bytes]
237//   [u32 pubs_len][publications bytes]
238//   [u32 subs_len][subscriptions bytes]
239//   [u32 crc32]
240//
241// Layout — v5 (v6.2.0, statistics trailer):
242//   [8 bytes magic "SPGENV01"]
243//   [u8 version = 5]
244//   [u32 catalog_len][catalog bytes]
245//   [u32 users_len][users bytes]
246//   [u32 pubs_len][publications bytes]
247//   [u32 subs_len][subscriptions bytes]
248//   [u32 stats_len][statistics bytes]      ← NEW
249//   [u32 crc32]
250//
251// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
252// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
253// statistics; v3 loads with empty subscriptions + statistics; v4
254// loads with empty statistics; v5 deserialises all three. Older
255// SPG versions reading a v5 envelope fall through the version
256// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
257// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
258
/// 8-byte magic prefix that identifies a snapshot envelope.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// v4.1 layout: catalog + users, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// v4.37 layout: v1 plus a trailing CRC32 of the body.
const ENVELOPE_VERSION_V2: u8 = 2;
/// v6.1.2 layout: adds the publications section before the CRC.
const ENVELOPE_VERSION_V3: u8 = 3;
/// v6.1.4 layout: adds the subscriptions section before the CRC.
const ENVELOPE_VERSION_V4: u8 = 4;
/// v6.2.0 layout: adds the statistics section before the CRC. This is the
/// version `build_envelope` writes.
const ENVELOPE_VERSION_V5: u8 = 5;
265
266fn build_envelope(
267    catalog: &[u8],
268    users: &[u8],
269    pubs: &[u8],
270    subs: &[u8],
271    stats: &[u8],
272) -> Vec<u8> {
273    let mut out = Vec::with_capacity(
274        8 + 1
275            + 4
276            + catalog.len()
277            + 4
278            + users.len()
279            + 4
280            + pubs.len()
281            + 4
282            + subs.len()
283            + 4
284            + stats.len()
285            + 4,
286    );
287    out.extend_from_slice(ENVELOPE_MAGIC);
288    out.push(ENVELOPE_VERSION_V5);
289    out.extend_from_slice(
290        &u32::try_from(catalog.len())
291            .expect("≤ 4G catalog")
292            .to_le_bytes(),
293    );
294    out.extend_from_slice(catalog);
295    out.extend_from_slice(
296        &u32::try_from(users.len())
297            .expect("≤ 4G users")
298            .to_le_bytes(),
299    );
300    out.extend_from_slice(users);
301    out.extend_from_slice(
302        &u32::try_from(pubs.len())
303            .expect("≤ 4G publications")
304            .to_le_bytes(),
305    );
306    out.extend_from_slice(pubs);
307    out.extend_from_slice(
308        &u32::try_from(subs.len())
309            .expect("≤ 4G subscriptions")
310            .to_le_bytes(),
311    );
312    out.extend_from_slice(subs);
313    out.extend_from_slice(
314        &u32::try_from(stats.len())
315            .expect("≤ 4G statistics")
316            .to_le_bytes(),
317    );
318    out.extend_from_slice(stats);
319    let crc = spg_crypto::crc32::crc32(&out);
320    out.extend_from_slice(&crc.to_le_bytes());
321    out
322}
323
/// Outcome of envelope parsing (see `split_envelope`): either the
/// bare-catalog fallback, the successfully split sections of a v1–v5
/// envelope, or an explicit corruption error from a CRC mismatch
/// (envelope v2 and later carry a trailing CRC32).
///
/// The section slices borrow from the buffer that was parsed.
enum EnvelopeParse<'a> {
    /// The buffer is not a well-formed envelope of a known version.
    /// Preserves v3.x readability: the caller falls back to treating the
    /// buffer as a bare catalog blob.
    Bare,
    /// A valid envelope, split into its sections.
    Pair {
        /// Catalog section (present in every envelope version).
        catalog: &'a [u8],
        /// Users section (present in every envelope version).
        users: &'a [u8],
        /// Publications section; `None` for v1/v2 envelopes.
        publications: Option<&'a [u8]>,
        /// Subscriptions section; `None` for v1–v3 envelopes.
        subscriptions: Option<&'a [u8]>,
        /// Statistics section; `None` for v1–v4 envelopes.
        statistics: Option<&'a [u8]>,
    },
    /// The trailing CRC32 does not match the body.
    CrcMismatch {
        /// CRC32 stored in the envelope trailer.
        expected: u32,
        /// CRC32 computed over every byte before the trailer.
        computed: u32,
    },
}
344
345/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
346/// `Bare` for a buffer that doesn't look like an envelope (v3.x
347/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
348/// whose trailing CRC32 doesn't match the body.
349fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
350    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
351        return EnvelopeParse::Bare;
352    }
353    let version = buf[8];
354    if !matches!(
355        version,
356        ENVELOPE_VERSION_V1
357            | ENVELOPE_VERSION_V2
358            | ENVELOPE_VERSION_V3
359            | ENVELOPE_VERSION_V4
360            | ENVELOPE_VERSION_V5
361    ) {
362        return EnvelopeParse::Bare;
363    }
364    let mut p = 9usize;
365    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
366        return EnvelopeParse::Bare;
367    };
368    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
369        return EnvelopeParse::Bare;
370    };
371    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
372    p += 4;
373    if p + cat_len + 4 > buf.len() {
374        return EnvelopeParse::Bare;
375    }
376    let catalog = &buf[p..p + cat_len];
377    p += cat_len;
378    let Some(user_len_bytes) = buf.get(p..p + 4) else {
379        return EnvelopeParse::Bare;
380    };
381    let Ok(user_len_arr) = user_len_bytes.try_into() else {
382        return EnvelopeParse::Bare;
383    };
384    let user_len = u32::from_le_bytes(user_len_arr) as usize;
385    p += 4;
386    if p + user_len > buf.len() {
387        return EnvelopeParse::Bare;
388    }
389    let users = &buf[p..p + user_len];
390    p += user_len;
391    let publications = if matches!(
392        version,
393        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
394    ) {
395        // [u32 pubs_len][publications bytes]
396        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
397            return EnvelopeParse::Bare;
398        };
399        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
400            return EnvelopeParse::Bare;
401        };
402        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
403        p += 4;
404        if p + pubs_len > buf.len() {
405            return EnvelopeParse::Bare;
406        }
407        let pubs_slice = &buf[p..p + pubs_len];
408        p += pubs_len;
409        Some(pubs_slice)
410    } else {
411        None
412    };
413    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
414        // [u32 subs_len][subscriptions bytes]
415        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
416            return EnvelopeParse::Bare;
417        };
418        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
419            return EnvelopeParse::Bare;
420        };
421        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
422        p += 4;
423        if p + subs_len > buf.len() {
424            return EnvelopeParse::Bare;
425        }
426        let subs_slice = &buf[p..p + subs_len];
427        p += subs_len;
428        Some(subs_slice)
429    } else {
430        None
431    };
432    let statistics = if version == ENVELOPE_VERSION_V5 {
433        // [u32 stats_len][statistics bytes]
434        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
435            return EnvelopeParse::Bare;
436        };
437        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
438            return EnvelopeParse::Bare;
439        };
440        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
441        p += 4;
442        if p + stats_len > buf.len() {
443            return EnvelopeParse::Bare;
444        }
445        let stats_slice = &buf[p..p + stats_len];
446        p += stats_len;
447        Some(stats_slice)
448    } else {
449        None
450    };
451    if matches!(
452        version,
453        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
454    ) {
455        if p + 4 != buf.len() {
456            return EnvelopeParse::Bare;
457        }
458        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
459            return EnvelopeParse::Bare;
460        };
461        let expected = u32::from_le_bytes(crc_arr);
462        let computed = spg_crypto::crc32::crc32(&buf[..p]);
463        if expected != computed {
464            return EnvelopeParse::CrcMismatch { expected, computed };
465        }
466    } else if p != buf.len() {
467        // v1: must end exactly at the users section.
468        return EnvelopeParse::Bare;
469    }
470    EnvelopeParse::Pair {
471        catalog,
472        users,
473        publications,
474        subscriptions,
475        statistics,
476    }
477}
478
/// v4.41.1 opaque transaction handle.
///
/// Returned by `Engine::alloc_tx_id`, threaded through `Engine::execute_in`
/// so dispatch can identify which in-flight TX a statement belongs to.
/// `IMPLICIT_TX` is the reserved slot every legacy caller — engine
/// self-tests, spg-cli, spg-embedded, startup replay — implicitly uses
/// through the unchanged `Engine::execute(sql)` API.
///
/// v4.41.1 keeps at most one active slot at runtime (dispatch holds
/// `engine.write()` across the wrap, same as v4.34); the map shape is here
/// to let v4.42 turn on N in-flight implicit TXs without reshuffling the
/// engine internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);
490
/// Reserved slot (`TxId(0)`) used by `Engine::execute(sql)` — the legacy
/// single-global-shadow path. New `alloc_tx_id` handles start at 1, so they
/// never collide with this slot.
pub const IMPLICIT_TX: TxId = TxId(0);
494
/// v6.7.3 — default segment-size threshold (4 MiB) used by `COMPACT COLD
/// SEGMENTS` when no explicit target is supplied. Segments whose
/// `OwnedSegment::bytes().len()` is **strictly** less than this
/// value are eligible to merge. spg-server reads
/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
501
/// Per-slot transaction state.
///
/// Held inside `tx_catalogs[tx_id]` for the lifetime of a BEGIN..COMMIT
/// (or BEGIN..ROLLBACK) window. Drops when the TX commits (its `catalog`
/// is moved over `Engine.catalog`) or rolls back (slot removed, catalog
/// discarded).
#[derive(Debug, Default, Clone)]
struct TxState {
    /// The TX's shadow copy of the catalog. Started as a clone of
    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
    catalog: Catalog,
    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
    /// `ROLLBACK TO <name>` restores from the entry and pops everything
    /// after it; `RELEASE <name>` discards the entry and everything
    /// after; COMMIT/ROLLBACK clears the whole stack.
    savepoints: Vec<(String, Catalog)>,
}
520
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
///
/// NOTE(review): `clone_snapshot` clones `active_catalog()`, not the
/// `catalog` field directly — confirm that this is always the committed
/// catalog, as the first sentence claims, even while a TX is open.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    /// Catalog as of the moment the snapshot was taken.
    catalog: Catalog,
    /// Optimizer statistics as of the moment the snapshot was taken.
    statistics: statistics::Statistics,
    /// Clock function copied from the engine, if one was installed.
    clock: Option<ClockFn>,
    /// Per-query row cap copied from the engine; `None` = unlimited.
    max_query_rows: Option<usize>,
}
541
542#[derive(Debug, Default)]
543pub struct Engine {
544    /// Committed catalog — what survives `Engine::snapshot()` and what
545    /// outside-TX `SELECT`s read.
546    catalog: Catalog,
547    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
548    /// v4.41.1 runtime invariant: at most one entry (single-writer
549    /// model unchanged). v4.42 will let dispatch hold multiple entries
550    /// concurrently for group commit + engine MVCC.
551    tx_catalogs: BTreeMap<TxId, TxState>,
552    /// Which slot the next exec_* call should mutate. Set by
553    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
554    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
555    /// write goes straight against `catalog`).
556    current_tx: Option<TxId>,
557    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
558    /// reserved for `IMPLICIT_TX`.
559    next_tx_id: u64,
560    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
561    /// / `CURRENT_DATE`. Set by the host environment.
562    clock: Option<ClockFn>,
563    /// v4.1 cryptographic RNG for per-user password salt. Set by the
564    /// host. `None` means SQL-driven `CREATE USER` uses a
565    /// deterministic fallback — see `SaltFn`.
566    salt_fn: Option<SaltFn>,
567    /// v4.2 per-query row cap. `None` = unlimited. When set, a
568    /// SELECT that materialises more than `n` rows returns
569    /// `EngineError::RowLimitExceeded`. Enforced before the result
570    /// is shaped into wire frames so a runaway scan can't blow the
571    /// server's heap.
572    max_query_rows: Option<usize>,
573    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
574    /// the server decides what that means at the auth boundary
575    /// (open mode vs legacy single-password mode). User CRUD goes
576    /// through `create_user`/`drop_user`/`verify_user`; persistence
577    /// rides the snapshot envelope alongside the catalog.
578    users: UserStore,
579    /// v6.1.2 logical-replication publication catalog. Empty until
580    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
581    /// trailer (see `build_envelope`).
582    publications: publications::Publications,
583    /// v6.1.4 logical-replication subscription catalog. Empty until
584    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
585    /// trailer.
586    subscriptions: subscriptions::Subscriptions,
587    /// v6.2.0 — per-column statistics for the cost-based optimizer.
588    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
589    /// table. Persistence rides the v5 envelope trailer.
590    statistics: statistics::Statistics,
591    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
592    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
593    /// the snapshot envelope (rebuilt on demand after restart).
594    plan_cache: plan_cache::PlanCache,
595    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
596    /// surfaced via `spg_stat_query` virtual table. Updated by the
597    /// `execute_*` paths after a successful execute.
598    query_stats: query_stats::QueryStats,
599    /// v6.5.2 — connection-state provider callback. spg-server
600    /// registers a function at startup that snapshots its
601    /// per-pgwire-connection registry into `ActivityRow`s; engine
602    /// reads through it on every `SELECT * FROM spg_stat_activity`.
603    /// `None` ⇒ no-data (returns empty rows; matches the no_std
604    /// embedded callers that don't run pgwire).
605    activity_provider: Option<ActivityProvider>,
606    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
607    /// activity_provider: spg-server registers both at startup;
608    /// engine reads through on `SELECT * FROM spg_audit_chain` and
609    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
610    audit_chain_provider: Option<AuditChainProvider>,
611    audit_verifier: Option<AuditVerifier>,
612    /// v6.5.6 — slow-query log threshold in microseconds. When set,
613    /// every successful execute whose elapsed exceeds the threshold
614    /// gets fed to the registered slow-query log callback (so
615    /// spg-server can emit a structured log line). Default `None`
616    /// = no slow-query logging.
617    slow_query_threshold_us: Option<u64>,
618    slow_query_logger: Option<SlowQueryLogger>,
619}
620
/// v6.5.6 — callback signature for slow-query log emission. Called
/// with `(sql, elapsed_us)` once per successful execute that crosses
/// the threshold (`Engine::slow_query_threshold_us`, in microseconds).
pub type SlowQueryLogger = fn(&str, u64);
625
626/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
627/// state. Round-trips through `Engine::execute` to recreate the
628/// same schema (sans data + indexes — indexes are emitted as a
629/// separate `CREATE INDEX` chain in `spg_database_ddl`).
630fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
631    let mut out = alloc::format!("CREATE TABLE {name} (");
632    for (i, col) in columns.iter().enumerate() {
633        if i > 0 {
634            out.push_str(", ");
635        }
636        out.push_str(&col.name);
637        out.push(' ');
638        out.push_str(&render_data_type(col.ty));
639        if !col.nullable {
640            out.push_str(" NOT NULL");
641        }
642        if col.auto_increment {
643            out.push_str(" AUTO_INCREMENT");
644        }
645    }
646    out.push(')');
647    out
648}
649
650fn render_data_type(ty: DataType) -> String {
651    match ty {
652        DataType::SmallInt => "SMALLINT".into(),
653        DataType::Int => "INT".into(),
654        DataType::BigInt => "BIGINT".into(),
655        DataType::Float => "FLOAT".into(),
656        DataType::Text => "TEXT".into(),
657        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
658        DataType::Char(n) => alloc::format!("CHAR({n})"),
659        DataType::Bool => "BOOL".into(),
660        DataType::Vector { dim, encoding } => match encoding {
661            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
662            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
663            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
664        },
665        DataType::Numeric { precision, scale } => {
666            alloc::format!("NUMERIC({precision},{scale})")
667        }
668        DataType::Date => "DATE".into(),
669        DataType::Timestamp => "TIMESTAMP".into(),
670        DataType::Interval => "INTERVAL".into(),
671        DataType::Json => "JSON".into(),
672        DataType::Jsonb => "JSONB".into(),
673        DataType::Timestamptz => "TIMESTAMPTZ".into(),
674        DataType::Bytes => "BYTEA".into(),
675        DataType::TextArray => "TEXT[]".into(),
676    }
677}
678
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
///
/// NOTE(review): the per-field notes below are inferred from field names
/// only; confirm units and meaning against the spg-server provider.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    /// Connection / backend identifier (presumably per pgwire connection).
    pub pid: u32,
    /// User name associated with the connection.
    pub user: String,
    /// Start timestamp; the `_us` suffix suggests microseconds — TODO confirm
    /// the epoch.
    pub started_at_us: i64,
    /// SQL text currently being executed.
    pub current_sql: String,
    /// What the connection is waiting on, if anything.
    pub wait_event: String,
    /// Elapsed time; the `_us` suffix suggests microseconds — TODO confirm.
    pub elapsed_us: i64,
    /// Whether the connection currently has a transaction open.
    pub in_transaction: bool,
}
692
/// v6.5.2 — provider callback type for `spg_stat_activity`. A fresh
/// snapshot (an owned `Vec<ActivityRow>`) is returned on each call; the
/// engine doesn't cache the rows.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
696
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
///
/// NOTE(review): the per-field notes below are inferred from field names
/// only; confirm against spg-server's `AuditEntry`.
#[derive(Debug, Clone)]
pub struct AuditRow {
    /// Sequence number of the entry within the chain.
    pub seq: i64,
    /// Entry timestamp; the `_ms` suffix suggests milliseconds — TODO confirm
    /// the epoch.
    pub ts_ms: i64,
    /// Hash of the previous entry, hex-encoded.
    pub prev_hash_hex: String,
    /// Hash of this entry, hex-encoded.
    pub entry_hash_hex: String,
    /// SQL text recorded for the entry.
    pub sql: String,
}
707
/// v6.5.3 — chain-table provider. spg-server registers a fn pointer
/// that snapshots the audit log into `AuditRow`s.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log and returns `(verified_count, broken_at_seq)` —
/// `broken_at_seq` is `-1` on a clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
714
715impl Engine {
716    pub fn new() -> Self {
717        Self {
718            catalog: Catalog::new(),
719            tx_catalogs: BTreeMap::new(),
720            current_tx: None,
721            next_tx_id: 1,
722            clock: None,
723            salt_fn: None,
724            max_query_rows: None,
725            users: UserStore::new(),
726            publications: publications::Publications::new(),
727            subscriptions: subscriptions::Subscriptions::new(),
728            statistics: statistics::Statistics::new(),
729            plan_cache: plan_cache::PlanCache::new(),
730            query_stats: query_stats::QueryStats::new(),
731            activity_provider: None,
732            audit_chain_provider: None,
733            audit_verifier: None,
734            slow_query_threshold_us: None,
735            slow_query_logger: None,
736        }
737    }
738
739    /// v7.11.0 — clone the engine's committed catalog + read-time
740    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
741    /// backed by `PersistentVec`; cloning is O(log n) per table).
742    /// Subsequent writes to this engine are invisible to the
743    /// snapshot; the snapshot is self-contained and can be moved
744    /// to another thread for concurrent `execute_readonly_on_snapshot`
745    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
746    /// and any other read-fanout pattern.
747    #[must_use]
748    pub fn clone_snapshot(&self) -> CatalogSnapshot {
749        CatalogSnapshot {
750            catalog: self.active_catalog().clone(),
751            statistics: self.statistics.clone(),
752            clock: self.clock,
753            max_query_rows: self.max_query_rows,
754        }
755    }
756
757    /// v7.11.1 — execute a read-only SQL statement against a
758    /// `CatalogSnapshot` without touching this engine. Same
759    /// semantics as `execute_readonly` but parameterised on the
760    /// snapshot's catalog. Reject DDL/DML the same way
761    /// `execute_readonly` does. Static-on-Self so the caller can
762    /// dispatch without holding an `Engine` borrow alongside the
763    /// snapshot.
764    pub fn execute_readonly_on_snapshot(
765        snapshot: &CatalogSnapshot,
766        sql: &str,
767    ) -> Result<QueryResult, EngineError> {
768        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
769    }
770
771    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
772    /// cancellation. Builds a transient `Engine` over the snapshot
773    /// state, runs `execute_readonly_with_cancel`, drops. The
774    /// transient engine is cheap to construct (no I/O; everything
775    /// is just struct moves) and lets the existing read path stay
776    /// untouched.
777    pub fn execute_readonly_on_snapshot_with_cancel(
778        snapshot: &CatalogSnapshot,
779        sql: &str,
780        cancel: CancelToken<'_>,
781    ) -> Result<QueryResult, EngineError> {
782        let transient = Engine {
783            catalog: snapshot.catalog.clone(),
784            statistics: snapshot.statistics.clone(),
785            clock: snapshot.clock,
786            max_query_rows: snapshot.max_query_rows,
787            ..Engine::default()
788        };
789        transient.execute_readonly_with_cancel(sql, cancel)
790    }
791
792    /// Construct an engine restored from a previously-snapshotted catalog
793    /// (see `snapshot()`).
794    pub fn restore(catalog: Catalog) -> Self {
795        Self {
796            catalog,
797            tx_catalogs: BTreeMap::new(),
798            current_tx: None,
799            next_tx_id: 1,
800            clock: None,
801            salt_fn: None,
802            max_query_rows: None,
803            users: UserStore::new(),
804            publications: publications::Publications::new(),
805            subscriptions: subscriptions::Subscriptions::new(),
806            statistics: statistics::Statistics::new(),
807            plan_cache: plan_cache::PlanCache::new(),
808            query_stats: query_stats::QueryStats::new(),
809            activity_provider: None,
810            audit_chain_provider: None,
811            audit_verifier: None,
812            slow_query_threshold_us: None,
813            slow_query_logger: None,
814        }
815    }
816
817    /// Restore an engine + user table from a v4.1 envelope produced
818    /// by `snapshot_with_users()`. Falls back to plain catalog-only
819    /// restore if the envelope magic isn't present (so v3.x snapshot
820    /// files still load). v6.1.2 adds the optional publications
821    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
822    /// empty publication table.
823    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
824        match split_envelope(buf) {
825            EnvelopeParse::Pair {
826                catalog: catalog_bytes,
827                users: user_bytes,
828                publications: pub_bytes,
829                subscriptions: sub_bytes,
830                statistics: stats_bytes,
831            } => {
832                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
833                let users = users::deserialize_users(user_bytes)
834                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
835                let publications = match pub_bytes {
836                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
837                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
838                    })?,
839                    None => publications::Publications::new(),
840                };
841                let subscriptions = match sub_bytes {
842                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
843                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
844                    })?,
845                    None => subscriptions::Subscriptions::new(),
846                };
847                let statistics = match stats_bytes {
848                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
849                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
850                    })?,
851                    None => statistics::Statistics::new(),
852                };
853                Ok(Self {
854                    catalog,
855                    tx_catalogs: BTreeMap::new(),
856                    current_tx: None,
857                    next_tx_id: 1,
858                    clock: None,
859                    salt_fn: None,
860                    max_query_rows: None,
861                    users,
862                    publications,
863                    subscriptions,
864                    statistics,
865                    plan_cache: plan_cache::PlanCache::new(),
866                    query_stats: query_stats::QueryStats::new(),
867                    activity_provider: None,
868                    audit_chain_provider: None,
869                    audit_verifier: None,
870                    slow_query_threshold_us: None,
871                    slow_query_logger: None,
872                })
873            }
874            EnvelopeParse::CrcMismatch { expected, computed } => {
875                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
876                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
877                ))))
878            }
879            EnvelopeParse::Bare => {
880                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
881                Ok(Self::restore(catalog))
882            }
883        }
884    }
885
    /// Shared borrow of the engine's user store.
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
889
890    /// `salt` is supplied by the caller (the host has a random
891    /// source; the engine is `no_std`). Caller should pass a fresh
892    /// 16-byte random value per user.
893    pub fn create_user(
894        &mut self,
895        name: &str,
896        password: &str,
897        role: Role,
898        salt: [u8; 16],
899    ) -> Result<(), UserError> {
900        self.users.create(name, password, role, salt)?;
901        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
902        // auth can verify without re-running PBKDF2 per attempt.
903        // Uses a fresh salt from the host RNG (falls back to a
904        // deterministic per-username salt when no RNG is wired, same
905        // as the legacy hash path).
906        let scram_salt = self.salt_fn.map_or_else(
907            || {
908                let mut s = [0u8; users::SCRAM_SALT_LEN];
909                let digest = spg_crypto::hash(name.as_bytes());
910                // Use bytes 16..32 of BLAKE3 so we don't reuse the
911                // exact same fallback salt as the BLAKE3 hash path.
912                s.copy_from_slice(&digest[16..32]);
913                s
914            },
915            |f| f(),
916        );
917        self.users
918            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
919        Ok(())
920    }
921
    /// Remove the user called `name` by delegating to the user store's
    /// `drop`; its `UserError` is returned unchanged.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
925
    /// Check `password` for user `name` by delegating to the user store's
    /// `verify`. Returns that call's `Option<Role>` unchanged.
    // NOTE(review): presumably `Some(role)` means the credentials matched
    // and `None` covers both unknown user and wrong password — confirm in
    // `UserStore::verify`.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
929
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    ///
    /// Consumes the engine and returns it with `clock` set; any clock
    /// attached earlier is replaced.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
937
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    ///
    /// Consumes the engine and returns it with `salt_fn` set; any salt
    /// source attached earlier is replaced.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
945
    /// Builder: cap the number of rows a single SELECT may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded`.
    ///
    /// Consumes the engine and returns it with `max_query_rows` set to
    /// `Some(n)`.
    ///
    /// NOTE(review): earlier wording here said the bound is checked
    /// inside the executor so a runaway catalog scan can't allocate
    /// millions of rows before the server gets a chance to reject the
    /// result. The enforcement visible in this file is
    /// `enforce_row_limit`, which compares the length of an already
    /// materialised row set against the cap — confirm whether the
    /// executor also checks mid-scan.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
956
    /// The *committed* catalog. Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
    /// the shadow. Tests that inspect outside-TX state should use this.
    ///
    /// Always borrows `self.catalog` directly; `current_tx` and
    /// `tx_catalogs` are never consulted.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
963
964    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
965    /// adds the rule that an open TX's shadow is never snapshotted — only the
966    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
967    /// when there are users to persist; an empty user table snapshots as the
968    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
969    /// adds publications to the envelope condition: either non-empty
970    /// users OR non-empty publications now triggers the envelope path.
971    pub fn snapshot(&self) -> Vec<u8> {
972        if self.users.is_empty()
973            && self.publications.is_empty()
974            && self.subscriptions.is_empty()
975            && self.statistics.is_empty()
976        {
977            self.catalog.serialize()
978        } else {
979            build_envelope(
980                &self.catalog.serialize(),
981                &users::serialize_users(&self.users),
982                &self.publications.serialize(),
983                &self.subscriptions.serialize(),
984                &self.statistics.serialize(),
985            )
986        }
987    }
988
    /// True when at least one TX slot is in flight. v4.41.1 runtime
    /// invariant: at most one slot active at a time (dispatch holds
    /// `engine.write()` across the entire wrap). v4.42 will let this
    /// return true with multiple slots concurrently.
    ///
    /// Implemented as "the `tx_catalogs` map is non-empty"; `current_tx`
    /// is not consulted.
    pub fn in_transaction(&self) -> bool {
        !self.tx_catalogs.is_empty()
    }
996
997    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
998    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
999    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1000    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1001    /// sequentially under a single `engine.write()` so each task's
1002    /// mutations accumulate into shared state, then either keeps the
1003    /// accumulated state (fsync OK) or restores the pre-image via
1004    /// `replace_catalog` (fsync err).
1005    pub fn alloc_tx_id(&mut self) -> TxId {
1006        let id = TxId(self.next_tx_id);
1007        self.next_tx_id = self.next_tx_id.saturating_add(1);
1008        id
1009    }
1010
    /// v4.42 — atomically replace the live catalog. Used by the
    /// commit-barrier leader to roll back a group whose batched
    /// fsync failed: the leader snapshots `engine.catalog().clone()`
    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
    /// at group start, sequentially applies each task's BEGIN+sql+
    /// COMMIT under the same write lock to accumulate mutations
    /// into shared state, batches the WAL bytes, fsyncs once, and
    /// on failure calls this with the pre-image to undo every
    /// task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        // Plain assignment to the committed catalog; the value it held
        // before is dropped here.
        self.catalog = catalog;
    }
1033
1034    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1035    /// so tests + the spg-server freezer can drive a freeze without
1036    /// reaching into the private `active_catalog_mut`. v6.7.4
1037    /// parallel freezer will build on this surface.
1038    ///
1039    /// Marks the table's cached `cold_row_count` stale because the
1040    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1041    pub fn freeze_oldest_to_cold(
1042        &mut self,
1043        table_name: &str,
1044        index_name: &str,
1045        max_rows: usize,
1046    ) -> Result<spg_storage::FreezeReport, EngineError> {
1047        let report = self
1048            .active_catalog_mut()
1049            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1050            .map_err(EngineError::Storage)?;
1051        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1052            t.mark_cold_row_count_stale();
1053        }
1054        Ok(report)
1055    }
1056
1057    /// v6.7.5 — public shim used by the spg-server follower's
1058    /// segment-forwarding receiver. Registers a cold-tier segment
1059    /// at a specific id (the master's id, as transmitted on the
1060    /// wire) so the follower's BTree-Cold locators stay byte-
1061    /// identical with the master's. Wraps
1062    /// `Catalog::load_segment_bytes_at` under the standard
1063    /// clone-mutate-replace pattern.
1064    ///
1065    /// Returns `Ok(())` on success **and** on the "slot already
1066    /// occupied" case — a follower mid-reconnect may receive a
1067    /// segment chunk for a segment_id it already has on disk
1068    /// (forwarded last session); the caller should treat that
1069    /// path as a no-op rather than a fatal error.
1070    pub fn receive_cold_segment(
1071        &mut self,
1072        segment_id: u32,
1073        bytes: Vec<u8>,
1074    ) -> Result<(), EngineError> {
1075        let mut new_cat = self.catalog.clone();
1076        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1077            Ok(()) => {
1078                self.replace_catalog(new_cat);
1079                Ok(())
1080            }
1081            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1082            Err(e) => Err(EngineError::Storage(e)),
1083        }
1084    }
1085
1086    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1087    /// driving every BTree index on every user table. Returns one
1088    /// `(table, index, report)` triple for each merge that
1089    /// actually happened (no-op (table, index) pairs are filtered
1090    /// out so callers can size persist-side work to the live
1091    /// merges). Caller is responsible for persisting each
1092    /// `report.merged_segment_bytes` and updating the on-disk
1093    /// segment registry; engine layer is no_std and never
1094    /// touches disk.
1095    ///
1096    /// Marks every touched table's cached `cold_row_count` stale
1097    /// — compaction GC'd some shadowed rows, so the count must be
1098    /// re-derived on the next ANALYZE.
1099    pub fn compact_cold_segments_with_target(
1100        &mut self,
1101        target_segment_bytes: u64,
1102    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1103        let table_names = self.active_catalog().table_names();
1104        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1105        for tname in table_names {
1106            if is_internal_table_name(&tname) {
1107                continue;
1108            }
1109            let idx_names: Vec<String> = {
1110                let Some(t) = self.active_catalog().get(&tname) else {
1111                    continue;
1112                };
1113                t.indices()
1114                    .iter()
1115                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1116                    .map(|i| i.name.clone())
1117                    .collect()
1118            };
1119            for iname in idx_names {
1120                let report = self
1121                    .active_catalog_mut()
1122                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1123                    .map_err(EngineError::Storage)?;
1124                if report.merged_segment_id.is_some() {
1125                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1126                        t.mark_cold_row_count_stale();
1127                    }
1128                    reports.push((tname.clone(), iname, report));
1129                }
1130            }
1131        }
1132        Ok(reports)
1133    }
1134
1135    fn active_catalog(&self) -> &Catalog {
1136        match self.current_tx {
1137            Some(t) => self
1138                .tx_catalogs
1139                .get(&t)
1140                .map_or(&self.catalog, |s| &s.catalog),
1141            None => &self.catalog,
1142        }
1143    }
1144
1145    fn active_catalog_mut(&mut self) -> &mut Catalog {
1146        let tx = self.current_tx;
1147        match tx {
1148            Some(t) => match self.tx_catalogs.get_mut(&t) {
1149                Some(s) => &mut s.catalog,
1150                None => &mut self.catalog,
1151            },
1152            None => &mut self.catalog,
1153        }
1154    }
1155
1156    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1157    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1158    /// every other statement, so the caller can fall through to the
1159    /// `&mut self` `execute` path under a write lock. Engine state is
1160    /// not mutated even on the success path (`rewrite_clock_calls`
1161    /// and `resolve_order_by_position` both mutate the locally-owned
1162    /// AST, not `self`).
1163    ///
1164    /// **v4.0 concurrency**: this is the entry point the server takes
1165    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1166    /// parallel without serialising on a single mutex.
1167    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1168        self.execute_readonly_with_cancel(sql, CancelToken::none())
1169    }
1170
1171    /// v4.5 — read path with cooperative cancellation. Token's
1172    /// `is_cancelled` is checked at the start (so a watchdog that
1173    /// already fired returns Cancelled immediately) and at row-loop
1174    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1175    /// don't bother checking.
1176    pub fn execute_readonly_with_cancel(
1177        &self,
1178        sql: &str,
1179        cancel: CancelToken<'_>,
1180    ) -> Result<QueryResult, EngineError> {
1181        cancel.check()?;
1182        let mut stmt = parser::parse_statement(sql)?;
1183        let now_micros = self.clock.map(|f| f());
1184        rewrite_clock_calls(&mut stmt, now_micros);
1185        if let Statement::Select(s) = &mut stmt {
1186            resolve_order_by_position(s);
1187            // v6.2.3 — cost-based JOIN reorder (read path).
1188            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1189        }
1190        let result = match stmt {
1191            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1192            Statement::ShowTables => Ok(self.exec_show_tables()),
1193            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1194            Statement::ShowUsers => Ok(self.exec_show_users()),
1195            Statement::ShowPublications => Ok(self.exec_show_publications()),
1196            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1197            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1198                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1199            )),
1200            Statement::Explain(e) => self.exec_explain(&e, cancel),
1201            _ => Err(EngineError::WriteRequired),
1202        };
1203        self.enforce_row_limit(result)
1204    }
1205
1206    /// v4.2: cap result-set size. Applied after the executor
1207    /// materialises rows but before they leave the engine — wrapping
1208    /// every Rows-returning exec_* function would scatter the check.
1209    fn enforce_row_limit(
1210        &self,
1211        result: Result<QueryResult, EngineError>,
1212    ) -> Result<QueryResult, EngineError> {
1213        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1214            && rows.len() > cap
1215        {
1216            return Err(EngineError::RowLimitExceeded(cap));
1217        }
1218        result
1219    }
1220
    /// Parse and execute one SQL statement on the write path.
    ///
    /// Shorthand for `execute_in_with_cancel(sql, IMPLICIT_TX,
    /// CancelToken::none())`: the statement runs in the `IMPLICIT_TX`
    /// scope with no cancellation token.
    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
    }
1224
    /// v4.5 — write path with cooperative cancellation. Same dispatch
    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
    /// a separate entry point for backward-compat with the v4.5
    /// public API.
    ///
    /// `cancel` is forwarded unchanged; the statement runs in the
    /// `IMPLICIT_TX` scope.
    pub fn execute_with_cancel(
        &mut self,
        sql: &str,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
    }
1236
    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
    /// slot identified by `tx_id` so spg-server dispatch can scope
    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
    /// every other caller (engine self-tests, replay, spg-embedded)
    /// implicitly takes via `execute()` / `execute_with_cancel()`.
    ///
    /// Equivalent to `execute_in_with_cancel(sql, tx_id,
    /// CancelToken::none())`.
    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
    }
1246
1247    /// v4.41.1 write path with cooperative cancellation + explicit TX
1248    /// scope. Sets `self.current_tx` for the duration of the call so
1249    /// every `exec_*` helper transparently sees its TX's shadow
1250    /// catalog and savepoint stack; restores on exit so the field is
1251    /// only valid mid-call (no leakage across calls).
1252    pub fn execute_in_with_cancel(
1253        &mut self,
1254        sql: &str,
1255        tx_id: TxId,
1256        cancel: CancelToken<'_>,
1257    ) -> Result<QueryResult, EngineError> {
1258        let saved = self.current_tx;
1259        self.current_tx = Some(tx_id);
1260        let result = self.execute_inner_with_cancel(sql, cancel);
1261        self.current_tx = saved;
1262        result
1263    }
1264
1265    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1266    /// resulting [`Statement`] can be cached and re-executed via
1267    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1268    /// the simple-query path would synthesise internally (clock
1269    /// rewrites + ORDER BY position-ref resolution applied at
1270    /// prepare time, since both are session-independent). The
1271    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1272    /// nodes; they're resolved to concrete values per-call by
1273    /// `execute_prepared`'s substitution walk.
1274    ///
1275    /// Pgwire's `Parse` (P) message lands here.
1276    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1277        let mut stmt = parser::parse_statement(sql)?;
1278        let now_micros = self.clock.map(|f| f());
1279        rewrite_clock_calls(&mut stmt, now_micros);
1280        if let Statement::Select(s) = &mut stmt {
1281            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1282            // SELECT-list item BEFORE position / alias resolution so
1283            // downstream passes see the explicit list.
1284            expand_group_by_all(s);
1285            resolve_order_by_position(s);
1286            // v6.2.3 — cost-based JOIN reorder. No-op for
1287            // single-table FROMs or any non-INNER join shape.
1288            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1289        }
1290        Ok(stmt)
1291    }
1292
1293    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1294    /// the plan cache on hit, runs the full `prepare()` path on miss
1295    /// and inserts the resulting plan before returning. Skipping the
1296    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1297    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1298    ///
1299    /// Returns a cloned `Statement` (not a borrow) because the
1300    /// pgwire layer owns its `PreparedStmt` map per-session and the
1301    /// engine-level cache must stay available for other sessions.
1302    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1303    /// it replaces.
1304    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1305        // v6.3.1 — version-aware lookup. If the cached plan was
1306        // prepared before the most recent ANALYZE, evict and replan.
1307        let current_version = self.statistics.version();
1308        if let Some(plan) = self.plan_cache.get(sql) {
1309            if plan.statistics_version == current_version {
1310                return Ok(plan.stmt.clone());
1311            }
1312            // Stale entry — fall through to evict + re-prepare.
1313        }
1314        self.plan_cache.evict(sql);
1315        let stmt = self.prepare(sql)?;
1316        let source_tables = plan_cache::collect_source_tables(&stmt);
1317        let plan = plan_cache::PreparedPlan {
1318            stmt: stmt.clone(),
1319            statistics_version: current_version,
1320            source_tables,
1321            describe_columns: alloc::vec::Vec::new(),
1322        };
1323        self.plan_cache.insert(String::from(sql), plan);
1324        Ok(stmt)
1325    }
1326
    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
    /// Returns a shared borrow of the engine-level plan cache.
    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
        &self.plan_cache
    }
1331
    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
    /// Returns an exclusive borrow of the engine-level plan cache.
    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
        &mut self.plan_cache
    }
1336
1337    /// v6.3.3 — Describe a prepared `Statement` without executing.
1338    /// Returns `(parameter_oids, output_columns)`. Empty
1339    /// `output_columns` means the statement has no row-producing
1340    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1341    /// — pgwire layer maps that to a `NoData` reply.
1342    pub fn describe_prepared(
1343        &self,
1344        stmt: &Statement,
1345    ) -> (Vec<u32>, Vec<ColumnSchema>) {
1346        describe::describe_prepared(stmt, self.active_catalog())
1347    }
1348
1349    /// v6.1.1 — execute a [`Statement`] previously returned by
1350    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1351    /// nodes for the corresponding [`Value`] in `params` (1-based
1352    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1353    /// are decoded into typed `Value`s by the pgwire layer before
1354    /// this call so the resulting AST hits the same execution
1355    /// path as a simple query — no SQL re-parse.
1356    ///
1357    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1358    pub fn execute_prepared(
1359        &mut self,
1360        mut stmt: Statement,
1361        params: &[Value],
1362    ) -> Result<QueryResult, EngineError> {
1363        substitute_placeholders(&mut stmt, params)?;
1364        self.execute_stmt_with_cancel(stmt, CancelToken::none())
1365    }
1366
1367    fn execute_inner_with_cancel(
1368        &mut self,
1369        sql: &str,
1370        cancel: CancelToken<'_>,
1371    ) -> Result<QueryResult, EngineError> {
1372        cancel.check()?;
1373        let stmt = self.prepare(sql)?;
1374        // v6.5.1 — wrap the executor with a wall-clock window so we
1375        // can record into spg_stat_query. Skip when the engine has
1376        // no clock attached (no_std embedded callers).
1377        let start_us = self.clock.map(|f| f());
1378        let result = self.execute_stmt_with_cancel(stmt, cancel);
1379        if let (Some(t0), Ok(_)) = (start_us, &result) {
1380            let now = self.clock.map_or(t0, |f| f());
1381            let elapsed = now.saturating_sub(t0).max(0) as u64;
1382            self.query_stats.record(sql, elapsed, now as u64);
1383            // v6.5.6 — slow-query log: fire callback when elapsed
1384            // exceeds the configured floor.
1385            if let (Some(threshold), Some(logger)) =
1386                (self.slow_query_threshold_us, self.slow_query_logger)
1387                && elapsed >= threshold
1388            {
1389                logger(sql, elapsed);
1390            }
1391        }
1392        result
1393    }
1394
1395    fn execute_stmt_with_cancel(
1396        &mut self,
1397        stmt: Statement,
1398        cancel: CancelToken<'_>,
1399    ) -> Result<QueryResult, EngineError> {
1400        cancel.check()?;
1401        let result = match stmt {
1402            Statement::CreateTable(s) => self.exec_create_table(s),
1403            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1404            // CommandOk with affected=0; modified_catalog=false so
1405            // the WAL doesn't grow a useless entry. mailrs F3.
1406            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1407                affected: 0,
1408                modified_catalog: false,
1409            }),
1410            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1411            // PL/pgSQL). mailrs H1 + pg_dump compat.
1412            Statement::DoBlock => Ok(QueryResult::CommandOk {
1413                affected: 0,
1414                modified_catalog: false,
1415            }),
1416            Statement::CreateIndex(s) => self.exec_create_index(s),
1417            Statement::Insert(s) => self.exec_insert(s),
1418            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1419            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1420            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1421            Statement::Begin => self.exec_begin(),
1422            Statement::Commit => self.exec_commit(),
1423            Statement::Rollback => self.exec_rollback(),
1424            Statement::Savepoint(name) => self.exec_savepoint(name),
1425            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1426            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1427            Statement::ShowTables => Ok(self.exec_show_tables()),
1428            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1429            Statement::ShowUsers => Ok(self.exec_show_users()),
1430            Statement::ShowPublications => Ok(self.exec_show_publications()),
1431            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1432            Statement::CreateUser(s) => self.exec_create_user(&s),
1433            Statement::DropUser(name) => self.exec_drop_user(&name),
1434            Statement::Explain(e) => self.exec_explain(&e, cancel),
1435            Statement::AlterIndex(s) => self.exec_alter_index(s),
1436            Statement::AlterTable(s) => self.exec_alter_table(s),
1437            Statement::CreatePublication(s) => self.exec_create_publication(s),
1438            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1439            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1440            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1441            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1442            // which lives in spg-server's ServerState. The engine
1443            // surfaces a clear error; the server-layer dispatch
1444            // intercepts the SQL before it reaches the engine on
1445            // a server build, so this arm only fires for
1446            // engine-only callers (spg-embedded, lib tests).
1447            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1448                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1449            )),
1450            // v6.2.0 — ANALYZE recomputes per-column histograms.
1451            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1452            // v6.7.3 — COMPACT COLD SEGMENTS.
1453            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1454        };
1455        self.enforce_row_limit(result)
1456    }
1457
1458    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1459    /// surface as `EngineError::Unsupported` so the existing PG-wire
1460    /// error mapping stays uniform; the message carries the name so
1461    /// operators can grep replication-log noise. Inside-transaction
1462    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1463    /// stance) — replication-catalog mutation is a connection-level
1464    /// administrative op, not a transactional one.
1465    fn exec_create_publication(
1466        &mut self,
1467        s: CreatePublicationStatement,
1468    ) -> Result<QueryResult, EngineError> {
1469        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1470        // was over-cautious: it also blocked the auto-commit wrap
1471        // path (which begins an internal TX around every WAL-
1472        // logged statement). PG itself allows CREATE PUBLICATION
1473        // inside a transaction (it rolls back with the TX).
1474        self.publications
1475            .create(s.name, s.scope)
1476            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1477        Ok(QueryResult::CommandOk {
1478            affected: 1,
1479            modified_catalog: true,
1480        })
1481    }
1482
1483    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
1484    /// no-op when the publication doesn't exist (returns `affected=0`
1485    /// in that case so the wire-level command tag distinguishes
1486    /// "dropped" from "no-op", though both succeed).
1487    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1488        let removed = self.publications.drop(name);
1489        Ok(QueryResult::CommandOk {
1490            affected: usize::from(removed),
1491            modified_catalog: removed,
1492        })
1493    }
1494
1495    /// v6.1.2 — read access to the publication catalog. Used by
1496    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
1497    /// (v6.1.3+), and by e2e tests that need to assert state without
1498    /// going through the wire.
1499    pub const fn publications(&self) -> &publications::Publications {
1500        &self.publications
1501    }
1502
1503    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1504    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1505    /// created subscription. The actual worker thread is spawned
1506    /// by spg-server once the engine returns success.
1507    fn exec_create_subscription(
1508        &mut self,
1509        s: CreateSubscriptionStatement,
1510    ) -> Result<QueryResult, EngineError> {
1511        // See exec_create_publication — the in_transaction gate
1512        // was over-cautious; the auto-commit wrap path holds an
1513        // internal TX that this check was incorrectly blocking.
1514        let sub = subscriptions::Subscription {
1515            conn_str: s.conn_str,
1516            publications: s.publications,
1517            enabled: true,
1518            last_received_pos: 0,
1519        };
1520        self.subscriptions
1521            .create(s.name, sub)
1522            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1523        Ok(QueryResult::CommandOk {
1524            affected: 1,
1525            modified_catalog: true,
1526        })
1527    }
1528
1529    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
1530    /// doesn't exist (PG-compatible). The associated worker is
1531    /// torn down by spg-server when it observes the catalog
1532    /// change at the next snapshot or via the engine's
1533    /// subscriptions accessor (the worker polls the catalog on
1534    /// reconnect; v6.1.5's filter-side will tighten this to an
1535    /// explicit signal).
1536    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1537        let removed = self.subscriptions.drop(name);
1538        Ok(QueryResult::CommandOk {
1539            affected: usize::from(removed),
1540            modified_catalog: removed,
1541        })
1542    }
1543
1544    /// v6.1.4 — read access to the subscription catalog. Used by
1545    /// the subscription worker (read its own row to find its
1546    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
1547    /// and by e2e tests asserting state directly.
1548    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
1549        &self.subscriptions
1550    }
1551
1552    /// v6.1.4 — write access to `last_received_pos`. Worker
1553    /// calls this after each apply batch (under the engine's
1554    /// write-lock). Returns `false` when the subscription was
1555    /// dropped between when the worker received the record and
1556    /// when this call landed.
1557    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
1558        self.subscriptions.update_last_received_pos(name, pos)
1559    }
1560
1561    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1562    /// `(name, conn_str, publications, enabled, last_received_pos)`
1563    /// ordered by subscription name. The `publications` column is
1564    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1565    /// callers wanting structured access read `Engine::subscriptions`.
1566    fn exec_show_subscriptions(&self) -> QueryResult {
1567        let columns = alloc::vec![
1568            ColumnSchema::new("name", DataType::Text, false),
1569            ColumnSchema::new("conn_str", DataType::Text, false),
1570            ColumnSchema::new("publications", DataType::Text, false),
1571            ColumnSchema::new("enabled", DataType::Bool, false),
1572            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1573        ];
1574        let rows: Vec<Row> = self
1575            .subscriptions
1576            .iter()
1577            .map(|(name, sub)| {
1578                Row::new(alloc::vec![
1579                    Value::Text(name.clone()),
1580                    Value::Text(sub.conn_str.clone()),
1581                    Value::Text(sub.publications.join(", ")),
1582                    Value::Bool(sub.enabled),
1583                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1584                ])
1585            })
1586            .collect();
1587        QueryResult::Rows { columns, rows }
1588    }
1589
1590    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1591    /// `(table, column)` pair tracked in `Statistics`, with
1592    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1593    /// the same canonical form vector literals use for round-trip.
1594    fn exec_spg_statistic(&self) -> QueryResult {
1595        let columns = alloc::vec![
1596            ColumnSchema::new("table_name", DataType::Text, false),
1597            ColumnSchema::new("column_name", DataType::Text, false),
1598            ColumnSchema::new("null_frac", DataType::Float, false),
1599            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1600            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1601            // v6.7.0 — appended column (v6.2.0 stability contract
1602            // allows APPEND to spg_statistic, not reorder/rename).
1603            // Reports the cached per-table cold-row count; same
1604            // value across every column row of the same table.
1605            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1606        ];
1607        let rows: Vec<Row> = self
1608            .statistics
1609            .iter()
1610            .map(|((t, c), s)| {
1611                let cold = self
1612                    .catalog
1613                    .get(t)
1614                    .map_or(0, |table| table.cold_row_count());
1615                Row::new(alloc::vec![
1616                    Value::Text(t.clone()),
1617                    Value::Text(c.clone()),
1618                    Value::Float(f64::from(s.null_frac)),
1619                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1620                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1621                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1622                ])
1623            })
1624            .collect();
1625        QueryResult::Rows { columns, rows }
1626    }
1627
1628    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1629    /// per subscription with `(name, conn_str, publications,
1630    /// last_received_pos, enabled)`. Surface mirrors
1631    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1632    /// shape so it composes with SELECT clauses (WHERE, projection
1633    /// onto specific columns, etc).
1634    fn exec_spg_stat_replication(&self) -> QueryResult {
1635        let columns = alloc::vec![
1636            ColumnSchema::new("name", DataType::Text, false),
1637            ColumnSchema::new("conn_str", DataType::Text, false),
1638            ColumnSchema::new("publications", DataType::Text, false),
1639            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1640            ColumnSchema::new("enabled", DataType::Bool, false),
1641        ];
1642        let rows: Vec<Row> = self
1643            .subscriptions
1644            .iter()
1645            .map(|(name, sub)| {
1646                Row::new(alloc::vec![
1647                    Value::Text(name.clone()),
1648                    Value::Text(sub.conn_str.clone()),
1649                    Value::Text(sub.publications.join(",")),
1650                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1651                    Value::Bool(sub.enabled),
1652                ])
1653            })
1654            .collect();
1655        QueryResult::Rows { columns, rows }
1656    }
1657
1658    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1659    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1660    /// total_bytes)`.
1661    ///
1662    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1663    /// carve-out. Walks every user table's BTree indices to find
1664    /// which table's Cold locators point at each segment. Empty
1665    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1666    /// before any index registered a locator). The walk is
1667    /// O(tables × indices × keys); cached per call, not across
1668    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1669    fn exec_spg_stat_segment(&self) -> QueryResult {
1670        let columns = alloc::vec![
1671            ColumnSchema::new("segment_id", DataType::BigInt, false),
1672            ColumnSchema::new("table_name", DataType::Text, false),
1673            ColumnSchema::new("num_rows", DataType::BigInt, false),
1674            ColumnSchema::new("num_pages", DataType::BigInt, false),
1675            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1676        ];
1677        // v6.7.0 — build a segment_id → table_name map by walking
1678        // every user table's BTree indices once. O(tables × indices
1679        // × keys) for the v6.5.0 carve-out resolution; acceptable
1680        // because spg_stat_segment is operator-facing (not on a
1681        // hot-loop path).
1682        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1683        for tname in self.catalog.table_names() {
1684            if is_internal_table_name(&tname) {
1685                continue;
1686            }
1687            let Some(t) = self.catalog.get(&tname) else {
1688                continue;
1689            };
1690            for idx in t.indices() {
1691                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1692                    for (_, locs) in map.iter() {
1693                        for loc in locs {
1694                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1695                                segment_owners.entry(*segment_id).or_insert_with(|| tname.clone());
1696                            }
1697                        }
1698                    }
1699                }
1700            }
1701        }
1702        let rows: Vec<Row> = self
1703            .catalog
1704            .cold_segment_ids_global()
1705            .iter()
1706            .filter_map(|&id| {
1707                let seg = self.catalog.cold_segment(id)?;
1708                let meta = seg.meta();
1709                let owner = segment_owners
1710                    .get(&id)
1711                    .cloned()
1712                    .unwrap_or_default();
1713                Some(Row::new(alloc::vec![
1714                    Value::BigInt(i64::from(id)),
1715                    Value::Text(owner),
1716                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1717                    Value::BigInt(i64::from(meta.num_pages)),
1718                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1719                ]))
1720            })
1721            .collect();
1722        QueryResult::Rows { columns, rows }
1723    }
1724
1725    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1726    /// distinct SQL text recorded since the engine booted, capped
1727    /// at `QUERY_STATS_MAX` (1024). Columns:
1728    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1729    /// mean_us = total_us / exec_count (saturating).
1730    fn exec_spg_stat_query(&self) -> QueryResult {
1731        let columns = alloc::vec![
1732            ColumnSchema::new("sql", DataType::Text, false),
1733            ColumnSchema::new("exec_count", DataType::BigInt, false),
1734            ColumnSchema::new("total_us", DataType::BigInt, false),
1735            ColumnSchema::new("mean_us", DataType::BigInt, false),
1736            ColumnSchema::new("max_us", DataType::BigInt, false),
1737            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1738        ];
1739        let rows: Vec<Row> = self
1740            .query_stats
1741            .snapshot()
1742            .into_iter()
1743            .map(|(sql, s)| {
1744                let mean = if s.exec_count == 0 {
1745                    0
1746                } else {
1747                    s.total_us / s.exec_count
1748                };
1749                Row::new(alloc::vec![
1750                    Value::Text(sql),
1751                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1752                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1753                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1754                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1755                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1756                ])
1757            })
1758            .collect();
1759        QueryResult::Rows { columns, rows }
1760    }
1761
1762    /// v6.5.2 — register a connection-state provider. spg-server
1763    /// calls this at startup with a function that snapshots its
1764    /// per-pgwire-connection registry. Engine reads through the
1765    /// callback on `SELECT * FROM spg_stat_activity`.
1766    #[must_use]
1767    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
1768        self.activity_provider = Some(f);
1769        self
1770    }
1771
1772    /// v6.5.3 — register audit chain provider + verifier.
1773    #[must_use]
1774    pub const fn with_audit_providers(
1775        mut self,
1776        chain: AuditChainProvider,
1777        verify: AuditVerifier,
1778    ) -> Self {
1779        self.audit_chain_provider = Some(chain);
1780        self.audit_verifier = Some(verify);
1781        self
1782    }
1783
1784    /// v6.5.6 — register a slow-query log callback. `threshold_us`
1785    /// is the floor (in microseconds); only executes above the floor
1786    /// fire the callback. spg-server wires this from
1787    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
1788    #[must_use]
1789    pub const fn with_slow_query_log(
1790        mut self,
1791        threshold_us: u64,
1792        logger: SlowQueryLogger,
1793    ) -> Self {
1794        self.slow_query_threshold_us = Some(threshold_us);
1795        self.slow_query_logger = Some(logger);
1796        self
1797    }
1798
1799    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
1800    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
1801    /// the compile-time default of 256.
1802    pub fn set_plan_cache_max(&mut self, n: usize) {
1803        self.plan_cache.set_max_entries(n);
1804    }
1805
1806    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
1807    /// snapshot from the registered `ActivityProvider`. Returns an
1808    /// empty result set when no provider is registered (the no_std
1809    /// embedded path with no pgwire layer).
1810    fn exec_spg_stat_activity(&self) -> QueryResult {
1811        let columns = alloc::vec![
1812            ColumnSchema::new("pid", DataType::Int, false),
1813            ColumnSchema::new("user", DataType::Text, false),
1814            ColumnSchema::new("started_at_us", DataType::BigInt, false),
1815            ColumnSchema::new("current_sql", DataType::Text, false),
1816            ColumnSchema::new("wait_event", DataType::Text, false),
1817            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1818            ColumnSchema::new("in_transaction", DataType::Bool, false),
1819        ];
1820        let rows: Vec<Row> = self
1821            .activity_provider
1822            .map(|f| f())
1823            .unwrap_or_default()
1824            .into_iter()
1825            .map(|r| {
1826                Row::new(alloc::vec![
1827                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
1828                    Value::Text(r.user),
1829                    Value::BigInt(r.started_at_us),
1830                    Value::Text(r.current_sql),
1831                    Value::Text(r.wait_event),
1832                    Value::BigInt(r.elapsed_us),
1833                    Value::Bool(r.in_transaction),
1834                ])
1835            })
1836            .collect();
1837        QueryResult::Rows { columns, rows }
1838    }
1839
1840    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
1841    /// table with `(table_name, ddl)`. Reconstructed from catalog
1842    /// state on demand.
1843    fn exec_spg_table_ddl(&self) -> QueryResult {
1844        let columns = alloc::vec![
1845            ColumnSchema::new("table_name", DataType::Text, false),
1846            ColumnSchema::new("ddl", DataType::Text, false),
1847        ];
1848        let rows: Vec<Row> = self
1849            .catalog
1850            .table_names()
1851            .into_iter()
1852            .filter(|n| !is_internal_table_name(n))
1853            .filter_map(|name| {
1854                let table = self.catalog.get(&name)?;
1855                let ddl = render_create_table(&name, &table.schema().columns);
1856                Some(Row::new(alloc::vec![
1857                    Value::Text(name),
1858                    Value::Text(ddl),
1859                ]))
1860            })
1861            .collect();
1862        QueryResult::Rows { columns, rows }
1863    }
1864
1865    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
1866    /// with `(role_name, ddl)`. Password is redacted (matches the
1867    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
1868    fn exec_spg_role_ddl(&self) -> QueryResult {
1869        let columns = alloc::vec![
1870            ColumnSchema::new("role_name", DataType::Text, false),
1871            ColumnSchema::new("ddl", DataType::Text, false),
1872        ];
1873        let rows: Vec<Row> = self
1874            .users
1875            .iter()
1876            .map(|(name, rec)| {
1877                let ddl = alloc::format!(
1878                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
1879                    rec.role.as_str(),
1880                );
1881                Row::new(alloc::vec![Value::Text(String::from(name)), Value::Text(ddl)])
1882            })
1883            .collect();
1884        QueryResult::Rows { columns, rows }
1885    }
1886
1887    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
1888    /// `ddl` column concatenates every user table's CREATE +
1889    /// every role's CREATE in deterministic catalog order. Suitable
1890    /// for piping back through `Engine::execute` to recreate a
1891    /// schema-equivalent database.
1892    fn exec_spg_database_ddl(&self) -> QueryResult {
1893        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
1894        let mut out = String::new();
1895        for (name, rec) in self.users.iter() {
1896            out.push_str(&alloc::format!(
1897                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
1898                rec.role.as_str(),
1899            ));
1900        }
1901        for name in self.catalog.table_names() {
1902            if is_internal_table_name(&name) {
1903                continue;
1904            }
1905            if let Some(table) = self.catalog.get(&name) {
1906                out.push_str(&render_create_table(&name, &table.schema().columns));
1907                out.push_str(";\n");
1908            }
1909        }
1910        QueryResult::Rows {
1911            columns,
1912            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
1913        }
1914    }
1915
1916    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
1917    /// snapshot from the registered provider; empty when no
1918    /// provider is set.
1919    fn exec_spg_audit_chain(&self) -> QueryResult {
1920        let columns = alloc::vec![
1921            ColumnSchema::new("seq", DataType::BigInt, false),
1922            ColumnSchema::new("ts_ms", DataType::BigInt, false),
1923            ColumnSchema::new("prev_hash", DataType::Text, false),
1924            ColumnSchema::new("entry_hash", DataType::Text, false),
1925            ColumnSchema::new("sql", DataType::Text, false),
1926        ];
1927        let rows: Vec<Row> = self
1928            .audit_chain_provider
1929            .map(|f| f())
1930            .unwrap_or_default()
1931            .into_iter()
1932            .map(|r| {
1933                Row::new(alloc::vec![
1934                    Value::BigInt(r.seq),
1935                    Value::BigInt(r.ts_ms),
1936                    Value::Text(r.prev_hash_hex),
1937                    Value::Text(r.entry_hash_hex),
1938                    Value::Text(r.sql),
1939                ])
1940            })
1941            .collect();
1942        QueryResult::Rows { columns, rows }
1943    }
1944
1945    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
1946    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
1947    /// on a clean chain. Returns one row with both values 0 when
1948    /// no verifier is registered (no-data fallback for embedded
1949    /// callers).
1950    fn exec_spg_audit_verify(&self) -> QueryResult {
1951        let columns = alloc::vec![
1952            ColumnSchema::new("verified_count", DataType::BigInt, false),
1953            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
1954        ];
1955        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
1956        let row = Row::new(alloc::vec![
1957            Value::BigInt(verified),
1958            Value::BigInt(broken),
1959        ]);
1960        QueryResult::Rows {
1961            columns,
1962            rows: alloc::vec![row],
1963        }
1964    }
1965
1966    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
1967    pub fn query_stats(&self) -> &query_stats::QueryStats {
1968        &self.query_stats
1969    }
1970
1971    /// v6.5.1 — mutable accessor (clear, etc).
1972    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
1973        &mut self.query_stats
1974    }
1975
1976    /// v6.2.0 — read access to the per-column statistics table.
1977    /// Used by the planner (v6.2.2 selectivity functions read this),
1978    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
1979    pub const fn statistics(&self) -> &statistics::Statistics {
1980        &self.statistics
1981    }
1982
1983    /// v6.2.1 — return tables whose modified-row count crossed the
1984    /// auto-analyze threshold since the last ANALYZE on that table.
1985    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
1986    /// ANALYZE)` — combines PG-style fractional + absolute lower
1987    /// bound so a fresh / tiny table doesn't get hammered on every
1988    /// INSERT.
1989    ///
1990    /// Designed to be cheap: walks every user table's
1991    /// `Catalog::table_names()` + reads `statistics::modified_
1992    /// since_last_analyze()` (BTreeMap lookup). The background
1993    /// worker calls this under `engine.read()` then drops the lock
1994    /// before re-acquiring `engine.write()` for the actual ANALYZE.
1995    pub fn tables_needing_analyze(&self) -> Vec<String> {
1996        const MIN_ROWS: u64 = 100;
1997        let mut out = Vec::new();
1998        for name in self.catalog.table_names() {
1999            if is_internal_table_name(&name) {
2000                continue;
2001            }
2002            let Some(table) = self.catalog.get(&name) else {
2003                continue;
2004            };
2005            let row_count = table.rows().len() as u64;
2006            let modified = self.statistics.modified_since_last_analyze(&name);
2007            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2008            // computed in integer arithmetic so spg-engine stays
2009            // no_std without pulling in libm. `(n + 9) / 10` is
2010            // `ceil(n / 10)` for non-negative `n`.
2011            let base = row_count.max(MIN_ROWS);
2012            let threshold = base.saturating_add(9) / 10;
2013            if modified >= threshold {
2014                out.push(name);
2015            }
2016        }
2017        out
2018    }
2019
2020    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2021    /// every user table; `ANALYZE <name>` re-stats one. For each
2022    /// target table, single-pass scan + per-column histogram +
2023    /// `null_frac` + `n_distinct`. Replaces the table's prior
2024    /// stats; resets the modified-row counter.
2025    ///
2026    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2027    /// can add reservoir sampling at the > 100 K-row mark; not a
2028    /// scope blocker for the current commit since rows ≤ 100 K
2029    /// analyse in milliseconds.
2030    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2031        let names: Vec<String> = if let Some(name) = target {
2032            // Verify the table exists; surface a clear error if not.
2033            if self.catalog.get(name).is_none() {
2034                return Err(EngineError::Storage(StorageError::TableNotFound {
2035                    name: name.to_string(),
2036                }));
2037            }
2038            alloc::vec![name.to_string()]
2039        } else {
2040            self.catalog
2041                .table_names()
2042                .into_iter()
2043                .filter(|n| !is_internal_table_name(n))
2044                .collect()
2045        };
2046        let mut analysed = 0usize;
2047        for table_name in &names {
2048            self.analyze_one_table(table_name)?;
2049            analysed += 1;
2050        }
2051        // v6.3.1 — plan cache invalidation. Bump stats version so
2052        // future lookups see the new generation, and selectively
2053        // evict every plan whose `source_tables` overlap with the
2054        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2055        // whole cache.
2056        if analysed > 0 {
2057            self.statistics.bump_version();
2058            if target.is_some() {
2059                for t in &names {
2060                    self.plan_cache.evict_referencing(t);
2061                }
2062            } else {
2063                self.plan_cache.clear();
2064            }
2065        }
2066        Ok(QueryResult::CommandOk {
2067            affected: analysed,
2068            modified_catalog: true,
2069        })
2070    }
2071
2072    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2073    /// engine-layer compaction shim with the default
2074    /// 4 MiB segment-size threshold. spg-server intercepts the
2075    /// SQL before it reaches the engine on a server build —
2076    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2077    /// `Engine::compact_cold_segments_with_target` directly with
2078    /// the env value, and persists every merged segment to
2079    /// `<db>.spg/segments/`. This arm only fires for engine-only
2080    /// callers (spg-embedded, lib tests); in that mode merged
2081    /// segments live in memory and are dropped at process exit.
2082    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2083        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2084        let reports = self.compact_cold_segments_with_target(target)?;
2085        let columns = alloc::vec![
2086            ColumnSchema::new("table_name", DataType::Text, false),
2087            ColumnSchema::new("index_name", DataType::Text, false),
2088            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2089            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2090            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2091            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2092            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2093        ];
2094        let rows: Vec<Row> = reports
2095            .into_iter()
2096            .map(|(tname, iname, report)| {
2097                Row::new(alloc::vec![
2098                    Value::Text(tname),
2099                    Value::Text(iname),
2100                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2101                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2102                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2103                    Value::BigInt(
2104                        i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),
2105                    ),
2106                    Value::BigInt(
2107                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2108                    ),
2109                ])
2110            })
2111            .collect();
2112        Ok(QueryResult::Rows { columns, rows })
2113    }
2114
2115    /// Walk a single table's rows once and (re-)populate per-column
2116    /// stats. Drops the existing stats for `table` first so columns
2117    /// that have been DROP-ed between ANALYZEs don't leave stale
2118    /// rows.
2119    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2120        let table = self.catalog.get(table_name).ok_or_else(|| {
2121            EngineError::Storage(StorageError::TableNotFound {
2122                name: table_name.to_string(),
2123            })
2124        })?;
2125        let schema = table.schema().clone();
2126        let row_count = table.rows().len();
2127        // For each column, collect (sorted) non-NULL textual values
2128        // + count NULLs; then ask `statistics::build_histogram` to
2129        // produce the 101 bounds and `estimate_n_distinct` the
2130        // distinct count.
2131        self.statistics.clear_table(table_name);
2132        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2133            // v6.2.0 skip: vector columns have their own stats
2134            // shape (HNSW graph topology). v6.2 deliberation #1.
2135            if matches!(col_schema.ty, DataType::Vector { .. }) {
2136                continue;
2137            }
2138            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2139            let mut nulls: u64 = 0;
2140            for row in table.rows() {
2141                match row.values.get(col_pos) {
2142                    Some(Value::Null) | None => nulls += 1,
2143                    Some(v) => non_null_values.push(v.clone()),
2144                }
2145            }
2146            // Sort by type-aware ordering (Int as int, Text as
2147            // lex, etc.) so histogram bounds reflect the column's
2148            // natural order — not lexicographic on the string
2149            // representation, which would put "9" after "49".
2150            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2151            let non_null: Vec<String> = non_null_values
2152                .iter()
2153                .map(canonical_value_repr)
2154                .collect();
2155            let null_frac = if row_count == 0 {
2156                0.0
2157            } else {
2158                #[allow(clippy::cast_precision_loss)]
2159                let f = nulls as f32 / row_count as f32;
2160                f
2161            };
2162            let n_distinct = statistics::estimate_n_distinct(&non_null);
2163            let histogram_bounds = statistics::build_histogram(&non_null);
2164            self.statistics.set(
2165                table_name.to_string(),
2166                col_schema.name.clone(),
2167                statistics::ColumnStats {
2168                    null_frac,
2169                    n_distinct,
2170                    histogram_bounds,
2171                },
2172            );
2173        }
2174        self.statistics.reset_modified(table_name);
2175        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2176        // BTree indices and count Cold locators (MAX across
2177        // indices); store the result on the table. Surfaced via
2178        // `spg_statistic.cold_row_count` (new column) and
2179        // `spg_stat_segment.table_name` (new column).
2180        let cold_count = {
2181            let table = self
2182                .active_catalog()
2183                .get(table_name)
2184                .expect("table still present");
2185            table.count_cold_locators()
2186        };
2187        let table_mut = self
2188            .active_catalog_mut()
2189            .get_mut(table_name)
2190            .expect("table still present");
2191        table_mut.set_cold_row_count(cold_count);
2192        Ok(())
2193    }
2194
2195    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2196    /// `(name, scope, table_count)` ordered by publication name.
2197    ///   - `scope` is the human-readable string:
2198    ///       `"FOR ALL TABLES"` /
2199    ///       `"FOR TABLE t1, t2"` /
2200    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2201    ///   - `table_count` is NULL for `AllTables`, the list length
2202    ///     otherwise. NULLability lets clients distinguish "publish
2203    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2204    ///     parser forbids the empty list, but the column shape is
2205    ///     ready for the v6.1.5 publisher-side semantics).
2206    fn exec_show_publications(&self) -> QueryResult {
2207        let columns = alloc::vec![
2208            ColumnSchema::new("name", DataType::Text, false),
2209            ColumnSchema::new("scope", DataType::Text, false),
2210            ColumnSchema::new("table_count", DataType::Int, true),
2211        ];
2212        let rows: Vec<Row> = self
2213            .publications
2214            .iter()
2215            .map(|(name, scope)| {
2216                let (scope_str, count_val) = match scope {
2217                    spg_sql::ast::PublicationScope::AllTables => {
2218                        ("FOR ALL TABLES".to_string(), Value::Null)
2219                    }
2220                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2221                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2222                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2223                    ),
2224                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2225                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2226                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2227                    ),
2228                };
2229                Row::new(alloc::vec![
2230                    Value::Text(name.clone()),
2231                    Value::Text(scope_str),
2232                    count_val,
2233                ])
2234            })
2235            .collect();
2236        QueryResult::Rows { columns, rows }
2237    }
2238
2239    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2240    fn exec_show_users(&self) -> QueryResult {
2241        let columns = alloc::vec![
2242            ColumnSchema::new("name", DataType::Text, false),
2243            ColumnSchema::new("role", DataType::Text, false),
2244        ];
2245        let rows: Vec<Row> = self
2246            .users
2247            .iter()
2248            .map(|(name, rec)| {
2249                Row::new(alloc::vec![
2250                    Value::Text(name.to_string()),
2251                    Value::Text(rec.role.as_str().to_string()),
2252                ])
2253            })
2254            .collect();
2255        QueryResult::Rows { columns, rows }
2256    }
2257
2258    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2259        if self.in_transaction() {
2260            return Err(EngineError::Unsupported(
2261                "CREATE USER is not allowed inside a transaction".into(),
2262            ));
2263        }
2264        let role = users::Role::parse(&s.role).ok_or_else(|| {
2265            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2266        })?;
2267        // Prefer the host-injected RNG. Falls back to a deterministic
2268        // salt derived from the username only when no RNG is wired —
2269        // acceptable for tests; the server always installs one.
2270        let salt = self.salt_fn.map_or_else(
2271            || {
2272                let mut s_bytes = [0u8; 16];
2273                let digest = spg_crypto::hash(s.name.as_bytes());
2274                s_bytes.copy_from_slice(&digest[..16]);
2275                s_bytes
2276            },
2277            |f| f(),
2278        );
2279        self.users
2280            .create(&s.name, &s.password, role, salt)
2281            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2282        Ok(QueryResult::CommandOk {
2283            affected: 1,
2284            modified_catalog: true,
2285        })
2286    }
2287
2288    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2289        if self.in_transaction() {
2290            return Err(EngineError::Unsupported(
2291                "DROP USER is not allowed inside a transaction".into(),
2292            ));
2293        }
2294        self.users
2295            .drop(name)
2296            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2297        Ok(QueryResult::CommandOk {
2298            affected: 1,
2299            modified_catalog: true,
2300        })
2301    }
2302
    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
    /// Filter pass uses the same WHERE eval as `exec_select`. Per
    /// matched row, evaluate each RHS expression against the *old*
    /// row, then call `Table::update_row` which rebuilds indices.
    /// Indexed columns are correctly reflected because rebuild
    /// happens after the cell rewrite.
    ///
    /// The work is staged so that nothing in the target table is
    /// rewritten until every check has passed:
    ///   0. optional cold-tier promotion for a PK-equality WHERE;
    ///   1. plan: resolve SET targets, scan rows, evaluate WHERE and
    ///      the SET expressions against the original row values;
    ///   2. foreign-key checks (outbound, then inbound planning);
    ///   3. apply the child-side FK steps, then the planned updates.
    ///
    /// `cancel` is polled once every 256 scanned rows.
    ///
    /// Returns the `RETURNING` projection when the statement has one,
    /// otherwise `CommandOk` with the number of planned rows;
    /// `modified_catalog` is true only outside a transaction.
    ///
    /// # Errors
    ///
    /// Propagates `TableNotFound` for a missing table,
    /// `EvalError::ColumnNotFound` for an unknown SET target, and any
    /// error from expression evaluation, value coercion, the cancel
    /// check, the FK passes, or `Table::update_row`.
    fn exec_update_cancel(
        &mut self,
        stmt: &spg_sql::ast::UpdateStatement,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        // v5.2.3: if the WHERE is a PK equality and matches a cold-
        // tier row, promote it back to the hot tier *before* the
        // hot-row walk. The promote pushes the row to the end of
        // `table.rows`, where the upcoming SET-evaluation loop will
        // pick it up and apply the assignments. Lookups for the key
        // never observe a gap because `promote_cold_row` inserts the
        // hot row before retiring the cold locator.
        if let Some(w) = &stmt.where_ {
            // Cloned so the shared catalog borrow ends before the
            // mutable `promote_cold_row` call below.
            let schema_cols = self
                .active_catalog()
                .get(&stmt.table)
                .ok_or_else(|| {
                    EngineError::Storage(StorageError::TableNotFound {
                        name: stmt.table.clone(),
                    })
                })?
                .schema()
                .columns
                .clone();
            // Only proceeds when the predicate is recognised by
            // `try_pk_predicate` AND the matched column has an index.
            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
                && let Some(idx_name) = self
                    .active_catalog()
                    .get(&stmt.table)
                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
            {
                // Promote may be a no-op (key is hot-only or absent);
                // we don't care about the return value here — the
                // subsequent hot walk will either match or not.
                let _ = self
                    .active_catalog_mut()
                    .promote_cold_row(&stmt.table, &idx_name, &key);
            }
        }

        // Stage 1 — planning. The table is fetched through the
        // mutable accessor, but nothing is written to it until
        // stage 3b.
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
        // Resolve each SET target to a column position once, validate
        // up front so a typo'd column doesn't leave a partial mutation
        // behind.
        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
        for (col, expr) in &stmt.assignments {
            let pos = schema_cols
                .iter()
                .position(|c| c.name == *col)
                .ok_or_else(|| {
                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
                })?;
            targets.push((pos, expr));
        }
        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
        // Walk every row, evaluate WHERE then SET expressions. We
        // gather (position, new_values) tuples first and apply them
        // afterwards so the WHERE/RHS evaluation reads the original
        // row state — matches PG semantics (UPDATE doesn't see its
        // own writes).
        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
        for (i, row) in table.rows().iter().enumerate() {
            // v4.5: cooperative cancel checkpoint every 256 rows so
            // a runaway UPDATE without WHERE doesn't drag past the
            // server's query-timeout watchdog.
            if i.is_multiple_of(256) {
                cancel.check()?;
            }
            if let Some(w) = &stmt.where_ {
                let cond = eval::eval_expr(w, row, &ctx)?;
                // Only an exact `Bool(true)` selects the row; any
                // other result (including NULL) skips it.
                if !matches!(cond, Value::Bool(true)) {
                    continue;
                }
            }
            // Start from the old row and overwrite only the SET
            // targets, each coerced to its column's declared type.
            let mut new_vals = row.values.clone();
            for (pos, expr) in &targets {
                let v = eval::eval_expr(expr, row, &ctx)?;
                new_vals[*pos] =
                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
            }
            planned.push((i, new_vals));
        }
        // v7.6.6 — capture pre-update row values for the FK
        // enforcement passes below. `planned` carries new values
        // only; pair them with the old row as
        // `(position, old_values, new_values)`.
        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
            .iter()
            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
            .collect();
        let self_fks = table.schema().foreign_keys.clone();
        let affected = planned.len();
        // Marks the last use of this `table` borrow before the FK
        // passes re-borrow the catalog. `let _ = …` neither moves nor
        // drops the reference; the borrow ends here because nothing
        // below uses it.
        let _ = table;
        // v7.6.6 — Stage 2a: outbound FK check. The new values must
        // exist in the parent. Every planned row's new values are
        // passed, not only rows whose FK columns changed; any such
        // filtering would have to live in `enforce_fk_inserts`.
        if !self_fks.is_empty() {
            let new_rows: Vec<Vec<Value>> = planned
                .iter()
                .map(|(_pos, new_vals)| new_vals.clone())
                .collect();
            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
        }
        // v7.6.6 — Stage 2b: inbound FK check. For every row that
        // changed value in a column that *some other table* uses as
        // a FK parent column, react per `on_update` action.
        let child_plan = plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
        // Stage 3a — apply each child-side action.
        for step in &child_plan {
            apply_fk_child_step(self.active_catalog_mut(), step)?;
        }
        // Stage 3b — apply the original UPDATE.
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        // v7.9.4 — snapshot post-update values for RETURNING. Taken
        // before the loop below consumes `planned`; skipped (empty)
        // when there is no RETURNING clause.
        let updated_for_returning: Vec<Vec<Value>> =
            if stmt.returning.is_some() {
                planned.iter().map(|(_pos, vals)| vals.clone()).collect()
            } else {
                Vec::new()
            };
        for (pos, vals) in planned {
            table.update_row(pos, vals)?;
        }
        // Last use of the second `table` borrow (see the note on the
        // earlier `let _ = table;`).
        let _ = table;
        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
        // Only recorded outside a transaction and when at least one
        // row was planned.
        if !self.in_transaction() && affected > 0 {
            self.statistics
                .record_modifications(&stmt.table, affected as u64);
        }
        // v7.9.4 — RETURNING projection. Returns early, so the
        // `CommandOk` below is produced only for statements without
        // RETURNING.
        if let Some(items) = &stmt.returning {
            return self.build_returning_rows(
                &stmt.table,
                items,
                updated_for_returning,
            );
        }
        Ok(QueryResult::CommandOk {
            affected,
            modified_catalog: !self.in_transaction(),
        })
    }
2465
    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
    /// positions then delegates to `Table::delete_rows` (single index
    /// rebuild for the batch).
    ///
    /// Stages:
    ///   0. for a PK-equality WHERE, retire ("shadow") any cold-tier
    ///      locator for the key;
    ///   1. scan the hot rows and record the positions and values of
    ///      every row selected for deletion;
    ///   2. plan the foreign-key reaction for child tables;
    ///   3. apply the child-side steps, then delete the target rows.
    ///
    /// `cancel` is polled once every 256 scanned rows.
    ///
    /// Returns the `RETURNING` projection over the pre-delete row
    /// snapshots when the statement has one, otherwise `CommandOk`
    /// whose `affected` is the `delete_rows` result plus the
    /// cold-shadow count; `modified_catalog` is true only outside a
    /// transaction.
    ///
    /// # Errors
    ///
    /// Propagates `TableNotFound` for a missing table and any error
    /// from WHERE evaluation, the cancel check, FK planning, or the
    /// child-side FK steps.
    fn exec_delete_cancel(
        &mut self,
        stmt: &spg_sql::ast::DeleteStatement,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
        // locator for the key. The cold row body stays in the
        // segment (becoming shadowed garbage that a future
        // compaction pass reclaims) but the index no longer
        // resolves it. The shadow count contributes to the
        // affected total; the subsequent hot walk handles any hot
        // rows for the same key.
        //
        // NOTE(review): this runs before the FK planning in stage 2,
        // so an FK error returned there leaves the cold locator
        // already retired — confirm that callers roll this back.
        let mut cold_shadow_count: usize = 0;
        if let Some(w) = &stmt.where_ {
            // Cloned so the shared catalog borrow ends before the
            // mutable `shadow_cold_row` call below.
            let schema_cols = self
                .active_catalog()
                .get(&stmt.table)
                .ok_or_else(|| {
                    EngineError::Storage(StorageError::TableNotFound {
                        name: stmt.table.clone(),
                    })
                })?
                .schema()
                .columns
                .clone();
            // Only proceeds when the predicate is recognised by
            // `try_pk_predicate` AND the matched column has an index.
            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
                && let Some(idx_name) = self
                    .active_catalog()
                    .get(&stmt.table)
                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
            {
                // A failure from `shadow_cold_row` is deliberately
                // folded into "0 rows shadowed" rather than aborting
                // the DELETE.
                cold_shadow_count = self
                    .active_catalog_mut()
                    .shadow_cold_row(&stmt.table, &idx_name, &key)
                    .unwrap_or(0);
            }
        }

        // Stage 1 — scan. The table is fetched through the mutable
        // accessor, but nothing is written to it until stage 3b.
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
        let mut positions: Vec<usize> = Vec::new();
        // v7.6.3 — collect every to-delete row's full Value tuple
        // alongside its position, so the FK enforcement pass can
        // run after the mut borrow drops.
        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
        for (i, row) in table.rows().iter().enumerate() {
            // Cooperative cancel checkpoint every 256 rows.
            if i.is_multiple_of(256) {
                cancel.check()?;
            }
            // A row is kept unless WHERE evaluates to exactly
            // `Bool(true)`; with no WHERE clause every row is
            // deleted.
            let keep = if let Some(w) = &stmt.where_ {
                let cond = eval::eval_expr(w, row, &ctx)?;
                !matches!(cond, Value::Bool(true))
            } else {
                false
            };
            if !keep {
                positions.push(i);
                to_delete_rows.push(row.values.clone());
            }
        }
        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
        // catalog. Run the reverse-scan against every child table
        // whose FK targets this table. RESTRICT / NoAction raise an
        // error; CASCADE returns a cascade plan that stage 3 applies
        // before the primary delete.
        // NOTE(review): an earlier revision of this comment said
        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5,
        // while the stage-3a note below lists them as plan steps —
        // confirm against `plan_fk_parent_deletions`.
        //
        // `let _ = table;` marks the last use of the borrow; it does
        // not move or drop the reference.
        let _ = table;
        let cascade_plan = plan_fk_parent_deletions(
            self.active_catalog(),
            &stmt.table,
            &positions,
            &to_delete_rows,
        )?;
        // Stage 3a — apply each FK child step (SET NULL / SET
        // DEFAULT / CASCADE delete) before deleting the parent.
        // The plan is already ordered: nulls/defaults first, then
        // cascade deletes (so a row mutated and later deleted
        // surfaces as deleted — though v7.6.5 doesn't produce
        // that overlap today).
        for step in &cascade_plan {
            apply_fk_child_step(self.active_catalog_mut(), step)?;
        }
        // Stage 3b — actually delete the original target rows.
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        // Hot rows removed by `delete_rows` plus any cold locator
        // shadowed up front.
        let affected = table.delete_rows(&positions) + cold_shadow_count;
        let _ = table;
        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
        // Only recorded outside a transaction and when something was
        // actually removed.
        if !self.in_transaction() && affected > 0 {
            self.statistics
                .record_modifications(&stmt.table, affected as u64);
        }
        // v7.9.4 — RETURNING projection over the rows that were just
        // removed. `to_delete_rows` was snapshotted in stage 1 before
        // mutation, so the projection sees the pre-delete state
        // (matches PG semantics: DELETE RETURNING returns the row
        // as it was just before removal).
        // NOTE(review): the snapshot is built from the hot-row scan
        // only, so a cold-shadowed row counted in `affected`
        // presumably has no RETURNING row — confirm this is intended.
        if let Some(items) = &stmt.returning {
            return self.build_returning_rows(
                &stmt.table,
                items,
                to_delete_rows,
            );
        }
        Ok(QueryResult::CommandOk {
            affected,
            modified_catalog: !self.in_transaction(),
        })
    }
2592
2593    /// `SHOW TABLES` — one row per table in the active catalog.
2594    /// Column name is `name` so result-set consumers can downstream
2595    /// `SELECT name FROM ...` style logic if needed.
2596    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
2597    /// `QUERY PLAN` text table — first line names the top operator
2598    /// (Scan / Aggregate / Window / etc.), indented children list
2599    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
2600    /// shape, and any active index hits. `ANALYZE` execs the inner
2601    /// SELECT and appends actual-row + elapsed-micros annotations.
2602    #[allow(clippy::format_push_string)]
2603    fn exec_explain(
2604        &self,
2605        e: &spg_sql::ast::ExplainStatement,
2606        cancel: CancelToken<'_>,
2607    ) -> Result<QueryResult, EngineError> {
2608        let mut lines = Vec::<String>::new();
2609        explain_select(&e.inner, self, 0, &mut lines);
2610        if e.suggest {
2611            // v6.8.3 — index advisor. Walks the SELECT's FROM
2612            // tables + WHERE column refs; for each (table, column)
2613            // pair that lacks an index, append a SUGGEST line with
2614            // a copy-pastable `CREATE INDEX` statement. This is a
2615            // pure-syntax heuristic — no cardinality estimation —
2616            // matching the v6.8.3 design intent of "tell the
2617            // operator where indexes are missing", not "give the
2618            // mathematically optimal index set".
2619            let suggestions = build_index_suggestions(&e.inner, self);
2620            for s in suggestions {
2621                lines.push(s);
2622            }
2623        } else if e.analyze {
2624            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
2625            // with `(rows=N)` where the row count is computable
2626            // without re-executing the full query:
2627            //   - Top-level operator (first non-indented line):
2628            //     rows = final result.len()
2629            //   - "From: <table> [full scan]" lines: rows =
2630            //     table.rows().len() (catalog read; no execution)
2631            //   - "From: <table> [index seek]": indeterminate —
2632            //     the index step would need re-execution; v6.2.5
2633            //     adds per-operator wall-clock + hot/cold rows
2634            //     instrumentation that makes this concrete.
2635            //   - Everything else: marked `(—)` so the surface
2636            //     stays well-defined without silently dropping
2637            //     stats. v6.2.5 fills in via inline executor
2638            //     instrumentation.
2639            // Total elapsed lands on a trailing `Total: …` line.
2640            let started = self.clock.map(|f| f());
2641            let exec = self.exec_select_cancel(&e.inner, cancel)?;
2642            let elapsed_micros = match (self.clock, started) {
2643                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
2644                _ => None,
2645            };
2646            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
2647                rows.len()
2648            } else {
2649                0
2650            };
2651            annotate_explain_lines(&mut lines, row_count, self);
2652            let mut total = alloc::format!("Total: rows={row_count}");
2653            if let Some(us) = elapsed_micros {
2654                total.push_str(&alloc::format!(" elapsed={us}us"));
2655            }
2656            lines.push(total);
2657        }
2658        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
2659        let rows: Vec<Row> = lines
2660            .into_iter()
2661            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
2662            .collect();
2663        Ok(QueryResult::Rows { columns, rows })
2664    }
2665
2666    fn exec_show_tables(&self) -> QueryResult {
2667        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
2668        let rows: Vec<Row> = self
2669            .active_catalog()
2670            .table_names()
2671            .into_iter()
2672            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
2673            .collect();
2674        QueryResult::Rows { columns, rows }
2675    }
2676
2677    /// `SHOW COLUMNS FROM <table>` — one row per column with the
2678    /// declared name, SQL type rendering, and nullability flag.
2679    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
2680        let table =
2681            self.active_catalog()
2682                .get(table_name)
2683                .ok_or_else(|| StorageError::TableNotFound {
2684                    name: table_name.into(),
2685                })?;
2686        let columns = alloc::vec![
2687            ColumnSchema::new("name", DataType::Text, false),
2688            ColumnSchema::new("type", DataType::Text, false),
2689            ColumnSchema::new("nullable", DataType::Bool, false),
2690        ];
2691        let rows: Vec<Row> = table
2692            .schema()
2693            .columns
2694            .iter()
2695            .map(|c| {
2696                Row::new(alloc::vec![
2697                    Value::Text(c.name.clone()),
2698                    Value::Text(alloc::format!("{}", c.ty)),
2699                    Value::Bool(c.nullable),
2700                ])
2701            })
2702            .collect();
2703        Ok(QueryResult::Rows { columns, rows })
2704    }
2705
2706    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
2707        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2708        if self.tx_catalogs.contains_key(&tx_id) {
2709            return Err(EngineError::TransactionAlreadyOpen);
2710        }
2711        self.tx_catalogs.insert(
2712            tx_id,
2713            TxState {
2714                catalog: self.catalog.clone(),
2715                savepoints: Vec::new(),
2716            },
2717        );
2718        Ok(QueryResult::CommandOk {
2719            affected: 0,
2720            modified_catalog: false,
2721        })
2722    }
2723
2724    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
2725        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2726        let state = self
2727            .tx_catalogs
2728            .remove(&tx_id)
2729            .ok_or(EngineError::NoActiveTransaction)?;
2730        self.catalog = state.catalog;
2731        // All savepoints become permanent at COMMIT and the stack
2732        // resets for the next TX (`state.savepoints` is discarded with
2733        // `state`).
2734        Ok(QueryResult::CommandOk {
2735            affected: 0,
2736            modified_catalog: true,
2737        })
2738    }
2739
2740    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
2741        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2742        if self.tx_catalogs.remove(&tx_id).is_none() {
2743            return Err(EngineError::NoActiveTransaction);
2744        }
2745        // savepoints discarded with the TxState
2746        Ok(QueryResult::CommandOk {
2747            affected: 0,
2748            modified_catalog: false,
2749        })
2750    }
2751
2752    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
2753        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2754        let state = self
2755            .tx_catalogs
2756            .get_mut(&tx_id)
2757            .ok_or(EngineError::NoActiveTransaction)?;
2758        // PG re-uses an existing savepoint name by dropping the older
2759        // entry and pushing a fresh one — match that behaviour so
2760        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
2761        state.savepoints.retain(|(n, _)| n != &name);
2762        let snapshot = state.catalog.clone();
2763        state.savepoints.push((name, snapshot));
2764        Ok(QueryResult::CommandOk {
2765            affected: 0,
2766            modified_catalog: false,
2767        })
2768    }
2769
2770    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2771        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2772        let state = self
2773            .tx_catalogs
2774            .get_mut(&tx_id)
2775            .ok_or(EngineError::NoActiveTransaction)?;
2776        let pos = state
2777            .savepoints
2778            .iter()
2779            .rposition(|(n, _)| n == name)
2780            .ok_or_else(|| {
2781                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2782            })?;
2783        // The savepoint stays on the stack (PG semantics): a later
2784        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
2785        // after it is discarded.
2786        let snapshot = state.savepoints[pos].1.clone();
2787        state.savepoints.truncate(pos + 1);
2788        state.catalog = snapshot;
2789        Ok(QueryResult::CommandOk {
2790            affected: 0,
2791            modified_catalog: false,
2792        })
2793    }
2794
2795    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2796        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2797        let state = self
2798            .tx_catalogs
2799            .get_mut(&tx_id)
2800            .ok_or(EngineError::NoActiveTransaction)?;
2801        let pos = state
2802            .savepoints
2803            .iter()
2804            .rposition(|(n, _)| n == name)
2805            .ok_or_else(|| {
2806                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2807            })?;
2808        // RELEASE keeps the work since the savepoint, just discards the
2809        // bookmark plus everything nested under it.
2810        state.savepoints.truncate(pos);
2811        Ok(QueryResult::CommandOk {
2812            affected: 0,
2813            modified_catalog: false,
2814        })
2815    }
2816
2817    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
2818    /// (encoding = …)]`. Walks every table in the active catalog
2819    /// looking for an index matching `stmt.name`, then delegates the
2820    /// rebuild (including any encoding switch) to
2821    /// `Table::rebuild_nsw_index`. The "live" non-blocking
2822    /// optimisation is v6.0.4.1 / v6.1.x territory.
2823    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
2824    /// arm. Currently the only setting is `hot_tier_bytes`; later
2825    /// v6.7.x can extend `AlterTableTarget` without touching this
2826    /// arm structure.
2827    fn exec_alter_table(
2828        &mut self,
2829        s: spg_sql::ast::AlterTableStatement,
2830    ) -> Result<QueryResult, EngineError> {
2831        match s.target {
2832            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
2833                let table = self
2834                    .active_catalog_mut()
2835                    .get_mut(&s.name)
2836                    .ok_or_else(|| {
2837                        EngineError::Storage(StorageError::TableNotFound {
2838                            name: s.name.clone(),
2839                        })
2840                    })?;
2841                table.schema_mut().hot_tier_bytes = Some(n);
2842            }
2843            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
2844                // v7.6.8 — resolve FK against the live catalog first
2845                // (validates parent table, columns, indices). Then
2846                // verify every existing row in the child table
2847                // satisfies the new constraint. Then install it.
2848                let cols_snapshot = self
2849                    .active_catalog()
2850                    .get(&s.name)
2851                    .ok_or_else(|| {
2852                        EngineError::Storage(StorageError::TableNotFound {
2853                            name: s.name.clone(),
2854                        })
2855                    })?
2856                    .schema()
2857                    .columns
2858                    .clone();
2859                let storage_fk = resolve_foreign_key(
2860                    &s.name,
2861                    &cols_snapshot,
2862                    fk,
2863                    self.active_catalog(),
2864                )?;
2865                // Verify existing rows. Treat them as a virtual
2866                // INSERT batch — reusing the v7.6.2 enforce helper.
2867                let existing_rows: Vec<Vec<Value>> = self
2868                    .active_catalog()
2869                    .get(&s.name)
2870                    .expect("checked above")
2871                    .rows()
2872                    .iter()
2873                    .map(|r| r.values.clone())
2874                    .collect();
2875                enforce_fk_inserts(
2876                    self.active_catalog(),
2877                    &s.name,
2878                    core::slice::from_ref(&storage_fk),
2879                    &existing_rows,
2880                )?;
2881                // Reject duplicate constraint name.
2882                let table = self
2883                    .active_catalog_mut()
2884                    .get_mut(&s.name)
2885                    .expect("checked above");
2886                if let Some(name) = &storage_fk.name
2887                    && table
2888                        .schema()
2889                        .foreign_keys
2890                        .iter()
2891                        .any(|f| f.name.as_ref() == Some(name))
2892                {
2893                    return Err(EngineError::Unsupported(alloc::format!(
2894                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
2895                    )));
2896                }
2897                table.schema_mut().foreign_keys.push(storage_fk);
2898            }
2899            spg_sql::ast::AlterTableTarget::DropForeignKey(name) => {
2900                let table = self
2901                    .active_catalog_mut()
2902                    .get_mut(&s.name)
2903                    .ok_or_else(|| {
2904                        EngineError::Storage(StorageError::TableNotFound {
2905                            name: s.name.clone(),
2906                        })
2907                    })?;
2908                let fks = &mut table.schema_mut().foreign_keys;
2909                let before = fks.len();
2910                fks.retain(|f| f.name.as_ref() != Some(&name));
2911                if fks.len() == before {
2912                    return Err(EngineError::Unsupported(alloc::format!(
2913                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
2914                        s.name
2915                    )));
2916                }
2917            }
2918        }
2919        Ok(QueryResult::CommandOk {
2920            affected: 0,
2921            modified_catalog: !self.in_transaction(),
2922        })
2923    }
2924
2925    fn exec_alter_index(
2926        &mut self,
2927        stmt: spg_sql::ast::AlterIndexStatement,
2928    ) -> Result<QueryResult, EngineError> {
2929        // Translate the optional SQL-side encoding choice into the
2930        // storage-side enum; the same SqlVecEncoding -> VecEncoding
2931        // bridge `column_type_to_data_type` uses.
2932        let spg_sql::ast::AlterIndexStatement {
2933            name: idx_name,
2934            target,
2935        } = stmt;
2936        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
2937        let target = encoding.map(|e| match e {
2938            SqlVecEncoding::F32 => VecEncoding::F32,
2939            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
2940            SqlVecEncoding::F16 => VecEncoding::F16,
2941        });
2942        // Linear scan: index names are globally unique within a
2943        // catalog (enforced by add_nsw_index_inner) so the first
2944        // match is the only one. Save the table name to avoid
2945        // borrowing while we then take a mut borrow.
2946        let table_name = {
2947            let cat = self.active_catalog();
2948            let mut found: Option<String> = None;
2949            for tname in cat.table_names() {
2950                if let Some(t) = cat.get(&tname)
2951                    && t.indices().iter().any(|i| i.name == idx_name)
2952                {
2953                    found = Some(tname);
2954                    break;
2955                }
2956            }
2957            found.ok_or_else(|| {
2958                EngineError::Storage(StorageError::IndexNotFound {
2959                    name: idx_name.clone(),
2960                })
2961            })?
2962        };
2963        let table = self
2964            .active_catalog_mut()
2965            .get_mut(&table_name)
2966            .expect("table found above");
2967        table.rebuild_nsw_index(&idx_name, target)?;
2968        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
2969        // changes cost characteristics; evict any cached plans.
2970        self.plan_cache.evict_referencing(&table_name);
2971        Ok(QueryResult::CommandOk {
2972            affected: 0,
2973            modified_catalog: !self.in_transaction(),
2974        })
2975    }
2976
2977    fn exec_create_index(
2978        &mut self,
2979        stmt: CreateIndexStatement,
2980    ) -> Result<QueryResult, EngineError> {
2981        let table = self
2982            .active_catalog_mut()
2983            .get_mut(&stmt.table)
2984            .ok_or_else(|| {
2985                EngineError::Storage(StorageError::TableNotFound {
2986                    name: stmt.table.clone(),
2987                })
2988            })?;
2989        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
2990        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
2991            return Ok(QueryResult::CommandOk {
2992                affected: 0,
2993                modified_catalog: false,
2994            });
2995        }
2996        // v7.9.14 — multi-column index parses through; engine
2997        // builds a single-column BTree on the leading column only.
2998        // The extras live on the AST so spg-server's dispatcher
2999        // can emit a PG-wire NoticeResponse / log line. Composite
3000        // BTree keys land in v7.10.
3001        let _ = &stmt.extra_columns; // intentional drop on engine side
3002        let table_name = stmt.table.clone();
3003        // v6.8.0 — resolve INCLUDE column names to positions. Done
3004        // before `add_index` so a typo error surfaces before any
3005        // catalog mutation lands.
3006        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
3007            Vec::new()
3008        } else {
3009            let schema = table.schema();
3010            stmt.included_columns
3011                .iter()
3012                .map(|c| {
3013                    schema.column_position(c).ok_or_else(|| {
3014                        EngineError::Storage(StorageError::ColumnNotFound {
3015                            column: c.clone(),
3016                        })
3017                    })
3018                })
3019                .collect::<Result<Vec<_>, _>>()?
3020        };
3021        match stmt.method {
3022            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
3023            IndexMethod::Hnsw => {
3024                if !included_positions.is_empty() {
3025                    return Err(EngineError::Unsupported(
3026                        "INCLUDE columns are not supported on HNSW indexes".into(),
3027                    ));
3028                }
3029                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
3030            }
3031            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
3032            IndexMethod::Brin => {
3033                if !included_positions.is_empty() {
3034                    return Err(EngineError::Unsupported(
3035                        "INCLUDE columns are not supported on BRIN indexes".into(),
3036                    ));
3037                }
3038                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
3039            }
3040        }
3041        if !included_positions.is_empty()
3042            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
3043        {
3044            idx.included_columns = included_positions;
3045        }
3046        // v6.8.1 — persist partial-index predicate. Stored as the
3047        // expression's Display form so the catalog snapshot stays
3048        // pure (storage has no spg-sql dependency). The runtime
3049        // maintenance path treats partial indexes identically to
3050        // full indexes for v6.8.1 (over-maintenance is safe; the
3051        // planner-side "use partial when query WHERE implies the
3052        // predicate" pass is STABILITY carve-out).
3053        if let Some(pred_expr) = &stmt.partial_predicate {
3054            let canonical = pred_expr.to_string();
3055            if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
3056                return Err(EngineError::Unsupported(
3057                    "WHERE predicates are not supported on HNSW or BRIN indexes".into(),
3058                ));
3059            }
3060            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3061                idx.partial_predicate = Some(canonical);
3062            }
3063        }
3064        // v6.8.2 — persist expression index key. Same Display-form
3065        // storage; the runtime maintenance pass evaluates each
3066        // row's expression to derive the index key, but for v6.8.2
3067        // the engine falls through to the bare-column-reference
3068        // path and the expression is preserved for format-layer
3069        // round-trip + future planner work. Carved-out in
3070        // STABILITY § "Out of v6.8".
3071        if let Some(key_expr) = &stmt.expression {
3072            if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
3073                return Err(EngineError::Unsupported(
3074                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
3075                ));
3076            }
3077            let canonical = key_expr.to_string();
3078            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3079                idx.expression = Some(canonical);
3080            }
3081        }
3082        // v7.9.29 — persist `is_unique` flag on the storage Index.
3083        // Combined with `partial_predicate`, INSERT enforcement
3084        // checks that no other row whose predicate evaluates true
3085        // shares the same indexed key. Parser already rejected
3086        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
3087        // For multi-column UNIQUE INDEX the extras matter (the
3088        // full tuple is the uniqueness key), so resolve them to
3089        // column positions and persist on the index too.
3090        if stmt.is_unique {
3091            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
3092            for col_name in &stmt.extra_columns {
3093                let pos = table
3094                    .schema()
3095                    .columns
3096                    .iter()
3097                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
3098                    .ok_or_else(|| {
3099                        EngineError::Unsupported(alloc::format!(
3100                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
3101                            stmt.name, stmt.table
3102                        ))
3103                    })?;
3104                extra_positions.push(pos);
3105            }
3106            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3107                idx.is_unique = true;
3108                idx.extra_column_positions = extra_positions;
3109            }
3110            // At index-creation time, check the existing rows for
3111            // pre-existing duplicates that would have violated the
3112            // new constraint — otherwise CREATE UNIQUE INDEX would
3113            // silently leave duplicates in place.
3114            let snapshot_indices = table.indices().to_vec();
3115            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
3116                table.rows().iter().cloned().collect();
3117            let snapshot_schema = table.schema().clone();
3118            let idx_ref = snapshot_indices
3119                .iter()
3120                .find(|i| i.name == stmt.name)
3121                .expect("just-added index");
3122            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
3123        }
3124        // v6.3.1 — adding an index can change the optimal plan for
3125        // any cached query that references this table.
3126        self.plan_cache.evict_referencing(&table_name);
3127        Ok(QueryResult::CommandOk {
3128            affected: 0,
3129            modified_catalog: !self.in_transaction(),
3130        })
3131    }
3132
3133    fn exec_create_table(
3134        &mut self,
3135        stmt: CreateTableStatement,
3136    ) -> Result<QueryResult, EngineError> {
3137        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
3138            return Ok(QueryResult::CommandOk {
3139                affected: 0,
3140                modified_catalog: false,
3141            });
3142        }
3143        let table_name = stmt.name.clone();
3144        // v7.9.13 — pluck the names of any columns marked
3145        // `PRIMARY KEY` inline so the post-create-table pass can
3146        // build an implicit BTree index. mailrs F1.
3147        let inline_pk_columns: Vec<String> = stmt
3148            .columns
3149            .iter()
3150            .filter(|c| c.is_primary_key)
3151            .map(|c| c.name.clone())
3152            .collect();
3153        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
3154        // and UNIQUE (a, b, ...). Each builds a BTree index on the
3155        // leading column (the existing single-column storage tier)
3156        // and registers a UniquenessConstraint on the schema for
3157        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
3158        let cols = stmt
3159            .columns
3160            .into_iter()
3161            .map(column_def_to_schema)
3162            .collect::<Result<Vec<_>, _>>()?;
3163        // Composite NOT-NULL implication for PRIMARY KEY columns.
3164        let mut cols = cols;
3165        for tc in &stmt.table_constraints {
3166            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
3167                for col_name in columns {
3168                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
3169                        col.nullable = false;
3170                    }
3171                }
3172            }
3173        }
3174        // v7.6.1 — resolve every FK in the statement against the
3175        // already-known catalog. Validates: parent table exists,
3176        // parent column names exist, arity matches, parent columns
3177        // have a PK / UNIQUE index. Self-referencing FKs (parent
3178        // table == this table) resolve against the column list we
3179        // just built — they don't need the catalog yet.
3180        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
3181            Vec::with_capacity(stmt.foreign_keys.len());
3182        for fk in stmt.foreign_keys {
3183            fks.push(resolve_foreign_key(
3184                &table_name,
3185                &cols,
3186                fk,
3187                self.active_catalog(),
3188            )?);
3189        }
3190        let mut schema = TableSchema::new(table_name.clone(), cols);
3191        schema.foreign_keys = fks;
3192        // v7.9.19 — translate AST table_constraints to storage
3193        // UniquenessConstraints (column name → position) so the
3194        // INSERT enforcement helper sees positions directly.
3195        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
3196        for tc in &stmt.table_constraints {
3197            let (is_pk, names) = match tc {
3198                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
3199                    (true, columns.clone())
3200                }
3201                spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3202                    (false, columns.clone())
3203                }
3204            };
3205            let mut positions = Vec::with_capacity(names.len());
3206            for n in &names {
3207                let pos = schema
3208                    .columns
3209                    .iter()
3210                    .position(|c| c.name == *n)
3211                    .ok_or_else(|| {
3212                        EngineError::Unsupported(alloc::format!(
3213                            "table constraint references unknown column {n:?}"
3214                        ))
3215                    })?;
3216                positions.push(pos);
3217            }
3218            uc_storage.push(spg_storage::UniquenessConstraint {
3219                is_primary_key: is_pk,
3220                columns: positions,
3221            });
3222        }
3223        schema.uniqueness_constraints = uc_storage.clone();
3224        self.active_catalog_mut().create_table(schema)?;
3225        // v7.9.13 — implicit BTree per inline PK column +
3226        // v7.9.19 — implicit BTree on the leading column of every
3227        // table-level PRIMARY KEY / UNIQUE constraint.
3228        let table = self
3229            .active_catalog_mut()
3230            .get_mut(&table_name)
3231            .expect("just created");
3232        for (i, col_name) in inline_pk_columns.iter().enumerate() {
3233            let idx_name = if inline_pk_columns.len() == 1 {
3234                alloc::format!("{table_name}_pkey")
3235            } else {
3236                alloc::format!("{table_name}_pkey_{i}")
3237            };
3238            if let Err(e) = table.add_index(idx_name, col_name) {
3239                return Err(EngineError::Storage(e));
3240            }
3241        }
3242        for (i, tc) in stmt.table_constraints.iter().enumerate() {
3243            let (is_pk, names) = match tc {
3244                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
3245                    (true, columns)
3246                }
3247                spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3248                    (false, columns)
3249                }
3250            };
3251            let leading = &names[0];
3252            // Skip if a same-column BTree already exists (e.g.
3253            // inline PK on the leading column).
3254            let already = table
3255                .indices()
3256                .iter()
3257                .any(|idx| {
3258                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3259                        && table.schema().columns[idx.column_position].name == *leading
3260                });
3261            if already {
3262                continue;
3263            }
3264            let suffix = if is_pk { "pkey" } else { "key" };
3265            let idx_name = if names.len() == 1 {
3266                alloc::format!("{table_name}_{leading}_{suffix}")
3267            } else {
3268                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
3269            };
3270            if let Err(e) = table.add_index(idx_name, leading) {
3271                return Err(EngineError::Storage(e));
3272            }
3273        }
3274        Ok(QueryResult::CommandOk {
3275            affected: 0,
3276            modified_catalog: !self.in_transaction(),
3277        })
3278    }
3279
3280    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
3281        // v7.9.21 — snapshot the clock fn pointer before the mut
3282        // borrow on the catalog opens; runtime DEFAULT eval needs
3283        // it inside the row hot loop.
3284        let clock = self.clock;
3285        let table = self
3286            .active_catalog_mut()
3287            .get_mut(&stmt.table)
3288            .ok_or_else(|| {
3289                EngineError::Storage(StorageError::TableNotFound {
3290                    name: stmt.table.clone(),
3291                })
3292            })?;
3293        // v3.1.5: clone the columns vector only (not the whole
3294        // TableSchema — saves one String alloc for the table name).
3295        // We need an owned snapshot because we'll call `table.insert`
3296        // (mutable borrow on `table`) inside the row loop while
3297        // reading schema fields.
3298        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
3299        let schema_cols_len = column_meta.len();
3300        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
3301        // column `c` is filled from the `j`-th tuple slot; `None` means
3302        // "fill with NULL". Validated once and reused for every row.
3303        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
3304            None => None, // 1-1 mapping, fast path
3305            Some(cols) => {
3306                let mut map = alloc::vec![None; schema_cols_len];
3307                for (j, name) in cols.iter().enumerate() {
3308                    let idx = column_meta
3309                        .iter()
3310                        .position(|c| c.name == *name)
3311                        .ok_or_else(|| {
3312                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
3313                        })?;
3314                    if map[idx].is_some() {
3315                        return Err(EngineError::Storage(StorageError::ArityMismatch {
3316                            expected: schema_cols_len,
3317                            actual: cols.len(),
3318                        }));
3319                    }
3320                    map[idx] = Some(j);
3321                }
3322                // Omitted columns must either be nullable, carry a
3323                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
3324                // omissions up front so the WAL stays clean.
3325                for (i, col) in column_meta.iter().enumerate() {
3326                    if map[i].is_none()
3327                        && !col.nullable
3328                        && col.default.is_none()
3329                        && col.runtime_default.is_none()
3330                        && !col.auto_increment
3331                    {
3332                        return Err(EngineError::Storage(StorageError::NullInNotNull {
3333                            column: col.name.clone(),
3334                        }));
3335                    }
3336                }
3337                Some(map)
3338            }
3339        };
3340        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
3341        // v7.6.2 — snapshot this table's FK list before the
3342        // mutable-borrow window so we can run parent lookups
3343        // against the immutable catalog after parsing. Empty vec is
3344        // the no-FK fast path; clone cost is O(fks * arity) which
3345        // is < 100 ns for typical schemas.
3346        let fks = table.schema().foreign_keys.clone();
3347        let mut affected = 0usize;
3348        // Stage 1 — parse + AUTO_INC + coerce all rows under the
3349        // single mutable borrow.
3350        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
3351        for tuple in stmt.rows {
3352            if tuple.len() != expected_tuple_len {
3353                return Err(EngineError::Storage(StorageError::ArityMismatch {
3354                    expected: expected_tuple_len,
3355                    actual: tuple.len(),
3356                }));
3357            }
3358            // Fast path: no column-list permutation → tuple slot j
3359            // maps to schema column j. We can zip schema with tuple
3360            // and skip the `raw_tuple` staging allocation entirely.
3361            let values: Vec<Value> = if let Some(map) = &tuple_pos {
3362                // Permuted path: still need raw_tuple to index by `map[i]`.
3363                let raw_tuple: Vec<Value> = tuple
3364                    .into_iter()
3365                    .map(literal_expr_to_value)
3366                    .collect::<Result<_, _>>()?;
3367                let mut out = Vec::with_capacity(schema_cols_len);
3368                for (i, col) in column_meta.iter().enumerate() {
3369                    let mut raw = match map[i] {
3370                        Some(j) => raw_tuple[j].clone(),
3371                        None => resolve_column_default_free(col, clock)?,
3372                    };
3373                    if col.auto_increment && raw.is_null() {
3374                        let next = table.next_auto_value(i).ok_or_else(|| {
3375                            EngineError::Unsupported(alloc::format!(
3376                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
3377                                col.name
3378                            ))
3379                        })?;
3380                        raw = Value::BigInt(next);
3381                    }
3382                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
3383                }
3384                out
3385            } else {
3386                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
3387                let mut out = Vec::with_capacity(schema_cols_len);
3388                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
3389                    let mut raw = literal_expr_to_value(expr)?;
3390                    if col.auto_increment && raw.is_null() {
3391                        let next = table.next_auto_value(i).ok_or_else(|| {
3392                            EngineError::Unsupported(alloc::format!(
3393                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
3394                                col.name
3395                            ))
3396                        })?;
3397                        raw = Value::BigInt(next);
3398                    }
3399                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
3400                }
3401                out
3402            };
3403            all_values.push(values);
3404        }
3405        // Stage 2 — FK enforcement on the immutable catalog.
3406        // Non-lexical lifetimes release the mutable borrow on
3407        // `table` here since stage 1 was the last use. The
3408        // parent-table lookup runs before any row is committed.
3409        let uniqueness = table.schema().uniqueness_constraints.clone();
3410        let _ = table;
3411        if !fks.is_empty() {
3412            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
3413        }
3414        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
3415        enforce_uniqueness_inserts(
3416            self.active_catalog(),
3417            &stmt.table,
3418            &uniqueness,
3419            &all_values,
3420        )?;
3421        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
3422        // Independent of table-level UniquenessConstraint (which
3423        // can't carry a predicate). Walks the table's indexes;
3424        // for each `is_unique` index, only rows whose
3425        // partial_predicate evaluates truthy are checked for
3426        // collision. mailrs K1.
3427        enforce_unique_index_inserts(
3428            self.active_catalog(),
3429            &stmt.table,
3430            &all_values,
3431        )?;
3432        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
3433        //   - `DO NOTHING` filters `all_values` to non-conflicting
3434        //     rows + drops within-batch duplicates.
3435        //   - `DO UPDATE SET …` ALSO filters, but for each
3436        //     conflicting row it queues an UPDATE on the existing
3437        //     row using the incoming row's values as `EXCLUDED.*`.
3438        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
3439        let mut skipped_count = 0usize;
3440        if let Some(clause) = &stmt.on_conflict {
3441            let conflict_cols = resolve_on_conflict_columns(
3442                self.active_catalog(),
3443                &stmt.table,
3444                clause.target_columns.as_slice(),
3445            )?;
3446            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
3447            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
3448            for values in all_values {
3449                let key_tuple: Vec<&Value> =
3450                    conflict_cols.iter().map(|&c| &values[c]).collect();
3451                // SQL spec: NULL in any conflict column means "no
3452                // conflict possible" (NULL ≠ NULL for uniqueness).
3453                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
3454                let collides_with_table = !has_null_key
3455                    && on_conflict_keys_exist(
3456                        self.active_catalog(),
3457                        &stmt.table,
3458                        &conflict_cols,
3459                        &key_tuple,
3460                    );
3461                let key_tuple_owned: Vec<Value> =
3462                    key_tuple.iter().map(|v| (*v).clone()).collect();
3463                let collides_with_batch = !has_null_key
3464                    && seen_keys.iter().any(|k| k == &key_tuple_owned);
3465                let collides = collides_with_table || collides_with_batch;
3466                match (&clause.action, collides) {
3467                    (_, false) => {
3468                        seen_keys.push(key_tuple_owned);
3469                        kept.push(values);
3470                    }
3471                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
3472                        skipped_count += 1;
3473                    }
3474                    (
3475                        spg_sql::ast::OnConflictAction::Update {
3476                            assignments,
3477                            where_,
3478                        },
3479                        true,
3480                    ) => {
3481                        if !collides_with_table {
3482                            skipped_count += 1;
3483                            continue;
3484                        }
3485                        let target_pos = lookup_row_position_by_keys(
3486                            self.active_catalog(),
3487                            &stmt.table,
3488                            &conflict_cols,
3489                            &key_tuple,
3490                        )
3491                        .ok_or_else(|| {
3492                            EngineError::Unsupported(
3493                                "ON CONFLICT DO UPDATE: conflict detected but row \
3494                                 position could not be resolved (cold-tier row?)"
3495                                    .into(),
3496                            )
3497                        })?;
3498                        let updated = apply_on_conflict_assignments(
3499                            self.active_catalog(),
3500                            &stmt.table,
3501                            target_pos,
3502                            &values,
3503                            assignments,
3504                            where_.as_ref(),
3505                        )?;
3506                        if let Some(new_row) = updated {
3507                            pending_updates.push((target_pos, new_row));
3508                        } else {
3509                            skipped_count += 1;
3510                        }
3511                    }
3512                }
3513            }
3514            all_values = kept;
3515        }
3516        // Stage 3 — insert all rows under a fresh mutable borrow.
3517        let table = self
3518            .active_catalog_mut()
3519            .get_mut(&stmt.table)
3520            .ok_or_else(|| {
3521                EngineError::Storage(StorageError::TableNotFound {
3522                    name: stmt.table.clone(),
3523                })
3524            })?;
3525        // v7.9.4 — keep RETURNING projection rows separate per
3526        // INSERT and per UPDATE branch so DO UPDATE pushes the new
3527        // post-update state, not the incoming-only values.
3528        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
3529        for values in all_values {
3530            if stmt.returning.is_some() {
3531                returning_rows.push(values.clone());
3532            }
3533            table.insert(Row::new(values))?;
3534            affected += 1;
3535        }
3536        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
3537        // in the conflict-resolution pass. update_row handles
3538        // index maintenance + body re-encoding.
3539        for (pos, new_row) in pending_updates {
3540            if stmt.returning.is_some() {
3541                returning_rows.push(new_row.clone());
3542            }
3543            table.update_row(pos, new_row)?;
3544            affected += 1;
3545        }
3546        let _ = skipped_count;
3547        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
3548        // up in the table after this statement (insert or
3549        // post-update on conflict).
3550        if let Some(items) = &stmt.returning {
3551            let _ = table;
3552            return self.build_returning_rows(
3553                &stmt.table,
3554                items,
3555                returning_rows,
3556            );
3557        }
3558        // v6.2.1 — auto-analyze: track per-table modified-row
3559        // counter so the background sweep can decide when to
3560        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
3561        // — one BTreeMap entry update per INSERT batch.
3562        if !self.in_transaction() && affected > 0 {
3563            self.statistics
3564                .record_modifications(&stmt.table, affected as u64);
3565        }
3566        Ok(QueryResult::CommandOk {
3567            affected,
3568            modified_catalog: !self.in_transaction(),
3569        })
3570    }
3571
    // NOTE: this paragraph describes `exec_select_cancel` (defined
    // further down), not the AS OF SEGMENT scan that follows.
    // v4.5: SELECT with cooperative cancellation. The token is
    // honoured between UNION peers and inside the bare-SELECT row
    // loop; HNSW kNN graph walks and the aggregate executor don't
    // honour it yet (deferred — those paths bound their work
    // internally by `LIMIT k` and `GROUP BY` cardinality).
3577    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
3578    /// by id, decodes each row body against the table's current
3579    /// schema, applies the SELECT's projection + optional WHERE +
3580    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
3581    /// / ORDER BY are unsupported on this path (STABILITY carve-
3582    /// out); operators wanting them should restore the segment
3583    /// into a regular table first.
3584    fn exec_select_as_of_segment(
3585        &self,
3586        stmt: &SelectStatement,
3587        from: &spg_sql::ast::FromClause,
3588        segment_id: u32,
3589    ) -> Result<QueryResult, EngineError> {
3590        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
3591        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
3592        if !from.joins.is_empty()
3593            || stmt.group_by.is_some()
3594            || stmt.having.is_some()
3595            || !stmt.unions.is_empty()
3596            || !stmt.order_by.is_empty()
3597            || stmt.offset.is_some()
3598            || stmt.distinct
3599            || aggregate::uses_aggregate(stmt)
3600        {
3601            return Err(EngineError::Unsupported(
3602                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
3603                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
3604                    .into(),
3605            ));
3606        }
3607        let table = self
3608            .active_catalog()
3609            .get(&from.primary.name)
3610            .ok_or_else(|| StorageError::TableNotFound {
3611                name: from.primary.name.clone(),
3612            })?;
3613        let schema = table.schema().clone();
3614        let schema_cols = &schema.columns;
3615        let alias = from
3616            .primary
3617            .alias
3618            .as_deref()
3619            .unwrap_or(from.primary.name.as_str());
3620        let ctx = EvalContext::new(schema_cols, Some(alias));
3621        let seg = self
3622            .active_catalog()
3623            .cold_segment(segment_id)
3624            .ok_or_else(|| {
3625                EngineError::Unsupported(alloc::format!(
3626                    "AS OF SEGMENT: cold segment {segment_id} not registered"
3627                ))
3628            })?;
3629        let mut out_rows: Vec<Row> = Vec::new();
3630        let mut limit_remaining: Option<usize> =
3631            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
3632        for (_key, body) in seg.scan() {
3633            let (row, _consumed) = spg_storage::decode_row_body_dense(&body, &schema)
3634                .map_err(EngineError::Storage)?;
3635            if let Some(where_expr) = &stmt.where_ {
3636                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
3637                if !matches!(cond, Value::Bool(true)) {
3638                    continue;
3639                }
3640            }
3641            // Projection.
3642            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
3643            out_rows.push(projected);
3644            if let Some(rem) = limit_remaining.as_mut() {
3645                if *rem == 0 {
3646                    out_rows.pop();
3647                    break;
3648                }
3649                *rem -= 1;
3650            }
3651        }
3652        // Output column schema: derive from SELECT items.
3653        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
3654        Ok(QueryResult::Rows {
3655            columns,
3656            rows: out_rows,
3657        })
3658    }
3659
3660    /// v6.10.2 — simple-path WHERE eval that doesn't go through
3661    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
3662    /// scan paths predicate against a snapshot frozen segment, no
3663    /// cross-row state.
3664    fn eval_expr_simple(
3665        &self,
3666        expr: &Expr,
3667        row: &Row,
3668        ctx: &EvalContext,
3669    ) -> Result<Value, EngineError> {
3670        let cancel = CancelToken::none();
3671        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
3672    }
3673
3674    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
3675    /// Given the table name, the user-supplied projection items,
3676    /// and the mutated rows (post-insert / post-update values, or
3677    /// pre-delete snapshot), build a `QueryResult::Rows` whose
3678    /// schema describes the projected columns. Mailrs migration
3679    /// blocker #1.
3680    fn build_returning_rows(
3681        &self,
3682        table_name: &str,
3683        items: &[SelectItem],
3684        mutated_rows: Vec<Vec<Value>>,
3685    ) -> Result<QueryResult, EngineError> {
3686        let table = self.active_catalog().get(table_name).ok_or_else(|| {
3687            EngineError::Storage(StorageError::TableNotFound {
3688                name: table_name.into(),
3689            })
3690        })?;
3691        let schema_cols = table.schema().columns.clone();
3692        let columns = self.derive_output_columns(items, &schema_cols, table_name);
3693        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
3694        for values in mutated_rows {
3695            let row = Row::new(values);
3696            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
3697            out_rows.push(projected);
3698        }
3699        Ok(QueryResult::Rows {
3700            columns,
3701            rows: out_rows,
3702        })
3703    }
3704
3705    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
3706    /// `SelectItem::Wildcard` to all schema columns and
3707    /// `SelectItem::Expr` via the regular eval path.
3708    fn project_row_simple(
3709        &self,
3710        row: &Row,
3711        items: &[SelectItem],
3712        schema_cols: &[ColumnSchema],
3713        alias: &str,
3714    ) -> Result<Row, EngineError> {
3715        let ctx = EvalContext::new(schema_cols, Some(alias));
3716        let cancel = CancelToken::none();
3717        let mut out_vals = Vec::new();
3718        for item in items {
3719            match item {
3720                SelectItem::Wildcard => {
3721                    out_vals.extend(row.values.iter().cloned());
3722                }
3723                SelectItem::Expr { expr, .. } => {
3724                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
3725                    out_vals.push(v);
3726                }
3727            }
3728        }
3729        Ok(Row::new(out_vals))
3730    }
3731
3732    /// v6.10.2 — derive the output `ColumnSchema` list for an
3733    /// AS OF SEGMENT projection. Wildcards take the full schema;
3734    /// expressions take the alias if present or a synthetic
3735    /// `?column?` (PG convention) otherwise.
3736    fn derive_output_columns(
3737        &self,
3738        items: &[SelectItem],
3739        schema_cols: &[ColumnSchema],
3740        _alias: &str,
3741    ) -> Vec<ColumnSchema> {
3742        let mut out = Vec::new();
3743        for item in items {
3744            match item {
3745                SelectItem::Wildcard => {
3746                    out.extend(schema_cols.iter().cloned());
3747                }
3748                SelectItem::Expr { alias, .. } => {
3749                    let name = alias
3750                        .clone()
3751                        .unwrap_or_else(|| "?column?".to_string());
3752                    // Default to Text; the caller's row values
3753                    // carry the actual type. v6.10.2 scope.
3754                    out.push(ColumnSchema::new(name, DataType::Text, true));
3755                }
3756            }
3757        }
3758        out
3759    }
3760
3761    fn exec_select_cancel(
3762        &self,
3763        stmt: &SelectStatement,
3764        cancel: CancelToken<'_>,
3765    ) -> Result<QueryResult, EngineError> {
3766        cancel.check()?;
3767        // v6.10.2 — cold-tier time-travel short-circuit. When the
3768        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
3769        // dedicated cold-segment scan instead of the regular
3770        // hot+index path. The scope is intentionally narrow for
3771        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
3772        // optionally with a single-column-equality WHERE. JOINs /
3773        // aggregates / ORDER BY / subqueries on top of a time-
3774        // travelled scan are STABILITY § "Out of v6.10".
3775        if let Some(from) = &stmt.from
3776            && let Some(seg_id) = from.primary.as_of_segment
3777        {
3778            return self.exec_select_as_of_segment(stmt, from, seg_id);
3779        }
3780        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
3781        // pre-CTE because they don't read from the catalog and
3782        // shouldn't participate in regular FROM resolution.
3783        if let Some(from) = &stmt.from
3784            && from.joins.is_empty()
3785            && stmt.where_.is_none()
3786            && stmt.group_by.is_none()
3787            && stmt.having.is_none()
3788            && stmt.unions.is_empty()
3789            && stmt.order_by.is_empty()
3790            && stmt.limit.is_none()
3791            && stmt.offset.is_none()
3792            && !stmt.distinct
3793            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
3794        {
3795            let lower = from.primary.name.to_ascii_lowercase();
3796            match lower.as_str() {
3797                "spg_statistic" => return Ok(self.exec_spg_statistic()),
3798                // v6.5.0 — observability v2 virtual tables.
3799                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
3800                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
3801                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
3802                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
3803                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
3804                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
3805                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
3806                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
3807                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
3808                _ => {}
3809            }
3810        }
3811        // v4.11: CTEs materialise into a temporary enriched catalog
3812        // *before* anything else — the body SELECT can then refer
3813        // to CTE names via the regular FROM-clause resolution.
3814        // Uncorrelated only: each CTE body runs once against the
3815        // current catalog, not against later CTEs' results (left-
3816        // to-right materialisation would relax this, but we keep
3817        // it simple for v4.11 MVP).
3818        if !stmt.ctes.is_empty() {
3819            return self.exec_with_ctes(stmt, cancel);
3820        }
3821        // v4.10: subqueries (uncorrelated) are resolved here, before
3822        // the executor sees the row loop. We clone the statement so
3823        // we can mutate without disturbing the caller's AST — most
3824        // queries pass through with no subquery nodes and the clone
3825        // is cheap; with subqueries the materialisation cost
3826        // dominates anyway.
3827        let mut stmt_owned;
3828        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
3829            stmt_owned = stmt.clone();
3830            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
3831            &stmt_owned
3832        } else {
3833            stmt
3834        };
3835        if stmt_ref.unions.is_empty() {
3836            return self.exec_bare_select_cancel(stmt_ref, cancel);
3837        }
3838        // UNION path: clone-strip the head into a bare block (its own
3839        // DISTINCT and any inner ORDER BY are dropped by parser rule —
3840        // the wrapper SelectStatement carries them), execute, then chain
3841        // peers with left-associative dedup semantics.
3842        let mut head = stmt_ref.clone();
3843        head.unions = Vec::new();
3844        head.order_by = Vec::new();
3845        head.limit = None;
3846        let QueryResult::Rows { columns, mut rows } =
3847            self.exec_bare_select_cancel(&head, cancel)?
3848        else {
3849            unreachable!("bare SELECT cannot return CommandOk")
3850        };
3851        for (kind, peer) in &stmt_ref.unions {
3852            let QueryResult::Rows {
3853                columns: peer_cols,
3854                rows: peer_rows,
3855            } = self.exec_bare_select_cancel(peer, cancel)?
3856            else {
3857                unreachable!("bare SELECT cannot return CommandOk")
3858            };
3859            if peer_cols.len() != columns.len() {
3860                return Err(EngineError::Unsupported(alloc::format!(
3861                    "UNION arity mismatch: head has {} columns, peer has {}",
3862                    columns.len(),
3863                    peer_cols.len()
3864                )));
3865            }
3866            rows.extend(peer_rows);
3867            if matches!(kind, UnionKind::Distinct) {
3868                rows = dedup_rows(rows);
3869            }
3870        }
3871        // ORDER BY at the top of a UNION applies to the combined result.
3872        // Eval against the projected schema (NOT the source table).
3873        if !stmt.order_by.is_empty() {
3874            let synth_ctx = EvalContext::new(&columns, None);
3875            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
3876            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
3877            for r in rows {
3878                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
3879                tagged.push((keys, r));
3880            }
3881            sort_by_keys(&mut tagged, &descs);
3882            rows = tagged.into_iter().map(|(_, r)| r).collect();
3883        }
3884        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
3885        Ok(QueryResult::Rows { columns, rows })
3886    }
3887
3888    #[allow(clippy::too_many_lines)]
3889    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
3890    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
3891    /// Synthesises a single-column virtual table whose column type
3892    /// is TEXT and whose rows are the array elements. Routes
3893    /// through the regular projection / WHERE / ORDER BY / LIMIT
3894    /// machinery so set-returning UNNEST composes naturally with
3895    /// the rest of the SELECT surface.
3896    fn exec_select_unnest(
3897        &self,
3898        stmt: &SelectStatement,
3899        primary: &TableRef,
3900        cancel: CancelToken<'_>,
3901    ) -> Result<QueryResult, EngineError> {
3902        let expr = primary
3903            .unnest_expr
3904            .as_deref()
3905            .expect("caller guards unnest_expr.is_some()");
3906        // Evaluate the array expression once. Empty schema / empty
3907        // row — uncorrelated UNNEST cannot reference outer columns.
3908        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
3909        let ctx = EvalContext::new(&empty_schema, None);
3910        let dummy_row = Row::new(alloc::vec::Vec::new());
3911        let items: alloc::vec::Vec<Option<alloc::string::String>> =
3912            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
3913                Value::Null => alloc::vec::Vec::new(),
3914                Value::TextArray(items) => items,
3915                other => {
3916                    return Err(EngineError::Unsupported(alloc::format!(
3917                        "unnest() expects a TEXT[] argument, got {:?}",
3918                        other.data_type()
3919                    )));
3920                }
3921            };
3922        let alias = primary
3923            .alias
3924            .clone()
3925            .unwrap_or_else(|| "unnest".to_string());
3926        let col_schema = ColumnSchema::new(alias.clone(), DataType::Text, true);
3927        let schema_cols = alloc::vec![col_schema.clone()];
3928        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
3929        // Materialise the synthetic rows.
3930        let rows: alloc::vec::Vec<Row> = items
3931            .into_iter()
3932            .map(|item| {
3933                Row::new(alloc::vec![match item {
3934                    Some(s) => Value::Text(s),
3935                    None => Value::Null,
3936                }])
3937            })
3938            .collect();
3939        // Apply WHERE.
3940        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
3941            let mut out = alloc::vec::Vec::with_capacity(rows.len());
3942            for row in rows {
3943                cancel.check()?;
3944                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
3945                if matches!(v, Value::Bool(true)) {
3946                    out.push(row);
3947                }
3948            }
3949            out
3950        } else {
3951            rows
3952        };
3953        // Projection.
3954        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
3955        let mut projected_rows: alloc::vec::Vec<Row> =
3956            alloc::vec::Vec::with_capacity(filtered.len());
3957        for row in &filtered {
3958            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
3959            for p in &projection {
3960                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
3961            }
3962            projected_rows.push(Row::new(vals));
3963        }
3964        // ORDER BY / LIMIT — apply on the projected rows (cheap;
3965        // unnest result sets are small by design).
3966        let columns: alloc::vec::Vec<ColumnSchema> = projection
3967            .iter()
3968            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
3969            .collect();
3970        // Re-evaluate ORDER BY against the source schema (pre-projection
3971        // so col refs by name still resolve through `scan_ctx`).
3972        if !stmt.order_by.is_empty() {
3973            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
3974                .iter()
3975                .enumerate()
3976                .map(|(i, r)| -> Result<_, EngineError> {
3977                    let keys: Result<Vec<Value>, EngineError> = stmt
3978                        .order_by
3979                        .iter()
3980                        .map(|ob| {
3981                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
3982                        })
3983                        .collect();
3984                    Ok((i, keys?))
3985                })
3986                .collect::<Result<_, _>>()?;
3987            indexed.sort_by(|a, b| {
3988                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
3989                    let mut cmp = value_cmp(ka, kb);
3990                    if stmt.order_by[idx].desc {
3991                        cmp = cmp.reverse();
3992                    }
3993                    if cmp != core::cmp::Ordering::Equal {
3994                        return cmp;
3995                    }
3996                }
3997                core::cmp::Ordering::Equal
3998            });
3999            projected_rows = indexed
4000                .into_iter()
4001                .map(|(i, _)| projected_rows[i].clone())
4002                .collect();
4003        }
4004        // LIMIT / OFFSET — apply at the tail.
4005        if let Some(offset) = stmt.offset_literal() {
4006            let off = (offset as usize).min(projected_rows.len());
4007            projected_rows.drain(..off);
4008        }
4009        if let Some(limit) = stmt.limit_literal() {
4010            projected_rows.truncate(limit as usize);
4011        }
4012        Ok(QueryResult::Rows {
4013            columns,
4014            rows: projected_rows,
4015        })
4016    }
4017
4018    fn exec_bare_select_cancel(
4019        &self,
4020        stmt: &SelectStatement,
4021        cancel: CancelToken<'_>,
4022    ) -> Result<QueryResult, EngineError> {
4023        // v4.12: window-function path. When the projection contains
4024        // any `name(args) OVER (...)` we route to the dedicated
4025        // executor — partition + sort + per-row window value before
4026        // the regular projection.
4027        if select_has_window(stmt) {
4028            return self.exec_select_with_window(stmt, cancel);
4029        }
4030        // Constant SELECT (no FROM) — evaluate each item once against an
4031        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
4032        // `SELECT '7'::INT`. Column references will surface as
4033        // ColumnNotFound on eval since the schema is empty.
4034        let Some(from) = &stmt.from else {
4035            let empty_schema: Vec<ColumnSchema> = Vec::new();
4036            let ctx = EvalContext::new(&empty_schema, None);
4037            let projection = build_projection(&stmt.items, &empty_schema, "")?;
4038            let dummy_row = Row::new(Vec::new());
4039            let mut values = Vec::with_capacity(projection.len());
4040            for p in &projection {
4041                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
4042            }
4043            let columns: Vec<ColumnSchema> = projection
4044                .into_iter()
4045                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4046                .collect();
4047            return Ok(QueryResult::Rows {
4048                columns,
4049                rows: alloc::vec![Row::new(values)],
4050            });
4051        };
4052        // Multi-table FROM (one or more joined peers) goes through the
4053        // nested-loop join executor. Single-table FROM stays on the
4054        // existing scan + index-seek path.
4055        if !from.joins.is_empty() {
4056            return self.exec_joined_select(stmt, from);
4057        }
4058        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
4059        // single-column table at SELECT entry by evaluating the
4060        // expression once against the empty row (UNNEST is
4061        // uncorrelated in v7.11; correlated / LATERAL unnest is a
4062        // v7.12 carve-out). Build a virtual `Table` in a heap-only
4063        // catalog, then route to the regular scan path.
4064        if from.primary.unnest_expr.is_some() {
4065            return self.exec_select_unnest(stmt, &from.primary, cancel);
4066        }
4067        let primary = &from.primary;
4068        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
4069            StorageError::TableNotFound {
4070                name: primary.name.clone(),
4071            }
4072        })?;
4073        let schema_cols = &table.schema().columns;
4074        // The qualifier accepted on column refs is the alias (if any) else the
4075        // bare table name.
4076        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
4077        let ctx = EvalContext::new(schema_cols, Some(alias));
4078
4079        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
4080        // WHERE and an NSW index on `col` skips the full scan. The
4081        // walk returns rows already in ascending-distance order, so
4082        // ORDER BY / LIMIT are honoured implicitly.
4083        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
4084            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
4085        }
4086
4087        // Index seek: if WHERE is `col = literal` (or commuted) and the
4088        // referenced column has an index, dispatch each locator through
4089        // the catalog (hot tier → borrow, cold tier → page-read +
4090        // decode) and iterate just those rows. Otherwise fall back to a
4091        // full scan over the hot tier (cold-tier rows are only reached
4092        // via index seek in v5.1 — full table scans against cold-tier
4093        // data ship in v5.2 with the freezer's per-segment scan API).
4094        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt
4095            .where_
4096            .as_ref()
4097            .and_then(|w| try_index_seek(w, schema_cols, self.active_catalog(), table, alias));
4098
4099        // Aggregate path: filter rows first, then hand off to the
4100        // aggregate executor which does its own projection + ORDER BY.
4101        if aggregate::uses_aggregate(stmt) {
4102            let mut filtered: Vec<&Row> = Vec::new();
4103            // v6.2.6 — Memoize: per-query LRU cache for correlated
4104            // scalar subqueries. Fresh per row-loop entry so each
4105            // SELECT execution gets an isolated cache.
4106            let mut memo = memoize::MemoizeCache::new();
4107            if let Some(rows) = &indexed_rows {
4108                for cow in rows {
4109                    let row = cow.as_ref();
4110                    if let Some(where_expr) = &stmt.where_ {
4111                        let cond = self.eval_expr_with_correlated(
4112                            where_expr,
4113                            row,
4114                            &ctx,
4115                            cancel,
4116                            Some(&mut memo),
4117                        )?;
4118                        if !matches!(cond, Value::Bool(true)) {
4119                            continue;
4120                        }
4121                    }
4122                    filtered.push(row);
4123                }
4124            } else {
4125                for i in 0..table.row_count() {
4126                    let row = &table.rows()[i];
4127                    if let Some(where_expr) = &stmt.where_ {
4128                        let cond = self.eval_expr_with_correlated(
4129                            where_expr,
4130                            row,
4131                            &ctx,
4132                            cancel,
4133                            Some(&mut memo),
4134                        )?;
4135                        if !matches!(cond, Value::Bool(true)) {
4136                            continue;
4137                        }
4138                    }
4139                    filtered.push(row);
4140                }
4141            }
4142            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
4143            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
4144            return Ok(QueryResult::Rows {
4145                columns: agg.columns,
4146                rows: agg.rows,
4147            });
4148        }
4149
4150        let projection = build_projection(&stmt.items, schema_cols, alias)?;
4151
4152        // Materialise the filter pass into `(order_key, projected_row)`
4153        // tuples. The order key is `None` when there's no ORDER BY clause.
4154        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
4155        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
4156        let mut memo = memoize::MemoizeCache::new();
4157        // Inline the per-row work in a closure so the indexed and full-
4158        // scan branches share the body.
4159        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
4160            if loop_idx.is_multiple_of(256) {
4161                cancel.check()?;
4162            }
4163            if let Some(where_expr) = &stmt.where_ {
4164                let cond = self.eval_expr_with_correlated(
4165                    where_expr,
4166                    row,
4167                    &ctx,
4168                    cancel,
4169                    Some(&mut memo),
4170                )?;
4171                if !matches!(cond, Value::Bool(true)) {
4172                    return Ok(());
4173                }
4174            }
4175            let mut values = Vec::with_capacity(projection.len());
4176            for p in &projection {
4177                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4178            }
4179            let order_keys = if stmt.order_by.is_empty() {
4180                Vec::new()
4181            } else {
4182                build_order_keys(&stmt.order_by, row, &ctx)?
4183            };
4184            tagged.push((order_keys, Row::new(values)));
4185            Ok(())
4186        };
4187        if let Some(rows) = &indexed_rows {
4188            for (loop_idx, cow) in rows.iter().enumerate() {
4189                process_row(cow.as_ref(), loop_idx)?;
4190            }
4191        } else {
4192            for i in 0..table.row_count() {
4193                process_row(&table.rows()[i], i)?;
4194            }
4195        }
4196
4197        if !stmt.order_by.is_empty() {
4198            // Partial-sort fast path: when LIMIT is small relative to
4199            // the row count, select_nth_unstable + sort just the
4200            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
4201            // requires the full sort because de-dup happens after.
4202            let keep = if stmt.distinct {
4203                None
4204            } else {
4205                stmt.limit_literal()
4206                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
4207            };
4208            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4209            partial_sort_tagged(&mut tagged, keep, &descs);
4210        }
4211
4212        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4213        if stmt.distinct {
4214            output_rows = dedup_rows(output_rows);
4215        }
4216        apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4217
4218        let columns: Vec<ColumnSchema> = projection
4219            .into_iter()
4220            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4221            .collect();
4222
4223        Ok(QueryResult::Rows {
4224            columns,
4225            rows: output_rows,
4226        })
4227    }
4228
4229    /// Multi-table SELECT executor (one or more JOIN peers).
4230    ///
4231    /// v1.10 builds the joined row set up-front via nested-loop joins,
4232    /// then runs WHERE + projection + ORDER BY against the combined
4233    /// rows. No index seek. Aggregates and DISTINCT still work because
4234    /// the executor delegates projection through the same shared paths.
4235    #[allow(clippy::too_many_lines)]
4236    fn exec_joined_select(
4237        &self,
4238        stmt: &SelectStatement,
4239        from: &FromClause,
4240    ) -> Result<QueryResult, EngineError> {
4241        // Resolve every table reference up front so we surface
4242        // TableNotFound before we start the cartesian work.
4243        let primary_table = self
4244            .active_catalog()
4245            .get(&from.primary.name)
4246            .ok_or_else(|| StorageError::TableNotFound {
4247                name: from.primary.name.clone(),
4248            })?;
4249        let primary_alias = from
4250            .primary
4251            .alias
4252            .as_deref()
4253            .unwrap_or(from.primary.name.as_str())
4254            .to_string();
4255        let mut joined_tables: Vec<(&Table, String, JoinKind, Option<&Expr>)> = Vec::new();
4256        for j in &from.joins {
4257            let t = self.active_catalog().get(&j.table.name).ok_or_else(|| {
4258                StorageError::TableNotFound {
4259                    name: j.table.name.clone(),
4260                }
4261            })?;
4262            let a = j
4263                .table
4264                .alias
4265                .as_deref()
4266                .unwrap_or(j.table.name.as_str())
4267                .to_string();
4268            joined_tables.push((t, a, j.kind, j.on.as_ref()));
4269        }
4270
4271        // Build the combined schema: composite "alias.col" names so the
4272        // qualified-column resolver can find anything by exact match.
4273        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
4274        for col in &primary_table.schema().columns {
4275            combined_schema.push(ColumnSchema::new(
4276                alloc::format!("{primary_alias}.{}", col.name),
4277                col.ty,
4278                col.nullable,
4279            ));
4280        }
4281        for (t, a, _, _) in &joined_tables {
4282            for col in &t.schema().columns {
4283                combined_schema.push(ColumnSchema::new(
4284                    alloc::format!("{a}.{}", col.name),
4285                    col.ty,
4286                    col.nullable,
4287                ));
4288            }
4289        }
4290        let ctx = EvalContext::new(&combined_schema, None);
4291
4292        // Nested-loop join. Starting set: every primary row, padded with
4293        // (no joined columns yet).
4294        let mut working: Vec<Row> = primary_table.rows().iter().cloned().collect();
4295        let mut produced_len = primary_table.schema().columns.len();
4296        for (t, _, kind, on) in &joined_tables {
4297            let right_arity = t.schema().columns.len();
4298            let mut next: Vec<Row> = Vec::new();
4299            for left in &working {
4300                let mut left_matched = false;
4301                for right in t.rows() {
4302                    let mut combined_vals = left.values.clone();
4303                    combined_vals.extend(right.values.iter().cloned());
4304                    // Pad combined to the eventual full width so the
4305                    // partial schema still matches positions used by ON.
4306                    let combined = Row::new(combined_vals);
4307                    let keep = if let Some(on_expr) = on {
4308                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
4309                        matches!(cond, Value::Bool(true))
4310                    } else {
4311                        // CROSS / comma-list: every pair survives.
4312                        true
4313                    };
4314                    if keep {
4315                        next.push(combined);
4316                        left_matched = true;
4317                    }
4318                }
4319                if !left_matched && matches!(kind, JoinKind::Left) {
4320                    // LEFT OUTER JOIN: emit the left row with NULLs on
4321                    // the right side when no peer matched.
4322                    let mut combined_vals = left.values.clone();
4323                    for _ in 0..right_arity {
4324                        combined_vals.push(Value::Null);
4325                    }
4326                    next.push(Row::new(combined_vals));
4327                }
4328            }
4329            working = next;
4330            produced_len += right_arity;
4331            debug_assert!(produced_len <= combined_schema.len());
4332        }
4333
4334        // WHERE filter against combined rows.
4335        let mut filtered: Vec<Row> = Vec::new();
4336        for row in working {
4337            if let Some(where_expr) = &stmt.where_ {
4338                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
4339                if !matches!(cond, Value::Bool(true)) {
4340                    continue;
4341                }
4342            }
4343            filtered.push(row);
4344        }
4345
4346        // Aggregate path: handle GROUP BY / aggregate calls over the
4347        // joined+filtered rows.
4348        if aggregate::uses_aggregate(stmt) {
4349            let refs: Vec<&Row> = filtered.iter().collect();
4350            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
4351            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
4352            return Ok(QueryResult::Rows {
4353                columns: agg.columns,
4354                rows: agg.rows,
4355            });
4356        }
4357
4358        let projection = build_projection(&stmt.items, &combined_schema, "")?;
4359        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
4360        for row in &filtered {
4361            let mut values = Vec::with_capacity(projection.len());
4362            for p in &projection {
4363                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4364            }
4365            let order_keys = if stmt.order_by.is_empty() {
4366                Vec::new()
4367            } else {
4368                build_order_keys(&stmt.order_by, row, &ctx)?
4369            };
4370            tagged.push((order_keys, Row::new(values)));
4371        }
4372        if !stmt.order_by.is_empty() {
4373            let keep = if stmt.distinct {
4374                None
4375            } else {
4376                stmt.limit_literal()
4377                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
4378            };
4379            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4380            partial_sort_tagged(&mut tagged, keep, &descs);
4381        }
4382        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4383        if stmt.distinct {
4384            output_rows = dedup_rows(output_rows);
4385        }
4386        apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4387        let columns: Vec<ColumnSchema> = projection
4388            .into_iter()
4389            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4390            .collect();
4391        Ok(QueryResult::Rows {
4392            columns,
4393            rows: output_rows,
4394        })
4395    }
4396}
4397
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// The executors evaluate `expr` once per surviving row and feed
/// `output_name`, `ty` and `nullable` into `ColumnSchema::new` to describe
/// the result set.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each source row to produce the cell.
    expr: Expr,
    /// Column name reported in the result schema.
    output_name: String,
    /// Data type reported in the result schema.
    ty: DataType,
    /// Whether the result schema marks the column as nullable.
    nullable: bool,
}
4407
4408/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
4409/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
4410/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
4411/// the spec's "two NULLs are not distinct"; the second is a tolerated
4412/// quirk for v1 (no NaN literals are reachable from the SQL surface).
4413fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
4414    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
4415    for r in rows {
4416        if !out.iter().any(|seen| seen == &r) {
4417            out.push(r);
4418        }
4419    }
4420    out
4421}
4422
4423/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
4424/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
4425/// order via the byte values; vectors are not sortable.
4426fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
4427    match v {
4428        Value::Null => Ok(f64::INFINITY),
4429        Value::SmallInt(n) => Ok(f64::from(*n)),
4430        Value::Int(n) => Ok(f64::from(*n)),
4431        Value::Date(d) => Ok(f64::from(*d)),
4432        #[allow(clippy::cast_precision_loss)]
4433        Value::Timestamp(t) => Ok(*t as f64),
4434        #[allow(clippy::cast_precision_loss)]
4435        Value::Numeric { scaled, scale } => {
4436            // Scaled integer / 10^scale, computed via f64 for sort
4437            // ordering only. Precision losses here only matter for
4438            // ORDER BY tie-breaks well past 15 significant digits.
4439            // `f64::powi` lives in std; we hand-roll the loop so the
4440            // no_std engine crate doesn't need it.
4441            let mut divisor = 1.0_f64;
4442            for _ in 0..*scale {
4443                divisor *= 10.0;
4444            }
4445            Ok((*scaled as f64) / divisor)
4446        }
4447        #[allow(clippy::cast_precision_loss)]
4448        Value::BigInt(n) => Ok(*n as f64),
4449        Value::Float(x) => Ok(*x),
4450        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
4451        Value::Text(s) => {
4452            // Lex order by codepoints — good enough for ORDER BY name.
4453            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
4454            // partial_cmp Equal. v1.x can swap in a real string comparator.
4455            let mut key: u64 = 0;
4456            for &b in s.as_bytes().iter().take(8) {
4457                key = (key << 8) | u64::from(b);
4458            }
4459            #[allow(clippy::cast_precision_loss)]
4460            Ok(key as f64)
4461        }
4462        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
4463            Err(EngineError::Unsupported(
4464                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
4465            ))
4466        }
4467        Value::Interval { .. } => Err(EngineError::Unsupported(
4468            "ORDER BY of an INTERVAL is not supported in v2.11 \
4469             (months vs micros has no single canonical ordering)"
4470                .into(),
4471        )),
4472        Value::Json(_) => Err(EngineError::Unsupported(
4473            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
4474        )),
4475        // v7.5.0 — Value is #[non_exhaustive]; future variants need
4476        // an explicit ORDER BY mapping. Surface as Unsupported until
4477        // engine support is added.
4478        _ => Err(EngineError::Unsupported(
4479            "ORDER BY of this value type is not supported".into(),
4480        )),
4481    }
4482}
4483
4484/// Try to plan a WHERE clause as an equality lookup against an existing
4485/// index. Returns the candidate row indices on success; `None` means the
4486/// caller should fall back to a full scan.
4487///
4488/// v0.8 recognises a single top-level `col = literal` (in either operand
4489/// order). AND chains and range scans land in later milestones.
4490/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
4491/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
4492/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
4493/// present, the planner does an "over-fetch and filter" pass — it
4494/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
4495/// against each, and trims back to `k`. Returns the row indices in
4496/// ascending-distance order when the plan applies.
4497fn try_nsw_knn(
4498    stmt: &SelectStatement,
4499    table: &Table,
4500    schema_cols: &[ColumnSchema],
4501    table_alias: &str,
4502) -> Option<Vec<usize>> {
4503    if stmt.distinct {
4504        return None;
4505    }
4506    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
4507    if limit == 0 {
4508        return None;
4509    }
4510    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
4511    // distance metric. Multi-key ORDER BY falls through to the
4512    // generic sort path.
4513    if stmt.order_by.len() != 1 {
4514        return None;
4515    }
4516    let order = &stmt.order_by[0];
4517    // NSW kNN returns rows ascending by distance — DESC inverts the
4518    // natural order, so the planner can't handle it without a sort
4519    // pass. Fall back to the generic ORDER BY path.
4520    if order.desc {
4521        return None;
4522    }
4523    let Expr::Binary { lhs, op, rhs } = &order.expr else {
4524        return None;
4525    };
4526    let metric = match op {
4527        BinOp::L2Distance => spg_storage::NswMetric::L2,
4528        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
4529        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
4530        _ => return None,
4531    };
4532    // Accept both `col <op> literal` and `literal <op> col`.
4533    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
4534        (lhs.as_ref(), rhs.as_ref())
4535    else {
4536        return None;
4537    };
4538    if let Some(q) = &col.qualifier
4539        && q != table_alias
4540    {
4541        return None;
4542    }
4543    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
4544    let query = literal_to_vector(literal)?;
4545    let idx = spg_storage::nsw_index_on(table, col_pos)?;
4546    if let Some(where_expr) = &stmt.where_ {
4547        // Over-fetch and filter. The factor (10×) is a heuristic that
4548        // covers typical selectivity for the corpus tests; v2.x will
4549        // make it configurable.
4550        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
4551        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
4552        let ctx = EvalContext::new(schema_cols, Some(table_alias));
4553        let mut kept: Vec<usize> = Vec::with_capacity(limit);
4554        for i in candidates {
4555            let row = &table.rows()[i];
4556            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
4557            if matches!(cond, Value::Bool(true)) {
4558                kept.push(i);
4559                if kept.len() >= limit {
4560                    break;
4561                }
4562            }
4563        }
4564        Some(kept)
4565    } else {
4566        Some(spg_storage::nsw_query(
4567            table, &idx.name, &query, limit, metric,
4568        ))
4569    }
4570}
4571
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` clamps its 10× over-fetch estimate up to at least this
/// many candidates before querying the index.
const NSW_OVER_FETCH_FLOOR: usize = 32;
4576
4577/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
4578/// `None` for anything we can't fold at plan time.
4579fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
4580    match e {
4581        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
4582        Expr::Cast { expr, .. } => literal_to_vector(expr),
4583        _ => None,
4584    }
4585}
4586
4587/// Materialise rows in a planner-supplied order (used by the NSW path)
4588/// without re-running ORDER BY. The projection + LIMIT slot mirror the
4589/// equivalent block in `exec_bare_select`.
4590fn materialise_in_order(
4591    stmt: &SelectStatement,
4592    table: &Table,
4593    schema_cols: &[ColumnSchema],
4594    table_alias: &str,
4595    ordered_rows: &[usize],
4596) -> Result<QueryResult, EngineError> {
4597    let ctx = EvalContext::new(schema_cols, Some(table_alias));
4598    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
4599    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
4600    for &i in ordered_rows {
4601        let row = &table.rows()[i];
4602        let mut values = Vec::with_capacity(projection.len());
4603        for p in &projection {
4604            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4605        }
4606        output_rows.push(Row::new(values));
4607    }
4608    apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4609    let columns: Vec<ColumnSchema> = projection
4610        .into_iter()
4611        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4612        .collect();
4613    Ok(QueryResult::Rows {
4614        columns,
4615        rows: output_rows,
4616    })
4617}
4618
4619fn try_index_seek<'a>(
4620    where_expr: &Expr,
4621    schema_cols: &[ColumnSchema],
4622    catalog: &'a Catalog,
4623    table: &'a Table,
4624    table_alias: &str,
4625) -> Option<Vec<Cow<'a, Row>>> {
4626    let Expr::Binary {
4627        lhs,
4628        op: BinOp::Eq,
4629        rhs,
4630    } = where_expr
4631    else {
4632        return None;
4633    };
4634    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4635        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4636    let idx = table.index_on(col_pos)?;
4637    let key = IndexKey::from_value(&value)?;
4638    let locators = idx.lookup_eq(&key);
4639    let table_name = table.schema().name.as_str();
4640    // v5.1: each locator dispatches to either the hot tier (zero-
4641    // copy borrow of `table.rows()[i]`) or a cold-tier segment
4642    // (one page read + dense row decode, ~µs scale). Cold rows are
4643    // returned as `Cow::Owned` so the caller's `&Row` iteration
4644    // doesn't see a tier distinction; pre-freezer (no cold
4645    // segments loaded) every locator is `Hot` and every entry is
4646    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
4647    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
4648    for loc in locators {
4649        match *loc {
4650            spg_storage::RowLocator::Hot(i) => {
4651                if let Some(row) = table.rows().get(i) {
4652                    out.push(Cow::Borrowed(row));
4653                }
4654            }
4655            spg_storage::RowLocator::Cold { segment_id, .. } => {
4656                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
4657                    out.push(Cow::Owned(row));
4658                }
4659            }
4660        }
4661    }
4662    Some(out)
4663}
4664
4665/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
4666/// is a simple `col = literal` predicate suitable for a `BTree` index
4667/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
4668/// decide whether a write touches a cold-tier row (which requires
4669/// promote-on-write / shadow-on-delete) before falling through to
4670/// the hot-tier row walk.
4671///
4672/// Returns `None` for any predicate shape the planner can't push
4673/// down to an index seek — complex WHERE clauses always take the
4674/// hot-only path (cold rows are immutable to non-indexed writes
4675/// until a future scan-fanout sub-version).
4676fn try_pk_predicate(
4677    where_expr: &Expr,
4678    schema_cols: &[ColumnSchema],
4679    table_alias: &str,
4680) -> Option<(usize, IndexKey)> {
4681    let Expr::Binary {
4682        lhs,
4683        op: BinOp::Eq,
4684        rhs,
4685    } = where_expr
4686    else {
4687        return None;
4688    };
4689    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4690        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4691    let key = IndexKey::from_value(&value)?;
4692    Some((col_pos, key))
4693}
4694
4695fn resolve_col_literal_pair(
4696    col_side: &Expr,
4697    lit_side: &Expr,
4698    schema_cols: &[ColumnSchema],
4699    table_alias: &str,
4700) -> Option<(usize, Value)> {
4701    let Expr::Column(c) = col_side else {
4702        return None;
4703    };
4704    if let Some(q) = &c.qualifier
4705        && q != table_alias
4706    {
4707        return None;
4708    }
4709    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
4710    let Expr::Literal(l) = lit_side else {
4711        return None;
4712    };
4713    let v = match l {
4714        Literal::Integer(n) => {
4715            if let Ok(small) = i32::try_from(*n) {
4716                Value::Int(small)
4717            } else {
4718                Value::BigInt(*n)
4719            }
4720        }
4721        Literal::Float(x) => Value::Float(*x),
4722        Literal::String(s) => Value::Text(s.clone()),
4723        Literal::Bool(b) => Value::Bool(*b),
4724        Literal::Null => Value::Null,
4725        // Vector and Interval literals can't be used as B-tree index keys.
4726        // Tell the planner to fall back to full-scan.
4727        Literal::Vector(_) | Literal::Interval { .. } => return None,
4728    };
4729    Some((pos, v))
4730}
4731
4732/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
4733/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
4734/// `EngineError` so the projection-build path keeps `UnknownQualifier`
4735/// vs `ColumnNotFound` distinct.
4736fn resolve_projection_column<'a>(
4737    c: &ColumnName,
4738    schema_cols: &'a [ColumnSchema],
4739    table_alias: &str,
4740) -> Result<&'a ColumnSchema, EngineError> {
4741    if let Some(q) = &c.qualifier {
4742        let composite = alloc::format!("{q}.{name}", name = c.name);
4743        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
4744            return Ok(s);
4745        }
4746        // Single-table case: the qualifier may equal the active alias —
4747        // then look for the bare column name.
4748        if q == table_alias
4749            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
4750        {
4751            return Ok(s);
4752        }
4753        // For multi-table schemas the qualifier is unknown only if no
4754        // column bears the "<q>." prefix. For single-table, the alias
4755        // mismatch alone is enough.
4756        let prefix = alloc::format!("{q}.");
4757        let qualifier_known =
4758            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
4759        if !qualifier_known {
4760            return Err(EngineError::Eval(EvalError::UnknownQualifier {
4761                qualifier: q.clone(),
4762            }));
4763        }
4764        return Err(EngineError::Eval(EvalError::ColumnNotFound {
4765            name: c.name.clone(),
4766        }));
4767    }
4768    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
4769        return Ok(s);
4770    }
4771    let suffix = alloc::format!(".{name}", name = c.name);
4772    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
4773    let first = matches.next();
4774    let extra = matches.next();
4775    match (first, extra) {
4776        (Some(s), None) => Ok(s),
4777        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
4778            detail: alloc::format!("ambiguous column reference: {}", c.name),
4779        })),
4780        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
4781            name: c.name.clone(),
4782        })),
4783    }
4784}
4785
4786fn build_projection(
4787    items: &[SelectItem],
4788    schema_cols: &[ColumnSchema],
4789    table_alias: &str,
4790) -> Result<Vec<ProjectedItem>, EngineError> {
4791    let mut out = Vec::new();
4792    for item in items {
4793        match item {
4794            SelectItem::Wildcard => {
4795                for col in schema_cols {
4796                    out.push(ProjectedItem {
4797                        expr: Expr::Column(ColumnName {
4798                            qualifier: None,
4799                            name: col.name.clone(),
4800                        }),
4801                        output_name: col.name.clone(),
4802                        ty: col.ty,
4803                        nullable: col.nullable,
4804                    });
4805                }
4806            }
4807            SelectItem::Expr { expr, alias } => {
4808                // Plain column ref keeps full schema info (real type +
4809                // nullability). Compound expressions evaluate fine but have
4810                // no static type — surface them as nullable TEXT, which is
4811                // what most clients render anyway.
4812                if let Expr::Column(c) = expr {
4813                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
4814                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
4815                    out.push(ProjectedItem {
4816                        expr: expr.clone(),
4817                        output_name,
4818                        ty: sch.ty,
4819                        nullable: sch.nullable,
4820                    });
4821                } else {
4822                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
4823                    out.push(ProjectedItem {
4824                        expr: expr.clone(),
4825                        output_name,
4826                        ty: DataType::Text,
4827                        nullable: true,
4828                    });
4829                }
4830            }
4831        }
4832    }
4833    Ok(out)
4834}
4835
4836/// Promote an integer to a NUMERIC value at the requested scale.
4837/// Rejects values that, after scaling, would overflow the column's
4838/// precision budget.
4839fn numeric_from_integer(
4840    n: i128,
4841    precision: u8,
4842    scale: u8,
4843    col_name: &str,
4844) -> Result<Value, EngineError> {
4845    let factor = pow10_i128(scale);
4846    let scaled = n.checked_mul(factor).ok_or_else(|| {
4847        EngineError::Unsupported(alloc::format!(
4848            "integer overflow scaling value for column `{col_name}` to scale {scale}"
4849        ))
4850    })?;
4851    check_precision(scaled, precision, col_name)?;
4852    Ok(Value::Numeric { scaled, scale })
4853}
4854
4855/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
4856/// then verifies the result fits the column's precision.
4857#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
4858fn numeric_from_float(
4859    x: f64,
4860    precision: u8,
4861    scale: u8,
4862    col_name: &str,
4863) -> Result<Value, EngineError> {
4864    if !x.is_finite() {
4865        return Err(EngineError::Unsupported(alloc::format!(
4866            "cannot store non-finite float in NUMERIC column `{col_name}`"
4867        )));
4868    }
4869    let mut factor = 1.0_f64;
4870    for _ in 0..scale {
4871        factor *= 10.0;
4872    }
4873    // Round half-away-from-zero by biasing then casting (`as i128`
4874    // truncates toward zero, so the bias + truncation gives the
4875    // desired rounding). `f64::floor` / `ceil` live in std; we don't
4876    // need them — the cast handles the truncation step.
4877    let shifted = x * factor;
4878    let biased = if shifted >= 0.0 {
4879        shifted + 0.5
4880    } else {
4881        shifted - 0.5
4882    };
4883    // Range-check before casting back to i128 — the cast itself is
4884    // saturating in Rust, which would silently truncate huge inputs.
4885    if !(-1e38..=1e38).contains(&biased) {
4886        return Err(EngineError::Unsupported(alloc::format!(
4887            "value {x} overflows NUMERIC range for column `{col_name}`"
4888        )));
4889    }
4890    let scaled = biased as i128;
4891    check_precision(scaled, precision, col_name)?;
4892    Ok(Value::Numeric { scaled, scale })
4893}
4894
4895/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
4896/// multiplies by 10; going down rounds half-away-from-zero.
4897fn numeric_rescale(
4898    scaled: i128,
4899    src_scale: u8,
4900    precision: u8,
4901    dst_scale: u8,
4902    col_name: &str,
4903) -> Result<Value, EngineError> {
4904    let new_scaled = if dst_scale >= src_scale {
4905        let bump = pow10_i128(dst_scale - src_scale);
4906        scaled.checked_mul(bump).ok_or_else(|| {
4907            EngineError::Unsupported(alloc::format!(
4908                "overflow rescaling NUMERIC for column `{col_name}`"
4909            ))
4910        })?
4911    } else {
4912        let drop = pow10_i128(src_scale - dst_scale);
4913        let half = drop / 2;
4914        if scaled >= 0 {
4915            (scaled + half) / drop
4916        } else {
4917            (scaled - half) / drop
4918        }
4919    };
4920    check_precision(new_scaled, precision, col_name)?;
4921    Ok(Value::Numeric {
4922        scaled: new_scaled,
4923        scale: dst_scale,
4924    })
4925}
4926
4927/// Drop the fractional part of a scaled integer, returning the integer
4928/// portion (toward zero). Used for NUMERIC → INT casts.
4929const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
4930    if scale == 0 {
4931        return scaled;
4932    }
4933    let factor = pow10_i128_const(scale);
4934    scaled / factor
4935}
4936
4937/// Verify a scaled NUMERIC value fits the column's declared precision.
4938/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
4939/// skip the check there.
4940fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
4941    if precision == 0 {
4942        return Ok(());
4943    }
4944    let limit = pow10_i128(precision);
4945    if scaled.unsigned_abs() >= limit.unsigned_abs() {
4946        return Err(EngineError::Unsupported(alloc::format!(
4947            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
4948        )));
4949    }
4950    Ok(())
4951}
4952
/// `10^p` as an `i128`, computed by repeated multiplication so it can be
/// evaluated in `const` contexts.
const fn pow10_i128_const(p: u8) -> i128 {
    let mut result: i128 = 1;
    let mut remaining = p;
    while remaining > 0 {
        result *= 10;
        remaining -= 1;
    }
    result
}
4962
4963fn pow10_i128(p: u8) -> i128 {
4964    pow10_i128_const(p)
4965}
4966
/// Walk a parsed `Statement`, swapping any `NOW()` /
/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
/// literal cast that wraps the engine's per-statement clock reading.
/// When `now_micros` is `None`, calls stay as-is and surface as
/// `unknown function` at eval time — keeps the error path explicit.
///
/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
/// replace every subquery node with a materialised literal. SPG
/// only supports uncorrelated subqueries — the inner SELECT does
/// not see outer-row columns, so the result is the same for every
/// outer row and can be evaluated once.
///
/// Returns the rewritten statement; the caller passes this to the
/// regular row-loop executor which no longer sees Subquery nodes
/// in its tree.
4981impl Engine {
4982    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
4983    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
4984    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
4985    /// 1. Apply the WHERE filter.
4986    /// 2. For each unique `WindowFunction` node in the projection,
4987    ///    partition + sort, compute the per-row value.
4988    /// 3. Append the window values as synthetic columns (`__win_N`)
4989    ///    to the row schema.
4990    /// 4. Rewrite the projection to read those columns.
4991    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
4992    #[allow(
4993        clippy::too_many_lines,
4994        clippy::type_complexity,
4995        clippy::needless_range_loop
4996    )] // window-eval is one cohesive pipe; splitting fragments
4997    fn exec_select_with_window(
4998        &self,
4999        stmt: &SelectStatement,
5000        cancel: CancelToken<'_>,
5001    ) -> Result<QueryResult, EngineError> {
5002        let from = stmt.from.as_ref().ok_or_else(|| {
5003            EngineError::Unsupported("window functions require a FROM clause".into())
5004        })?;
5005        // For v4.12 we only support a single-table FROM. Joins +
5006        // windows is queued for v5.x.
5007        if !from.joins.is_empty() {
5008            return Err(EngineError::Unsupported(
5009                "JOIN with window functions not yet supported".into(),
5010            ));
5011        }
5012        let primary = &from.primary;
5013        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5014            StorageError::TableNotFound {
5015                name: primary.name.clone(),
5016            }
5017        })?;
5018        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5019        let schema_cols = &table.schema().columns;
5020        let ctx = EvalContext::new(schema_cols, Some(alias));
5021
5022        // 1) Filter pass.
5023        let mut filtered: Vec<&Row> = Vec::new();
5024        for (i, row) in table.rows().iter().enumerate() {
5025            if i.is_multiple_of(256) {
5026                cancel.check()?;
5027            }
5028            if let Some(w) = &stmt.where_ {
5029                let cond = eval::eval_expr(w, row, &ctx)?;
5030                if !matches!(cond, Value::Bool(true)) {
5031                    continue;
5032                }
5033            }
5034            filtered.push(row);
5035        }
5036        let n_rows = filtered.len();
5037
5038        // 2) Collect unique window function nodes from projection.
5039        let mut window_nodes: Vec<Expr> = Vec::new();
5040        for item in &stmt.items {
5041            if let SelectItem::Expr { expr, .. } = item {
5042                collect_window_nodes(expr, &mut window_nodes);
5043            }
5044        }
5045
5046        // 3) For each window, compute per-row value.
5047        // Index: same order as window_nodes; for row i, win_vals[w][i].
5048        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
5049        for wnode in &window_nodes {
5050            let Expr::WindowFunction {
5051                name,
5052                args,
5053                partition_by,
5054                order_by,
5055                frame,
5056                null_treatment,
5057            } = wnode
5058            else {
5059                unreachable!("collect_window_nodes pushes only WindowFunction");
5060            };
5061            // Compute (partition_key, order_key, original_index) for each row.
5062            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
5063                Vec::with_capacity(n_rows);
5064            for (i, row) in filtered.iter().enumerate() {
5065                let pkey: Vec<Value> = partition_by
5066                    .iter()
5067                    .map(|p| eval::eval_expr(p, row, &ctx))
5068                    .collect::<Result<_, _>>()?;
5069                let okey: Vec<(Value, bool)> = order_by
5070                    .iter()
5071                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
5072                    .collect::<Result<_, _>>()?;
5073                indexed.push((pkey, okey, i));
5074            }
5075            // Sort by (partition_key, order_key). Partition key uses
5076            // a stable encoded form; order key respects ASC/DESC.
5077            indexed.sort_by(|a, b| {
5078                let p_cmp = partition_key_cmp(&a.0, &b.0);
5079                if p_cmp != core::cmp::Ordering::Equal {
5080                    return p_cmp;
5081                }
5082                order_key_cmp(&a.1, &b.1)
5083            });
5084            // Per-partition compute.
5085            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
5086            let mut p_start = 0;
5087            while p_start < indexed.len() {
5088                let mut p_end = p_start + 1;
5089                while p_end < indexed.len()
5090                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
5091                        == core::cmp::Ordering::Equal
5092                {
5093                    p_end += 1;
5094                }
5095                // Compute the function within this partition slice.
5096                compute_window_partition(
5097                    name,
5098                    args,
5099                    !order_by.is_empty(),
5100                    frame.as_ref(),
5101                    *null_treatment,
5102                    &indexed[p_start..p_end],
5103                    &filtered,
5104                    &ctx,
5105                    &mut out_vals,
5106                )?;
5107                p_start = p_end;
5108            }
5109            win_vals.push(out_vals);
5110        }
5111
5112        // 4) Build extended schema: original columns + synthetic.
5113        let mut ext_cols = schema_cols.clone();
5114        for i in 0..window_nodes.len() {
5115            ext_cols.push(ColumnSchema::new(
5116                alloc::format!("__win_{i}"),
5117                DataType::Text, // type doesn't matter for projection eval
5118                true,
5119            ));
5120        }
5121        // 5) Build extended rows: each row gets its window values appended.
5122        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
5123        for i in 0..n_rows {
5124            let mut values = filtered[i].values.clone();
5125            for w in 0..window_nodes.len() {
5126                values.push(win_vals[w][i].clone());
5127            }
5128            ext_rows.push(Row::new(values));
5129        }
5130        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
5131        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
5132        for item in &stmt.items {
5133            let new_item = match item {
5134                SelectItem::Wildcard => SelectItem::Wildcard,
5135                SelectItem::Expr { expr, alias } => {
5136                    let mut e = expr.clone();
5137                    rewrite_window_to_columns(&mut e, &window_nodes);
5138                    SelectItem::Expr {
5139                        expr: e,
5140                        alias: alias.clone(),
5141                    }
5142                }
5143            };
5144            rewritten_items.push(new_item);
5145        }
5146
5147        // 7) Project into final rows.
5148        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
5149        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
5150        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
5151        for (i, row) in ext_rows.iter().enumerate() {
5152            if i.is_multiple_of(256) {
5153                cancel.check()?;
5154            }
5155            let mut values = Vec::with_capacity(projection.len());
5156            for p in &projection {
5157                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
5158            }
5159            let order_keys = if stmt.order_by.is_empty() {
5160                Vec::new()
5161            } else {
5162                let mut keys = Vec::with_capacity(stmt.order_by.len());
5163                for o in &stmt.order_by {
5164                    let mut e = o.expr.clone();
5165                    rewrite_window_to_columns(&mut e, &window_nodes);
5166                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
5167                    keys.push(value_to_order_key(&key)?);
5168                }
5169                keys
5170            };
5171            tagged.push((order_keys, Row::new(values)));
5172        }
5173        // ORDER BY + LIMIT/OFFSET on the projected rows.
5174        if !stmt.order_by.is_empty() {
5175            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5176            sort_by_keys(&mut tagged, &descs);
5177        }
5178        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5179        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
5180        let final_cols: Vec<ColumnSchema> = projection
5181            .into_iter()
5182            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5183            .collect();
5184        Ok(QueryResult::Rows {
5185            columns: final_cols,
5186            rows: out_rows,
5187        })
5188    }
5189
5190    /// v4.11: materialise each CTE into a temp table inside a
5191    /// cloned catalog, then run the body SELECT against a fresh
5192    /// engine instance that owns the enriched catalog. The clone
5193    /// is moderately expensive — only paid by CTE-bearing queries.
5194    /// Subqueries inside CTE bodies / the main body resolve as
5195    /// usual; `clock_fn` is propagated so `NOW()` lines up.
5196    fn exec_with_ctes(
5197        &self,
5198        stmt: &SelectStatement,
5199        cancel: CancelToken<'_>,
5200    ) -> Result<QueryResult, EngineError> {
5201        cancel.check()?;
5202        let mut catalog = self.active_catalog().clone();
5203        for cte in &stmt.ctes {
5204            if catalog.get(&cte.name).is_some() {
5205                return Err(EngineError::Unsupported(alloc::format!(
5206                    "CTE name {:?} shadows an existing table; rename the CTE",
5207                    cte.name
5208                )));
5209            }
5210            let (columns, rows) = if cte.recursive {
5211                self.materialise_recursive_cte(cte, &catalog, cancel)?
5212            } else {
5213                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
5214                let QueryResult::Rows { columns, rows } = body_result else {
5215                    return Err(EngineError::Unsupported(alloc::format!(
5216                        "CTE {:?} body did not return rows",
5217                        cte.name
5218                    )));
5219                };
5220                (columns, rows)
5221            };
5222            // v4.22: the projection builder labels any non-column
5223            // expression as Text — including literal SELECT 1.
5224            // Promote each column's type to whatever the rows
5225            // actually carry so the CTE storage table accepts them.
5226            let inferred = infer_column_types(&columns, &rows);
5227            let mut columns = inferred;
5228            // v4.22: apply optional `WITH name(a, b, c)` overrides.
5229            if !cte.column_overrides.is_empty() {
5230                if cte.column_overrides.len() != columns.len() {
5231                    return Err(EngineError::Unsupported(alloc::format!(
5232                        "CTE {:?} column list has {} names but body returns {} columns",
5233                        cte.name,
5234                        cte.column_overrides.len(),
5235                        columns.len()
5236                    )));
5237                }
5238                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5239                    col.name.clone_from(name);
5240                }
5241            }
5242            let schema = TableSchema::new(cte.name.clone(), columns);
5243            catalog.create_table(schema).map_err(EngineError::Storage)?;
5244            let table = catalog
5245                .get_mut(&cte.name)
5246                .expect("just-created CTE table must exist");
5247            for row in rows {
5248                table.insert(row).map_err(EngineError::Storage)?;
5249            }
5250        }
5251        // Strip CTEs from the body before running on the temp engine
5252        // so we don't recurse forever.
5253        let mut body = stmt.clone();
5254        body.ctes = Vec::new();
5255        let mut temp = Engine::restore(catalog);
5256        if let Some(c) = self.clock {
5257            temp = temp.with_clock(c);
5258        }
5259        if let Some(f) = self.salt_fn {
5260            temp = temp.with_salt_fn(f);
5261        }
5262        temp.exec_select_cancel(&body, cancel)
5263    }
5264
5265    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
5266    /// UNION (or UNION ALL) of an anchor that does not reference
5267    /// the CTE name, and one or more recursive terms that do. The
5268    /// anchor runs first; each subsequent iteration runs the
5269    /// recursive term against a temp catalog where the CTE name is
5270    /// bound to the *previous* iteration's output. Iteration stops
5271    /// when the recursive term yields no rows; UNION (DISTINCT)
5272    /// deduplicates against the accumulated result, UNION ALL does
5273    /// not. A hard cap on total rows prevents runaway queries.
5274    #[allow(clippy::too_many_lines)]
5275    fn materialise_recursive_cte(
5276        &self,
5277        cte: &spg_sql::ast::Cte,
5278        base_catalog: &Catalog,
5279        cancel: CancelToken<'_>,
5280    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
5281        const MAX_TOTAL_ROWS: usize = 1_000_000;
5282        const MAX_ITERATIONS: usize = 100_000;
5283        cancel.check()?;
5284        if cte.body.unions.is_empty() {
5285            return Err(EngineError::Unsupported(alloc::format!(
5286                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
5287                cte.name
5288            )));
5289        }
5290        // Anchor: the body's leading SELECT, with unions stripped.
5291        let mut anchor = cte.body.clone();
5292        let union_terms = core::mem::take(&mut anchor.unions);
5293        anchor.ctes = Vec::new();
5294        // Anchor must not reference the CTE name.
5295        if select_refers_to(&anchor, &cte.name) {
5296            return Err(EngineError::Unsupported(alloc::format!(
5297                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
5298                cte.name
5299            )));
5300        }
5301        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
5302        let QueryResult::Rows {
5303            columns: anchor_cols,
5304            rows: anchor_rows,
5305        } = anchor_result
5306        else {
5307            return Err(EngineError::Unsupported(alloc::format!(
5308                "WITH RECURSIVE {:?}: anchor did not return rows",
5309                cte.name
5310            )));
5311        };
5312        // The projection builder labels non-column expressions Text;
5313        // refine column types from the anchor's actual values so the
5314        // intermediate iter-catalog tables accept them.
5315        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
5316        if !cte.column_overrides.is_empty() {
5317            if cte.column_overrides.len() != columns.len() {
5318                return Err(EngineError::Unsupported(alloc::format!(
5319                    "CTE {:?} column list has {} names but anchor returns {} columns",
5320                    cte.name,
5321                    cte.column_overrides.len(),
5322                    columns.len()
5323                )));
5324            }
5325            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5326                col.name.clone_from(name);
5327            }
5328        }
5329        let mut all_rows: Vec<Row> = anchor_rows.clone();
5330        let mut working_set: Vec<Row> = anchor_rows;
5331        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
5332        // Track at least one "all UNION ALL" flag — if every union
5333        // kind is ALL we skip the dedup step (faster + matches PG).
5334        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
5335        if !all_union_all {
5336            for r in &all_rows {
5337                seen.insert(encode_row_key(r));
5338            }
5339        }
5340        for iter in 0..MAX_ITERATIONS {
5341            cancel.check()?;
5342            if working_set.is_empty() {
5343                break;
5344            }
5345            // Build a fresh catalog: base + CTE bound to working_set.
5346            let mut iter_catalog = base_catalog.clone();
5347            let schema = TableSchema::new(cte.name.clone(), columns.clone());
5348            iter_catalog
5349                .create_table(schema)
5350                .map_err(EngineError::Storage)?;
5351            {
5352                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
5353                for row in &working_set {
5354                    table.insert(row.clone()).map_err(EngineError::Storage)?;
5355                }
5356            }
5357            let mut iter_engine = Engine::restore(iter_catalog);
5358            if let Some(c) = self.clock {
5359                iter_engine = iter_engine.with_clock(c);
5360            }
5361            if let Some(f) = self.salt_fn {
5362                iter_engine = iter_engine.with_salt_fn(f);
5363            }
5364            // Run each recursive term in sequence and collect new rows.
5365            let mut next_set: Vec<Row> = Vec::new();
5366            for (_, term) in &union_terms {
5367                let mut term = term.clone();
5368                term.ctes = Vec::new();
5369                let r = iter_engine.exec_select_cancel(&term, cancel)?;
5370                let QueryResult::Rows {
5371                    columns: rc,
5372                    rows: rs,
5373                } = r
5374                else {
5375                    return Err(EngineError::Unsupported(alloc::format!(
5376                        "WITH RECURSIVE {:?}: recursive term did not return rows",
5377                        cte.name
5378                    )));
5379                };
5380                if rc.len() != columns.len() {
5381                    return Err(EngineError::Unsupported(alloc::format!(
5382                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
5383                        cte.name,
5384                        rc.len(),
5385                        columns.len()
5386                    )));
5387                }
5388                for row in rs {
5389                    if !all_union_all {
5390                        let key = encode_row_key(&row);
5391                        if !seen.insert(key) {
5392                            continue;
5393                        }
5394                    }
5395                    next_set.push(row);
5396                }
5397            }
5398            if next_set.is_empty() {
5399                break;
5400            }
5401            all_rows.extend(next_set.iter().cloned());
5402            working_set = next_set;
5403            if all_rows.len() > MAX_TOTAL_ROWS {
5404                return Err(EngineError::Unsupported(alloc::format!(
5405                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
5406                    cte.name
5407                )));
5408            }
5409            if iter + 1 == MAX_ITERATIONS {
5410                return Err(EngineError::Unsupported(alloc::format!(
5411                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
5412                    cte.name
5413                )));
5414            }
5415        }
5416        Ok((columns, all_rows))
5417    }
5418
5419    fn resolve_select_subqueries(
5420        &self,
5421        stmt: &mut SelectStatement,
5422        cancel: CancelToken<'_>,
5423    ) -> Result<(), EngineError> {
5424        for item in &mut stmt.items {
5425            if let SelectItem::Expr { expr, .. } = item {
5426                self.resolve_expr_subqueries(expr, cancel)?;
5427            }
5428        }
5429        if let Some(w) = &mut stmt.where_ {
5430            self.resolve_expr_subqueries(w, cancel)?;
5431        }
5432        if let Some(gs) = &mut stmt.group_by {
5433            for g in gs {
5434                self.resolve_expr_subqueries(g, cancel)?;
5435            }
5436        }
5437        if let Some(h) = &mut stmt.having {
5438            self.resolve_expr_subqueries(h, cancel)?;
5439        }
5440        for o in &mut stmt.order_by {
5441            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
5442        }
5443        for (_, peer) in &mut stmt.unions {
5444            self.resolve_select_subqueries(peer, cancel)?;
5445        }
5446        Ok(())
5447    }
5448
5449    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
5450    fn resolve_expr_subqueries(
5451        &self,
5452        e: &mut Expr,
5453        cancel: CancelToken<'_>,
5454    ) -> Result<(), EngineError> {
5455        // Replace-on-this-node cases first.
5456        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
5457            *e = replacement;
5458            return Ok(());
5459        }
5460        match e {
5461            Expr::Binary { lhs, rhs, .. } => {
5462                self.resolve_expr_subqueries(lhs, cancel)?;
5463                self.resolve_expr_subqueries(rhs, cancel)?;
5464            }
5465            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5466                self.resolve_expr_subqueries(expr, cancel)?;
5467            }
5468            Expr::FunctionCall { args, .. } => {
5469                for a in args {
5470                    self.resolve_expr_subqueries(a, cancel)?;
5471                }
5472            }
5473            Expr::Like { expr, pattern, .. } => {
5474                self.resolve_expr_subqueries(expr, cancel)?;
5475                self.resolve_expr_subqueries(pattern, cancel)?;
5476            }
5477            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
5478            // v4.12 window functions — recurse into args + ORDER BY
5479            // + PARTITION BY in case they carry inner subqueries.
5480            Expr::WindowFunction {
5481                args,
5482                partition_by,
5483                order_by,
5484                ..
5485            } => {
5486                for a in args {
5487                    self.resolve_expr_subqueries(a, cancel)?;
5488                }
5489                for p in partition_by {
5490                    self.resolve_expr_subqueries(p, cancel)?;
5491                }
5492                for (e, _) in order_by {
5493                    self.resolve_expr_subqueries(e, cancel)?;
5494                }
5495            }
5496            // Subquery nodes are handled in subquery_replacement
5497            // (which returned None — defensive no-op); Literal /
5498            // Column are leaves.
5499            Expr::ScalarSubquery(_)
5500            | Expr::Exists { .. }
5501            | Expr::InSubquery { .. }
5502            | Expr::Literal(_)
5503            | Expr::Placeholder(_)
5504            | Expr::Column(_) => {}
5505            // v7.10.10 — recurse children.
5506            Expr::Array(items) => {
5507                for elem in items {
5508                    self.resolve_expr_subqueries(elem, cancel)?;
5509                }
5510            }
5511            Expr::ArraySubscript { target, index } => {
5512                self.resolve_expr_subqueries(target, cancel)?;
5513                self.resolve_expr_subqueries(index, cancel)?;
5514            }
5515            Expr::AnyAll { expr, array, .. } => {
5516                self.resolve_expr_subqueries(expr, cancel)?;
5517                self.resolve_expr_subqueries(array, cancel)?;
5518            }
5519        }
5520        Ok(())
5521    }
5522
5523    /// v4.23: per-row eval that handles correlated subqueries.
5524    /// Equivalent to `eval::eval_expr` when the expression has no
5525    /// subqueries; otherwise clones the expression, substitutes
5526    /// outer-row columns into each surviving subquery node, runs
5527    /// the inner SELECT, and replaces the node with the literal
5528    /// result. Only the WHERE-filter call sites use this path so
5529    /// the uncorrelated fast path is preserved everywhere else.
5530    fn eval_expr_with_correlated(
5531        &self,
5532        expr: &Expr,
5533        row: &Row,
5534        ctx: &EvalContext<'_>,
5535        cancel: CancelToken<'_>,
5536        memo: Option<&mut memoize::MemoizeCache>,
5537    ) -> Result<Value, EngineError> {
5538        if !expr_has_subquery(expr) {
5539            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
5540        }
5541        let mut e = expr.clone();
5542        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
5543        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
5544    }
5545
5546    fn resolve_correlated_in_expr(
5547        &self,
5548        e: &mut Expr,
5549        row: &Row,
5550        ctx: &EvalContext<'_>,
5551        cancel: CancelToken<'_>,
5552        mut memo: Option<&mut memoize::MemoizeCache>,
5553    ) -> Result<(), EngineError> {
5554        match e {
5555            Expr::ScalarSubquery(inner) => {
5556                // v6.2.6 — Memoize: build the cache key from the
5557                // pre-substitution subquery repr + the outer row's
5558                // values. Two outer rows with identical correlated
5559                // values hit the same entry.
5560                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
5561                    subquery_repr: alloc::format!("{}", **inner),
5562                    outer_values: row.values.clone(),
5563                });
5564                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
5565                    && let Some(cached) = cache.get(k)
5566                {
5567                    *e = value_to_literal_expr(cached)?;
5568                    return Ok(());
5569                }
5570                let mut s = (**inner).clone();
5571                substitute_outer_columns(&mut s, row, ctx);
5572                let r = self.exec_select_cancel(&s, cancel)?;
5573                let QueryResult::Rows { rows, .. } = r else {
5574                    return Err(EngineError::Unsupported(
5575                        "scalar subquery: inner did not return rows".into(),
5576                    ));
5577                };
5578                let value = match rows.as_slice() {
5579                    [] => Value::Null,
5580                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
5581                    _ => {
5582                        return Err(EngineError::Unsupported(alloc::format!(
5583                            "scalar subquery returned {} rows; expected 0 or 1",
5584                            rows.len()
5585                        )));
5586                    }
5587                };
5588                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
5589                    cache.insert(k, value.clone());
5590                }
5591                *e = value_to_literal_expr(value)?;
5592            }
5593            Expr::Exists { subquery, negated } => {
5594                let mut s = (**subquery).clone();
5595                substitute_outer_columns(&mut s, row, ctx);
5596                let r = self.exec_select_cancel(&s, cancel)?;
5597                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
5598                let bit = if *negated { !exists } else { exists };
5599                *e = Expr::Literal(Literal::Bool(bit));
5600            }
5601            Expr::InSubquery {
5602                expr: lhs,
5603                subquery,
5604                negated,
5605            } => {
5606                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5607                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
5608                let mut s = (**subquery).clone();
5609                substitute_outer_columns(&mut s, row, ctx);
5610                let r = self.exec_select_cancel(&s, cancel)?;
5611                let QueryResult::Rows { columns, rows, .. } = r else {
5612                    return Err(EngineError::Unsupported(
5613                        "IN-subquery: inner did not return rows".into(),
5614                    ));
5615                };
5616                if columns.len() != 1 {
5617                    return Err(EngineError::Unsupported(alloc::format!(
5618                        "IN-subquery must project exactly one column; got {}",
5619                        columns.len()
5620                    )));
5621                }
5622                let mut found = false;
5623                let mut any_null = false;
5624                for r0 in rows {
5625                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
5626                    if v.is_null() {
5627                        any_null = true;
5628                        continue;
5629                    }
5630                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
5631                        found = true;
5632                        break;
5633                    }
5634                }
5635                let bit = if found {
5636                    !*negated
5637                } else if any_null {
5638                    return Err(EngineError::Unsupported(
5639                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
5640                    ));
5641                } else {
5642                    *negated
5643                };
5644                *e = Expr::Literal(Literal::Bool(bit));
5645            }
5646            Expr::Binary { lhs, rhs, .. } => {
5647                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5648                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
5649            }
5650            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5651                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5652            }
5653            Expr::Like { expr, pattern, .. } => {
5654                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5655                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
5656            }
5657            Expr::FunctionCall { args, .. } => {
5658                for a in args {
5659                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
5660                }
5661            }
5662            Expr::Extract { source, .. } => {
5663                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
5664            }
5665            Expr::WindowFunction { .. } | Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
5666            // v7.10.10 — recurse children.
5667            Expr::Array(items) => {
5668                for elem in items {
5669                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
5670                }
5671            }
5672            Expr::ArraySubscript { target, index } => {
5673                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
5674                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
5675            }
5676            Expr::AnyAll { expr, array, .. } => {
5677                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5678                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
5679            }
5680        }
5681        Ok(())
5682    }
5683
5684    fn subquery_replacement(
5685        &self,
5686        e: &Expr,
5687        cancel: CancelToken<'_>,
5688    ) -> Result<Option<Expr>, EngineError> {
5689        match e {
5690            Expr::ScalarSubquery(inner) => {
5691                let mut s = (**inner).clone();
5692                // Recurse into the inner SELECT first so nested
5693                // subqueries materialise bottom-up.
5694                self.resolve_select_subqueries(&mut s, cancel)?;
5695                let r = match self.exec_bare_select_cancel(&s, cancel) {
5696                    Ok(r) => r,
5697                    Err(e) if is_correlation_error(&e) => return Ok(None),
5698                    Err(e) => return Err(e),
5699                };
5700                let QueryResult::Rows { rows, .. } = r else {
5701                    return Err(EngineError::Unsupported(
5702                        "scalar subquery: inner statement did not return rows".into(),
5703                    ));
5704                };
5705                let value = match rows.as_slice() {
5706                    [] => Value::Null,
5707                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
5708                    _ => {
5709                        return Err(EngineError::Unsupported(alloc::format!(
5710                            "scalar subquery returned {} rows; expected 0 or 1",
5711                            rows.len()
5712                        )));
5713                    }
5714                };
5715                Ok(Some(value_to_literal_expr(value)?))
5716            }
5717            Expr::Exists { subquery, negated } => {
5718                let mut s = (**subquery).clone();
5719                self.resolve_select_subqueries(&mut s, cancel)?;
5720                let r = match self.exec_bare_select_cancel(&s, cancel) {
5721                    Ok(r) => r,
5722                    Err(e) if is_correlation_error(&e) => return Ok(None),
5723                    Err(e) => return Err(e),
5724                };
5725                let exists = match r {
5726                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
5727                    QueryResult::CommandOk { .. } => false,
5728                };
5729                let bit = if *negated { !exists } else { exists };
5730                Ok(Some(Expr::Literal(Literal::Bool(bit))))
5731            }
5732            Expr::InSubquery {
5733                expr,
5734                subquery,
5735                negated,
5736            } => {
5737                let mut s = (**subquery).clone();
5738                self.resolve_select_subqueries(&mut s, cancel)?;
5739                let r = match self.exec_bare_select_cancel(&s, cancel) {
5740                    Ok(r) => r,
5741                    Err(e) if is_correlation_error(&e) => return Ok(None),
5742                    Err(e) => return Err(e),
5743                };
5744                let QueryResult::Rows { columns, rows, .. } = r else {
5745                    return Err(EngineError::Unsupported(
5746                        "IN-subquery: inner statement did not return rows".into(),
5747                    ));
5748                };
5749                if columns.len() != 1 {
5750                    return Err(EngineError::Unsupported(alloc::format!(
5751                        "IN-subquery must project exactly one column; got {}",
5752                        columns.len()
5753                    )));
5754                }
5755                // Build the same OR-Eq chain the parse-time literal-list
5756                // path constructs, with each value lifted into a Literal.
5757                let mut acc: Option<Expr> = None;
5758                for row in rows {
5759                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
5760                    let lit = value_to_literal_expr(v)?;
5761                    let cmp = Expr::Binary {
5762                        lhs: expr.clone(),
5763                        op: BinOp::Eq,
5764                        rhs: Box::new(lit),
5765                    };
5766                    acc = Some(match acc {
5767                        None => cmp,
5768                        Some(prev) => Expr::Binary {
5769                            lhs: Box::new(prev),
5770                            op: BinOp::Or,
5771                            rhs: Box::new(cmp),
5772                        },
5773                    });
5774                }
5775                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
5776                let final_expr = if *negated {
5777                    Expr::Unary {
5778                        op: UnOp::Not,
5779                        expr: Box::new(combined),
5780                    }
5781                } else {
5782                    combined
5783                };
5784                Ok(Some(final_expr))
5785            }
5786            _ => Ok(None),
5787        }
5788    }
5789}
5790
5791// ---- v4.12 window-function helpers ----
5792// The (partition-key, order-key, original-index) tuple shape used
5793// across these helpers is intrinsic to the planner. Factoring it
5794// into a typedef adds indirection without making the code clearer,
5795// so several lints are allowed inline on the affected functions
5796// rather than module-wide.
5797
5798/// v4.22: cheap structural scan for `FROM <name>` (qualified or
5799/// not) inside a SELECT — used to verify the anchor of a WITH
5800/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
5801/// FROM joins, subqueries, and unions.
5802fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
5803    if let Some(from) = &stmt.from
5804        && from_refers_to(from, target)
5805    {
5806        return true;
5807    }
5808    for (_, peer) in &stmt.unions {
5809        if select_refers_to(peer, target) {
5810            return true;
5811        }
5812    }
5813    for item in &stmt.items {
5814        if let SelectItem::Expr { expr, .. } = item
5815            && expr_refers_to(expr, target)
5816        {
5817            return true;
5818        }
5819    }
5820    if let Some(w) = &stmt.where_
5821        && expr_refers_to(w, target)
5822    {
5823        return true;
5824    }
5825    false
5826}
5827
5828fn from_refers_to(from: &FromClause, target: &str) -> bool {
5829    if from.primary.name.eq_ignore_ascii_case(target) {
5830        return true;
5831    }
5832    from.joins
5833        .iter()
5834        .any(|j| j.table.name.eq_ignore_ascii_case(target))
5835}
5836
5837fn expr_refers_to(e: &Expr, target: &str) -> bool {
5838    match e {
5839        Expr::ScalarSubquery(s) => select_refers_to(s, target),
5840        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
5841            select_refers_to(subquery, target)
5842        }
5843        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
5844        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5845            expr_refers_to(expr, target)
5846        }
5847        Expr::Like { expr, pattern, .. } => {
5848            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
5849        }
5850        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
5851        Expr::Extract { source, .. } => expr_refers_to(source, target),
5852        Expr::WindowFunction {
5853            args,
5854            partition_by,
5855            order_by,
5856            ..
5857        } => {
5858            args.iter().any(|a| expr_refers_to(a, target))
5859                || partition_by.iter().any(|p| expr_refers_to(p, target))
5860                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
5861        }
5862        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
5863        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
5864        Expr::ArraySubscript { target: t, index } => {
5865            expr_refers_to(t, target) || expr_refers_to(index, target)
5866        }
5867        Expr::AnyAll { expr, array, .. } => {
5868            expr_refers_to(expr, target) || expr_refers_to(array, target)
5869        }
5870    }
5871}
5872
5873/// v4.22: pick more specific column types from observed rows when
5874/// the projection builder defaulted to Text (the v1.x behavior for
5875/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
5876/// land an Int column in the CTE storage table rather than failing
5877/// the insert with "expected TEXT, got INT".
5878fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
5879    let mut out = columns.to_vec();
5880    for (col_idx, col) in out.iter_mut().enumerate() {
5881        if col.ty != DataType::Text {
5882            continue;
5883        }
5884        let mut inferred: Option<DataType> = None;
5885        let mut all_null = true;
5886        for row in rows {
5887            let Some(v) = row.values.get(col_idx) else {
5888                continue;
5889            };
5890            let ty = match v {
5891                Value::Null => continue,
5892                Value::SmallInt(_) => DataType::SmallInt,
5893                Value::Int(_) => DataType::Int,
5894                Value::BigInt(_) => DataType::BigInt,
5895                Value::Float(_) => DataType::Float,
5896                Value::Bool(_) => DataType::Bool,
5897                Value::Vector(_) => DataType::Vector {
5898                    dim: 0,
5899                    encoding: VecEncoding::F32,
5900                },
5901                _ => DataType::Text,
5902            };
5903            all_null = false;
5904            inferred = Some(match inferred {
5905                None => ty,
5906                Some(prev) if prev == ty => prev,
5907                Some(_) => DataType::Text,
5908            });
5909        }
5910        if let Some(t) = inferred {
5911            col.ty = t;
5912            col.nullable = true;
5913        } else if all_null {
5914            col.nullable = true;
5915        }
5916    }
5917    out
5918}
5919
5920/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
5921/// Lines are pushed into `out`; `depth` controls indentation. We
5922/// describe the rewritten SELECT — what the executor *would* do —
5923/// using the engine handle to spot indexed lookups and table shapes.
5924#[allow(clippy::too_many_lines, clippy::format_push_string)]
5925/// v6.2.4 — Walk every line of the rendered plan tree and append
5926/// per-operator stats. Lines that name a known operator get
5927/// `(rows=N)` (`actual_rows` of the top-level operator equals the
5928/// final result row count; scans report their catalog row count
5929/// as the rows-considered metric). Other lines — Filter / Join /
5930/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
5931/// complete-by-construction; v6.2.5 fills these in via inline
5932/// executor counters.
5933/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
5934/// `(table, column)` pair the query touches via WHERE / JOIN
5935/// that doesn't already have an index on the owning table.
5936/// Walks the SELECT's FROM clauses + WHERE expression tree;
5937/// returns one line per missing index. Deterministic order:
5938/// FROM-clause iteration order, then column-reference walk
5939/// order inside each WHERE. Each suggestion is a copy-pastable
5940/// DDL string.
5941fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
5942    use alloc::collections::BTreeSet;
5943    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
5944    let mut out: Vec<String> = Vec::new();
5945    let cat = engine.active_catalog();
5946    // Build a (table, qualifier-or-alias) list from the FROM clause
5947    // so unqualified column refs in WHERE resolve to the correct
5948    // table.
5949    let Some(from) = &stmt.from else {
5950        return out;
5951    };
5952    let mut tables: Vec<String> = Vec::new();
5953    tables.push(from.primary.name.clone());
5954    for j in &from.joins {
5955        tables.push(j.table.name.clone());
5956    }
5957    // Collect column refs from the WHERE expression. JOIN ON
5958    // predicates also feed in.
5959    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
5960    if let Some(w) = &stmt.where_ {
5961        collect_column_refs(w, &mut col_refs);
5962    }
5963    for j in &from.joins {
5964        if let Some(on) = &j.on {
5965            collect_column_refs(on, &mut col_refs);
5966        }
5967    }
5968    for cn in &col_refs {
5969        // Resolve owner table: explicit qualifier first, else
5970        // first table in FROM that has a column of this name.
5971        let owner: Option<String> = if let Some(q) = &cn.qualifier {
5972            tables.iter().find(|t| t == &q).cloned()
5973        } else {
5974            tables.iter().find_map(|t| {
5975                cat.get(t).and_then(|tbl| {
5976                    if tbl.schema().column_position(&cn.name).is_some() {
5977                        Some(t.clone())
5978                    } else {
5979                        None
5980                    }
5981                })
5982            })
5983        };
5984        let Some(owner) = owner else {
5985            continue;
5986        };
5987        let Some(tbl) = cat.get(&owner) else {
5988            continue;
5989        };
5990        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
5991            continue;
5992        };
5993        // Skip if any BTree index already covers this column as
5994        // its key.
5995        let already_indexed = tbl.indices().iter().any(|i| {
5996            matches!(i.kind, spg_storage::IndexKind::BTree(_))
5997                && i.column_position == col_pos
5998                && i.expression.is_none()
5999                && i.partial_predicate.is_none()
6000        });
6001        if already_indexed {
6002            continue;
6003        }
6004        if seen.insert((owner.clone(), cn.name.clone())) {
6005            out.push(alloc::format!(
6006                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
6007                owner,
6008                cn.name,
6009                owner,
6010                cn.name
6011            ));
6012        }
6013    }
6014    out
6015}
6016
6017/// Walks an `Expr` and pushes every `ColumnName` it references.
6018/// Order is depth-first, left-to-right.
6019fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
6020    match expr {
6021        Expr::Column(cn) => out.push(cn.clone()),
6022        Expr::FunctionCall { args, .. } => {
6023            for a in args {
6024                collect_column_refs(a, out);
6025            }
6026        }
6027        Expr::Binary { lhs, rhs, .. } => {
6028            collect_column_refs(lhs, out);
6029            collect_column_refs(rhs, out);
6030        }
6031        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
6032        _ => {}
6033    }
6034}
6035
6036fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
6037    let catalog = engine.active_catalog();
6038    let cold_ids = catalog.cold_segment_ids_global();
6039    let any_cold = !cold_ids.is_empty();
6040    let cold_ids_repr = if any_cold {
6041        let mut s = alloc::string::String::from("[");
6042        for (i, id) in cold_ids.iter().enumerate() {
6043            if i > 0 {
6044                s.push(',');
6045            }
6046            s.push_str(&alloc::format!("{id}"));
6047        }
6048        s.push(']');
6049        s
6050    } else {
6051        alloc::string::String::new()
6052    };
6053    for (idx, line) in lines.iter_mut().enumerate() {
6054        let trimmed = line.trim_start();
6055        let is_top_level = idx == 0;
6056        if is_top_level {
6057            line.push_str(&alloc::format!(" (rows={total_rows})"));
6058            continue;
6059        }
6060        if let Some(rest) = trimmed.strip_prefix("From: ") {
6061            let (name, scan_kind) = match rest.split_once(" [") {
6062                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
6063                None => (rest.trim(), ""),
6064            };
6065            let bare = name.split_whitespace().next().unwrap_or(name);
6066            let hot = catalog.get(bare).map(|t| t.rows().len());
6067            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
6068            // cold-tier segment the scan COULD have walked. v6.2.x
6069            // can tighten to per-table by walking the table's
6070            // BTree-index cold locators.
6071            let annot = match (hot, scan_kind) {
6072                (Some(h), "full scan") => {
6073                    let mut s = alloc::format!(" (hot_rows={h}");
6074                    if any_cold {
6075                        s.push_str(&alloc::format!(
6076                            ", cold_tier=present, cold_segments={cold_ids_repr}"
6077                        ));
6078                    }
6079                    s.push(')');
6080                    s
6081                }
6082                (Some(h), "index seek") => {
6083                    let mut s = alloc::format!(" (hot_rows≤{h}");
6084                    if any_cold {
6085                        s.push_str(&alloc::format!(
6086                            ", cold_tier=present, cold_segments={cold_ids_repr}"
6087                        ));
6088                    }
6089                    s.push(')');
6090                    s
6091                }
6092                _ => " (rows=—)".to_string(),
6093            };
6094            line.push_str(&annot);
6095            continue;
6096        }
6097        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
6098        line.push_str(" (rows=—)");
6099    }
6100}
6101
6102fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
6103    let pad = "  ".repeat(depth);
6104    // 1) Top-level operator label.
6105    let top = if !stmt.ctes.is_empty() {
6106        if stmt.ctes.iter().any(|c| c.recursive) {
6107            "CTEScan (WITH RECURSIVE)"
6108        } else {
6109            "CTEScan (WITH)"
6110        }
6111    } else if !stmt.unions.is_empty() {
6112        "UnionScan"
6113    } else if select_has_window(stmt) {
6114        "WindowAgg"
6115    } else if aggregate::uses_aggregate(stmt) {
6116        "Aggregate"
6117    } else if stmt.distinct {
6118        "Distinct"
6119    } else if stmt.from.is_some() {
6120        "TableScan"
6121    } else {
6122        "Result"
6123    };
6124    out.push(alloc::format!("{pad}{top}"));
6125    let child = "  ".repeat(depth + 1);
6126    // 2) CTE bodies.
6127    for cte in &stmt.ctes {
6128        let head = if cte.recursive {
6129            alloc::format!("{child}CTE (recursive): {}", cte.name)
6130        } else {
6131            alloc::format!("{child}CTE: {}", cte.name)
6132        };
6133        out.push(head);
6134        explain_select(&cte.body, engine, depth + 2, out);
6135    }
6136    // 3) FROM details — primary table + joins, index hits.
6137    if let Some(from) = &stmt.from {
6138        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
6139        if let Some(alias) = &from.primary.alias {
6140            tag.push_str(&alloc::format!(" AS {alias}"));
6141        }
6142        // Try to detect an index-seek opportunity on WHERE against
6143        // the primary table — same heuristic the executor uses.
6144        if let Some(w) = &stmt.where_
6145            && let Some(table) = engine.active_catalog().get(&from.primary.name)
6146        {
6147            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
6148            let cols = &table.schema().columns;
6149            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
6150                tag.push_str(" [index seek]");
6151            } else {
6152                tag.push_str(" [full scan]");
6153            }
6154        } else {
6155            tag.push_str(" [full scan]");
6156        }
6157        out.push(tag);
6158        for j in &from.joins {
6159            let kind = match j.kind {
6160                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
6161                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
6162                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
6163            };
6164            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
6165            if let Some(alias) = &j.table.alias {
6166                s.push_str(&alloc::format!(" AS {alias}"));
6167            }
6168            if j.on.is_some() {
6169                s.push_str(" (ON …)");
6170            }
6171            out.push(s);
6172        }
6173    }
6174    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
6175    if let Some(w) = &stmt.where_ {
6176        let mut s = alloc::format!("{child}Filter: {w}");
6177        if expr_has_subquery(w) {
6178            s.push_str(" [subquery]");
6179        }
6180        out.push(s);
6181    }
6182    if let Some(gs) = &stmt.group_by {
6183        let mut parts = Vec::new();
6184        for g in gs {
6185            parts.push(alloc::format!("{g}"));
6186        }
6187        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
6188    }
6189    if let Some(h) = &stmt.having {
6190        out.push(alloc::format!("{child}Having: {h}"));
6191    }
6192    for o in &stmt.order_by {
6193        let dir = if o.desc { "DESC" } else { "ASC" };
6194        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
6195    }
6196    if let Some(lim) = stmt.limit {
6197        out.push(alloc::format!("{child}Limit: {lim}"));
6198    }
6199    if let Some(off) = stmt.offset {
6200        out.push(alloc::format!("{child}Offset: {off}"));
6201    }
6202    // 5) Projection — collapse Wildcard or render N items.
6203    if stmt
6204        .items
6205        .iter()
6206        .any(|it| matches!(it, SelectItem::Wildcard))
6207    {
6208        out.push(alloc::format!("{child}Project: *"));
6209    } else {
6210        out.push(alloc::format!(
6211            "{child}Project: {} item(s)",
6212            stmt.items.len()
6213        ));
6214    }
6215    // 6) Recurse into UNION peers.
6216    for (kind, peer) in &stmt.unions {
6217        let label = match kind {
6218            UnionKind::All => "UNION ALL",
6219            UnionKind::Distinct => "UNION",
6220        };
6221        out.push(alloc::format!("{child}{label}"));
6222        explain_select(peer, engine, depth + 2, out);
6223    }
6224}
6225
6226/// v4.23: recognise the engine errors that indicate the inner
6227/// SELECT couldn't be evaluated in isolation because it references
6228/// an outer column — used by `subquery_replacement` to skip
6229/// materialisation and let row-eval handle it instead.
6230fn is_correlation_error(e: &EngineError) -> bool {
6231    matches!(
6232        e,
6233        EngineError::Eval(
6234            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
6235        )
6236    )
6237}
6238
6239/// v4.23: walk every Expr in `stmt` and replace each Column ref
6240/// that targets the outer scope (qualifier matches the outer
6241/// table alias) with a Literal carrying the outer row's value.
6242/// Conservative: only qualified refs are substituted, so the user
6243/// must write `outer_alias.col` to reference an outer column. This
6244/// matches PG's lexical scoping for correlated subqueries and
6245/// avoids accidentally rebinding inner columns of the same name.
6246fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
6247    let Some(outer_alias) = ctx.table_alias else {
6248        return;
6249    };
6250    substitute_in_select(stmt, row, ctx, outer_alias);
6251}
6252
6253fn substitute_in_select(
6254    stmt: &mut SelectStatement,
6255    row: &Row,
6256    ctx: &EvalContext<'_>,
6257    outer_alias: &str,
6258) {
6259    for item in &mut stmt.items {
6260        if let SelectItem::Expr { expr, .. } = item {
6261            substitute_in_expr(expr, row, ctx, outer_alias);
6262        }
6263    }
6264    if let Some(w) = &mut stmt.where_ {
6265        substitute_in_expr(w, row, ctx, outer_alias);
6266    }
6267    if let Some(gs) = &mut stmt.group_by {
6268        for g in gs {
6269            substitute_in_expr(g, row, ctx, outer_alias);
6270        }
6271    }
6272    if let Some(h) = &mut stmt.having {
6273        substitute_in_expr(h, row, ctx, outer_alias);
6274    }
6275    for o in &mut stmt.order_by {
6276        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
6277    }
6278    for (_, peer) in &mut stmt.unions {
6279        substitute_in_select(peer, row, ctx, outer_alias);
6280    }
6281}
6282
6283fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
6284    if let Expr::Column(c) = e
6285        && let Some(qual) = &c.qualifier
6286        && qual.eq_ignore_ascii_case(outer_alias)
6287    {
6288        // Look up the column's index in the outer schema.
6289        if let Some(idx) = ctx
6290            .columns
6291            .iter()
6292            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
6293        {
6294            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
6295            if let Ok(lit) = value_to_literal_expr(v) {
6296                *e = lit;
6297                return;
6298            }
6299        }
6300    }
6301    match e {
6302        Expr::Binary { lhs, rhs, .. } => {
6303            substitute_in_expr(lhs, row, ctx, outer_alias);
6304            substitute_in_expr(rhs, row, ctx, outer_alias);
6305        }
6306        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6307            substitute_in_expr(expr, row, ctx, outer_alias);
6308        }
6309        Expr::Like { expr, pattern, .. } => {
6310            substitute_in_expr(expr, row, ctx, outer_alias);
6311            substitute_in_expr(pattern, row, ctx, outer_alias);
6312        }
6313        Expr::FunctionCall { args, .. } => {
6314            for a in args {
6315                substitute_in_expr(a, row, ctx, outer_alias);
6316            }
6317        }
6318        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
6319        Expr::WindowFunction {
6320            args,
6321            partition_by,
6322            order_by,
6323            ..
6324        } => {
6325            for a in args {
6326                substitute_in_expr(a, row, ctx, outer_alias);
6327            }
6328            for p in partition_by {
6329                substitute_in_expr(p, row, ctx, outer_alias);
6330            }
6331            for (o, _) in order_by {
6332                substitute_in_expr(o, row, ctx, outer_alias);
6333            }
6334        }
6335        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
6336        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
6337            substitute_in_select(subquery, row, ctx, outer_alias);
6338        }
6339        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
6340        Expr::Array(items) => {
6341            for elem in items {
6342                substitute_in_expr(elem, row, ctx, outer_alias);
6343            }
6344        }
6345        Expr::ArraySubscript { target, index } => {
6346            substitute_in_expr(target, row, ctx, outer_alias);
6347            substitute_in_expr(index, row, ctx, outer_alias);
6348        }
6349        Expr::AnyAll { expr, array, .. } => {
6350            substitute_in_expr(expr, row, ctx, outer_alias);
6351            substitute_in_expr(array, row, ctx, outer_alias);
6352        }
6353    }
6354}
6355
6356/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
6357/// dedup inside the recursive iteration. Crude but deterministic
6358/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
6359fn encode_row_key(row: &Row) -> Vec<u8> {
6360    let mut out = Vec::new();
6361    for v in &row.values {
6362        let s = alloc::format!("{v:?}|");
6363        out.extend_from_slice(s.as_bytes());
6364    }
6365    out
6366}
6367
6368fn select_has_window(stmt: &SelectStatement) -> bool {
6369    for item in &stmt.items {
6370        if let SelectItem::Expr { expr, .. } = item
6371            && expr_has_window(expr)
6372        {
6373            return true;
6374        }
6375    }
6376    false
6377}
6378
6379fn expr_has_window(e: &Expr) -> bool {
6380    match e {
6381        Expr::WindowFunction { .. } => true,
6382        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
6383        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6384            expr_has_window(expr)
6385        }
6386        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
6387        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
6388        Expr::Extract { source, .. } => expr_has_window(source),
6389        Expr::ScalarSubquery(_)
6390        | Expr::Exists { .. }
6391        | Expr::InSubquery { .. }
6392        | Expr::Literal(_)
6393        | Expr::Placeholder(_)
6394        | Expr::Column(_) => false,
6395        Expr::Array(items) => items.iter().any(expr_has_window),
6396        Expr::ArraySubscript { target, index } => {
6397            expr_has_window(target) || expr_has_window(index)
6398        }
6399        Expr::AnyAll { expr, array, .. } => {
6400            expr_has_window(expr) || expr_has_window(array)
6401        }
6402    }
6403}
6404
6405fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
6406    if let Expr::WindowFunction { .. } = e {
6407        // Deduplicate by structural equality on the expression
6408        // (cheap because window args + partition + order are
6409        // small). Without dedup we'd recompute identical windows
6410        // once per occurrence in the projection.
6411        if !out.iter().any(|x| x == e) {
6412            out.push(e.clone());
6413        }
6414        return;
6415    }
6416    match e {
6417        // Already handled by the early-return at the top.
6418        Expr::WindowFunction { .. } => unreachable!(),
6419        Expr::Binary { lhs, rhs, .. } => {
6420            collect_window_nodes(lhs, out);
6421            collect_window_nodes(rhs, out);
6422        }
6423        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6424            collect_window_nodes(expr, out);
6425        }
6426        Expr::FunctionCall { args, .. } => {
6427            for a in args {
6428                collect_window_nodes(a, out);
6429            }
6430        }
6431        Expr::Like { expr, pattern, .. } => {
6432            collect_window_nodes(expr, out);
6433            collect_window_nodes(pattern, out);
6434        }
6435        Expr::Extract { source, .. } => collect_window_nodes(source, out),
6436        _ => {}
6437    }
6438}
6439
6440fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
6441    if let Expr::WindowFunction { .. } = e
6442        && let Some(idx) = window_nodes.iter().position(|w| w == e)
6443    {
6444        *e = Expr::Column(spg_sql::ast::ColumnName {
6445            qualifier: None,
6446            name: alloc::format!("__win_{idx}"),
6447        });
6448        return;
6449    }
6450    match e {
6451        Expr::Binary { lhs, rhs, .. } => {
6452            rewrite_window_to_columns(lhs, window_nodes);
6453            rewrite_window_to_columns(rhs, window_nodes);
6454        }
6455        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6456            rewrite_window_to_columns(expr, window_nodes);
6457        }
6458        Expr::FunctionCall { args, .. } => {
6459            for a in args {
6460                rewrite_window_to_columns(a, window_nodes);
6461            }
6462        }
6463        Expr::Like { expr, pattern, .. } => {
6464            rewrite_window_to_columns(expr, window_nodes);
6465            rewrite_window_to_columns(pattern, window_nodes);
6466        }
6467        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
6468        _ => {}
6469    }
6470}
6471
6472/// Total order over partition-key tuples. NULL sorts as the
6473/// lowest value (matches the `<` partial order's NULL-last
6474/// behaviour with `INFINITY` flipped).
6475fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
6476    for (x, y) in a.iter().zip(b.iter()) {
6477        let c = value_cmp(x, y);
6478        if c != core::cmp::Ordering::Equal {
6479            return c;
6480        }
6481    }
6482    a.len().cmp(&b.len())
6483}
6484
6485fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
6486    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
6487        let c = value_cmp(va, vb);
6488        let c = if *desc { c.reverse() } else { c };
6489        if c != core::cmp::Ordering::Equal {
6490            return c;
6491        }
6492    }
6493    a.len().cmp(&b.len())
6494}
6495
6496#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
6497fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
6498    use core::cmp::Ordering;
6499    match (a, b) {
6500        (Value::Null, Value::Null) => Ordering::Equal,
6501        (Value::Null, _) => Ordering::Less,
6502        (_, Value::Null) => Ordering::Greater,
6503        (Value::Int(x), Value::Int(y)) => x.cmp(y),
6504        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
6505        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
6506        (Value::Text(x), Value::Text(y)) => x.cmp(y),
6507        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
6508        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
6509        (Value::Date(x), Value::Date(y)) => x.cmp(y),
6510        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
6511        // Cross-type compare: fall back to the debug rendering —
6512        // same-partition is the goal, exact order is irrelevant.
6513        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
6514    }
6515}
6516
6517/// Compute the window function's per-row output for one partition.
6518/// `slice` has (partition key, order key, original-row-index)
6519/// tuples already sorted by order key. `filtered_rows` is the
6520/// full row list indexed by original-row-index. `out_vals` is
6521/// the destination, also indexed by original-row-index.
6522#[allow(
6523    clippy::too_many_arguments,
6524    clippy::cast_possible_truncation,
6525    clippy::cast_possible_wrap,
6526    clippy::cast_precision_loss,
6527    clippy::cast_sign_loss,
6528    clippy::doc_markdown,
6529    clippy::too_many_lines,
6530    clippy::type_complexity,
6531    clippy::match_same_arms
6532)]
6533fn compute_window_partition(
6534    name: &str,
6535    args: &[Expr],
6536    ordered: bool,
6537    frame: Option<&WindowFrame>,
6538    null_treatment: spg_sql::ast::NullTreatment,
6539    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6540    filtered_rows: &[&Row],
6541    ctx: &EvalContext<'_>,
6542    out_vals: &mut [Value],
6543) -> Result<(), EngineError> {
6544    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
6545    let lower = name.to_ascii_lowercase();
6546    match lower.as_str() {
6547        "row_number" => {
6548            for (rank, (_, _, idx)) in slice.iter().enumerate() {
6549                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
6550            }
6551            Ok(())
6552        }
6553        "rank" => {
6554            let mut prev_key: Option<&[(Value, bool)]> = None;
6555            let mut current_rank: i64 = 1;
6556            for (i, (_, okey, idx)) in slice.iter().enumerate() {
6557                if let Some(p) = prev_key
6558                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6559                {
6560                    current_rank = (i + 1) as i64;
6561                }
6562                if prev_key.is_none() {
6563                    current_rank = 1;
6564                }
6565                out_vals[*idx] = Value::BigInt(current_rank);
6566                prev_key = Some(okey.as_slice());
6567            }
6568            Ok(())
6569        }
6570        "dense_rank" => {
6571            let mut prev_key: Option<&[(Value, bool)]> = None;
6572            let mut current_rank: i64 = 0;
6573            for (_, okey, idx) in slice {
6574                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
6575                    current_rank += 1;
6576                }
6577                out_vals[*idx] = Value::BigInt(current_rank);
6578                prev_key = Some(okey.as_slice());
6579            }
6580            Ok(())
6581        }
6582        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
6583            // Pre-evaluate the function arg per row in the slice
6584            // (count_star has no arg).
6585            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
6586                slice.iter().map(|_| Value::Null).collect()
6587            } else {
6588                slice
6589                    .iter()
6590                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6591                    .collect::<Result<_, _>>()
6592                    .map_err(EngineError::Eval)?
6593            };
6594            // v4.20: pick the effective frame. Explicit frame
6595            // overrides the implicit default (running for ordered,
6596            // whole-partition for unordered).
6597            let eff = effective_frame(frame, ordered)?;
6598            #[allow(clippy::needless_range_loop)]
6599            for i in 0..slice.len() {
6600                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6601                let mut sum: f64 = 0.0;
6602                let mut count: i64 = 0;
6603                let mut min_v: Option<f64> = None;
6604                let mut max_v: Option<f64> = None;
6605                let mut row_count: i64 = 0;
6606                if lo <= hi {
6607                    for j in lo..=hi {
6608                        let v = &arg_values[j];
6609                        match lower.as_str() {
6610                            "count_star" => row_count += 1,
6611                            "count" => {
6612                                if !v.is_null() {
6613                                    count += 1;
6614                                }
6615                            }
6616                            _ => {
6617                                if let Some(x) = value_to_f64(v) {
6618                                    sum += x;
6619                                    count += 1;
6620                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
6621                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
6622                                }
6623                            }
6624                        }
6625                    }
6626                }
6627                let value = match lower.as_str() {
6628                    "count_star" => Value::BigInt(row_count),
6629                    "count" => Value::BigInt(count),
6630                    "sum" => Value::Float(sum),
6631                    "avg" => {
6632                        if count == 0 {
6633                            Value::Null
6634                        } else {
6635                            Value::Float(sum / count as f64)
6636                        }
6637                    }
6638                    "min" => min_v.map_or(Value::Null, Value::Float),
6639                    "max" => max_v.map_or(Value::Null, Value::Float),
6640                    _ => unreachable!(),
6641                };
6642                let (_, _, idx) = &slice[i];
6643                out_vals[*idx] = value;
6644            }
6645            Ok(())
6646        }
6647        "lag" | "lead" => {
6648            // lag(expr [, offset [, default]])
6649            // lead(expr [, offset [, default]])
6650            if args.is_empty() {
6651                return Err(EngineError::Unsupported(alloc::format!(
6652                    "{lower}() requires at least one argument"
6653                )));
6654            }
6655            let offset: i64 = if args.len() >= 2 {
6656                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6657                    .map_err(EngineError::Eval)?;
6658                match v {
6659                    Value::SmallInt(n) => i64::from(n),
6660                    Value::Int(n) => i64::from(n),
6661                    Value::BigInt(n) => n,
6662                    _ => {
6663                        return Err(EngineError::Unsupported(alloc::format!(
6664                            "{lower}() offset must be integer"
6665                        )));
6666                    }
6667                }
6668            } else {
6669                1
6670            };
6671            let default: Value = if args.len() >= 3 {
6672                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
6673                    .map_err(EngineError::Eval)?
6674            } else {
6675                Value::Null
6676            };
6677            let values: Vec<Value> = slice
6678                .iter()
6679                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6680                .collect::<Result<_, _>>()
6681                .map_err(EngineError::Eval)?;
6682            let n = slice.len();
6683            for (i, (_, _, idx)) in slice.iter().enumerate() {
6684                let signed_offset = if lower == "lag" { -offset } else { offset };
6685                let v = if ignore_nulls {
6686                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
6687                    // skipping NULL values; the `offset`-th non-NULL
6688                    // encountered is the result.
6689                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
6690                    let needed: i64 = signed_offset.abs();
6691                    if needed == 0 {
6692                        values[i].clone()
6693                    } else {
6694                        let mut j: i64 = i as i64;
6695                        let mut hits: i64 = 0;
6696                        let mut found: Option<Value> = None;
6697                        loop {
6698                            j += step;
6699                            if j < 0 || j >= n as i64 {
6700                                break;
6701                            }
6702                            #[allow(clippy::cast_sign_loss)]
6703                            let v = &values[j as usize];
6704                            if !v.is_null() {
6705                                hits += 1;
6706                                if hits == needed {
6707                                    found = Some(v.clone());
6708                                    break;
6709                                }
6710                            }
6711                        }
6712                        found.unwrap_or_else(|| default.clone())
6713                    }
6714                } else {
6715                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
6716                    if target_signed < 0
6717                        || target_signed >= i64::try_from(n).unwrap_or(i64::MAX)
6718                    {
6719                        default.clone()
6720                    } else {
6721                        #[allow(clippy::cast_sign_loss)]
6722                        {
6723                            values[target_signed as usize].clone()
6724                        }
6725                    }
6726                };
6727                out_vals[*idx] = v;
6728            }
6729            Ok(())
6730        }
6731        "first_value" | "last_value" | "nth_value" => {
6732            if args.is_empty() {
6733                return Err(EngineError::Unsupported(alloc::format!(
6734                    "{lower}() requires at least one argument"
6735                )));
6736            }
6737            let values: Vec<Value> = slice
6738                .iter()
6739                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6740                .collect::<Result<_, _>>()
6741                .map_err(EngineError::Eval)?;
6742            let nth: usize = if lower == "nth_value" {
6743                if args.len() < 2 {
6744                    return Err(EngineError::Unsupported(
6745                        "nth_value() requires (expr, n)".into(),
6746                    ));
6747                }
6748                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6749                    .map_err(EngineError::Eval)?;
6750                let raw = match v {
6751                    Value::SmallInt(n) => i64::from(n),
6752                    Value::Int(n) => i64::from(n),
6753                    Value::BigInt(n) => n,
6754                    _ => {
6755                        return Err(EngineError::Unsupported(
6756                            "nth_value() n must be integer".into(),
6757                        ));
6758                    }
6759                };
6760                if raw < 1 {
6761                    return Err(EngineError::Unsupported(
6762                        "nth_value() n must be >= 1".into(),
6763                    ));
6764                }
6765                #[allow(clippy::cast_sign_loss)]
6766                {
6767                    raw as usize
6768                }
6769            } else {
6770                0
6771            };
6772            let eff = effective_frame(frame, ordered)?;
6773            for i in 0..slice.len() {
6774                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6775                let (_, _, idx) = &slice[i];
6776                let v = if lo > hi {
6777                    Value::Null
6778                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
6779                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
6780                    // selecting the boundary value within the frame.
6781                    if lower == "first_value" {
6782                        (lo..=hi)
6783                            .find_map(|j| {
6784                                let v = &values[j];
6785                                (!v.is_null()).then(|| v.clone())
6786                            })
6787                            .unwrap_or(Value::Null)
6788                    } else {
6789                        (lo..=hi)
6790                            .rev()
6791                            .find_map(|j| {
6792                                let v = &values[j];
6793                                (!v.is_null()).then(|| v.clone())
6794                            })
6795                            .unwrap_or(Value::Null)
6796                    }
6797                } else {
6798                    match lower.as_str() {
6799                        "first_value" => values[lo].clone(),
6800                        "last_value" => values[hi].clone(),
6801                        "nth_value" => {
6802                            let pos = lo + nth - 1;
6803                            if pos > hi {
6804                                Value::Null
6805                            } else {
6806                                values[pos].clone()
6807                            }
6808                        }
6809                        _ => unreachable!(),
6810                    }
6811                };
6812                out_vals[*idx] = v;
6813            }
6814            Ok(())
6815        }
6816        "ntile" => {
6817            if args.is_empty() {
6818                return Err(EngineError::Unsupported(
6819                    "ntile(n) requires an integer argument".into(),
6820                ));
6821            }
6822            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
6823                .map_err(EngineError::Eval)?;
6824            let bucket_count: i64 = match v {
6825                Value::SmallInt(n) => i64::from(n),
6826                Value::Int(n) => i64::from(n),
6827                Value::BigInt(n) => n,
6828                _ => {
6829                    return Err(EngineError::Unsupported(
6830                        "ntile() argument must be integer".into(),
6831                    ));
6832                }
6833            };
6834            if bucket_count < 1 {
6835                return Err(EngineError::Unsupported(
6836                    "ntile() argument must be >= 1".into(),
6837                ));
6838            }
6839            #[allow(clippy::cast_sign_loss)]
6840            let buckets = bucket_count as usize;
6841            let n = slice.len();
6842            // Each bucket gets `base` rows; the first `extras` buckets
6843            // get one extra. PG semantics.
6844            let base = n / buckets;
6845            let extras = n % buckets;
6846            let mut bucket: usize = 1;
6847            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
6848            let mut buckets_with_extra_remaining = extras;
6849            for (_, _, idx) in slice {
6850                if remaining_in_bucket == 0 {
6851                    bucket += 1;
6852                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
6853                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
6854                        base + 1
6855                    } else {
6856                        base
6857                    };
6858                    // Edge: if base==0 and extras==0, all rows fit;
6859                    // shouldn't reach here, but guard anyway.
6860                    if remaining_in_bucket == 0 {
6861                        remaining_in_bucket = 1;
6862                    }
6863                }
6864                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
6865                remaining_in_bucket -= 1;
6866            }
6867            Ok(())
6868        }
6869        "percent_rank" => {
6870            // (rank - 1) / (n - 1) where rank is the standard RANK().
6871            // Single-row partitions get 0.
6872            let n = slice.len();
6873            let mut prev_key: Option<&[(Value, bool)]> = None;
6874            let mut current_rank: i64 = 1;
6875            for (i, (_, okey, idx)) in slice.iter().enumerate() {
6876                if let Some(p) = prev_key
6877                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6878                {
6879                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
6880                }
6881                if prev_key.is_none() {
6882                    current_rank = 1;
6883                }
6884                #[allow(clippy::cast_precision_loss)]
6885                let pr = if n <= 1 {
6886                    0.0
6887                } else {
6888                    (current_rank - 1) as f64 / (n - 1) as f64
6889                };
6890                out_vals[*idx] = Value::Float(pr);
6891                prev_key = Some(okey.as_slice());
6892            }
6893            Ok(())
6894        }
6895        "cume_dist" => {
6896            // # rows up to and including this row's peer group / n.
6897            let n = slice.len();
6898            // First pass: find peer-group-end rank for each row.
6899            for i in 0..slice.len() {
6900                let peer_end = peer_group_end(slice, i);
6901                #[allow(clippy::cast_precision_loss)]
6902                let cd = (peer_end + 1) as f64 / n as f64;
6903                let (_, _, idx) = &slice[i];
6904                out_vals[*idx] = Value::Float(cd);
6905            }
6906            Ok(())
6907        }
6908        other => Err(EngineError::Unsupported(alloc::format!(
6909            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
6910        ))),
6911    }
6912}
6913
6914/// v4.20: resolve the user-provided frame down to a normalised
6915/// `(kind, start, end)`. `None` means default — derive from
6916/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
6917/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
6918/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
6919/// end → CURRENT ROW per the PG spec.
6920fn effective_frame(
6921    frame: Option<&WindowFrame>,
6922    ordered: bool,
6923) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
6924    match frame {
6925        None => {
6926            if ordered {
6927                Ok((
6928                    FrameKind::Range,
6929                    FrameBound::UnboundedPreceding,
6930                    FrameBound::CurrentRow,
6931                ))
6932            } else {
6933                Ok((
6934                    FrameKind::Rows,
6935                    FrameBound::UnboundedPreceding,
6936                    FrameBound::UnboundedFollowing,
6937                ))
6938            }
6939        }
6940        Some(fr) => {
6941            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
6942            // Reject start > end (a few impossible combinations).
6943            if matches!(fr.start, FrameBound::UnboundedFollowing)
6944                || matches!(end, FrameBound::UnboundedPreceding)
6945            {
6946                return Err(EngineError::Unsupported(alloc::format!(
6947                    "invalid frame: start={:?} end={:?}",
6948                    fr.start,
6949                    end
6950                )));
6951            }
6952            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
6953            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
6954            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
6955            // implemented in v4.20.
6956            if fr.kind == FrameKind::Range
6957                && (matches!(
6958                    fr.start,
6959                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6960                ) || matches!(
6961                    end,
6962                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6963                ))
6964            {
6965                return Err(EngineError::Unsupported(
6966                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
6967                ));
6968            }
6969            Ok((fr.kind, fr.start.clone(), end))
6970        }
6971    }
6972}
6973
6974/// Compute `(lo, hi)` row-index bounds inside the partition slice
6975/// for the row at position `i`. Inclusive, clamped to
6976/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
6977#[allow(clippy::type_complexity)]
6978fn frame_bounds_for_row(
6979    eff: &(FrameKind, FrameBound, FrameBound),
6980    i: usize,
6981    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6982) -> (usize, usize) {
6983    let (kind, start, end) = eff;
6984    let n = slice.len();
6985    let last = n.saturating_sub(1);
6986    let (mut lo, mut hi) = match kind {
6987        FrameKind::Rows => {
6988            let lo = match start {
6989                FrameBound::UnboundedPreceding => 0,
6990                FrameBound::OffsetPreceding(k) => {
6991                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6992                    i.saturating_sub(k)
6993                }
6994                FrameBound::CurrentRow => i,
6995                FrameBound::OffsetFollowing(k) => {
6996                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
6997                    i.saturating_add(k).min(last)
6998                }
6999                FrameBound::UnboundedFollowing => last,
7000            };
7001            let hi = match end {
7002                FrameBound::UnboundedPreceding => 0,
7003                FrameBound::OffsetPreceding(k) => {
7004                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
7005                    i.saturating_sub(k)
7006                }
7007                FrameBound::CurrentRow => i,
7008                FrameBound::OffsetFollowing(k) => {
7009                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
7010                    i.saturating_add(k).min(last)
7011                }
7012                FrameBound::UnboundedFollowing => last,
7013            };
7014            (lo, hi)
7015        }
7016        FrameKind::Range => {
7017            // RANGE bounds are peer-aware. With only UNBOUNDED and
7018            // CURRENT ROW supported (rejected at effective_frame for
7019            // explicit offsets), the start/end map to the
7020            // partition's full extent at the same-order-key peer
7021            // group boundary.
7022            let lo = match start {
7023                FrameBound::UnboundedPreceding => 0,
7024                FrameBound::CurrentRow => peer_group_start(slice, i),
7025                FrameBound::UnboundedFollowing => last,
7026                _ => unreachable!("offset bounds rejected for RANGE"),
7027            };
7028            let hi = match end {
7029                FrameBound::UnboundedPreceding => 0,
7030                FrameBound::CurrentRow => peer_group_end(slice, i),
7031                FrameBound::UnboundedFollowing => last,
7032                _ => unreachable!("offset bounds rejected for RANGE"),
7033            };
7034            (lo, hi)
7035        }
7036    };
7037    if hi >= n {
7038        hi = last;
7039    }
7040    if lo >= n {
7041        lo = last;
7042    }
7043    (lo, hi)
7044}
7045
7046/// Find the inclusive index of the first row with the same ORDER
7047/// BY key as `slice[i]`. Slice is already sorted by partition then
7048/// order, so peers are contiguous.
7049#[allow(clippy::type_complexity)]
7050fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
7051    let key = &slice[i].1;
7052    let mut j = i;
7053    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
7054        j -= 1;
7055    }
7056    j
7057}
7058
7059/// Find the inclusive index of the last row with the same ORDER
7060/// BY key as `slice[i]`.
7061#[allow(clippy::type_complexity)]
7062fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
7063    let key = &slice[i].1;
7064    let mut j = i;
7065    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
7066        j += 1;
7067    }
7068    j
7069}
7070
7071fn value_to_f64(v: &Value) -> Option<f64> {
7072    match v {
7073        Value::SmallInt(n) => Some(f64::from(*n)),
7074        Value::Int(n) => Some(f64::from(*n)),
7075        #[allow(clippy::cast_precision_loss)]
7076        Value::BigInt(n) => Some(*n as f64),
7077        Value::Float(x) => Some(*x),
7078        _ => None,
7079    }
7080}
7081
7082/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
7083/// projection / `order_by` — saves cloning the AST when there are
7084/// none (the common case).
7085fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
7086    let mut any = false;
7087    for item in &stmt.items {
7088        if let SelectItem::Expr { expr, .. } = item {
7089            any = any || expr_has_subquery(expr);
7090        }
7091    }
7092    if let Some(w) = &stmt.where_ {
7093        any = any || expr_has_subquery(w);
7094    }
7095    if let Some(h) = &stmt.having {
7096        any = any || expr_has_subquery(h);
7097    }
7098    for o in &stmt.order_by {
7099        any = any || expr_has_subquery(&o.expr);
7100    }
7101    for (_, peer) in &stmt.unions {
7102        any = any || expr_tree_has_subquery(peer);
7103    }
7104    any
7105}
7106
7107fn expr_has_subquery(e: &Expr) -> bool {
7108    match e {
7109        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
7110        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
7111        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7112            expr_has_subquery(expr)
7113        }
7114        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
7115        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
7116        Expr::Extract { source, .. } => expr_has_subquery(source),
7117        Expr::WindowFunction {
7118            args,
7119            partition_by,
7120            order_by,
7121            ..
7122        } => {
7123            args.iter().any(expr_has_subquery)
7124                || partition_by.iter().any(expr_has_subquery)
7125                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
7126        }
7127        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7128        Expr::Array(items) => items.iter().any(expr_has_subquery),
7129        Expr::ArraySubscript { target, index } => {
7130            expr_has_subquery(target) || expr_has_subquery(index)
7131        }
7132        Expr::AnyAll { expr, array, .. } => {
7133            expr_has_subquery(expr) || expr_has_subquery(array)
7134        }
7135    }
7136}
7137
7138/// v4.10 helper: materialise a runtime `Value` back into an AST
7139/// `Expr::Literal` for the subquery-rewrite path. Supports the
7140/// types `Literal` can represent (Integer / Float / Text / Bool /
7141/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
7142/// would lose precision through Literal and aren't supported in
7143/// uncorrelated-subquery results; they error with a clear hint.
7144fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
7145    let lit = match v {
7146        Value::Null => Literal::Null,
7147        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
7148        Value::Int(n) => Literal::Integer(i64::from(n)),
7149        Value::BigInt(n) => Literal::Integer(n),
7150        Value::Float(x) => Literal::Float(x),
7151        Value::Text(s) | Value::Json(s) => Literal::String(s),
7152        Value::Bool(b) => Literal::Bool(b),
7153        other => {
7154            return Err(EngineError::Unsupported(alloc::format!(
7155                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
7156                other.data_type()
7157            )));
7158        }
7159    };
7160    Ok(Expr::Literal(lit))
7161}
7162
7163/// v6.1.1 — walk the prepared `Statement` AST and replace every
7164/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
7165/// params[n-1]))`. The dispatch downstream sees a `Statement`
7166/// indistinguishable from a simple-query parse, so the exec path
7167/// stays unchanged.
7168///
7169/// Errors fall into one shape: a `$N` references past the bound
7170/// `params.len()`. Out-of-range happens when the Bind didn't
7171/// supply enough values; pgwire surfaces this as a protocol error
7172/// to the client.
7173fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
7174    match stmt {
7175        Statement::Select(s) => substitute_select(s, params)?,
7176        Statement::Insert(ins) => {
7177            for row in &mut ins.rows {
7178                for e in row {
7179                    substitute_expr(e, params)?;
7180                }
7181            }
7182        }
7183        Statement::Update(u) => {
7184            for (_, e) in &mut u.assignments {
7185                substitute_expr(e, params)?;
7186            }
7187            if let Some(w) = &mut u.where_ {
7188                substitute_expr(w, params)?;
7189            }
7190        }
7191        Statement::Delete(d) => {
7192            if let Some(w) = &mut d.where_ {
7193                substitute_expr(w, params)?;
7194            }
7195        }
7196        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
7197        // Other statements (CREATE / BEGIN / SHOW / …) have no
7198        // expression slots; no walk needed.
7199        _ => {}
7200    }
7201    Ok(())
7202}
7203
7204fn substitute_select(
7205    s: &mut SelectStatement,
7206    params: &[Value],
7207) -> Result<(), EngineError> {
7208    for item in &mut s.items {
7209        if let SelectItem::Expr { expr, .. } = item {
7210            substitute_expr(expr, params)?;
7211        }
7212    }
7213    if let Some(w) = &mut s.where_ {
7214        substitute_expr(w, params)?;
7215    }
7216    if let Some(gs) = &mut s.group_by {
7217        for g in gs {
7218            substitute_expr(g, params)?;
7219        }
7220    }
7221    if let Some(h) = &mut s.having {
7222        substitute_expr(h, params)?;
7223    }
7224    for o in &mut s.order_by {
7225        substitute_expr(&mut o.expr, params)?;
7226    }
7227    for (_, peer) in &mut s.unions {
7228        substitute_select(peer, params)?;
7229    }
7230    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
7231    // mailrs H2. After this pass each LIMIT/OFFSET that was a
7232    // Placeholder is rewritten to Literal so the existing
7233    // `LimitExpr::as_literal` path consumes a concrete u32.
7234    if let Some(le) = s.limit {
7235        s.limit = Some(resolve_limit_placeholder(le, params)?);
7236    }
7237    if let Some(le) = s.offset {
7238        s.offset = Some(resolve_limit_placeholder(le, params)?);
7239    }
7240    Ok(())
7241}
7242
7243fn resolve_limit_placeholder(
7244    le: spg_sql::ast::LimitExpr,
7245    params: &[Value],
7246) -> Result<spg_sql::ast::LimitExpr, EngineError> {
7247    use spg_sql::ast::LimitExpr;
7248    match le {
7249        LimitExpr::Literal(_) => Ok(le),
7250        LimitExpr::Placeholder(n) => {
7251            let idx = usize::from(n).saturating_sub(1);
7252            let v = params.get(idx).ok_or_else(|| {
7253                EngineError::Eval(EvalError::PlaceholderOutOfRange {
7254                    n,
7255                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
7256                })
7257            })?;
7258            let int = match v {
7259                Value::SmallInt(x) => Some(i64::from(*x)),
7260                Value::Int(x) => Some(i64::from(*x)),
7261                Value::BigInt(x) => Some(*x),
7262                _ => None,
7263            }
7264            .ok_or_else(|| {
7265                EngineError::Unsupported(alloc::format!(
7266                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
7267                ))
7268            })?;
7269            if int < 0 {
7270                return Err(EngineError::Unsupported(alloc::format!(
7271                    "LIMIT/OFFSET ${n} bound to negative value {int}"
7272                )));
7273            }
7274            let bounded = u32::try_from(int).map_err(|_| {
7275                EngineError::Unsupported(alloc::format!(
7276                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
7277                ))
7278            })?;
7279            Ok(LimitExpr::Literal(bounded))
7280        }
7281    }
7282}
7283
7284fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
7285    if let Expr::Placeholder(n) = e {
7286        let idx = usize::from(*n).saturating_sub(1);
7287        let v = params.get(idx).ok_or_else(|| {
7288            EngineError::Eval(EvalError::PlaceholderOutOfRange {
7289                n: *n,
7290                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
7291            })
7292        })?;
7293        *e = Expr::Literal(value_to_literal(v.clone()));
7294        return Ok(());
7295    }
7296    match e {
7297        Expr::Binary { lhs, rhs, .. } => {
7298            substitute_expr(lhs, params)?;
7299            substitute_expr(rhs, params)?;
7300        }
7301        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7302            substitute_expr(expr, params)?;
7303        }
7304        Expr::FunctionCall { args, .. } => {
7305            for a in args {
7306                substitute_expr(a, params)?;
7307            }
7308        }
7309        Expr::Like { expr, pattern, .. } => {
7310            substitute_expr(expr, params)?;
7311            substitute_expr(pattern, params)?;
7312        }
7313        Expr::Extract { source, .. } => substitute_expr(source, params)?,
7314        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
7315        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
7316        Expr::InSubquery { expr, subquery, .. } => {
7317            substitute_expr(expr, params)?;
7318            substitute_select(subquery, params)?;
7319        }
7320        Expr::WindowFunction {
7321            args,
7322            partition_by,
7323            order_by,
7324            ..
7325        } => {
7326            for a in args {
7327                substitute_expr(a, params)?;
7328            }
7329            for p in partition_by {
7330                substitute_expr(p, params)?;
7331            }
7332            for (e, _) in order_by {
7333                substitute_expr(e, params)?;
7334            }
7335        }
7336        Expr::Literal(_) | Expr::Column(_) => {}
7337        // Already handled above.
7338        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
7339        Expr::Array(items) => {
7340            for elem in items {
7341                substitute_expr(elem, params)?;
7342            }
7343        }
7344        Expr::ArraySubscript { target, index } => {
7345            substitute_expr(target, params)?;
7346            substitute_expr(index, params)?;
7347        }
7348        Expr::AnyAll { expr, array, .. } => {
7349            substitute_expr(expr, params)?;
7350            substitute_expr(array, params)?;
7351        }
7352    }
7353    Ok(())
7354}
7355
// NOTE: this paragraph documents `value_to_literal`, which is defined
// further down in this file. It is kept as a plain `//` comment so
// rustdoc does not attach it to `sort_values_for_histogram` below.
//
// v6.1.1 — convert a runtime `Value` into the closest matching
// `Literal` for the substitute walker. Lossless for the simple
// scalars (Int / Float / Text / Bool) and for Vector; Numeric / Date /
// Timestamp render as their canonical text form so the downstream
// coerce_value can re-parse against the target column type. Json keeps
// its text as a string literal, and Interval maps to
// `Literal::Interval`. SQ8 / HalfVector cells are NOT expected as bind
// params; pgwire's Bind decodes vector params to the f32
// representation before they reach this helper.
7364/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
7365/// column's non-NULL sample before histogram building. Cross-type
7366/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
7367/// same widening the eval-side `compare` operator uses; everything
7368/// else (the genuinely-incompatible pairs) falls back to ordering
7369/// by canonical string form so the sort is still total + stable.
7370/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
7371/// here only via the string-fallback path because vector columns
7372/// are filtered out upstream.
7373fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
7374    use core::cmp::Ordering;
7375    match (a, b) {
7376        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
7377        (Value::Int(a), Value::Int(b)) => a.cmp(b),
7378        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7379        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
7380        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
7381        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7382        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7383        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7384        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
7385        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
7386        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
7387        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7388        (Value::Date(a), Value::Date(b)) => a.cmp(b),
7389        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7390        // Mixed numeric/float — widen to f64 and compare.
7391        (Value::SmallInt(n), Value::Float(x)) => {
7392            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7393        }
7394        (Value::Float(x), Value::SmallInt(n)) => {
7395            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7396        }
7397        (Value::Int(n), Value::Float(x)) => {
7398            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7399        }
7400        (Value::Float(x), Value::Int(n)) => {
7401            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7402        }
7403        (Value::BigInt(n), Value::Float(x)) => {
7404            #[allow(clippy::cast_precision_loss)]
7405            let nf = *n as f64;
7406            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
7407        }
7408        (Value::Float(x), Value::BigInt(n)) => {
7409            #[allow(clippy::cast_precision_loss)]
7410            let nf = *n as f64;
7411            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
7412        }
7413        // Cross-type fallback: lexicographic on canonical form.
7414        // Total + stable so the sort is well-defined.
7415        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
7416    }
7417}
7418
/// v6.2.0 — format histogram bounds as a `[v0, v1, ...]` string for
/// the `spg_statistic.histogram_bounds` column.
///
/// A bound is emitted verbatim unless it is empty or contains one of
/// `,`, `[`, `]`, `"`. In that case it is wrapped in double quotes,
/// with every `"` and `\` inside it preceded by a backslash. Entries
/// are separated by `", "`; an empty slice renders as `[]`.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    // Empty before the first entry, ", " before every later one.
    let mut separator = "";
    for bound in bounds {
        rendered.push_str(separator);
        separator = ", ";
        let plain = !bound.is_empty() && !bound.contains([',', '[', ']', '"']);
        if plain {
            rendered.push_str(bound);
            continue;
        }
        rendered.push('"');
        for ch in bound.chars() {
            if matches!(ch, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(ch);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
7449
7450/// v6.2.0 — canonical textual form of a `Value` for histogram
7451/// bound storage. Strings used by ANALYZE for sort + bound output.
7452/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
7453/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
7454/// the same form `format_date` / `format_timestamp` produce for
7455/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
7456/// reach this only via a non-Vector column (vector columns are
7457/// skipped upstream); they fall back to a Debug-derived form so
7458/// stats still serialise without crashing.
7459pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
7460    match v {
7461        Value::Null => "NULL".to_string(),
7462        Value::SmallInt(n) => alloc::format!("{n}"),
7463        Value::Int(n) => alloc::format!("{n}"),
7464        Value::BigInt(n) => alloc::format!("{n}"),
7465        Value::Float(x) => alloc::format!("{x:?}"),
7466        Value::Text(s) | Value::Json(s) => s.clone(),
7467        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
7468        Value::Date(d) => eval::format_date(*d),
7469        Value::Timestamp(t) => eval::format_timestamp(*t),
7470        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
7471        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
7472        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
7473            // Unreachable in practice (vector columns are filtered
7474            // out before this). Defensive fallback so a future
7475            // vector-stats path doesn't crash.
7476            alloc::format!("{v:?}")
7477        }
7478        // v7.5.0 — Value is #[non_exhaustive] for downstream
7479        // forward-compat. Future variants fall through to Debug
7480        // form here (same shape as the vector fallback above).
7481        _ => alloc::format!("{v:?}"),
7482    }
7483}
7484
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently always `false`: publications, subscriptions, users and
/// statistics are engine fields rather than catalog tables, so no
/// table is skipped. The function is a reserved hook for when
/// internal tables appear.
const fn is_internal_table_name(_name: &str) -> bool {
    // No internal catalog tables exist yet, so the name is never consulted.
    false
}
7494
7495fn value_to_literal(v: Value) -> Literal {
7496    match v {
7497        Value::Null => Literal::Null,
7498        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
7499        Value::Int(n) => Literal::Integer(i64::from(n)),
7500        Value::BigInt(n) => Literal::Integer(n),
7501        Value::Float(x) => Literal::Float(x),
7502        Value::Text(s) | Value::Json(s) => Literal::String(s),
7503        Value::Bool(b) => Literal::Bool(b),
7504        Value::Vector(v) => Literal::Vector(v),
7505        Value::Numeric { scaled, scale } => {
7506            Literal::String(eval::format_numeric(scaled, scale))
7507        }
7508        Value::Date(d) => Literal::String(eval::format_date(d)),
7509        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
7510        Value::Interval { months, micros } => Literal::Interval {
7511            months,
7512            micros,
7513            text: eval::format_interval(months, micros),
7514        },
7515        // SQ8 / halfvec cells dequantise to f32 before reaching the
7516        // substitute walker; pgwire's Bind path handles that.
7517        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
7518        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
7519        // v7.5.0 — Value is #[non_exhaustive]; future variants
7520        // render as Debug-form String literal until explicit
7521        // mapping is added.
7522        v => Literal::String(alloc::format!("{v:?}")),
7523    }
7524}
7525
7526fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
7527    let Some(now) = now_micros else {
7528        return;
7529    };
7530    match stmt {
7531        Statement::Select(s) => rewrite_select_clock(s, now),
7532        Statement::Insert(ins) => {
7533            for row in &mut ins.rows {
7534                for e in row {
7535                    rewrite_expr_clock(e, now);
7536                }
7537            }
7538        }
7539        _ => {}
7540    }
7541}
7542
7543fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
7544    for item in &mut s.items {
7545        if let SelectItem::Expr { expr, .. } = item {
7546            rewrite_expr_clock(expr, now);
7547        }
7548    }
7549    if let Some(w) = &mut s.where_ {
7550        rewrite_expr_clock(w, now);
7551    }
7552    if let Some(gs) = &mut s.group_by {
7553        for g in gs {
7554            rewrite_expr_clock(g, now);
7555        }
7556    }
7557    if let Some(h) = &mut s.having {
7558        rewrite_expr_clock(h, now);
7559    }
7560    for o in &mut s.order_by {
7561        rewrite_expr_clock(&mut o.expr, now);
7562    }
7563    for (_, peer) in &mut s.unions {
7564        rewrite_select_clock(peer, now);
7565    }
7566}
7567
7568/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
7569/// Literal / Column-with-qualifier (the dominant cases on a typical
7570/// AST) take a single pattern dispatch and exit. The clock-rewrite
7571/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
7572/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
7573/// refs) sit on their own arms with match guards so the fall-through
7574/// to the recursive arms is unambiguous.
7575fn rewrite_expr_clock(e: &mut Expr, now: i64) {
7576    // Fast-path test on the no-recursion shapes first. We can't fold
7577    // them into the big match below because they need to *replace* `e`
7578    // outright; the recursive arms below match on its sub-fields.
7579    if let Some(replacement) = clock_replacement_for(e, now) {
7580        *e = replacement;
7581        return;
7582    }
7583    match e {
7584        Expr::Binary { lhs, rhs, .. } => {
7585            rewrite_expr_clock(lhs, now);
7586            rewrite_expr_clock(rhs, now);
7587        }
7588        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7589            rewrite_expr_clock(expr, now);
7590        }
7591        Expr::FunctionCall { args, .. } => {
7592            for a in args {
7593                rewrite_expr_clock(a, now);
7594            }
7595        }
7596        Expr::Like { expr, pattern, .. } => {
7597            rewrite_expr_clock(expr, now);
7598            rewrite_expr_clock(pattern, now);
7599        }
7600        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
7601        // v4.10 subquery nodes — recurse into the inner SELECT's
7602        // expression slots so e.g. SELECT NOW() in a scalar
7603        // subquery picks up the same instant as the outer query.
7604        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
7605        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
7606        Expr::InSubquery { expr, subquery, .. } => {
7607            rewrite_expr_clock(expr, now);
7608            rewrite_select_clock(subquery, now);
7609        }
7610        // v4.12 window functions — args + PARTITION BY + ORDER BY
7611        // may all reference clock literals.
7612        Expr::WindowFunction {
7613            args,
7614            partition_by,
7615            order_by,
7616            ..
7617        } => {
7618            for a in args {
7619                rewrite_expr_clock(a, now);
7620            }
7621            for p in partition_by {
7622                rewrite_expr_clock(p, now);
7623            }
7624            for (e, _) in order_by {
7625                rewrite_expr_clock(e, now);
7626            }
7627        }
7628        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
7629        Expr::Array(items) => {
7630            for elem in items {
7631                rewrite_expr_clock(elem, now);
7632            }
7633        }
7634        Expr::ArraySubscript { target, index } => {
7635            rewrite_expr_clock(target, now);
7636            rewrite_expr_clock(index, now);
7637        }
7638        Expr::AnyAll { expr, array, .. } => {
7639            rewrite_expr_clock(expr, now);
7640            rewrite_expr_clock(array, now);
7641        }
7642    }
7643}
7644
7645/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
7646/// must be rewritten; otherwise `None` so the caller falls through to
7647/// the recursive walk. Identifies both function-call forms (`NOW()` /
7648/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
7649/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
7650/// which is how PG accepts them without parens).
7651fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
7652    let (kind, name) = match e {
7653        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
7654        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
7655        _ => return None,
7656    };
7657    // ASCII case-insensitive name match. Limited to the three keywords
7658    // that actually need rewriting.
7659    let matched = match name.len() {
7660        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
7661        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
7662        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
7663        _ => None,
7664    };
7665    let is_timestamp = matched?;
7666    let payload = if is_timestamp {
7667        now
7668    } else {
7669        now.div_euclid(86_400_000_000)
7670    };
7671    let target = if is_timestamp {
7672        spg_sql::ast::CastTarget::Timestamp
7673    } else {
7674        spg_sql::ast::CastTarget::Date
7675    };
7676    Some(Expr::Cast {
7677        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
7678        target,
7679    })
7680}
7681
/// Syntactic position in which a candidate clock name was found by
/// `clock_replacement_for`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ClockSite {
    /// A function call with no arguments, e.g. `NOW()`.
    Fn,
    /// An unqualified identifier, e.g. `CURRENT_DATE`.
    BareIdent,
}
7687
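// NOTE: this paragraph documents `resolve_order_by_position`, which is
// defined right after `expand_group_by_all`. It is kept as a plain
// `//` comment so rustdoc does not attach it to the wrong function.
//
// `ORDER BY <integer>` references the N-th SELECT item (1-based).
// Swap the integer literal for the matching item's expression so the
// executor doesn't need a special-case branch. Recurses into UNION
// peers because each peer keeps its own SELECT list.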
7692/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
7693/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
7694/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
7695/// and groups by whatever explicit non-aggregates remain — none in
7696/// the wildcard-only case, which still works for non-aggregate
7697/// queries).
7698fn expand_group_by_all(s: &mut SelectStatement) {
7699    if !s.group_by_all {
7700        for (_, peer) in &mut s.unions {
7701            expand_group_by_all(peer);
7702        }
7703        return;
7704    }
7705    let mut groups: Vec<Expr> = Vec::new();
7706    for item in &s.items {
7707        if let SelectItem::Expr { expr, .. } = item
7708            && !aggregate::contains_aggregate(expr)
7709        {
7710            groups.push(expr.clone());
7711        }
7712    }
7713    s.group_by = Some(groups);
7714    s.group_by_all = false;
7715    for (_, peer) in &mut s.unions {
7716        expand_group_by_all(peer);
7717    }
7718}
7719
7720fn resolve_order_by_position(s: &mut SelectStatement) {
7721    // v6.4.0 — iterate every ORDER BY key. Position references
7722    // (`ORDER BY 2`) bind to the 1-based projection index;
7723    // identifier references that match a SELECT-list alias bind to
7724    // the projected expression (Step 4 of L3a).
7725    for order in &mut s.order_by {
7726        match &order.expr {
7727            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
7728                if let Ok(idx_one_based) = usize::try_from(*n) {
7729                    let idx = idx_one_based - 1;
7730                    if idx < s.items.len()
7731                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
7732                    {
7733                        order.expr = expr.clone();
7734                    }
7735                }
7736            }
7737            Expr::Column(c) if c.qualifier.is_none() => {
7738                // Alias-in-ORDER-BY lookup.
7739                for item in &s.items {
7740                    if let SelectItem::Expr {
7741                        expr,
7742                        alias: Some(a),
7743                    } = item
7744                        && a == &c.name
7745                    {
7746                        order.expr = expr.clone();
7747                        break;
7748                    }
7749                }
7750            }
7751            _ => {}
7752        }
7753    }
7754    for (_, peer) in &mut s.unions {
7755        resolve_order_by_position(peer);
7756    }
7757}
7758
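// NOTE: this paragraph documents `sort_by_keys`, which is defined
// right after `partial_sort_tagged`. It is kept as a plain `//`
// comment so rustdoc does not attach it to the wrong function.
//
// Sort `tagged` by its `f64` keys, reversing the comparator under
// DESC. Used by the UNION ORDER BY path; per-block paths inline the
// same comparator because they already hold `&OrderBy` directly.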
7762/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
7763/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
7764/// partition the prefix in O(n), then sort just that prefix in O(k
7765/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
7766/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
7767/// full-sort behaviour.
7768///
7769/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
7770/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
7771fn partial_sort_tagged(
7772    tagged: &mut Vec<(Vec<f64>, Row)>,
7773    keep: Option<usize>,
7774    descs: &[bool],
7775) {
7776    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
7777    match keep {
7778        Some(k) if k < tagged.len() && k > 0 => {
7779            let pivot = k - 1;
7780            tagged.select_nth_unstable_by(pivot, cmp);
7781            tagged[..k].sort_by(cmp);
7782            tagged.truncate(k);
7783        }
7784        _ => {
7785            tagged.sort_by(cmp);
7786        }
7787    }
7788}
7789
7790fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
7791    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
7792}
7793
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Compares `a` and `b` key by key and returns the first non-equal
/// result. Key `i` is reversed when `descs[i]` is `true`; a missing
/// flag means ascending. NULL is encoded by the caller as
/// `f64::INFINITY`, so it sorts last in ASC and first in DESC
/// (matches PG default).
///
/// NaN keys sort after every other value (including the NULL
/// sentinel) and equal to each other. Treating NaN as `Equal` to
/// everything would break transitivity, and `sort_by` /
/// `select_nth_unstable_by` may panic or leave the order unspecified
/// when the comparator is not a total order.
///
/// Only the common prefix of `a` and `b` is compared; if it is
/// entirely equal the result is `Equal`.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when a NaN is involved; rank
        // NaN above non-NaN and equal to another NaN.
        let ascending = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ascending.reverse()
        } else {
            ascending
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
7812
7813/// v6.4.0 — eval every ORDER BY expression for a row and pack the
7814/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
7815fn build_order_keys(
7816    order_by: &[OrderBy],
7817    row: &Row,
7818    ctx: &EvalContext,
7819) -> Result<Vec<f64>, EngineError> {
7820    let mut keys = Vec::with_capacity(order_by.len());
7821    for o in order_by {
7822        let v = eval::eval_expr(&o.expr, row, ctx)?;
7823        keys.push(value_to_order_key(&v)?);
7824    }
7825    Ok(keys)
7826}
7827
7828/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
7829/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
7830/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
7831fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
7832    if let Some(off) = offset {
7833        let off = off as usize;
7834        if off >= rows.len() {
7835            rows.clear();
7836        } else {
7837            rows.drain(..off);
7838        }
7839    }
7840    if let Some(n) = limit {
7841        rows.truncate(n as usize);
7842    }
7843}
7844
7845/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
7846/// names + parent table name) into the storage-layer shape (column
7847/// indices + same parent table). Validates everything the engine
7848/// needs to know about the FK at CREATE TABLE time:
7849///
7850///   - parent table exists (catalog lookup, unless self-referencing)
7851///   - parent columns exist on the parent table
7852///   - parent column list matches the local arity (defaults to the
7853///     parent's primary index column when omitted)
7854///   - parent columns are covered by a `BTree` UNIQUE-class index
7855///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
7856///     the v7.6.2 INSERT path can do an O(log n) parent lookup
7857///   - local columns exist on the table being created
7858fn resolve_foreign_key(
7859    local_table_name: &str,
7860    local_cols: &[ColumnSchema],
7861    fk: spg_sql::ast::ForeignKeyConstraint,
7862    catalog: &Catalog,
7863) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
7864    // Resolve local columns.
7865    let mut local_columns = Vec::with_capacity(fk.columns.len());
7866    for name in &fk.columns {
7867        let pos = local_cols
7868            .iter()
7869            .position(|c| c.name == *name)
7870            .ok_or_else(|| {
7871                EngineError::Unsupported(alloc::format!(
7872                    "FOREIGN KEY references unknown local column {name:?}"
7873                ))
7874            })?;
7875        local_columns.push(pos);
7876    }
7877    // Self-referencing FK: parent table is the one we're creating.
7878    // The parent column resolution uses the local column list since
7879    // the catalog doesn't have this table yet.
7880    let is_self_ref = fk.parent_table == local_table_name;
7881    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
7882        (local_cols, local_table_name)
7883    } else {
7884        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
7885            EngineError::Storage(StorageError::TableNotFound {
7886                name: fk.parent_table.clone(),
7887            })
7888        })?;
7889        (parent_table.schema().columns.as_slice(), fk.parent_table.as_str())
7890    };
7891    // Resolve parent column names → positions. If the FK omitted the
7892    // parent column list, fall back to the parent's primary index
7893    // column (single-column only — composite default is rejected
7894    // because there's no unambiguous "PK" in SPG's index list).
7895    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
7896        if fk.columns.len() != 1 {
7897            return Err(EngineError::Unsupported(
7898                "composite FOREIGN KEY without explicit parent column list is not supported \
7899                 — list the parent columns explicitly"
7900                    .into(),
7901            ));
7902        }
7903        // Find a single BTree index on the parent and use its column.
7904        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
7905            .ok_or_else(|| {
7906                EngineError::Unsupported(alloc::format!(
7907                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
7908                     to default the FOREIGN KEY against"
7909                ))
7910            })?;
7911        alloc::vec![pos]
7912    } else {
7913        let mut out = Vec::with_capacity(fk.parent_columns.len());
7914        for name in &fk.parent_columns {
7915            let pos = parent_cols_for_lookup
7916                .iter()
7917                .position(|c| c.name == *name)
7918                .ok_or_else(|| {
7919                    EngineError::Unsupported(alloc::format!(
7920                        "FOREIGN KEY references unknown parent column \
7921                         {name:?} on table {parent_table_str:?}"
7922                    ))
7923                })?;
7924            out.push(pos);
7925        }
7926        out
7927    };
7928    if parent_columns.len() != local_columns.len() {
7929        return Err(EngineError::Unsupported(alloc::format!(
7930            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
7931            local_columns.len(),
7932            parent_columns.len()
7933        )));
7934    }
7935    // For non-self-referencing FKs, verify the parent column set is
7936    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
7937    // declaration; the convention is "the parent column for FK
7938    // purposes must have a BTree index" — which the user creates via
7939    // `CREATE INDEX ... USING btree (col)` (the default). We accept
7940    // any single-column BTree index that covers a parent column;
7941    // composite parent column lists require an index whose `column_position`
7942    // matches the first parent column (multi-column BTree indices
7943    // are not in the v7.x roadmap).
7944    if !is_self_ref {
7945        let parent_table = catalog
7946            .get(&fk.parent_table)
7947            .expect("checked above");
7948        let primary_parent_col = parent_columns[0];
7949        let has_btree = parent_table.schema().columns.get(primary_parent_col).is_some()
7950            && parent_table
7951                .indices()
7952                .iter()
7953                .any(|idx| {
7954                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7955                        && idx.column_position == primary_parent_col
7956                        && idx.partial_predicate.is_none()
7957                });
7958        if !has_btree {
7959            return Err(EngineError::Unsupported(alloc::format!(
7960                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
7961                 index — create one with `CREATE INDEX ... ON {} ({})` first",
7962                parent_table_str,
7963                parent_table_str,
7964                parent_table.schema().columns[primary_parent_col].name,
7965            )));
7966        }
7967    }
7968    let on_delete = fk_action_sql_to_storage(fk.on_delete);
7969    let on_update = fk_action_sql_to_storage(fk.on_update);
7970    Ok(spg_storage::ForeignKeyConstraint {
7971        name: fk.name,
7972        local_columns,
7973        parent_table: fk.parent_table,
7974        parent_columns,
7975        on_delete,
7976        on_update,
7977    })
7978}
7979
7980/// v7.6.1 — pick a sentinel "primary key" column from the parent
7981/// table when the FK didn't name parent columns. Picks the first
7982/// single-column unconditional BTree index — that's the closest
7983/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
7984/// `local_cols` as the column source.
7985fn pick_pk_index_column(
7986    catalog: &Catalog,
7987    parent_name: &str,
7988    is_self_ref: bool,
7989    local_cols: &[ColumnSchema],
7990) -> Option<usize> {
7991    if is_self_ref {
7992        // Self-ref FK omitted parent columns: pick column 0 by
7993        // convention (no catalog entry yet). Engine will widen this
7994        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
7995        let _ = local_cols;
7996        return Some(0);
7997    }
7998    let parent = catalog.get(parent_name)?;
7999    parent.indices().iter().find_map(|idx| {
8000        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8001            && idx.partial_predicate.is_none()
8002            && idx.included_columns.is_empty()
8003            && idx.expression.is_none()
8004        {
8005            Some(idx.column_position)
8006        } else {
8007            None
8008        }
8009    })
8010}
8011
8012/// v7.9.8 / v7.9.10 — resolve the column positions that
8013/// identify a conflict for ON CONFLICT. Returns a Vec of
8014/// column positions (1 element for single-column form, N for
8015/// composite). When the user wrote bare `ON CONFLICT DO …`,
8016/// falls back to the table's first unconditional BTree index
8017/// (always single-column today).
8018fn resolve_on_conflict_columns(
8019    catalog: &Catalog,
8020    table_name: &str,
8021    target: &[String],
8022) -> Result<Vec<usize>, EngineError> {
8023    let table = catalog.get(table_name).ok_or_else(|| {
8024        EngineError::Storage(StorageError::TableNotFound {
8025            name: table_name.into(),
8026        })
8027    })?;
8028    if target.is_empty() {
8029        let pos = table
8030            .indices()
8031            .iter()
8032            .find_map(|idx| {
8033                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8034                    && idx.partial_predicate.is_none()
8035                    && idx.included_columns.is_empty()
8036                    && idx.expression.is_none()
8037                {
8038                    Some(idx.column_position)
8039                } else {
8040                    None
8041                }
8042            })
8043            .ok_or_else(|| {
8044                EngineError::Unsupported(alloc::format!(
8045                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
8046                ))
8047            })?;
8048        return Ok(alloc::vec![pos]);
8049    }
8050    let mut out = Vec::with_capacity(target.len());
8051    for name in target {
8052        let pos = table
8053            .schema()
8054            .columns
8055            .iter()
8056            .position(|c| c.name == *name)
8057            .ok_or_else(|| {
8058                EngineError::Unsupported(alloc::format!(
8059                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
8060                ))
8061            })?;
8062        out.push(pos);
8063    }
8064    Ok(out)
8065}
8066
8067/// v7.9.8 — check whether the BTree index on `column_pos` of
8068/// `table_name` already has a row with this key.
8069fn on_conflict_key_exists(
8070    catalog: &Catalog,
8071    table_name: &str,
8072    column_pos: usize,
8073    key: &Value,
8074) -> bool {
8075    let Some(table) = catalog.get(table_name) else {
8076        return false;
8077    };
8078    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
8079        return false;
8080    };
8081    table.indices().iter().any(|idx| {
8082        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8083            && idx.column_position == column_pos
8084            && idx.partial_predicate.is_none()
8085            && !idx.lookup_eq(&idx_key).is_empty()
8086    })
8087}
8088
8089/// v7.9.9 / v7.9.10 — look up an existing row's position by
8090/// matching all `column_positions` against the incoming `key`
8091/// tuple. Single-column shape (one column) reduces to the
8092/// canonical PK lookup; composite shapes scan linearly until
8093/// every position matches.
8094fn lookup_row_position_by_keys(
8095    catalog: &Catalog,
8096    table_name: &str,
8097    column_positions: &[usize],
8098    key: &[&Value],
8099) -> Option<usize> {
8100    let table = catalog.get(table_name)?;
8101    table.rows().iter().position(|r| {
8102        column_positions
8103            .iter()
8104            .enumerate()
8105            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
8106    })
8107}
8108
8109/// v7.9.10 — does the table already contain a row whose
8110/// `column_positions` tuple equals `key`? Single-column shape
8111/// uses the existing BTree fast path; composite shapes fall
8112/// back to a row scan.
8113fn on_conflict_keys_exist(
8114    catalog: &Catalog,
8115    table_name: &str,
8116    column_positions: &[usize],
8117    key: &[&Value],
8118) -> bool {
8119    if column_positions.len() == 1 {
8120        return on_conflict_key_exists(
8121            catalog,
8122            table_name,
8123            column_positions[0],
8124            key[0],
8125        );
8126    }
8127    let Some(table) = catalog.get(table_name) else {
8128        return false;
8129    };
8130    table.rows().iter().any(|r| {
8131        column_positions
8132            .iter()
8133            .enumerate()
8134            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
8135    })
8136}
8137
8138/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
8139/// existing row.
8140///
8141/// `incoming` is the rejected INSERT row (used to resolve
8142/// `EXCLUDED.col` references in the assignment exprs);
8143/// `target_pos` is the position of the existing row in the table.
8144/// Each assignment substitutes `EXCLUDED.col` with the matching
8145/// incoming value, evaluates the resulting expression against
8146/// the existing row, and writes the new value into the
8147/// corresponding column of the returned `Vec<Value>`. If
8148/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
8149/// the conflicting row is silently kept unchanged.
8150fn apply_on_conflict_assignments(
8151    catalog: &Catalog,
8152    table_name: &str,
8153    target_pos: usize,
8154    incoming: &[Value],
8155    assignments: &[(String, Expr)],
8156    where_: Option<&Expr>,
8157) -> Result<Option<Vec<Value>>, EngineError> {
8158    let table = catalog.get(table_name).ok_or_else(|| {
8159        EngineError::Storage(StorageError::TableNotFound {
8160            name: table_name.into(),
8161        })
8162    })?;
8163    let schema_cols = table.schema().columns.clone();
8164    let existing = table
8165        .rows()
8166        .get(target_pos)
8167        .ok_or_else(|| {
8168            EngineError::Unsupported(alloc::format!(
8169                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
8170            ))
8171        })?
8172        .clone();
8173    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
8174    // Optional WHERE filter on the conflict row.
8175    if let Some(w) = where_ {
8176        let pred = w.clone();
8177        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
8178        let v = eval::eval_expr(&pred, &existing, &ctx)?;
8179        if !matches!(v, Value::Bool(true)) {
8180            return Ok(None);
8181        }
8182    }
8183    let mut new_values = existing.values.clone();
8184    for (col_name, expr) in assignments {
8185        let target_idx = schema_cols
8186            .iter()
8187            .position(|c| c.name == *col_name)
8188            .ok_or_else(|| {
8189                EngineError::Eval(EvalError::ColumnNotFound {
8190                    name: col_name.clone(),
8191                })
8192            })?;
8193        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
8194        let v = eval::eval_expr(&sub, &existing, &ctx)?;
8195        new_values[target_idx] =
8196            coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
8197    }
8198    Ok(Some(new_values))
8199}
8200
8201/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
8202/// "EXCLUDED", name }` reference with a `Literal` of the matching
8203/// value from the incoming-row vec. Resolution against the
8204/// child-table column list (by name).
8205fn substitute_excluded_refs(
8206    expr: Expr,
8207    schema_cols: &[ColumnSchema],
8208    incoming: &[Value],
8209) -> Expr {
8210    use spg_sql::ast::ColumnName;
8211    match expr {
8212        Expr::Column(ColumnName { qualifier, name })
8213            if qualifier
8214                .as_deref()
8215                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
8216        {
8217            let pos = schema_cols.iter().position(|c| c.name == name);
8218            match pos {
8219                Some(p) => {
8220                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
8221                    value_to_literal_expr(v).unwrap_or_else(|_| {
8222                        Expr::Literal(spg_sql::ast::Literal::Null)
8223                    })
8224                }
8225                None => Expr::Column(ColumnName { qualifier, name }),
8226            }
8227        }
8228        Expr::Binary { op, lhs, rhs } => Expr::Binary {
8229            op,
8230            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
8231            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
8232        },
8233        Expr::Unary { op, expr } => Expr::Unary {
8234            op,
8235            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
8236        },
8237        Expr::FunctionCall { name, args } => Expr::FunctionCall {
8238            name,
8239            args: args
8240                .into_iter()
8241                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
8242                .collect(),
8243        },
8244        other => other,
8245    }
8246}
8247
// NOTE: the comment below documents `enforce_fk_inserts`, which is
// defined further down in this file. It is a plain `//` comment so
// that rustdoc does not attach it to `enforce_uniqueness_inserts`.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//   (a) rows already committed to the table, plus
//   (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — a common pattern for bulk-loading
// hierarchies.

8264/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
8265/// constraints at INSERT time. For each constraint declared on
8266/// the target table, check that no existing row + no earlier row
8267/// in the same batch has the same full-column tuple. NULL in
8268/// any column lifts the row out of the check (SQL spec: NULL
8269/// ≠ NULL for uniqueness). mailrs G1 + G6.
8270fn enforce_uniqueness_inserts(
8271    catalog: &Catalog,
8272    child_table: &str,
8273    constraints: &[spg_storage::UniquenessConstraint],
8274    rows: &[Vec<Value>],
8275) -> Result<(), EngineError> {
8276    if constraints.is_empty() {
8277        return Ok(());
8278    }
8279    let table = catalog.get(child_table).ok_or_else(|| {
8280        EngineError::Storage(StorageError::TableNotFound {
8281            name: child_table.into(),
8282        })
8283    })?;
8284    for uc in constraints {
8285        for (batch_idx, row_values) in rows.iter().enumerate() {
8286            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
8287            let has_null = key.iter().any(|v| matches!(v, Value::Null));
8288            if has_null {
8289                continue;
8290            }
8291            // Table-side collision: scan existing rows.
8292            let collides_in_table = table.rows().iter().any(|prow| {
8293                uc.columns
8294                    .iter()
8295                    .enumerate()
8296                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
8297            });
8298            // Batch-side collision: earlier rows in the same INSERT.
8299            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
8300                uc.columns
8301                    .iter()
8302                    .enumerate()
8303                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
8304            });
8305            if collides_in_table || collides_in_batch {
8306                let kind = if uc.is_primary_key { "PRIMARY KEY" } else { "UNIQUE" };
8307                let col_names: Vec<String> = uc
8308                    .columns
8309                    .iter()
8310                    .map(|&i| table.schema().columns[i].name.clone())
8311                    .collect();
8312                return Err(EngineError::Unsupported(alloc::format!(
8313                    "{kind} violation on {child_table:?} columns {col_names:?}: \
8314                     row #{batch_idx} duplicates an existing key"
8315                )));
8316            }
8317        }
8318    }
8319    Ok(())
8320}
8321
8322/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
8323/// WHERE-style predicate. NULL → false (three-valued logic
8324/// collapses to "skip this row" for index inclusion). Numeric
8325/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
8326/// Everything else (strings, vectors, JSON, …) is not a valid
8327/// predicate result and surfaces as `false` so a malformed
8328/// predicate degrades to "row not in index" rather than panicking.
8329fn predicate_truthy(v: &spg_storage::Value) -> bool {
8330    use spg_storage::Value as V;
8331    match v {
8332        V::Bool(b) => *b,
8333        V::Int(n) => *n != 0,
8334        V::BigInt(n) => *n != 0,
8335        V::SmallInt(n) => *n != 0,
8336        _ => false,
8337    }
8338}
8339
8340/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
8341/// committed rows for pre-existing duplicates. If any pair of rows
8342/// matches the predicate AND has the same index key, refuse to
8343/// create the index so the user fixes the data before retrying.
8344fn check_existing_unique_violation(
8345    idx: &spg_storage::Index,
8346    schema: &spg_storage::TableSchema,
8347    rows: &[spg_storage::Row],
8348) -> Result<(), EngineError> {
8349    let predicate_expr = match idx.partial_predicate.as_deref() {
8350        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8351            EngineError::Unsupported(alloc::format!(
8352                "stored partial predicate {s:?} failed to re-parse: {e:?}"
8353            ))
8354        })?),
8355        None => None,
8356    };
8357    let ctx = eval::EvalContext::new(&schema.columns, None);
8358    let key_positions = unique_key_positions(idx);
8359    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
8360    for row in rows {
8361        if let Some(expr) = &predicate_expr {
8362            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
8363                EngineError::Unsupported(alloc::format!(
8364                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
8365                ))
8366            })?;
8367            if !predicate_truthy(&v) {
8368                continue;
8369            }
8370        }
8371        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
8372            .iter()
8373            .map(|&p| {
8374                row.values
8375                    .get(p)
8376                    .cloned()
8377                    .unwrap_or(spg_storage::Value::Null)
8378            })
8379            .collect();
8380        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8381            continue;
8382        }
8383        if seen.iter().any(|other| *other == key) {
8384            return Err(EngineError::Unsupported(alloc::format!(
8385                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
8386                idx.name
8387            )));
8388        }
8389        seen.push(key);
8390    }
8391    Ok(())
8392}
8393
8394/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
8395/// extra positions). For single-column indexes this is just
8396/// `[column_position]`.
8397fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
8398    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
8399    out.push(idx.column_position);
8400    out.extend_from_slice(&idx.extra_column_positions);
8401    out
8402}
8403
8404/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
8405/// target table. For each, eval the index's optional predicate
8406/// against (a) the candidate row and (b) every committed row plus
8407/// earlier batch rows; only rows where the predicate is truthy
8408/// participate. A duplicate key among predicate-matching rows is a
8409/// uniqueness violation. NULL keys lift the row out of the check
8410/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
8411fn enforce_unique_index_inserts(
8412    catalog: &Catalog,
8413    table_name: &str,
8414    rows: &[alloc::vec::Vec<spg_storage::Value>],
8415) -> Result<(), EngineError> {
8416    let table = catalog.get(table_name).ok_or_else(|| {
8417        EngineError::Storage(StorageError::TableNotFound {
8418            name: table_name.into(),
8419        })
8420    })?;
8421    let schema = table.schema();
8422    let ctx = eval::EvalContext::new(&schema.columns, None);
8423    for idx in table.indices() {
8424        if !idx.is_unique {
8425            continue;
8426        }
8427        // Re-parse the predicate once per index per batch.
8428        let predicate_expr = match idx.partial_predicate.as_deref() {
8429            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8430                EngineError::Unsupported(alloc::format!(
8431                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
8432                    idx.name
8433                ))
8434            })?),
8435            None => None,
8436        };
8437        let key_positions = unique_key_positions(idx);
8438        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
8439            key_positions
8440                .iter()
8441                .map(|&p| {
8442                    values
8443                        .get(p)
8444                        .cloned()
8445                        .unwrap_or(spg_storage::Value::Null)
8446                })
8447                .collect()
8448        };
8449        // Helper: does `values` participate in this index? (predicate
8450        // truthy when present.) Wraps `values` into a transient Row
8451        // because eval_expr requires &Row.
8452        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
8453            let Some(expr) = &predicate_expr else {
8454                return Ok(true);
8455            };
8456            let tmp_row = spg_storage::Row {
8457                values: values.to_vec(),
8458            };
8459            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
8460                EngineError::Unsupported(alloc::format!(
8461                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
8462                    idx.name
8463                ))
8464            })?;
8465            Ok(predicate_truthy(&v))
8466        };
8467        for (batch_idx, row_values) in rows.iter().enumerate() {
8468            if !participates(row_values)? {
8469                continue;
8470            }
8471            let key = key_of(row_values);
8472            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8473                continue;
8474            }
8475            // Committed-table collision.
8476            for prow in table.rows() {
8477                if !participates(&prow.values)? {
8478                    continue;
8479                }
8480                if key_of(&prow.values) == key {
8481                    return Err(EngineError::Unsupported(alloc::format!(
8482                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
8483                         row #{batch_idx} duplicates an existing key",
8484                        idx.name
8485                    )));
8486                }
8487            }
8488            // Within-batch collision: earlier rows in the same INSERT.
8489            for earlier in &rows[..batch_idx] {
8490                if !participates(earlier)? {
8491                    continue;
8492                }
8493                if key_of(earlier) == key {
8494                    return Err(EngineError::Unsupported(alloc::format!(
8495                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
8496                         row #{batch_idx} duplicates an earlier row in the same batch",
8497                        idx.name
8498                    )));
8499                }
8500            }
8501        }
8502    }
8503    Ok(())
8504}
8505
8506fn enforce_fk_inserts(
8507    catalog: &Catalog,
8508    child_table: &str,
8509    fks: &[spg_storage::ForeignKeyConstraint],
8510    rows: &[Vec<Value>],
8511) -> Result<(), EngineError> {
8512    for fk in fks {
8513        let parent_is_self = fk.parent_table == child_table;
8514        let parent = if parent_is_self {
8515            // Self-ref: read the current state of the same table.
8516            // The mut borrow on child has been dropped by the caller.
8517            catalog.get(child_table).ok_or_else(|| {
8518                EngineError::Storage(StorageError::TableNotFound {
8519                    name: child_table.into(),
8520                })
8521            })?
8522        } else {
8523            catalog.get(&fk.parent_table).ok_or_else(|| {
8524                EngineError::Storage(StorageError::TableNotFound {
8525                    name: fk.parent_table.clone(),
8526                })
8527            })?
8528        };
8529        for (batch_idx, row_values) in rows.iter().enumerate() {
8530            // Single-column FK fast path: try the parent's BTree
8531            // index for an O(log n) lookup. Composite FKs fall back
8532            // to a parent-row scan.
8533            if fk.local_columns.len() == 1 {
8534                let v = &row_values[fk.local_columns[0]];
8535                if matches!(v, Value::Null) {
8536                    continue;
8537                }
8538                let parent_col = fk.parent_columns[0];
8539                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
8540                    EngineError::Unsupported(alloc::format!(
8541                        "FOREIGN KEY column value of type {:?} is not index-eligible",
8542                        v.data_type()
8543                    ))
8544                })?;
8545                let present_committed = parent.indices().iter().any(|idx| {
8546                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8547                        && idx.column_position == parent_col
8548                        && idx.partial_predicate.is_none()
8549                        && !idx.lookup_eq(&key).is_empty()
8550                });
8551                // v7.6.7 self-ref widening: also accept a match
8552                // against earlier rows in this same batch when the
8553                // FK points at the table being inserted into.
8554                let present_in_batch = parent_is_self
8555                    && rows[..batch_idx].iter().any(|earlier| {
8556                        earlier.get(parent_col) == Some(v)
8557                    });
8558                if !(present_committed || present_in_batch) {
8559                    return Err(EngineError::Unsupported(alloc::format!(
8560                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
8561                        fk.parent_table,
8562                        parent
8563                            .schema()
8564                            .columns
8565                            .get(parent_col)
8566                            .map_or("?", |c| c.name.as_str()),
8567                        v,
8568                    )));
8569                }
8570            } else {
8571                // Composite FK: scan parent rows. v7.6.7 also
8572                // accepts a match against earlier rows in the same
8573                // batch (self-ref bulk-loading of hierarchies).
8574                if fk.local_columns
8575                    .iter()
8576                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
8577                {
8578                    continue;
8579                }
8580                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
8581                let parent_match_committed = parent.rows().iter().any(|prow| {
8582                    fk.parent_columns
8583                        .iter()
8584                        .enumerate()
8585                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
8586                });
8587                let parent_match_in_batch = parent_is_self
8588                    && rows[..batch_idx].iter().any(|earlier| {
8589                        fk.parent_columns
8590                            .iter()
8591                            .enumerate()
8592                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
8593                    });
8594                if !(parent_match_committed || parent_match_in_batch) {
8595                    return Err(EngineError::Unsupported(alloc::format!(
8596                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
8597                        fk.parent_table,
8598                    )));
8599                }
8600            }
8601        }
8602    }
8603    Ok(())
8604}
8605
8606/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
8607/// DELETE on a parent. The plan is a list of these steps, stacked
8608/// across the FK graph by `plan_fk_parent_deletions`.
8609#[derive(Debug, Clone)]
8610struct FkChildStep {
8611    child_table: String,
8612    action: FkChildAction,
8613}
8614
8615#[derive(Debug, Clone)]
8616enum FkChildAction {
8617    /// CASCADE — remove these rows. Sorted, deduplicated positions.
8618    Delete { positions: Vec<usize> },
8619    /// SET NULL — for each (row, column) in the flat list, write
8620    /// NULL into that child cell. Multiple FKs on the same row may
8621    /// produce overlapping entries (deduped at plan time).
8622    SetNull {
8623        positions: Vec<usize>,
8624        columns: Vec<usize>,
8625    },
8626    /// SET DEFAULT — same shape as SetNull but writes the column's
8627    /// declared DEFAULT value (resolved at plan time). Columns
8628    /// without a DEFAULT raise an error during planning.
8629    SetDefault {
8630        positions: Vec<usize>,
8631        columns: Vec<usize>,
8632        defaults: Vec<Value>,
8633    },
8634}
8635
8636/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
8637///
8638/// Walks every table in the catalog looking for FKs whose
8639/// `parent_table` is `parent_table_name`. For each such FK + each
8640/// to-be-deleted parent row:
8641///
8642///   - RESTRICT / NoAction → error, no plan returned
8643///   - CASCADE → child rows get scheduled for deletion; recursive
8644///   - SetNull → child FK column(s) scheduled to be NULL-ed.
8645///     Verified NULL-able at plan time.
8646///   - SetDefault → child FK column(s) scheduled to be reset to
8647///     their declared DEFAULT. Columns without a DEFAULT raise.
8648///
8649/// SET NULL / SET DEFAULT do NOT cascade further — the child row
8650/// stays; only one of its columns mutates.
8651fn plan_fk_parent_deletions(
8652    catalog: &Catalog,
8653    parent_table_name: &str,
8654    to_delete_positions: &[usize],
8655    to_delete_rows: &[Vec<Value>],
8656) -> Result<Vec<FkChildStep>, EngineError> {
8657    use alloc::collections::{BTreeMap, BTreeSet};
8658    if to_delete_rows.is_empty() {
8659        return Ok(Vec::new());
8660    }
8661    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
8662    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
8663    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
8664    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> =
8665        BTreeMap::new();
8666    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
8667    for &p in to_delete_positions {
8668        visited.insert((parent_table_name.to_string(), p));
8669    }
8670    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
8671        .iter()
8672        .map(|r| (parent_table_name.to_string(), r.clone()))
8673        .collect();
8674    while let Some((cur_parent, parent_row)) = work.pop() {
8675        for child_name in catalog.table_names() {
8676            let child = catalog
8677                .get(&child_name)
8678                .expect("table_names → catalog.get round-trip is total");
8679            for fk in &child.schema().foreign_keys {
8680                if fk.parent_table != cur_parent {
8681                    continue;
8682                }
8683                let parent_key: Vec<&Value> = fk
8684                    .parent_columns
8685                    .iter()
8686                    .map(|&pi| &parent_row[pi])
8687                    .collect();
8688                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
8689                    continue;
8690                }
8691                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
8692                    if child_name == cur_parent
8693                        && visited.contains(&(child_name.clone(), child_row_idx))
8694                    {
8695                        continue;
8696                    }
8697                    let matches_key = fk
8698                        .local_columns
8699                        .iter()
8700                        .enumerate()
8701                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
8702                    if !matches_key {
8703                        continue;
8704                    }
8705                    match fk.on_delete {
8706                        spg_storage::FkAction::Restrict
8707                        | spg_storage::FkAction::NoAction => {
8708                            return Err(EngineError::Unsupported(alloc::format!(
8709                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
8710                                 restricted by FK from {child_name:?}.{:?}",
8711                                fk.local_columns,
8712                            )));
8713                        }
8714                        spg_storage::FkAction::Cascade => {
8715                            if visited.insert((child_name.clone(), child_row_idx)) {
8716                                delete_plan
8717                                    .entry(child_name.clone())
8718                                    .or_default()
8719                                    .insert(child_row_idx);
8720                                work.push((child_name.clone(), child_row.values.clone()));
8721                            }
8722                        }
8723                        spg_storage::FkAction::SetNull => {
8724                            // Verify every local FK column is NULL-able.
8725                            for &li in &fk.local_columns {
8726                                let col = child.schema().columns.get(li).ok_or_else(|| {
8727                                    EngineError::Unsupported(alloc::format!(
8728                                        "FK local column {li} missing in {child_name:?}"
8729                                    ))
8730                                })?;
8731                                if !col.nullable {
8732                                    return Err(EngineError::Unsupported(alloc::format!(
8733                                        "FOREIGN KEY ON DELETE SET NULL: column \
8734                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
8735                                        col.name,
8736                                    )));
8737                                }
8738                            }
8739                            let entry = setnull_plan.entry(child_name.clone()).or_default();
8740                            for &li in &fk.local_columns {
8741                                entry.insert((child_row_idx, li));
8742                            }
8743                        }
8744                        spg_storage::FkAction::SetDefault => {
8745                            // Resolve the DEFAULT for every local FK col.
8746                            let entry =
8747                                setdefault_plan.entry(child_name.clone()).or_default();
8748                            for &li in &fk.local_columns {
8749                                let col = child.schema().columns.get(li).ok_or_else(|| {
8750                                    EngineError::Unsupported(alloc::format!(
8751                                        "FK local column {li} missing in {child_name:?}"
8752                                    ))
8753                                })?;
8754                                let default = col.default.clone().ok_or_else(|| {
8755                                    EngineError::Unsupported(alloc::format!(
8756                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
8757                                         {child_name:?}.{:?} has no DEFAULT declared",
8758                                        col.name,
8759                                    ))
8760                                })?;
8761                                entry.insert((child_row_idx, li), default);
8762                            }
8763                        }
8764                    }
8765                }
8766            }
8767        }
8768    }
8769    // Flatten the three plans into the ordered `FkChildStep` list.
8770    // Deletes are applied last per child (after any null/default
8771    // re-writes on the same child) so a child row that's both
8772    // re-written and then cascade-deleted only ends up deleted —
8773    // but in v7.6.5 SetNull/Cascade never overlap on the same row
8774    // (a single FK chooses exactly one action), so the order is
8775    // mostly a precaution.
8776    let mut steps: Vec<FkChildStep> = Vec::new();
8777    for (child_table, entries) in setnull_plan {
8778        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
8779        steps.push(FkChildStep {
8780            child_table,
8781            action: FkChildAction::SetNull { positions, columns },
8782        });
8783    }
8784    for (child_table, entries) in setdefault_plan {
8785        let mut positions = Vec::with_capacity(entries.len());
8786        let mut columns = Vec::with_capacity(entries.len());
8787        let mut defaults = Vec::with_capacity(entries.len());
8788        for ((p, c), v) in entries {
8789            positions.push(p);
8790            columns.push(c);
8791            defaults.push(v);
8792        }
8793        steps.push(FkChildStep {
8794            child_table,
8795            action: FkChildAction::SetDefault {
8796                positions,
8797                columns,
8798                defaults,
8799            },
8800        });
8801    }
8802    for (child_table, positions) in delete_plan {
8803        steps.push(FkChildStep {
8804            child_table,
8805            action: FkChildAction::Delete {
8806                positions: positions.into_iter().collect(),
8807            },
8808        });
8809    }
8810    Ok(steps)
8811}
8812
8813/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
8814/// PK/UNIQUE columns. Walks every other table whose FK references
8815/// `parent_table_name`; for each FK whose parent_columns overlap a
8816/// mutated column, decides the action by `fk.on_update`.
8817///
8818///   - RESTRICT / NoAction → error if any child references the OLD
8819///     value
8820///   - CASCADE → child FK columns get rewritten to the NEW parent
8821///     value (a SetNull-style update step with the new value)
8822///   - SetNull → child FK columns set to NULL
8823///   - SetDefault → child FK columns set to declared default
8824///
8825/// `plan_with_old` is `(row_position, old_values, new_values)` so
8826/// the planner can detect "did this row's parent key actually
8827/// change?" — only rows where at least one referenced parent
8828/// column moved trigger inbound work.
8829fn plan_fk_parent_updates(
8830    catalog: &Catalog,
8831    parent_table_name: &str,
8832    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
8833) -> Result<Vec<FkChildStep>, EngineError> {
8834    use alloc::collections::BTreeMap;
8835    if plan_with_old.is_empty() {
8836        return Ok(Vec::new());
8837    }
8838    // For each child table we may touch, build per-child step
8839    // lists. UPDATE never deletes children — `delete_plan` stays
8840    // empty here but is kept structurally aligned with
8841    // `plan_fk_parent_deletions` for future use.
8842    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
8843    let mut setnull_plan: BTreeMap<
8844        String,
8845        alloc::collections::BTreeSet<(usize, usize)>,
8846    > = BTreeMap::new();
8847    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> =
8848        BTreeMap::new();
8849    // Cascade-update plan: child_table → row_idx → col_idx → new_value
8850    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
8851
8852    for child_name in catalog.table_names() {
8853        let child = catalog
8854            .get(&child_name)
8855            .expect("table_names → catalog.get total");
8856        for fk in &child.schema().foreign_keys {
8857            if fk.parent_table != parent_table_name {
8858                continue;
8859            }
8860            for (_pos, old_row, new_row) in plan_with_old {
8861                // Did any parent FK column change?
8862                let key_changed = fk
8863                    .parent_columns
8864                    .iter()
8865                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
8866                if !key_changed {
8867                    continue;
8868                }
8869                // The OLD parent key — used to find referring children.
8870                let old_key: Vec<&Value> = fk
8871                    .parent_columns
8872                    .iter()
8873                    .map(|&pi| &old_row[pi])
8874                    .collect();
8875                if old_key.iter().any(|v| matches!(v, Value::Null)) {
8876                    // NULL parent has no children — skip.
8877                    continue;
8878                }
8879                let new_key: Vec<&Value> = fk
8880                    .parent_columns
8881                    .iter()
8882                    .map(|&pi| &new_row[pi])
8883                    .collect();
8884                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
8885                    // Self-ref same-row updates: a row updating its
8886                    // own PK doesn't restrict itself.
8887                    if child_name == parent_table_name
8888                        && plan_with_old
8889                            .iter()
8890                            .any(|(p, _, _)| *p == child_row_idx)
8891                    {
8892                        continue;
8893                    }
8894                    let matches_key = fk
8895                        .local_columns
8896                        .iter()
8897                        .enumerate()
8898                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
8899                    if !matches_key {
8900                        continue;
8901                    }
8902                    match fk.on_update {
8903                        spg_storage::FkAction::Restrict
8904                        | spg_storage::FkAction::NoAction => {
8905                            return Err(EngineError::Unsupported(alloc::format!(
8906                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
8907                                 restricted by FK from {child_name:?}.{:?}",
8908                                fk.local_columns,
8909                            )));
8910                        }
8911                        spg_storage::FkAction::Cascade => {
8912                            // Rewrite child FK columns to new key.
8913                            let entry = cascade_plan.entry(child_name.clone()).or_default();
8914                            for (i, &li) in fk.local_columns.iter().enumerate() {
8915                                entry.insert((child_row_idx, li), new_key[i].clone());
8916                            }
8917                        }
8918                        spg_storage::FkAction::SetNull => {
8919                            for &li in &fk.local_columns {
8920                                let col = child.schema().columns.get(li).ok_or_else(|| {
8921                                    EngineError::Unsupported(alloc::format!(
8922                                        "FK local column {li} missing in {child_name:?}"
8923                                    ))
8924                                })?;
8925                                if !col.nullable {
8926                                    return Err(EngineError::Unsupported(alloc::format!(
8927                                        "FOREIGN KEY ON UPDATE SET NULL: column \
8928                                         {child_name:?}.{:?} is NOT NULL",
8929                                        col.name,
8930                                    )));
8931                                }
8932                            }
8933                            let entry = setnull_plan.entry(child_name.clone()).or_default();
8934                            for &li in &fk.local_columns {
8935                                entry.insert((child_row_idx, li));
8936                            }
8937                        }
8938                        spg_storage::FkAction::SetDefault => {
8939                            let entry =
8940                                setdefault_plan.entry(child_name.clone()).or_default();
8941                            for &li in &fk.local_columns {
8942                                let col = child.schema().columns.get(li).ok_or_else(|| {
8943                                    EngineError::Unsupported(alloc::format!(
8944                                        "FK local column {li} missing in {child_name:?}"
8945                                    ))
8946                                })?;
8947                                let default = col.default.clone().ok_or_else(|| {
8948                                    EngineError::Unsupported(alloc::format!(
8949                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
8950                                         {child_name:?}.{:?} has no DEFAULT",
8951                                        col.name,
8952                                    ))
8953                                })?;
8954                                entry.insert((child_row_idx, li), default);
8955                            }
8956                        }
8957                    }
8958                }
8959            }
8960        }
8961    }
8962    // Flatten into FkChildStep list. UPDATE doesn't produce
8963    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
8964    let mut steps: Vec<FkChildStep> = Vec::new();
8965    for (child_table, entries) in cascade_plan {
8966        let mut positions = Vec::with_capacity(entries.len());
8967        let mut columns = Vec::with_capacity(entries.len());
8968        let mut defaults = Vec::with_capacity(entries.len());
8969        for ((p, c), v) in entries {
8970            positions.push(p);
8971            columns.push(c);
8972            defaults.push(v);
8973        }
8974        // We reuse `FkChildAction::SetDefault` for cascade-update:
8975        // both shapes are "write a known value into specific cells"
8976        // — `apply_per_cell_writes` doesn't care whether the value
8977        // came from a DEFAULT declaration or a new parent key.
8978        steps.push(FkChildStep {
8979            child_table,
8980            action: FkChildAction::SetDefault {
8981                positions,
8982                columns,
8983                defaults,
8984            },
8985        });
8986    }
8987    for (child_table, entries) in setnull_plan {
8988        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
8989        steps.push(FkChildStep {
8990            child_table,
8991            action: FkChildAction::SetNull { positions, columns },
8992        });
8993    }
8994    for (child_table, entries) in setdefault_plan {
8995        let mut positions = Vec::with_capacity(entries.len());
8996        let mut columns = Vec::with_capacity(entries.len());
8997        let mut defaults = Vec::with_capacity(entries.len());
8998        for ((p, c), v) in entries {
8999            positions.push(p);
9000            columns.push(c);
9001            defaults.push(v);
9002        }
9003        steps.push(FkChildStep {
9004            child_table,
9005            action: FkChildAction::SetDefault {
9006                positions,
9007                columns,
9008                defaults,
9009            },
9010        });
9011    }
9012    let _ = delete_plan; // UPDATE never deletes children.
9013    Ok(steps)
9014}
9015
9016/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
9017/// the three action variants so the DELETE executor stays a
9018/// simple loop over the planned steps.
9019fn apply_fk_child_step(
9020    catalog: &mut Catalog,
9021    step: &FkChildStep,
9022) -> Result<(), EngineError> {
9023    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
9024        EngineError::Storage(StorageError::TableNotFound {
9025            name: step.child_table.clone(),
9026        })
9027    })?;
9028    match &step.action {
9029        FkChildAction::Delete { positions } => {
9030            let _ = child.delete_rows(positions);
9031        }
9032        FkChildAction::SetNull { positions, columns } => {
9033            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
9034        }
9035        FkChildAction::SetDefault {
9036            positions,
9037            columns,
9038            defaults,
9039        } => {
9040            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
9041        }
9042    }
9043    Ok(())
9044}
9045
9046/// v7.6.5 — write new values into selected child cells via
9047/// `Table::update_row` (the catalog's existing UPDATE entry).
9048/// Groups writes by row position so multi-column updates on the
9049/// same row only call `update_row` once. `value_for(i)` produces
9050/// the new value for the i-th (position, column) entry.
9051fn apply_per_cell_writes(
9052    child: &mut spg_storage::Table,
9053    positions: &[usize],
9054    columns: &[usize],
9055    mut value_for: impl FnMut(usize) -> Value,
9056) -> Result<(), EngineError> {
9057    use alloc::collections::BTreeMap;
9058    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
9059    for i in 0..positions.len() {
9060        by_row
9061            .entry(positions[i])
9062            .or_default()
9063            .push((columns[i], value_for(i)));
9064    }
9065    for (pos, mutations) in by_row {
9066        let mut new_values = child.rows()[pos].values.clone();
9067        for (col, v) in mutations {
9068            if let Some(slot) = new_values.get_mut(col) {
9069                *slot = v;
9070            }
9071        }
9072        child
9073            .update_row(pos, new_values)
9074            .map_err(EngineError::Storage)?;
9075    }
9076    Ok(())
9077}
9078
9079fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
9080    match a {
9081        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
9082        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
9083        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
9084        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
9085        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
9086    }
9087}
9088
9089/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
9090/// default-fill. Free fn (rather than `&self`) so callers
9091/// with an active `&mut Table` borrow can still use it.
9092/// Literal defaults take the cached path (`col.default`);
9093/// runtime defaults hit `clock_fn` at each call. mailrs G4.
9094fn resolve_column_default_free(
9095    col: &ColumnSchema,
9096    clock_fn: Option<ClockFn>,
9097) -> Result<Value, EngineError> {
9098    if let Some(rt) = &col.runtime_default {
9099        return eval_runtime_default_free(rt, col.ty, clock_fn);
9100    }
9101    Ok(col.default.clone().unwrap_or(Value::Null))
9102}
9103
9104fn eval_runtime_default_free(
9105    rt: &str,
9106    ty: DataType,
9107    clock_fn: Option<ClockFn>,
9108) -> Result<Value, EngineError> {
9109    let s = rt.trim().to_ascii_lowercase();
9110    let canonical = s.trim_end_matches("()");
9111    let now_us = match clock_fn {
9112        Some(f) => f(),
9113        None => 0,
9114    };
9115    let v = match canonical {
9116        "now" | "current_timestamp" | "localtimestamp" => {
9117            Value::Timestamp(now_us)
9118        }
9119        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
9120        "current_time" | "localtime" => Value::Timestamp(now_us),
9121        other => {
9122            return Err(EngineError::Unsupported(alloc::format!(
9123                "runtime DEFAULT expression {other:?} not supported \
9124                 (v7.9.21 whitelist: now() / current_timestamp / \
9125                 current_date / current_time / localtimestamp / \
9126                 localtime)"
9127            )));
9128        }
9129    };
9130    coerce_value(v, ty, "DEFAULT", 0)
9131}
9132
9133/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
9134/// evaluation rather than being cacheable as a literal Value.
9135/// FunctionCall is the immediate case (`now()`,
9136/// `current_timestamp`). Literal expressions and simple sign-
9137/// flipped numerics still take the static-cache path.
9138fn is_runtime_default_expr(expr: &Expr) -> bool {
9139    match expr {
9140        Expr::FunctionCall { .. } => true,
9141        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
9142        _ => false,
9143    }
9144}
9145
9146fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
9147    let ty = column_type_to_data_type(c.ty);
9148    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
9149    if let Some(default_expr) = c.default {
9150        // v7.9.21 — distinguish literal defaults (evaluated once
9151        // at CREATE TABLE) from expression defaults (deferred to
9152        // INSERT). Function calls (`now()`, `current_timestamp`
9153        // — see v7.9.20 keyword promotion) take the runtime path.
9154        // Literals continue to cache. mailrs G4.
9155        if is_runtime_default_expr(&default_expr) {
9156            let display = alloc::format!("{default_expr}");
9157            schema = schema.with_runtime_default(display);
9158        } else {
9159            let raw = literal_expr_to_value(default_expr)?;
9160            let coerced = coerce_value(raw, ty, &c.name, 0)?;
9161            schema = schema.with_default(coerced);
9162        }
9163    }
9164    if c.auto_increment {
9165        // AUTO_INCREMENT only makes sense on integer-shaped columns.
9166        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
9167            return Err(EngineError::Unsupported(alloc::format!(
9168                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
9169            )));
9170        }
9171        schema = schema.with_auto_increment();
9172    }
9173    Ok(schema)
9174}
9175
/// v7.10.4 — decode a BYTEA literal. Surrounding whitespace is
/// trimmed, then the input is read as one of:
///   * `\xDEADBEEF` — hex form (prefix `\x` or `\X`, digits
///     case-insensitive, embedded whitespace ignored)
///   * `Hello\000world` — escape form: `\\` is a literal backslash
///     and `\NNN` is one byte given as three octal digits
///     (`\000`..=`\377`)
///   * anything else — the raw UTF-8 bytes of the input (PG accepts
///     this too)
///
/// A backslash that does not start a valid escape — including
/// `\NNN` with a non-octal digit (`8`, `9`) or a value above `\377`
/// — is kept as a literal byte.
///
/// # Errors
///
/// Only the hex form can fail: `"odd-length hex literal"` or
/// `"invalid hex digit"`.
fn decode_bytea_literal(s: &str) -> Result<Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        if cleaned.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let digits = cleaned.as_bytes();
        let mut out = Vec::with_capacity(digits.len() / 2);
        for i in (0..digits.len()).step_by(2) {
            let hi = hex_nibble(digits[i])?;
            let lo = hex_nibble(digits[i + 1])?;
            out.push((hi << 4) | lo);
        }
        return Ok(out);
    }
    // Escape form or raw. Only `\\` and `\NNN` are decoded; every
    // other byte is copied through unchanged.
    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        match &bytes[i..] {
            [b'\\', b'\\', ..] => {
                out.push(b'\\');
                i += 2;
            }
            // Three octal digits. Limiting the first to 0..=3 keeps
            // the value within one byte (max `\377` = 0xFF).
            [b'\\', a @ b'0'..=b'3', b @ b'0'..=b'7', c @ b'0'..=b'7', ..] => {
                out.push(((*a - b'0') << 6) | ((*b - b'0') << 3) | (*c - b'0'));
                i += 4;
            }
            _ => {
                out.push(bytes[i]);
                i += 1;
            }
        }
    }
    Ok(out)
}

/// Value (0..=15) of one ASCII hex digit, either case; any other
/// byte is rejected with `"invalid hex digit"`.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
9237
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The input is trimmed, must be wrapped in `{`…`}`, and is split at
/// commas. A blank interior yields an empty vector. Quoted elements
/// (`"hello, world"`) keep embedded commas, and inside quotes a
/// backslash makes the next character literal (`\\` → backslash,
/// `\"` → quote). Unquoted elements are trimmed; an unquoted `NULL`
/// (case-insensitive) maps to `None`, whereas a quoted `"NULL"` is
/// the four-character string.
///
/// Quoted elements are accumulated as raw bytes and validated as
/// UTF-8 at the end, so multi-byte characters survive intact.
///
/// # Errors
///
/// Returns a static message when the braces are missing, a quoted
/// element is never closed, or something other than `,` follows an
/// element.
fn decode_text_array_literal(s: &str) -> Result<Vec<Option<String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: Vec<Option<String>> = Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let mut i = 0;
    // `<=` on purpose: after a trailing comma `i == bytes.len()` and
    // one more (empty) element is still read.
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i < bytes.len() && bytes[i] == b'"' {
            // Quoted element.
            i += 1; // open quote
            let mut buf: Vec<u8> = Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    // Escaped byte: drop the backslash, keep what follows.
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII backslashes were removed from valid UTF-8,
            // so this should always succeed; reported rather than
            // unwrapped to keep the decoder panic-free.
            let text =
                String::from_utf8(buf).map_err(|_| "invalid UTF-8 in quoted element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end. Both
            // ends of the slice sit on ASCII bytes, so the `str`
            // slice is on character boundaries.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(raw.to_string()));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
9308
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form (`{a,b,NULL}`).
///
/// `None` elements become the bare token `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty, spells
/// `NULL` in any ASCII case, or contains a comma, brace, quote,
/// backslash, or any whitespace character. Quoting on *all* whitespace
/// (not just space and tab) matters for round-tripping: an unquoted
/// element is trimmed when the literal is parsed back, so a leading or
/// trailing newline would otherwise be lost.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars()
                        .any(|c| c.is_whitespace() || matches!(c, ',' | '{' | '}' | '"' | '\\'));
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        // Only quote and backslash need an escape prefix.
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
9346
9347/// v7.10.4 — encode BYTEA bytes in PG hex output format
9348/// (`\x` prefix, lowercase hex pairs). Used by Text-side
9349/// round-trip + the wire layer's text-mode encoder.
9350fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
9351    let mut out = alloc::string::String::with_capacity(2 + 2 * b.len());
9352    out.push_str("\\x");
9353    for byte in b {
9354        let hi = byte >> 4;
9355        let lo = byte & 0x0F;
9356        out.push(hex_digit(hi));
9357        out.push(hex_digit(lo));
9358    }
9359    out
9360}
9361
/// Lowercase hex character for a nibble value `0..=15`; any larger
/// input yields the placeholder `'?'`.
const fn hex_digit(n: u8) -> char {
    const LOWER_HEX: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];
    if n < 16 {
        LOWER_HEX[n as usize]
    } else {
        '?'
    }
}
9369
9370const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
9371    match t {
9372        ColumnTypeName::SmallInt => DataType::SmallInt,
9373        ColumnTypeName::Int => DataType::Int,
9374        ColumnTypeName::BigInt => DataType::BigInt,
9375        ColumnTypeName::Float => DataType::Float,
9376        ColumnTypeName::Text => DataType::Text,
9377        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
9378        ColumnTypeName::Char(n) => DataType::Char(n),
9379        ColumnTypeName::Bool => DataType::Bool,
9380        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
9381            dim,
9382            encoding: match encoding {
9383                SqlVecEncoding::F32 => VecEncoding::F32,
9384                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
9385                SqlVecEncoding::F16 => VecEncoding::F16,
9386            },
9387        },
9388        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
9389        ColumnTypeName::Date => DataType::Date,
9390        ColumnTypeName::Timestamp => DataType::Timestamp,
9391        ColumnTypeName::Timestamptz => DataType::Timestamptz,
9392        ColumnTypeName::Json => DataType::Json,
9393        ColumnTypeName::Jsonb => DataType::Jsonb,
9394        ColumnTypeName::Bytes => DataType::Bytes,
9395        ColumnTypeName::TextArray => DataType::TextArray,
9396    }
9397}
9398
9399/// Convert an INSERT VALUES expression to a storage Value. Supports literal
9400/// expressions, unary-minus over numeric literals, and pgvector-style
9401/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
9402fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
9403    match expr {
9404        Expr::Literal(l) => Ok(literal_to_value(l)),
9405        Expr::Cast { expr, target } => {
9406            let inner_value = literal_expr_to_value(*expr)?;
9407            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
9408        }
9409        Expr::Unary {
9410            op: UnOp::Neg,
9411            expr,
9412        } => match *expr {
9413            Expr::Literal(Literal::Integer(n)) => {
9414                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
9415                // — overflow on negate of i64::MIN is the one edge case.
9416                let neg = n.checked_neg().ok_or_else(|| {
9417                    EngineError::Unsupported("integer literal overflow on negation".into())
9418                })?;
9419                Ok(int_value_for(neg))
9420            }
9421            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
9422            other => Err(EngineError::Unsupported(alloc::format!(
9423                "unary minus over non-literal expression: {other:?}"
9424            ))),
9425        },
9426        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
9427        // INSERT-time. Each element must reduce to a Value through
9428        // `literal_expr_to_value`; NULL elements become `None`.
9429        // Casts (e.g. `ARRAY[]::TEXT[]`) flow through the outer
9430        // Cast arm before reaching here.
9431        Expr::Array(items) => {
9432            let mut out: alloc::vec::Vec<Option<alloc::string::String>> =
9433                alloc::vec::Vec::with_capacity(items.len());
9434            for elem in items {
9435                match literal_expr_to_value(elem)? {
9436                    Value::Null => out.push(None),
9437                    Value::Text(s) => out.push(Some(s)),
9438                    other => out.push(Some(alloc::format!("{other:?}"))),
9439                }
9440            }
9441            Ok(Value::TextArray(out))
9442        }
9443        other => Err(EngineError::Unsupported(alloc::format!(
9444            "non-literal INSERT value expression: {other:?}"
9445        ))),
9446    }
9447}
9448
9449fn literal_to_value(l: Literal) -> Value {
9450    match l {
9451        Literal::Integer(n) => int_value_for(n),
9452        Literal::Float(x) => Value::Float(x),
9453        Literal::String(s) => Value::Text(s),
9454        Literal::Bool(b) => Value::Bool(b),
9455        Literal::Null => Value::Null,
9456        Literal::Vector(v) => Value::Vector(v),
9457        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
9458    }
9459}
9460
9461/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
9462/// columns will still enforce the right tag downstream — this is just the
9463/// default we synthesise from an unannotated integer literal.
9464fn int_value_for(n: i64) -> Value {
9465    if let Ok(small) = i32::try_from(n) {
9466        Value::Int(small)
9467    } else {
9468        Value::BigInt(n)
9469    }
9470}
9471
9472/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
9473/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
9474/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
9475/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
9476/// `NULL` is always permitted; the nullability check happens later in storage.
9477#[allow(clippy::too_many_lines)]
9478fn coerce_value(
9479    v: Value,
9480    expected: DataType,
9481    col_name: &str,
9482    position: usize,
9483) -> Result<Value, EngineError> {
9484    if v.is_null() {
9485        return Ok(Value::Null);
9486    }
9487    let actual = v.data_type().expect("non-null");
9488    if actual == expected {
9489        return Ok(v);
9490    }
9491    let coerced =
9492        match (v, expected) {
9493            (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9494            (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9495            (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9496            (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
9497                i128::from(n),
9498                precision,
9499                scale,
9500                col_name,
9501            )?),
9502            (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
9503            (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9504            (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9505            (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(
9506                numeric_from_integer(i128::from(n), precision, scale, col_name)?,
9507            ),
9508            (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
9509            (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9510            #[allow(clippy::cast_precision_loss)]
9511            (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
9512            (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(
9513                numeric_from_integer(i128::from(n), precision, scale, col_name)?,
9514            ),
9515            (Value::Float(x), DataType::Numeric { precision, scale }) => {
9516                Some(numeric_from_float(x, precision, scale, col_name)?)
9517            }
9518            // Text → DATE / TIMESTAMP: parse canonical text forms.
9519            (Value::Text(s), DataType::Date) => {
9520                let d = eval::parse_date_literal(&s).ok_or_else(|| {
9521                    EngineError::Eval(EvalError::TypeMismatch {
9522                        detail: alloc::format!(
9523                            "cannot parse {s:?} as DATE for column `{col_name}`"
9524                        ),
9525                    })
9526                })?;
9527                Some(Value::Date(d))
9528            }
9529            // v4.9: Text ↔ JSON coercion. No structural validation —
9530            // any text literal is accepted; the responsibility for
9531            // valid JSON lies with the producer.
9532            (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
9533            (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
9534            // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
9535            //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
9536            //   - Escape: `Hello\\000world`  (backslash + octal triples)
9537            //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
9538            // Errors surface as TypeMismatch so the operator gets a
9539            // clear "this literal isn't a bytea literal" hint.
9540            (Value::Text(s), DataType::Bytes) => {
9541                let bytes = decode_bytea_literal(&s).map_err(|e| {
9542                    EngineError::Eval(EvalError::TypeMismatch {
9543                        detail: alloc::format!(
9544                            "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
9545                        ),
9546                    })
9547                })?;
9548                Some(Value::Bytes(bytes))
9549            }
9550            // v7.10.4 — BYTEA → Text round-trip uses the PG hex
9551            // output (lowercase, `\x` prefix). Important when a
9552            // SELECT pulls a bytea cell through a Text column path.
9553            (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
9554            // v7.10.11 — Text → TEXT[]. Decode PG's external array
9555            // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
9556            // is the literal `NULL`; everything else is a quoted or
9557            // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
9558            (Value::Text(s), DataType::TextArray) => {
9559                let arr = decode_text_array_literal(&s).map_err(|e| {
9560                    EngineError::Eval(EvalError::TypeMismatch {
9561                        detail: alloc::format!(
9562                            "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
9563                        ),
9564                    })
9565                })?;
9566                Some(Value::TextArray(arr))
9567            }
9568            // v7.10.11 — TEXT[] → Text round-trip uses PG's
9569            // external array form (`{a,b,NULL}`). Lets a SELECT
9570            // pull an array column through any Text-side codepath.
9571            (Value::TextArray(items), DataType::Text) => {
9572                Some(Value::Text(encode_text_array(&items)))
9573            }
9574            (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
9575                let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
9576                    EngineError::Eval(EvalError::TypeMismatch {
9577                        detail: alloc::format!(
9578                            "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
9579                        ),
9580                    })
9581                })?;
9582                Some(Value::Timestamp(t))
9583            }
9584            // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
9585            // TIMESTAMP → day truncation).
9586            (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
9587                Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
9588            }
9589            // v7.9.21 — Value::Timestamp lands in either Timestamp
9590            // or Timestamptz columns; the on-disk layout is the
9591            // same i64 microseconds UTC.
9592            (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
9593            (Value::Timestamp(t), DataType::Date) => {
9594                let days = t.div_euclid(86_400_000_000);
9595                i32::try_from(days).ok().map(Value::Date)
9596            }
9597            (
9598                Value::Numeric {
9599                    scaled,
9600                    scale: src_scale,
9601                },
9602                DataType::Numeric { precision, scale },
9603            ) => Some(numeric_rescale(
9604                scaled, src_scale, precision, scale, col_name,
9605            )?),
9606            #[allow(clippy::cast_precision_loss)]
9607            (Value::Numeric { scaled, scale }, DataType::Float) => {
9608                let mut div = 1.0_f64;
9609                for _ in 0..scale {
9610                    div *= 10.0;
9611                }
9612                Some(Value::Float((scaled as f64) / div))
9613            }
9614            (Value::Numeric { scaled, scale }, DataType::Int) => {
9615                let truncated = numeric_truncate_to_integer(scaled, scale);
9616                i32::try_from(truncated).ok().map(Value::Int)
9617            }
9618            (Value::Numeric { scaled, scale }, DataType::BigInt) => {
9619                let truncated = numeric_truncate_to_integer(scaled, scale);
9620                i64::try_from(truncated).ok().map(Value::BigInt)
9621            }
9622            (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
9623                let truncated = numeric_truncate_to_integer(scaled, scale);
9624                i16::try_from(truncated).ok().map(Value::SmallInt)
9625            }
9626            // VARCHAR(n) enforces an upper bound on character count.
9627            (Value::Text(s), DataType::Varchar(max)) => {
9628                if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
9629                    Some(Value::Text(s))
9630                } else {
9631                    return Err(EngineError::Unsupported(alloc::format!(
9632                        "value for VARCHAR({max}) column `{col_name}` exceeds length: \
9633                     {} chars",
9634                        s.chars().count()
9635                    )));
9636                }
9637            }
9638            // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
9639            // when the column declares `VECTOR(N) USING SQ8` and
9640            // the INSERT VALUES expression yields a raw f32 vector
9641            // (the normal pgvector-shape literal). Dim mismatch
9642            // falls through the `_ => None` arm and surfaces as
9643            // `TypeMismatch` with the expected SQ8 column type —
9644            // matching the F32 path's existing error.
9645            (
9646                Value::Vector(v),
9647                DataType::Vector {
9648                    dim,
9649                    encoding: VecEncoding::Sq8,
9650                },
9651            ) if v.len() == dim as usize => {
9652                Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v)))
9653            }
9654            // v6.0.3: f32 → f16 INSERT-time conversion for HALF
9655            // columns. Bit-exact at the storage layer (modulo
9656            // half-precision rounding); no rerank pass needed at
9657            // search time.
9658            (
9659                Value::Vector(v),
9660                DataType::Vector {
9661                    dim,
9662                    encoding: VecEncoding::F16,
9663                },
9664            ) if v.len() == dim as usize => Some(Value::HalfVector(
9665                spg_storage::halfvec::HalfVector::from_f32_slice(&v),
9666            )),
9667            // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
9668            // is already longer we reject (PG truncates trailing-space-only;
9669            // staying strict for v1).
9670            (Value::Text(s), DataType::Char(size)) => {
9671                let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
9672                if len > size {
9673                    return Err(EngineError::Unsupported(alloc::format!(
9674                        "value for CHAR({size}) column `{col_name}` exceeds length: \
9675                     {len} chars"
9676                    )));
9677                }
9678                let need = (size - len) as usize;
9679                let mut padded = s;
9680                padded.reserve(need);
9681                for _ in 0..need {
9682                    padded.push(' ');
9683                }
9684                Some(Value::Text(padded))
9685            }
9686            _ => None,
9687        };
9688    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
9689        column: col_name.into(),
9690        expected,
9691        actual,
9692        position,
9693    }))
9694}
9695
9696#[cfg(test)]
9697mod tests {
9698    use super::*;
9699    use alloc::vec;
9700
9701    fn unwrap_command_ok(r: &QueryResult) -> usize {
9702        match r {
9703            QueryResult::CommandOk { affected, .. } => *affected,
9704            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
9705        }
9706    }
9707
9708    #[test]
9709    fn create_table_registers_schema() {
9710        let mut e = Engine::new();
9711        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
9712            .unwrap();
9713        assert_eq!(e.catalog().table_count(), 1);
9714        let t = e.catalog().get("foo").unwrap();
9715        assert_eq!(t.schema().columns.len(), 2);
9716        assert_eq!(t.schema().columns[0].ty, DataType::Int);
9717        assert!(!t.schema().columns[0].nullable);
9718        assert_eq!(t.schema().columns[1].ty, DataType::Text);
9719    }
9720
9721    #[test]
9722    fn create_table_vector_default_is_f32_encoded() {
9723        let mut e = Engine::new();
9724        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
9725        let t = e.catalog().get("t").unwrap();
9726        assert_eq!(
9727            t.schema().columns[0].ty,
9728            DataType::Vector {
9729                dim: 8,
9730                encoding: VecEncoding::F32,
9731            },
9732        );
9733    }
9734
9735    #[test]
9736    fn create_table_vector_using_sq8_succeeds() {
9737        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
9738        // is lifted. CREATE TABLE persists an SQ8 column type in
9739        // the catalog; INSERT (next test) quantises raw f32 input.
9740        let mut e = Engine::new();
9741        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
9742        let t = e.catalog().get("t").unwrap();
9743        assert_eq!(
9744            t.schema().columns[0].ty,
9745            DataType::Vector {
9746                dim: 8,
9747                encoding: VecEncoding::Sq8,
9748            },
9749        );
9750    }
9751
9752    #[test]
9753    fn insert_into_sq8_column_quantises_f32_payload() {
9754        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
9755        // `Value::Vector(Vec<f32>)` literal into the column's
9756        // quantised representation. The row that lands in the
9757        // catalog must therefore hold a `Value::Sq8Vector`, not the
9758        // original f32 buffer — that's the bit that delivers the
9759        // 4× compression target.
9760        let mut e = Engine::new();
9761        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9762        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9763            .unwrap();
9764        let t = e.catalog().get("t").unwrap();
9765        assert_eq!(t.rows().len(), 1);
9766        match &t.rows()[0].values[0] {
9767            Value::Sq8Vector(q) => {
9768                assert_eq!(q.bytes.len(), 4);
9769                // min/max are derived from the payload: min=0.0, max=1.0.
9770                assert!((q.min - 0.0).abs() < 1e-6);
9771                assert!((q.max - 1.0).abs() < 1e-6);
9772            }
9773            other => panic!("expected Sq8Vector cell, got {other:?}"),
9774        }
9775    }
9776
9777    #[test]
9778    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
9779        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
9780        // converts the incoming `Value::Vector(Vec<f32>)` cell
9781        // into `Value::HalfVector(HalfVector)` via the new
9782        // `coerce_value` arm. The dequantised round-trip is
9783        // bit-exact for f16-representable values, so 0.0 / 0.25
9784        // / 0.5 / 1.0 hit their grid points exactly.
9785        let mut e = Engine::new();
9786        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9787            .unwrap();
9788        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9789            .unwrap();
9790        let t = e.catalog().get("t").unwrap();
9791        assert_eq!(t.rows().len(), 1);
9792        match &t.rows()[0].values[0] {
9793            Value::HalfVector(h) => {
9794                assert_eq!(h.dim(), 4);
9795                let back = h.to_f32_vec();
9796                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
9797                for (g, e) in back.iter().zip(expected.iter()) {
9798                    assert!(
9799                        (g - e).abs() < 1e-6,
9800                        "{g} vs {e} should be exact on f16 grid"
9801                    );
9802                }
9803            }
9804            other => panic!("expected HalfVector cell, got {other:?}"),
9805        }
9806    }
9807
9808    #[test]
9809    fn alter_index_rebuild_in_place_succeeds() {
9810        // v6.0.4: bare REBUILD (no encoding switch) walks every
9811        // row again to rebuild the NSW graph. Verifies the engine
9812        // dispatch + storage helper plumbing without changing any
9813        // cell encoding.
9814        let mut e = Engine::new();
9815        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
9816            .unwrap();
9817        for i in 0..8_i32 {
9818            #[allow(clippy::cast_precision_loss)]
9819            let base = (i as f32) * 0.1;
9820            e.execute(&alloc::format!(
9821                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
9822                b1 = base + 0.01,
9823                b2 = base + 0.02,
9824            ))
9825            .unwrap();
9826        }
9827        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9828        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
9829        // Schema encoding stays F32 (no encoding clause).
9830        assert_eq!(
9831            e.catalog().get("t").unwrap().schema().columns[1].ty,
9832            DataType::Vector {
9833                dim: 3,
9834                encoding: VecEncoding::F32,
9835            },
9836        );
9837    }
9838
9839    #[test]
9840    fn alter_index_rebuild_with_encoding_switches_cell_type() {
9841        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
9842        // stored cell from F32 → SQ8 + rebuilds the graph atop the
9843        // new encoding. Post-rebuild, cells must be Sq8Vector and
9844        // the schema must report encoding = Sq8.
9845        let mut e = Engine::new();
9846        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
9847            .unwrap();
9848        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
9849            .unwrap();
9850        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9851        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
9852            .unwrap();
9853        let t = e.catalog().get("t").unwrap();
9854        assert_eq!(
9855            t.schema().columns[1].ty,
9856            DataType::Vector {
9857                dim: 4,
9858                encoding: VecEncoding::Sq8,
9859            },
9860        );
9861        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
9862    }
9863
9864    #[test]
9865    fn alter_index_rebuild_unknown_index_errors() {
9866        let mut e = Engine::new();
9867        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
9868        assert!(
9869            matches!(
9870                &err,
9871                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
9872            ),
9873            "got: {err}"
9874        );
9875    }
9876
9877    #[test]
9878    fn alter_index_rebuild_on_btree_index_errors() {
9879        // REBUILD on a B-tree index has no semantic meaning in
9880        // v6.0.4 — rejected at the storage layer with `Unsupported`.
9881        let mut e = Engine::new();
9882        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9883        e.execute("INSERT INTO t VALUES (1)").unwrap();
9884        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
9885        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
9886        assert!(
9887            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
9888            "got: {err}"
9889        );
9890    }
9891
9892    #[test]
9893    fn prepared_insert_substitutes_placeholders() {
9894        // v6.1.1: prepare() parses once; execute_prepared() walks the
9895        // AST and replaces $1/$2 with the param Values BEFORE the
9896        // dispatch sees them. Same logical result as a simple-query
9897        // INSERT, but parse happens once per *statement*, not per
9898        // execution.
9899        let mut e = Engine::new();
9900        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
9901            .unwrap();
9902        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
9903        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
9904            e.execute_prepared(
9905                stmt.clone(),
9906                &[Value::Int(id), Value::Text(name.into())],
9907            )
9908            .unwrap();
9909        }
9910        // Read back via simple-query SELECT.
9911        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
9912        let QueryResult::Rows { rows, .. } = rows_result else {
9913            panic!("expected Rows")
9914        };
9915        assert_eq!(rows.len(), 3);
9916    }
9917
9918    #[test]
9919    fn prepared_select_with_placeholder_filters_rows() {
9920        let mut e = Engine::new();
9921        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
9922            .unwrap();
9923        for i in 0..10_i32 {
9924            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
9925                .unwrap();
9926        }
9927        let stmt = e
9928            .prepare("SELECT id FROM t WHERE v = $1")
9929            .unwrap();
9930        let QueryResult::Rows { rows, .. } = e
9931            .execute_prepared(stmt, &[Value::Int(35)])
9932            .unwrap()
9933        else {
9934            panic!("expected Rows")
9935        };
9936        // v = 35 means i*7 = 35 → i = 5.
9937        assert_eq!(rows.len(), 1);
9938        assert_eq!(rows[0].values[0], Value::Int(5));
9939    }
9940
9941    #[test]
9942    fn prepared_too_few_params_errors() {
9943        let mut e = Engine::new();
9944        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9945        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
9946        let err = e.execute_prepared(stmt, &[]).unwrap_err();
9947        assert!(
9948            matches!(
9949                &err,
9950                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
9951            ),
9952            "got: {err}"
9953        );
9954    }
9955
9956    #[test]
9957    fn insert_into_half_column_dim_mismatch_errors() {
9958        let mut e = Engine::new();
9959        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9960            .unwrap();
9961        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9962        assert!(matches!(
9963            &err,
9964            EngineError::Storage(StorageError::TypeMismatch { .. })
9965        ));
9966    }
9967
9968    #[test]
9969    fn insert_into_sq8_column_dim_mismatch_errors() {
9970        // Dim mismatch falls through the `coerce_value` Vector→Sq8
9971        // arm's guard and surfaces as `TypeMismatch` — the same
9972        // error the F32 path produces today, so client error
9973        // handling stays uniform across encodings.
9974        let mut e = Engine::new();
9975        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9976        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9977        assert!(
9978            matches!(
9979                &err,
9980                EngineError::Storage(StorageError::TypeMismatch { .. })
9981            ),
9982            "got: {err}",
9983        );
9984    }
9985
9986    #[test]
9987    fn create_table_duplicate_errors() {
9988        let mut e = Engine::new();
9989        e.execute("CREATE TABLE foo (a INT)").unwrap();
9990        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
9991        assert!(matches!(
9992            err,
9993            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
9994        ));
9995    }
9996
9997    #[test]
9998    fn insert_into_unknown_table_errors() {
9999        let mut e = Engine::new();
10000        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
10001        assert!(matches!(
10002            err,
10003            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
10004        ));
10005    }
10006
10007    #[test]
10008    fn insert_happy_path_reports_one_affected() {
10009        let mut e = Engine::new();
10010        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10011        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
10012        assert_eq!(unwrap_command_ok(&r), 1);
10013        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
10014    }
10015
10016    #[test]
10017    fn insert_arity_mismatch_propagates() {
10018        let mut e = Engine::new();
10019        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
10020        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
10021        assert!(matches!(
10022            err,
10023            EngineError::Storage(StorageError::ArityMismatch { .. })
10024        ));
10025    }
10026
10027    #[test]
10028    fn insert_negative_integer_via_unary_minus() {
10029        let mut e = Engine::new();
10030        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10031        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
10032        let rows = e.catalog().get("foo").unwrap().rows();
10033        assert_eq!(rows[0].values[0], Value::Int(-7));
10034    }
10035
10036    #[test]
10037    fn insert_non_literal_expr_unsupported() {
10038        let mut e = Engine::new();
10039        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
10040        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
10041        assert!(matches!(err, EngineError::Unsupported(_)));
10042    }
10043
10044    #[test]
10045    fn select_star_returns_all_rows_in_insertion_order() {
10046        let mut e = Engine::new();
10047        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
10048            .unwrap();
10049        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
10050        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
10051        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
10052
10053        let r = e.execute("SELECT * FROM foo").unwrap();
10054        let QueryResult::Rows { columns, rows } = r else {
10055            panic!("expected Rows")
10056        };
10057        assert_eq!(columns.len(), 2);
10058        assert_eq!(columns[0].name, "a");
10059        assert_eq!(rows.len(), 3);
10060        assert_eq!(
10061            rows[1].values,
10062            vec![Value::Int(2), Value::Text("two".into())]
10063        );
10064    }
10065
10066    #[test]
10067    fn select_star_on_empty_table_returns_zero_rows() {
10068        let mut e = Engine::new();
10069        e.execute("CREATE TABLE foo (a INT)").unwrap();
10070        let r = e.execute("SELECT * FROM foo").unwrap();
10071        match r {
10072            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
10073            QueryResult::CommandOk { .. } => panic!("expected Rows"),
10074        }
10075    }
10076
10077    // --- v0.4: WHERE + projection ------------------------------------------
10078
10079    fn make_three_row_users(e: &mut Engine) {
10080        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
10081            .unwrap();
10082        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
10083            .unwrap();
10084        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
10085            .unwrap();
10086        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
10087            .unwrap();
10088    }
10089
10090    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
10091        match r {
10092            QueryResult::Rows { columns, rows } => (columns, rows),
10093            QueryResult::CommandOk { .. } => panic!("expected Rows"),
10094        }
10095    }
10096
10097    #[test]
10098    fn where_filter_passes_only_true_rows() {
10099        let mut e = Engine::new();
10100        make_three_row_users(&mut e);
10101        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
10102        let (_, rows) = unwrap_rows(r);
10103        assert_eq!(rows.len(), 2);
10104        assert_eq!(rows[0].values[0], Value::Int(2));
10105        assert_eq!(rows[1].values[0], Value::Int(3));
10106    }
10107
10108    #[test]
10109    fn where_with_null_result_filters_out_row() {
10110        let mut e = Engine::new();
10111        make_three_row_users(&mut e);
10112        // score is NULL for bob → score > 80 is NULL → row excluded
10113        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
10114        let (_, rows) = unwrap_rows(r);
10115        assert_eq!(rows.len(), 1);
10116        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
10117    }
10118
10119    #[test]
10120    fn projection_named_columns() {
10121        let mut e = Engine::new();
10122        make_three_row_users(&mut e);
10123        let r = e.execute("SELECT name, score FROM users").unwrap();
10124        let (cols, rows) = unwrap_rows(r);
10125        assert_eq!(cols.len(), 2);
10126        assert_eq!(cols[0].name, "name");
10127        assert_eq!(cols[1].name, "score");
10128        assert_eq!(rows.len(), 3);
10129        assert_eq!(
10130            rows[0].values,
10131            vec![Value::Text("alice".into()), Value::Int(90)]
10132        );
10133    }
10134
10135    #[test]
10136    fn projection_with_column_alias() {
10137        let mut e = Engine::new();
10138        make_three_row_users(&mut e);
10139        let r = e
10140            .execute("SELECT name AS who FROM users WHERE id = 1")
10141            .unwrap();
10142        let (cols, rows) = unwrap_rows(r);
10143        assert_eq!(cols[0].name, "who");
10144        assert_eq!(rows.len(), 1);
10145        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
10146    }
10147
10148    #[test]
10149    fn qualified_column_with_table_alias_resolves() {
10150        let mut e = Engine::new();
10151        make_three_row_users(&mut e);
10152        let r = e
10153            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
10154            .unwrap();
10155        let (cols, rows) = unwrap_rows(r);
10156        assert_eq!(cols.len(), 2);
10157        assert_eq!(rows.len(), 2);
10158    }
10159
10160    #[test]
10161    fn qualified_column_with_wrong_alias_errors() {
10162        let mut e = Engine::new();
10163        make_three_row_users(&mut e);
10164        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
10165        assert!(matches!(
10166            err,
10167            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
10168        ));
10169    }
10170
10171    #[test]
10172    fn select_unknown_column_errors_in_projection() {
10173        let mut e = Engine::new();
10174        make_three_row_users(&mut e);
10175        let err = e.execute("SELECT ghost FROM users").unwrap_err();
10176        assert!(matches!(
10177            err,
10178            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
10179        ));
10180    }
10181
10182    #[test]
10183    fn where_unknown_column_errors() {
10184        let mut e = Engine::new();
10185        make_three_row_users(&mut e);
10186        let err = e
10187            .execute("SELECT * FROM users WHERE ghost = 1")
10188            .unwrap_err();
10189        assert!(matches!(
10190            err,
10191            EngineError::Eval(EvalError::ColumnNotFound { .. })
10192        ));
10193    }
10194
10195    #[test]
10196    fn expression_projection_evaluates_and_renders() {
10197        // Compound expressions in the SELECT list are evaluated per row;
10198        // the output column is typed TEXT, name defaults to the expression.
10199        let mut e = Engine::new();
10200        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
10201        e.execute("INSERT INTO t VALUES (3)").unwrap();
10202        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
10203        assert_eq!(rows.len(), 1);
10204        // The expression evaluates to integer 3; rendered as the cell value
10205        // (storage::Value::Int(3) since arithmetic kept ints).
10206        assert_eq!(rows[0].values[0], Value::Int(3));
10207    }
10208
10209    #[test]
10210    fn select_unknown_table_errors() {
10211        let mut e = Engine::new();
10212        let err = e.execute("SELECT * FROM ghost").unwrap_err();
10213        assert!(matches!(
10214            err,
10215            EngineError::Storage(StorageError::TableNotFound { .. })
10216        ));
10217    }
10218
10219    #[test]
10220    fn invalid_sql_returns_parse_error() {
10221        // v4.4: UPDATE is now real SQL, so use a true syntactic
10222        // garbage payload for the parse-error path.
10223        let mut e = Engine::new();
10224        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
10225        assert!(matches!(err, EngineError::Parse(_)));
10226    }
10227
10228    // --- v0.8 CREATE INDEX + index seek ------------------------------------
10229
10230    #[test]
10231    fn create_index_registers_on_table() {
10232        let mut e = Engine::new();
10233        make_three_row_users(&mut e);
10234        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
10235        let t = e.catalog().get("users").unwrap();
10236        assert_eq!(t.indices().len(), 1);
10237        assert_eq!(t.indices()[0].name, "by_name");
10238    }
10239
10240    #[test]
10241    fn create_index_on_unknown_table_errors() {
10242        let mut e = Engine::new();
10243        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
10244        assert!(matches!(
10245            err,
10246            EngineError::Storage(StorageError::TableNotFound { .. })
10247        ));
10248    }
10249
10250    #[test]
10251    fn create_index_on_unknown_column_errors() {
10252        let mut e = Engine::new();
10253        make_three_row_users(&mut e);
10254        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
10255        assert!(matches!(
10256            err,
10257            EngineError::Storage(StorageError::ColumnNotFound { .. })
10258        ));
10259    }
10260
10261    #[test]
10262    fn select_eq_uses_index_returns_same_rows_as_scan() {
10263        // Build two engines: one with an index, one without. Same query →
10264        // same row set (index is a planner optimisation, not a semantic
10265        // change).
10266        let mut without = Engine::new();
10267        make_three_row_users(&mut without);
10268        let mut with = Engine::new();
10269        make_three_row_users(&mut with);
10270        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
10271
10272        let q = "SELECT * FROM users WHERE id = 2";
10273        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
10274        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
10275        assert_eq!(no_idx_rows, idx_rows);
10276        assert_eq!(idx_rows.len(), 1);
10277    }
10278
10279    #[test]
10280    fn select_eq_with_no_matching_index_value_returns_empty() {
10281        let mut e = Engine::new();
10282        make_three_row_users(&mut e);
10283        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
10284        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
10285        assert_eq!(rows.len(), 0);
10286    }
10287
10288    // --- v0.9 transactions -------------------------------------------------
10289
10290    #[test]
10291    fn begin_sets_in_transaction_flag() {
10292        let mut e = Engine::new();
10293        assert!(!e.in_transaction());
10294        e.execute("BEGIN").unwrap();
10295        assert!(e.in_transaction());
10296    }
10297
10298    #[test]
10299    fn double_begin_errors() {
10300        let mut e = Engine::new();
10301        e.execute("BEGIN").unwrap();
10302        let err = e.execute("BEGIN").unwrap_err();
10303        assert_eq!(err, EngineError::TransactionAlreadyOpen);
10304    }
10305
10306    #[test]
10307    fn commit_without_begin_errors() {
10308        let mut e = Engine::new();
10309        let err = e.execute("COMMIT").unwrap_err();
10310        assert_eq!(err, EngineError::NoActiveTransaction);
10311    }
10312
10313    #[test]
10314    fn rollback_without_begin_errors() {
10315        let mut e = Engine::new();
10316        let err = e.execute("ROLLBACK").unwrap_err();
10317        assert_eq!(err, EngineError::NoActiveTransaction);
10318    }
10319
10320    #[test]
10321    fn commit_applies_shadow_to_committed_catalog() {
10322        let mut e = Engine::new();
10323        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10324        e.execute("BEGIN").unwrap();
10325        e.execute("INSERT INTO t VALUES (1)").unwrap();
10326        e.execute("INSERT INTO t VALUES (2)").unwrap();
10327        e.execute("COMMIT").unwrap();
10328        assert!(!e.in_transaction());
10329        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
10330    }
10331
10332    #[test]
10333    fn rollback_discards_shadow() {
10334        let mut e = Engine::new();
10335        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10336        e.execute("BEGIN").unwrap();
10337        e.execute("INSERT INTO t VALUES (1)").unwrap();
10338        e.execute("INSERT INTO t VALUES (2)").unwrap();
10339        e.execute("ROLLBACK").unwrap();
10340        assert!(!e.in_transaction());
10341        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
10342    }
10343
10344    #[test]
10345    fn select_during_tx_sees_uncommitted_writes_own_session() {
10346        // The shadow catalog is read by SELECTs while a TX is open — the
10347        // session can see its own pending writes.
10348        let mut e = Engine::new();
10349        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
10350        e.execute("BEGIN").unwrap();
10351        e.execute("INSERT INTO t VALUES (42)").unwrap();
10352        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
10353        assert_eq!(rows.len(), 1);
10354        assert_eq!(rows[0].values[0], Value::Int(42));
10355    }
10356
10357    #[test]
10358    fn snapshot_with_no_users_is_bare_catalog_format() {
10359        let mut e = Engine::new();
10360        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10361        let bytes = e.snapshot();
10362        assert_eq!(
10363            &bytes[..8],
10364            b"SPGDB001",
10365            "must be the bare v3.x catalog magic"
10366        );
10367        let e2 = Engine::restore_envelope(&bytes).unwrap();
10368        assert!(e2.users().is_empty());
10369        assert_eq!(e2.catalog().table_count(), 1);
10370    }
10371
10372    #[test]
10373    fn snapshot_with_users_round_trips_both_via_envelope() {
10374        let mut e = Engine::new();
10375        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10376        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
10377        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
10378            .unwrap();
10379        let bytes = e.snapshot();
10380        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
10381        let e2 = Engine::restore_envelope(&bytes).unwrap();
10382        assert_eq!(e2.users().len(), 2);
10383        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
10384        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
10385        assert_eq!(e2.verify_user("alice", "wrong"), None);
10386        assert_eq!(e2.catalog().table_count(), 1);
10387    }
10388
10389    #[test]
10390    fn ddl_inside_tx_also_rolled_back() {
10391        let mut e = Engine::new();
10392        e.execute("BEGIN").unwrap();
10393        e.execute("CREATE TABLE t (v INT)").unwrap();
10394        // Visible inside the TX.
10395        e.execute("SELECT * FROM t").unwrap();
10396        e.execute("ROLLBACK").unwrap();
10397        // Gone after rollback.
10398        let err = e.execute("SELECT * FROM t").unwrap_err();
10399        assert!(matches!(
10400            err,
10401            EngineError::Storage(StorageError::TableNotFound { .. })
10402        ));
10403    }
10404
10405    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
10406
10407    #[test]
10408    fn create_publication_lands_in_catalog() {
10409        let mut e = Engine::new();
10410        assert!(e.publications().is_empty());
10411        e.execute("CREATE PUBLICATION pub_a").unwrap();
10412        assert_eq!(e.publications().len(), 1);
10413        assert!(e.publications().contains("pub_a"));
10414    }
10415
10416    #[test]
10417    fn create_publication_duplicate_errors() {
10418        let mut e = Engine::new();
10419        e.execute("CREATE PUBLICATION pub_a").unwrap();
10420        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
10421        assert!(
10422            alloc::format!("{err:?}").contains("DuplicateName"),
10423            "got {err:?}"
10424        );
10425    }
10426
10427    #[test]
10428    fn drop_publication_silent_when_absent() {
10429        let mut e = Engine::new();
10430        // PG-compatible: DROP a publication that doesn't exist
10431        // succeeds (no-op) but reports zero affected.
10432        let r = e.execute("DROP PUBLICATION nope").unwrap();
10433        match r {
10434            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
10435            other => panic!("expected CommandOk, got {other:?}"),
10436        }
10437    }
10438
10439    #[test]
10440    fn drop_publication_present_reports_one_affected() {
10441        let mut e = Engine::new();
10442        e.execute("CREATE PUBLICATION pub_a").unwrap();
10443        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
10444        match r {
10445            QueryResult::CommandOk {
10446                affected,
10447                modified_catalog,
10448            } => {
10449                assert_eq!(affected, 1);
10450                assert!(modified_catalog);
10451            }
10452            other => panic!("expected CommandOk, got {other:?}"),
10453        }
10454        assert!(e.publications().is_empty());
10455    }
10456
10457    #[test]
10458    fn publications_persist_across_snapshot_restore() {
10459        // The persist-across-restart ship-gate at the engine layer —
10460        // snapshot → restore_envelope round trip must preserve the
10461        // publication catalog. The spg-server e2e covers the
10462        // process-restart variant.
10463        let mut e = Engine::new();
10464        e.execute("CREATE PUBLICATION pub_a").unwrap();
10465        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES").unwrap();
10466        let snap = e.snapshot();
10467        let e2 = Engine::restore_envelope(&snap).unwrap();
10468        assert_eq!(e2.publications().len(), 2);
10469        assert!(e2.publications().contains("pub_a"));
10470        assert!(e2.publications().contains("pub_b"));
10471    }
10472
10473    #[test]
10474    fn create_publication_allowed_inside_transaction() {
10475        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
10476        // CREATE PUBLICATION inside a TX and the auto-commit
10477        // wrap path needs the same allowance.
10478        let mut e = Engine::new();
10479        e.execute("BEGIN").unwrap();
10480        e.execute("CREATE PUBLICATION pub_a").unwrap();
10481        e.execute("COMMIT").unwrap();
10482        assert!(e.publications().contains("pub_a"));
10483    }
10484
10485    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
10486
10487    #[test]
10488    fn create_publication_for_table_list_lands_with_scope() {
10489        let mut e = Engine::new();
10490        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
10491        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
10492        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
10493            .unwrap();
10494        let scope = e.publications().get("pub_a").cloned();
10495        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
10496            panic!("expected ForTables scope, got {scope:?}")
10497        };
10498        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10499    }
10500
10501    #[test]
10502    fn create_publication_all_tables_except_lands_with_scope() {
10503        let mut e = Engine::new();
10504        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
10505            .unwrap();
10506        let scope = e.publications().get("pub_a").cloned();
10507        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
10508            panic!("expected AllTablesExcept scope, got {scope:?}")
10509        };
10510        assert_eq!(ts, alloc::vec!["t3".to_string()]);
10511    }
10512
10513    #[test]
10514    fn show_publications_empty_returns_zero_rows() {
10515        let e = Engine::new();
10516        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10517        let QueryResult::Rows { rows, columns } = r else {
10518            panic!()
10519        };
10520        assert!(rows.is_empty());
10521        assert_eq!(columns.len(), 3);
10522        assert_eq!(columns[0].name, "name");
10523        assert_eq!(columns[1].name, "scope");
10524        assert_eq!(columns[2].name, "table_count");
10525    }
10526
10527    #[test]
10528    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
10529        let mut e = Engine::new();
10530        e.execute("CREATE PUBLICATION z_pub").unwrap();
10531        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
10532            .unwrap();
10533        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
10534            .unwrap();
10535        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10536        let QueryResult::Rows { rows, .. } = r else {
10537            panic!()
10538        };
10539        assert_eq!(rows.len(), 3);
10540        // Alphabetical order: a_pub, m_pub, z_pub.
10541        let names: Vec<&str> = rows
10542            .iter()
10543            .map(|r| {
10544                if let Value::Text(s) = &r.values[0] {
10545                    s.as_str()
10546                } else {
10547                    panic!()
10548                }
10549            })
10550            .collect();
10551        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
10552        // Row 0 — a_pub scope summary + table_count = 2.
10553        match &rows[0].values[1] {
10554            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
10555            other => panic!("expected Text, got {other:?}"),
10556        }
10557        assert_eq!(rows[0].values[2], Value::Int(2));
10558        // Row 1 — m_pub.
10559        match &rows[1].values[1] {
10560            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
10561            other => panic!("expected Text, got {other:?}"),
10562        }
10563        assert_eq!(rows[1].values[2], Value::Int(1));
10564        // Row 2 — z_pub (AllTables → NULL count).
10565        match &rows[2].values[1] {
10566            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
10567            other => panic!("expected Text, got {other:?}"),
10568        }
10569        assert_eq!(rows[2].values[2], Value::Null);
10570    }
10571
10572    #[test]
10573    fn for_list_scopes_persist_across_snapshot() {
10574        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
10575        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
10576        let mut e = Engine::new();
10577        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
10578        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
10579            .unwrap();
10580        let snap = e.snapshot();
10581        let e2 = Engine::restore_envelope(&snap).unwrap();
10582        assert_eq!(e2.publications().len(), 2);
10583        let p1 = e2.publications().get("p1").cloned();
10584        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
10585            panic!("p1 scope lost: {p1:?}")
10586        };
10587        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10588        let p2 = e2.publications().get("p2").cloned();
10589        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
10590            panic!("p2 scope lost: {p2:?}")
10591        };
10592        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
10593    }
10594
10595    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
10596
10597    #[test]
10598    fn create_subscription_lands_in_catalog_with_defaults() {
10599        let mut e = Engine::new();
10600        e.execute(
10601            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
10602        )
10603        .unwrap();
10604        let s = e.subscriptions().get("sub_a").cloned().expect("present");
10605        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
10606        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
10607        assert!(s.enabled);
10608        assert_eq!(s.last_received_pos, 0);
10609    }
10610
10611    #[test]
10612    fn create_subscription_duplicate_name_errors() {
10613        let mut e = Engine::new();
10614        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
10615            .unwrap();
10616        let err = e
10617            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
10618            .unwrap_err();
10619        assert!(
10620            alloc::format!("{err:?}").contains("DuplicateName"),
10621            "got {err:?}"
10622        );
10623    }
10624
10625    #[test]
10626    fn drop_subscription_silent_when_absent() {
10627        let mut e = Engine::new();
10628        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
10629        match r {
10630            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
10631            other => panic!("expected CommandOk, got {other:?}"),
10632        }
10633    }
10634
10635    #[test]
10636    fn subscription_advance_updates_last_pos_monotone() {
10637        let mut e = Engine::new();
10638        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
10639            .unwrap();
10640        assert!(e.subscription_advance("s", 100));
10641        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10642        assert!(e.subscription_advance("s", 50)); // stale → ignored
10643        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10644        assert!(e.subscription_advance("s", 200));
10645        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
10646        assert!(!e.subscription_advance("missing", 1));
10647    }
10648
10649    #[test]
10650    fn show_subscriptions_returns_rows_ordered_by_name() {
10651        let mut e = Engine::new();
10652        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
10653            .unwrap();
10654        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
10655            .unwrap();
10656        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
10657        let QueryResult::Rows { rows, columns } = r else {
10658            panic!()
10659        };
10660        assert_eq!(rows.len(), 2);
10661        assert_eq!(columns.len(), 5);
10662        assert_eq!(columns[0].name, "name");
10663        assert_eq!(columns[4].name, "last_received_pos");
10664        // Alphabetical: a_sub, z_sub.
10665        let names: Vec<&str> = rows
10666            .iter()
10667            .map(|r| {
10668                if let Value::Text(s) = &r.values[0] {
10669                    s.as_str()
10670                } else {
10671                    panic!()
10672                }
10673            })
10674            .collect();
10675        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
10676        // Row 0: a_sub
10677        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
10678        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
10679        assert_eq!(rows[0].values[3], Value::Bool(true));
10680        assert_eq!(rows[0].values[4], Value::BigInt(0));
10681        // Row 1: z_sub — publications join with ", "
10682        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
10683    }
10684
10685    #[test]
10686    fn subscriptions_persist_across_snapshot_envelope_v4() {
10687        let mut e = Engine::new();
10688        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
10689            .unwrap();
10690        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
10691            .unwrap();
10692        e.subscription_advance("s2", 42);
10693        let snap = e.snapshot();
10694        let e2 = Engine::restore_envelope(&snap).unwrap();
10695        assert_eq!(e2.subscriptions().len(), 2);
10696        let s1 = e2.subscriptions().get("s1").unwrap();
10697        assert_eq!(s1.conn_str, "h=A");
10698        assert_eq!(s1.publications, alloc::vec!["p1".to_string(), "p2".to_string()]);
10699        assert_eq!(s1.last_received_pos, 0);
10700        let s2 = e2.subscriptions().get("s2").unwrap();
10701        assert_eq!(s2.last_received_pos, 42);
10702    }
10703
10704    #[test]
10705    fn v3_envelope_loads_with_empty_subscriptions() {
10706        // v3 snapshot (publications-only). Forge it by hand so we
10707        // verify v6.1.4 readers don't panic — they must surface
10708        // empty subscriptions and a populated publication table.
10709        let mut e = Engine::new();
10710        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
10711        let catalog = e.catalog.serialize();
10712        let users = crate::users::serialize_users(&e.users);
10713        let pubs = e.publications.serialize();
10714        let mut buf = Vec::new();
10715        buf.extend_from_slice(b"SPGENV01");
10716        buf.push(3u8); // v3
10717        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
10718        buf.extend_from_slice(&catalog);
10719        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
10720        buf.extend_from_slice(&users);
10721        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
10722        buf.extend_from_slice(&pubs);
10723        let crc = spg_crypto::crc32::crc32(&buf);
10724        buf.extend_from_slice(&crc.to_le_bytes());
10725
10726        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
10727        assert!(e2.subscriptions().is_empty());
10728        assert!(e2.publications().contains("pub_legacy"));
10729    }
10730
10731    #[test]
10732    fn create_subscription_allowed_inside_transaction() {
10733        let mut e = Engine::new();
10734        e.execute("BEGIN").unwrap();
10735        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
10736            .unwrap();
10737        e.execute("COMMIT").unwrap();
10738        assert!(e.subscriptions().contains("s"));
10739    }
10740
10741    #[test]
10742    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
10743
10744    #[test]
10745    fn analyze_populates_histogram_bounds() {
10746        let mut e = Engine::new();
10747        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)").unwrap();
10748        for i in 0..50 {
10749            e.execute(&alloc::format!(
10750                "INSERT INTO t VALUES ({i}, 'name{i}')"
10751            ))
10752            .unwrap();
10753        }
10754        e.execute("ANALYZE t").unwrap();
10755        let stats = e.statistics();
10756        let id_stats = stats.get("t", "id").unwrap();
10757        assert!(id_stats.histogram_bounds.len() >= 2);
10758        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
10759        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
10760        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
10761        assert_eq!(id_stats.n_distinct, 50);
10762    }
10763
10764    #[test]
10765    fn reanalyze_overwrites_prior_stats() {
10766        let mut e = Engine::new();
10767        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10768        for i in 0..10 {
10769            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10770        }
10771        e.execute("ANALYZE t").unwrap();
10772        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
10773        assert_eq!(n1, 10);
10774        for i in 10..30 {
10775            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10776        }
10777        e.execute("ANALYZE t").unwrap();
10778        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
10779        assert_eq!(n2, 30);
10780    }
10781
10782    #[test]
10783    fn analyze_unknown_table_errors() {
10784        let mut e = Engine::new();
10785        let err = e.execute("ANALYZE nonexistent").unwrap_err();
10786        assert!(matches!(err, EngineError::Storage(StorageError::TableNotFound { .. })));
10787    }
10788
10789    #[test]
10790    fn bare_analyze_covers_all_user_tables() {
10791        let mut e = Engine::new();
10792        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
10793        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
10794        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
10795        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
10796        let r = e.execute("ANALYZE").unwrap();
10797        match r {
10798            QueryResult::CommandOk { affected, modified_catalog } => {
10799                assert_eq!(affected, 2);
10800                assert!(modified_catalog);
10801            }
10802            other => panic!("expected CommandOk, got {other:?}"),
10803        }
10804        assert!(e.statistics().get("t1", "id").is_some());
10805        assert!(e.statistics().get("t2", "name").is_some());
10806    }
10807
10808    #[test]
10809    fn select_from_spg_statistic_returns_rows_per_column() {
10810        let mut e = Engine::new();
10811        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
10812            .unwrap();
10813        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
10814        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
10815        e.execute("ANALYZE t").unwrap();
10816        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
10817        let QueryResult::Rows { rows, columns } = r else {
10818            panic!()
10819        };
10820        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
10821        assert_eq!(columns.len(), 6);
10822        assert_eq!(columns[0].name, "table_name");
10823        assert_eq!(columns[4].name, "histogram_bounds");
10824        assert_eq!(columns[5].name, "cold_row_count");
10825        assert_eq!(rows.len(), 2, "one row per column of t");
10826        // Sorted by (table_name, column_name).
10827        match (&rows[0].values[0], &rows[0].values[1]) {
10828            (Value::Text(t), Value::Text(c)) => {
10829                assert_eq!(t, "t");
10830                // BTreeMap orders (table, column); columns "id" < "label".
10831                assert_eq!(c, "id");
10832            }
10833            _ => panic!(),
10834        }
10835    }
10836
10837    #[test]
10838    fn analyze_skips_vector_columns() {
10839        // Vector columns have their own stats shape (HNSW graph);
10840        // ANALYZE leaves them out of spg_statistic.
10841        let mut e = Engine::new();
10842        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
10843            .unwrap();
10844        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
10845        e.execute("ANALYZE t").unwrap();
10846        assert!(e.statistics().get("t", "id").is_some());
10847        assert!(e.statistics().get("t", "v").is_none());
10848    }
10849
10850    #[test]
10851    fn statistics_persist_across_envelope_v5_round_trip() {
10852        let mut e = Engine::new();
10853        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10854        for i in 0..20 {
10855            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10856        }
10857        e.execute("ANALYZE").unwrap();
10858        let snap = e.snapshot();
10859        let e2 = Engine::restore_envelope(&snap).unwrap();
10860        let s = e2.statistics().get("t", "id").unwrap();
10861        assert_eq!(s.n_distinct, 20);
10862    }
10863
10864    // ── v6.2.1 auto-analyze threshold ───────────────────────────
10865
10866    #[test]
10867    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
10868        // For a table with 0 rows then 10 inserts → modified=10,
10869        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
10870        // after the 10th INSERT the threshold is met.
10871        let mut e = Engine::new();
10872        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10873        for i in 0..9 {
10874            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10875        }
10876        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
10877        e.execute("INSERT INTO t VALUES (9)").unwrap();
10878        let needs = e.tables_needing_analyze();
10879        assert_eq!(needs, alloc::vec!["t".to_string()]);
10880    }
10881
10882    #[test]
10883    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
10884        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
10885        // Each new INSERT bumps both modified and row_count, so to
10886        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
10887        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
10888        // more (200 total mods, row_count=1200, threshold=120 → fire).
10889        let mut e = Engine::new();
10890        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10891        for i in 0..1000 {
10892            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10893        }
10894        e.execute("ANALYZE t").unwrap();
10895        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
10896        for i in 1000..1050 {
10897            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10898        }
10899        assert!(
10900            e.tables_needing_analyze().is_empty(),
10901            "50 inserts < threshold of ~105"
10902        );
10903        for i in 1050..1200 {
10904            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10905        }
10906        assert_eq!(
10907            e.tables_needing_analyze(),
10908            alloc::vec!["t".to_string()],
10909            "200 inserts > 0.1 × 1200 threshold"
10910        );
10911    }
10912
10913    #[test]
10914    fn auto_analyze_threshold_resets_after_analyze() {
10915        let mut e = Engine::new();
10916        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
10917        for i in 0..200 {
10918            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})")).unwrap();
10919        }
10920        assert!(!e.tables_needing_analyze().is_empty());
10921        e.execute("ANALYZE").unwrap();
10922        assert!(
10923            e.tables_needing_analyze().is_empty(),
10924            "ANALYZE must reset the counter"
10925        );
10926    }
10927
10928    #[test]
10929    fn auto_analyze_threshold_tracks_updates_and_deletes() {
10930        let mut e = Engine::new();
10931        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)").unwrap();
10932        for i in 0..50 {
10933            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
10934                .unwrap();
10935        }
10936        e.execute("ANALYZE t").unwrap();
10937        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
10938        // × max(50, 100) = 10. So 25 >= 10 → trigger.
10939        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
10940        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
10941        assert_eq!(
10942            e.tables_needing_analyze(),
10943            alloc::vec!["t".to_string()]
10944        );
10945    }
10946
10947    #[test]
10948    fn v4_envelope_loads_with_empty_statistics() {
10949        // Forge a v4 envelope by hand: catalog + users + pubs +
10950        // subs trailer, no statistics. A v6.2.0 reader must accept
10951        // it and surface an empty Statistics.
10952        let mut e = Engine::new();
10953        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
10954            .unwrap();
10955        let catalog = e.catalog.serialize();
10956        let users = crate::users::serialize_users(&e.users);
10957        let pubs = e.publications.serialize();
10958        let subs = e.subscriptions.serialize();
10959        let mut buf = Vec::new();
10960        buf.extend_from_slice(b"SPGENV01");
10961        buf.push(4u8);
10962        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
10963        buf.extend_from_slice(&catalog);
10964        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
10965        buf.extend_from_slice(&users);
10966        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
10967        buf.extend_from_slice(&pubs);
10968        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
10969        buf.extend_from_slice(&subs);
10970        let crc = spg_crypto::crc32::crc32(&buf);
10971        buf.extend_from_slice(&crc.to_le_bytes());
10972        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
10973        assert!(e2.statistics().is_empty());
10974    }
10975
10976    #[test]
10977    fn v1_v2_envelope_loads_with_empty_publications() {
10978        // A snapshot taken before v6.1.2 (no publication trailer,
10979        // envelope v2) must still deserialise — and the resulting
10980        // engine must report zero publications. Use the engine's own
10981        // round-trip with no publications: that emits v3 but with an
10982        // empty pubs block. Then forge a v2 envelope by hand to lock
10983        // the back-compat path.
10984        let mut e = Engine::new();
10985        // Force users to be non-empty so the snapshot takes the
10986        // envelope path rather than the bare-catalog fallback.
10987        e.create_user(
10988            "alice",
10989            "secret",
10990            crate::users::Role::ReadOnly,
10991            [0u8; 16],
10992        )
10993        .unwrap();
10994
10995        // Forge an envelope v2: same shape as v3 but no pubs trailer.
10996        let catalog = e.catalog.serialize();
10997        let users = crate::users::serialize_users(&e.users);
10998        let mut buf = Vec::new();
10999        buf.extend_from_slice(b"SPGENV01");
11000        buf.push(2u8); // v2
11001        buf.extend_from_slice(
11002            &u32::try_from(catalog.len()).unwrap().to_le_bytes(),
11003        );
11004        buf.extend_from_slice(&catalog);
11005        buf.extend_from_slice(
11006            &u32::try_from(users.len()).unwrap().to_le_bytes(),
11007        );
11008        buf.extend_from_slice(&users);
11009        let crc = spg_crypto::crc32::crc32(&buf);
11010        buf.extend_from_slice(&crc.to_le_bytes());
11011
11012        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
11013        assert!(e2.publications().is_empty());
11014    }
11015}