// spg_engine/lib.rs

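//! SPG execution engine — wires the SQL front-end to the in-memory
//! storage layer. As of v0.3 it implemented `CREATE TABLE`, single-row
//! `INSERT VALUES`, and `SELECT * FROM <table>` (no WHERE yet — that was
//! slated for v0.4 alongside expression evaluation against rows).
//!
//! NOTE(review): this summary predates most of the file — items below carry
//! version tags up to v7.14 (transactions, users, publications,
//! subscriptions, statistics, triggers, …). Refresh it against current scope.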
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod fts;
13pub mod json;
14pub mod memoize;
15pub mod plan_cache;
16pub mod publications;
17pub mod query_stats;
18pub mod reorder;
19pub mod selectivity;
20pub mod statistics;
21pub mod subscriptions;
22pub mod triggers;
23pub mod users;
24
25pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
26
27use alloc::borrow::Cow;
28use alloc::boxed::Box;
29use alloc::collections::BTreeMap;
30use alloc::string::{String, ToString};
31use alloc::vec::Vec;
32use core::fmt;
33
34use spg_sql::ast::{
35    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
36    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
37    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
38    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
39    WindowFrame,
40};
41use spg_sql::parser::{self, ParseError};
42use spg_storage::{
43    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
44    TableSchema, Value, VecEncoding,
45};
46
47use crate::eval::{EvalContext, EvalError};
48
49/// Result of executing one statement.
50#[derive(Debug, Clone, PartialEq)]
51#[non_exhaustive]
52pub enum QueryResult {
53    /// DDL or DML succeeded.
54    ///
55    /// `affected` is the row count for `INSERT` and 0 elsewhere.
56    /// `modified_catalog` tells the server whether this statement
57    /// caused the *committed* catalog to change — it's the signal to
58    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
59    /// statements executed inside a transaction (those only touch the
60    /// shadow), and true for `COMMIT` and for writes outside a TX.
61    CommandOk {
62        affected: usize,
63        modified_catalog: bool,
64    },
65    /// `SELECT` returned a (possibly empty) row set.
66    Rows {
67        columns: Vec<ColumnSchema>,
68        rows: Vec<Row>,
69    },
70}
71
72/// All errors the engine can return.
73///
74/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
75/// must include a `_` arm so new variants in subsequent v7.x releases
76/// are not breaking changes.
77#[derive(Debug, Clone, PartialEq)]
78#[non_exhaustive]
79pub enum EngineError {
80    Parse(ParseError),
81    Storage(StorageError),
82    Eval(EvalError),
83    /// Front-end accepted a construct that the v0.x executor doesn't support.
84    Unsupported(String),
85    /// `BEGIN` while another transaction is already open.
86    TransactionAlreadyOpen,
87    /// `COMMIT` / `ROLLBACK` with no active transaction.
88    NoActiveTransaction,
89    /// v4.0 sentinel: `execute_readonly` got a statement that
90    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
91    /// The caller should retake the write lock and dispatch through
92    /// `execute(&mut self)` instead.
93    WriteRequired,
94    /// v4.2: a SELECT would have returned more rows than the
95    /// configured `max_query_rows` cap. Carries the cap.
96    RowLimitExceeded(usize),
97    /// v4.5: cooperative cancellation — the host (server's
98    /// per-query watchdog) set the cancel flag while a long-running
99    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
100    /// is discarded; the caller should surface this as a timeout
101    /// to the client.
102    Cancelled,
103}
104
105impl fmt::Display for EngineError {
106    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107        match self {
108            Self::Parse(e) => write!(f, "parse: {e}"),
109            Self::Storage(e) => write!(f, "storage: {e}"),
110            Self::Eval(e) => write!(f, "eval: {e}"),
111            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
112            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
113            Self::NoActiveTransaction => f.write_str("no active transaction"),
114            Self::WriteRequired => {
115                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
116            }
117            Self::RowLimitExceeded(n) => {
118                write!(f, "query exceeded max_query_rows={n}")
119            }
120            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
121        }
122    }
123}
124
125impl From<ParseError> for EngineError {
126    fn from(e: ParseError) -> Self {
127        Self::Parse(e)
128    }
129}
130impl From<StorageError> for EngineError {
131    fn from(e: StorageError) -> Self {
132        Self::Storage(e)
133    }
134}
135impl From<EvalError> for EngineError {
136    fn from(e: EvalError) -> Self {
137        Self::Eval(e)
138    }
139}
140
// NOTE(review): the paragraph below describes `Engine` (declared further
// down in this file), not `ClockFn`. It was previously part of the `///`
// block on this alias, so rustdoc rendered it as `ClockFn` documentation.
// It is kept here as a plain comment so the text is not lost.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since the Unix
/// epoch.
///
/// The engine is `no_std`, so it can't reach for `std::time` itself —
/// callers (`spg-server`, the sqllogictest runner) inject a concrete
/// implementation. Where the engine stores this as `Option<ClockFn>`,
/// `None` means `NOW()` / `CURRENT_*` raise `Unsupported`.
pub type ClockFn = fn() -> i64;
150
/// Function pointer that yields 16 cryptographically random bytes.
///
/// As with `ClockFn`, the `no_std` engine cannot read /dev/urandom on its
/// own, so the host (`spg-server`) injects an OS-backed source. When no
/// function is installed (`None`), SQL-driven `CREATE USER` falls back to
/// a deterministic salt derived from the username — acceptable in tests;
/// the server always installs a real RNG, so production paths never take
/// that fallback.
pub type SaltFn = fn() -> [u8; 16];
158
/// v4.5 cooperative cancellation token.
///
/// Long-running SELECT / UPDATE / DELETE loops poll `is_cancelled` at
/// row-loop checkpoints and bail out with `EngineError::Cancelled`.
/// The host (`spg-server`) owns one `AtomicBool` per query, has a
/// watchdog thread set it after `SPG_QUERY_TIMEOUT_MS`, and hands it to
/// the engine through `execute_with_cancel` /
/// `execute_readonly_with_cancel`.
///
/// `CancelToken::none()` carries no flag and therefore never cancels;
/// the legacy `execute` and `execute_readonly` entry points use it so
/// existing callers don't change.
#[derive(Debug, Clone, Copy)]
pub struct CancelToken<'a> {
    // Borrowed, caller-owned flag; `None` for the no-op token.
    flag: Option<&'a core::sync::atomic::AtomicBool>,
}
173
174impl<'a> CancelToken<'a> {
175    #[must_use]
176    pub const fn none() -> Self {
177        Self { flag: None }
178    }
179
180    #[must_use]
181    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
182        Self { flag: Some(f) }
183    }
184
185    #[must_use]
186    pub fn is_cancelled(self) -> bool {
187        self.flag
188            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
189    }
190
191    /// Returns `Err(Cancelled)` if the token has been tripped.
192    /// Used at row-loop checkpoints to bail cooperatively without
193    /// scattering raw `is_cancelled` checks across the executor.
194    #[inline]
195    pub fn check(self) -> Result<(), EngineError> {
196        if self.is_cancelled() {
197            Err(EngineError::Cancelled)
198        } else {
199            Ok(())
200        }
201    }
202}
203
204// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
205// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
206//
207// Wraps a catalog blob + a user blob behind a small header so the
208// server can persist both atomically without inventing a new file.
209// Bare catalog blobs (v3.x) still load via `restore_envelope` since
210// the magic check fails fast and the function falls back to
211// `Catalog::deserialize`.
212//
213// Layout — v1 (v4.1, no CRC):
214//   [8 bytes magic "SPGENV01"]
215//   [u8 version = 1]
216//   [u32 catalog_len][catalog bytes]
217//   [u32 users_len][users bytes]
218//
219// Layout — v2 (v4.37, CRC32 of body):
220//   [8 bytes magic "SPGENV01"]
221//   [u8 version = 2]
222//   [u32 catalog_len][catalog bytes]
223//   [u32 users_len][users bytes]
224//   [u32 crc32]                      ← CRC32 of every byte before it.
225//
226// Layout — v3 (v6.1.2, publications trailer):
227//   [8 bytes magic "SPGENV01"]
228//   [u8 version = 3]
229//   [u32 catalog_len][catalog bytes]
230//   [u32 users_len][users bytes]
231//   [u32 pubs_len][publications bytes]
232//   [u32 crc32]
233//
234// Layout — v4 (v6.1.4, subscriptions trailer):
235//   [8 bytes magic "SPGENV01"]
236//   [u8 version = 4]
237//   [u32 catalog_len][catalog bytes]
238//   [u32 users_len][users bytes]
239//   [u32 pubs_len][publications bytes]
240//   [u32 subs_len][subscriptions bytes]
241//   [u32 crc32]
242//
243// Layout — v5 (v6.2.0, statistics trailer):
244//   [8 bytes magic "SPGENV01"]
245//   [u8 version = 5]
246//   [u32 catalog_len][catalog bytes]
247//   [u32 users_len][users bytes]
248//   [u32 pubs_len][publications bytes]
249//   [u32 subs_len][subscriptions bytes]
250//   [u32 stats_len][statistics bytes]      ← NEW
251//   [u32 crc32]
252//
253// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
254// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
255// statistics; v3 loads with empty subscriptions + statistics; v4
256// loads with empty statistics; v5 deserialises all three. Older
257// SPG versions reading a v5 envelope fall through the version
258// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
259// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
260
/// Eight-byte tag that opens every snapshot envelope.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// v1 layout: catalog + users, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// v2 layout: v1 plus a trailing CRC32 of the body.
const ENVELOPE_VERSION_V2: u8 = 2;
/// v3 layout: adds the publications section.
const ENVELOPE_VERSION_V3: u8 = 3;
/// v4 layout: adds the subscriptions section.
const ENVELOPE_VERSION_V4: u8 = 4;
/// v5 layout: adds the statistics section; the version `build_envelope` writes.
const ENVELOPE_VERSION_V5: u8 = 5;
267
268fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
269    let mut out = Vec::with_capacity(
270        8 + 1
271            + 4
272            + catalog.len()
273            + 4
274            + users.len()
275            + 4
276            + pubs.len()
277            + 4
278            + subs.len()
279            + 4
280            + stats.len()
281            + 4,
282    );
283    out.extend_from_slice(ENVELOPE_MAGIC);
284    out.push(ENVELOPE_VERSION_V5);
285    out.extend_from_slice(
286        &u32::try_from(catalog.len())
287            .expect("≤ 4G catalog")
288            .to_le_bytes(),
289    );
290    out.extend_from_slice(catalog);
291    out.extend_from_slice(
292        &u32::try_from(users.len())
293            .expect("≤ 4G users")
294            .to_le_bytes(),
295    );
296    out.extend_from_slice(users);
297    out.extend_from_slice(
298        &u32::try_from(pubs.len())
299            .expect("≤ 4G publications")
300            .to_le_bytes(),
301    );
302    out.extend_from_slice(pubs);
303    out.extend_from_slice(
304        &u32::try_from(subs.len())
305            .expect("≤ 4G subscriptions")
306            .to_le_bytes(),
307    );
308    out.extend_from_slice(subs);
309    out.extend_from_slice(
310        &u32::try_from(stats.len())
311            .expect("≤ 4G statistics")
312            .to_le_bytes(),
313    );
314    out.extend_from_slice(stats);
315    let crc = spg_crypto::crc32::crc32(&out);
316    out.extend_from_slice(&crc.to_le_bytes());
317    out
318}
319
/// Result of `split_envelope`.
///
/// * `Bare` — the buffer is not a recognisable envelope (wrong magic,
///   unknown version, or truncated/over-long sections). Callers treat it
///   as the v3.x catalog-only fallback.
/// * `Pair` — a well-formed v1–v5 envelope split into its sections.
///   `catalog` and `users` are always present. `publications` is `Some`
///   from v3 on, `subscriptions` from v4 on and `statistics` for v5; each
///   is `None` for versions that predate the section.
/// * `CrcMismatch` — a v2+ envelope whose stored trailing CRC32
///   (`expected`) differs from the CRC32 computed over the body
///   (`computed`).
enum EnvelopeParse<'a> {
    Bare,
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        publications: Option<&'a [u8]>,
        subscriptions: Option<&'a [u8]>,
        statistics: Option<&'a [u8]>,
    },
    CrcMismatch {
        expected: u32,
        computed: u32,
    },
}
340
341/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
342/// `Bare` for a buffer that doesn't look like an envelope (v3.x
343/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
344/// whose trailing CRC32 doesn't match the body.
345fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
346    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
347        return EnvelopeParse::Bare;
348    }
349    let version = buf[8];
350    if !matches!(
351        version,
352        ENVELOPE_VERSION_V1
353            | ENVELOPE_VERSION_V2
354            | ENVELOPE_VERSION_V3
355            | ENVELOPE_VERSION_V4
356            | ENVELOPE_VERSION_V5
357    ) {
358        return EnvelopeParse::Bare;
359    }
360    let mut p = 9usize;
361    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
362        return EnvelopeParse::Bare;
363    };
364    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
365        return EnvelopeParse::Bare;
366    };
367    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
368    p += 4;
369    if p + cat_len + 4 > buf.len() {
370        return EnvelopeParse::Bare;
371    }
372    let catalog = &buf[p..p + cat_len];
373    p += cat_len;
374    let Some(user_len_bytes) = buf.get(p..p + 4) else {
375        return EnvelopeParse::Bare;
376    };
377    let Ok(user_len_arr) = user_len_bytes.try_into() else {
378        return EnvelopeParse::Bare;
379    };
380    let user_len = u32::from_le_bytes(user_len_arr) as usize;
381    p += 4;
382    if p + user_len > buf.len() {
383        return EnvelopeParse::Bare;
384    }
385    let users = &buf[p..p + user_len];
386    p += user_len;
387    let publications = if matches!(
388        version,
389        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
390    ) {
391        // [u32 pubs_len][publications bytes]
392        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
393            return EnvelopeParse::Bare;
394        };
395        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
396            return EnvelopeParse::Bare;
397        };
398        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
399        p += 4;
400        if p + pubs_len > buf.len() {
401            return EnvelopeParse::Bare;
402        }
403        let pubs_slice = &buf[p..p + pubs_len];
404        p += pubs_len;
405        Some(pubs_slice)
406    } else {
407        None
408    };
409    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
410        // [u32 subs_len][subscriptions bytes]
411        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
412            return EnvelopeParse::Bare;
413        };
414        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
415            return EnvelopeParse::Bare;
416        };
417        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
418        p += 4;
419        if p + subs_len > buf.len() {
420            return EnvelopeParse::Bare;
421        }
422        let subs_slice = &buf[p..p + subs_len];
423        p += subs_len;
424        Some(subs_slice)
425    } else {
426        None
427    };
428    let statistics = if version == ENVELOPE_VERSION_V5 {
429        // [u32 stats_len][statistics bytes]
430        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
431            return EnvelopeParse::Bare;
432        };
433        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
434            return EnvelopeParse::Bare;
435        };
436        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
437        p += 4;
438        if p + stats_len > buf.len() {
439            return EnvelopeParse::Bare;
440        }
441        let stats_slice = &buf[p..p + stats_len];
442        p += stats_len;
443        Some(stats_slice)
444    } else {
445        None
446    };
447    if matches!(
448        version,
449        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
450    ) {
451        if p + 4 != buf.len() {
452            return EnvelopeParse::Bare;
453        }
454        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
455            return EnvelopeParse::Bare;
456        };
457        let expected = u32::from_le_bytes(crc_arr);
458        let computed = spg_crypto::crc32::crc32(&buf[..p]);
459        if expected != computed {
460            return EnvelopeParse::CrcMismatch { expected, computed };
461        }
462    } else if p != buf.len() {
463        // v1: must end exactly at the users section.
464        return EnvelopeParse::Bare;
465    }
466    EnvelopeParse::Pair {
467        catalog,
468        users,
469        publications,
470        subscriptions,
471        statistics,
472    }
473}
474
/// v4.41.1 opaque transaction handle.
///
/// `Engine::alloc_tx_id` hands these out and `Engine::execute_in` takes
/// one, so dispatch can tell which in-flight TX a statement belongs to.
/// `IMPLICIT_TX` is the reserved slot that every legacy caller — engine
/// self-tests, spg-cli, spg-embedded, startup replay — uses implicitly
/// through the unchanged `Engine::execute(sql)` API.
///
/// At runtime v4.41.1 keeps at most one active slot (dispatch holds
/// `engine.write()` across the wrap, same as v4.34). The map shape exists
/// so v4.42 can turn on N in-flight implicit TXs without reshuffling the
/// engine internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);

/// Slot reserved for `Engine::execute(sql)`, the legacy
/// single-global-shadow path. Handles from `alloc_tx_id` start at 1.
pub const IMPLICIT_TX: TxId = TxId(0);
490
/// v6.7.3 — segment-size threshold that `COMPACT COLD SEGMENTS` uses when
/// no explicit target is given (4 MiB).
///
/// A segment is eligible to merge when its `OwnedSegment::bytes().len()`
/// is **strictly** below this value. spg-server overrides it from
/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES`.
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 << 20;
497
498/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
499/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
500/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
501/// rolls back (slot removed, catalog discarded).
502#[derive(Debug, Default, Clone)]
503struct TxState {
504    /// The TX's shadow copy of the catalog. Started as a clone of
505    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
506    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
507    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
508    catalog: Catalog,
509    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
510    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
511    /// `ROLLBACK TO <name>` restores from the entry and pops everything
512    /// after it; `RELEASE <name>` discards the entry and everything
513    /// after; COMMIT/ROLLBACK clears the whole stack.
514    savepoints: Vec<(String, Catalog)>,
515}
516
517/// v7.11.0 — frozen read-only view of the engine's committed state.
518/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
519/// catalog, statistics, clock function, and row-cap config — the
520/// four fields the `execute_readonly` path actually reads. Cheap to
521/// `Clone` (each clone shares the underlying `PersistentVec` row
522/// storage; only the trie root pointers copy). Send + Sync so a
523/// snapshot can be moved across `tokio::task::spawn_blocking`
524/// boundaries without coordination.
525///
526/// The contract: a snapshot reflects the engine's state at the
527/// moment `clone_snapshot()` returned. Subsequent writes to the
528/// engine are NOT visible. Callers who need fresher data take a
529/// new snapshot.
530#[derive(Debug, Clone)]
531pub struct CatalogSnapshot {
532    catalog: Catalog,
533    statistics: statistics::Statistics,
534    clock: Option<ClockFn>,
535    max_query_rows: Option<usize>,
536}
537
538#[derive(Debug, Default)]
539pub struct Engine {
540    /// Committed catalog — what survives `Engine::snapshot()` and what
541    /// outside-TX `SELECT`s read.
542    catalog: Catalog,
543    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
544    /// v4.41.1 runtime invariant: at most one entry (single-writer
545    /// model unchanged). v4.42 will let dispatch hold multiple entries
546    /// concurrently for group commit + engine MVCC.
547    tx_catalogs: BTreeMap<TxId, TxState>,
548    /// Which slot the next exec_* call should mutate. Set by
549    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
550    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
551    /// write goes straight against `catalog`).
552    current_tx: Option<TxId>,
553    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
554    /// reserved for `IMPLICIT_TX`.
555    next_tx_id: u64,
556    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
557    /// / `CURRENT_DATE`. Set by the host environment.
558    clock: Option<ClockFn>,
559    /// v4.1 cryptographic RNG for per-user password salt. Set by the
560    /// host. `None` means SQL-driven `CREATE USER` uses a
561    /// deterministic fallback — see `SaltFn`.
562    salt_fn: Option<SaltFn>,
563    /// v4.2 per-query row cap. `None` = unlimited. When set, a
564    /// SELECT that materialises more than `n` rows returns
565    /// `EngineError::RowLimitExceeded`. Enforced before the result
566    /// is shaped into wire frames so a runaway scan can't blow the
567    /// server's heap.
568    max_query_rows: Option<usize>,
569    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
570    /// the server decides what that means at the auth boundary
571    /// (open mode vs legacy single-password mode). User CRUD goes
572    /// through `create_user`/`drop_user`/`verify_user`; persistence
573    /// rides the snapshot envelope alongside the catalog.
574    users: UserStore,
575    /// v6.1.2 logical-replication publication catalog. Empty until
576    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
577    /// trailer (see `build_envelope`).
578    publications: publications::Publications,
579    /// v6.1.4 logical-replication subscription catalog. Empty until
580    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
581    /// trailer.
582    subscriptions: subscriptions::Subscriptions,
583    /// v6.2.0 — per-column statistics for the cost-based optimizer.
584    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
585    /// table. Persistence rides the v5 envelope trailer.
586    statistics: statistics::Statistics,
587    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
588    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
589    /// the snapshot envelope (rebuilt on demand after restart).
590    plan_cache: plan_cache::PlanCache,
591    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
592    /// surfaced via `spg_stat_query` virtual table. Updated by the
593    /// `execute_*` paths after a successful execute.
594    query_stats: query_stats::QueryStats,
595    /// v6.5.2 — connection-state provider callback. spg-server
596    /// registers a function at startup that snapshots its
597    /// per-pgwire-connection registry into `ActivityRow`s; engine
598    /// reads through it on every `SELECT * FROM spg_stat_activity`.
599    /// `None` ⇒ no-data (returns empty rows; matches the no_std
600    /// embedded callers that don't run pgwire).
601    activity_provider: Option<ActivityProvider>,
602    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
603    /// activity_provider: spg-server registers both at startup;
604    /// engine reads through on `SELECT * FROM spg_audit_chain` and
605    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
606    audit_chain_provider: Option<AuditChainProvider>,
607    audit_verifier: Option<AuditVerifier>,
608    /// v6.5.6 — slow-query log threshold in microseconds. When set,
609    /// every successful execute whose elapsed exceeds the threshold
610    /// gets fed to the registered slow-query log callback (so
611    /// spg-server can emit a structured log line). Default `None`
612    /// = no slow-query logging.
613    slow_query_threshold_us: Option<u64>,
614    slow_query_logger: Option<SlowQueryLogger>,
615    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
616    /// Only `default_text_search_config` is consumed by the engine
617    /// today (the FTS function dispatcher reads it when
618    /// `to_tsvector(text)` is called without an explicit config).
619    /// All other names are accepted + recorded so PG-dump output
620    /// loads, but have no behavioural effect.
621    session_params: BTreeMap<String, String>,
622    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
623    /// Each time the engine executes a `DeferredEmbeddedStmt` it
624    /// increments this; the recursive `execute_stmt_with_cancel`
625    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
626    /// to bound runaway cascades (trigger A's UPDATE on table B
627    /// fires trigger B which UPDATEs table A which fires trigger
628    /// A again…). Reset to 0 once the original DML returns.
629    trigger_recursion_depth: u32,
630    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
631    /// (mysqldump preamble), the FK existence + arity check at
632    /// CREATE TABLE time is deferred. FKs referencing a
633    /// not-yet-existing parent land in `pending_foreign_keys`
634    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
635    /// the queue and resolves each FK against the now-complete
636    /// catalog. Empty by default; the queue is drained on every
637    /// `RESET ALL` too.
638    foreign_key_checks: bool,
639    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
640}
641
/// v7.12.7 — upper bound on how deeply trigger-emitted embedded SQL may
/// nest. A depth of 16 is well beyond what a normal trigger graph needs,
/// yet still stops an infinite trigger loop from wedging the engine.
const MAX_TRIGGER_RECURSION: u32 = 16;
646
/// v6.5.6 — signature of the slow-query log callback. It is invoked with
/// `(sql, elapsed_us)` once for each successful execute that crosses the
/// configured threshold.
pub type SlowQueryLogger = fn(&str, u64);
651
652/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
653/// state. Round-trips through `Engine::execute` to recreate the
654/// same schema (sans data + indexes — indexes are emitted as a
655/// separate `CREATE INDEX` chain in `spg_database_ddl`).
656fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
657    let mut out = alloc::format!("CREATE TABLE {name} (");
658    for (i, col) in columns.iter().enumerate() {
659        if i > 0 {
660            out.push_str(", ");
661        }
662        out.push_str(&col.name);
663        out.push(' ');
664        out.push_str(&render_data_type(col.ty));
665        if !col.nullable {
666            out.push_str(" NOT NULL");
667        }
668        if col.auto_increment {
669            out.push_str(" AUTO_INCREMENT");
670        }
671    }
672    out.push(')');
673    out
674}
675
676fn render_data_type(ty: DataType) -> String {
677    match ty {
678        DataType::SmallInt => "SMALLINT".into(),
679        DataType::Int => "INT".into(),
680        DataType::BigInt => "BIGINT".into(),
681        DataType::Float => "FLOAT".into(),
682        DataType::Text => "TEXT".into(),
683        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
684        DataType::Char(n) => alloc::format!("CHAR({n})"),
685        DataType::Bool => "BOOL".into(),
686        DataType::Vector { dim, encoding } => match encoding {
687            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
688            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
689            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
690        },
691        DataType::Numeric { precision, scale } => {
692            alloc::format!("NUMERIC({precision},{scale})")
693        }
694        DataType::Date => "DATE".into(),
695        DataType::Timestamp => "TIMESTAMP".into(),
696        DataType::Interval => "INTERVAL".into(),
697        DataType::Json => "JSON".into(),
698        DataType::Jsonb => "JSONB".into(),
699        DataType::Timestamptz => "TIMESTAMPTZ".into(),
700        DataType::Bytes => "BYTEA".into(),
701        DataType::TextArray => "TEXT[]".into(),
702        DataType::IntArray => "INT[]".into(),
703        DataType::BigIntArray => "BIGINT[]".into(),
704        DataType::TsVector => "TSVECTOR".into(),
705        DataType::TsQuery => "TSQUERY".into(),
706    }
707}
708
/// v6.5.2 — a single row of `spg_stat_activity`.
///
/// Public so spg-server can build rows itself without the engine having
/// to re-export internal dispatch types.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    pub pid: u32,
    pub user: String,
    pub started_at_us: i64,
    pub current_sql: String,
    pub wait_event: String,
    pub elapsed_us: i64,
    pub in_transaction: bool,
}

/// v6.5.2 — provider callback type. Each call returns a fresh snapshot;
/// the engine does not cache the returned rows.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
726
/// v6.5.3 — a single row of `spg_audit_chain`.
///
/// Public so spg-server can build rows straight from its `AuditEntry`.
#[derive(Debug, Clone)]
pub struct AuditRow {
    pub seq: i64,
    pub ts_ms: i64,
    pub prev_hash_hex: String,
    pub entry_hash_hex: String,
    pub sql: String,
}

/// v6.5.3 — chain-table provider: a function pointer, registered by
/// spg-server, that snapshots the audit log as rows.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier: a function pointer, registered by spg-server,
/// that verifies the audit log and returns
/// `(verified_count, broken_at_seq)`. `broken_at_seq` is `-1` when the
/// chain is clean.
pub type AuditVerifier = fn() -> (i64, i64);
744
745impl Engine {
746    pub fn new() -> Self {
747        Self {
748            catalog: Catalog::new(),
749            tx_catalogs: BTreeMap::new(),
750            current_tx: None,
751            next_tx_id: 1,
752            clock: None,
753            salt_fn: None,
754            max_query_rows: None,
755            users: UserStore::new(),
756            publications: publications::Publications::new(),
757            subscriptions: subscriptions::Subscriptions::new(),
758            statistics: statistics::Statistics::new(),
759            plan_cache: plan_cache::PlanCache::new(),
760            query_stats: query_stats::QueryStats::new(),
761            activity_provider: None,
762            audit_chain_provider: None,
763            audit_verifier: None,
764            slow_query_threshold_us: None,
765            slow_query_logger: None,
766            session_params: BTreeMap::new(),
767            trigger_recursion_depth: 0,
768            foreign_key_checks: true,
769            pending_foreign_keys: Vec::new(),
770        }
771    }
772
773    /// v7.11.0 — clone the engine's committed catalog + read-time
774    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
775    /// backed by `PersistentVec`; cloning is O(log n) per table).
776    /// Subsequent writes to this engine are invisible to the
777    /// snapshot; the snapshot is self-contained and can be moved
778    /// to another thread for concurrent `execute_readonly_on_snapshot`
779    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
780    /// and any other read-fanout pattern.
781    #[must_use]
782    pub fn clone_snapshot(&self) -> CatalogSnapshot {
783        CatalogSnapshot {
784            catalog: self.active_catalog().clone(),
785            statistics: self.statistics.clone(),
786            clock: self.clock,
787            max_query_rows: self.max_query_rows,
788        }
789    }
790
791    /// v7.11.1 — execute a read-only SQL statement against a
792    /// `CatalogSnapshot` without touching this engine. Same
793    /// semantics as `execute_readonly` but parameterised on the
794    /// snapshot's catalog. Reject DDL/DML the same way
795    /// `execute_readonly` does. Static-on-Self so the caller can
796    /// dispatch without holding an `Engine` borrow alongside the
797    /// snapshot.
798    pub fn execute_readonly_on_snapshot(
799        snapshot: &CatalogSnapshot,
800        sql: &str,
801    ) -> Result<QueryResult, EngineError> {
802        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
803    }
804
805    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
806    /// cancellation. Builds a transient `Engine` over the snapshot
807    /// state, runs `execute_readonly_with_cancel`, drops. The
808    /// transient engine is cheap to construct (no I/O; everything
809    /// is just struct moves) and lets the existing read path stay
810    /// untouched.
811    pub fn execute_readonly_on_snapshot_with_cancel(
812        snapshot: &CatalogSnapshot,
813        sql: &str,
814        cancel: CancelToken<'_>,
815    ) -> Result<QueryResult, EngineError> {
816        let transient = Engine {
817            catalog: snapshot.catalog.clone(),
818            statistics: snapshot.statistics.clone(),
819            clock: snapshot.clock,
820            max_query_rows: snapshot.max_query_rows,
821            ..Engine::default()
822        };
823        transient.execute_readonly_with_cancel(sql, cancel)
824    }
825
826    /// Construct an engine restored from a previously-snapshotted catalog
827    /// (see `snapshot()`).
828    pub fn restore(catalog: Catalog) -> Self {
829        Self {
830            catalog,
831            tx_catalogs: BTreeMap::new(),
832            current_tx: None,
833            next_tx_id: 1,
834            clock: None,
835            salt_fn: None,
836            max_query_rows: None,
837            users: UserStore::new(),
838            publications: publications::Publications::new(),
839            subscriptions: subscriptions::Subscriptions::new(),
840            statistics: statistics::Statistics::new(),
841            plan_cache: plan_cache::PlanCache::new(),
842            query_stats: query_stats::QueryStats::new(),
843            activity_provider: None,
844            audit_chain_provider: None,
845            audit_verifier: None,
846            slow_query_threshold_us: None,
847            slow_query_logger: None,
848            session_params: BTreeMap::new(),
849            trigger_recursion_depth: 0,
850            foreign_key_checks: true,
851            pending_foreign_keys: Vec::new(),
852        }
853    }
854
855    /// Restore an engine + user table from a v4.1 envelope produced
856    /// by `snapshot_with_users()`. Falls back to plain catalog-only
857    /// restore if the envelope magic isn't present (so v3.x snapshot
858    /// files still load). v6.1.2 adds the optional publications
859    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
860    /// empty publication table.
861    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
862        match split_envelope(buf) {
863            EnvelopeParse::Pair {
864                catalog: catalog_bytes,
865                users: user_bytes,
866                publications: pub_bytes,
867                subscriptions: sub_bytes,
868                statistics: stats_bytes,
869            } => {
870                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
871                let users = users::deserialize_users(user_bytes)
872                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
873                let publications = match pub_bytes {
874                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
875                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
876                    })?,
877                    None => publications::Publications::new(),
878                };
879                let subscriptions = match sub_bytes {
880                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
881                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
882                    })?,
883                    None => subscriptions::Subscriptions::new(),
884                };
885                let statistics = match stats_bytes {
886                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
887                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
888                    })?,
889                    None => statistics::Statistics::new(),
890                };
891                Ok(Self {
892                    catalog,
893                    tx_catalogs: BTreeMap::new(),
894                    current_tx: None,
895                    next_tx_id: 1,
896                    clock: None,
897                    salt_fn: None,
898                    max_query_rows: None,
899                    users,
900                    publications,
901                    subscriptions,
902                    statistics,
903                    plan_cache: plan_cache::PlanCache::new(),
904                    query_stats: query_stats::QueryStats::new(),
905                    activity_provider: None,
906                    audit_chain_provider: None,
907                    audit_verifier: None,
908                    slow_query_threshold_us: None,
909                    slow_query_logger: None,
910                    session_params: BTreeMap::new(),
911                    trigger_recursion_depth: 0,
912            foreign_key_checks: true,
913            pending_foreign_keys: Vec::new(),
914                })
915            }
916            EnvelopeParse::CrcMismatch { expected, computed } => {
917                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
918                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
919                ))))
920            }
921            EnvelopeParse::Bare => {
922                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
923                Ok(Self::restore(catalog))
924            }
925        }
926    }
927
928    pub const fn users(&self) -> &UserStore {
929        &self.users
930    }
931
932    /// `salt` is supplied by the caller (the host has a random
933    /// source; the engine is `no_std`). Caller should pass a fresh
934    /// 16-byte random value per user.
935    pub fn create_user(
936        &mut self,
937        name: &str,
938        password: &str,
939        role: Role,
940        salt: [u8; 16],
941    ) -> Result<(), UserError> {
942        self.users.create(name, password, role, salt)?;
943        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
944        // auth can verify without re-running PBKDF2 per attempt.
945        // Uses a fresh salt from the host RNG (falls back to a
946        // deterministic per-username salt when no RNG is wired, same
947        // as the legacy hash path).
948        let scram_salt = self.salt_fn.map_or_else(
949            || {
950                let mut s = [0u8; users::SCRAM_SALT_LEN];
951                let digest = spg_crypto::hash(name.as_bytes());
952                // Use bytes 16..32 of BLAKE3 so we don't reuse the
953                // exact same fallback salt as the BLAKE3 hash path.
954                s.copy_from_slice(&digest[16..32]);
955                s
956            },
957            |f| f(),
958        );
959        self.users
960            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
961        Ok(())
962    }
963
964    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
965        self.users.drop(name)
966    }
967
968    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
969        self.users.verify(name, password)
970    }
971
972    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
973    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
974    #[must_use]
975    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
976        self.clock = Some(clock);
977        self
978    }
979
980    /// Builder: attach an OS-backed RNG for per-user password salts.
981    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
982    #[must_use]
983    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
984        self.salt_fn = Some(f);
985        self
986    }
987
988    /// Builder: cap the number of rows a single SELECT may return.
989    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
990    /// the bound is checked inside the executor so a runaway
991    /// catalog scan can't allocate millions of rows before the
992    /// server gets a chance to reject the result.
993    #[must_use]
994    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
995        self.max_query_rows = Some(n);
996        self
997    }
998
999    /// The *committed* catalog. Note: during a transaction this returns the
1000    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
1001    /// the shadow. Tests that inspect outside-TX state should use this.
1002    pub const fn catalog(&self) -> &Catalog {
1003        &self.catalog
1004    }
1005
1006    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1007    /// adds the rule that an open TX's shadow is never snapshotted — only the
1008    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1009    /// when there are users to persist; an empty user table snapshots as the
1010    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1011    /// adds publications to the envelope condition: either non-empty
1012    /// users OR non-empty publications now triggers the envelope path.
1013    pub fn snapshot(&self) -> Vec<u8> {
1014        if self.users.is_empty()
1015            && self.publications.is_empty()
1016            && self.subscriptions.is_empty()
1017            && self.statistics.is_empty()
1018        {
1019            self.catalog.serialize()
1020        } else {
1021            build_envelope(
1022                &self.catalog.serialize(),
1023                &users::serialize_users(&self.users),
1024                &self.publications.serialize(),
1025                &self.subscriptions.serialize(),
1026                &self.statistics.serialize(),
1027            )
1028        }
1029    }
1030
1031    /// True when at least one TX slot is in flight. v4.41.1 runtime
1032    /// invariant: at most one slot active at a time (dispatch holds
1033    /// `engine.write()` across the entire wrap). v4.42 will let this
1034    /// return true with multiple slots concurrently.
1035    pub fn in_transaction(&self) -> bool {
1036        !self.tx_catalogs.is_empty()
1037    }
1038
1039    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1040    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1041    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1042    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1043    /// sequentially under a single `engine.write()` so each task's
1044    /// mutations accumulate into shared state, then either keeps the
1045    /// accumulated state (fsync OK) or restores the pre-image via
1046    /// `replace_catalog` (fsync err).
1047    pub fn alloc_tx_id(&mut self) -> TxId {
1048        let id = TxId(self.next_tx_id);
1049        self.next_tx_id = self.next_tx_id.saturating_add(1);
1050        id
1051    }
1052
1053    /// v4.42 — atomically replace the live catalog. Used by the
1054    /// commit-barrier leader to roll back a group whose batched
1055    /// fsync failed: the leader snapshots `engine.catalog().clone()`
1056    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
1057    /// at group start, sequentially applies each task's BEGIN+sql+
1058    /// COMMIT under the same write lock to accumulate mutations
1059    /// into shared state, batches the WAL bytes, fsyncs once, and
1060    /// on failure calls this with the pre-image to undo every
1061    /// task in the group at once.
1062    ///
1063    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
1064    /// explicit-TX slot from a concurrent client (created via the
1065    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
1066    /// MVCC-readers v5+ work) has its own snapshot baked into the
1067    /// slot — restoring `self.catalog` to the pre-image leaves
1068    /// those slots untouched, exactly as they were when the leader
1069    /// took the lock. The leader's own implicit-TX slots are all
1070    /// already discarded (`exec_commit` removed them as each
1071    /// task's COMMIT ran) by the time this is reached.
1072    pub fn replace_catalog(&mut self, catalog: Catalog) {
1073        self.catalog = catalog;
1074    }
1075
1076    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1077    /// so tests + the spg-server freezer can drive a freeze without
1078    /// reaching into the private `active_catalog_mut`. v6.7.4
1079    /// parallel freezer will build on this surface.
1080    ///
1081    /// Marks the table's cached `cold_row_count` stale because the
1082    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1083    pub fn freeze_oldest_to_cold(
1084        &mut self,
1085        table_name: &str,
1086        index_name: &str,
1087        max_rows: usize,
1088    ) -> Result<spg_storage::FreezeReport, EngineError> {
1089        let report = self
1090            .active_catalog_mut()
1091            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1092            .map_err(EngineError::Storage)?;
1093        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1094            t.mark_cold_row_count_stale();
1095        }
1096        Ok(report)
1097    }
1098
1099    /// v6.7.5 — public shim used by the spg-server follower's
1100    /// segment-forwarding receiver. Registers a cold-tier segment
1101    /// at a specific id (the master's id, as transmitted on the
1102    /// wire) so the follower's BTree-Cold locators stay byte-
1103    /// identical with the master's. Wraps
1104    /// `Catalog::load_segment_bytes_at` under the standard
1105    /// clone-mutate-replace pattern.
1106    ///
1107    /// Returns `Ok(())` on success **and** on the "slot already
1108    /// occupied" case — a follower mid-reconnect may receive a
1109    /// segment chunk for a segment_id it already has on disk
1110    /// (forwarded last session); the caller should treat that
1111    /// path as a no-op rather than a fatal error.
1112    pub fn receive_cold_segment(
1113        &mut self,
1114        segment_id: u32,
1115        bytes: Vec<u8>,
1116    ) -> Result<(), EngineError> {
1117        let mut new_cat = self.catalog.clone();
1118        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1119            Ok(()) => {
1120                self.replace_catalog(new_cat);
1121                Ok(())
1122            }
1123            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1124            Err(e) => Err(EngineError::Storage(e)),
1125        }
1126    }
1127
1128    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1129    /// driving every BTree index on every user table. Returns one
1130    /// `(table, index, report)` triple for each merge that
1131    /// actually happened (no-op (table, index) pairs are filtered
1132    /// out so callers can size persist-side work to the live
1133    /// merges). Caller is responsible for persisting each
1134    /// `report.merged_segment_bytes` and updating the on-disk
1135    /// segment registry; engine layer is no_std and never
1136    /// touches disk.
1137    ///
1138    /// Marks every touched table's cached `cold_row_count` stale
1139    /// — compaction GC'd some shadowed rows, so the count must be
1140    /// re-derived on the next ANALYZE.
1141    pub fn compact_cold_segments_with_target(
1142        &mut self,
1143        target_segment_bytes: u64,
1144    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1145        let table_names = self.active_catalog().table_names();
1146        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1147        for tname in table_names {
1148            if is_internal_table_name(&tname) {
1149                continue;
1150            }
1151            let idx_names: Vec<String> = {
1152                let Some(t) = self.active_catalog().get(&tname) else {
1153                    continue;
1154                };
1155                t.indices()
1156                    .iter()
1157                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1158                    .map(|i| i.name.clone())
1159                    .collect()
1160            };
1161            for iname in idx_names {
1162                let report = self
1163                    .active_catalog_mut()
1164                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1165                    .map_err(EngineError::Storage)?;
1166                if report.merged_segment_id.is_some() {
1167                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1168                        t.mark_cold_row_count_stale();
1169                    }
1170                    reports.push((tname.clone(), iname, report));
1171                }
1172            }
1173        }
1174        Ok(reports)
1175    }
1176
1177    fn active_catalog(&self) -> &Catalog {
1178        match self.current_tx {
1179            Some(t) => self
1180                .tx_catalogs
1181                .get(&t)
1182                .map_or(&self.catalog, |s| &s.catalog),
1183            None => &self.catalog,
1184        }
1185    }
1186
1187    /// v7.12.4 — snapshot every row-level trigger on `table` that
1188    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1189    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1190    /// referenced function definition. Returned as a vec of owned
1191    /// `FunctionDef` so the row-write loop can fire them without
1192    /// holding a borrow on the catalog (which would conflict with
1193    /// the table.insert / update_row / delete mutable borrows).
1194    fn snapshot_row_triggers(
1195        &self,
1196        table: &str,
1197        event: &str,
1198        timing: &str,
1199    ) -> Vec<spg_storage::FunctionDef> {
1200        let cat = self.active_catalog();
1201        cat.triggers()
1202            .iter()
1203            .filter(|t| {
1204                t.table == table
1205                    && t.timing.eq_ignore_ascii_case(timing)
1206                    && t.for_each.eq_ignore_ascii_case("row")
1207                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1208            })
1209            .filter_map(|t| cat.functions().get(&t.function).cloned())
1210            .collect()
1211    }
1212
1213    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1214    /// function with its `UPDATE OF cols` filter (mailrs round-5
1215    /// G7). Empty filter Vec means "fire unconditionally", matching
1216    /// the v7.12 behaviour.
1217    fn snapshot_update_row_triggers(
1218        &self,
1219        table: &str,
1220        timing: &str,
1221    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1222        let cat = self.active_catalog();
1223        cat.triggers()
1224            .iter()
1225            .filter(|t| {
1226                t.table == table
1227                    && t.timing.eq_ignore_ascii_case(timing)
1228                    && t.for_each.eq_ignore_ascii_case("row")
1229                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1230            })
1231            .filter_map(|t| {
1232                cat.functions()
1233                    .get(&t.function)
1234                    .cloned()
1235                    .map(|fd| (fd, t.update_columns.clone()))
1236            })
1237            .collect()
1238    }
1239
1240    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1241    /// Called by the INSERT / UPDATE / DELETE executors after
1242    /// their main row-write loop returns. Each statement runs
1243    /// inside the same cancel scope as the firing DML and bumps
1244    /// the recursion counter; nested embedded SQL beyond
1245    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1246    /// a trigger-graph cycle surfaces as a query failure instead
1247    /// of stack-blowing the engine.
1248    fn execute_deferred_trigger_stmts(
1249        &mut self,
1250        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1251        cancel: CancelToken<'_>,
1252    ) -> Result<(), EngineError> {
1253        for d in deferred {
1254            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1255                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1256                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1257                     {:?} would push past the {} cap — check for trigger cycles)",
1258                    self.trigger_recursion_depth,
1259                    d.function,
1260                    MAX_TRIGGER_RECURSION,
1261                ))));
1262            }
1263            self.trigger_recursion_depth += 1;
1264            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1265            self.trigger_recursion_depth -= 1;
1266            res?;
1267        }
1268        Ok(())
1269    }
1270
1271    fn active_catalog_mut(&mut self) -> &mut Catalog {
1272        let tx = self.current_tx;
1273        match tx {
1274            Some(t) => match self.tx_catalogs.get_mut(&t) {
1275                Some(s) => &mut s.catalog,
1276                None => &mut self.catalog,
1277            },
1278            None => &mut self.catalog,
1279        }
1280    }
1281
1282    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1283    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1284    /// every other statement, so the caller can fall through to the
1285    /// `&mut self` `execute` path under a write lock. Engine state is
1286    /// not mutated even on the success path (`rewrite_clock_calls`
1287    /// and `resolve_order_by_position` both mutate the locally-owned
1288    /// AST, not `self`).
1289    ///
1290    /// **v4.0 concurrency**: this is the entry point the server takes
1291    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1292    /// parallel without serialising on a single mutex.
1293    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1294        self.execute_readonly_with_cancel(sql, CancelToken::none())
1295    }
1296
1297    /// v4.5 — read path with cooperative cancellation. Token's
1298    /// `is_cancelled` is checked at the start (so a watchdog that
1299    /// already fired returns Cancelled immediately) and at row-loop
1300    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1301    /// don't bother checking.
1302    pub fn execute_readonly_with_cancel(
1303        &self,
1304        sql: &str,
1305        cancel: CancelToken<'_>,
1306    ) -> Result<QueryResult, EngineError> {
1307        cancel.check()?;
1308        let mut stmt = parser::parse_statement(sql)?;
1309        let now_micros = self.clock.map(|f| f());
1310        rewrite_clock_calls(&mut stmt, now_micros);
1311        if let Statement::Select(s) = &mut stmt {
1312            resolve_order_by_position(s);
1313            // v6.2.3 — cost-based JOIN reorder (read path).
1314            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1315        }
1316        let result = match stmt {
1317            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1318            Statement::ShowTables => Ok(self.exec_show_tables()),
1319            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1320            Statement::ShowUsers => Ok(self.exec_show_users()),
1321            Statement::ShowPublications => Ok(self.exec_show_publications()),
1322            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1323            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1324                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1325            )),
1326            Statement::Explain(e) => self.exec_explain(&e, cancel),
1327            _ => Err(EngineError::WriteRequired),
1328        };
1329        self.enforce_row_limit(result)
1330    }
1331
1332    /// v4.2: cap result-set size. Applied after the executor
1333    /// materialises rows but before they leave the engine — wrapping
1334    /// every Rows-returning exec_* function would scatter the check.
1335    fn enforce_row_limit(
1336        &self,
1337        result: Result<QueryResult, EngineError>,
1338    ) -> Result<QueryResult, EngineError> {
1339        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1340            && rows.len() > cap
1341        {
1342            return Err(EngineError::RowLimitExceeded(cap));
1343        }
1344        result
1345    }
1346
1347    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1348        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1349    }
1350
1351    /// v4.5 — write path with cooperative cancellation. Same dispatch
1352    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1353    /// a separate entry point for backward-compat with the v4.5
1354    /// public API.
1355    pub fn execute_with_cancel(
1356        &mut self,
1357        sql: &str,
1358        cancel: CancelToken<'_>,
1359    ) -> Result<QueryResult, EngineError> {
1360        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1361    }
1362
1363    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1364    /// slot identified by `tx_id` so spg-server dispatch can scope
1365    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1366    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1367    /// every other caller (engine self-tests, replay, spg-embedded)
1368    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1369    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1370        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1371    }
1372
1373    /// v4.41.1 write path with cooperative cancellation + explicit TX
1374    /// scope. Sets `self.current_tx` for the duration of the call so
1375    /// every `exec_*` helper transparently sees its TX's shadow
1376    /// catalog and savepoint stack; restores on exit so the field is
1377    /// only valid mid-call (no leakage across calls).
1378    pub fn execute_in_with_cancel(
1379        &mut self,
1380        sql: &str,
1381        tx_id: TxId,
1382        cancel: CancelToken<'_>,
1383    ) -> Result<QueryResult, EngineError> {
1384        let saved = self.current_tx;
1385        self.current_tx = Some(tx_id);
1386        let result = self.execute_inner_with_cancel(sql, cancel);
1387        self.current_tx = saved;
1388        result
1389    }
1390
1391    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1392    /// resulting [`Statement`] can be cached and re-executed via
1393    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1394    /// the simple-query path would synthesise internally (clock
1395    /// rewrites + ORDER BY position-ref resolution applied at
1396    /// prepare time, since both are session-independent). The
1397    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1398    /// nodes; they're resolved to concrete values per-call by
1399    /// `execute_prepared`'s substitution walk.
1400    ///
1401    /// Pgwire's `Parse` (P) message lands here.
1402    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1403        let mut stmt = parser::parse_statement(sql)?;
1404        let now_micros = self.clock.map(|f| f());
1405        rewrite_clock_calls(&mut stmt, now_micros);
1406        if let Statement::Select(s) = &mut stmt {
1407            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1408            // SELECT-list item BEFORE position / alias resolution so
1409            // downstream passes see the explicit list.
1410            expand_group_by_all(s);
1411            resolve_order_by_position(s);
1412            // v6.2.3 — cost-based JOIN reorder. No-op for
1413            // single-table FROMs or any non-INNER join shape.
1414            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1415        }
1416        Ok(stmt)
1417    }
1418
1419    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1420    /// the plan cache on hit, runs the full `prepare()` path on miss
1421    /// and inserts the resulting plan before returning. Skipping the
1422    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1423    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1424    ///
1425    /// Returns a cloned `Statement` (not a borrow) because the
1426    /// pgwire layer owns its `PreparedStmt` map per-session and the
1427    /// engine-level cache must stay available for other sessions.
1428    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1429    /// it replaces.
1430    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1431        // v6.3.1 — version-aware lookup. If the cached plan was
1432        // prepared before the most recent ANALYZE, evict and replan.
1433        let current_version = self.statistics.version();
1434        if let Some(plan) = self.plan_cache.get(sql) {
1435            if plan.statistics_version == current_version {
1436                return Ok(plan.stmt.clone());
1437            }
1438            // Stale entry — fall through to evict + re-prepare.
1439        }
1440        self.plan_cache.evict(sql);
1441        let stmt = self.prepare(sql)?;
1442        let source_tables = plan_cache::collect_source_tables(&stmt);
1443        let plan = plan_cache::PreparedPlan {
1444            stmt: stmt.clone(),
1445            statistics_version: current_version,
1446            source_tables,
1447            describe_columns: alloc::vec::Vec::new(),
1448        };
1449        self.plan_cache.insert(String::from(sql), plan);
1450        Ok(stmt)
1451    }
1452
    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
    ///
    /// Returns a shared borrow of the engine's plan cache; nothing is
    /// copied or modified.
    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
        &self.plan_cache
    }
1457
    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
    ///
    /// Returns an exclusive borrow of the engine's plan cache so the
    /// caller can mutate it in place.
    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
        &mut self.plan_cache
    }
1462
    /// v6.3.3 — Describe a prepared `Statement` without executing.
    /// Returns `(parameter_oids, output_columns)`. Empty
    /// `output_columns` means the statement has no row-producing
    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
    /// — pgwire layer maps that to a `NoData` reply.
    ///
    /// Thin delegate: the work is done by `describe::describe_prepared`
    /// against the catalog returned by `self.active_catalog()`.
    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
        describe::describe_prepared(stmt, self.active_catalog())
    }
1471
    /// v6.1.1 — execute a [`Statement`] previously returned by
    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
    /// nodes for the corresponding [`Value`] in `params` (1-based
    /// per PG: `$1` → `params[0]`). Bind-time string parameters
    /// are decoded into typed `Value`s by the pgwire layer before
    /// this call so the resulting AST hits the same execution
    /// path as a simple query — no SQL re-parse.
    ///
    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
    ///
    /// The statement is taken by value and rewritten in place before
    /// dispatch. Execution runs with `CancelToken::none()`.
    ///
    /// # Errors
    ///
    /// Returns early with the error from `substitute_placeholders`
    /// (converted via `?`), otherwise whatever the statement executor
    /// returns.
    pub fn execute_prepared(
        &mut self,
        mut stmt: Statement,
        params: &[Value],
    ) -> Result<QueryResult, EngineError> {
        // Bind the parameter values into the AST first; a failure here
        // means the statement is never executed.
        substitute_placeholders(&mut stmt, params)?;
        self.execute_stmt_with_cancel(stmt, CancelToken::none())
    }
1489
1490    fn execute_inner_with_cancel(
1491        &mut self,
1492        sql: &str,
1493        cancel: CancelToken<'_>,
1494    ) -> Result<QueryResult, EngineError> {
1495        cancel.check()?;
1496        let stmt = self.prepare(sql)?;
1497        // v6.5.1 — wrap the executor with a wall-clock window so we
1498        // can record into spg_stat_query. Skip when the engine has
1499        // no clock attached (no_std embedded callers).
1500        let start_us = self.clock.map(|f| f());
1501        let result = self.execute_stmt_with_cancel(stmt, cancel);
1502        if let (Some(t0), Ok(_)) = (start_us, &result) {
1503            let now = self.clock.map_or(t0, |f| f());
1504            let elapsed = now.saturating_sub(t0).max(0) as u64;
1505            self.query_stats.record(sql, elapsed, now as u64);
1506            // v6.5.6 — slow-query log: fire callback when elapsed
1507            // exceeds the configured floor.
1508            if let (Some(threshold), Some(logger)) =
1509                (self.slow_query_threshold_us, self.slow_query_logger)
1510                && elapsed >= threshold
1511            {
1512                logger(sql, elapsed);
1513            }
1514        }
1515        result
1516    }
1517
1518    fn execute_stmt_with_cancel(
1519        &mut self,
1520        stmt: Statement,
1521        cancel: CancelToken<'_>,
1522    ) -> Result<QueryResult, EngineError> {
1523        cancel.check()?;
1524        let result = match stmt {
1525            Statement::CreateTable(s) => self.exec_create_table(s),
1526            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1527            // CommandOk with affected=0; modified_catalog=false so
1528            // the WAL doesn't grow a useless entry. mailrs F3.
1529            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1530                affected: 0,
1531                modified_catalog: false,
1532            }),
1533            // v7.9.27 — DO $$ ... $$ is also a no-op (SPG has no
1534            // PL/pgSQL). mailrs H1 + pg_dump compat.
1535            Statement::DoBlock => Ok(QueryResult::CommandOk {
1536                affected: 0,
1537                modified_catalog: false,
1538            }),
1539            // v7.14.0 — empty-statement no-op for pg_dump /
1540            // mysqldump preamble lines that collapse to nothing
1541            // after comment-stripping.
1542            Statement::Empty => Ok(QueryResult::CommandOk {
1543                affected: 0,
1544                modified_catalog: false,
1545            }),
1546            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
1547            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
1548            Statement::CreateIndex(s) => self.exec_create_index(s),
1549            Statement::Insert(s) => self.exec_insert(s),
1550            Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1551            Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1552            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1553            Statement::Begin => self.exec_begin(),
1554            Statement::Commit => self.exec_commit(),
1555            Statement::Rollback => self.exec_rollback(),
1556            Statement::Savepoint(name) => self.exec_savepoint(name),
1557            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1558            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1559            Statement::ShowTables => Ok(self.exec_show_tables()),
1560            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1561            Statement::ShowUsers => Ok(self.exec_show_users()),
1562            Statement::ShowPublications => Ok(self.exec_show_publications()),
1563            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1564            Statement::CreateUser(s) => self.exec_create_user(&s),
1565            Statement::DropUser(name) => self.exec_drop_user(&name),
1566            Statement::Explain(e) => self.exec_explain(&e, cancel),
1567            Statement::AlterIndex(s) => self.exec_alter_index(s),
1568            Statement::AlterTable(s) => self.exec_alter_table(s),
1569            Statement::CreatePublication(s) => self.exec_create_publication(s),
1570            Statement::DropPublication(name) => self.exec_drop_publication(&name),
1571            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1572            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1573            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
1574            // which lives in spg-server's ServerState. The engine
1575            // surfaces a clear error; the server-layer dispatch
1576            // intercepts the SQL before it reaches the engine on
1577            // a server build, so this arm only fires for
1578            // engine-only callers (spg-embedded, lib tests).
1579            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1580                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1581            )),
1582            // v6.2.0 — ANALYZE recomputes per-column histograms.
1583            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1584            // v6.7.3 — COMPACT COLD SEGMENTS.
1585            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1586            // v7.12.1 — SET / RESET session parameter. Engine
1587            // tracks the value in `session_params`; FTS dispatcher
1588            // reads `default_text_search_config`. Everything else
1589            // is a recorded no-op (PG dump compat).
1590            Statement::SetParameter { name, value } => {
1591                self.set_session_param(name, value);
1592                Ok(QueryResult::CommandOk {
1593                    affected: 0,
1594                    modified_catalog: false,
1595                })
1596            }
1597            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
1598            // through `set_session_param` so engine-known params
1599            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
1600            // effect; unknown pairs (including `@VAR` LHS from the
1601            // mysqldump preamble) are recorded then ignored.
1602            Statement::SetParameterList(pairs) => {
1603                for (name, value) in pairs {
1604                    self.set_session_param(name, value);
1605                }
1606                Ok(QueryResult::CommandOk {
1607                    affected: 0,
1608                    modified_catalog: false,
1609                })
1610            }
1611            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
1612            // for the PL/pgSQL trigger surface. exec_* methods are
1613            // defined alongside the existing CREATE handlers below.
1614            Statement::CreateFunction(s) => self.exec_create_function(s),
1615            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
1616            Statement::DropTrigger {
1617                name,
1618                table,
1619                if_exists,
1620            } => self.exec_drop_trigger(&name, &table, if_exists),
1621            Statement::DropFunction { name, if_exists } => {
1622                self.exec_drop_function(&name, if_exists)
1623            }
1624            Statement::ResetParameter(target) => {
1625                match target {
1626                    None => self.session_params.clear(),
1627                    Some(name) => {
1628                        self.session_params.remove(&name.to_ascii_lowercase());
1629                    }
1630                }
1631                Ok(QueryResult::CommandOk {
1632                    affected: 0,
1633                    modified_catalog: false,
1634                })
1635            }
1636        };
1637        self.enforce_row_limit(result)
1638    }
1639
1640    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
1641    /// surface as `EngineError::Unsupported` so the existing PG-wire
1642    /// error mapping stays uniform; the message carries the name so
1643    /// operators can grep replication-log noise. Inside-transaction
1644    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
1645    /// stance) — replication-catalog mutation is a connection-level
1646    /// administrative op, not a transactional one.
1647    fn exec_create_publication(
1648        &mut self,
1649        s: CreatePublicationStatement,
1650    ) -> Result<QueryResult, EngineError> {
1651        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
1652        // was over-cautious: it also blocked the auto-commit wrap
1653        // path (which begins an internal TX around every WAL-
1654        // logged statement). PG itself allows CREATE PUBLICATION
1655        // inside a transaction (it rolls back with the TX).
1656        self.publications
1657            .create(s.name, s.scope)
1658            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1659        Ok(QueryResult::CommandOk {
1660            affected: 1,
1661            modified_catalog: true,
1662        })
1663    }
1664
    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
    /// no-op when the publication doesn't exist (returns `affected=0`
    /// in that case so the wire-level command tag distinguishes
    /// "dropped" from "no-op", though both succeed).
    ///
    /// This function never returns `Err`; the `Result` return type
    /// matches the other statement executors.
    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
        // `removed` is true only when an entry was actually dropped.
        let removed = self.publications.drop(name);
        Ok(QueryResult::CommandOk {
            // 1 when something was dropped, 0 for the silent no-op.
            affected: usize::from(removed),
            // The catalog only changed if an entry really went away.
            modified_catalog: removed,
        })
    }
1676
    /// v6.1.2 — read access to the publication catalog. Used by
    /// the v6.1.5 publisher-side WAL filter, by `SHOW PUBLICATIONS`
    /// (v6.1.3+), and by e2e tests that need to assert state without
    /// going through the wire.
    ///
    /// Returns a shared borrow; the catalog is not copied.
    pub const fn publications(&self) -> &publications::Publications {
        &self.publications
    }
1684
1685    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
1686    /// `enabled = true` and `last_received_pos = 0` for a freshly-
1687    /// created subscription. The actual worker thread is spawned
1688    /// by spg-server once the engine returns success.
1689    fn exec_create_subscription(
1690        &mut self,
1691        s: CreateSubscriptionStatement,
1692    ) -> Result<QueryResult, EngineError> {
1693        // See exec_create_publication — the in_transaction gate
1694        // was over-cautious; the auto-commit wrap path holds an
1695        // internal TX that this check was incorrectly blocking.
1696        let sub = subscriptions::Subscription {
1697            conn_str: s.conn_str,
1698            publications: s.publications,
1699            enabled: true,
1700            last_received_pos: 0,
1701        };
1702        self.subscriptions
1703            .create(s.name, sub)
1704            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1705        Ok(QueryResult::CommandOk {
1706            affected: 1,
1707            modified_catalog: true,
1708        })
1709    }
1710
    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
    /// doesn't exist (PG-compatible). The associated worker is
    /// torn down by spg-server when it observes the catalog
    /// change at the next snapshot or via the engine's
    /// subscriptions accessor (the worker polls the catalog on
    /// reconnect; v6.1.5's filter-side will tighten this to an
    /// explicit signal).
    ///
    /// This function never returns `Err`; the `Result` return type
    /// matches the other statement executors.
    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
        // `removed` is true only when an entry was actually dropped.
        let removed = self.subscriptions.drop(name);
        Ok(QueryResult::CommandOk {
            // 1 when something was dropped, 0 for the silent no-op.
            affected: usize::from(removed),
            // The catalog only changed if an entry really went away.
            modified_catalog: removed,
        })
    }
1725
    /// v6.1.4 — read access to the subscription catalog. Used by
    /// the subscription worker (read its own row to find its
    /// publications + last applied position), by SHOW SUBSCRIPTIONS,
    /// and by e2e tests asserting state directly.
    ///
    /// Returns a shared borrow; the catalog is not copied.
    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
        &self.subscriptions
    }
1733
    /// v6.1.4 — write access to `last_received_pos`. Worker
    /// calls this after each apply batch (under the engine's
    /// write-lock). Returns `false` when the subscription was
    /// dropped between when the worker received the record and
    /// when this call landed.
    ///
    /// Thin delegate to the subscription catalog's
    /// `update_last_received_pos`; its return value is forwarded
    /// unchanged.
    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
        self.subscriptions.update_last_received_pos(name, pos)
    }
1742
1743    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
1744    /// `(name, conn_str, publications, enabled, last_received_pos)`
1745    /// ordered by subscription name. The `publications` column is
1746    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
1747    /// callers wanting structured access read `Engine::subscriptions`.
1748    fn exec_show_subscriptions(&self) -> QueryResult {
1749        let columns = alloc::vec![
1750            ColumnSchema::new("name", DataType::Text, false),
1751            ColumnSchema::new("conn_str", DataType::Text, false),
1752            ColumnSchema::new("publications", DataType::Text, false),
1753            ColumnSchema::new("enabled", DataType::Bool, false),
1754            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1755        ];
1756        let rows: Vec<Row> = self
1757            .subscriptions
1758            .iter()
1759            .map(|(name, sub)| {
1760                Row::new(alloc::vec![
1761                    Value::Text(name.clone()),
1762                    Value::Text(sub.conn_str.clone()),
1763                    Value::Text(sub.publications.join(", ")),
1764                    Value::Bool(sub.enabled),
1765                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1766                ])
1767            })
1768            .collect();
1769        QueryResult::Rows { columns, rows }
1770    }
1771
1772    /// v6.2.0 — materialise `spg_statistic` rows. One row per
1773    /// `(table, column)` pair tracked in `Statistics`, with
1774    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
1775    /// the same canonical form vector literals use for round-trip.
1776    fn exec_spg_statistic(&self) -> QueryResult {
1777        let columns = alloc::vec![
1778            ColumnSchema::new("table_name", DataType::Text, false),
1779            ColumnSchema::new("column_name", DataType::Text, false),
1780            ColumnSchema::new("null_frac", DataType::Float, false),
1781            ColumnSchema::new("n_distinct", DataType::BigInt, false),
1782            ColumnSchema::new("histogram_bounds", DataType::Text, false),
1783            // v6.7.0 — appended column (v6.2.0 stability contract
1784            // allows APPEND to spg_statistic, not reorder/rename).
1785            // Reports the cached per-table cold-row count; same
1786            // value across every column row of the same table.
1787            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1788        ];
1789        let rows: Vec<Row> = self
1790            .statistics
1791            .iter()
1792            .map(|((t, c), s)| {
1793                let cold = self
1794                    .catalog
1795                    .get(t)
1796                    .map_or(0, |table| table.cold_row_count());
1797                Row::new(alloc::vec![
1798                    Value::Text(t.clone()),
1799                    Value::Text(c.clone()),
1800                    Value::Float(f64::from(s.null_frac)),
1801                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1802                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1803                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1804                ])
1805            })
1806            .collect();
1807        QueryResult::Rows { columns, rows }
1808    }
1809
1810    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
1811    /// per subscription with `(name, conn_str, publications,
1812    /// last_received_pos, enabled)`. Surface mirrors
1813    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
1814    /// shape so it composes with SELECT clauses (WHERE, projection
1815    /// onto specific columns, etc).
1816    fn exec_spg_stat_replication(&self) -> QueryResult {
1817        let columns = alloc::vec![
1818            ColumnSchema::new("name", DataType::Text, false),
1819            ColumnSchema::new("conn_str", DataType::Text, false),
1820            ColumnSchema::new("publications", DataType::Text, false),
1821            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1822            ColumnSchema::new("enabled", DataType::Bool, false),
1823        ];
1824        let rows: Vec<Row> = self
1825            .subscriptions
1826            .iter()
1827            .map(|(name, sub)| {
1828                Row::new(alloc::vec![
1829                    Value::Text(name.clone()),
1830                    Value::Text(sub.conn_str.clone()),
1831                    Value::Text(sub.publications.join(",")),
1832                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1833                    Value::Bool(sub.enabled),
1834                ])
1835            })
1836            .collect();
1837        QueryResult::Rows { columns, rows }
1838    }
1839
1840    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
1841    /// cold-tier segment with `(segment_id, num_rows, num_pages,
1842    /// total_bytes)`.
1843    ///
1844    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
1845    /// carve-out. Walks every user table's BTree indices to find
1846    /// which table's Cold locators point at each segment. Empty
1847    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
1848    /// before any index registered a locator). The walk is
1849    /// O(tables × indices × keys); cached per call, not across
1850    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
1851    fn exec_spg_stat_segment(&self) -> QueryResult {
1852        let columns = alloc::vec![
1853            ColumnSchema::new("segment_id", DataType::BigInt, false),
1854            ColumnSchema::new("table_name", DataType::Text, false),
1855            ColumnSchema::new("num_rows", DataType::BigInt, false),
1856            ColumnSchema::new("num_pages", DataType::BigInt, false),
1857            ColumnSchema::new("total_bytes", DataType::BigInt, false),
1858        ];
1859        // v6.7.0 — build a segment_id → table_name map by walking
1860        // every user table's BTree indices once. O(tables × indices
1861        // × keys) for the v6.5.0 carve-out resolution; acceptable
1862        // because spg_stat_segment is operator-facing (not on a
1863        // hot-loop path).
1864        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1865        for tname in self.catalog.table_names() {
1866            if is_internal_table_name(&tname) {
1867                continue;
1868            }
1869            let Some(t) = self.catalog.get(&tname) else {
1870                continue;
1871            };
1872            for idx in t.indices() {
1873                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1874                    for (_, locs) in map.iter() {
1875                        for loc in locs {
1876                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1877                                segment_owners
1878                                    .entry(*segment_id)
1879                                    .or_insert_with(|| tname.clone());
1880                            }
1881                        }
1882                    }
1883                }
1884            }
1885        }
1886        let rows: Vec<Row> = self
1887            .catalog
1888            .cold_segment_ids_global()
1889            .iter()
1890            .filter_map(|&id| {
1891                let seg = self.catalog.cold_segment(id)?;
1892                let meta = seg.meta();
1893                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
1894                Some(Row::new(alloc::vec![
1895                    Value::BigInt(i64::from(id)),
1896                    Value::Text(owner),
1897                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1898                    Value::BigInt(i64::from(meta.num_pages)),
1899                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1900                ]))
1901            })
1902            .collect();
1903        QueryResult::Rows { columns, rows }
1904    }
1905
1906    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
1907    /// distinct SQL text recorded since the engine booted, capped
1908    /// at `QUERY_STATS_MAX` (1024). Columns:
1909    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
1910    /// mean_us = total_us / exec_count (saturating).
1911    fn exec_spg_stat_query(&self) -> QueryResult {
1912        let columns = alloc::vec![
1913            ColumnSchema::new("sql", DataType::Text, false),
1914            ColumnSchema::new("exec_count", DataType::BigInt, false),
1915            ColumnSchema::new("total_us", DataType::BigInt, false),
1916            ColumnSchema::new("mean_us", DataType::BigInt, false),
1917            ColumnSchema::new("max_us", DataType::BigInt, false),
1918            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1919        ];
1920        let rows: Vec<Row> = self
1921            .query_stats
1922            .snapshot()
1923            .into_iter()
1924            .map(|(sql, s)| {
1925                let mean = if s.exec_count == 0 {
1926                    0
1927                } else {
1928                    s.total_us / s.exec_count
1929                };
1930                Row::new(alloc::vec![
1931                    Value::Text(sql),
1932                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1933                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1934                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1935                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1936                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1937                ])
1938            })
1939            .collect();
1940        QueryResult::Rows { columns, rows }
1941    }
1942
    /// v6.5.2 — register a connection-state provider. spg-server
    /// calls this at startup with a function that snapshots its
    /// per-pgwire-connection registry. Engine reads through the
    /// callback on `SELECT * FROM spg_stat_activity`.
    ///
    /// Builder-style: consumes the engine and returns it with the
    /// provider set, replacing any provider registered earlier.
    #[must_use]
    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
        self.activity_provider = Some(f);
        self
    }
1952
    /// v6.5.3 — register audit chain provider + verifier.
    ///
    /// Builder-style: consumes the engine and returns it with both
    /// callbacks set, replacing any registered earlier. `chain` is
    /// stored as the audit chain provider and `verify` as the audit
    /// verifier.
    #[must_use]
    pub const fn with_audit_providers(
        mut self,
        chain: AuditChainProvider,
        verify: AuditVerifier,
    ) -> Self {
        self.audit_chain_provider = Some(chain);
        self.audit_verifier = Some(verify);
        self
    }
1964
    /// v6.5.6 — register a slow-query log callback. `threshold_us`
    /// is the floor (in microseconds); only executes above the floor
    /// fire the callback. spg-server wires this from
    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
    ///
    /// Builder-style: consumes the engine and returns it with both the
    /// threshold and the logger set, replacing any earlier values.
    #[must_use]
    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
        self.slow_query_threshold_us = Some(threshold_us);
        self.slow_query_logger = Some(logger);
        self
    }
1975
    /// v6.5.6 — operator knob for plan cache cap. spg-server reads
    /// `SPG_PLAN_CACHE_MAX` env at startup; uses this to override
    /// the compile-time default of 256.
    ///
    /// Forwards `n` to the plan cache's `set_max_entries`.
    pub fn set_plan_cache_max(&mut self, n: usize) {
        self.plan_cache.set_max_entries(n);
    }
1982
1983    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
1984    /// snapshot from the registered `ActivityProvider`. Returns an
1985    /// empty result set when no provider is registered (the no_std
1986    /// embedded path with no pgwire layer).
1987    fn exec_spg_stat_activity(&self) -> QueryResult {
1988        let columns = alloc::vec![
1989            ColumnSchema::new("pid", DataType::Int, false),
1990            ColumnSchema::new("user", DataType::Text, false),
1991            ColumnSchema::new("started_at_us", DataType::BigInt, false),
1992            ColumnSchema::new("current_sql", DataType::Text, false),
1993            ColumnSchema::new("wait_event", DataType::Text, false),
1994            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1995            ColumnSchema::new("in_transaction", DataType::Bool, false),
1996        ];
1997        let rows: Vec<Row> = self
1998            .activity_provider
1999            .map(|f| f())
2000            .unwrap_or_default()
2001            .into_iter()
2002            .map(|r| {
2003                Row::new(alloc::vec![
2004                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2005                    Value::Text(r.user),
2006                    Value::BigInt(r.started_at_us),
2007                    Value::Text(r.current_sql),
2008                    Value::Text(r.wait_event),
2009                    Value::BigInt(r.elapsed_us),
2010                    Value::Bool(r.in_transaction),
2011                ])
2012            })
2013            .collect();
2014        QueryResult::Rows { columns, rows }
2015    }
2016
2017    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2018    /// table with `(table_name, ddl)`. Reconstructed from catalog
2019    /// state on demand.
2020    fn exec_spg_table_ddl(&self) -> QueryResult {
2021        let columns = alloc::vec![
2022            ColumnSchema::new("table_name", DataType::Text, false),
2023            ColumnSchema::new("ddl", DataType::Text, false),
2024        ];
2025        let rows: Vec<Row> = self
2026            .catalog
2027            .table_names()
2028            .into_iter()
2029            .filter(|n| !is_internal_table_name(n))
2030            .filter_map(|name| {
2031                let table = self.catalog.get(&name)?;
2032                let ddl = render_create_table(&name, &table.schema().columns);
2033                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2034            })
2035            .collect();
2036        QueryResult::Rows { columns, rows }
2037    }
2038
2039    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2040    /// with `(role_name, ddl)`. Password is redacted (matches the
2041    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2042    fn exec_spg_role_ddl(&self) -> QueryResult {
2043        let columns = alloc::vec![
2044            ColumnSchema::new("role_name", DataType::Text, false),
2045            ColumnSchema::new("ddl", DataType::Text, false),
2046        ];
2047        let rows: Vec<Row> = self
2048            .users
2049            .iter()
2050            .map(|(name, rec)| {
2051                let ddl = alloc::format!(
2052                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2053                    rec.role.as_str(),
2054                );
2055                Row::new(alloc::vec![
2056                    Value::Text(String::from(name)),
2057                    Value::Text(ddl)
2058                ])
2059            })
2060            .collect();
2061        QueryResult::Rows { columns, rows }
2062    }
2063
2064    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2065    /// `ddl` column concatenates every user table's CREATE +
2066    /// every role's CREATE in deterministic catalog order. Suitable
2067    /// for piping back through `Engine::execute` to recreate a
2068    /// schema-equivalent database.
2069    fn exec_spg_database_ddl(&self) -> QueryResult {
2070        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2071        let mut out = String::new();
2072        for (name, rec) in self.users.iter() {
2073            out.push_str(&alloc::format!(
2074                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2075                rec.role.as_str(),
2076            ));
2077        }
2078        for name in self.catalog.table_names() {
2079            if is_internal_table_name(&name) {
2080                continue;
2081            }
2082            if let Some(table) = self.catalog.get(&name) {
2083                out.push_str(&render_create_table(&name, &table.schema().columns));
2084                out.push_str(";\n");
2085            }
2086        }
2087        QueryResult::Rows {
2088            columns,
2089            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2090        }
2091    }
2092
2093    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2094    /// snapshot from the registered provider; empty when no
2095    /// provider is set.
2096    fn exec_spg_audit_chain(&self) -> QueryResult {
2097        let columns = alloc::vec![
2098            ColumnSchema::new("seq", DataType::BigInt, false),
2099            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2100            ColumnSchema::new("prev_hash", DataType::Text, false),
2101            ColumnSchema::new("entry_hash", DataType::Text, false),
2102            ColumnSchema::new("sql", DataType::Text, false),
2103        ];
2104        let rows: Vec<Row> = self
2105            .audit_chain_provider
2106            .map(|f| f())
2107            .unwrap_or_default()
2108            .into_iter()
2109            .map(|r| {
2110                Row::new(alloc::vec![
2111                    Value::BigInt(r.seq),
2112                    Value::BigInt(r.ts_ms),
2113                    Value::Text(r.prev_hash_hex),
2114                    Value::Text(r.entry_hash_hex),
2115                    Value::Text(r.sql),
2116                ])
2117            })
2118            .collect();
2119        QueryResult::Rows { columns, rows }
2120    }
2121
2122    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2123    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2124    /// on a clean chain. Returns one row with both values 0 when
2125    /// no verifier is registered (no-data fallback for embedded
2126    /// callers).
2127    fn exec_spg_audit_verify(&self) -> QueryResult {
2128        let columns = alloc::vec![
2129            ColumnSchema::new("verified_count", DataType::BigInt, false),
2130            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2131        ];
2132        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2133        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2134        QueryResult::Rows {
2135            columns,
2136            rows: alloc::vec![row],
2137        }
2138    }
2139
    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
    ///
    /// Returns a shared borrow of the engine's `query_stats` field;
    /// nothing is copied or recomputed.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
2144
    /// v6.5.1 — mutable accessor (clear, etc).
    ///
    /// Returns an exclusive borrow of the engine's `query_stats`
    /// field so the caller can mutate it in place.
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
2149
    /// v6.2.0 — read access to the per-column statistics table.
    /// Used by the planner (v6.2.2 selectivity functions read this),
    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
    ///
    /// Returns a shared borrow of the engine's `statistics` field.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
2156
2157    /// v6.2.1 — return tables whose modified-row count crossed the
2158    /// auto-analyze threshold since the last ANALYZE on that table.
2159    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2160    /// ANALYZE)` — combines PG-style fractional + absolute lower
2161    /// bound so a fresh / tiny table doesn't get hammered on every
2162    /// INSERT.
2163    ///
2164    /// Designed to be cheap: walks every user table's
2165    /// `Catalog::table_names()` + reads `statistics::modified_
2166    /// since_last_analyze()` (BTreeMap lookup). The background
2167    /// worker calls this under `engine.read()` then drops the lock
2168    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2169    pub fn tables_needing_analyze(&self) -> Vec<String> {
2170        const MIN_ROWS: u64 = 100;
2171        let mut out = Vec::new();
2172        for name in self.catalog.table_names() {
2173            if is_internal_table_name(&name) {
2174                continue;
2175            }
2176            let Some(table) = self.catalog.get(&name) else {
2177                continue;
2178            };
2179            let row_count = table.rows().len() as u64;
2180            let modified = self.statistics.modified_since_last_analyze(&name);
2181            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2182            // computed in integer arithmetic so spg-engine stays
2183            // no_std without pulling in libm. `(n + 9) / 10` is
2184            // `ceil(n / 10)` for non-negative `n`.
2185            let base = row_count.max(MIN_ROWS);
2186            let threshold = base.saturating_add(9) / 10;
2187            if modified >= threshold {
2188                out.push(name);
2189            }
2190        }
2191        out
2192    }
2193
2194    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2195    /// every user table; `ANALYZE <name>` re-stats one. For each
2196    /// target table, single-pass scan + per-column histogram +
2197    /// `null_frac` + `n_distinct`. Replaces the table's prior
2198    /// stats; resets the modified-row counter.
2199    ///
2200    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2201    /// can add reservoir sampling at the > 100 K-row mark; not a
2202    /// scope blocker for the current commit since rows ≤ 100 K
2203    /// analyse in milliseconds.
2204    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2205        let names: Vec<String> = if let Some(name) = target {
2206            // Verify the table exists; surface a clear error if not.
2207            if self.catalog.get(name).is_none() {
2208                return Err(EngineError::Storage(StorageError::TableNotFound {
2209                    name: name.to_string(),
2210                }));
2211            }
2212            alloc::vec![name.to_string()]
2213        } else {
2214            self.catalog
2215                .table_names()
2216                .into_iter()
2217                .filter(|n| !is_internal_table_name(n))
2218                .collect()
2219        };
2220        let mut analysed = 0usize;
2221        for table_name in &names {
2222            self.analyze_one_table(table_name)?;
2223            analysed += 1;
2224        }
2225        // v6.3.1 — plan cache invalidation. Bump stats version so
2226        // future lookups see the new generation, and selectively
2227        // evict every plan whose `source_tables` overlap with the
2228        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2229        // whole cache.
2230        if analysed > 0 {
2231            self.statistics.bump_version();
2232            if target.is_some() {
2233                for t in &names {
2234                    self.plan_cache.evict_referencing(t);
2235                }
2236            } else {
2237                self.plan_cache.clear();
2238            }
2239        }
2240        Ok(QueryResult::CommandOk {
2241            affected: analysed,
2242            modified_catalog: true,
2243        })
2244    }
2245
2246    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2247    /// engine-layer compaction shim with the default
2248    /// 4 MiB segment-size threshold. spg-server intercepts the
2249    /// SQL before it reaches the engine on a server build —
2250    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2251    /// `Engine::compact_cold_segments_with_target` directly with
2252    /// the env value, and persists every merged segment to
2253    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2254    /// are case-folded to lowercase to match PG; values keep their
2255    /// caller-supplied form so observability paths see what was
2256    /// requested. Only `default_text_search_config` is consulted by
2257    /// the engine today.
2258    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2259        let normalised = match value {
2260            spg_sql::ast::SetValue::String(s) => s,
2261            spg_sql::ast::SetValue::Ident(s) => s,
2262            spg_sql::ast::SetValue::Number(s) => s,
2263            spg_sql::ast::SetValue::Default => String::new(),
2264        };
2265        let key = name.to_ascii_lowercase();
2266        // v7.14.0 — mysqldump preamble emits
2267        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2268        // order despite cross-table FK references; the closing
2269        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2270        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2271        // session-variable-aware path). Match both shapes.
2272        // Also accept PG's `session_replication_role = 'replica'`
2273        // which suppresses trigger + FK enforcement during a
2274        // logical replication apply (pg_dump preserves this for
2275        // schema-only mode but it shows up in some restores).
2276        let value_off = matches!(
2277            normalised.to_ascii_lowercase().as_str(),
2278            "0" | "off" | "false"
2279        );
2280        let value_on = matches!(
2281            normalised.to_ascii_lowercase().as_str(),
2282            "1" | "on" | "true"
2283        );
2284        if key == "foreign_key_checks"
2285            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2286        {
2287            if value_off || key == "session_replication_role" {
2288                self.foreign_key_checks = false;
2289            } else if value_on
2290                || (key == "session_replication_role"
2291                    && normalised.eq_ignore_ascii_case("origin"))
2292            {
2293                self.foreign_key_checks = true;
2294                // Drain pending FK queue against the now-complete
2295                // catalog. Errors here surface as the SET reply —
2296                // caller knows enabling checks revealed orphans.
2297                let _ = self.drain_pending_foreign_keys();
2298            }
2299        }
2300        self.session_params.insert(key, normalised);
2301    }
2302
2303    /// v7.14.0 — resolve every queued FK whose installation was
2304    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2305    /// `set_session_param` when checks flip back on and by the
2306    /// drop-import release gate. Each FK is resolved against the
2307    /// current catalog; remaining missing-parent errors propagate
2308    /// up so the caller knows the import was incomplete.
2309    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2310        let pending = core::mem::take(&mut self.pending_foreign_keys);
2311        for (child, fk) in pending {
2312            // Resolve against the current catalog. Skip silently
2313            // when the child table itself was dropped between
2314            // queue + drain.
2315            let cols_snapshot = match self.active_catalog().get(&child) {
2316                Some(t) => t.schema().columns.clone(),
2317                None => continue,
2318            };
2319            let storage_fk = resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2320            let table = self
2321                .active_catalog_mut()
2322                .get_mut(&child)
2323                .expect("checked above");
2324            table.schema_mut().foreign_keys.push(storage_fk);
2325        }
2326        Ok(())
2327    }
2328
2329    /// v7.12.1 — read a session parameter set via `SET`. Used by
2330    /// the FTS function dispatcher to resolve the default config
2331    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2332    #[must_use]
2333    pub fn session_param(&self, name: &str) -> Option<&str> {
2334        self.session_params
2335            .get(&name.to_ascii_lowercase())
2336            .map(String::as_str)
2337    }
2338
2339    /// v7.12.1 — build an `EvalContext` chained with the session's
2340    /// `default_text_search_config`. Engine-internal callers use
2341    /// this instead of `EvalContext::new` so the FTS function
2342    /// dispatcher sees the SET configuration.
2343    fn ev_ctx<'a>(
2344        &'a self,
2345        columns: &'a [ColumnSchema],
2346        alias: Option<&'a str>,
2347    ) -> EvalContext<'a> {
2348        EvalContext::new(columns, alias)
2349            .with_default_text_search_config(self.session_param("default_text_search_config"))
2350    }
2351
2352    /// `<db>.spg/segments/`. This arm only fires for engine-only
2353    /// callers (spg-embedded, lib tests); in that mode merged
2354    /// segments live in memory and are dropped at process exit.
2355    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2356        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2357        let reports = self.compact_cold_segments_with_target(target)?;
2358        let columns = alloc::vec![
2359            ColumnSchema::new("table_name", DataType::Text, false),
2360            ColumnSchema::new("index_name", DataType::Text, false),
2361            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2362            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2363            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2364            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2365            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2366        ];
2367        let rows: Vec<Row> = reports
2368            .into_iter()
2369            .map(|(tname, iname, report)| {
2370                Row::new(alloc::vec![
2371                    Value::Text(tname),
2372                    Value::Text(iname),
2373                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2374                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2375                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2376                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2377                    Value::BigInt(
2378                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2379                    ),
2380                ])
2381            })
2382            .collect();
2383        Ok(QueryResult::Rows { columns, rows })
2384    }
2385
2386    /// Walk a single table's rows once and (re-)populate per-column
2387    /// stats. Drops the existing stats for `table` first so columns
2388    /// that have been DROP-ed between ANALYZEs don't leave stale
2389    /// rows.
2390    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2391        let table = self.catalog.get(table_name).ok_or_else(|| {
2392            EngineError::Storage(StorageError::TableNotFound {
2393                name: table_name.to_string(),
2394            })
2395        })?;
2396        let schema = table.schema().clone();
2397        let row_count = table.rows().len();
2398        // For each column, collect (sorted) non-NULL textual values
2399        // + count NULLs; then ask `statistics::build_histogram` to
2400        // produce the 101 bounds and `estimate_n_distinct` the
2401        // distinct count.
2402        self.statistics.clear_table(table_name);
2403        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2404            // v6.2.0 skip: vector columns have their own stats
2405            // shape (HNSW graph topology). v6.2 deliberation #1.
2406            if matches!(col_schema.ty, DataType::Vector { .. }) {
2407                continue;
2408            }
2409            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2410            let mut nulls: u64 = 0;
2411            for row in table.rows() {
2412                match row.values.get(col_pos) {
2413                    Some(Value::Null) | None => nulls += 1,
2414                    Some(v) => non_null_values.push(v.clone()),
2415                }
2416            }
2417            // Sort by type-aware ordering (Int as int, Text as
2418            // lex, etc.) so histogram bounds reflect the column's
2419            // natural order — not lexicographic on the string
2420            // representation, which would put "9" after "49".
2421            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2422            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2423            let null_frac = if row_count == 0 {
2424                0.0
2425            } else {
2426                #[allow(clippy::cast_precision_loss)]
2427                let f = nulls as f32 / row_count as f32;
2428                f
2429            };
2430            let n_distinct = statistics::estimate_n_distinct(&non_null);
2431            let histogram_bounds = statistics::build_histogram(&non_null);
2432            self.statistics.set(
2433                table_name.to_string(),
2434                col_schema.name.clone(),
2435                statistics::ColumnStats {
2436                    null_frac,
2437                    n_distinct,
2438                    histogram_bounds,
2439                },
2440            );
2441        }
2442        self.statistics.reset_modified(table_name);
2443        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2444        // BTree indices and count Cold locators (MAX across
2445        // indices); store the result on the table. Surfaced via
2446        // `spg_statistic.cold_row_count` (new column) and
2447        // `spg_stat_segment.table_name` (new column).
2448        let cold_count = {
2449            let table = self
2450                .active_catalog()
2451                .get(table_name)
2452                .expect("table still present");
2453            table.count_cold_locators()
2454        };
2455        let table_mut = self
2456            .active_catalog_mut()
2457            .get_mut(table_name)
2458            .expect("table still present");
2459        table_mut.set_cold_row_count(cold_count);
2460        Ok(())
2461    }
2462
2463    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
2464    /// `(name, scope, table_count)` ordered by publication name.
2465    ///   - `scope` is the human-readable string:
2466    ///       `"FOR ALL TABLES"` /
2467    ///       `"FOR TABLE t1, t2"` /
2468    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
2469    ///   - `table_count` is NULL for `AllTables`, the list length
2470    ///     otherwise. NULLability lets clients distinguish "publish
2471    ///     everything" from "publish exactly 0 tables" (the v6.1.3
2472    ///     parser forbids the empty list, but the column shape is
2473    ///     ready for the v6.1.5 publisher-side semantics).
2474    fn exec_show_publications(&self) -> QueryResult {
2475        let columns = alloc::vec![
2476            ColumnSchema::new("name", DataType::Text, false),
2477            ColumnSchema::new("scope", DataType::Text, false),
2478            ColumnSchema::new("table_count", DataType::Int, true),
2479        ];
2480        let rows: Vec<Row> = self
2481            .publications
2482            .iter()
2483            .map(|(name, scope)| {
2484                let (scope_str, count_val) = match scope {
2485                    spg_sql::ast::PublicationScope::AllTables => {
2486                        ("FOR ALL TABLES".to_string(), Value::Null)
2487                    }
2488                    spg_sql::ast::PublicationScope::ForTables(ts) => (
2489                        alloc::format!("FOR TABLE {}", ts.join(", ")),
2490                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2491                    ),
2492                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2493                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2494                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2495                    ),
2496                };
2497                Row::new(alloc::vec![
2498                    Value::Text(name.clone()),
2499                    Value::Text(scope_str),
2500                    count_val,
2501                ])
2502            })
2503            .collect();
2504        QueryResult::Rows { columns, rows }
2505    }
2506
2507    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
2508    fn exec_show_users(&self) -> QueryResult {
2509        let columns = alloc::vec![
2510            ColumnSchema::new("name", DataType::Text, false),
2511            ColumnSchema::new("role", DataType::Text, false),
2512        ];
2513        let rows: Vec<Row> = self
2514            .users
2515            .iter()
2516            .map(|(name, rec)| {
2517                Row::new(alloc::vec![
2518                    Value::Text(name.to_string()),
2519                    Value::Text(rec.role.as_str().to_string()),
2520                ])
2521            })
2522            .collect();
2523        QueryResult::Rows { columns, rows }
2524    }
2525
2526    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2527        if self.in_transaction() {
2528            return Err(EngineError::Unsupported(
2529                "CREATE USER is not allowed inside a transaction".into(),
2530            ));
2531        }
2532        let role = users::Role::parse(&s.role).ok_or_else(|| {
2533            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2534        })?;
2535        // Prefer the host-injected RNG. Falls back to a deterministic
2536        // salt derived from the username only when no RNG is wired —
2537        // acceptable for tests; the server always installs one.
2538        let salt = self.salt_fn.map_or_else(
2539            || {
2540                let mut s_bytes = [0u8; 16];
2541                let digest = spg_crypto::hash(s.name.as_bytes());
2542                s_bytes.copy_from_slice(&digest[..16]);
2543                s_bytes
2544            },
2545            |f| f(),
2546        );
2547        self.users
2548            .create(&s.name, &s.password, role, salt)
2549            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2550        Ok(QueryResult::CommandOk {
2551            affected: 1,
2552            modified_catalog: true,
2553        })
2554    }
2555
2556    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2557        if self.in_transaction() {
2558            return Err(EngineError::Unsupported(
2559                "DROP USER is not allowed inside a transaction".into(),
2560            ));
2561        }
2562        self.users
2563            .drop(name)
2564            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2565        Ok(QueryResult::CommandOk {
2566            affected: 1,
2567            modified_catalog: true,
2568        })
2569    }
2570
2571    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
2572    /// function metadata in the catalog. PL/pgSQL bodies are
2573    /// already parsed by the SQL parser; we re-canonicalise the
2574    /// body to source text for storage (the executor re-parses
2575    /// it at trigger fire time — see the trigger fire path).
2576    fn exec_create_function(
2577        &mut self,
2578        s: spg_sql::ast::CreateFunctionStatement,
2579    ) -> Result<QueryResult, EngineError> {
2580        let args_repr = render_function_args(&s.args);
2581        let returns = match &s.returns {
2582            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
2583            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
2584            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
2585            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
2586        };
2587        let body_text = match &s.body {
2588            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
2589            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
2590        };
2591        let def = spg_storage::FunctionDef {
2592            name: s.name.clone(),
2593            args_repr,
2594            returns,
2595            language: s.language.clone(),
2596            body: body_text,
2597        };
2598        self.active_catalog_mut()
2599            .create_function(def, s.or_replace)
2600            .map_err(EngineError::Storage)?;
2601        Ok(QueryResult::CommandOk {
2602            affected: 0,
2603            modified_catalog: true,
2604        })
2605    }
2606
2607    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
2608    /// function must already exist in the catalog (forward
2609    /// references defer to a later release). Persists the
2610    /// trigger metadata for the row-write hooks below to consult.
2611    fn exec_create_trigger(
2612        &mut self,
2613        s: spg_sql::ast::CreateTriggerStatement,
2614    ) -> Result<QueryResult, EngineError> {
2615        let timing = match s.timing {
2616            spg_sql::ast::TriggerTiming::Before => "BEFORE",
2617            spg_sql::ast::TriggerTiming::After => "AFTER",
2618            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
2619        };
2620        let events: Vec<alloc::string::String> = s
2621            .events
2622            .iter()
2623            .map(|e| match e {
2624                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
2625                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
2626                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
2627                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
2628            })
2629            .collect();
2630        let for_each = match s.for_each {
2631            spg_sql::ast::TriggerForEach::Row => "ROW",
2632            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
2633        };
2634        let def = spg_storage::TriggerDef {
2635            name: s.name.clone(),
2636            table: s.table.clone(),
2637            timing: alloc::string::String::from(timing),
2638            events,
2639            for_each: alloc::string::String::from(for_each),
2640            function: s.function.clone(),
2641            update_columns: s.update_columns.clone(),
2642        };
2643        self.active_catalog_mut()
2644            .create_trigger(def, s.or_replace)
2645            .map_err(EngineError::Storage)?;
2646        Ok(QueryResult::CommandOk {
2647            affected: 0,
2648            modified_catalog: true,
2649        })
2650    }
2651
2652    fn exec_drop_trigger(
2653        &mut self,
2654        name: &str,
2655        table: &str,
2656        if_exists: bool,
2657    ) -> Result<QueryResult, EngineError> {
2658        let removed = self.active_catalog_mut().drop_trigger(name, table);
2659        if !removed && !if_exists {
2660            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2661                alloc::format!("trigger {name:?} on {table:?} does not exist"),
2662            )));
2663        }
2664        Ok(QueryResult::CommandOk {
2665            affected: usize::from(removed),
2666            modified_catalog: removed,
2667        })
2668    }
2669
2670    fn exec_drop_function(
2671        &mut self,
2672        name: &str,
2673        if_exists: bool,
2674    ) -> Result<QueryResult, EngineError> {
2675        let removed = self.active_catalog_mut().drop_function(name);
2676        if !removed && !if_exists {
2677            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
2678                alloc::format!("function {name:?} does not exist"),
2679            )));
2680        }
2681        Ok(QueryResult::CommandOk {
2682            affected: usize::from(removed),
2683            modified_catalog: removed,
2684        })
2685    }
2686
2687    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
2688    /// Filter pass uses the same WHERE eval as `exec_select`. Per
2689    /// matched row, evaluate each RHS expression against the *old*
2690    /// row, then call `Table::update_row` which rebuilds indices.
2691    /// Indexed columns are correctly reflected because rebuild
2692    /// happens after the cell rewrite.
2693    fn exec_update_cancel(
2694        &mut self,
2695        stmt: &spg_sql::ast::UpdateStatement,
2696        cancel: CancelToken<'_>,
2697    ) -> Result<QueryResult, EngineError> {
2698        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
2699        // session FTS config before the table mut-borrow opens (the
2700        // INSERT path uses the same pattern). Empty vecs are the
2701        // common "no triggers on this table" fast path.
2702        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
2703        // cols` filter. The filter is paired with each function so
2704        // the per-row fire loop can skip when no listed column
2705        // actually differs between OLD and NEW.
2706        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
2707        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
2708        let trigger_session_cfg: Option<String> = self
2709            .session_params
2710            .get("default_text_search_config")
2711            .cloned();
2712        // v5.2.3: if the WHERE is a PK equality and matches a cold-
2713        // tier row, promote it back to the hot tier *before* the
2714        // hot-row walk. The promote pushes the row to the end of
2715        // `table.rows`, where the upcoming SET-evaluation loop will
2716        // pick it up and apply the assignments. Lookups for the key
2717        // never observe a gap because `promote_cold_row` inserts the
2718        // hot row before retiring the cold locator.
2719        if let Some(w) = &stmt.where_ {
2720            let schema_cols = self
2721                .active_catalog()
2722                .get(&stmt.table)
2723                .ok_or_else(|| {
2724                    EngineError::Storage(StorageError::TableNotFound {
2725                        name: stmt.table.clone(),
2726                    })
2727                })?
2728                .schema()
2729                .columns
2730                .clone();
2731            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2732                && let Some(idx_name) = self
2733                    .active_catalog()
2734                    .get(&stmt.table)
2735                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2736            {
2737                // Promote may be a no-op (key is hot-only or absent);
2738                // we don't care about the return value here — the
2739                // subsequent hot walk will either match or not.
2740                let _ = self
2741                    .active_catalog_mut()
2742                    .promote_cold_row(&stmt.table, &idx_name, &key);
2743            }
2744        }
2745
2746        // v7.12.1 — cache session FTS config before the table
2747        // mut-borrow (same reason as exec_delete).
2748        let ts_cfg: Option<String> = self
2749            .session_param("default_text_search_config")
2750            .map(String::from);
2751        let table = self
2752            .active_catalog_mut()
2753            .get_mut(&stmt.table)
2754            .ok_or_else(|| {
2755                EngineError::Storage(StorageError::TableNotFound {
2756                    name: stmt.table.clone(),
2757                })
2758            })?;
2759        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
2760        // Resolve each SET target to a column position once, validate
2761        // up front so a typo'd column doesn't leave a partial mutation
2762        // behind.
2763        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
2764        for (col, expr) in &stmt.assignments {
2765            let pos = schema_cols
2766                .iter()
2767                .position(|c| c.name == *col)
2768                .ok_or_else(|| {
2769                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
2770                })?;
2771            targets.push((pos, expr));
2772        }
2773        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
2774            .with_default_text_search_config(ts_cfg.as_deref());
2775        // Walk every row, evaluate WHERE then SET expressions. We
2776        // gather (position, new_values) tuples first and apply them
2777        // afterwards so the WHERE/RHS evaluation reads the original
2778        // row state — matches PG semantics (UPDATE doesn't see its
2779        // own writes).
2780        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
2781        for (i, row) in table.rows().iter().enumerate() {
2782            // v4.5: cooperative cancel checkpoint every 256 rows so
2783            // a runaway UPDATE without WHERE doesn't drag past the
2784            // server's query-timeout watchdog.
2785            if i.is_multiple_of(256) {
2786                cancel.check()?;
2787            }
2788            if let Some(w) = &stmt.where_ {
2789                let cond = eval::eval_expr(w, row, &ctx)?;
2790                if !matches!(cond, Value::Bool(true)) {
2791                    continue;
2792                }
2793            }
2794            let mut new_vals = row.values.clone();
2795            for (pos, expr) in &targets {
2796                let v = eval::eval_expr(expr, row, &ctx)?;
2797                new_vals[*pos] =
2798                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
2799            }
2800            planned.push((i, new_vals));
2801        }
2802        // v7.6.6 — capture pre-update row values for the FK
2803        // enforcement passes below. `planned` carries new values
2804        // only; pair them with the old row.
2805        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
2806            .iter()
2807            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
2808            .collect();
2809        let self_fks = table.schema().foreign_keys.clone();
2810        // v7.12.5 — `affected` is computed post-BEFORE-trigger
2811        // below (triggers may RETURN NULL to skip individual
2812        // rows). The pre-trigger len shape is no longer accurate.
2813        // Release mutable borrow on `table` for the FK passes.
2814        let _ = table;
2815        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
2816        // local FK columns changed, the new value must exist in the
2817        // parent.
2818        if !self_fks.is_empty() {
2819            let new_rows: Vec<Vec<Value>> = planned
2820                .iter()
2821                .map(|(_pos, new_vals)| new_vals.clone())
2822                .collect();
2823            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
2824        }
2825        // v7.13.0 — CHECK constraint enforcement on UPDATE
2826        // (mailrs round-5 G3). Predicates evaluated against the
2827        // candidate post-UPDATE row; false rejects the UPDATE.
2828        {
2829            let new_rows: Vec<Vec<Value>> = planned
2830                .iter()
2831                .map(|(_pos, new_vals)| new_vals.clone())
2832                .collect();
2833            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
2834        }
2835        // v7.6.6 — Stage 2b: inbound FK check. For every row that
2836        // changed value in a column that *some other table* uses as
2837        // a FK parent column, react per `on_update` action.
2838        let child_plan =
2839            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
2840        // Stage 3a — apply each child-side action.
2841        for step in &child_plan {
2842            apply_fk_child_step(self.active_catalog_mut(), step)?;
2843        }
2844        // Stage 3b — apply the original UPDATE.
2845        let table = self
2846            .active_catalog_mut()
2847            .get_mut(&stmt.table)
2848            .ok_or_else(|| {
2849                EngineError::Storage(StorageError::TableNotFound {
2850                    name: stmt.table.clone(),
2851                })
2852            })?;
2853        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
2854        // around the apply loop. BEFORE sees NEW=candidate +
2855        // OLD=current; may rewrite NEW or RETURN NULL to skip.
2856        // AFTER sees NEW=post-write + OLD=pre-write (both read-
2857        // only).
2858        //
2859        // Filter `planned` through the BEFORE pass first so the
2860        // RETURNING snapshot reflects what actually got written
2861        // (triggers may rewrite cells, including a cancellation).
2862        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
2863        // v7.12.7 — embedded SQL queue.
2864        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
2865        for (pos, new_vals) in &planned {
2866            let old_row = table.rows()[*pos].clone();
2867            let mut new_row = Row::new(new_vals.clone());
2868            let mut skip = false;
2869            for (fd, filter) in &before_update_triggers {
2870                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
2871                // G7). Skip this trigger when the filter is set and
2872                // no listed column actually differs between OLD and
2873                // NEW for this row.
2874                if !filter.is_empty()
2875                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2876                {
2877                    continue;
2878                }
2879                let (outcome, deferred) = triggers::fire_row_trigger(
2880                    fd,
2881                    Some(new_row.clone()),
2882                    Some(&old_row),
2883                    &stmt.table,
2884                    &schema_cols,
2885                    &[],
2886                    trigger_session_cfg.as_deref(),
2887                    false,
2888                )
2889                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2890                deferred_embedded.extend(deferred);
2891                match outcome {
2892                    triggers::TriggerOutcome::Row(r) => new_row = r,
2893                    triggers::TriggerOutcome::Skip => {
2894                        skip = true;
2895                        break;
2896                    }
2897                }
2898            }
2899            if !skip {
2900                applied_after_before.push((*pos, new_row, old_row));
2901            }
2902        }
2903        // v7.9.4 — snapshot post-update values for RETURNING (post-
2904        // BEFORE-trigger because triggers can rewrite cells).
2905        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
2906            applied_after_before
2907                .iter()
2908                .map(|(_pos, new_row, _old)| new_row.values.clone())
2909                .collect()
2910        } else {
2911            Vec::new()
2912        };
2913        let affected = applied_after_before.len();
2914        // Apply, then fire AFTER triggers per row. AFTER runs read-
2915        // only against the freshly-written row; v7.12.4-shape
2916        // assignment errors with a clear message.
2917        for (pos, new_row, old_row) in applied_after_before {
2918            table.update_row(pos, new_row.values.clone())?;
2919            for (fd, filter) in &after_update_triggers {
2920                if !filter.is_empty()
2921                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
2922                {
2923                    continue;
2924                }
2925                let (_outcome, deferred) = triggers::fire_row_trigger(
2926                    fd,
2927                    Some(new_row.clone()),
2928                    Some(&old_row),
2929                    &stmt.table,
2930                    &schema_cols,
2931                    &[],
2932                    trigger_session_cfg.as_deref(),
2933                    true,
2934                )
2935                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
2936                deferred_embedded.extend(deferred);
2937            }
2938        }
2939        let _ = table;
2940        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
2941        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
2942        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
2943        if !self.in_transaction() && affected > 0 {
2944            self.statistics
2945                .record_modifications(&stmt.table, affected as u64);
2946        }
2947        // v7.9.4 — RETURNING projection.
2948        if let Some(items) = &stmt.returning {
2949            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
2950        }
2951        Ok(QueryResult::CommandOk {
2952            affected,
2953            modified_catalog: !self.in_transaction(),
2954        })
2955    }
2956
2957    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
2958    /// positions then delegates to `Table::delete_rows` (single index
2959    /// rebuild for the batch).
2960    fn exec_delete_cancel(
2961        &mut self,
2962        stmt: &spg_sql::ast::DeleteStatement,
2963        cancel: CancelToken<'_>,
2964    ) -> Result<QueryResult, EngineError> {
2965        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
2966        // session FTS config before the mut borrow (same shape as
2967        // INSERT / UPDATE).
2968        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
2969        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
2970        let trigger_session_cfg: Option<String> = self
2971            .session_params
2972            .get("default_text_search_config")
2973            .cloned();
2974        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
2975        // locator for the key. The cold row body stays in the
2976        // segment (becoming shadowed garbage that a future
2977        // compaction pass reclaims) but the index no longer
2978        // resolves it. The shadow count contributes to the
2979        // affected total; the subsequent hot walk handles any hot
2980        // rows for the same key.
2981        let mut cold_shadow_count: usize = 0;
2982        if let Some(w) = &stmt.where_ {
2983            let schema_cols = self
2984                .active_catalog()
2985                .get(&stmt.table)
2986                .ok_or_else(|| {
2987                    EngineError::Storage(StorageError::TableNotFound {
2988                        name: stmt.table.clone(),
2989                    })
2990                })?
2991                .schema()
2992                .columns
2993                .clone();
2994            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
2995                && let Some(idx_name) = self
2996                    .active_catalog()
2997                    .get(&stmt.table)
2998                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
2999            {
3000                cold_shadow_count = self
3001                    .active_catalog_mut()
3002                    .shadow_cold_row(&stmt.table, &idx_name, &key)
3003                    .unwrap_or(0);
3004            }
3005        }
3006
3007        // v7.12.1 — cache the session FTS config as an owned
3008        // String before the mutable table borrow below; the
3009        // ctx-builder then references it via `as_deref` so the
3010        // immutable read of `session_params` doesn't conflict
3011        // with the mut borrow chain.
3012        let ts_cfg: Option<String> = self
3013            .session_param("default_text_search_config")
3014            .map(String::from);
3015        let table = self
3016            .active_catalog_mut()
3017            .get_mut(&stmt.table)
3018            .ok_or_else(|| {
3019                EngineError::Storage(StorageError::TableNotFound {
3020                    name: stmt.table.clone(),
3021                })
3022            })?;
3023        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
3024        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
3025            .with_default_text_search_config(ts_cfg.as_deref());
3026        let mut positions: Vec<usize> = Vec::new();
3027        // v7.6.3 — collect every to-delete row's full Value tuple
3028        // alongside its position, so the FK enforcement pass can
3029        // run after the mut borrow drops.
3030        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
3031        for (i, row) in table.rows().iter().enumerate() {
3032            if i.is_multiple_of(256) {
3033                cancel.check()?;
3034            }
3035            let keep = if let Some(w) = &stmt.where_ {
3036                let cond = eval::eval_expr(w, row, &ctx)?;
3037                !matches!(cond, Value::Bool(true))
3038            } else {
3039                false
3040            };
3041            if !keep {
3042                positions.push(i);
3043                to_delete_rows.push(row.values.clone());
3044            }
3045        }
3046        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
3047        // catalog. Release the mut borrow and run reverse-scan
3048        // against every child table whose FK targets this table.
3049        // RESTRICT / NoAction raise an error; CASCADE returns a
3050        // cascade plan that stage 3 applies after the primary delete.
3051        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
3052        let _ = table;
3053        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
3054        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
3055        // = proceed, RETURN NULL = skip the row entirely. The
3056        // filter must run BEFORE the FK cascade plan so cascaded
3057        // child rows track the trigger's skip-decision on the
3058        // parent.
3059        // v7.12.7 — embedded SQL queue.
3060        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
3061        if !before_delete_triggers.is_empty() {
3062            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
3063            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
3064            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
3065                let old_row = Row::new(old_vals.clone());
3066                let mut cancel_this = false;
3067                for fd in &before_delete_triggers {
3068                    let (outcome, deferred) = triggers::fire_row_trigger(
3069                        fd,
3070                        None,
3071                        Some(&old_row),
3072                        &stmt.table,
3073                        &schema_cols,
3074                        &[],
3075                        trigger_session_cfg.as_deref(),
3076                        false,
3077                    )
3078                    .map_err(|e| {
3079                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3080                    })?;
3081                    deferred_embedded.extend(deferred);
3082                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
3083                        cancel_this = true;
3084                        break;
3085                    }
3086                }
3087                if !cancel_this {
3088                    filtered_positions.push(*pos);
3089                    filtered_old_rows.push(old_vals.clone());
3090                }
3091            }
3092            positions = filtered_positions;
3093            to_delete_rows = filtered_old_rows;
3094        }
3095        let cascade_plan = plan_fk_parent_deletions(
3096            self.active_catalog(),
3097            &stmt.table,
3098            &positions,
3099            &to_delete_rows,
3100        )?;
3101        // Stage 3a — apply each FK child step (SET NULL / SET
3102        // DEFAULT / CASCADE delete) before deleting the parent.
3103        // The plan is already ordered: nulls/defaults first, then
3104        // cascade deletes (so a row mutated and later deleted
3105        // surfaces as deleted — though v7.6.5 doesn't produce
3106        // that overlap today).
3107        for step in &cascade_plan {
3108            apply_fk_child_step(self.active_catalog_mut(), step)?;
3109        }
3110        // Stage 3b — actually delete the original target rows.
3111        let table = self
3112            .active_catalog_mut()
3113            .get_mut(&stmt.table)
3114            .ok_or_else(|| {
3115                EngineError::Storage(StorageError::TableNotFound {
3116                    name: stmt.table.clone(),
3117                })
3118            })?;
3119        let affected = table.delete_rows(&positions) + cold_shadow_count;
3120        let _ = table;
3121        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
3122        // with NEW=None / OLD=pre-delete row (each from the
3123        // already-snapshotted to_delete_rows). Return value is
3124        // ignored (matches PG AFTER semantics).
3125        if !after_delete_triggers.is_empty() {
3126            for old_vals in &to_delete_rows {
3127                let old_row = Row::new(old_vals.clone());
3128                for fd in &after_delete_triggers {
3129                    let (_outcome, deferred) = triggers::fire_row_trigger(
3130                        fd,
3131                        None,
3132                        Some(&old_row),
3133                        &stmt.table,
3134                        &schema_cols,
3135                        &[],
3136                        trigger_session_cfg.as_deref(),
3137                        true,
3138                    )
3139                    .map_err(|e| {
3140                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
3141                    })?;
3142                    deferred_embedded.extend(deferred);
3143                }
3144            }
3145        }
3146        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
3147        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
3148        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
3149        if !self.in_transaction() && affected > 0 {
3150            self.statistics
3151                .record_modifications(&stmt.table, affected as u64);
3152        }
3153        // v7.9.4 — RETURNING projection over the soon-to-be-gone
3154        // rows. `to_delete_rows` was snapshotted in stage 1 before
3155        // mutation, so the projection sees the pre-delete state
3156        // (matches PG semantics: DELETE RETURNING returns the row
3157        // as it was just before removal).
3158        if let Some(items) = &stmt.returning {
3159            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
3160        }
3161        Ok(QueryResult::CommandOk {
3162            affected,
3163            modified_catalog: !self.in_transaction(),
3164        })
3165    }
3166
3167    /// `SHOW TABLES` — one row per table in the active catalog.
3168    /// Column name is `name` so result-set consumers can downstream
3169    /// `SELECT name FROM ...` style logic if needed.
3170    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
3171    /// `QUERY PLAN` text table — first line names the top operator
3172    /// (Scan / Aggregate / Window / etc.), indented children list
3173    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
3174    /// shape, and any active index hits. `ANALYZE` execs the inner
3175    /// SELECT and appends actual-row + elapsed-micros annotations.
3176    #[allow(clippy::format_push_string)]
3177    fn exec_explain(
3178        &self,
3179        e: &spg_sql::ast::ExplainStatement,
3180        cancel: CancelToken<'_>,
3181    ) -> Result<QueryResult, EngineError> {
3182        let mut lines = Vec::<String>::new();
3183        explain_select(&e.inner, self, 0, &mut lines);
3184        if e.suggest {
3185            // v6.8.3 — index advisor. Walks the SELECT's FROM
3186            // tables + WHERE column refs; for each (table, column)
3187            // pair that lacks an index, append a SUGGEST line with
3188            // a copy-pastable `CREATE INDEX` statement. This is a
3189            // pure-syntax heuristic — no cardinality estimation —
3190            // matching the v6.8.3 design intent of "tell the
3191            // operator where indexes are missing", not "give the
3192            // mathematically optimal index set".
3193            let suggestions = build_index_suggestions(&e.inner, self);
3194            for s in suggestions {
3195                lines.push(s);
3196            }
3197        } else if e.analyze {
3198            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
3199            // with `(rows=N)` where the row count is computable
3200            // without re-executing the full query:
3201            //   - Top-level operator (first non-indented line):
3202            //     rows = final result.len()
3203            //   - "From: <table> [full scan]" lines: rows =
3204            //     table.rows().len() (catalog read; no execution)
3205            //   - "From: <table> [index seek]": indeterminate —
3206            //     the index step would need re-execution; v6.2.5
3207            //     adds per-operator wall-clock + hot/cold rows
3208            //     instrumentation that makes this concrete.
3209            //   - Everything else: marked `(—)` so the surface
3210            //     stays well-defined without silently dropping
3211            //     stats. v6.2.5 fills in via inline executor
3212            //     instrumentation.
3213            // Total elapsed lands on a trailing `Total: …` line.
3214            let started = self.clock.map(|f| f());
3215            let exec = self.exec_select_cancel(&e.inner, cancel)?;
3216            let elapsed_micros = match (self.clock, started) {
3217                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
3218                _ => None,
3219            };
3220            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
3221                rows.len()
3222            } else {
3223                0
3224            };
3225            annotate_explain_lines(&mut lines, row_count, self);
3226            let mut total = alloc::format!("Total: rows={row_count}");
3227            if let Some(us) = elapsed_micros {
3228                total.push_str(&alloc::format!(" elapsed={us}us"));
3229            }
3230            lines.push(total);
3231        }
3232        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
3233        let rows: Vec<Row> = lines
3234            .into_iter()
3235            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
3236            .collect();
3237        Ok(QueryResult::Rows { columns, rows })
3238    }
3239
3240    fn exec_show_tables(&self) -> QueryResult {
3241        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
3242        let rows: Vec<Row> = self
3243            .active_catalog()
3244            .table_names()
3245            .into_iter()
3246            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
3247            .collect();
3248        QueryResult::Rows { columns, rows }
3249    }
3250
3251    /// `SHOW COLUMNS FROM <table>` — one row per column with the
3252    /// declared name, SQL type rendering, and nullability flag.
3253    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
3254        let table =
3255            self.active_catalog()
3256                .get(table_name)
3257                .ok_or_else(|| StorageError::TableNotFound {
3258                    name: table_name.into(),
3259                })?;
3260        let columns = alloc::vec![
3261            ColumnSchema::new("name", DataType::Text, false),
3262            ColumnSchema::new("type", DataType::Text, false),
3263            ColumnSchema::new("nullable", DataType::Bool, false),
3264        ];
3265        let rows: Vec<Row> = table
3266            .schema()
3267            .columns
3268            .iter()
3269            .map(|c| {
3270                Row::new(alloc::vec![
3271                    Value::Text(c.name.clone()),
3272                    Value::Text(alloc::format!("{}", c.ty)),
3273                    Value::Bool(c.nullable),
3274                ])
3275            })
3276            .collect();
3277        Ok(QueryResult::Rows { columns, rows })
3278    }
3279
3280    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
3281        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3282        if self.tx_catalogs.contains_key(&tx_id) {
3283            return Err(EngineError::TransactionAlreadyOpen);
3284        }
3285        self.tx_catalogs.insert(
3286            tx_id,
3287            TxState {
3288                catalog: self.catalog.clone(),
3289                savepoints: Vec::new(),
3290            },
3291        );
3292        Ok(QueryResult::CommandOk {
3293            affected: 0,
3294            modified_catalog: false,
3295        })
3296    }
3297
3298    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
3299        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3300        let state = self
3301            .tx_catalogs
3302            .remove(&tx_id)
3303            .ok_or(EngineError::NoActiveTransaction)?;
3304        self.catalog = state.catalog;
3305        // All savepoints become permanent at COMMIT and the stack
3306        // resets for the next TX (`state.savepoints` is discarded with
3307        // `state`).
3308        Ok(QueryResult::CommandOk {
3309            affected: 0,
3310            modified_catalog: true,
3311        })
3312    }
3313
3314    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
3315        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3316        if self.tx_catalogs.remove(&tx_id).is_none() {
3317            return Err(EngineError::NoActiveTransaction);
3318        }
3319        // savepoints discarded with the TxState
3320        Ok(QueryResult::CommandOk {
3321            affected: 0,
3322            modified_catalog: false,
3323        })
3324    }
3325
3326    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
3327        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3328        let state = self
3329            .tx_catalogs
3330            .get_mut(&tx_id)
3331            .ok_or(EngineError::NoActiveTransaction)?;
3332        // PG re-uses an existing savepoint name by dropping the older
3333        // entry and pushing a fresh one — match that behaviour so
3334        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
3335        state.savepoints.retain(|(n, _)| n != &name);
3336        let snapshot = state.catalog.clone();
3337        state.savepoints.push((name, snapshot));
3338        Ok(QueryResult::CommandOk {
3339            affected: 0,
3340            modified_catalog: false,
3341        })
3342    }
3343
3344    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3345        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3346        let state = self
3347            .tx_catalogs
3348            .get_mut(&tx_id)
3349            .ok_or(EngineError::NoActiveTransaction)?;
3350        let pos = state
3351            .savepoints
3352            .iter()
3353            .rposition(|(n, _)| n == name)
3354            .ok_or_else(|| {
3355                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3356            })?;
3357        // The savepoint stays on the stack (PG semantics): a later
3358        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
3359        // after it is discarded.
3360        let snapshot = state.savepoints[pos].1.clone();
3361        state.savepoints.truncate(pos + 1);
3362        state.catalog = snapshot;
3363        Ok(QueryResult::CommandOk {
3364            affected: 0,
3365            modified_catalog: false,
3366        })
3367    }
3368
3369    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3370        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
3371        let state = self
3372            .tx_catalogs
3373            .get_mut(&tx_id)
3374            .ok_or(EngineError::NoActiveTransaction)?;
3375        let pos = state
3376            .savepoints
3377            .iter()
3378            .rposition(|(n, _)| n == name)
3379            .ok_or_else(|| {
3380                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
3381            })?;
3382        // RELEASE keeps the work since the savepoint, just discards the
3383        // bookmark plus everything nested under it.
3384        state.savepoints.truncate(pos);
3385        Ok(QueryResult::CommandOk {
3386            affected: 0,
3387            modified_catalog: false,
3388        })
3389    }
3390
3391    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
3392    /// (encoding = …)]`. Walks every table in the active catalog
3393    /// looking for an index matching `stmt.name`, then delegates the
3394    /// rebuild (including any encoding switch) to
3395    /// `Table::rebuild_nsw_index`. The "live" non-blocking
3396    /// optimisation is v6.0.4.1 / v6.1.x territory.
3397    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
3398    /// arm. Currently the only setting is `hot_tier_bytes`; later
3399    /// v6.7.x can extend `AlterTableTarget` without touching this
3400    /// arm structure.
3401    fn exec_alter_table(
3402        &mut self,
3403        s: spg_sql::ast::AlterTableStatement,
3404    ) -> Result<QueryResult, EngineError> {
3405        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
3406        // On first error the statement aborts; subactions already
3407        // applied stay (no transactional rollback in v7.13 — wrap in
3408        // BEGIN/COMMIT if atomicity matters).
3409        let table_name = s.name.clone();
3410        for target in s.targets {
3411            self.exec_alter_table_subaction(&table_name, target)?;
3412        }
3413        Ok(QueryResult::CommandOk {
3414            affected: 0,
3415            modified_catalog: !self.in_transaction(),
3416        })
3417    }
3418
3419    fn exec_alter_table_subaction(
3420        &mut self,
3421        table_name_outer: &str,
3422        target: spg_sql::ast::AlterTableTarget,
3423    ) -> Result<(), EngineError> {
3424        // Inner helper retains the s.name closure shape; alias to `s`
3425        // for minimal diff against the v7.13.0 body.
3426        struct S<'a> {
3427            name: &'a str,
3428        }
3429        let s = S {
3430            name: table_name_outer,
3431        };
3432        match target {
3433            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
3434                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3435                    EngineError::Storage(StorageError::TableNotFound {
3436                        name: s.name.into(),
3437                    })
3438                })?;
3439                table.schema_mut().hot_tier_bytes = Some(n);
3440            }
3441            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
3442                // v7.6.8 — resolve FK against the live catalog first
3443                // (validates parent table, columns, indices). Then
3444                // verify every existing row in the child table
3445                // satisfies the new constraint. Then install it.
3446                let cols_snapshot = self
3447                    .active_catalog()
3448                    .get(s.name)
3449                    .ok_or_else(|| {
3450                        EngineError::Storage(StorageError::TableNotFound {
3451                            name: s.name.into(),
3452                        })
3453                    })?
3454                    .schema()
3455                    .columns
3456                    .clone();
3457                let storage_fk =
3458                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
3459                // Verify existing rows. Treat them as a virtual
3460                // INSERT batch — reusing the v7.6.2 enforce helper.
3461                let existing_rows: Vec<Vec<Value>> = self
3462                    .active_catalog()
3463                    .get(&s.name)
3464                    .expect("checked above")
3465                    .rows()
3466                    .iter()
3467                    .map(|r| r.values.clone())
3468                    .collect();
3469                enforce_fk_inserts(
3470                    self.active_catalog(),
3471                    s.name,
3472                    core::slice::from_ref(&storage_fk),
3473                    &existing_rows,
3474                )?;
3475                // Reject duplicate constraint name.
3476                let table = self
3477                    .active_catalog_mut()
3478                    .get_mut(s.name)
3479                    .expect("checked above");
3480                if let Some(name) = &storage_fk.name
3481                    && table
3482                        .schema()
3483                        .foreign_keys
3484                        .iter()
3485                        .any(|f| f.name.as_ref() == Some(name))
3486                {
3487                    return Err(EngineError::Unsupported(alloc::format!(
3488                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
3489                    )));
3490                }
3491                table.schema_mut().foreign_keys.push(storage_fk);
3492            }
3493            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
3494                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3495                    EngineError::Storage(StorageError::TableNotFound {
3496                        name: s.name.into(),
3497                    })
3498                })?;
3499                let fks = &mut table.schema_mut().foreign_keys;
3500                let before = fks.len();
3501                fks.retain(|f| f.name.as_ref() != Some(&name));
3502                if fks.len() == before && !if_exists {
3503                    return Err(EngineError::Unsupported(alloc::format!(
3504                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
3505                        s.name
3506                    )));
3507                }
3508                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
3509            }
3510            spg_sql::ast::AlterTableTarget::AddColumn {
3511                column,
3512                if_not_exists,
3513            } => {
3514                // v7.13.0 — mailrs round-5 G1. Append-only column add
3515                // with back-fill of the DEFAULT (or NULL) into every
3516                // existing row. Column positions don't shift, so we
3517                // skip index rebuild.
3518                let clock = self.clock;
3519                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3520                    EngineError::Storage(StorageError::TableNotFound {
3521                        name: s.name.into(),
3522                    })
3523                })?;
3524                if table
3525                    .schema()
3526                    .columns
3527                    .iter()
3528                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
3529                {
3530                    if if_not_exists {
3531                        return Ok(());
3532                    }
3533                    return Err(EngineError::Unsupported(alloc::format!(
3534                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
3535                        column.name,
3536                        s.name
3537                    )));
3538                }
3539                let col_name = column.name.clone();
3540                let nullable = column.nullable;
3541                let has_default =
3542                    column.default.is_some() || column.auto_increment;
3543                let col_schema = column_def_to_schema(column)?;
3544                let row_count = table.row_count();
3545                // Compute the back-fill value. Literal / runtime DEFAULT
3546                // funnels through the same resolver that INSERT uses
3547                // (v7.9.21 `resolve_column_default_free`). NULL when
3548                // the column is nullable and has no DEFAULT. NOT NULL
3549                // without DEFAULT errors when the table has existing
3550                // rows — same as PG.
3551                let fill_value: Value = if has_default
3552                    || col_schema.runtime_default.is_some()
3553                {
3554                    resolve_column_default_free(&col_schema, clock)?
3555                } else if nullable || row_count == 0 {
3556                    Value::Null
3557                } else {
3558                    return Err(EngineError::Unsupported(alloc::format!(
3559                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
3560                         when the table has existing rows"
3561                    )));
3562                };
3563                table.add_column(col_schema, fill_value);
3564            }
3565            spg_sql::ast::AlterTableTarget::AlterColumnType {
3566                column,
3567                new_type,
3568                using,
3569            } => {
3570                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
3571                // row's column value (either through the USING
3572                // expression if supplied, or as a direct CAST of
3573                // the existing value) and re-coerce to the new
3574                // type. Indices on the column get rebuilt.
3575                let new_data_type = column_type_to_data_type(new_type);
3576                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3577                    EngineError::Storage(StorageError::TableNotFound {
3578                        name: s.name.into(),
3579                    })
3580                })?;
3581                let col_pos = table
3582                    .schema()
3583                    .columns
3584                    .iter()
3585                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3586                    .ok_or_else(|| {
3587                        EngineError::Unsupported(alloc::format!(
3588                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
3589                            s.name
3590                        ))
3591                    })?;
3592                let schema_cols = table.schema().columns.clone();
3593                let ctx = eval::EvalContext::new(&schema_cols, None);
3594                let mut new_values: alloc::vec::Vec<Value> =
3595                    alloc::vec::Vec::with_capacity(table.row_count());
3596                for row in table.rows().iter() {
3597                    let raw = match &using {
3598                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
3599                            EngineError::Unsupported(alloc::format!(
3600                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
3601                            ))
3602                        })?,
3603                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
3604                    };
3605                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
3606                    new_values.push(coerced);
3607                }
3608                table.schema_mut().columns[col_pos].ty = new_data_type;
3609                for (i, v) in new_values.into_iter().enumerate() {
3610                    let mut row_values = table
3611                        .rows()
3612                        .get(i)
3613                        .expect("bounds-checked above")
3614                        .values
3615                        .clone();
3616                    row_values[col_pos] = v;
3617                    table.update_row(i, row_values)?;
3618                }
3619            }
3620            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
3621                // v7.14.0 — pg_dump emits PKs as a separate
3622                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
3623                // For PRIMARY KEY / UNIQUE, install a UC entry
3624                // and the implicit BTree index on the leading
3625                // column. CHECK: append predicate to schema.
3626                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3627                    EngineError::Storage(StorageError::TableNotFound {
3628                        name: s.name.into(),
3629                    })
3630                })?;
3631                let is_pk = matches!(
3632                    tc,
3633                    spg_sql::ast::TableConstraint::PrimaryKey { .. }
3634                );
3635                match tc {
3636                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
3637                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3638                        let positions: Vec<usize> = columns
3639                            .iter()
3640                            .map(|c| {
3641                                table
3642                                    .schema()
3643                                    .columns
3644                                    .iter()
3645                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
3646                                    .ok_or_else(|| {
3647                                        EngineError::Unsupported(alloc::format!(
3648                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
3649                                            s.name
3650                                        ))
3651                                    })
3652                            })
3653                            .collect::<Result<Vec<_>, _>>()?;
3654                        // Skip if an equivalent UC is already there
3655                        // (idempotent — pg_dump's PK + a prior inline
3656                        // PK shouldn't double-install).
3657                        let already = table
3658                            .schema()
3659                            .uniqueness_constraints
3660                            .iter()
3661                            .any(|u| u.columns == positions);
3662                        if !already {
3663                            table.schema_mut().uniqueness_constraints.push(
3664                                spg_storage::UniquenessConstraint {
3665                                    is_primary_key: is_pk,
3666                                    columns: positions.clone(),
3667                                    nulls_not_distinct: false,
3668                                },
3669                            );
3670                            // PK implies NOT NULL on referenced cols.
3671                            if is_pk {
3672                                for p in &positions {
3673                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
3674                                        c.nullable = false;
3675                                    }
3676                                }
3677                            }
3678                            // Add a BTree index on the leading
3679                            // column for INSERT-side enforcement.
3680                            let leading = &columns[0];
3681                            let already_idx = table.indices().iter().any(|idx| {
3682                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3683                                    && table.schema().columns[idx.column_position].name
3684                                        == *leading
3685                            });
3686                            if !already_idx {
3687                                let suffix = if is_pk { "pkey" } else { "key" };
3688                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
3689                                let _ = table.add_index(idx_name, leading);
3690                            }
3691                        }
3692                    }
3693                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
3694                        table.schema_mut().checks.push(alloc::format!("{expr}"));
3695                    }
3696                }
3697            }
3698            spg_sql::ast::AlterTableTarget::DropColumn {
3699                column,
3700                if_exists,
3701                cascade,
3702            } => {
3703                // v7.13.3 — mailrs round-7 S8. Remove the column +
3704                // every row's value at that position; drop any index
3705                // on the column. RESTRICT (default) rejects when an
3706                // FK on this table or partial-index predicate
3707                // references the column; CASCADE removes those
3708                // dependents first.
3709                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
3710                    EngineError::Storage(StorageError::TableNotFound {
3711                        name: s.name.into(),
3712                    })
3713                })?;
3714                let col_pos = match table
3715                    .schema()
3716                    .columns
3717                    .iter()
3718                    .position(|c| c.name.eq_ignore_ascii_case(&column))
3719                {
3720                    Some(p) => p,
3721                    None => {
3722                        if if_exists {
3723                            return Ok(());
3724                        }
3725                        return Err(EngineError::Unsupported(alloc::format!(
3726                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
3727                            s.name
3728                        )));
3729                    }
3730                };
3731                // Dependent check: FKs whose local columns include
3732                // col_pos. CASCADE drops them; otherwise reject.
3733                let dependent_fks: Vec<usize> = table
3734                    .schema()
3735                    .foreign_keys
3736                    .iter()
3737                    .enumerate()
3738                    .filter_map(|(i, fk)| {
3739                        if fk.local_columns.contains(&col_pos) {
3740                            Some(i)
3741                        } else {
3742                            None
3743                        }
3744                    })
3745                    .collect();
3746                if !dependent_fks.is_empty() && !cascade {
3747                    return Err(EngineError::Unsupported(alloc::format!(
3748                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
3749                         use DROP COLUMN ... CASCADE to remove them"
3750                    )));
3751                }
3752                // CASCADE the FK removals first.
3753                if cascade {
3754                    // Drop in reverse so indices stay valid.
3755                    let mut sorted = dependent_fks.clone();
3756                    sorted.sort();
3757                    sorted.reverse();
3758                    let fks = &mut table.schema_mut().foreign_keys;
3759                    for i in sorted {
3760                        fks.remove(i);
3761                    }
3762                }
3763                // Drop the column. New helper on Table does the
3764                // row + schema + index shift atomically.
3765                table.drop_column(col_pos);
3766            }
3767        }
3768        Ok(())
3769    }
3770
3771    fn exec_alter_index(
3772        &mut self,
3773        stmt: spg_sql::ast::AlterIndexStatement,
3774    ) -> Result<QueryResult, EngineError> {
3775        // Translate the optional SQL-side encoding choice into the
3776        // storage-side enum; the same SqlVecEncoding -> VecEncoding
3777        // bridge `column_type_to_data_type` uses.
3778        let spg_sql::ast::AlterIndexStatement {
3779            name: idx_name,
3780            target,
3781        } = stmt;
3782        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
3783        let target = encoding.map(|e| match e {
3784            SqlVecEncoding::F32 => VecEncoding::F32,
3785            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
3786            SqlVecEncoding::F16 => VecEncoding::F16,
3787        });
3788        // Linear scan: index names are globally unique within a
3789        // catalog (enforced by add_nsw_index_inner) so the first
3790        // match is the only one. Save the table name to avoid
3791        // borrowing while we then take a mut borrow.
3792        let table_name = {
3793            let cat = self.active_catalog();
3794            let mut found: Option<String> = None;
3795            for tname in cat.table_names() {
3796                if let Some(t) = cat.get(&tname)
3797                    && t.indices().iter().any(|i| i.name == idx_name)
3798                {
3799                    found = Some(tname);
3800                    break;
3801                }
3802            }
3803            found.ok_or_else(|| {
3804                EngineError::Storage(StorageError::IndexNotFound {
3805                    name: idx_name.clone(),
3806                })
3807            })?
3808        };
3809        let table = self
3810            .active_catalog_mut()
3811            .get_mut(&table_name)
3812            .expect("table found above");
3813        table.rebuild_nsw_index(&idx_name, target)?;
3814        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
3815        // changes cost characteristics; evict any cached plans.
3816        self.plan_cache.evict_referencing(&table_name);
3817        Ok(QueryResult::CommandOk {
3818            affected: 0,
3819            modified_catalog: !self.in_transaction(),
3820        })
3821    }
3822
3823    fn exec_create_index(
3824        &mut self,
3825        stmt: CreateIndexStatement,
3826    ) -> Result<QueryResult, EngineError> {
3827        let table = self
3828            .active_catalog_mut()
3829            .get_mut(&stmt.table)
3830            .ok_or_else(|| {
3831                EngineError::Storage(StorageError::TableNotFound {
3832                    name: stmt.table.clone(),
3833                })
3834            })?;
3835        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
3836        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
3837            return Ok(QueryResult::CommandOk {
3838                affected: 0,
3839                modified_catalog: false,
3840            });
3841        }
3842        // v7.9.14 — multi-column index parses through; engine
3843        // builds a single-column BTree on the leading column only.
3844        // The extras live on the AST so spg-server's dispatcher
3845        // can emit a PG-wire NoticeResponse / log line. Composite
3846        // BTree keys land in v7.10.
3847        let _ = &stmt.extra_columns; // intentional drop on engine side
3848        let table_name = stmt.table.clone();
3849        // v6.8.0 — resolve INCLUDE column names to positions. Done
3850        // before `add_index` so a typo error surfaces before any
3851        // catalog mutation lands.
3852        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
3853            Vec::new()
3854        } else {
3855            let schema = table.schema();
3856            stmt.included_columns
3857                .iter()
3858                .map(|c| {
3859                    schema.column_position(c).ok_or_else(|| {
3860                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
3861                    })
3862                })
3863                .collect::<Result<Vec<_>, _>>()?
3864        };
3865        match stmt.method {
3866            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
3867            IndexMethod::Hnsw => {
3868                if !included_positions.is_empty() {
3869                    return Err(EngineError::Unsupported(
3870                        "INCLUDE columns are not supported on HNSW indexes".into(),
3871                    ));
3872                }
3873                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
3874            }
3875            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
3876            IndexMethod::Brin => {
3877                if !included_positions.is_empty() {
3878                    return Err(EngineError::Unsupported(
3879                        "INCLUDE columns are not supported on BRIN indexes".into(),
3880                    ));
3881                }
3882                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
3883            }
3884            // v7.12.3 — GIN inverted index. Real posting-list-backed
3885            // GIN when the indexed column is `tsvector`; falls back
3886            // to a BTree on the leading column for any other column
3887            // type so v7.9.26b's `pg_dump` compatibility (GIN on
3888            // JSONB etc. silently loading as BTree) is preserved.
3889            // Operators see the real GIN only where it matters; old
3890            // schemas keep loading.
3891            IndexMethod::Gin => {
3892                if !included_positions.is_empty() {
3893                    return Err(EngineError::Unsupported(
3894                        "INCLUDE columns are not supported on GIN indexes".into(),
3895                    ));
3896                }
3897                let col_pos = table
3898                    .schema()
3899                    .column_position(&stmt.column)
3900                    .ok_or_else(|| {
3901                        EngineError::Storage(StorageError::ColumnNotFound {
3902                            column: stmt.column.clone(),
3903                        })
3904                    })?;
3905                if table.schema().columns[col_pos].ty == spg_storage::DataType::TsVector {
3906                    table
3907                        .add_gin_index(stmt.name.clone(), &stmt.column)
3908                        .map_err(EngineError::Storage)?;
3909                } else {
3910                    // v7.9.26b BTree fallback — the catalog still
3911                    // gets an index entry on the leading column so
3912                    // pg_dump scripts that name GIN on JSONB / etc.
3913                    // load clean; query-time gain stays opt-in for
3914                    // tsvector callers.
3915                    table.add_index(stmt.name.clone(), &stmt.column)?;
3916                }
3917            }
3918        }
3919        if !included_positions.is_empty()
3920            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
3921        {
3922            idx.included_columns = included_positions;
3923        }
3924        // v6.8.1 — persist partial-index predicate. Stored as the
3925        // expression's Display form so the catalog snapshot stays
3926        // pure (storage has no spg-sql dependency). The runtime
3927        // maintenance path treats partial indexes identically to
3928        // full indexes for v6.8.1 (over-maintenance is safe; the
3929        // planner-side "use partial when query WHERE implies the
3930        // predicate" pass is STABILITY carve-out).
3931        if let Some(pred_expr) = &stmt.partial_predicate {
3932            let canonical = pred_expr.to_string();
3933            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
3934            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
3935            // routinely to slim trigram indexes. SPG now persists
3936            // the predicate for GIN / BRIN / HNSW the same way it
3937            // already does for BTree — same v6.8.1 "over-maintain
3938            // is safe; planner-side partial routing is STABILITY
3939            // carve-out" semantics. HNSW carries an additional
3940            // caveat: the predicate isn't applied at index build
3941            // time (would require per-row eval inside the NSW
3942            // construction loop), so the index oversamples; query
3943            // time the WHERE clause still filters correctly.
3944            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3945                idx.partial_predicate = Some(canonical);
3946            }
3947        }
3948        // v6.8.2 — persist expression index key. Same Display-form
3949        // storage; the runtime maintenance pass evaluates each
3950        // row's expression to derive the index key, but for v6.8.2
3951        // the engine falls through to the bare-column-reference
3952        // path and the expression is preserved for format-layer
3953        // round-trip + future planner work. Carved-out in
3954        // STABILITY § "Out of v6.8".
3955        if let Some(key_expr) = &stmt.expression {
3956            if matches!(
3957                stmt.method,
3958                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
3959            ) {
3960                return Err(EngineError::Unsupported(
3961                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
3962                ));
3963            }
3964            let canonical = key_expr.to_string();
3965            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3966                idx.expression = Some(canonical);
3967            }
3968        }
3969        // v7.9.29 — persist `is_unique` flag on the storage Index.
3970        // Combined with `partial_predicate`, INSERT enforcement
3971        // checks that no other row whose predicate evaluates true
3972        // shares the same indexed key. Parser already rejected
3973        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
3974        // For multi-column UNIQUE INDEX the extras matter (the
3975        // full tuple is the uniqueness key), so resolve them to
3976        // column positions and persist on the index too.
3977        if stmt.is_unique {
3978            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
3979            for col_name in &stmt.extra_columns {
3980                let pos = table
3981                    .schema()
3982                    .columns
3983                    .iter()
3984                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
3985                    .ok_or_else(|| {
3986                        EngineError::Unsupported(alloc::format!(
3987                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
3988                            stmt.name,
3989                            stmt.table
3990                        ))
3991                    })?;
3992                extra_positions.push(pos);
3993            }
3994            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3995                idx.is_unique = true;
3996                idx.extra_column_positions = extra_positions;
3997            }
3998            // At index-creation time, check the existing rows for
3999            // pre-existing duplicates that would have violated the
4000            // new constraint — otherwise CREATE UNIQUE INDEX would
4001            // silently leave duplicates in place.
4002            let snapshot_indices = table.indices().to_vec();
4003            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
4004                table.rows().iter().cloned().collect();
4005            let snapshot_schema = table.schema().clone();
4006            let idx_ref = snapshot_indices
4007                .iter()
4008                .find(|i| i.name == stmt.name)
4009                .expect("just-added index");
4010            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
4011        }
4012        // v6.3.1 — adding an index can change the optimal plan for
4013        // any cached query that references this table.
4014        self.plan_cache.evict_referencing(&table_name);
4015        Ok(QueryResult::CommandOk {
4016            affected: 0,
4017            modified_catalog: !self.in_transaction(),
4018        })
4019    }
4020
4021    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
4022    /// for `CREATE TABLE IF NOT EXISTS` when the table already
4023    /// exists. Adds missing columns + inline FKs from the new
4024    /// definition; existing columns / constraints stay untouched.
4025    /// New columns with a `NOT NULL` declaration without a
4026    /// `DEFAULT` are reported as a clear error rather than
4027    /// silently dropped — this is the "fail loud on real
4028    /// incompatibility, fail silent on schema-superset" tradeoff.
4029    fn reconcile_table_if_not_exists(
4030        &mut self,
4031        stmt: CreateTableStatement,
4032    ) -> Result<QueryResult, EngineError> {
4033        let table_name = stmt.name.clone();
4034        let clock = self.clock;
4035        let existing_col_names: alloc::collections::BTreeSet<String> = self
4036            .active_catalog()
4037            .get(&table_name)
4038            .expect("checked above")
4039            .schema()
4040            .columns
4041            .iter()
4042            .map(|c| c.name.to_ascii_lowercase())
4043            .collect();
4044        let row_count = self
4045            .active_catalog()
4046            .get(&table_name)
4047            .expect("checked above")
4048            .row_count();
4049        // Collect missing column defs in source order.
4050        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
4051            .columns
4052            .iter()
4053            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
4054            .cloned()
4055            .collect();
4056        for col_def in new_columns {
4057            let col_name = col_def.name.clone();
4058            let nullable = col_def.nullable;
4059            let has_default = col_def.default.is_some() || col_def.auto_increment;
4060            let col_schema = column_def_to_schema(col_def)?;
4061            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
4062                resolve_column_default_free(&col_schema, clock)?
4063            } else if nullable || row_count == 0 {
4064                Value::Null
4065            } else {
4066                return Err(EngineError::Unsupported(alloc::format!(
4067                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
4068                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
4069                )));
4070            };
4071            let table = self
4072                .active_catalog_mut()
4073                .get_mut(&table_name)
4074                .expect("checked above");
4075            table.add_column(col_schema, fill_value);
4076        }
4077        // Resolve any newly-added inline FKs (column-level
4078        // REFERENCES forms) and install. Skip FKs whose local
4079        // columns we didn't have in the existing table.
4080        let table_cols_now = self
4081            .active_catalog()
4082            .get(&table_name)
4083            .expect("checked above")
4084            .schema()
4085            .columns
4086            .clone();
4087        for fk in stmt.foreign_keys {
4088            // Only install FKs whose every local column resolves
4089            // — older catalogs may have a column the new FK
4090            // references but not the column the new FK declares.
4091            let all_resolved = fk
4092                .columns
4093                .iter()
4094                .all(|c| table_cols_now.iter().any(|sc| sc.name.eq_ignore_ascii_case(c)));
4095            if !all_resolved {
4096                continue;
4097            }
4098            let already_present = {
4099                let table = self
4100                    .active_catalog()
4101                    .get(&table_name)
4102                    .expect("checked above");
4103                table.schema().foreign_keys.iter().any(|f| {
4104                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
4105                        && f.local_columns.len() == fk.columns.len()
4106                })
4107            };
4108            if already_present {
4109                continue;
4110            }
4111            let storage_fk =
4112                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
4113            let table = self
4114                .active_catalog_mut()
4115                .get_mut(&table_name)
4116                .expect("checked above");
4117            table.schema_mut().foreign_keys.push(storage_fk);
4118        }
4119        Ok(QueryResult::CommandOk {
4120            affected: 0,
4121            modified_catalog: !self.in_transaction(),
4122        })
4123    }
4124
4125    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
4126    fn exec_drop_table(
4127        &mut self,
4128        names: Vec<String>,
4129        if_exists: bool,
4130    ) -> Result<QueryResult, EngineError> {
4131        for name in names {
4132            let dropped = self.active_catalog_mut().drop_table(&name);
4133            if !dropped && !if_exists {
4134                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
4135            }
4136        }
4137        Ok(QueryResult::CommandOk {
4138            affected: 0,
4139            modified_catalog: !self.in_transaction(),
4140        })
4141    }
4142
4143    /// v7.14.0 — DROP INDEX handler.
4144    fn exec_drop_index(
4145        &mut self,
4146        name: String,
4147        if_exists: bool,
4148    ) -> Result<QueryResult, EngineError> {
4149        let dropped = self.active_catalog_mut().drop_named_index(&name);
4150        if !dropped && !if_exists {
4151            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
4152        }
4153        Ok(QueryResult::CommandOk {
4154            affected: 0,
4155            modified_catalog: !self.in_transaction(),
4156        })
4157    }
4158
4159    fn exec_create_table(
4160        &mut self,
4161        stmt: CreateTableStatement,
4162    ) -> Result<QueryResult, EngineError> {
4163        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
4164            // v7.13.3 — mailrs round-7 S9 reconciliation. PG's
4165            // semantics for `CREATE TABLE IF NOT EXISTS` is a
4166            // silent no-op when the table exists, even if the new
4167            // definition adds columns or constraints. SPG extends
4168            // this: any column in the new definition that's
4169            // missing from the existing table is added (with
4170            // DEFAULT back-fill / NULL); inline FKs likewise.
4171            // Existing columns are NOT modified. This makes
4172            // mailrs's schema layering (init-schema's `contacts`
4173            // sender-tracking table + migrate-023's CardDAV
4174            // `contacts` extension) converge correctly without
4175            // mailrs-side edits. PG users who want PG-strict
4176            // silent-no-op behaviour can use SPG's `--strict-pg`
4177            // flag (deferred to v7.14).
4178            return self.reconcile_table_if_not_exists(stmt);
4179        }
4180        let table_name = stmt.name.clone();
4181        // v7.9.13 — pluck the names of any columns marked
4182        // `PRIMARY KEY` inline so the post-create-table pass can
4183        // build an implicit BTree index. mailrs F1.
4184        let inline_pk_columns: Vec<String> = stmt
4185            .columns
4186            .iter()
4187            .filter(|c| c.is_primary_key)
4188            .map(|c| c.name.clone())
4189            .collect();
4190        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
4191        // and UNIQUE (a, b, ...). Each builds a BTree index on the
4192        // leading column (the existing single-column storage tier)
4193        // and registers a UniquenessConstraint on the schema for
4194        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
4195        let cols = stmt
4196            .columns
4197            .into_iter()
4198            .map(column_def_to_schema)
4199            .collect::<Result<Vec<_>, _>>()?;
4200        // Composite NOT-NULL implication for PRIMARY KEY columns.
4201        let mut cols = cols;
4202        for tc in &stmt.table_constraints {
4203            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
4204                for col_name in columns {
4205                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
4206                        col.nullable = false;
4207                    }
4208                }
4209            }
4210        }
4211        // v7.6.1 — resolve every FK in the statement against the
4212        // already-known catalog. Validates: parent table exists,
4213        // parent column names exist, arity matches, parent columns
4214        // have a PK / UNIQUE index. Self-referencing FKs (parent
4215        // table == this table) resolve against the column list we
4216        // just built — they don't need the catalog yet.
4217        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
4218            Vec::with_capacity(stmt.foreign_keys.len());
4219        for fk in stmt.foreign_keys {
4220            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
4221            // (mysqldump preamble + bulk imports), defer FK
4222            // resolution if the parent table isn't in the catalog
4223            // yet. The FK is queued and resolved when checks flip
4224            // back on. Self-references stay in-band (the parent is
4225            // the same as the child we're building).
4226            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
4227            if !self.foreign_key_checks
4228                && needs_parent
4229                && self.active_catalog().get(&fk.parent_table).is_none()
4230            {
4231                self.pending_foreign_keys
4232                    .push((table_name.clone(), fk));
4233                continue;
4234            }
4235            fks.push(resolve_foreign_key(
4236                &table_name,
4237                &cols,
4238                fk,
4239                self.active_catalog(),
4240            )?);
4241        }
4242        let mut schema = TableSchema::new(table_name.clone(), cols);
4243        schema.foreign_keys = fks;
4244        // v7.9.19 — translate AST table_constraints to storage
4245        // UniquenessConstraints (column name → position) so the
4246        // INSERT enforcement helper sees positions directly.
4247        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
4248        let mut check_exprs: Vec<String> = Vec::new();
4249        for tc in &stmt.table_constraints {
4250            let (is_pk, names, nnd) = match tc {
4251                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
4252                    (true, columns.clone(), false)
4253                }
4254                spg_sql::ast::TableConstraint::Unique {
4255                    columns,
4256                    nulls_not_distinct,
4257                    ..
4258                } => (false, columns.clone(), *nulls_not_distinct),
4259                spg_sql::ast::TableConstraint::Check { expr, .. } => {
4260                    // v7.13.0 — collect CHECK predicate sources;
4261                    // they get attached to the schema below.
4262                    check_exprs.push(alloc::format!("{expr}"));
4263                    continue;
4264                }
4265            };
4266            let mut positions = Vec::with_capacity(names.len());
4267            for n in &names {
4268                let pos = schema
4269                    .columns
4270                    .iter()
4271                    .position(|c| c.name == *n)
4272                    .ok_or_else(|| {
4273                        EngineError::Unsupported(alloc::format!(
4274                            "table constraint references unknown column {n:?}"
4275                        ))
4276                    })?;
4277                positions.push(pos);
4278            }
4279            uc_storage.push(spg_storage::UniquenessConstraint {
4280                is_primary_key: is_pk,
4281                columns: positions,
4282                nulls_not_distinct: nnd,
4283            });
4284        }
4285        schema.uniqueness_constraints = uc_storage.clone();
4286        schema.checks = check_exprs;
4287        self.active_catalog_mut().create_table(schema)?;
4288        // v7.9.13 — implicit BTree per inline PK column +
4289        // v7.9.19 — implicit BTree on the leading column of every
4290        // table-level PRIMARY KEY / UNIQUE constraint.
4291        let table = self
4292            .active_catalog_mut()
4293            .get_mut(&table_name)
4294            .expect("just created");
4295        for (i, col_name) in inline_pk_columns.iter().enumerate() {
4296            let idx_name = if inline_pk_columns.len() == 1 {
4297                alloc::format!("{table_name}_pkey")
4298            } else {
4299                alloc::format!("{table_name}_pkey_{i}")
4300            };
4301            if let Err(e) = table.add_index(idx_name, col_name) {
4302                return Err(EngineError::Storage(e));
4303            }
4304        }
4305        for (i, tc) in stmt.table_constraints.iter().enumerate() {
4306            let (is_pk, names) = match tc {
4307                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => (true, columns),
4308                spg_sql::ast::TableConstraint::Unique { columns, .. } => (false, columns),
4309                spg_sql::ast::TableConstraint::Check { .. } => continue,
4310            };
4311            let leading = &names[0];
4312            // Skip if a same-column BTree already exists (e.g.
4313            // inline PK on the leading column).
4314            let already = table.indices().iter().any(|idx| {
4315                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
4316                    && table.schema().columns[idx.column_position].name == *leading
4317            });
4318            if already {
4319                continue;
4320            }
4321            let suffix = if is_pk { "pkey" } else { "key" };
4322            let idx_name = if names.len() == 1 {
4323                alloc::format!("{table_name}_{leading}_{suffix}")
4324            } else {
4325                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
4326            };
4327            if let Err(e) = table.add_index(idx_name, leading) {
4328                return Err(EngineError::Storage(e));
4329            }
4330        }
4331        Ok(QueryResult::CommandOk {
4332            affected: 0,
4333            modified_catalog: !self.in_transaction(),
4334        })
4335    }
4336
4337    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
4338        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
4339        // round-5 G4). Execute the inner SELECT first, then route
4340        // back through the regular VALUES code path with the
4341        // materialised rows.
4342        if let Some(select) = stmt.select_source.clone() {
4343            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
4344            let rows = match select_result {
4345                QueryResult::Rows { rows, .. } => rows,
4346                other => {
4347                    return Err(EngineError::Unsupported(alloc::format!(
4348                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
4349                    )));
4350                }
4351            };
4352            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
4353            for row in rows {
4354                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
4355                for v in row.values {
4356                    tuple.push(value_to_literal_expr_permissive(v)?);
4357                }
4358                materialised.push(tuple);
4359            }
4360            let recurse = InsertStatement {
4361                table: stmt.table,
4362                columns: stmt.columns,
4363                rows: materialised,
4364                select_source: None,
4365                on_conflict: stmt.on_conflict,
4366                returning: stmt.returning,
4367            };
4368            return self.exec_insert(recurse);
4369        }
4370        // v7.9.21 — snapshot the clock fn pointer before the mut
4371        // borrow on the catalog opens; runtime DEFAULT eval needs
4372        // it inside the row hot loop.
4373        let clock = self.clock;
4374        // v7.12.4 — snapshot row-level triggers + their referenced
4375        // functions before the mut borrow on the catalog opens.
4376        // Cloned out so the row hot loop can fire them without
4377        // re-borrowing the catalog (which would conflict with
4378        // table.insert's mutable borrow).
4379        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
4380        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
4381        let trigger_session_cfg: Option<alloc::string::String> = self
4382            .session_params
4383            .get("default_text_search_config")
4384            .cloned();
4385        let table = self
4386            .active_catalog_mut()
4387            .get_mut(&stmt.table)
4388            .ok_or_else(|| {
4389                EngineError::Storage(StorageError::TableNotFound {
4390                    name: stmt.table.clone(),
4391                })
4392            })?;
4393        // v3.1.5: clone the columns vector only (not the whole
4394        // TableSchema — saves one String alloc for the table name).
4395        // We need an owned snapshot because we'll call `table.insert`
4396        // (mutable borrow on `table`) inside the row loop while
4397        // reading schema fields.
4398        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
4399        let schema_cols_len = column_meta.len();
4400        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
4401        // column `c` is filled from the `j`-th tuple slot; `None` means
4402        // "fill with NULL". Validated once and reused for every row.
4403        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
4404            None => None, // 1-1 mapping, fast path
4405            Some(cols) => {
4406                let mut map = alloc::vec![None; schema_cols_len];
4407                for (j, name) in cols.iter().enumerate() {
4408                    let idx = column_meta
4409                        .iter()
4410                        .position(|c| c.name == *name)
4411                        .ok_or_else(|| {
4412                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
4413                        })?;
4414                    if map[idx].is_some() {
4415                        return Err(EngineError::Storage(StorageError::ArityMismatch {
4416                            expected: schema_cols_len,
4417                            actual: cols.len(),
4418                        }));
4419                    }
4420                    map[idx] = Some(j);
4421                }
4422                // Omitted columns must either be nullable, carry a
4423                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
4424                // omissions up front so the WAL stays clean.
4425                for (i, col) in column_meta.iter().enumerate() {
4426                    if map[i].is_none()
4427                        && !col.nullable
4428                        && col.default.is_none()
4429                        && col.runtime_default.is_none()
4430                        && !col.auto_increment
4431                    {
4432                        return Err(EngineError::Storage(StorageError::NullInNotNull {
4433                            column: col.name.clone(),
4434                        }));
4435                    }
4436                }
4437                Some(map)
4438            }
4439        };
4440        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
4441        // v7.6.2 — snapshot this table's FK list before the
4442        // mutable-borrow window so we can run parent lookups
4443        // against the immutable catalog after parsing. Empty vec is
4444        // the no-FK fast path; clone cost is O(fks * arity) which
4445        // is < 100 ns for typical schemas.
4446        let fks = table.schema().foreign_keys.clone();
4447        let mut affected = 0usize;
4448        // Stage 1 — parse + AUTO_INC + coerce all rows under the
4449        // single mutable borrow.
4450        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
4451        for tuple in stmt.rows {
4452            if tuple.len() != expected_tuple_len {
4453                return Err(EngineError::Storage(StorageError::ArityMismatch {
4454                    expected: expected_tuple_len,
4455                    actual: tuple.len(),
4456                }));
4457            }
4458            // Fast path: no column-list permutation → tuple slot j
4459            // maps to schema column j. We can zip schema with tuple
4460            // and skip the `raw_tuple` staging allocation entirely.
4461            let values: Vec<Value> = if let Some(map) = &tuple_pos {
4462                // Permuted path: still need raw_tuple to index by `map[i]`.
4463                let raw_tuple: Vec<Value> = tuple
4464                    .into_iter()
4465                    .map(literal_expr_to_value)
4466                    .collect::<Result<_, _>>()?;
4467                let mut out = Vec::with_capacity(schema_cols_len);
4468                for (i, col) in column_meta.iter().enumerate() {
4469                    let mut raw = match map[i] {
4470                        Some(j) => raw_tuple[j].clone(),
4471                        None => resolve_column_default_free(col, clock)?,
4472                    };
4473                    if col.auto_increment && raw.is_null() {
4474                        let next = table.next_auto_value(i).ok_or_else(|| {
4475                            EngineError::Unsupported(alloc::format!(
4476                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4477                                col.name
4478                            ))
4479                        })?;
4480                        raw = Value::BigInt(next);
4481                    }
4482                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4483                }
4484                out
4485            } else {
4486                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
4487                let mut out = Vec::with_capacity(schema_cols_len);
4488                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
4489                    let mut raw = literal_expr_to_value(expr)?;
4490                    if col.auto_increment && raw.is_null() {
4491                        let next = table.next_auto_value(i).ok_or_else(|| {
4492                            EngineError::Unsupported(alloc::format!(
4493                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
4494                                col.name
4495                            ))
4496                        })?;
4497                        raw = Value::BigInt(next);
4498                    }
4499                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
4500                }
4501                out
4502            };
4503            all_values.push(values);
4504        }
4505        // Stage 2 — FK enforcement on the immutable catalog.
4506        // Non-lexical lifetimes release the mutable borrow on
4507        // `table` here since stage 1 was the last use. The
4508        // parent-table lookup runs before any row is committed.
4509        let uniqueness = table.schema().uniqueness_constraints.clone();
4510        let _ = table;
4511        if !fks.is_empty() {
4512            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
4513        }
4514        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
4515        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
4516        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
4517        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
4518        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
4519        // Independent of table-level UniquenessConstraint (which
4520        // can't carry a predicate). Walks the table's indexes;
4521        // for each `is_unique` index, only rows whose
4522        // partial_predicate evaluates truthy are checked for
4523        // collision. mailrs K1.
4524        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
4525        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
4526        //   - `DO NOTHING` filters `all_values` to non-conflicting
4527        //     rows + drops within-batch duplicates.
4528        //   - `DO UPDATE SET …` ALSO filters, but for each
4529        //     conflicting row it queues an UPDATE on the existing
4530        //     row using the incoming row's values as `EXCLUDED.*`.
4531        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
4532        let mut skipped_count = 0usize;
4533        if let Some(clause) = &stmt.on_conflict {
4534            let conflict_cols = resolve_on_conflict_columns(
4535                self.active_catalog(),
4536                &stmt.table,
4537                clause.target_columns.as_slice(),
4538            )?;
4539            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
4540            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
4541            for values in all_values {
4542                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
4543                // SQL spec: NULL in any conflict column means "no
4544                // conflict possible" (NULL ≠ NULL for uniqueness).
4545                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
4546                let collides_with_table = !has_null_key
4547                    && on_conflict_keys_exist(
4548                        self.active_catalog(),
4549                        &stmt.table,
4550                        &conflict_cols,
4551                        &key_tuple,
4552                    );
4553                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
4554                let collides_with_batch =
4555                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
4556                let collides = collides_with_table || collides_with_batch;
4557                match (&clause.action, collides) {
4558                    (_, false) => {
4559                        seen_keys.push(key_tuple_owned);
4560                        kept.push(values);
4561                    }
4562                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
4563                        skipped_count += 1;
4564                    }
4565                    (
4566                        spg_sql::ast::OnConflictAction::Update {
4567                            assignments,
4568                            where_,
4569                        },
4570                        true,
4571                    ) => {
4572                        if !collides_with_table {
4573                            skipped_count += 1;
4574                            continue;
4575                        }
4576                        let target_pos = lookup_row_position_by_keys(
4577                            self.active_catalog(),
4578                            &stmt.table,
4579                            &conflict_cols,
4580                            &key_tuple,
4581                        )
4582                        .ok_or_else(|| {
4583                            EngineError::Unsupported(
4584                                "ON CONFLICT DO UPDATE: conflict detected but row \
4585                                 position could not be resolved (cold-tier row?)"
4586                                    .into(),
4587                            )
4588                        })?;
4589                        let updated = apply_on_conflict_assignments(
4590                            self.active_catalog(),
4591                            &stmt.table,
4592                            target_pos,
4593                            &values,
4594                            assignments,
4595                            where_.as_ref(),
4596                        )?;
4597                        if let Some(new_row) = updated {
4598                            pending_updates.push((target_pos, new_row));
4599                        } else {
4600                            skipped_count += 1;
4601                        }
4602                    }
4603                }
4604            }
4605            all_values = kept;
4606        }
4607        // Stage 3 — insert all rows under a fresh mutable borrow.
4608        let table = self
4609            .active_catalog_mut()
4610            .get_mut(&stmt.table)
4611            .ok_or_else(|| {
4612                EngineError::Storage(StorageError::TableNotFound {
4613                    name: stmt.table.clone(),
4614                })
4615            })?;
4616        // v7.9.4 — keep RETURNING projection rows separate per
4617        // INSERT and per UPDATE branch so DO UPDATE pushes the new
4618        // post-update state, not the incoming-only values.
4619        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
4620        // v7.12.7 — collect embedded SQL emitted by any trigger
4621        // fire across the row loop; engine drains the queue after
4622        // the table mut borrow drops.
4623        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4624        'rowloop: for values in all_values {
4625            let mut row = Row::new(values);
4626            // v7.12.4 — BEFORE INSERT row-level triggers. Each
4627            // trigger may rewrite NEW cells (e.g. populate
4628            // `search_vector := to_tsvector(...)`) and may return
4629            // NULL to skip the row entirely.
4630            for fd in &before_insert_triggers {
4631                let (outcome, deferred) = triggers::fire_row_trigger(
4632                    fd,
4633                    Some(row.clone()),
4634                    None,
4635                    &stmt.table,
4636                    &column_meta,
4637                    &[],
4638                    trigger_session_cfg.as_deref(),
4639                    false,
4640                )
4641                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4642                deferred_embedded.extend(deferred);
4643                match outcome {
4644                    triggers::TriggerOutcome::Row(r) => row = r,
4645                    triggers::TriggerOutcome::Skip => continue 'rowloop,
4646                }
4647            }
4648            if stmt.returning.is_some() {
4649                returning_rows.push(row.values.clone());
4650            }
4651            // v7.12.4 — clone for the AFTER trigger view; insert
4652            // moves the row into the table.
4653            let inserted = row.clone();
4654            table.insert(row)?;
4655            affected += 1;
4656            // v7.12.4 — AFTER INSERT row-level triggers fire post-
4657            // write. Return value is ignored (PG semantics); we
4658            // surface any error from the body up to the caller.
4659            for fd in &after_insert_triggers {
4660                let (_outcome, deferred) = triggers::fire_row_trigger(
4661                    fd,
4662                    Some(inserted.clone()),
4663                    None,
4664                    &stmt.table,
4665                    &column_meta,
4666                    &[],
4667                    trigger_session_cfg.as_deref(),
4668                    true,
4669                )
4670                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4671                deferred_embedded.extend(deferred);
4672            }
4673        }
4674        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
4675        // in the conflict-resolution pass. update_row handles
4676        // index maintenance + body re-encoding.
4677        for (pos, new_row) in pending_updates {
4678            if stmt.returning.is_some() {
4679                returning_rows.push(new_row.clone());
4680            }
4681            table.update_row(pos, new_row)?;
4682            affected += 1;
4683        }
4684        let _ = skipped_count;
4685        // v7.12.7 — drop the table mut borrow and drain any
4686        // trigger-emitted embedded SQL queued during this INSERT.
4687        // The borrow has to release first because each deferred
4688        // stmt may UPDATE / INSERT / DELETE the same (or another)
4689        // table — including, in principle, this one.
4690        let _ = table;
4691        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
4692        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
4693        // up in the table after this statement (insert or
4694        // post-update on conflict).
4695        if let Some(items) = &stmt.returning {
4696            return self.build_returning_rows(&stmt.table, items, returning_rows);
4697        }
4698        // v6.2.1 — auto-analyze: track per-table modified-row
4699        // counter so the background sweep can decide when to
4700        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
4701        // — one BTreeMap entry update per INSERT batch.
4702        if !self.in_transaction() && affected > 0 {
4703            self.statistics
4704                .record_modifications(&stmt.table, affected as u64);
4705        }
4706        Ok(QueryResult::CommandOk {
4707            affected,
4708            modified_catalog: !self.in_transaction(),
4709        })
4710    }
4711
4712    /// v4.5: SELECT with cooperative cancellation. The token is
4713    /// honoured between UNION peers and inside the bare-SELECT row
4714    /// loop; HNSW kNN graph walks and the aggregate executor don't
4715    /// honour it yet (deferred — those paths bound their work
4716    /// internally by `LIMIT k` and `GROUP BY` cardinality).
4717    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
4718    /// by id, decodes each row body against the table's current
4719    /// schema, applies the SELECT's projection + optional WHERE +
4720    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
4721    /// / ORDER BY are unsupported on this path (STABILITY carve-
4722    /// out); operators wanting them should restore the segment
4723    /// into a regular table first.
4724    fn exec_select_as_of_segment(
4725        &self,
4726        stmt: &SelectStatement,
4727        from: &spg_sql::ast::FromClause,
4728        segment_id: u32,
4729    ) -> Result<QueryResult, EngineError> {
4730        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
4731        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
4732        if !from.joins.is_empty()
4733            || stmt.group_by.is_some()
4734            || stmt.having.is_some()
4735            || !stmt.unions.is_empty()
4736            || !stmt.order_by.is_empty()
4737            || stmt.offset.is_some()
4738            || stmt.distinct
4739            || aggregate::uses_aggregate(stmt)
4740        {
4741            return Err(EngineError::Unsupported(
4742                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
4743                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
4744                    .into(),
4745            ));
4746        }
4747        let table = self
4748            .active_catalog()
4749            .get(&from.primary.name)
4750            .ok_or_else(|| StorageError::TableNotFound {
4751                name: from.primary.name.clone(),
4752            })?;
4753        let schema = table.schema().clone();
4754        let schema_cols = &schema.columns;
4755        let alias = from
4756            .primary
4757            .alias
4758            .as_deref()
4759            .unwrap_or(from.primary.name.as_str());
4760        let ctx = EvalContext::new(schema_cols, Some(alias));
4761        let seg = self
4762            .active_catalog()
4763            .cold_segment(segment_id)
4764            .ok_or_else(|| {
4765                EngineError::Unsupported(alloc::format!(
4766                    "AS OF SEGMENT: cold segment {segment_id} not registered"
4767                ))
4768            })?;
4769        let mut out_rows: Vec<Row> = Vec::new();
4770        let mut limit_remaining: Option<usize> =
4771            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
4772        for (_key, body) in seg.scan() {
4773            let (row, _consumed) =
4774                spg_storage::decode_row_body_dense(&body, &schema).map_err(EngineError::Storage)?;
4775            if let Some(where_expr) = &stmt.where_ {
4776                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
4777                if !matches!(cond, Value::Bool(true)) {
4778                    continue;
4779                }
4780            }
4781            // Projection.
4782            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
4783            out_rows.push(projected);
4784            if let Some(rem) = limit_remaining.as_mut() {
4785                if *rem == 0 {
4786                    out_rows.pop();
4787                    break;
4788                }
4789                *rem -= 1;
4790            }
4791        }
4792        // Output column schema: derive from SELECT items.
4793        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
4794        Ok(QueryResult::Rows {
4795            columns,
4796            rows: out_rows,
4797        })
4798    }
4799
4800    /// v6.10.2 — simple-path WHERE eval that doesn't go through
4801    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
4802    /// scan paths predicate against a snapshot frozen segment, no
4803    /// cross-row state.
4804    fn eval_expr_simple(
4805        &self,
4806        expr: &Expr,
4807        row: &Row,
4808        ctx: &EvalContext,
4809    ) -> Result<Value, EngineError> {
4810        let cancel = CancelToken::none();
4811        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
4812    }
4813
4814    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
4815    /// Given the table name, the user-supplied projection items,
4816    /// and the mutated rows (post-insert / post-update values, or
4817    /// pre-delete snapshot), build a `QueryResult::Rows` whose
4818    /// schema describes the projected columns. Mailrs migration
4819    /// blocker #1.
4820    fn build_returning_rows(
4821        &self,
4822        table_name: &str,
4823        items: &[SelectItem],
4824        mutated_rows: Vec<Vec<Value>>,
4825    ) -> Result<QueryResult, EngineError> {
4826        let table = self.active_catalog().get(table_name).ok_or_else(|| {
4827            EngineError::Storage(StorageError::TableNotFound {
4828                name: table_name.into(),
4829            })
4830        })?;
4831        let schema_cols = table.schema().columns.clone();
4832        let columns = self.derive_output_columns(items, &schema_cols, table_name);
4833        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
4834        for values in mutated_rows {
4835            let row = Row::new(values);
4836            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
4837            out_rows.push(projected);
4838        }
4839        Ok(QueryResult::Rows {
4840            columns,
4841            rows: out_rows,
4842        })
4843    }
4844
4845    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
4846    /// `SelectItem::Wildcard` to all schema columns and
4847    /// `SelectItem::Expr` via the regular eval path.
4848    fn project_row_simple(
4849        &self,
4850        row: &Row,
4851        items: &[SelectItem],
4852        schema_cols: &[ColumnSchema],
4853        alias: &str,
4854    ) -> Result<Row, EngineError> {
4855        let ctx = EvalContext::new(schema_cols, Some(alias));
4856        let cancel = CancelToken::none();
4857        let mut out_vals = Vec::new();
4858        for item in items {
4859            match item {
4860                SelectItem::Wildcard => {
4861                    out_vals.extend(row.values.iter().cloned());
4862                }
4863                SelectItem::Expr { expr, .. } => {
4864                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
4865                    out_vals.push(v);
4866                }
4867            }
4868        }
4869        Ok(Row::new(out_vals))
4870    }
4871
4872    /// v6.10.2 — derive the output `ColumnSchema` list for an
4873    /// AS OF SEGMENT projection. Wildcards take the full schema;
4874    /// expressions take the alias if present or a synthetic
4875    /// `?column?` (PG convention) otherwise.
4876    fn derive_output_columns(
4877        &self,
4878        items: &[SelectItem],
4879        schema_cols: &[ColumnSchema],
4880        _alias: &str,
4881    ) -> Vec<ColumnSchema> {
4882        let mut out = Vec::new();
4883        for item in items {
4884            match item {
4885                SelectItem::Wildcard => {
4886                    out.extend(schema_cols.iter().cloned());
4887                }
4888                SelectItem::Expr { alias, .. } => {
4889                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
4890                    // Default to Text; the caller's row values
4891                    // carry the actual type. v6.10.2 scope.
4892                    out.push(ColumnSchema::new(name, DataType::Text, true));
4893                }
4894            }
4895        }
4896        out
4897    }
4898
4899    fn exec_select_cancel(
4900        &self,
4901        stmt: &SelectStatement,
4902        cancel: CancelToken<'_>,
4903    ) -> Result<QueryResult, EngineError> {
4904        cancel.check()?;
4905        // v6.10.2 — cold-tier time-travel short-circuit. When the
4906        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
4907        // dedicated cold-segment scan instead of the regular
4908        // hot+index path. The scope is intentionally narrow for
4909        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
4910        // optionally with a single-column-equality WHERE. JOINs /
4911        // aggregates / ORDER BY / subqueries on top of a time-
4912        // travelled scan are STABILITY § "Out of v6.10".
4913        if let Some(from) = &stmt.from
4914            && let Some(seg_id) = from.primary.as_of_segment
4915        {
4916            return self.exec_select_as_of_segment(stmt, from, seg_id);
4917        }
4918        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
4919        // pre-CTE because they don't read from the catalog and
4920        // shouldn't participate in regular FROM resolution.
4921        if let Some(from) = &stmt.from
4922            && from.joins.is_empty()
4923            && stmt.where_.is_none()
4924            && stmt.group_by.is_none()
4925            && stmt.having.is_none()
4926            && stmt.unions.is_empty()
4927            && stmt.order_by.is_empty()
4928            && stmt.limit.is_none()
4929            && stmt.offset.is_none()
4930            && !stmt.distinct
4931            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
4932        {
4933            let lower = from.primary.name.to_ascii_lowercase();
4934            match lower.as_str() {
4935                "spg_statistic" => return Ok(self.exec_spg_statistic()),
4936                // v6.5.0 — observability v2 virtual tables.
4937                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
4938                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
4939                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
4940                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
4941                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
4942                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
4943                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
4944                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
4945                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
4946                _ => {}
4947            }
4948        }
4949        // v4.11: CTEs materialise into a temporary enriched catalog
4950        // *before* anything else — the body SELECT can then refer
4951        // to CTE names via the regular FROM-clause resolution.
4952        // Uncorrelated only: each CTE body runs once against the
4953        // current catalog, not against later CTEs' results (left-
4954        // to-right materialisation would relax this, but we keep
4955        // it simple for v4.11 MVP).
4956        if !stmt.ctes.is_empty() {
4957            return self.exec_with_ctes(stmt, cancel);
4958        }
4959        // v4.10: subqueries (uncorrelated) are resolved here, before
4960        // the executor sees the row loop. We clone the statement so
4961        // we can mutate without disturbing the caller's AST — most
4962        // queries pass through with no subquery nodes and the clone
4963        // is cheap; with subqueries the materialisation cost
4964        // dominates anyway.
4965        let mut stmt_owned;
4966        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
4967            stmt_owned = stmt.clone();
4968            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
4969            &stmt_owned
4970        } else {
4971            stmt
4972        };
4973        if stmt_ref.unions.is_empty() {
4974            return self.exec_bare_select_cancel(stmt_ref, cancel);
4975        }
4976        // UNION path: clone-strip the head into a bare block (its own
4977        // DISTINCT and any inner ORDER BY are dropped by parser rule —
4978        // the wrapper SelectStatement carries them), execute, then chain
4979        // peers with left-associative dedup semantics.
4980        let mut head = stmt_ref.clone();
4981        head.unions = Vec::new();
4982        head.order_by = Vec::new();
4983        head.limit = None;
4984        let QueryResult::Rows { columns, mut rows } =
4985            self.exec_bare_select_cancel(&head, cancel)?
4986        else {
4987            unreachable!("bare SELECT cannot return CommandOk")
4988        };
4989        for (kind, peer) in &stmt_ref.unions {
4990            let QueryResult::Rows {
4991                columns: peer_cols,
4992                rows: peer_rows,
4993            } = self.exec_bare_select_cancel(peer, cancel)?
4994            else {
4995                unreachable!("bare SELECT cannot return CommandOk")
4996            };
4997            if peer_cols.len() != columns.len() {
4998                return Err(EngineError::Unsupported(alloc::format!(
4999                    "UNION arity mismatch: head has {} columns, peer has {}",
5000                    columns.len(),
5001                    peer_cols.len()
5002                )));
5003            }
5004            rows.extend(peer_rows);
5005            if matches!(kind, UnionKind::Distinct) {
5006                rows = dedup_rows(rows);
5007            }
5008        }
5009        // ORDER BY at the top of a UNION applies to the combined result.
5010        // Eval against the projected schema (NOT the source table).
5011        if !stmt.order_by.is_empty() {
5012            let synth_ctx = EvalContext::new(&columns, None);
5013            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5014            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
5015            for r in rows {
5016                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
5017                tagged.push((keys, r));
5018            }
5019            sort_by_keys(&mut tagged, &descs);
5020            rows = tagged.into_iter().map(|(_, r)| r).collect();
5021        }
5022        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
5023        Ok(QueryResult::Rows { columns, rows })
5024    }
5025
5026    #[allow(clippy::too_many_lines)]
5027    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
5028    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
5029    /// Synthesises a single-column virtual table whose column type
5030    /// is TEXT and whose rows are the array elements. Routes
5031    /// through the regular projection / WHERE / ORDER BY / LIMIT
5032    /// machinery so set-returning UNNEST composes naturally with
5033    /// the rest of the SELECT surface.
5034    fn exec_select_unnest(
5035        &self,
5036        stmt: &SelectStatement,
5037        primary: &TableRef,
5038        cancel: CancelToken<'_>,
5039    ) -> Result<QueryResult, EngineError> {
5040        let expr = primary
5041            .unnest_expr
5042            .as_deref()
5043            .expect("caller guards unnest_expr.is_some()");
5044        // Evaluate the array expression once. Empty schema / empty
5045        // row — uncorrelated UNNEST cannot reference outer columns.
5046        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
5047        let ctx = EvalContext::new(&empty_schema, None);
5048        let dummy_row = Row::new(alloc::vec::Vec::new());
5049        // v7.11.13 — unnest dispatches per array element type so
5050        // INT[] / BIGINT[] surface their PG types in projection.
5051        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
5052            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5053                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
5054                Value::TextArray(items) => {
5055                    let rows = items
5056                        .into_iter()
5057                        .map(|item| {
5058                            Row::new(alloc::vec![match item {
5059                                Some(s) => Value::Text(s),
5060                                None => Value::Null,
5061                            }])
5062                        })
5063                        .collect();
5064                    (DataType::Text, rows)
5065                }
5066                Value::IntArray(items) => {
5067                    let rows = items
5068                        .into_iter()
5069                        .map(|item| {
5070                            Row::new(alloc::vec![match item {
5071                                Some(n) => Value::Int(n),
5072                                None => Value::Null,
5073                            }])
5074                        })
5075                        .collect();
5076                    (DataType::Int, rows)
5077                }
5078                Value::BigIntArray(items) => {
5079                    let rows = items
5080                        .into_iter()
5081                        .map(|item| {
5082                            Row::new(alloc::vec![match item {
5083                                Some(n) => Value::BigInt(n),
5084                                None => Value::Null,
5085                            }])
5086                        })
5087                        .collect();
5088                    (DataType::BigInt, rows)
5089                }
5090                other => {
5091                    return Err(EngineError::Unsupported(alloc::format!(
5092                        "unnest() expects an array argument, got {:?}",
5093                        other.data_type()
5094                    )));
5095                }
5096            };
5097        let alias = primary
5098            .alias
5099            .clone()
5100            .unwrap_or_else(|| "unnest".to_string());
5101        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
5102        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
5103        // first entry overrides the projected column's name.
5104        // Without the column list, fall back to the table alias
5105        // (pre-v7.13.2 behaviour).
5106        let col_name = primary
5107            .unnest_column_aliases
5108            .first()
5109            .cloned()
5110            .unwrap_or_else(|| alias.clone());
5111        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
5112        let schema_cols = alloc::vec![col_schema.clone()];
5113        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
5114        // Apply WHERE.
5115        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
5116            let mut out = alloc::vec::Vec::with_capacity(rows.len());
5117            for row in rows {
5118                cancel.check()?;
5119                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
5120                if matches!(v, Value::Bool(true)) {
5121                    out.push(row);
5122                }
5123            }
5124            out
5125        } else {
5126            rows
5127        };
5128        // Projection.
5129        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
5130        let mut projected_rows: alloc::vec::Vec<Row> =
5131            alloc::vec::Vec::with_capacity(filtered.len());
5132        for row in &filtered {
5133            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
5134            for p in &projection {
5135                vals.push(eval::eval_expr(&p.expr, row, &scan_ctx).map_err(EngineError::Eval)?);
5136            }
5137            projected_rows.push(Row::new(vals));
5138        }
5139        // ORDER BY / LIMIT — apply on the projected rows (cheap;
5140        // unnest result sets are small by design).
5141        let columns: alloc::vec::Vec<ColumnSchema> = projection
5142            .iter()
5143            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
5144            .collect();
5145        // Re-evaluate ORDER BY against the source schema (pre-projection
5146        // so col refs by name still resolve through `scan_ctx`).
5147        if !stmt.order_by.is_empty() {
5148            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
5149                .iter()
5150                .enumerate()
5151                .map(|(i, r)| -> Result<_, EngineError> {
5152                    let keys: Result<Vec<Value>, EngineError> = stmt
5153                        .order_by
5154                        .iter()
5155                        .map(|ob| {
5156                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
5157                        })
5158                        .collect();
5159                    Ok((i, keys?))
5160                })
5161                .collect::<Result<_, _>>()?;
5162            indexed.sort_by(|a, b| {
5163                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
5164                    let mut cmp = value_cmp(ka, kb);
5165                    if stmt.order_by[idx].desc {
5166                        cmp = cmp.reverse();
5167                    }
5168                    if cmp != core::cmp::Ordering::Equal {
5169                        return cmp;
5170                    }
5171                }
5172                core::cmp::Ordering::Equal
5173            });
5174            projected_rows = indexed
5175                .into_iter()
5176                .map(|(i, _)| projected_rows[i].clone())
5177                .collect();
5178        }
5179        // LIMIT / OFFSET — apply at the tail.
5180        if let Some(offset) = stmt.offset_literal() {
5181            let off = (offset as usize).min(projected_rows.len());
5182            projected_rows.drain(..off);
5183        }
5184        if let Some(limit) = stmt.limit_literal() {
5185            projected_rows.truncate(limit as usize);
5186        }
5187        Ok(QueryResult::Rows {
5188            columns,
5189            rows: projected_rows,
5190        })
5191    }
5192
5193    fn exec_bare_select_cancel(
5194        &self,
5195        stmt: &SelectStatement,
5196        cancel: CancelToken<'_>,
5197    ) -> Result<QueryResult, EngineError> {
5198        // v4.12: window-function path. When the projection contains
5199        // any `name(args) OVER (...)` we route to the dedicated
5200        // executor — partition + sort + per-row window value before
5201        // the regular projection.
5202        if select_has_window(stmt) {
5203            return self.exec_select_with_window(stmt, cancel);
5204        }
5205        // Constant SELECT (no FROM) — evaluate each item once against an
5206        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
5207        // `SELECT '7'::INT`. Column references will surface as
5208        // ColumnNotFound on eval since the schema is empty.
5209        let Some(from) = &stmt.from else {
5210            let empty_schema: Vec<ColumnSchema> = Vec::new();
5211            let ctx = self.ev_ctx(&empty_schema, None);
5212            let projection = build_projection(&stmt.items, &empty_schema, "")?;
5213            let dummy_row = Row::new(Vec::new());
5214            let mut values = Vec::with_capacity(projection.len());
5215            for p in &projection {
5216                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
5217            }
5218            let columns: Vec<ColumnSchema> = projection
5219                .into_iter()
5220                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5221                .collect();
5222            return Ok(QueryResult::Rows {
5223                columns,
5224                rows: alloc::vec![Row::new(values)],
5225            });
5226        };
5227        // Multi-table FROM (one or more joined peers) goes through the
5228        // nested-loop join executor. Single-table FROM stays on the
5229        // existing scan + index-seek path.
5230        if !from.joins.is_empty() {
5231            return self.exec_joined_select(stmt, from);
5232        }
5233        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
5234        // single-column table at SELECT entry by evaluating the
5235        // expression once against the empty row (UNNEST is
5236        // uncorrelated in v7.11; correlated / LATERAL unnest is a
5237        // v7.12 carve-out). Build a virtual `Table` in a heap-only
5238        // catalog, then route to the regular scan path.
5239        if from.primary.unnest_expr.is_some() {
5240            return self.exec_select_unnest(stmt, &from.primary, cancel);
5241        }
5242        let primary = &from.primary;
5243        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
5244            StorageError::TableNotFound {
5245                name: primary.name.clone(),
5246            }
5247        })?;
5248        let schema_cols = &table.schema().columns;
5249        // The qualifier accepted on column refs is the alias (if any) else the
5250        // bare table name.
5251        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
5252        let ctx = self.ev_ctx(schema_cols, Some(alias));
5253
5254        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
5255        // WHERE and an NSW index on `col` skips the full scan. The
5256        // walk returns rows already in ascending-distance order, so
5257        // ORDER BY / LIMIT are honoured implicitly.
5258        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
5259            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
5260        }
5261
5262        // Index seek: if WHERE is `col = literal` (or commuted) and the
5263        // referenced column has an index, dispatch each locator through
5264        // the catalog (hot tier → borrow, cold tier → page-read +
5265        // decode) and iterate just those rows. Otherwise fall back to a
5266        // full scan over the hot tier (cold-tier rows are only reached
5267        // via index seek in v5.1 — full table scans against cold-tier
5268        // data ship in v5.2 with the freezer's per-segment scan API).
5269        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
5270            // BTree / col=literal seek first — covers the v7.11.3 multi-
5271            // column AND case and the leading-column equality lookup.
5272            try_index_seek(w, schema_cols, self.active_catalog(), table, alias).or_else(|| {
5273                // v7.12.3 — GIN-accelerated `WHERE col @@ tsquery`
5274                // when the column has a `USING gin` index. Returns an
5275                // over-approximate candidate set; the WHERE re-eval
5276                // loop below verifies the full `@@` predicate per row.
5277                try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
5278            })
5279        });
5280
5281        // Aggregate path: filter rows first, then hand off to the
5282        // aggregate executor which does its own projection + ORDER BY.
5283        if aggregate::uses_aggregate(stmt) {
5284            let mut filtered: Vec<&Row> = Vec::new();
5285            // v6.2.6 — Memoize: per-query LRU cache for correlated
5286            // scalar subqueries. Fresh per row-loop entry so each
5287            // SELECT execution gets an isolated cache.
5288            let mut memo = memoize::MemoizeCache::new();
5289            if let Some(rows) = &indexed_rows {
5290                for cow in rows {
5291                    let row = cow.as_ref();
5292                    if let Some(where_expr) = &stmt.where_ {
5293                        let cond = self.eval_expr_with_correlated(
5294                            where_expr,
5295                            row,
5296                            &ctx,
5297                            cancel,
5298                            Some(&mut memo),
5299                        )?;
5300                        if !matches!(cond, Value::Bool(true)) {
5301                            continue;
5302                        }
5303                    }
5304                    filtered.push(row);
5305                }
5306            } else {
5307                for i in 0..table.row_count() {
5308                    let row = &table.rows()[i];
5309                    if let Some(where_expr) = &stmt.where_ {
5310                        let cond = self.eval_expr_with_correlated(
5311                            where_expr,
5312                            row,
5313                            &ctx,
5314                            cancel,
5315                            Some(&mut memo),
5316                        )?;
5317                        if !matches!(cond, Value::Bool(true)) {
5318                            continue;
5319                        }
5320                    }
5321                    filtered.push(row);
5322                }
5323            }
5324            let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
5325            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5326            return Ok(QueryResult::Rows {
5327                columns: agg.columns,
5328                rows: agg.rows,
5329            });
5330        }
5331
5332        let projection = build_projection(&stmt.items, schema_cols, alias)?;
5333
5334        // Materialise the filter pass into `(order_key, projected_row)`
5335        // tuples. The order key is `None` when there's no ORDER BY clause.
5336        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5337        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
5338        let mut memo = memoize::MemoizeCache::new();
5339        // Inline the per-row work in a closure so the indexed and full-
5340        // scan branches share the body.
5341        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
5342            if loop_idx.is_multiple_of(256) {
5343                cancel.check()?;
5344            }
5345            if let Some(where_expr) = &stmt.where_ {
5346                let cond =
5347                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
5348                if !matches!(cond, Value::Bool(true)) {
5349                    return Ok(());
5350                }
5351            }
5352            let mut values = Vec::with_capacity(projection.len());
5353            for p in &projection {
5354                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5355            }
5356            let order_keys = if stmt.order_by.is_empty() {
5357                Vec::new()
5358            } else {
5359                build_order_keys(&stmt.order_by, row, &ctx)?
5360            };
5361            tagged.push((order_keys, Row::new(values)));
5362            Ok(())
5363        };
5364        if let Some(rows) = &indexed_rows {
5365            for (loop_idx, cow) in rows.iter().enumerate() {
5366                process_row(cow.as_ref(), loop_idx)?;
5367            }
5368        } else {
5369            for i in 0..table.row_count() {
5370                process_row(&table.rows()[i], i)?;
5371            }
5372        }
5373
5374        if !stmt.order_by.is_empty() {
5375            // Partial-sort fast path: when LIMIT is small relative to
5376            // the row count, select_nth_unstable + sort just the
5377            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
5378            // requires the full sort because de-dup happens after.
5379            let keep = if stmt.distinct {
5380                None
5381            } else {
5382                stmt.limit_literal()
5383                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5384            };
5385            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5386            partial_sort_tagged(&mut tagged, keep, &descs);
5387        }
5388
5389        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5390        if stmt.distinct {
5391            output_rows = dedup_rows(output_rows);
5392        }
5393        apply_offset_and_limit(
5394            &mut output_rows,
5395            stmt.offset_literal(),
5396            stmt.limit_literal(),
5397        );
5398
5399        let columns: Vec<ColumnSchema> = projection
5400            .into_iter()
5401            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5402            .collect();
5403
5404        Ok(QueryResult::Rows {
5405            columns,
5406            rows: output_rows,
5407        })
5408    }
5409
5410    /// Multi-table SELECT executor (one or more JOIN peers).
5411    ///
5412    /// v1.10 builds the joined row set up-front via nested-loop joins,
5413    /// then runs WHERE + projection + ORDER BY against the combined
5414    /// rows. No index seek. Aggregates and DISTINCT still work because
5415    /// the executor delegates projection through the same shared paths.
5416    #[allow(clippy::too_many_lines)]
5417    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
5418    /// owned (rows, schema) pair. Catalog tables clone their hot
5419    /// rows + schema; UNNEST table refs evaluate their array
5420    /// expression once and synthesise a single-column row set
5421    /// using the same dispatch as `exec_select_unnest`. Used by
5422    /// the joined-select path so UNNEST can appear in any FROM
5423    /// position, not just as the primary.
5424    fn materialise_table_ref(
5425        &self,
5426        tref: &TableRef,
5427    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
5428        if let Some(expr) = tref.unnest_expr.as_deref() {
5429            let empty_schema: Vec<ColumnSchema> = Vec::new();
5430            let ctx = EvalContext::new(&empty_schema, None);
5431            let dummy_row = Row::new(Vec::new());
5432            let (elem_dtype, rows) =
5433                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
5434                    Value::Null => (DataType::Text, Vec::new()),
5435                    Value::TextArray(items) => (
5436                        DataType::Text,
5437                        items
5438                            .into_iter()
5439                            .map(|item| {
5440                                Row::new(alloc::vec![match item {
5441                                    Some(s) => Value::Text(s),
5442                                    None => Value::Null,
5443                                }])
5444                            })
5445                            .collect(),
5446                    ),
5447                    Value::IntArray(items) => (
5448                        DataType::Int,
5449                        items
5450                            .into_iter()
5451                            .map(|item| {
5452                                Row::new(alloc::vec![match item {
5453                                    Some(n) => Value::Int(n),
5454                                    None => Value::Null,
5455                                }])
5456                            })
5457                            .collect(),
5458                    ),
5459                    Value::BigIntArray(items) => (
5460                        DataType::BigInt,
5461                        items
5462                            .into_iter()
5463                            .map(|item| {
5464                                Row::new(alloc::vec![match item {
5465                                    Some(n) => Value::BigInt(n),
5466                                    None => Value::Null,
5467                                }])
5468                            })
5469                            .collect(),
5470                    ),
5471                    other => {
5472                        return Err(EngineError::Unsupported(alloc::format!(
5473                            "unnest() expects an array argument, got {:?}",
5474                            other.data_type()
5475                        )));
5476                    }
5477                };
5478            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
5479            let col_name = tref
5480                .unnest_column_aliases
5481                .first()
5482                .cloned()
5483                .unwrap_or(alias);
5484            return Ok((rows, alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)]));
5485        }
5486        let table = self
5487            .active_catalog()
5488            .get(&tref.name)
5489            .ok_or_else(|| StorageError::TableNotFound {
5490                name: tref.name.clone(),
5491            })?;
5492        let rows: Vec<Row> = table.rows().iter().cloned().collect();
5493        let cols = table.schema().columns.clone();
5494        Ok((rows, cols))
5495    }
5496
5497    fn exec_joined_select(
5498        &self,
5499        stmt: &SelectStatement,
5500        from: &FromClause,
5501    ) -> Result<QueryResult, EngineError> {
5502        // v7.13.2 — mailrs round-6 S5. UNNEST peers materialise
5503        // into virtual (rows, schema) sources alongside catalog
5504        // tables, so `FROM t, UNNEST(arr) AS p(col)` works in
5505        // any join-list position. The lookup helper handles both
5506        // shapes uniformly.
5507        let (primary_rows, primary_cols) = self.materialise_table_ref(&from.primary)?;
5508        let primary_alias = from
5509            .primary
5510            .alias
5511            .as_deref()
5512            .unwrap_or(from.primary.name.as_str())
5513            .to_string();
5514        // Owned (rows, schema) per peer — borrows from the catalog
5515        // would not survive UNNEST-side materialisation.
5516        let mut joined: Vec<(Vec<Row>, Vec<ColumnSchema>, String, JoinKind, Option<&Expr>)> =
5517            Vec::new();
5518        for j in &from.joins {
5519            let (rows, cols) = self.materialise_table_ref(&j.table)?;
5520            let a = j
5521                .table
5522                .alias
5523                .as_deref()
5524                .unwrap_or(j.table.name.as_str())
5525                .to_string();
5526            joined.push((rows, cols, a, j.kind, j.on.as_ref()));
5527        }
5528
5529        // Build the combined schema: composite "alias.col" names so the
5530        // qualified-column resolver can find anything by exact match.
5531        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
5532        for col in &primary_cols {
5533            combined_schema.push(ColumnSchema::new(
5534                alloc::format!("{primary_alias}.{}", col.name),
5535                col.ty,
5536                col.nullable,
5537            ));
5538        }
5539        for (_, cols, a, _, _) in &joined {
5540            for col in cols {
5541                combined_schema.push(ColumnSchema::new(
5542                    alloc::format!("{a}.{}", col.name),
5543                    col.ty,
5544                    col.nullable,
5545                ));
5546            }
5547        }
5548        let ctx = EvalContext::new(&combined_schema, None);
5549
5550        // Nested-loop join.
5551        let mut working: Vec<Row> = primary_rows;
5552        let mut produced_len = primary_cols.len();
5553        for (rrows, rcols, _, kind, on) in &joined {
5554            let right_arity = rcols.len();
5555            let mut next: Vec<Row> = Vec::new();
5556            for left in &working {
5557                let mut left_matched = false;
5558                for right in rrows {
5559                    let mut combined_vals = left.values.clone();
5560                    combined_vals.extend(right.values.iter().cloned());
5561                    // Pad combined to the eventual full width so the
5562                    // partial schema still matches positions used by ON.
5563                    let combined = Row::new(combined_vals);
5564                    let keep = if let Some(on_expr) = on {
5565                        let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
5566                        matches!(cond, Value::Bool(true))
5567                    } else {
5568                        // CROSS / comma-list: every pair survives.
5569                        true
5570                    };
5571                    if keep {
5572                        next.push(combined);
5573                        left_matched = true;
5574                    }
5575                }
5576                if !left_matched && matches!(kind, JoinKind::Left) {
5577                    // LEFT OUTER JOIN: emit the left row with NULLs on
5578                    // the right side when no peer matched.
5579                    let mut combined_vals = left.values.clone();
5580                    for _ in 0..right_arity {
5581                        combined_vals.push(Value::Null);
5582                    }
5583                    next.push(Row::new(combined_vals));
5584                }
5585            }
5586            working = next;
5587            produced_len += right_arity;
5588            debug_assert!(produced_len <= combined_schema.len());
5589        }
5590
5591        // WHERE filter against combined rows.
5592        let mut filtered: Vec<Row> = Vec::new();
5593        for row in working {
5594            if let Some(where_expr) = &stmt.where_ {
5595                let cond = eval::eval_expr(where_expr, &row, &ctx)?;
5596                if !matches!(cond, Value::Bool(true)) {
5597                    continue;
5598                }
5599            }
5600            filtered.push(row);
5601        }
5602
5603        // Aggregate path: handle GROUP BY / aggregate calls over the
5604        // joined+filtered rows.
5605        if aggregate::uses_aggregate(stmt) {
5606            let refs: Vec<&Row> = filtered.iter().collect();
5607            let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
5608            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
5609            return Ok(QueryResult::Rows {
5610                columns: agg.columns,
5611                rows: agg.rows,
5612            });
5613        }
5614
5615        let projection = build_projection(&stmt.items, &combined_schema, "")?;
5616        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
5617        for row in &filtered {
5618            let mut values = Vec::with_capacity(projection.len());
5619            for p in &projection {
5620                values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5621            }
5622            let order_keys = if stmt.order_by.is_empty() {
5623                Vec::new()
5624            } else {
5625                build_order_keys(&stmt.order_by, row, &ctx)?
5626            };
5627            tagged.push((order_keys, Row::new(values)));
5628        }
5629        if !stmt.order_by.is_empty() {
5630            let keep = if stmt.distinct {
5631                None
5632            } else {
5633                stmt.limit_literal()
5634                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
5635            };
5636            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
5637            partial_sort_tagged(&mut tagged, keep, &descs);
5638        }
5639        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
5640        if stmt.distinct {
5641            output_rows = dedup_rows(output_rows);
5642        }
5643        apply_offset_and_limit(
5644            &mut output_rows,
5645            stmt.offset_literal(),
5646            stmt.limit_literal(),
5647        );
5648        let columns: Vec<ColumnSchema> = projection
5649            .into_iter()
5650            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5651            .collect();
5652        Ok(QueryResult::Rows {
5653            columns,
5654            rows: output_rows,
5655        })
5656    }
5657}
5658
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
///
/// The executors evaluate `expr` once per source row and turn the
/// remaining three fields into the `ColumnSchema` of the result set.
#[derive(Debug, Clone)]
struct ProjectedItem {
    // Expression evaluated against each source row to produce the value.
    expr: Expr,
    // Column name reported in the result set.
    output_name: String,
    // Data type reported for the output column.
    ty: DataType,
    // Whether the output column is reported as nullable.
    nullable: bool,
}
5668
5669/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
5670/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
5671/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
5672/// the spec's "two NULLs are not distinct"; the second is a tolerated
5673/// quirk for v1 (no NaN literals are reachable from the SQL surface).
5674fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
5675    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
5676    for r in rows {
5677        if !out.iter().any(|seen| seen == &r) {
5678            out.push(r);
5679        }
5680    }
5681    out
5682}
5683
5684/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
5685/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
5686/// order via the byte values; vectors are not sortable.
5687fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
5688    match v {
5689        Value::Null => Ok(f64::INFINITY),
5690        Value::SmallInt(n) => Ok(f64::from(*n)),
5691        Value::Int(n) => Ok(f64::from(*n)),
5692        Value::Date(d) => Ok(f64::from(*d)),
5693        #[allow(clippy::cast_precision_loss)]
5694        Value::Timestamp(t) => Ok(*t as f64),
5695        #[allow(clippy::cast_precision_loss)]
5696        Value::Numeric { scaled, scale } => {
5697            // Scaled integer / 10^scale, computed via f64 for sort
5698            // ordering only. Precision losses here only matter for
5699            // ORDER BY tie-breaks well past 15 significant digits.
5700            // `f64::powi` lives in std; we hand-roll the loop so the
5701            // no_std engine crate doesn't need it.
5702            let mut divisor = 1.0_f64;
5703            for _ in 0..*scale {
5704                divisor *= 10.0;
5705            }
5706            Ok((*scaled as f64) / divisor)
5707        }
5708        #[allow(clippy::cast_precision_loss)]
5709        Value::BigInt(n) => Ok(*n as f64),
5710        Value::Float(x) => Ok(*x),
5711        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
5712        Value::Text(s) => {
5713            // Lex order by codepoints — good enough for ORDER BY name.
5714            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
5715            // partial_cmp Equal. v1.x can swap in a real string comparator.
5716            let mut key: u64 = 0;
5717            for &b in s.as_bytes().iter().take(8) {
5718                key = (key << 8) | u64::from(b);
5719            }
5720            #[allow(clippy::cast_precision_loss)]
5721            Ok(key as f64)
5722        }
5723        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
5724            Err(EngineError::Unsupported(
5725                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
5726            ))
5727        }
5728        Value::Interval { .. } => Err(EngineError::Unsupported(
5729            "ORDER BY of an INTERVAL is not supported in v2.11 \
5730             (months vs micros has no single canonical ordering)"
5731                .into(),
5732        )),
5733        Value::Json(_) => Err(EngineError::Unsupported(
5734            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
5735        )),
5736        // v7.5.0 — Value is #[non_exhaustive]; future variants need
5737        // an explicit ORDER BY mapping. Surface as Unsupported until
5738        // engine support is added.
5739        _ => Err(EngineError::Unsupported(
5740            "ORDER BY of this value type is not supported".into(),
5741        )),
5742    }
5743}
5744
5745/// Try to plan a WHERE clause as an equality lookup against an existing
5746/// index. Returns the candidate row indices on success; `None` means the
5747/// caller should fall back to a full scan.
5748///
5749/// v0.8 recognises a single top-level `col = literal` (in either operand
5750/// order). AND chains and range scans land in later milestones.
5751/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
5752/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
5753/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
5754/// present, the planner does an "over-fetch and filter" pass — it
5755/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
5756/// against each, and trims back to `k`. Returns the row indices in
5757/// ascending-distance order when the plan applies.
5758fn try_nsw_knn(
5759    stmt: &SelectStatement,
5760    table: &Table,
5761    schema_cols: &[ColumnSchema],
5762    table_alias: &str,
5763) -> Option<Vec<usize>> {
5764    if stmt.distinct {
5765        return None;
5766    }
5767    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
5768    if limit == 0 {
5769        return None;
5770    }
5771    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
5772    // distance metric. Multi-key ORDER BY falls through to the
5773    // generic sort path.
5774    if stmt.order_by.len() != 1 {
5775        return None;
5776    }
5777    let order = &stmt.order_by[0];
5778    // NSW kNN returns rows ascending by distance — DESC inverts the
5779    // natural order, so the planner can't handle it without a sort
5780    // pass. Fall back to the generic ORDER BY path.
5781    if order.desc {
5782        return None;
5783    }
5784    let Expr::Binary { lhs, op, rhs } = &order.expr else {
5785        return None;
5786    };
5787    let metric = match op {
5788        BinOp::L2Distance => spg_storage::NswMetric::L2,
5789        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
5790        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
5791        _ => return None,
5792    };
5793    // Accept both `col <op> literal` and `literal <op> col`.
5794    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
5795        (lhs.as_ref(), rhs.as_ref())
5796    else {
5797        return None;
5798    };
5799    if let Some(q) = &col.qualifier
5800        && q != table_alias
5801    {
5802        return None;
5803    }
5804    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
5805    let query = literal_to_vector(literal)?;
5806    let idx = spg_storage::nsw_index_on(table, col_pos)?;
5807    if let Some(where_expr) = &stmt.where_ {
5808        // Over-fetch and filter. The factor (10×) is a heuristic that
5809        // covers typical selectivity for the corpus tests; v2.x will
5810        // make it configurable.
5811        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
5812        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
5813        let ctx = EvalContext::new(schema_cols, Some(table_alias));
5814        let mut kept: Vec<usize> = Vec::with_capacity(limit);
5815        for i in candidates {
5816            let row = &table.rows()[i];
5817            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
5818            if matches!(cond, Value::Bool(true)) {
5819                kept.push(i);
5820                if kept.len() >= limit {
5821                    break;
5822                }
5823            }
5824        }
5825        Some(kept)
5826    } else {
5827        Some(spg_storage::nsw_query(
5828            table, &idx.name, &query, limit, metric,
5829        ))
5830    }
5831}
5832
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` requests `max(10 × wanted rows, NSW_OVER_FETCH_FLOOR)`
/// candidates from the NSW graph before filtering.
const NSW_OVER_FETCH_FLOOR: usize = 32;
5837
5838/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
5839/// `None` for anything we can't fold at plan time.
5840fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
5841    match e {
5842        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
5843        Expr::Cast { expr, .. } => literal_to_vector(expr),
5844        _ => None,
5845    }
5846}
5847
5848/// Materialise rows in a planner-supplied order (used by the NSW path)
5849/// without re-running ORDER BY. The projection + LIMIT slot mirror the
5850/// equivalent block in `exec_bare_select`.
5851fn materialise_in_order(
5852    stmt: &SelectStatement,
5853    table: &Table,
5854    schema_cols: &[ColumnSchema],
5855    table_alias: &str,
5856    ordered_rows: &[usize],
5857) -> Result<QueryResult, EngineError> {
5858    let ctx = EvalContext::new(schema_cols, Some(table_alias));
5859    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
5860    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
5861    for &i in ordered_rows {
5862        let row = &table.rows()[i];
5863        let mut values = Vec::with_capacity(projection.len());
5864        for p in &projection {
5865            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
5866        }
5867        output_rows.push(Row::new(values));
5868    }
5869    apply_offset_and_limit(
5870        &mut output_rows,
5871        stmt.offset_literal(),
5872        stmt.limit_literal(),
5873    );
5874    let columns: Vec<ColumnSchema> = projection
5875        .into_iter()
5876        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
5877        .collect();
5878    Ok(QueryResult::Rows {
5879        columns,
5880        rows: output_rows,
5881    })
5882}
5883
5884fn try_index_seek<'a>(
5885    where_expr: &Expr,
5886    schema_cols: &[ColumnSchema],
5887    catalog: &'a Catalog,
5888    table: &'a Table,
5889    table_alias: &str,
5890) -> Option<Vec<Cow<'a, Row>>> {
5891    // v7.11.3 — recurse through top-level `AND` so a PG-style
5892    // composite predicate like `WHERE id = 1 AND created_at > $1`
5893    // still hits the index on `id`. The caller re-applies the
5894    // full WHERE expression to each returned row, so dropping the
5895    // residual conjuncts here is correct — the index just narrows
5896    // the candidate set.
5897    if let Expr::Binary {
5898        lhs,
5899        op: BinOp::And,
5900        rhs,
5901    } = where_expr
5902    {
5903        // Try LHS first (typical convention: leading equality on
5904        // the indexed column comes first in user-written SQL).
5905        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
5906            return Some(rows);
5907        }
5908        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
5909    }
5910    let Expr::Binary {
5911        lhs,
5912        op: BinOp::Eq,
5913        rhs,
5914    } = where_expr
5915    else {
5916        return None;
5917    };
5918    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
5919        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
5920    let idx = table.index_on(col_pos)?;
5921    let key = IndexKey::from_value(&value)?;
5922    let locators = idx.lookup_eq(&key);
5923    let table_name = table.schema().name.as_str();
5924    // v5.1: each locator dispatches to either the hot tier (zero-
5925    // copy borrow of `table.rows()[i]`) or a cold-tier segment
5926    // (one page read + dense row decode, ~µs scale). Cold rows are
5927    // returned as `Cow::Owned` so the caller's `&Row` iteration
5928    // doesn't see a tier distinction; pre-freezer (no cold
5929    // segments loaded) every locator is `Hot` and every entry is
5930    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
5931    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
5932    for loc in locators {
5933        match *loc {
5934            spg_storage::RowLocator::Hot(i) => {
5935                if let Some(row) = table.rows().get(i) {
5936                    out.push(Cow::Borrowed(row));
5937                }
5938            }
5939            spg_storage::RowLocator::Cold { segment_id, .. } => {
5940                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
5941                    out.push(Cow::Owned(row));
5942                }
5943            }
5944        }
5945    }
5946    Some(out)
5947}
5948
5949/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
5950///
5951/// Recurses through top-level `AND` like [`try_index_seek`] so a
5952/// composite predicate `WHERE search_vector @@ q AND id > $1` still
5953/// hits the GIN index on `search_vector` — the caller re-applies the
5954/// full WHERE expression to each returned candidate, so dropping the
5955/// `id > $1` residual here stays semantically correct.
5956///
5957/// Returns `None` when:
5958///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
5959///   - the RHS can't be const-evaluated to a `Value::TsQuery`
5960///     (typically because it references row columns);
5961///   - the resolved `TsQuery` uses query shapes the MVP doesn't
5962///     accelerate (`Not`, `Phrase` — those fall through to full scan).
5963///
5964/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
5965/// the full `@@` predicate per row, so an over-approximate candidate
5966/// set is safe.
5967fn try_gin_seek<'a>(
5968    where_expr: &Expr,
5969    schema_cols: &[ColumnSchema],
5970    catalog: &'a Catalog,
5971    table: &'a Table,
5972    table_alias: &str,
5973    ctx: &eval::EvalContext<'_>,
5974) -> Option<Vec<Cow<'a, Row>>> {
5975    if let Expr::Binary {
5976        lhs,
5977        op: BinOp::And,
5978        rhs,
5979    } = where_expr
5980    {
5981        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
5982            return Some(rows);
5983        }
5984        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
5985    }
5986    let Expr::Binary {
5987        lhs,
5988        op: BinOp::TsMatch,
5989        rhs,
5990    } = where_expr
5991    else {
5992        return None;
5993    };
5994    // Either side can be the column; pgvector idiom (`vec @@ q`)
5995    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
5996    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
5997    // tables resolve `q` to a Column too.
5998    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
5999        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
6000    let idx = table
6001        .indices()
6002        .iter()
6003        .find(|i| i.column_position == col_pos && i.is_gin())?;
6004    let candidates = gin_query_candidates(idx, &query)?;
6005    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
6006    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
6007    for loc in candidates {
6008        match loc {
6009            spg_storage::RowLocator::Hot(i) => {
6010                if let Some(row) = table.rows().get(i) {
6011                    out.push(Cow::Borrowed(row));
6012                }
6013            }
6014            // GIN cold-tier rows in the MVP: skipped, matching the
6015            // full-scan `@@` path which itself only iterates
6016            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
6017            // scan-time materialisation for `@@`, the parallel
6018            // resolution lands here; until then both paths see the
6019            // same hot-only candidate set so correctness is preserved.
6020            spg_storage::RowLocator::Cold { .. } => {}
6021        }
6022    }
6023    Some(out)
6024}
6025
6026/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
6027/// the binary is a column reference to a GIN-indexed tsvector column
6028/// and the other side const-evaluates to a `Value::TsQuery`. Returns
6029/// `None` if the column reference is for the wrong table alias, or if
6030/// the RHS expression depends on row data.
6031fn resolve_gin_col_query(
6032    col_side: &Expr,
6033    query_side: &Expr,
6034    schema_cols: &[ColumnSchema],
6035    table_alias: &str,
6036    ctx: &eval::EvalContext<'_>,
6037) -> Option<(usize, spg_storage::TsQueryAst)> {
6038    let Expr::Column(c) = col_side else {
6039        return None;
6040    };
6041    if let Some(q) = &c.qualifier
6042        && q != table_alias
6043    {
6044        return None;
6045    }
6046    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6047    // Const-evaluate the query side with an empty row — fails fast
6048    // (with a `ColumnNotFound` / similar) if the expression actually
6049    // depends on row data, which is exactly the bail signal we want.
6050    let empty_row = Row::new(Vec::new());
6051    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
6052    let Value::TsQuery(q) = v else { return None };
6053    Some((pos, q))
6054}
6055
6056/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
6057/// to produce a candidate row-locator set. Returns `None` for query
6058/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
6059/// full scan since their semantics need either complementation across
6060/// the whole row set or positional verification beyond what the
6061/// posting list carries).
6062///
6063/// Candidate sets are over-approximate — the caller re-applies the
6064/// full `@@` predicate per row, so reporting "row was in some
6065/// posting list" without verifying positions / weights stays correct.
6066fn gin_query_candidates(
6067    idx: &spg_storage::Index,
6068    query: &spg_storage::TsQueryAst,
6069) -> Option<Vec<spg_storage::RowLocator>> {
6070    use spg_storage::TsQueryAst;
6071    match query {
6072        TsQueryAst::Term { word, .. } => {
6073            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
6074            v.sort_by_key(locator_sort_key);
6075            v.dedup_by_key(|l| locator_sort_key(l));
6076            Some(v)
6077        }
6078        TsQueryAst::And(l, r) => {
6079            let mut left = gin_query_candidates(idx, l)?;
6080            let mut right = gin_query_candidates(idx, r)?;
6081            left.sort_by_key(locator_sort_key);
6082            right.sort_by_key(locator_sort_key);
6083            // Sorted-merge intersection.
6084            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
6085            let (mut i, mut j) = (0usize, 0usize);
6086            while i < left.len() && j < right.len() {
6087                let lk = locator_sort_key(&left[i]);
6088                let rk = locator_sort_key(&right[j]);
6089                match lk.cmp(&rk) {
6090                    core::cmp::Ordering::Less => i += 1,
6091                    core::cmp::Ordering::Greater => j += 1,
6092                    core::cmp::Ordering::Equal => {
6093                        out.push(left[i]);
6094                        i += 1;
6095                        j += 1;
6096                    }
6097                }
6098            }
6099            Some(out)
6100        }
6101        TsQueryAst::Or(l, r) => {
6102            let mut out = gin_query_candidates(idx, l)?;
6103            out.extend(gin_query_candidates(idx, r)?);
6104            out.sort_by_key(locator_sort_key);
6105            out.dedup_by_key(|l| locator_sort_key(l));
6106            Some(out)
6107        }
6108        // Not / Phrase bail to full scan in the MVP. Not needs
6109        // complementation against the whole row set (not represented
6110        // in the posting-list view); Phrase needs positional
6111        // verification beyond what `word → rows` carries.
6112        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
6113    }
6114}
6115
6116/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
6117/// inside the GIN intersection / union loops. Hot rows order by their
6118/// row index; Cold rows order after all Hot rows, then by
6119/// `(segment_id, the cold sub-key)`.
6120fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
6121    match *l {
6122        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
6123        spg_storage::RowLocator::Cold {
6124            segment_id,
6125            page_offset,
6126        } => (1, u64::from(segment_id), u64::from(page_offset)),
6127    }
6128}
6129
6130/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
6131/// is a simple `col = literal` predicate suitable for a `BTree` index
6132/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
6133/// decide whether a write touches a cold-tier row (which requires
6134/// promote-on-write / shadow-on-delete) before falling through to
6135/// the hot-tier row walk.
6136///
6137/// Returns `None` for any predicate shape the planner can't push
6138/// down to an index seek — complex WHERE clauses always take the
6139/// hot-only path (cold rows are immutable to non-indexed writes
6140/// until a future scan-fanout sub-version).
6141fn try_pk_predicate(
6142    where_expr: &Expr,
6143    schema_cols: &[ColumnSchema],
6144    table_alias: &str,
6145) -> Option<(usize, IndexKey)> {
6146    let Expr::Binary {
6147        lhs,
6148        op: BinOp::Eq,
6149        rhs,
6150    } = where_expr
6151    else {
6152        return None;
6153    };
6154    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
6155        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
6156    let key = IndexKey::from_value(&value)?;
6157    Some((col_pos, key))
6158}
6159
6160fn resolve_col_literal_pair(
6161    col_side: &Expr,
6162    lit_side: &Expr,
6163    schema_cols: &[ColumnSchema],
6164    table_alias: &str,
6165) -> Option<(usize, Value)> {
6166    let Expr::Column(c) = col_side else {
6167        return None;
6168    };
6169    if let Some(q) = &c.qualifier
6170        && q != table_alias
6171    {
6172        return None;
6173    }
6174    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
6175    let Expr::Literal(l) = lit_side else {
6176        return None;
6177    };
6178    let v = match l {
6179        Literal::Integer(n) => {
6180            if let Ok(small) = i32::try_from(*n) {
6181                Value::Int(small)
6182            } else {
6183                Value::BigInt(*n)
6184            }
6185        }
6186        Literal::Float(x) => Value::Float(*x),
6187        Literal::String(s) => Value::Text(s.clone()),
6188        Literal::Bool(b) => Value::Bool(*b),
6189        Literal::Null => Value::Null,
6190        // Vector and Interval literals can't be used as B-tree index keys.
6191        // Tell the planner to fall back to full-scan.
6192        Literal::Vector(_) | Literal::Interval { .. } => return None,
6193    };
6194    Some((pos, v))
6195}
6196
6197/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
6198/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
6199/// `EngineError` so the projection-build path keeps `UnknownQualifier`
6200/// vs `ColumnNotFound` distinct.
6201fn resolve_projection_column<'a>(
6202    c: &ColumnName,
6203    schema_cols: &'a [ColumnSchema],
6204    table_alias: &str,
6205) -> Result<&'a ColumnSchema, EngineError> {
6206    if let Some(q) = &c.qualifier {
6207        let composite = alloc::format!("{q}.{name}", name = c.name);
6208        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
6209            return Ok(s);
6210        }
6211        // Single-table case: the qualifier may equal the active alias —
6212        // then look for the bare column name.
6213        if q == table_alias
6214            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
6215        {
6216            return Ok(s);
6217        }
6218        // For multi-table schemas the qualifier is unknown only if no
6219        // column bears the "<q>." prefix. For single-table, the alias
6220        // mismatch alone is enough.
6221        let prefix = alloc::format!("{q}.");
6222        let qualifier_known =
6223            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
6224        if !qualifier_known {
6225            return Err(EngineError::Eval(EvalError::UnknownQualifier {
6226                qualifier: q.clone(),
6227            }));
6228        }
6229        return Err(EngineError::Eval(EvalError::ColumnNotFound {
6230            name: c.name.clone(),
6231        }));
6232    }
6233    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
6234        return Ok(s);
6235    }
6236    let suffix = alloc::format!(".{name}", name = c.name);
6237    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
6238    let first = matches.next();
6239    let extra = matches.next();
6240    match (first, extra) {
6241        (Some(s), None) => Ok(s),
6242        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
6243            detail: alloc::format!("ambiguous column reference: {}", c.name),
6244        })),
6245        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
6246            name: c.name.clone(),
6247        })),
6248    }
6249}
6250
6251fn build_projection(
6252    items: &[SelectItem],
6253    schema_cols: &[ColumnSchema],
6254    table_alias: &str,
6255) -> Result<Vec<ProjectedItem>, EngineError> {
6256    let mut out = Vec::new();
6257    for item in items {
6258        match item {
6259            SelectItem::Wildcard => {
6260                for col in schema_cols {
6261                    out.push(ProjectedItem {
6262                        expr: Expr::Column(ColumnName {
6263                            qualifier: None,
6264                            name: col.name.clone(),
6265                        }),
6266                        output_name: col.name.clone(),
6267                        ty: col.ty,
6268                        nullable: col.nullable,
6269                    });
6270                }
6271            }
6272            SelectItem::Expr { expr, alias } => {
6273                // Plain column ref keeps full schema info (real type +
6274                // nullability). Compound expressions evaluate fine but have
6275                // no static type — surface them as nullable TEXT, which is
6276                // what most clients render anyway.
6277                if let Expr::Column(c) = expr {
6278                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
6279                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
6280                    out.push(ProjectedItem {
6281                        expr: expr.clone(),
6282                        output_name,
6283                        ty: sch.ty,
6284                        nullable: sch.nullable,
6285                    });
6286                } else {
6287                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
6288                    out.push(ProjectedItem {
6289                        expr: expr.clone(),
6290                        output_name,
6291                        ty: DataType::Text,
6292                        nullable: true,
6293                    });
6294                }
6295            }
6296        }
6297    }
6298    Ok(out)
6299}
6300
6301/// Promote an integer to a NUMERIC value at the requested scale.
6302/// Rejects values that, after scaling, would overflow the column's
6303/// precision budget.
6304fn numeric_from_integer(
6305    n: i128,
6306    precision: u8,
6307    scale: u8,
6308    col_name: &str,
6309) -> Result<Value, EngineError> {
6310    let factor = pow10_i128(scale);
6311    let scaled = n.checked_mul(factor).ok_or_else(|| {
6312        EngineError::Unsupported(alloc::format!(
6313            "integer overflow scaling value for column `{col_name}` to scale {scale}"
6314        ))
6315    })?;
6316    check_precision(scaled, precision, col_name)?;
6317    Ok(Value::Numeric { scaled, scale })
6318}
6319
6320/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
6321/// then verifies the result fits the column's precision.
6322#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
6323fn numeric_from_float(
6324    x: f64,
6325    precision: u8,
6326    scale: u8,
6327    col_name: &str,
6328) -> Result<Value, EngineError> {
6329    if !x.is_finite() {
6330        return Err(EngineError::Unsupported(alloc::format!(
6331            "cannot store non-finite float in NUMERIC column `{col_name}`"
6332        )));
6333    }
6334    let mut factor = 1.0_f64;
6335    for _ in 0..scale {
6336        factor *= 10.0;
6337    }
6338    // Round half-away-from-zero by biasing then casting (`as i128`
6339    // truncates toward zero, so the bias + truncation gives the
6340    // desired rounding). `f64::floor` / `ceil` live in std; we don't
6341    // need them — the cast handles the truncation step.
6342    let shifted = x * factor;
6343    let biased = if shifted >= 0.0 {
6344        shifted + 0.5
6345    } else {
6346        shifted - 0.5
6347    };
6348    // Range-check before casting back to i128 — the cast itself is
6349    // saturating in Rust, which would silently truncate huge inputs.
6350    if !(-1e38..=1e38).contains(&biased) {
6351        return Err(EngineError::Unsupported(alloc::format!(
6352            "value {x} overflows NUMERIC range for column `{col_name}`"
6353        )));
6354    }
6355    let scaled = biased as i128;
6356    check_precision(scaled, precision, col_name)?;
6357    Ok(Value::Numeric { scaled, scale })
6358}
6359
6360/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
6361/// multiplies by 10; going down rounds half-away-from-zero.
6362fn numeric_rescale(
6363    scaled: i128,
6364    src_scale: u8,
6365    precision: u8,
6366    dst_scale: u8,
6367    col_name: &str,
6368) -> Result<Value, EngineError> {
6369    let new_scaled = if dst_scale >= src_scale {
6370        let bump = pow10_i128(dst_scale - src_scale);
6371        scaled.checked_mul(bump).ok_or_else(|| {
6372            EngineError::Unsupported(alloc::format!(
6373                "overflow rescaling NUMERIC for column `{col_name}`"
6374            ))
6375        })?
6376    } else {
6377        let drop = pow10_i128(src_scale - dst_scale);
6378        let half = drop / 2;
6379        if scaled >= 0 {
6380            (scaled + half) / drop
6381        } else {
6382            (scaled - half) / drop
6383        }
6384    };
6385    check_precision(new_scaled, precision, col_name)?;
6386    Ok(Value::Numeric {
6387        scaled: new_scaled,
6388        scale: dst_scale,
6389    })
6390}
6391
6392/// Drop the fractional part of a scaled integer, returning the integer
6393/// portion (toward zero). Used for NUMERIC → INT casts.
6394const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
6395    if scale == 0 {
6396        return scaled;
6397    }
6398    let factor = pow10_i128_const(scale);
6399    scaled / factor
6400}
6401
6402/// Verify a scaled NUMERIC value fits the column's declared precision.
6403/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
6404/// skip the check there.
6405fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
6406    if precision == 0 {
6407        return Ok(());
6408    }
6409    let limit = pow10_i128(precision);
6410    if scaled.unsigned_abs() >= limit.unsigned_abs() {
6411        return Err(EngineError::Unsupported(alloc::format!(
6412            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
6413        )));
6414    }
6415    Ok(())
6416}
6417
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// `10^38` is the largest power of ten an `i128` can hold. For
/// `p >= 39` the result saturates at `i128::MAX` rather than
/// overflowing (which would panic in debug builds and silently wrap
/// in release builds).
const fn pow10_i128_const(p: u8) -> i128 {
    let mut acc: i128 = 1;
    let mut i = 0;
    while i < p {
        acc = acc.saturating_mul(10);
        i += 1;
    }
    acc
}

/// Non-`const` entry point for [`pow10_i128_const`]; same saturating
/// behaviour for `p >= 39`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
6431
// NOTE(review): the two paragraphs below read like documentation for
// the clock-rewrite and subquery-materialisation passes rather than
// for this `impl` block — confirm and move them next to those
// functions.
/// Walk a parsed `Statement`, swapping any `NOW()` /
/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
/// literal cast that wraps the engine's per-statement clock reading.
/// When `now_micros` is `None`, calls stay as-is and surface as
/// `unknown function` at eval time — keeps the error path explicit.
///
/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
/// replace every subquery node with a materialised literal. SPG
/// only supports uncorrelated subqueries — the inner SELECT does
/// not see outer-row columns, so the result is the same for every
/// outer row and can be evaluated once.
///
/// Returns the rewritten statement; the caller passes this to the
/// regular row-loop executor, which no longer sees Subquery nodes
/// in its tree.
6446impl Engine {
6447    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
6448    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
6449    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
6450    /// 1. Apply the WHERE filter.
6451    /// 2. For each unique `WindowFunction` node in the projection,
6452    ///    partition + sort, compute the per-row value.
6453    /// 3. Append the window values as synthetic columns (`__win_N`)
6454    ///    to the row schema.
6455    /// 4. Rewrite the projection to read those columns.
6456    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
6457    #[allow(
6458        clippy::too_many_lines,
6459        clippy::type_complexity,
6460        clippy::needless_range_loop
6461    )] // window-eval is one cohesive pipe; splitting fragments
6462    fn exec_select_with_window(
6463        &self,
6464        stmt: &SelectStatement,
6465        cancel: CancelToken<'_>,
6466    ) -> Result<QueryResult, EngineError> {
6467        let from = stmt.from.as_ref().ok_or_else(|| {
6468            EngineError::Unsupported("window functions require a FROM clause".into())
6469        })?;
6470        // For v4.12 we only support a single-table FROM. Joins +
6471        // windows is queued for v5.x.
6472        if !from.joins.is_empty() {
6473            return Err(EngineError::Unsupported(
6474                "JOIN with window functions not yet supported".into(),
6475            ));
6476        }
6477        let primary = &from.primary;
6478        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
6479            StorageError::TableNotFound {
6480                name: primary.name.clone(),
6481            }
6482        })?;
6483        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
6484        let schema_cols = &table.schema().columns;
6485        let ctx = self.ev_ctx(schema_cols, Some(alias));
6486
6487        // 1) Filter pass.
6488        let mut filtered: Vec<&Row> = Vec::new();
6489        for (i, row) in table.rows().iter().enumerate() {
6490            if i.is_multiple_of(256) {
6491                cancel.check()?;
6492            }
6493            if let Some(w) = &stmt.where_ {
6494                let cond = eval::eval_expr(w, row, &ctx)?;
6495                if !matches!(cond, Value::Bool(true)) {
6496                    continue;
6497                }
6498            }
6499            filtered.push(row);
6500        }
6501        let n_rows = filtered.len();
6502
6503        // 2) Collect unique window function nodes from projection.
6504        let mut window_nodes: Vec<Expr> = Vec::new();
6505        for item in &stmt.items {
6506            if let SelectItem::Expr { expr, .. } = item {
6507                collect_window_nodes(expr, &mut window_nodes);
6508            }
6509        }
6510
6511        // 3) For each window, compute per-row value.
6512        // Index: same order as window_nodes; for row i, win_vals[w][i].
6513        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
6514        for wnode in &window_nodes {
6515            let Expr::WindowFunction {
6516                name,
6517                args,
6518                partition_by,
6519                order_by,
6520                frame,
6521                null_treatment,
6522            } = wnode
6523            else {
6524                unreachable!("collect_window_nodes pushes only WindowFunction");
6525            };
6526            // Compute (partition_key, order_key, original_index) for each row.
6527            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
6528                Vec::with_capacity(n_rows);
6529            for (i, row) in filtered.iter().enumerate() {
6530                let pkey: Vec<Value> = partition_by
6531                    .iter()
6532                    .map(|p| eval::eval_expr(p, row, &ctx))
6533                    .collect::<Result<_, _>>()?;
6534                let okey: Vec<(Value, bool)> = order_by
6535                    .iter()
6536                    .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
6537                    .collect::<Result<_, _>>()?;
6538                indexed.push((pkey, okey, i));
6539            }
6540            // Sort by (partition_key, order_key). Partition key uses
6541            // a stable encoded form; order key respects ASC/DESC.
6542            indexed.sort_by(|a, b| {
6543                let p_cmp = partition_key_cmp(&a.0, &b.0);
6544                if p_cmp != core::cmp::Ordering::Equal {
6545                    return p_cmp;
6546                }
6547                order_key_cmp(&a.1, &b.1)
6548            });
6549            // Per-partition compute.
6550            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
6551            let mut p_start = 0;
6552            while p_start < indexed.len() {
6553                let mut p_end = p_start + 1;
6554                while p_end < indexed.len()
6555                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
6556                        == core::cmp::Ordering::Equal
6557                {
6558                    p_end += 1;
6559                }
6560                // Compute the function within this partition slice.
6561                compute_window_partition(
6562                    name,
6563                    args,
6564                    !order_by.is_empty(),
6565                    frame.as_ref(),
6566                    *null_treatment,
6567                    &indexed[p_start..p_end],
6568                    &filtered,
6569                    &ctx,
6570                    &mut out_vals,
6571                )?;
6572                p_start = p_end;
6573            }
6574            win_vals.push(out_vals);
6575        }
6576
6577        // 4) Build extended schema: original columns + synthetic.
6578        let mut ext_cols = schema_cols.clone();
6579        for i in 0..window_nodes.len() {
6580            ext_cols.push(ColumnSchema::new(
6581                alloc::format!("__win_{i}"),
6582                DataType::Text, // type doesn't matter for projection eval
6583                true,
6584            ));
6585        }
6586        // 5) Build extended rows: each row gets its window values appended.
6587        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
6588        for i in 0..n_rows {
6589            let mut values = filtered[i].values.clone();
6590            for w in 0..window_nodes.len() {
6591                values.push(win_vals[w][i].clone());
6592            }
6593            ext_rows.push(Row::new(values));
6594        }
6595        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
6596        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
6597        for item in &stmt.items {
6598            let new_item = match item {
6599                SelectItem::Wildcard => SelectItem::Wildcard,
6600                SelectItem::Expr { expr, alias } => {
6601                    let mut e = expr.clone();
6602                    rewrite_window_to_columns(&mut e, &window_nodes);
6603                    SelectItem::Expr {
6604                        expr: e,
6605                        alias: alias.clone(),
6606                    }
6607                }
6608            };
6609            rewritten_items.push(new_item);
6610        }
6611
6612        // 7) Project into final rows.
6613        let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
6614        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
6615        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
6616        for (i, row) in ext_rows.iter().enumerate() {
6617            if i.is_multiple_of(256) {
6618                cancel.check()?;
6619            }
6620            let mut values = Vec::with_capacity(projection.len());
6621            for p in &projection {
6622                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
6623            }
6624            let order_keys = if stmt.order_by.is_empty() {
6625                Vec::new()
6626            } else {
6627                let mut keys = Vec::with_capacity(stmt.order_by.len());
6628                for o in &stmt.order_by {
6629                    let mut e = o.expr.clone();
6630                    rewrite_window_to_columns(&mut e, &window_nodes);
6631                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
6632                    keys.push(value_to_order_key(&key)?);
6633                }
6634                keys
6635            };
6636            tagged.push((order_keys, Row::new(values)));
6637        }
6638        // ORDER BY + LIMIT/OFFSET on the projected rows.
6639        if !stmt.order_by.is_empty() {
6640            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
6641            sort_by_keys(&mut tagged, &descs);
6642        }
6643        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
6644        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
6645        let final_cols: Vec<ColumnSchema> = projection
6646            .into_iter()
6647            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
6648            .collect();
6649        Ok(QueryResult::Rows {
6650            columns: final_cols,
6651            rows: out_rows,
6652        })
6653    }
6654
6655    /// v4.11: materialise each CTE into a temp table inside a
6656    /// cloned catalog, then run the body SELECT against a fresh
6657    /// engine instance that owns the enriched catalog. The clone
6658    /// is moderately expensive — only paid by CTE-bearing queries.
6659    /// Subqueries inside CTE bodies / the main body resolve as
6660    /// usual; `clock_fn` is propagated so `NOW()` lines up.
6661    fn exec_with_ctes(
6662        &self,
6663        stmt: &SelectStatement,
6664        cancel: CancelToken<'_>,
6665    ) -> Result<QueryResult, EngineError> {
6666        cancel.check()?;
6667        let mut catalog = self.active_catalog().clone();
6668        for cte in &stmt.ctes {
6669            if catalog.get(&cte.name).is_some() {
6670                return Err(EngineError::Unsupported(alloc::format!(
6671                    "CTE name {:?} shadows an existing table; rename the CTE",
6672                    cte.name
6673                )));
6674            }
6675            let (columns, rows) = if cte.recursive {
6676                self.materialise_recursive_cte(cte, &catalog, cancel)?
6677            } else {
6678                let body_result = self.exec_select_cancel(&cte.body, cancel)?;
6679                let QueryResult::Rows { columns, rows } = body_result else {
6680                    return Err(EngineError::Unsupported(alloc::format!(
6681                        "CTE {:?} body did not return rows",
6682                        cte.name
6683                    )));
6684                };
6685                (columns, rows)
6686            };
6687            // v4.22: the projection builder labels any non-column
6688            // expression as Text — including literal SELECT 1.
6689            // Promote each column's type to whatever the rows
6690            // actually carry so the CTE storage table accepts them.
6691            let inferred = infer_column_types(&columns, &rows);
6692            let mut columns = inferred;
6693            // v4.22: apply optional `WITH name(a, b, c)` overrides.
6694            if !cte.column_overrides.is_empty() {
6695                if cte.column_overrides.len() != columns.len() {
6696                    return Err(EngineError::Unsupported(alloc::format!(
6697                        "CTE {:?} column list has {} names but body returns {} columns",
6698                        cte.name,
6699                        cte.column_overrides.len(),
6700                        columns.len()
6701                    )));
6702                }
6703                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6704                    col.name.clone_from(name);
6705                }
6706            }
6707            let schema = TableSchema::new(cte.name.clone(), columns);
6708            catalog.create_table(schema).map_err(EngineError::Storage)?;
6709            let table = catalog
6710                .get_mut(&cte.name)
6711                .expect("just-created CTE table must exist");
6712            for row in rows {
6713                table.insert(row).map_err(EngineError::Storage)?;
6714            }
6715        }
6716        // Strip CTEs from the body before running on the temp engine
6717        // so we don't recurse forever.
6718        let mut body = stmt.clone();
6719        body.ctes = Vec::new();
6720        let mut temp = Engine::restore(catalog);
6721        if let Some(c) = self.clock {
6722            temp = temp.with_clock(c);
6723        }
6724        if let Some(f) = self.salt_fn {
6725            temp = temp.with_salt_fn(f);
6726        }
6727        temp.exec_select_cancel(&body, cancel)
6728    }
6729
6730    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
6731    /// UNION (or UNION ALL) of an anchor that does not reference
6732    /// the CTE name, and one or more recursive terms that do. The
6733    /// anchor runs first; each subsequent iteration runs the
6734    /// recursive term against a temp catalog where the CTE name is
6735    /// bound to the *previous* iteration's output. Iteration stops
6736    /// when the recursive term yields no rows; UNION (DISTINCT)
6737    /// deduplicates against the accumulated result, UNION ALL does
6738    /// not. A hard cap on total rows prevents runaway queries.
6739    #[allow(clippy::too_many_lines)]
6740    fn materialise_recursive_cte(
6741        &self,
6742        cte: &spg_sql::ast::Cte,
6743        base_catalog: &Catalog,
6744        cancel: CancelToken<'_>,
6745    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
6746        const MAX_TOTAL_ROWS: usize = 1_000_000;
6747        const MAX_ITERATIONS: usize = 100_000;
6748        cancel.check()?;
6749        if cte.body.unions.is_empty() {
6750            return Err(EngineError::Unsupported(alloc::format!(
6751                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
6752                cte.name
6753            )));
6754        }
6755        // Anchor: the body's leading SELECT, with unions stripped.
6756        let mut anchor = cte.body.clone();
6757        let union_terms = core::mem::take(&mut anchor.unions);
6758        anchor.ctes = Vec::new();
6759        // Anchor must not reference the CTE name.
6760        if select_refers_to(&anchor, &cte.name) {
6761            return Err(EngineError::Unsupported(alloc::format!(
6762                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
6763                cte.name
6764            )));
6765        }
6766        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
6767        let QueryResult::Rows {
6768            columns: anchor_cols,
6769            rows: anchor_rows,
6770        } = anchor_result
6771        else {
6772            return Err(EngineError::Unsupported(alloc::format!(
6773                "WITH RECURSIVE {:?}: anchor did not return rows",
6774                cte.name
6775            )));
6776        };
6777        // The projection builder labels non-column expressions Text;
6778        // refine column types from the anchor's actual values so the
6779        // intermediate iter-catalog tables accept them.
6780        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
6781        if !cte.column_overrides.is_empty() {
6782            if cte.column_overrides.len() != columns.len() {
6783                return Err(EngineError::Unsupported(alloc::format!(
6784                    "CTE {:?} column list has {} names but anchor returns {} columns",
6785                    cte.name,
6786                    cte.column_overrides.len(),
6787                    columns.len()
6788                )));
6789            }
6790            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
6791                col.name.clone_from(name);
6792            }
6793        }
6794        let mut all_rows: Vec<Row> = anchor_rows.clone();
6795        let mut working_set: Vec<Row> = anchor_rows;
6796        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
6797        // Track at least one "all UNION ALL" flag — if every union
6798        // kind is ALL we skip the dedup step (faster + matches PG).
6799        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
6800        if !all_union_all {
6801            for r in &all_rows {
6802                seen.insert(encode_row_key(r));
6803            }
6804        }
6805        for iter in 0..MAX_ITERATIONS {
6806            cancel.check()?;
6807            if working_set.is_empty() {
6808                break;
6809            }
6810            // Build a fresh catalog: base + CTE bound to working_set.
6811            let mut iter_catalog = base_catalog.clone();
6812            let schema = TableSchema::new(cte.name.clone(), columns.clone());
6813            iter_catalog
6814                .create_table(schema)
6815                .map_err(EngineError::Storage)?;
6816            {
6817                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
6818                for row in &working_set {
6819                    table.insert(row.clone()).map_err(EngineError::Storage)?;
6820                }
6821            }
6822            let mut iter_engine = Engine::restore(iter_catalog);
6823            if let Some(c) = self.clock {
6824                iter_engine = iter_engine.with_clock(c);
6825            }
6826            if let Some(f) = self.salt_fn {
6827                iter_engine = iter_engine.with_salt_fn(f);
6828            }
6829            // Run each recursive term in sequence and collect new rows.
6830            let mut next_set: Vec<Row> = Vec::new();
6831            for (_, term) in &union_terms {
6832                let mut term = term.clone();
6833                term.ctes = Vec::new();
6834                let r = iter_engine.exec_select_cancel(&term, cancel)?;
6835                let QueryResult::Rows {
6836                    columns: rc,
6837                    rows: rs,
6838                } = r
6839                else {
6840                    return Err(EngineError::Unsupported(alloc::format!(
6841                        "WITH RECURSIVE {:?}: recursive term did not return rows",
6842                        cte.name
6843                    )));
6844                };
6845                if rc.len() != columns.len() {
6846                    return Err(EngineError::Unsupported(alloc::format!(
6847                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
6848                        cte.name,
6849                        rc.len(),
6850                        columns.len()
6851                    )));
6852                }
6853                for row in rs {
6854                    if !all_union_all {
6855                        let key = encode_row_key(&row);
6856                        if !seen.insert(key) {
6857                            continue;
6858                        }
6859                    }
6860                    next_set.push(row);
6861                }
6862            }
6863            if next_set.is_empty() {
6864                break;
6865            }
6866            all_rows.extend(next_set.iter().cloned());
6867            working_set = next_set;
6868            if all_rows.len() > MAX_TOTAL_ROWS {
6869                return Err(EngineError::Unsupported(alloc::format!(
6870                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
6871                    cte.name
6872                )));
6873            }
6874            if iter + 1 == MAX_ITERATIONS {
6875                return Err(EngineError::Unsupported(alloc::format!(
6876                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
6877                    cte.name
6878                )));
6879            }
6880        }
6881        Ok((columns, all_rows))
6882    }
6883
6884    fn resolve_select_subqueries(
6885        &self,
6886        stmt: &mut SelectStatement,
6887        cancel: CancelToken<'_>,
6888    ) -> Result<(), EngineError> {
6889        for item in &mut stmt.items {
6890            if let SelectItem::Expr { expr, .. } = item {
6891                self.resolve_expr_subqueries(expr, cancel)?;
6892            }
6893        }
6894        if let Some(w) = &mut stmt.where_ {
6895            self.resolve_expr_subqueries(w, cancel)?;
6896        }
6897        if let Some(gs) = &mut stmt.group_by {
6898            for g in gs {
6899                self.resolve_expr_subqueries(g, cancel)?;
6900            }
6901        }
6902        if let Some(h) = &mut stmt.having {
6903            self.resolve_expr_subqueries(h, cancel)?;
6904        }
6905        for o in &mut stmt.order_by {
6906            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
6907        }
6908        for (_, peer) in &mut stmt.unions {
6909            self.resolve_select_subqueries(peer, cancel)?;
6910        }
6911        Ok(())
6912    }
6913
6914    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
6915    fn resolve_expr_subqueries(
6916        &self,
6917        e: &mut Expr,
6918        cancel: CancelToken<'_>,
6919    ) -> Result<(), EngineError> {
6920        // Replace-on-this-node cases first.
6921        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
6922            *e = replacement;
6923            return Ok(());
6924        }
6925        match e {
6926            Expr::Binary { lhs, rhs, .. } => {
6927                self.resolve_expr_subqueries(lhs, cancel)?;
6928                self.resolve_expr_subqueries(rhs, cancel)?;
6929            }
6930            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6931                self.resolve_expr_subqueries(expr, cancel)?;
6932            }
6933            Expr::FunctionCall { args, .. } => {
6934                for a in args {
6935                    self.resolve_expr_subqueries(a, cancel)?;
6936                }
6937            }
6938            Expr::Like { expr, pattern, .. } => {
6939                self.resolve_expr_subqueries(expr, cancel)?;
6940                self.resolve_expr_subqueries(pattern, cancel)?;
6941            }
6942            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
6943            // v4.12 window functions — recurse into args + ORDER BY
6944            // + PARTITION BY in case they carry inner subqueries.
6945            Expr::WindowFunction {
6946                args,
6947                partition_by,
6948                order_by,
6949                ..
6950            } => {
6951                for a in args {
6952                    self.resolve_expr_subqueries(a, cancel)?;
6953                }
6954                for p in partition_by {
6955                    self.resolve_expr_subqueries(p, cancel)?;
6956                }
6957                for (e, _) in order_by {
6958                    self.resolve_expr_subqueries(e, cancel)?;
6959                }
6960            }
6961            // Subquery nodes are handled in subquery_replacement
6962            // (which returned None — defensive no-op); Literal /
6963            // Column are leaves.
6964            Expr::ScalarSubquery(_)
6965            | Expr::Exists { .. }
6966            | Expr::InSubquery { .. }
6967            | Expr::Literal(_)
6968            | Expr::Placeholder(_)
6969            | Expr::Column(_) => {}
6970            // v7.10.10 — recurse children.
6971            Expr::Array(items) => {
6972                for elem in items {
6973                    self.resolve_expr_subqueries(elem, cancel)?;
6974                }
6975            }
6976            Expr::ArraySubscript { target, index } => {
6977                self.resolve_expr_subqueries(target, cancel)?;
6978                self.resolve_expr_subqueries(index, cancel)?;
6979            }
6980            Expr::AnyAll { expr, array, .. } => {
6981                self.resolve_expr_subqueries(expr, cancel)?;
6982                self.resolve_expr_subqueries(array, cancel)?;
6983            }
6984            Expr::Case {
6985                operand,
6986                branches,
6987                else_branch,
6988            } => {
6989                if let Some(o) = operand {
6990                    self.resolve_expr_subqueries(o, cancel)?;
6991                }
6992                for (w, t) in branches {
6993                    self.resolve_expr_subqueries(w, cancel)?;
6994                    self.resolve_expr_subqueries(t, cancel)?;
6995                }
6996                if let Some(e) = else_branch {
6997                    self.resolve_expr_subqueries(e, cancel)?;
6998                }
6999            }
7000        }
7001        Ok(())
7002    }
7003
7004    /// v4.23: per-row eval that handles correlated subqueries.
7005    /// Equivalent to `eval::eval_expr` when the expression has no
7006    /// subqueries; otherwise clones the expression, substitutes
7007    /// outer-row columns into each surviving subquery node, runs
7008    /// the inner SELECT, and replaces the node with the literal
7009    /// result. Only the WHERE-filter call sites use this path so
7010    /// the uncorrelated fast path is preserved everywhere else.
7011    fn eval_expr_with_correlated(
7012        &self,
7013        expr: &Expr,
7014        row: &Row,
7015        ctx: &EvalContext<'_>,
7016        cancel: CancelToken<'_>,
7017        memo: Option<&mut memoize::MemoizeCache>,
7018    ) -> Result<Value, EngineError> {
7019        if !expr_has_subquery(expr) {
7020            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
7021        }
7022        let mut e = expr.clone();
7023        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
7024        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
7025    }
7026
7027    fn resolve_correlated_in_expr(
7028        &self,
7029        e: &mut Expr,
7030        row: &Row,
7031        ctx: &EvalContext<'_>,
7032        cancel: CancelToken<'_>,
7033        mut memo: Option<&mut memoize::MemoizeCache>,
7034    ) -> Result<(), EngineError> {
7035        match e {
7036            Expr::ScalarSubquery(inner) => {
7037                // v6.2.6 — Memoize: build the cache key from the
7038                // pre-substitution subquery repr + the outer row's
7039                // values. Two outer rows with identical correlated
7040                // values hit the same entry.
7041                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
7042                    subquery_repr: alloc::format!("{}", **inner),
7043                    outer_values: row.values.clone(),
7044                });
7045                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
7046                    && let Some(cached) = cache.get(k)
7047                {
7048                    *e = value_to_literal_expr(cached)?;
7049                    return Ok(());
7050                }
7051                let mut s = (**inner).clone();
7052                substitute_outer_columns(&mut s, row, ctx);
7053                let r = self.exec_select_cancel(&s, cancel)?;
7054                let QueryResult::Rows { rows, .. } = r else {
7055                    return Err(EngineError::Unsupported(
7056                        "scalar subquery: inner did not return rows".into(),
7057                    ));
7058                };
7059                let value = match rows.as_slice() {
7060                    [] => Value::Null,
7061                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
7062                    _ => {
7063                        return Err(EngineError::Unsupported(alloc::format!(
7064                            "scalar subquery returned {} rows; expected 0 or 1",
7065                            rows.len()
7066                        )));
7067                    }
7068                };
7069                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
7070                    cache.insert(k, value.clone());
7071                }
7072                *e = value_to_literal_expr(value)?;
7073            }
7074            Expr::Exists { subquery, negated } => {
7075                let mut s = (**subquery).clone();
7076                substitute_outer_columns(&mut s, row, ctx);
7077                let r = self.exec_select_cancel(&s, cancel)?;
7078                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
7079                let bit = if *negated { !exists } else { exists };
7080                *e = Expr::Literal(Literal::Bool(bit));
7081            }
7082            Expr::InSubquery {
7083                expr: lhs,
7084                subquery,
7085                negated,
7086            } => {
7087                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7088                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
7089                let mut s = (**subquery).clone();
7090                substitute_outer_columns(&mut s, row, ctx);
7091                let r = self.exec_select_cancel(&s, cancel)?;
7092                let QueryResult::Rows { columns, rows, .. } = r else {
7093                    return Err(EngineError::Unsupported(
7094                        "IN-subquery: inner did not return rows".into(),
7095                    ));
7096                };
7097                if columns.len() != 1 {
7098                    return Err(EngineError::Unsupported(alloc::format!(
7099                        "IN-subquery must project exactly one column; got {}",
7100                        columns.len()
7101                    )));
7102                }
7103                let mut found = false;
7104                let mut any_null = false;
7105                for r0 in rows {
7106                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
7107                    if v.is_null() {
7108                        any_null = true;
7109                        continue;
7110                    }
7111                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
7112                        found = true;
7113                        break;
7114                    }
7115                }
7116                let bit = if found {
7117                    !*negated
7118                } else if any_null {
7119                    return Err(EngineError::Unsupported(
7120                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
7121                    ));
7122                } else {
7123                    *negated
7124                };
7125                *e = Expr::Literal(Literal::Bool(bit));
7126            }
7127            Expr::Binary { lhs, rhs, .. } => {
7128                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
7129                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
7130            }
7131            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7132                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7133            }
7134            Expr::Like { expr, pattern, .. } => {
7135                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7136                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
7137            }
7138            Expr::FunctionCall { args, .. } => {
7139                for a in args {
7140                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
7141                }
7142            }
7143            Expr::Extract { source, .. } => {
7144                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
7145            }
7146            Expr::WindowFunction { .. }
7147            | Expr::Literal(_)
7148            | Expr::Placeholder(_)
7149            | Expr::Column(_) => {}
7150            // v7.10.10 — recurse children.
7151            Expr::Array(items) => {
7152                for elem in items {
7153                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
7154                }
7155            }
7156            Expr::ArraySubscript { target, index } => {
7157                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
7158                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
7159            }
7160            Expr::AnyAll { expr, array, .. } => {
7161                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
7162                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
7163            }
7164            Expr::Case {
7165                operand,
7166                branches,
7167                else_branch,
7168            } => {
7169                if let Some(o) = operand {
7170                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
7171                }
7172                for (w, t) in branches {
7173                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
7174                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
7175                }
7176                if let Some(e) = else_branch {
7177                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
7178                }
7179            }
7180        }
7181        Ok(())
7182    }
7183
7184    fn subquery_replacement(
7185        &self,
7186        e: &Expr,
7187        cancel: CancelToken<'_>,
7188    ) -> Result<Option<Expr>, EngineError> {
7189        match e {
7190            Expr::ScalarSubquery(inner) => {
7191                let mut s = (**inner).clone();
7192                // Recurse into the inner SELECT first so nested
7193                // subqueries materialise bottom-up.
7194                self.resolve_select_subqueries(&mut s, cancel)?;
7195                let r = match self.exec_bare_select_cancel(&s, cancel) {
7196                    Ok(r) => r,
7197                    Err(e) if is_correlation_error(&e) => return Ok(None),
7198                    Err(e) => return Err(e),
7199                };
7200                let QueryResult::Rows { rows, .. } = r else {
7201                    return Err(EngineError::Unsupported(
7202                        "scalar subquery: inner statement did not return rows".into(),
7203                    ));
7204                };
7205                let value = match rows.as_slice() {
7206                    [] => Value::Null,
7207                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
7208                    _ => {
7209                        return Err(EngineError::Unsupported(alloc::format!(
7210                            "scalar subquery returned {} rows; expected 0 or 1",
7211                            rows.len()
7212                        )));
7213                    }
7214                };
7215                Ok(Some(value_to_literal_expr(value)?))
7216            }
7217            Expr::Exists { subquery, negated } => {
7218                let mut s = (**subquery).clone();
7219                self.resolve_select_subqueries(&mut s, cancel)?;
7220                let r = match self.exec_bare_select_cancel(&s, cancel) {
7221                    Ok(r) => r,
7222                    Err(e) if is_correlation_error(&e) => return Ok(None),
7223                    Err(e) => return Err(e),
7224                };
7225                let exists = match r {
7226                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
7227                    QueryResult::CommandOk { .. } => false,
7228                };
7229                let bit = if *negated { !exists } else { exists };
7230                Ok(Some(Expr::Literal(Literal::Bool(bit))))
7231            }
7232            Expr::InSubquery {
7233                expr,
7234                subquery,
7235                negated,
7236            } => {
7237                let mut s = (**subquery).clone();
7238                self.resolve_select_subqueries(&mut s, cancel)?;
7239                let r = match self.exec_bare_select_cancel(&s, cancel) {
7240                    Ok(r) => r,
7241                    Err(e) if is_correlation_error(&e) => return Ok(None),
7242                    Err(e) => return Err(e),
7243                };
7244                let QueryResult::Rows { columns, rows, .. } = r else {
7245                    return Err(EngineError::Unsupported(
7246                        "IN-subquery: inner statement did not return rows".into(),
7247                    ));
7248                };
7249                if columns.len() != 1 {
7250                    return Err(EngineError::Unsupported(alloc::format!(
7251                        "IN-subquery must project exactly one column; got {}",
7252                        columns.len()
7253                    )));
7254                }
7255                // Build the same OR-Eq chain the parse-time literal-list
7256                // path constructs, with each value lifted into a Literal.
7257                let mut acc: Option<Expr> = None;
7258                for row in rows {
7259                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
7260                    let lit = value_to_literal_expr(v)?;
7261                    let cmp = Expr::Binary {
7262                        lhs: expr.clone(),
7263                        op: BinOp::Eq,
7264                        rhs: Box::new(lit),
7265                    };
7266                    acc = Some(match acc {
7267                        None => cmp,
7268                        Some(prev) => Expr::Binary {
7269                            lhs: Box::new(prev),
7270                            op: BinOp::Or,
7271                            rhs: Box::new(cmp),
7272                        },
7273                    });
7274                }
7275                let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
7276                let final_expr = if *negated {
7277                    Expr::Unary {
7278                        op: UnOp::Not,
7279                        expr: Box::new(combined),
7280                    }
7281                } else {
7282                    combined
7283                };
7284                Ok(Some(final_expr))
7285            }
7286            _ => Ok(None),
7287        }
7288    }
7289}
7290
7291// ---- v4.12 window-function helpers ----
7292// The (partition-key, order-key, original-index) tuple shape used
7293// across these helpers is intrinsic to the planner. Factoring it
7294// into a typedef adds indirection without making the code clearer,
7295// so several lints are allowed inline on the affected functions
7296// rather than module-wide.
7297
7298/// v4.22: cheap structural scan for `FROM <name>` (qualified or
7299/// not) inside a SELECT — used to verify the anchor of a WITH
7300/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
7301/// FROM joins, subqueries, and unions.
7302fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
7303    if let Some(from) = &stmt.from
7304        && from_refers_to(from, target)
7305    {
7306        return true;
7307    }
7308    for (_, peer) in &stmt.unions {
7309        if select_refers_to(peer, target) {
7310            return true;
7311        }
7312    }
7313    for item in &stmt.items {
7314        if let SelectItem::Expr { expr, .. } = item
7315            && expr_refers_to(expr, target)
7316        {
7317            return true;
7318        }
7319    }
7320    if let Some(w) = &stmt.where_
7321        && expr_refers_to(w, target)
7322    {
7323        return true;
7324    }
7325    false
7326}
7327
7328fn from_refers_to(from: &FromClause, target: &str) -> bool {
7329    if from.primary.name.eq_ignore_ascii_case(target) {
7330        return true;
7331    }
7332    from.joins
7333        .iter()
7334        .any(|j| j.table.name.eq_ignore_ascii_case(target))
7335}
7336
7337fn expr_refers_to(e: &Expr, target: &str) -> bool {
7338    match e {
7339        Expr::ScalarSubquery(s) => select_refers_to(s, target),
7340        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7341            select_refers_to(subquery, target)
7342        }
7343        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
7344        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7345            expr_refers_to(expr, target)
7346        }
7347        Expr::Like { expr, pattern, .. } => {
7348            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
7349        }
7350        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
7351        Expr::Extract { source, .. } => expr_refers_to(source, target),
7352        Expr::WindowFunction {
7353            args,
7354            partition_by,
7355            order_by,
7356            ..
7357        } => {
7358            args.iter().any(|a| expr_refers_to(a, target))
7359                || partition_by.iter().any(|p| expr_refers_to(p, target))
7360                || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
7361        }
7362        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
7363        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
7364        Expr::ArraySubscript { target: t, index } => {
7365            expr_refers_to(t, target) || expr_refers_to(index, target)
7366        }
7367        Expr::AnyAll { expr, array, .. } => {
7368            expr_refers_to(expr, target) || expr_refers_to(array, target)
7369        }
7370        Expr::Case {
7371            operand,
7372            branches,
7373            else_branch,
7374        } => {
7375            operand.as_deref().is_some_and(|o| expr_refers_to(o, target))
7376                || branches
7377                    .iter()
7378                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
7379                || else_branch
7380                    .as_deref()
7381                    .is_some_and(|e| expr_refers_to(e, target))
7382        }
7383    }
7384}
7385
7386/// v4.22: pick more specific column types from observed rows when
7387/// the projection builder defaulted to Text (the v1.x behavior for
7388/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
7389/// land an Int column in the CTE storage table rather than failing
7390/// the insert with "expected TEXT, got INT".
7391fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
7392    let mut out = columns.to_vec();
7393    for (col_idx, col) in out.iter_mut().enumerate() {
7394        if col.ty != DataType::Text {
7395            continue;
7396        }
7397        let mut inferred: Option<DataType> = None;
7398        let mut all_null = true;
7399        for row in rows {
7400            let Some(v) = row.values.get(col_idx) else {
7401                continue;
7402            };
7403            let ty = match v {
7404                Value::Null => continue,
7405                Value::SmallInt(_) => DataType::SmallInt,
7406                Value::Int(_) => DataType::Int,
7407                Value::BigInt(_) => DataType::BigInt,
7408                Value::Float(_) => DataType::Float,
7409                Value::Bool(_) => DataType::Bool,
7410                Value::Vector(_) => DataType::Vector {
7411                    dim: 0,
7412                    encoding: VecEncoding::F32,
7413                },
7414                _ => DataType::Text,
7415            };
7416            all_null = false;
7417            inferred = Some(match inferred {
7418                None => ty,
7419                Some(prev) if prev == ty => prev,
7420                Some(_) => DataType::Text,
7421            });
7422        }
7423        if let Some(t) = inferred {
7424            col.ty = t;
7425            col.nullable = true;
7426        } else if all_null {
7427            col.nullable = true;
7428        }
7429    }
7430    out
7431}
7432
7433/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
7434/// Lines are pushed into `out`; `depth` controls indentation. We
7435/// describe the rewritten SELECT — what the executor *would* do —
7436/// using the engine handle to spot indexed lookups and table shapes.
7437#[allow(clippy::too_many_lines, clippy::format_push_string)]
7438/// v6.2.4 — Walk every line of the rendered plan tree and append
7439/// per-operator stats. Lines that name a known operator get
7440/// `(rows=N)` (`actual_rows` of the top-level operator equals the
7441/// final result row count; scans report their catalog row count
7442/// as the rows-considered metric). Other lines — Filter / Join /
7443/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
7444/// complete-by-construction; v6.2.5 fills these in via inline
7445/// executor counters.
7446/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
7447/// `(table, column)` pair the query touches via WHERE / JOIN
7448/// that doesn't already have an index on the owning table.
7449/// Walks the SELECT's FROM clauses + WHERE expression tree;
7450/// returns one line per missing index. Deterministic order:
7451/// FROM-clause iteration order, then column-reference walk
7452/// order inside each WHERE. Each suggestion is a copy-pastable
7453/// DDL string.
7454fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
7455    use alloc::collections::BTreeSet;
7456    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
7457    let mut out: Vec<String> = Vec::new();
7458    let cat = engine.active_catalog();
7459    // Build a (table, qualifier-or-alias) list from the FROM clause
7460    // so unqualified column refs in WHERE resolve to the correct
7461    // table.
7462    let Some(from) = &stmt.from else {
7463        return out;
7464    };
7465    let mut tables: Vec<String> = Vec::new();
7466    tables.push(from.primary.name.clone());
7467    for j in &from.joins {
7468        tables.push(j.table.name.clone());
7469    }
7470    // Collect column refs from the WHERE expression. JOIN ON
7471    // predicates also feed in.
7472    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
7473    if let Some(w) = &stmt.where_ {
7474        collect_column_refs(w, &mut col_refs);
7475    }
7476    for j in &from.joins {
7477        if let Some(on) = &j.on {
7478            collect_column_refs(on, &mut col_refs);
7479        }
7480    }
7481    for cn in &col_refs {
7482        // Resolve owner table: explicit qualifier first, else
7483        // first table in FROM that has a column of this name.
7484        let owner: Option<String> = if let Some(q) = &cn.qualifier {
7485            tables.iter().find(|t| t == &q).cloned()
7486        } else {
7487            tables.iter().find_map(|t| {
7488                cat.get(t).and_then(|tbl| {
7489                    if tbl.schema().column_position(&cn.name).is_some() {
7490                        Some(t.clone())
7491                    } else {
7492                        None
7493                    }
7494                })
7495            })
7496        };
7497        let Some(owner) = owner else {
7498            continue;
7499        };
7500        let Some(tbl) = cat.get(&owner) else {
7501            continue;
7502        };
7503        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
7504            continue;
7505        };
7506        // Skip if any BTree index already covers this column as
7507        // its key.
7508        let already_indexed = tbl.indices().iter().any(|i| {
7509            matches!(i.kind, spg_storage::IndexKind::BTree(_))
7510                && i.column_position == col_pos
7511                && i.expression.is_none()
7512                && i.partial_predicate.is_none()
7513        });
7514        if already_indexed {
7515            continue;
7516        }
7517        if seen.insert((owner.clone(), cn.name.clone())) {
7518            out.push(alloc::format!(
7519                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
7520                owner,
7521                cn.name,
7522                owner,
7523                cn.name
7524            ));
7525        }
7526    }
7527    out
7528}
7529
7530/// Walks an `Expr` and pushes every `ColumnName` it references.
7531/// Order is depth-first, left-to-right.
7532fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
7533    match expr {
7534        Expr::Column(cn) => out.push(cn.clone()),
7535        Expr::FunctionCall { args, .. } => {
7536            for a in args {
7537                collect_column_refs(a, out);
7538            }
7539        }
7540        Expr::Binary { lhs, rhs, .. } => {
7541            collect_column_refs(lhs, out);
7542            collect_column_refs(rhs, out);
7543        }
7544        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
7545        _ => {}
7546    }
7547}
7548
7549fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
7550    let catalog = engine.active_catalog();
7551    let cold_ids = catalog.cold_segment_ids_global();
7552    let any_cold = !cold_ids.is_empty();
7553    let cold_ids_repr = if any_cold {
7554        let mut s = alloc::string::String::from("[");
7555        for (i, id) in cold_ids.iter().enumerate() {
7556            if i > 0 {
7557                s.push(',');
7558            }
7559            s.push_str(&alloc::format!("{id}"));
7560        }
7561        s.push(']');
7562        s
7563    } else {
7564        alloc::string::String::new()
7565    };
7566    for (idx, line) in lines.iter_mut().enumerate() {
7567        let trimmed = line.trim_start();
7568        let is_top_level = idx == 0;
7569        if is_top_level {
7570            line.push_str(&alloc::format!(" (rows={total_rows})"));
7571            continue;
7572        }
7573        if let Some(rest) = trimmed.strip_prefix("From: ") {
7574            let (name, scan_kind) = match rest.split_once(" [") {
7575                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
7576                None => (rest.trim(), ""),
7577            };
7578            let bare = name.split_whitespace().next().unwrap_or(name);
7579            let hot = catalog.get(bare).map(|t| t.rows().len());
7580            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
7581            // cold-tier segment the scan COULD have walked. v6.2.x
7582            // can tighten to per-table by walking the table's
7583            // BTree-index cold locators.
7584            let annot = match (hot, scan_kind) {
7585                (Some(h), "full scan") => {
7586                    let mut s = alloc::format!(" (hot_rows={h}");
7587                    if any_cold {
7588                        s.push_str(&alloc::format!(
7589                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7590                        ));
7591                    }
7592                    s.push(')');
7593                    s
7594                }
7595                (Some(h), "index seek") => {
7596                    let mut s = alloc::format!(" (hot_rows≤{h}");
7597                    if any_cold {
7598                        s.push_str(&alloc::format!(
7599                            ", cold_tier=present, cold_segments={cold_ids_repr}"
7600                        ));
7601                    }
7602                    s.push(')');
7603                    s
7604                }
7605                _ => " (rows=—)".to_string(),
7606            };
7607            line.push_str(&annot);
7608            continue;
7609        }
7610        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
7611        line.push_str(" (rows=—)");
7612    }
7613}
7614
7615fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
7616    let pad = "  ".repeat(depth);
7617    // 1) Top-level operator label.
7618    let top = if !stmt.ctes.is_empty() {
7619        if stmt.ctes.iter().any(|c| c.recursive) {
7620            "CTEScan (WITH RECURSIVE)"
7621        } else {
7622            "CTEScan (WITH)"
7623        }
7624    } else if !stmt.unions.is_empty() {
7625        "UnionScan"
7626    } else if select_has_window(stmt) {
7627        "WindowAgg"
7628    } else if aggregate::uses_aggregate(stmt) {
7629        "Aggregate"
7630    } else if stmt.distinct {
7631        "Distinct"
7632    } else if stmt.from.is_some() {
7633        "TableScan"
7634    } else {
7635        "Result"
7636    };
7637    out.push(alloc::format!("{pad}{top}"));
7638    let child = "  ".repeat(depth + 1);
7639    // 2) CTE bodies.
7640    for cte in &stmt.ctes {
7641        let head = if cte.recursive {
7642            alloc::format!("{child}CTE (recursive): {}", cte.name)
7643        } else {
7644            alloc::format!("{child}CTE: {}", cte.name)
7645        };
7646        out.push(head);
7647        explain_select(&cte.body, engine, depth + 2, out);
7648    }
7649    // 3) FROM details — primary table + joins, index hits.
7650    if let Some(from) = &stmt.from {
7651        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
7652        if let Some(alias) = &from.primary.alias {
7653            tag.push_str(&alloc::format!(" AS {alias}"));
7654        }
7655        // Try to detect an index-seek opportunity on WHERE against
7656        // the primary table — same heuristic the executor uses.
7657        if let Some(w) = &stmt.where_
7658            && let Some(table) = engine.active_catalog().get(&from.primary.name)
7659        {
7660            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
7661            let cols = &table.schema().columns;
7662            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
7663                tag.push_str(" [index seek]");
7664            } else {
7665                tag.push_str(" [full scan]");
7666            }
7667        } else {
7668            tag.push_str(" [full scan]");
7669        }
7670        out.push(tag);
7671        for j in &from.joins {
7672            let kind = match j.kind {
7673                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
7674                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
7675                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
7676            };
7677            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
7678            if let Some(alias) = &j.table.alias {
7679                s.push_str(&alloc::format!(" AS {alias}"));
7680            }
7681            if j.on.is_some() {
7682                s.push_str(" (ON …)");
7683            }
7684            out.push(s);
7685        }
7686    }
7687    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
7688    if let Some(w) = &stmt.where_ {
7689        let mut s = alloc::format!("{child}Filter: {w}");
7690        if expr_has_subquery(w) {
7691            s.push_str(" [subquery]");
7692        }
7693        out.push(s);
7694    }
7695    if let Some(gs) = &stmt.group_by {
7696        let mut parts = Vec::new();
7697        for g in gs {
7698            parts.push(alloc::format!("{g}"));
7699        }
7700        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
7701    }
7702    if let Some(h) = &stmt.having {
7703        out.push(alloc::format!("{child}Having: {h}"));
7704    }
7705    for o in &stmt.order_by {
7706        let dir = if o.desc { "DESC" } else { "ASC" };
7707        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
7708    }
7709    if let Some(lim) = stmt.limit {
7710        out.push(alloc::format!("{child}Limit: {lim}"));
7711    }
7712    if let Some(off) = stmt.offset {
7713        out.push(alloc::format!("{child}Offset: {off}"));
7714    }
7715    // 5) Projection — collapse Wildcard or render N items.
7716    if stmt
7717        .items
7718        .iter()
7719        .any(|it| matches!(it, SelectItem::Wildcard))
7720    {
7721        out.push(alloc::format!("{child}Project: *"));
7722    } else {
7723        out.push(alloc::format!(
7724            "{child}Project: {} item(s)",
7725            stmt.items.len()
7726        ));
7727    }
7728    // 6) Recurse into UNION peers.
7729    for (kind, peer) in &stmt.unions {
7730        let label = match kind {
7731            UnionKind::All => "UNION ALL",
7732            UnionKind::Distinct => "UNION",
7733        };
7734        out.push(alloc::format!("{child}{label}"));
7735        explain_select(peer, engine, depth + 2, out);
7736    }
7737}
7738
7739/// v4.23: recognise the engine errors that indicate the inner
7740/// SELECT couldn't be evaluated in isolation because it references
7741/// an outer column — used by `subquery_replacement` to skip
7742/// materialisation and let row-eval handle it instead.
7743fn is_correlation_error(e: &EngineError) -> bool {
7744    matches!(
7745        e,
7746        EngineError::Eval(
7747            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
7748        )
7749    )
7750}
7751
7752/// v4.23: walk every Expr in `stmt` and replace each Column ref
7753/// that targets the outer scope (qualifier matches the outer
7754/// table alias) with a Literal carrying the outer row's value.
7755/// Conservative: only qualified refs are substituted, so the user
7756/// must write `outer_alias.col` to reference an outer column. This
7757/// matches PG's lexical scoping for correlated subqueries and
7758/// avoids accidentally rebinding inner columns of the same name.
7759fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
7760    let Some(outer_alias) = ctx.table_alias else {
7761        return;
7762    };
7763    substitute_in_select(stmt, row, ctx, outer_alias);
7764}
7765
7766fn substitute_in_select(
7767    stmt: &mut SelectStatement,
7768    row: &Row,
7769    ctx: &EvalContext<'_>,
7770    outer_alias: &str,
7771) {
7772    for item in &mut stmt.items {
7773        if let SelectItem::Expr { expr, .. } = item {
7774            substitute_in_expr(expr, row, ctx, outer_alias);
7775        }
7776    }
7777    if let Some(w) = &mut stmt.where_ {
7778        substitute_in_expr(w, row, ctx, outer_alias);
7779    }
7780    if let Some(gs) = &mut stmt.group_by {
7781        for g in gs {
7782            substitute_in_expr(g, row, ctx, outer_alias);
7783        }
7784    }
7785    if let Some(h) = &mut stmt.having {
7786        substitute_in_expr(h, row, ctx, outer_alias);
7787    }
7788    for o in &mut stmt.order_by {
7789        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
7790    }
7791    for (_, peer) in &mut stmt.unions {
7792        substitute_in_select(peer, row, ctx, outer_alias);
7793    }
7794}
7795
7796fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
7797    if let Expr::Column(c) = e
7798        && let Some(qual) = &c.qualifier
7799        && qual.eq_ignore_ascii_case(outer_alias)
7800    {
7801        // Look up the column's index in the outer schema.
7802        if let Some(idx) = ctx
7803            .columns
7804            .iter()
7805            .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
7806        {
7807            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
7808            if let Ok(lit) = value_to_literal_expr(v) {
7809                *e = lit;
7810                return;
7811            }
7812        }
7813    }
7814    match e {
7815        Expr::Binary { lhs, rhs, .. } => {
7816            substitute_in_expr(lhs, row, ctx, outer_alias);
7817            substitute_in_expr(rhs, row, ctx, outer_alias);
7818        }
7819        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7820            substitute_in_expr(expr, row, ctx, outer_alias);
7821        }
7822        Expr::Like { expr, pattern, .. } => {
7823            substitute_in_expr(expr, row, ctx, outer_alias);
7824            substitute_in_expr(pattern, row, ctx, outer_alias);
7825        }
7826        Expr::FunctionCall { args, .. } => {
7827            for a in args {
7828                substitute_in_expr(a, row, ctx, outer_alias);
7829            }
7830        }
7831        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
7832        Expr::WindowFunction {
7833            args,
7834            partition_by,
7835            order_by,
7836            ..
7837        } => {
7838            for a in args {
7839                substitute_in_expr(a, row, ctx, outer_alias);
7840            }
7841            for p in partition_by {
7842                substitute_in_expr(p, row, ctx, outer_alias);
7843            }
7844            for (o, _) in order_by {
7845                substitute_in_expr(o, row, ctx, outer_alias);
7846            }
7847        }
7848        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
7849        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
7850            substitute_in_select(subquery, row, ctx, outer_alias);
7851        }
7852        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
7853        Expr::Array(items) => {
7854            for elem in items {
7855                substitute_in_expr(elem, row, ctx, outer_alias);
7856            }
7857        }
7858        Expr::ArraySubscript { target, index } => {
7859            substitute_in_expr(target, row, ctx, outer_alias);
7860            substitute_in_expr(index, row, ctx, outer_alias);
7861        }
7862        Expr::AnyAll { expr, array, .. } => {
7863            substitute_in_expr(expr, row, ctx, outer_alias);
7864            substitute_in_expr(array, row, ctx, outer_alias);
7865        }
7866        Expr::Case {
7867            operand,
7868            branches,
7869            else_branch,
7870        } => {
7871            if let Some(o) = operand {
7872                substitute_in_expr(o, row, ctx, outer_alias);
7873            }
7874            for (w, t) in branches {
7875                substitute_in_expr(w, row, ctx, outer_alias);
7876                substitute_in_expr(t, row, ctx, outer_alias);
7877            }
7878            if let Some(e) = else_branch {
7879                substitute_in_expr(e, row, ctx, outer_alias);
7880            }
7881        }
7882    }
7883}
7884
7885/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
7886/// dedup inside the recursive iteration. Crude but deterministic
7887/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
7888fn encode_row_key(row: &Row) -> Vec<u8> {
7889    let mut out = Vec::new();
7890    for v in &row.values {
7891        let s = alloc::format!("{v:?}|");
7892        out.extend_from_slice(s.as_bytes());
7893    }
7894    out
7895}
7896
7897fn select_has_window(stmt: &SelectStatement) -> bool {
7898    for item in &stmt.items {
7899        if let SelectItem::Expr { expr, .. } = item
7900            && expr_has_window(expr)
7901        {
7902            return true;
7903        }
7904    }
7905    false
7906}
7907
7908fn expr_has_window(e: &Expr) -> bool {
7909    match e {
7910        Expr::WindowFunction { .. } => true,
7911        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
7912        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7913            expr_has_window(expr)
7914        }
7915        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
7916        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
7917        Expr::Extract { source, .. } => expr_has_window(source),
7918        Expr::ScalarSubquery(_)
7919        | Expr::Exists { .. }
7920        | Expr::InSubquery { .. }
7921        | Expr::Literal(_)
7922        | Expr::Placeholder(_)
7923        | Expr::Column(_) => false,
7924        Expr::Array(items) => items.iter().any(expr_has_window),
7925        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
7926        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
7927        Expr::Case {
7928            operand,
7929            branches,
7930            else_branch,
7931        } => {
7932            operand.as_deref().is_some_and(expr_has_window)
7933                || branches
7934                    .iter()
7935                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
7936                || else_branch.as_deref().is_some_and(expr_has_window)
7937        }
7938    }
7939}
7940
7941fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
7942    if let Expr::WindowFunction { .. } = e {
7943        // Deduplicate by structural equality on the expression
7944        // (cheap because window args + partition + order are
7945        // small). Without dedup we'd recompute identical windows
7946        // once per occurrence in the projection.
7947        if !out.iter().any(|x| x == e) {
7948            out.push(e.clone());
7949        }
7950        return;
7951    }
7952    match e {
7953        // Already handled by the early-return at the top.
7954        Expr::WindowFunction { .. } => unreachable!(),
7955        Expr::Binary { lhs, rhs, .. } => {
7956            collect_window_nodes(lhs, out);
7957            collect_window_nodes(rhs, out);
7958        }
7959        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7960            collect_window_nodes(expr, out);
7961        }
7962        Expr::FunctionCall { args, .. } => {
7963            for a in args {
7964                collect_window_nodes(a, out);
7965            }
7966        }
7967        Expr::Like { expr, pattern, .. } => {
7968            collect_window_nodes(expr, out);
7969            collect_window_nodes(pattern, out);
7970        }
7971        Expr::Extract { source, .. } => collect_window_nodes(source, out),
7972        _ => {}
7973    }
7974}
7975
7976fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
7977    if let Expr::WindowFunction { .. } = e
7978        && let Some(idx) = window_nodes.iter().position(|w| w == e)
7979    {
7980        *e = Expr::Column(spg_sql::ast::ColumnName {
7981            qualifier: None,
7982            name: alloc::format!("__win_{idx}"),
7983        });
7984        return;
7985    }
7986    match e {
7987        Expr::Binary { lhs, rhs, .. } => {
7988            rewrite_window_to_columns(lhs, window_nodes);
7989            rewrite_window_to_columns(rhs, window_nodes);
7990        }
7991        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7992            rewrite_window_to_columns(expr, window_nodes);
7993        }
7994        Expr::FunctionCall { args, .. } => {
7995            for a in args {
7996                rewrite_window_to_columns(a, window_nodes);
7997            }
7998        }
7999        Expr::Like { expr, pattern, .. } => {
8000            rewrite_window_to_columns(expr, window_nodes);
8001            rewrite_window_to_columns(pattern, window_nodes);
8002        }
8003        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
8004        _ => {}
8005    }
8006}
8007
8008/// Total order over partition-key tuples. NULL sorts as the
8009/// lowest value (matches the `<` partial order's NULL-last
8010/// behaviour with `INFINITY` flipped).
8011fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
8012    for (x, y) in a.iter().zip(b.iter()) {
8013        let c = value_cmp(x, y);
8014        if c != core::cmp::Ordering::Equal {
8015            return c;
8016        }
8017    }
8018    a.len().cmp(&b.len())
8019}
8020
8021fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
8022    for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
8023        let c = value_cmp(va, vb);
8024        let c = if *desc { c.reverse() } else { c };
8025        if c != core::cmp::Ordering::Equal {
8026            return c;
8027        }
8028    }
8029    a.len().cmp(&b.len())
8030}
8031
8032#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
8033fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
8034    use core::cmp::Ordering;
8035    match (a, b) {
8036        (Value::Null, Value::Null) => Ordering::Equal,
8037        (Value::Null, _) => Ordering::Less,
8038        (_, Value::Null) => Ordering::Greater,
8039        (Value::Int(x), Value::Int(y)) => x.cmp(y),
8040        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
8041        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
8042        (Value::Text(x), Value::Text(y)) => x.cmp(y),
8043        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
8044        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
8045        (Value::Date(x), Value::Date(y)) => x.cmp(y),
8046        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
8047        // Cross-type compare: fall back to the debug rendering —
8048        // same-partition is the goal, exact order is irrelevant.
8049        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
8050    }
8051}
8052
8053/// Compute the window function's per-row output for one partition.
8054/// `slice` has (partition key, order key, original-row-index)
8055/// tuples already sorted by order key. `filtered_rows` is the
8056/// full row list indexed by original-row-index. `out_vals` is
8057/// the destination, also indexed by original-row-index.
8058#[allow(
8059    clippy::too_many_arguments,
8060    clippy::cast_possible_truncation,
8061    clippy::cast_possible_wrap,
8062    clippy::cast_precision_loss,
8063    clippy::cast_sign_loss,
8064    clippy::doc_markdown,
8065    clippy::too_many_lines,
8066    clippy::type_complexity,
8067    clippy::match_same_arms
8068)]
8069fn compute_window_partition(
8070    name: &str,
8071    args: &[Expr],
8072    ordered: bool,
8073    frame: Option<&WindowFrame>,
8074    null_treatment: spg_sql::ast::NullTreatment,
8075    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8076    filtered_rows: &[&Row],
8077    ctx: &EvalContext<'_>,
8078    out_vals: &mut [Value],
8079) -> Result<(), EngineError> {
8080    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
8081    let lower = name.to_ascii_lowercase();
8082    match lower.as_str() {
8083        "row_number" => {
8084            for (rank, (_, _, idx)) in slice.iter().enumerate() {
8085                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
8086            }
8087            Ok(())
8088        }
8089        "rank" => {
8090            let mut prev_key: Option<&[(Value, bool)]> = None;
8091            let mut current_rank: i64 = 1;
8092            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8093                if let Some(p) = prev_key
8094                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8095                {
8096                    current_rank = (i + 1) as i64;
8097                }
8098                if prev_key.is_none() {
8099                    current_rank = 1;
8100                }
8101                out_vals[*idx] = Value::BigInt(current_rank);
8102                prev_key = Some(okey.as_slice());
8103            }
8104            Ok(())
8105        }
8106        "dense_rank" => {
8107            let mut prev_key: Option<&[(Value, bool)]> = None;
8108            let mut current_rank: i64 = 0;
8109            for (_, okey, idx) in slice {
8110                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
8111                    current_rank += 1;
8112                }
8113                out_vals[*idx] = Value::BigInt(current_rank);
8114                prev_key = Some(okey.as_slice());
8115            }
8116            Ok(())
8117        }
8118        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
8119            // Pre-evaluate the function arg per row in the slice
8120            // (count_star has no arg).
8121            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
8122                slice.iter().map(|_| Value::Null).collect()
8123            } else {
8124                slice
8125                    .iter()
8126                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8127                    .collect::<Result<_, _>>()
8128                    .map_err(EngineError::Eval)?
8129            };
8130            // v4.20: pick the effective frame. Explicit frame
8131            // overrides the implicit default (running for ordered,
8132            // whole-partition for unordered).
8133            let eff = effective_frame(frame, ordered)?;
8134            #[allow(clippy::needless_range_loop)]
8135            for i in 0..slice.len() {
8136                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8137                let mut sum: f64 = 0.0;
8138                let mut count: i64 = 0;
8139                let mut min_v: Option<f64> = None;
8140                let mut max_v: Option<f64> = None;
8141                let mut row_count: i64 = 0;
8142                if lo <= hi {
8143                    for j in lo..=hi {
8144                        let v = &arg_values[j];
8145                        match lower.as_str() {
8146                            "count_star" => row_count += 1,
8147                            "count" => {
8148                                if !v.is_null() {
8149                                    count += 1;
8150                                }
8151                            }
8152                            _ => {
8153                                if let Some(x) = value_to_f64(v) {
8154                                    sum += x;
8155                                    count += 1;
8156                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
8157                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
8158                                }
8159                            }
8160                        }
8161                    }
8162                }
8163                let value = match lower.as_str() {
8164                    "count_star" => Value::BigInt(row_count),
8165                    "count" => Value::BigInt(count),
8166                    "sum" => Value::Float(sum),
8167                    "avg" => {
8168                        if count == 0 {
8169                            Value::Null
8170                        } else {
8171                            Value::Float(sum / count as f64)
8172                        }
8173                    }
8174                    "min" => min_v.map_or(Value::Null, Value::Float),
8175                    "max" => max_v.map_or(Value::Null, Value::Float),
8176                    _ => unreachable!(),
8177                };
8178                let (_, _, idx) = &slice[i];
8179                out_vals[*idx] = value;
8180            }
8181            Ok(())
8182        }
8183        "lag" | "lead" => {
8184            // lag(expr [, offset [, default]])
8185            // lead(expr [, offset [, default]])
8186            if args.is_empty() {
8187                return Err(EngineError::Unsupported(alloc::format!(
8188                    "{lower}() requires at least one argument"
8189                )));
8190            }
8191            let offset: i64 = if args.len() >= 2 {
8192                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8193                    .map_err(EngineError::Eval)?;
8194                match v {
8195                    Value::SmallInt(n) => i64::from(n),
8196                    Value::Int(n) => i64::from(n),
8197                    Value::BigInt(n) => n,
8198                    _ => {
8199                        return Err(EngineError::Unsupported(alloc::format!(
8200                            "{lower}() offset must be integer"
8201                        )));
8202                    }
8203                }
8204            } else {
8205                1
8206            };
8207            let default: Value = if args.len() >= 3 {
8208                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
8209                    .map_err(EngineError::Eval)?
8210            } else {
8211                Value::Null
8212            };
8213            let values: Vec<Value> = slice
8214                .iter()
8215                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8216                .collect::<Result<_, _>>()
8217                .map_err(EngineError::Eval)?;
8218            let n = slice.len();
8219            for (i, (_, _, idx)) in slice.iter().enumerate() {
8220                let signed_offset = if lower == "lag" { -offset } else { offset };
8221                let v = if ignore_nulls {
8222                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
8223                    // skipping NULL values; the `offset`-th non-NULL
8224                    // encountered is the result.
8225                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
8226                    let needed: i64 = signed_offset.abs();
8227                    if needed == 0 {
8228                        values[i].clone()
8229                    } else {
8230                        let mut j: i64 = i as i64;
8231                        let mut hits: i64 = 0;
8232                        let mut found: Option<Value> = None;
8233                        loop {
8234                            j += step;
8235                            if j < 0 || j >= n as i64 {
8236                                break;
8237                            }
8238                            #[allow(clippy::cast_sign_loss)]
8239                            let v = &values[j as usize];
8240                            if !v.is_null() {
8241                                hits += 1;
8242                                if hits == needed {
8243                                    found = Some(v.clone());
8244                                    break;
8245                                }
8246                            }
8247                        }
8248                        found.unwrap_or_else(|| default.clone())
8249                    }
8250                } else {
8251                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
8252                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
8253                        default.clone()
8254                    } else {
8255                        #[allow(clippy::cast_sign_loss)]
8256                        {
8257                            values[target_signed as usize].clone()
8258                        }
8259                    }
8260                };
8261                out_vals[*idx] = v;
8262            }
8263            Ok(())
8264        }
8265        "first_value" | "last_value" | "nth_value" => {
8266            if args.is_empty() {
8267                return Err(EngineError::Unsupported(alloc::format!(
8268                    "{lower}() requires at least one argument"
8269                )));
8270            }
8271            let values: Vec<Value> = slice
8272                .iter()
8273                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
8274                .collect::<Result<_, _>>()
8275                .map_err(EngineError::Eval)?;
8276            let nth: usize = if lower == "nth_value" {
8277                if args.len() < 2 {
8278                    return Err(EngineError::Unsupported(
8279                        "nth_value() requires (expr, n)".into(),
8280                    ));
8281                }
8282                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
8283                    .map_err(EngineError::Eval)?;
8284                let raw = match v {
8285                    Value::SmallInt(n) => i64::from(n),
8286                    Value::Int(n) => i64::from(n),
8287                    Value::BigInt(n) => n,
8288                    _ => {
8289                        return Err(EngineError::Unsupported(
8290                            "nth_value() n must be integer".into(),
8291                        ));
8292                    }
8293                };
8294                if raw < 1 {
8295                    return Err(EngineError::Unsupported(
8296                        "nth_value() n must be >= 1".into(),
8297                    ));
8298                }
8299                #[allow(clippy::cast_sign_loss)]
8300                {
8301                    raw as usize
8302                }
8303            } else {
8304                0
8305            };
8306            let eff = effective_frame(frame, ordered)?;
8307            for i in 0..slice.len() {
8308                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
8309                let (_, _, idx) = &slice[i];
8310                let v = if lo > hi {
8311                    Value::Null
8312                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
8313                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
8314                    // selecting the boundary value within the frame.
8315                    if lower == "first_value" {
8316                        (lo..=hi)
8317                            .find_map(|j| {
8318                                let v = &values[j];
8319                                (!v.is_null()).then(|| v.clone())
8320                            })
8321                            .unwrap_or(Value::Null)
8322                    } else {
8323                        (lo..=hi)
8324                            .rev()
8325                            .find_map(|j| {
8326                                let v = &values[j];
8327                                (!v.is_null()).then(|| v.clone())
8328                            })
8329                            .unwrap_or(Value::Null)
8330                    }
8331                } else {
8332                    match lower.as_str() {
8333                        "first_value" => values[lo].clone(),
8334                        "last_value" => values[hi].clone(),
8335                        "nth_value" => {
8336                            let pos = lo + nth - 1;
8337                            if pos > hi {
8338                                Value::Null
8339                            } else {
8340                                values[pos].clone()
8341                            }
8342                        }
8343                        _ => unreachable!(),
8344                    }
8345                };
8346                out_vals[*idx] = v;
8347            }
8348            Ok(())
8349        }
8350        "ntile" => {
8351            if args.is_empty() {
8352                return Err(EngineError::Unsupported(
8353                    "ntile(n) requires an integer argument".into(),
8354                ));
8355            }
8356            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
8357                .map_err(EngineError::Eval)?;
8358            let bucket_count: i64 = match v {
8359                Value::SmallInt(n) => i64::from(n),
8360                Value::Int(n) => i64::from(n),
8361                Value::BigInt(n) => n,
8362                _ => {
8363                    return Err(EngineError::Unsupported(
8364                        "ntile() argument must be integer".into(),
8365                    ));
8366                }
8367            };
8368            if bucket_count < 1 {
8369                return Err(EngineError::Unsupported(
8370                    "ntile() argument must be >= 1".into(),
8371                ));
8372            }
8373            #[allow(clippy::cast_sign_loss)]
8374            let buckets = bucket_count as usize;
8375            let n = slice.len();
8376            // Each bucket gets `base` rows; the first `extras` buckets
8377            // get one extra. PG semantics.
8378            let base = n / buckets;
8379            let extras = n % buckets;
8380            let mut bucket: usize = 1;
8381            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
8382            let mut buckets_with_extra_remaining = extras;
8383            for (_, _, idx) in slice {
8384                if remaining_in_bucket == 0 {
8385                    bucket += 1;
8386                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
8387                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
8388                        base + 1
8389                    } else {
8390                        base
8391                    };
8392                    // Edge: if base==0 and extras==0, all rows fit;
8393                    // shouldn't reach here, but guard anyway.
8394                    if remaining_in_bucket == 0 {
8395                        remaining_in_bucket = 1;
8396                    }
8397                }
8398                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
8399                remaining_in_bucket -= 1;
8400            }
8401            Ok(())
8402        }
8403        "percent_rank" => {
8404            // (rank - 1) / (n - 1) where rank is the standard RANK().
8405            // Single-row partitions get 0.
8406            let n = slice.len();
8407            let mut prev_key: Option<&[(Value, bool)]> = None;
8408            let mut current_rank: i64 = 1;
8409            for (i, (_, okey, idx)) in slice.iter().enumerate() {
8410                if let Some(p) = prev_key
8411                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
8412                {
8413                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
8414                }
8415                if prev_key.is_none() {
8416                    current_rank = 1;
8417                }
8418                #[allow(clippy::cast_precision_loss)]
8419                let pr = if n <= 1 {
8420                    0.0
8421                } else {
8422                    (current_rank - 1) as f64 / (n - 1) as f64
8423                };
8424                out_vals[*idx] = Value::Float(pr);
8425                prev_key = Some(okey.as_slice());
8426            }
8427            Ok(())
8428        }
8429        "cume_dist" => {
8430            // # rows up to and including this row's peer group / n.
8431            let n = slice.len();
8432            // First pass: find peer-group-end rank for each row.
8433            for i in 0..slice.len() {
8434                let peer_end = peer_group_end(slice, i);
8435                #[allow(clippy::cast_precision_loss)]
8436                let cd = (peer_end + 1) as f64 / n as f64;
8437                let (_, _, idx) = &slice[i];
8438                out_vals[*idx] = Value::Float(cd);
8439            }
8440            Ok(())
8441        }
8442        other => Err(EngineError::Unsupported(alloc::format!(
8443            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
8444        ))),
8445    }
8446}
8447
8448/// v4.20: resolve the user-provided frame down to a normalised
8449/// `(kind, start, end)`. `None` means default — derive from
8450/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
8451/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
8452/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
8453/// end → CURRENT ROW per the PG spec.
8454fn effective_frame(
8455    frame: Option<&WindowFrame>,
8456    ordered: bool,
8457) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
8458    match frame {
8459        None => {
8460            if ordered {
8461                Ok((
8462                    FrameKind::Range,
8463                    FrameBound::UnboundedPreceding,
8464                    FrameBound::CurrentRow,
8465                ))
8466            } else {
8467                Ok((
8468                    FrameKind::Rows,
8469                    FrameBound::UnboundedPreceding,
8470                    FrameBound::UnboundedFollowing,
8471                ))
8472            }
8473        }
8474        Some(fr) => {
8475            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
8476            // Reject start > end (a few impossible combinations).
8477            if matches!(fr.start, FrameBound::UnboundedFollowing)
8478                || matches!(end, FrameBound::UnboundedPreceding)
8479            {
8480                return Err(EngineError::Unsupported(alloc::format!(
8481                    "invalid frame: start={:?} end={:?}",
8482                    fr.start,
8483                    end
8484                )));
8485            }
8486            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
8487            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
8488            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
8489            // implemented in v4.20.
8490            if fr.kind == FrameKind::Range
8491                && (matches!(
8492                    fr.start,
8493                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8494                ) || matches!(
8495                    end,
8496                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
8497                ))
8498            {
8499                return Err(EngineError::Unsupported(
8500                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
8501                ));
8502            }
8503            Ok((fr.kind, fr.start.clone(), end))
8504        }
8505    }
8506}
8507
8508/// Compute `(lo, hi)` row-index bounds inside the partition slice
8509/// for the row at position `i`. Inclusive, clamped to
8510/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
8511#[allow(clippy::type_complexity)]
8512fn frame_bounds_for_row(
8513    eff: &(FrameKind, FrameBound, FrameBound),
8514    i: usize,
8515    slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
8516) -> (usize, usize) {
8517    let (kind, start, end) = eff;
8518    let n = slice.len();
8519    let last = n.saturating_sub(1);
8520    let (mut lo, mut hi) = match kind {
8521        FrameKind::Rows => {
8522            let lo = match start {
8523                FrameBound::UnboundedPreceding => 0,
8524                FrameBound::OffsetPreceding(k) => {
8525                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8526                    i.saturating_sub(k)
8527                }
8528                FrameBound::CurrentRow => i,
8529                FrameBound::OffsetFollowing(k) => {
8530                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8531                    i.saturating_add(k).min(last)
8532                }
8533                FrameBound::UnboundedFollowing => last,
8534            };
8535            let hi = match end {
8536                FrameBound::UnboundedPreceding => 0,
8537                FrameBound::OffsetPreceding(k) => {
8538                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8539                    i.saturating_sub(k)
8540                }
8541                FrameBound::CurrentRow => i,
8542                FrameBound::OffsetFollowing(k) => {
8543                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
8544                    i.saturating_add(k).min(last)
8545                }
8546                FrameBound::UnboundedFollowing => last,
8547            };
8548            (lo, hi)
8549        }
8550        FrameKind::Range => {
8551            // RANGE bounds are peer-aware. With only UNBOUNDED and
8552            // CURRENT ROW supported (rejected at effective_frame for
8553            // explicit offsets), the start/end map to the
8554            // partition's full extent at the same-order-key peer
8555            // group boundary.
8556            let lo = match start {
8557                FrameBound::UnboundedPreceding => 0,
8558                FrameBound::CurrentRow => peer_group_start(slice, i),
8559                FrameBound::UnboundedFollowing => last,
8560                _ => unreachable!("offset bounds rejected for RANGE"),
8561            };
8562            let hi = match end {
8563                FrameBound::UnboundedPreceding => 0,
8564                FrameBound::CurrentRow => peer_group_end(slice, i),
8565                FrameBound::UnboundedFollowing => last,
8566                _ => unreachable!("offset bounds rejected for RANGE"),
8567            };
8568            (lo, hi)
8569        }
8570    };
8571    if hi >= n {
8572        hi = last;
8573    }
8574    if lo >= n {
8575        lo = last;
8576    }
8577    (lo, hi)
8578}
8579
8580/// Find the inclusive index of the first row with the same ORDER
8581/// BY key as `slice[i]`. Slice is already sorted by partition then
8582/// order, so peers are contiguous.
8583#[allow(clippy::type_complexity)]
8584fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8585    let key = &slice[i].1;
8586    let mut j = i;
8587    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
8588        j -= 1;
8589    }
8590    j
8591}
8592
8593/// Find the inclusive index of the last row with the same ORDER
8594/// BY key as `slice[i]`.
8595#[allow(clippy::type_complexity)]
8596fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
8597    let key = &slice[i].1;
8598    let mut j = i;
8599    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
8600        j += 1;
8601    }
8602    j
8603}
8604
8605fn value_to_f64(v: &Value) -> Option<f64> {
8606    match v {
8607        Value::SmallInt(n) => Some(f64::from(*n)),
8608        Value::Int(n) => Some(f64::from(*n)),
8609        #[allow(clippy::cast_precision_loss)]
8610        Value::BigInt(n) => Some(*n as f64),
8611        Value::Float(x) => Some(*x),
8612        _ => None,
8613    }
8614}
8615
8616/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
8617/// projection / `order_by` — saves cloning the AST when there are
8618/// none (the common case).
8619fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
8620    let mut any = false;
8621    for item in &stmt.items {
8622        if let SelectItem::Expr { expr, .. } = item {
8623            any = any || expr_has_subquery(expr);
8624        }
8625    }
8626    if let Some(w) = &stmt.where_ {
8627        any = any || expr_has_subquery(w);
8628    }
8629    if let Some(h) = &stmt.having {
8630        any = any || expr_has_subquery(h);
8631    }
8632    for o in &stmt.order_by {
8633        any = any || expr_has_subquery(&o.expr);
8634    }
8635    for (_, peer) in &stmt.unions {
8636        any = any || expr_tree_has_subquery(peer);
8637    }
8638    any
8639}
8640
8641fn expr_has_subquery(e: &Expr) -> bool {
8642    match e {
8643        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
8644        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
8645        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8646            expr_has_subquery(expr)
8647        }
8648        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
8649        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
8650        Expr::Extract { source, .. } => expr_has_subquery(source),
8651        Expr::WindowFunction {
8652            args,
8653            partition_by,
8654            order_by,
8655            ..
8656        } => {
8657            args.iter().any(expr_has_subquery)
8658                || partition_by.iter().any(expr_has_subquery)
8659                || order_by.iter().any(|(e, _)| expr_has_subquery(e))
8660        }
8661        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
8662        Expr::Array(items) => items.iter().any(expr_has_subquery),
8663        Expr::ArraySubscript { target, index } => {
8664            expr_has_subquery(target) || expr_has_subquery(index)
8665        }
8666        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
8667        Expr::Case {
8668            operand,
8669            branches,
8670            else_branch,
8671        } => {
8672            operand.as_deref().is_some_and(expr_has_subquery)
8673                || branches
8674                    .iter()
8675                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
8676                || else_branch.as_deref().is_some_and(expr_has_subquery)
8677        }
8678    }
8679}
8680
8681/// v4.10 helper: materialise a runtime `Value` back into an AST
8682/// `Expr::Literal` for the subquery-rewrite path. Supports the
8683/// types `Literal` can represent (Integer / Float / Text / Bool /
8684/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
8685/// would lose precision through Literal and aren't supported in
8686/// uncorrelated-subquery results; they error with a clear hint.
8687fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
8688    let lit = match v {
8689        Value::Null => Literal::Null,
8690        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8691        Value::Int(n) => Literal::Integer(i64::from(n)),
8692        Value::BigInt(n) => Literal::Integer(n),
8693        Value::Float(x) => Literal::Float(x),
8694        Value::Text(s) | Value::Json(s) => Literal::String(s),
8695        Value::Bool(b) => Literal::Bool(b),
8696        other => {
8697            return Err(EngineError::Unsupported(alloc::format!(
8698                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
8699                other.data_type()
8700            )));
8701        }
8702    };
8703    Ok(Expr::Literal(lit))
8704}
8705
8706/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
8707/// round-5 G4). Covers the most common `Value` variants. Types
8708/// that need lossy textual round-trip (BYTEA, arrays, ts*)
8709/// surface as an Unsupported error so the caller can add a cast
8710/// in the inner SELECT.
8711fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
8712    let lit = match v {
8713        Value::Null => Literal::Null,
8714        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
8715        Value::Int(n) => Literal::Integer(i64::from(n)),
8716        Value::BigInt(n) => Literal::Integer(n),
8717        Value::Float(x) => Literal::Float(x),
8718        Value::Text(s) | Value::Json(s) => Literal::String(s),
8719        Value::Bool(b) => Literal::Bool(b),
8720        Value::Vector(xs) => Literal::Vector(xs),
8721        // Date / Timestamp / Timestamptz / Numeric round-trip
8722        // through a TEXT literal that `coerce_value` re-parses
8723        // against the target column type.
8724        Value::Date(days) => {
8725            let micros = (i64::from(days)) * 86_400_000_000;
8726            Literal::String(format_timestamp_micros_as_date(micros))
8727        }
8728        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
8729        Value::Numeric { scaled, scale } => {
8730            Literal::String(format_numeric(scaled, scale))
8731        }
8732        other => {
8733            return Err(EngineError::Unsupported(alloc::format!(
8734                "INSERT … SELECT cannot materialise value of type {:?}; \
8735                 add an explicit CAST in the inner SELECT",
8736                other.data_type()
8737            )));
8738        }
8739    };
8740    Ok(Expr::Literal(lit))
8741}
8742
8743fn format_timestamp_micros(us: i64) -> String {
8744    // Same Y/M/D split used by the wire layer; epoch-relative.
8745    let days = us.div_euclid(86_400_000_000);
8746    let intra_day = us.rem_euclid(86_400_000_000);
8747    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
8748    let secs = intra_day / 1_000_000;
8749    let us_rem = intra_day % 1_000_000;
8750    let h = (secs / 3600) % 24;
8751    let m = (secs / 60) % 60;
8752    let s = secs % 60;
8753    if us_rem == 0 {
8754        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
8755    } else {
8756        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
8757    }
8758}
8759
8760fn format_timestamp_micros_as_date(us: i64) -> String {
8761    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
8762    // Gregorian conversion used by spg-engine's date helpers.
8763    let days = us.div_euclid(86_400_000_000);
8764    // 1970-01-01 = JDN 2440588.
8765    let jdn = days + 2_440_588;
8766    let (y, mo, d) = jdn_to_ymd(jdn);
8767    alloc::format!("{y:04}-{mo:02}-{d:02}")
8768}
8769
/// Convert a Julian Day Number to a `(year, month, day)` triple
/// using the Fliegel & Van Flandern (1968) integer algorithm.
///
/// Intended for positive JDNs; the truncating divisions are not
/// adjusted for negative inputs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    let shifted = jdn + 68569;
    let cycle = (4 * shifted) / 146_097;
    let within_cycle = shifted - (146_097 * cycle + 3) / 4;
    let year_part = (4000 * (within_cycle + 1)) / 1_461_001;
    let within_year = within_cycle - (1461 * year_part) / 4 + 31;
    let month_raw = (80 * within_year) / 2447;
    // 1 when `month_raw` runs past the end of the year, else 0.
    let carry = month_raw / 11;

    let day = within_year - (2447 * month_raw) / 80;
    let month = month_raw + 2 - 12 * carry;
    let year = 100 * (cycle - 49) + year_part + carry;
    (year, month as u32, day as u32)
}
8784
/// Render a fixed-point number (`scaled` × 10^-`scale`) as decimal
/// text, e.g. `(12345, 2)` → `"123.45"` and `(-5, 1)` → `"-0.5"`.
///
/// A `scale` of 0 prints the integer as-is. Otherwise the output
/// always has exactly `scale` fractional digits.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let abs = scaled.unsigned_abs();
    let (whole, frac) = match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => (abs / divisor, abs % divisor),
        // 10^scale no longer fits in u128 (scale >= 39), which means
        // it exceeds every possible `abs`: all digits are fractional.
        // A plain `pow` would overflow here.
        None => (0, abs),
    };
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!(
        "{sign}{whole}.{frac:0width$}",
        width = usize::from(scale)
    )
}
8799
8800/// v6.1.1 — walk the prepared `Statement` AST and replace every
8801/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
8802/// params[n-1]))`. The dispatch downstream sees a `Statement`
8803/// indistinguishable from a simple-query parse, so the exec path
8804/// stays unchanged.
8805///
8806/// Errors fall into one shape: a `$N` references past the bound
8807/// `params.len()`. Out-of-range happens when the Bind didn't
8808/// supply enough values; pgwire surfaces this as a protocol error
8809/// to the client.
8810fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
8811    match stmt {
8812        Statement::Select(s) => substitute_select(s, params)?,
8813        Statement::Insert(ins) => {
8814            for row in &mut ins.rows {
8815                for e in row {
8816                    substitute_expr(e, params)?;
8817                }
8818            }
8819        }
8820        Statement::Update(u) => {
8821            for (_, e) in &mut u.assignments {
8822                substitute_expr(e, params)?;
8823            }
8824            if let Some(w) = &mut u.where_ {
8825                substitute_expr(w, params)?;
8826            }
8827        }
8828        Statement::Delete(d) => {
8829            if let Some(w) = &mut d.where_ {
8830                substitute_expr(w, params)?;
8831            }
8832        }
8833        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
8834        // Other statements (CREATE / BEGIN / SHOW / …) have no
8835        // expression slots; no walk needed.
8836        _ => {}
8837    }
8838    Ok(())
8839}
8840
8841fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
8842    for item in &mut s.items {
8843        if let SelectItem::Expr { expr, .. } = item {
8844            substitute_expr(expr, params)?;
8845        }
8846    }
8847    if let Some(w) = &mut s.where_ {
8848        substitute_expr(w, params)?;
8849    }
8850    if let Some(gs) = &mut s.group_by {
8851        for g in gs {
8852            substitute_expr(g, params)?;
8853        }
8854    }
8855    if let Some(h) = &mut s.having {
8856        substitute_expr(h, params)?;
8857    }
8858    for o in &mut s.order_by {
8859        substitute_expr(&mut o.expr, params)?;
8860    }
8861    for (_, peer) in &mut s.unions {
8862        substitute_select(peer, params)?;
8863    }
8864    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
8865    // mailrs H2. After this pass each LIMIT/OFFSET that was a
8866    // Placeholder is rewritten to Literal so the existing
8867    // `LimitExpr::as_literal` path consumes a concrete u32.
8868    if let Some(le) = s.limit {
8869        s.limit = Some(resolve_limit_placeholder(le, params)?);
8870    }
8871    if let Some(le) = s.offset {
8872        s.offset = Some(resolve_limit_placeholder(le, params)?);
8873    }
8874    Ok(())
8875}
8876
8877fn resolve_limit_placeholder(
8878    le: spg_sql::ast::LimitExpr,
8879    params: &[Value],
8880) -> Result<spg_sql::ast::LimitExpr, EngineError> {
8881    use spg_sql::ast::LimitExpr;
8882    match le {
8883        LimitExpr::Literal(_) => Ok(le),
8884        LimitExpr::Placeholder(n) => {
8885            let idx = usize::from(n).saturating_sub(1);
8886            let v = params.get(idx).ok_or_else(|| {
8887                EngineError::Eval(EvalError::PlaceholderOutOfRange {
8888                    n,
8889                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
8890                })
8891            })?;
8892            let int = match v {
8893                Value::SmallInt(x) => Some(i64::from(*x)),
8894                Value::Int(x) => Some(i64::from(*x)),
8895                Value::BigInt(x) => Some(*x),
8896                _ => None,
8897            }
8898            .ok_or_else(|| {
8899                EngineError::Unsupported(alloc::format!(
8900                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
8901                ))
8902            })?;
8903            if int < 0 {
8904                return Err(EngineError::Unsupported(alloc::format!(
8905                    "LIMIT/OFFSET ${n} bound to negative value {int}"
8906                )));
8907            }
8908            let bounded = u32::try_from(int).map_err(|_| {
8909                EngineError::Unsupported(alloc::format!(
8910                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
8911                ))
8912            })?;
8913            Ok(LimitExpr::Literal(bounded))
8914        }
8915    }
8916}
8917
8918fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
8919    if let Expr::Placeholder(n) = e {
8920        let idx = usize::from(*n).saturating_sub(1);
8921        let v = params.get(idx).ok_or_else(|| {
8922            EngineError::Eval(EvalError::PlaceholderOutOfRange {
8923                n: *n,
8924                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
8925            })
8926        })?;
8927        *e = Expr::Literal(value_to_literal(v.clone()));
8928        return Ok(());
8929    }
8930    match e {
8931        Expr::Binary { lhs, rhs, .. } => {
8932            substitute_expr(lhs, params)?;
8933            substitute_expr(rhs, params)?;
8934        }
8935        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
8936            substitute_expr(expr, params)?;
8937        }
8938        Expr::FunctionCall { args, .. } => {
8939            for a in args {
8940                substitute_expr(a, params)?;
8941            }
8942        }
8943        Expr::Like { expr, pattern, .. } => {
8944            substitute_expr(expr, params)?;
8945            substitute_expr(pattern, params)?;
8946        }
8947        Expr::Extract { source, .. } => substitute_expr(source, params)?,
8948        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
8949        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
8950        Expr::InSubquery { expr, subquery, .. } => {
8951            substitute_expr(expr, params)?;
8952            substitute_select(subquery, params)?;
8953        }
8954        Expr::WindowFunction {
8955            args,
8956            partition_by,
8957            order_by,
8958            ..
8959        } => {
8960            for a in args {
8961                substitute_expr(a, params)?;
8962            }
8963            for p in partition_by {
8964                substitute_expr(p, params)?;
8965            }
8966            for (e, _) in order_by {
8967                substitute_expr(e, params)?;
8968            }
8969        }
8970        Expr::Literal(_) | Expr::Column(_) => {}
8971        // Already handled above.
8972        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
8973        Expr::Array(items) => {
8974            for elem in items {
8975                substitute_expr(elem, params)?;
8976            }
8977        }
8978        Expr::ArraySubscript { target, index } => {
8979            substitute_expr(target, params)?;
8980            substitute_expr(index, params)?;
8981        }
8982        Expr::AnyAll { expr, array, .. } => {
8983            substitute_expr(expr, params)?;
8984            substitute_expr(array, params)?;
8985        }
8986        Expr::Case {
8987            operand,
8988            branches,
8989            else_branch,
8990        } => {
8991            if let Some(o) = operand {
8992                substitute_expr(o, params)?;
8993            }
8994            for (w, t) in branches {
8995                substitute_expr(w, params)?;
8996                substitute_expr(t, params)?;
8997            }
8998            if let Some(e) = else_branch {
8999                substitute_expr(e, params)?;
9000            }
9001        }
9002    }
9003    Ok(())
9004}
9005
9006/// v6.1.1 — convert a runtime `Value` into the closest matching
9007/// `Literal` for the substitute walker. Lossless for the simple
9008/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
9009/// / Json / Interval render as their canonical text form so the
9010/// downstream coerce_value can re-parse against the target column
9011/// type. SQ8 / HalfVector cells are NOT expected as bind params;
9012/// pgwire's Bind decodes vector params to the f32 representation
9013/// before they reach this helper.
9014/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
9015/// column's non-NULL sample before histogram building. Cross-type
9016/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
9017/// same widening the eval-side `compare` operator uses; everything
9018/// else (the genuinely-incompatible pairs) falls back to ordering
9019/// by canonical string form so the sort is still total + stable.
9020/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
9021/// here only via the string-fallback path because vector columns
9022/// are filtered out upstream.
9023fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
9024    use core::cmp::Ordering;
9025    match (a, b) {
9026        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
9027        (Value::Int(a), Value::Int(b)) => a.cmp(b),
9028        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
9029        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
9030        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
9031        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9032        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
9033        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
9034        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
9035        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
9036        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
9037        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
9038        (Value::Date(a), Value::Date(b)) => a.cmp(b),
9039        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
9040        // Mixed numeric/float — widen to f64 and compare.
9041        (Value::SmallInt(n), Value::Float(x)) => {
9042            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9043        }
9044        (Value::Float(x), Value::SmallInt(n)) => {
9045            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9046        }
9047        (Value::Int(n), Value::Float(x)) => {
9048            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
9049        }
9050        (Value::Float(x), Value::Int(n)) => {
9051            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
9052        }
9053        (Value::BigInt(n), Value::Float(x)) => {
9054            #[allow(clippy::cast_precision_loss)]
9055            let nf = *n as f64;
9056            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
9057        }
9058        (Value::Float(x), Value::BigInt(n)) => {
9059            #[allow(clippy::cast_precision_loss)]
9060            let nf = *n as f64;
9061            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
9062        }
9063        // Cross-type fallback: lexicographic on canonical form.
9064        // Total + stable so the sort is well-defined.
9065        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
9066    }
9067}
9068
/// v6.2.0 — render the histogram bounds list as a `[v0, v1, ...]`
/// string for the `spg_statistic.histogram_bounds` column.
///
/// A bound is written verbatim unless that would make the list
/// ambiguous, in which case it is wrapped in double quotes with `"`
/// and `\` backslash-escaped. Quoting is triggered by:
///
///   - an empty bound (otherwise indistinguishable from "no element"),
///   - any of `,`, `[`, `]`, `"` (list / quote syntax),
///   - a `\` (so a backslash only ever appears inside a quoted,
///     escaped element),
///   - leading or trailing whitespace (it would otherwise blend into
///     the `, ` separator).
///
/// v6.2.0 only uses the rendered form for human consumption, so the
/// escaping is deliberately conservative.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    out.push('[');
    for (i, b) in bounds.iter().enumerate() {
        if i > 0 {
            out.push_str(", ");
        }
        let needs_quote = b.is_empty()
            || b.contains([',', '[', ']', '"', '\\'])
            || b.starts_with(char::is_whitespace)
            || b.ends_with(char::is_whitespace);
        if needs_quote {
            out.push('"');
            for ch in b.chars() {
                // Only the quote and the escape character itself need
                // escaping inside a quoted element.
                if ch == '"' || ch == '\\' {
                    out.push('\\');
                }
                out.push(ch);
            }
            out.push('"');
        } else {
            out.push_str(b);
        }
    }
    out.push(']');
    out
}
9099
9100/// v6.2.0 — canonical textual form of a `Value` for histogram
9101/// bound storage. Strings used by ANALYZE for sort + bound output.
9102/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
9103/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
9104/// the same form `format_date` / `format_timestamp` produce for
9105/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
9106/// reach this only via a non-Vector column (vector columns are
9107/// skipped upstream); they fall back to a Debug-derived form so
9108/// stats still serialise without crashing.
9109pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
9110    match v {
9111        Value::Null => "NULL".to_string(),
9112        Value::SmallInt(n) => alloc::format!("{n}"),
9113        Value::Int(n) => alloc::format!("{n}"),
9114        Value::BigInt(n) => alloc::format!("{n}"),
9115        Value::Float(x) => alloc::format!("{x:?}"),
9116        Value::Text(s) | Value::Json(s) => s.clone(),
9117        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
9118        Value::Date(d) => eval::format_date(*d),
9119        Value::Timestamp(t) => eval::format_timestamp(*t),
9120        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
9121        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
9122        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
9123            // Unreachable in practice (vector columns are filtered
9124            // out before this). Defensive fallback so a future
9125            // vector-stats path doesn't crash.
9126            alloc::format!("{v:?}")
9127        }
9128        // v7.5.0 — Value is #[non_exhaustive] for downstream
9129        // forward-compat. Future variants fall through to Debug
9130        // form here (same shape as the vector fallback above).
9131        _ => alloc::format!("{v:?}"),
9132    }
9133}
9134
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently a reserved hook: the name is ignored and the answer is
/// always `false`, so every table is analysed. The engine keeps
/// publications / subscriptions / users / statistics as engine
/// fields rather than catalog tables, so there is nothing to skip
/// yet.
const fn is_internal_table_name(_name: &str) -> bool {
    // No internal catalog tables exist today.
    false
}
9144
9145fn value_to_literal(v: Value) -> Literal {
9146    match v {
9147        Value::Null => Literal::Null,
9148        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
9149        Value::Int(n) => Literal::Integer(i64::from(n)),
9150        Value::BigInt(n) => Literal::Integer(n),
9151        Value::Float(x) => Literal::Float(x),
9152        Value::Text(s) | Value::Json(s) => Literal::String(s),
9153        Value::Bool(b) => Literal::Bool(b),
9154        Value::Vector(v) => Literal::Vector(v),
9155        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
9156        Value::Date(d) => Literal::String(eval::format_date(d)),
9157        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
9158        Value::Interval { months, micros } => Literal::Interval {
9159            months,
9160            micros,
9161            text: eval::format_interval(months, micros),
9162        },
9163        // SQ8 / halfvec cells dequantise to f32 before reaching the
9164        // substitute walker; pgwire's Bind path handles that.
9165        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
9166        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
9167        // v7.5.0 — Value is #[non_exhaustive]; future variants
9168        // render as Debug-form String literal until explicit
9169        // mapping is added.
9170        v => Literal::String(alloc::format!("{v:?}")),
9171    }
9172}
9173
9174fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
9175    let Some(now) = now_micros else {
9176        return;
9177    };
9178    match stmt {
9179        Statement::Select(s) => rewrite_select_clock(s, now),
9180        Statement::Insert(ins) => {
9181            for row in &mut ins.rows {
9182                for e in row {
9183                    rewrite_expr_clock(e, now);
9184                }
9185            }
9186        }
9187        _ => {}
9188    }
9189}
9190
9191fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
9192    for item in &mut s.items {
9193        if let SelectItem::Expr { expr, .. } = item {
9194            rewrite_expr_clock(expr, now);
9195        }
9196    }
9197    if let Some(w) = &mut s.where_ {
9198        rewrite_expr_clock(w, now);
9199    }
9200    if let Some(gs) = &mut s.group_by {
9201        for g in gs {
9202            rewrite_expr_clock(g, now);
9203        }
9204    }
9205    if let Some(h) = &mut s.having {
9206        rewrite_expr_clock(h, now);
9207    }
9208    for o in &mut s.order_by {
9209        rewrite_expr_clock(&mut o.expr, now);
9210    }
9211    for (_, peer) in &mut s.unions {
9212        rewrite_select_clock(peer, now);
9213    }
9214}
9215
9216/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
9217/// Literal / Column-with-qualifier (the dominant cases on a typical
9218/// AST) take a single pattern dispatch and exit. The clock-rewrite
9219/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
9220/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
9221/// refs) sit on their own arms with match guards so the fall-through
9222/// to the recursive arms is unambiguous.
9223fn rewrite_expr_clock(e: &mut Expr, now: i64) {
9224    // Fast-path test on the no-recursion shapes first. We can't fold
9225    // them into the big match below because they need to *replace* `e`
9226    // outright; the recursive arms below match on its sub-fields.
9227    if let Some(replacement) = clock_replacement_for(e, now) {
9228        *e = replacement;
9229        return;
9230    }
9231    match e {
9232        Expr::Binary { lhs, rhs, .. } => {
9233            rewrite_expr_clock(lhs, now);
9234            rewrite_expr_clock(rhs, now);
9235        }
9236        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
9237            rewrite_expr_clock(expr, now);
9238        }
9239        Expr::FunctionCall { args, .. } => {
9240            for a in args {
9241                rewrite_expr_clock(a, now);
9242            }
9243        }
9244        Expr::Like { expr, pattern, .. } => {
9245            rewrite_expr_clock(expr, now);
9246            rewrite_expr_clock(pattern, now);
9247        }
9248        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
9249        // v4.10 subquery nodes — recurse into the inner SELECT's
9250        // expression slots so e.g. SELECT NOW() in a scalar
9251        // subquery picks up the same instant as the outer query.
9252        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
9253        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
9254        Expr::InSubquery { expr, subquery, .. } => {
9255            rewrite_expr_clock(expr, now);
9256            rewrite_select_clock(subquery, now);
9257        }
9258        // v4.12 window functions — args + PARTITION BY + ORDER BY
9259        // may all reference clock literals.
9260        Expr::WindowFunction {
9261            args,
9262            partition_by,
9263            order_by,
9264            ..
9265        } => {
9266            for a in args {
9267                rewrite_expr_clock(a, now);
9268            }
9269            for p in partition_by {
9270                rewrite_expr_clock(p, now);
9271            }
9272            for (e, _) in order_by {
9273                rewrite_expr_clock(e, now);
9274            }
9275        }
9276        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
9277        Expr::Array(items) => {
9278            for elem in items {
9279                rewrite_expr_clock(elem, now);
9280            }
9281        }
9282        Expr::ArraySubscript { target, index } => {
9283            rewrite_expr_clock(target, now);
9284            rewrite_expr_clock(index, now);
9285        }
9286        Expr::AnyAll { expr, array, .. } => {
9287            rewrite_expr_clock(expr, now);
9288            rewrite_expr_clock(array, now);
9289        }
9290        Expr::Case {
9291            operand,
9292            branches,
9293            else_branch,
9294        } => {
9295            if let Some(o) = operand {
9296                rewrite_expr_clock(o, now);
9297            }
9298            for (w, t) in branches {
9299                rewrite_expr_clock(w, now);
9300                rewrite_expr_clock(t, now);
9301            }
9302            if let Some(e) = else_branch {
9303                rewrite_expr_clock(e, now);
9304            }
9305        }
9306    }
9307}
9308
9309/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
9310/// must be rewritten; otherwise `None` so the caller falls through to
9311/// the recursive walk. Identifies both function-call forms (`NOW()` /
9312/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
9313/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
9314/// which is how PG accepts them without parens).
9315fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
9316    let (kind, name) = match e {
9317        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
9318        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
9319        _ => return None,
9320    };
9321    // ASCII case-insensitive name match. Limited to the three keywords
9322    // that actually need rewriting.
9323    let matched = match name.len() {
9324        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
9325        12 if name.eq_ignore_ascii_case("current_date") => Some(false),
9326        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
9327        _ => None,
9328    };
9329    let is_timestamp = matched?;
9330    let payload = if is_timestamp {
9331        now
9332    } else {
9333        now.div_euclid(86_400_000_000)
9334    };
9335    let target = if is_timestamp {
9336        spg_sql::ast::CastTarget::Timestamp
9337    } else {
9338        spg_sql::ast::CastTarget::Date
9339    };
9340    Some(Expr::Cast {
9341        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
9342        target,
9343    })
9344}
9345
/// Syntactic position in which a candidate clock keyword was found.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ClockSite {
    /// A zero-argument function call, e.g. `NOW()`.
    Fn,
    /// An unqualified identifier, e.g. `CURRENT_DATE` without parens.
    BareIdent,
}
9351
9352/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
9353/// Swap the integer literal for the matching item's expression so the
9354/// executor doesn't need a special-case branch. Recurses into UNION
9355/// peers because each peer keeps its own SELECT list.
9356/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
9357/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
9358/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
9359/// and groups by whatever explicit non-aggregates remain — none in
9360/// the wildcard-only case, which still works for non-aggregate
9361/// queries).
9362fn expand_group_by_all(s: &mut SelectStatement) {
9363    if !s.group_by_all {
9364        for (_, peer) in &mut s.unions {
9365            expand_group_by_all(peer);
9366        }
9367        return;
9368    }
9369    let mut groups: Vec<Expr> = Vec::new();
9370    for item in &s.items {
9371        if let SelectItem::Expr { expr, .. } = item
9372            && !aggregate::contains_aggregate(expr)
9373        {
9374            groups.push(expr.clone());
9375        }
9376    }
9377    s.group_by = Some(groups);
9378    s.group_by_all = false;
9379    for (_, peer) in &mut s.unions {
9380        expand_group_by_all(peer);
9381    }
9382}
9383
9384fn resolve_order_by_position(s: &mut SelectStatement) {
9385    // v6.4.0 — iterate every ORDER BY key. Position references
9386    // (`ORDER BY 2`) bind to the 1-based projection index;
9387    // identifier references that match a SELECT-list alias bind to
9388    // the projected expression (Step 4 of L3a).
9389    for order in &mut s.order_by {
9390        match &order.expr {
9391            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
9392                if let Ok(idx_one_based) = usize::try_from(*n) {
9393                    let idx = idx_one_based - 1;
9394                    if idx < s.items.len()
9395                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
9396                    {
9397                        order.expr = expr.clone();
9398                    }
9399                }
9400            }
9401            Expr::Column(c) if c.qualifier.is_none() => {
9402                // Alias-in-ORDER-BY lookup.
9403                for item in &s.items {
9404                    if let SelectItem::Expr {
9405                        expr,
9406                        alias: Some(a),
9407                    } = item
9408                        && a == &c.name
9409                    {
9410                        order.expr = expr.clone();
9411                        break;
9412                    }
9413                }
9414            }
9415            _ => {}
9416        }
9417    }
9418    for (_, peer) in &mut s.unions {
9419        resolve_order_by_position(peer);
9420    }
9421}
9422
9423/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
9424/// Used by the UNION ORDER BY path; per-block paths inline the same
9425/// comparator because they already hold `&OrderBy` directly.
9426/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
9427/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
9428/// partition the prefix in O(n), then sort just that prefix in O(k
9429/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
9430/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
9431/// full-sort behaviour.
9432///
9433/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
9434/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
9435fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
9436    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
9437    match keep {
9438        Some(k) if k < tagged.len() && k > 0 => {
9439            let pivot = k - 1;
9440            tagged.select_nth_unstable_by(pivot, cmp);
9441            tagged[..k].sort_by(cmp);
9442            tagged.truncate(k);
9443        }
9444        _ => {
9445            tagged.sort_by(cmp);
9446        }
9447    }
9448}
9449
9450fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
9451    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
9452}
9453
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Compares `a` and `b` key by key (up to the shorter of the two) and
/// returns the first non-equal result; `Equal` when every compared
/// key ties. `descs[i]` reverses the ordering of key `i`; a missing
/// flag means ascending.
///
/// NULL is encoded by the caller as `f64::INFINITY`, so it sorts last
/// in ASC and first in DESC (matches PG default).
///
/// NaN keys are ordered deterministically: NaN compares greater than
/// every non-NaN key and equal to another NaN. Treating NaN as equal
/// to everything would make "equality" non-transitive, which breaks
/// the total-order contract of `sort_by` / `select_nth_unstable_by`.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when a NaN is involved; fall
        // back to "is NaN" ordering so NaN is the greatest key.
        let ascending = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ascending.reverse()
        } else {
            ascending
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
9472
9473/// v6.4.0 — eval every ORDER BY expression for a row and pack the
9474/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
9475fn build_order_keys(
9476    order_by: &[OrderBy],
9477    row: &Row,
9478    ctx: &EvalContext,
9479) -> Result<Vec<f64>, EngineError> {
9480    let mut keys = Vec::with_capacity(order_by.len());
9481    for o in order_by {
9482        let v = eval::eval_expr(&o.expr, row, ctx)?;
9483        keys.push(value_to_order_key(&v)?);
9484    }
9485    Ok(keys)
9486}
9487
9488/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
9489/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
9490/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
9491fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
9492    if let Some(off) = offset {
9493        let off = off as usize;
9494        if off >= rows.len() {
9495            rows.clear();
9496        } else {
9497            rows.drain(..off);
9498        }
9499    }
9500    if let Some(n) = limit {
9501        rows.truncate(n as usize);
9502    }
9503}
9504
9505/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
9506/// names + parent table name) into the storage-layer shape (column
9507/// indices + same parent table). Validates everything the engine
9508/// needs to know about the FK at CREATE TABLE time:
9509///
9510///   - parent table exists (catalog lookup, unless self-referencing)
9511///   - parent columns exist on the parent table
9512///   - parent column list matches the local arity (defaults to the
9513///     parent's primary index column when omitted)
9514///   - parent columns are covered by a `BTree` UNIQUE-class index
9515///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
9516///     the v7.6.2 INSERT path can do an O(log n) parent lookup
9517///   - local columns exist on the table being created
9518fn resolve_foreign_key(
9519    local_table_name: &str,
9520    local_cols: &[ColumnSchema],
9521    fk: spg_sql::ast::ForeignKeyConstraint,
9522    catalog: &Catalog,
9523) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
9524    // Resolve local columns.
9525    let mut local_columns = Vec::with_capacity(fk.columns.len());
9526    for name in &fk.columns {
9527        let pos = local_cols
9528            .iter()
9529            .position(|c| c.name == *name)
9530            .ok_or_else(|| {
9531                EngineError::Unsupported(alloc::format!(
9532                    "FOREIGN KEY references unknown local column {name:?}"
9533                ))
9534            })?;
9535        local_columns.push(pos);
9536    }
9537    // Self-referencing FK: parent table is the one we're creating.
9538    // The parent column resolution uses the local column list since
9539    // the catalog doesn't have this table yet.
9540    let is_self_ref = fk.parent_table == local_table_name;
9541    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
9542        (local_cols, local_table_name)
9543    } else {
9544        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
9545            EngineError::Storage(StorageError::TableNotFound {
9546                name: fk.parent_table.clone(),
9547            })
9548        })?;
9549        (
9550            parent_table.schema().columns.as_slice(),
9551            fk.parent_table.as_str(),
9552        )
9553    };
9554    // Resolve parent column names → positions. If the FK omitted the
9555    // parent column list, fall back to the parent's primary index
9556    // column (single-column only — composite default is rejected
9557    // because there's no unambiguous "PK" in SPG's index list).
9558    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
9559        if fk.columns.len() != 1 {
9560            return Err(EngineError::Unsupported(
9561                "composite FOREIGN KEY without explicit parent column list is not supported \
9562                 — list the parent columns explicitly"
9563                    .into(),
9564            ));
9565        }
9566        // Find a single BTree index on the parent and use its column.
9567        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
9568            .ok_or_else(|| {
9569                EngineError::Unsupported(alloc::format!(
9570                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
9571                     to default the FOREIGN KEY against"
9572                ))
9573            })?;
9574        alloc::vec![pos]
9575    } else {
9576        let mut out = Vec::with_capacity(fk.parent_columns.len());
9577        for name in &fk.parent_columns {
9578            let pos = parent_cols_for_lookup
9579                .iter()
9580                .position(|c| c.name == *name)
9581                .ok_or_else(|| {
9582                    EngineError::Unsupported(alloc::format!(
9583                        "FOREIGN KEY references unknown parent column \
9584                         {name:?} on table {parent_table_str:?}"
9585                    ))
9586                })?;
9587            out.push(pos);
9588        }
9589        out
9590    };
9591    if parent_columns.len() != local_columns.len() {
9592        return Err(EngineError::Unsupported(alloc::format!(
9593            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
9594            local_columns.len(),
9595            parent_columns.len()
9596        )));
9597    }
9598    // For non-self-referencing FKs, verify the parent column set is
9599    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
9600    // declaration; the convention is "the parent column for FK
9601    // purposes must have a BTree index" — which the user creates via
9602    // `CREATE INDEX ... USING btree (col)` (the default). We accept
9603    // any single-column BTree index that covers a parent column;
9604    // composite parent column lists require an index whose `column_position`
9605    // matches the first parent column (multi-column BTree indices
9606    // are not in the v7.x roadmap).
9607    if !is_self_ref {
9608        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
9609        let primary_parent_col = parent_columns[0];
9610        let has_btree = parent_table
9611            .schema()
9612            .columns
9613            .get(primary_parent_col)
9614            .is_some()
9615            && parent_table.indices().iter().any(|idx| {
9616                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9617                    && idx.column_position == primary_parent_col
9618                    && idx.partial_predicate.is_none()
9619            });
9620        if !has_btree {
9621            return Err(EngineError::Unsupported(alloc::format!(
9622                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
9623                 index — create one with `CREATE INDEX ... ON {} ({})` first",
9624                parent_table_str,
9625                parent_table_str,
9626                parent_table.schema().columns[primary_parent_col].name,
9627            )));
9628        }
9629    }
9630    let on_delete = fk_action_sql_to_storage(fk.on_delete);
9631    let on_update = fk_action_sql_to_storage(fk.on_update);
9632    Ok(spg_storage::ForeignKeyConstraint {
9633        name: fk.name,
9634        local_columns,
9635        parent_table: fk.parent_table,
9636        parent_columns,
9637        on_delete,
9638        on_update,
9639    })
9640}
9641
9642/// v7.6.1 — pick a sentinel "primary key" column from the parent
9643/// table when the FK didn't name parent columns. Picks the first
9644/// single-column unconditional BTree index — that's the closest
9645/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
9646/// `local_cols` as the column source.
9647fn pick_pk_index_column(
9648    catalog: &Catalog,
9649    parent_name: &str,
9650    is_self_ref: bool,
9651    local_cols: &[ColumnSchema],
9652) -> Option<usize> {
9653    if is_self_ref {
9654        // Self-ref FK omitted parent columns: pick column 0 by
9655        // convention (no catalog entry yet). Engine will widen this
9656        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
9657        let _ = local_cols;
9658        return Some(0);
9659    }
9660    let parent = catalog.get(parent_name)?;
9661    parent.indices().iter().find_map(|idx| {
9662        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9663            && idx.partial_predicate.is_none()
9664            && idx.included_columns.is_empty()
9665            && idx.expression.is_none()
9666        {
9667            Some(idx.column_position)
9668        } else {
9669            None
9670        }
9671    })
9672}
9673
9674/// v7.9.8 / v7.9.10 — resolve the column positions that
9675/// identify a conflict for ON CONFLICT. Returns a Vec of
9676/// column positions (1 element for single-column form, N for
9677/// composite). When the user wrote bare `ON CONFLICT DO …`,
9678/// falls back to the table's first unconditional BTree index
9679/// (always single-column today).
9680fn resolve_on_conflict_columns(
9681    catalog: &Catalog,
9682    table_name: &str,
9683    target: &[String],
9684) -> Result<Vec<usize>, EngineError> {
9685    let table = catalog.get(table_name).ok_or_else(|| {
9686        EngineError::Storage(StorageError::TableNotFound {
9687            name: table_name.into(),
9688        })
9689    })?;
9690    if target.is_empty() {
9691        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
9692        // constraints carry a multi-column tuple; the prior code
9693        // path picked only the leading column of the first BTree
9694        // index, which caused `ON CONFLICT DO NOTHING` to dedup
9695        // by leading column alone (3 rows with same group_id but
9696        // different permission collapsed to 1). PG semantics use
9697        // the full tuple. Prefer a UniquenessConstraint's full
9698        // column list when one exists; fall back to the leading
9699        // BTree column for legacy single-column UNIQUE.
9700        if let Some(uc) = table.schema().uniqueness_constraints.first() {
9701            return Ok(uc.columns.clone());
9702        }
9703        let pos = table
9704            .indices()
9705            .iter()
9706            .find_map(|idx| {
9707                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9708                    && idx.partial_predicate.is_none()
9709                    && idx.included_columns.is_empty()
9710                    && idx.expression.is_none()
9711                {
9712                    Some(idx.column_position)
9713                } else {
9714                    None
9715                }
9716            })
9717            .ok_or_else(|| {
9718                EngineError::Unsupported(alloc::format!(
9719                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
9720                ))
9721            })?;
9722        return Ok(alloc::vec![pos]);
9723    }
9724    let mut out = Vec::with_capacity(target.len());
9725    for name in target {
9726        let pos = table
9727            .schema()
9728            .columns
9729            .iter()
9730            .position(|c| c.name == *name)
9731            .ok_or_else(|| {
9732                EngineError::Unsupported(alloc::format!(
9733                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
9734                ))
9735            })?;
9736        out.push(pos);
9737    }
9738    Ok(out)
9739}
9740
9741/// v7.9.8 — check whether the BTree index on `column_pos` of
9742/// `table_name` already has a row with this key.
9743fn on_conflict_key_exists(
9744    catalog: &Catalog,
9745    table_name: &str,
9746    column_pos: usize,
9747    key: &Value,
9748) -> bool {
9749    let Some(table) = catalog.get(table_name) else {
9750        return false;
9751    };
9752    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
9753        return false;
9754    };
9755    table.indices().iter().any(|idx| {
9756        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
9757            && idx.column_position == column_pos
9758            && idx.partial_predicate.is_none()
9759            && !idx.lookup_eq(&idx_key).is_empty()
9760    })
9761}
9762
9763/// v7.9.9 / v7.9.10 — look up an existing row's position by
9764/// matching all `column_positions` against the incoming `key`
9765/// tuple. Single-column shape (one column) reduces to the
9766/// canonical PK lookup; composite shapes scan linearly until
9767/// every position matches.
9768fn lookup_row_position_by_keys(
9769    catalog: &Catalog,
9770    table_name: &str,
9771    column_positions: &[usize],
9772    key: &[&Value],
9773) -> Option<usize> {
9774    let table = catalog.get(table_name)?;
9775    table.rows().iter().position(|r| {
9776        column_positions
9777            .iter()
9778            .enumerate()
9779            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
9780    })
9781}
9782
9783/// v7.9.10 — does the table already contain a row whose
9784/// `column_positions` tuple equals `key`? Single-column shape
9785/// uses the existing BTree fast path; composite shapes fall
9786/// back to a row scan.
9787fn on_conflict_keys_exist(
9788    catalog: &Catalog,
9789    table_name: &str,
9790    column_positions: &[usize],
9791    key: &[&Value],
9792) -> bool {
9793    if column_positions.len() == 1 {
9794        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
9795    }
9796    let Some(table) = catalog.get(table_name) else {
9797        return false;
9798    };
9799    table.rows().iter().any(|r| {
9800        column_positions
9801            .iter()
9802            .enumerate()
9803            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
9804    })
9805}
9806
9807/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
9808/// existing row.
9809///
9810/// `incoming` is the rejected INSERT row (used to resolve
9811/// `EXCLUDED.col` references in the assignment exprs);
9812/// `target_pos` is the position of the existing row in the table.
9813/// Each assignment substitutes `EXCLUDED.col` with the matching
9814/// incoming value, evaluates the resulting expression against
9815/// the existing row, and writes the new value into the
9816/// corresponding column of the returned `Vec<Value>`. If
9817/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
9818/// the conflicting row is silently kept unchanged.
9819fn apply_on_conflict_assignments(
9820    catalog: &Catalog,
9821    table_name: &str,
9822    target_pos: usize,
9823    incoming: &[Value],
9824    assignments: &[(String, Expr)],
9825    where_: Option<&Expr>,
9826) -> Result<Option<Vec<Value>>, EngineError> {
9827    let table = catalog.get(table_name).ok_or_else(|| {
9828        EngineError::Storage(StorageError::TableNotFound {
9829            name: table_name.into(),
9830        })
9831    })?;
9832    let schema_cols = table.schema().columns.clone();
9833    let existing = table
9834        .rows()
9835        .get(target_pos)
9836        .ok_or_else(|| {
9837            EngineError::Unsupported(alloc::format!(
9838                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
9839            ))
9840        })?
9841        .clone();
9842    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
9843    // Optional WHERE filter on the conflict row.
9844    if let Some(w) = where_ {
9845        let pred = w.clone();
9846        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
9847        let v = eval::eval_expr(&pred, &existing, &ctx)?;
9848        if !matches!(v, Value::Bool(true)) {
9849            return Ok(None);
9850        }
9851    }
9852    let mut new_values = existing.values.clone();
9853    for (col_name, expr) in assignments {
9854        let target_idx = schema_cols
9855            .iter()
9856            .position(|c| c.name == *col_name)
9857            .ok_or_else(|| {
9858                EngineError::Eval(EvalError::ColumnNotFound {
9859                    name: col_name.clone(),
9860                })
9861            })?;
9862        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
9863        let v = eval::eval_expr(&sub, &existing, &ctx)?;
9864        new_values[target_idx] = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
9865    }
9866    Ok(Some(new_values))
9867}
9868
9869/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
9870/// "EXCLUDED", name }` reference with a `Literal` of the matching
9871/// value from the incoming-row vec. Resolution against the
9872/// child-table column list (by name).
9873fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
9874    use spg_sql::ast::ColumnName;
9875    match expr {
9876        Expr::Column(ColumnName { qualifier, name })
9877            if qualifier
9878                .as_deref()
9879                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
9880        {
9881            let pos = schema_cols.iter().position(|c| c.name == name);
9882            match pos {
9883                Some(p) => {
9884                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
9885                    value_to_literal_expr(v)
9886                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
9887                }
9888                None => Expr::Column(ColumnName { qualifier, name }),
9889            }
9890        }
9891        Expr::Binary { op, lhs, rhs } => Expr::Binary {
9892            op,
9893            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
9894            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
9895        },
9896        Expr::Unary { op, expr } => Expr::Unary {
9897            op,
9898            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
9899        },
9900        Expr::FunctionCall { name, args } => Expr::FunctionCall {
9901            name,
9902            args: args
9903                .into_iter()
9904                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
9905                .collect(),
9906        },
9907        other => other,
9908    }
9909}
9910
// NOTE: the block below documents `enforce_fk_inserts`, which is defined
// further down in this file. It is kept as a plain comment so that
// rustdoc does not attach it to the next item.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//  (a) rows already committed to the table, plus
//  (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — common pattern for bulk-loading
// hierarchies.
9927/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
9928/// constraints at INSERT time. For each constraint declared on
9929/// the target table, check that no existing row + no earlier row
9930/// in the same batch has the same full-column tuple. NULL in
9931/// any column lifts the row out of the check (SQL spec: NULL
9932/// ≠ NULL for uniqueness). mailrs G1 + G6.
9933fn enforce_uniqueness_inserts(
9934    catalog: &Catalog,
9935    child_table: &str,
9936    constraints: &[spg_storage::UniquenessConstraint],
9937    rows: &[Vec<Value>],
9938) -> Result<(), EngineError> {
9939    if constraints.is_empty() {
9940        return Ok(());
9941    }
9942    let table = catalog.get(child_table).ok_or_else(|| {
9943        EngineError::Storage(StorageError::TableNotFound {
9944            name: child_table.into(),
9945        })
9946    })?;
9947    for uc in constraints {
9948        for (batch_idx, row_values) in rows.iter().enumerate() {
9949            let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
9950            let has_null = key.iter().any(|v| matches!(v, Value::Null));
9951            // v7.13.0 — `NULLS NOT DISTINCT` (mailrs round-5 G10,
9952            // PG 15+): two rows whose constrained columns are all
9953            // NULL collide. SQL-standard `NULLS DISTINCT` lets any
9954            // NULL skip the check.
9955            if has_null && !uc.nulls_not_distinct {
9956                continue;
9957            }
9958            // Table-side collision: scan existing rows.
9959            let collides_in_table = table.rows().iter().any(|prow| {
9960                uc.columns
9961                    .iter()
9962                    .enumerate()
9963                    .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
9964            });
9965            // Batch-side collision: earlier rows in the same INSERT.
9966            let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
9967                uc.columns
9968                    .iter()
9969                    .enumerate()
9970                    .all(|(i, &p)| earlier.get(p) == Some(key[i]))
9971            });
9972            if collides_in_table || collides_in_batch {
9973                let kind = if uc.is_primary_key {
9974                    "PRIMARY KEY"
9975                } else {
9976                    "UNIQUE"
9977                };
9978                let col_names: Vec<String> = uc
9979                    .columns
9980                    .iter()
9981                    .map(|&i| table.schema().columns[i].name.clone())
9982                    .collect();
9983                return Err(EngineError::Unsupported(alloc::format!(
9984                    "{kind} violation on {child_table:?} columns {col_names:?}: \
9985                     row #{batch_idx} duplicates an existing key"
9986                )));
9987            }
9988        }
9989    }
9990    Ok(())
9991}
9992
9993/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
9994/// WHERE-style predicate. NULL → false (three-valued logic
9995/// collapses to "skip this row" for index inclusion). Numeric
9996/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
9997/// Everything else (strings, vectors, JSON, …) is not a valid
9998/// predicate result and surfaces as `false` so a malformed
9999/// predicate degrades to "row not in index" rather than panicking.
10000fn predicate_truthy(v: &spg_storage::Value) -> bool {
10001    use spg_storage::Value as V;
10002    match v {
10003        V::Bool(b) => *b,
10004        V::Int(n) => *n != 0,
10005        V::BigInt(n) => *n != 0,
10006        V::SmallInt(n) => *n != 0,
10007        _ => false,
10008    }
10009}
10010
10011/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
10012/// committed rows for pre-existing duplicates. If any pair of rows
10013/// matches the predicate AND has the same index key, refuse to
10014/// create the index so the user fixes the data before retrying.
10015fn check_existing_unique_violation(
10016    idx: &spg_storage::Index,
10017    schema: &spg_storage::TableSchema,
10018    rows: &[spg_storage::Row],
10019) -> Result<(), EngineError> {
10020    let predicate_expr = match idx.partial_predicate.as_deref() {
10021        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10022            EngineError::Unsupported(alloc::format!(
10023                "stored partial predicate {s:?} failed to re-parse: {e:?}"
10024            ))
10025        })?),
10026        None => None,
10027    };
10028    let ctx = eval::EvalContext::new(&schema.columns, None);
10029    let key_positions = unique_key_positions(idx);
10030    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
10031    for row in rows {
10032        if let Some(expr) = &predicate_expr {
10033            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
10034                EngineError::Unsupported(alloc::format!(
10035                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
10036                ))
10037            })?;
10038            if !predicate_truthy(&v) {
10039                continue;
10040            }
10041        }
10042        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
10043            .iter()
10044            .map(|&p| {
10045                row.values
10046                    .get(p)
10047                    .cloned()
10048                    .unwrap_or(spg_storage::Value::Null)
10049            })
10050            .collect();
10051        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10052            continue;
10053        }
10054        if seen.iter().any(|other| *other == key) {
10055            return Err(EngineError::Unsupported(alloc::format!(
10056                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
10057                idx.name
10058            )));
10059        }
10060        seen.push(key);
10061    }
10062    Ok(())
10063}
10064
10065/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
10066/// extra positions). For single-column indexes this is just
10067/// `[column_position]`.
10068fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
10069    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
10070    out.push(idx.column_position);
10071    out.extend_from_slice(&idx.extra_column_positions);
10072    out
10073}
10074
10075/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
10076/// target table. For each, eval the index's optional predicate
10077/// against (a) the candidate row and (b) every committed row plus
10078/// earlier batch rows; only rows where the predicate is truthy
10079/// participate. A duplicate key among predicate-matching rows is a
10080/// uniqueness violation. NULL keys lift the row out of the check
10081/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
10082fn enforce_unique_index_inserts(
10083    catalog: &Catalog,
10084    table_name: &str,
10085    rows: &[alloc::vec::Vec<spg_storage::Value>],
10086) -> Result<(), EngineError> {
10087    let table = catalog.get(table_name).ok_or_else(|| {
10088        EngineError::Storage(StorageError::TableNotFound {
10089            name: table_name.into(),
10090        })
10091    })?;
10092    let schema = table.schema();
10093    let ctx = eval::EvalContext::new(&schema.columns, None);
10094    for idx in table.indices() {
10095        if !idx.is_unique {
10096            continue;
10097        }
10098        // Re-parse the predicate once per index per batch.
10099        let predicate_expr = match idx.partial_predicate.as_deref() {
10100            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
10101                EngineError::Unsupported(alloc::format!(
10102                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
10103                    idx.name
10104                ))
10105            })?),
10106            None => None,
10107        };
10108        let key_positions = unique_key_positions(idx);
10109        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
10110            key_positions
10111                .iter()
10112                .map(|&p| values.get(p).cloned().unwrap_or(spg_storage::Value::Null))
10113                .collect()
10114        };
10115        // Helper: does `values` participate in this index? (predicate
10116        // truthy when present.) Wraps `values` into a transient Row
10117        // because eval_expr requires &Row.
10118        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
10119            let Some(expr) = &predicate_expr else {
10120                return Ok(true);
10121            };
10122            let tmp_row = spg_storage::Row {
10123                values: values.to_vec(),
10124            };
10125            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10126                EngineError::Unsupported(alloc::format!(
10127                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
10128                    idx.name
10129                ))
10130            })?;
10131            Ok(predicate_truthy(&v))
10132        };
10133        for (batch_idx, row_values) in rows.iter().enumerate() {
10134            if !participates(row_values)? {
10135                continue;
10136            }
10137            let key = key_of(row_values);
10138            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
10139                continue;
10140            }
10141            // Committed-table collision.
10142            for prow in table.rows() {
10143                if !participates(&prow.values)? {
10144                    continue;
10145                }
10146                if key_of(&prow.values) == key {
10147                    return Err(EngineError::Unsupported(alloc::format!(
10148                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10149                         row #{batch_idx} duplicates an existing key",
10150                        idx.name
10151                    )));
10152                }
10153            }
10154            // Within-batch collision: earlier rows in the same INSERT.
10155            for earlier in &rows[..batch_idx] {
10156                if !participates(earlier)? {
10157                    continue;
10158                }
10159                if key_of(earlier) == key {
10160                    return Err(EngineError::Unsupported(alloc::format!(
10161                        "UNIQUE INDEX {:?} violation on {table_name:?}: \
10162                         row #{batch_idx} duplicates an earlier row in the same batch",
10163                        idx.name
10164                    )));
10165                }
10166            }
10167        }
10168    }
10169    Ok(())
10170}
10171
10172/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
10173/// Returns `true` when at least one of `filter_cols` has a
10174/// different value in `new_row` vs `old_row`. Column lookup is
10175/// case-insensitive against `schema_cols`; unknown filter columns
10176/// are treated as "not changed" (the trigger therefore won't
10177/// fire on them — surfacing a parse-time error would be too
10178/// strict for catalog reloads where the schema may have drifted).
10179fn any_column_changed(
10180    filter_cols: &[String],
10181    schema_cols: &[ColumnSchema],
10182    old_row: &Row,
10183    new_row: &Row,
10184) -> bool {
10185    for col_name in filter_cols {
10186        let Some(pos) = schema_cols
10187            .iter()
10188            .position(|c| c.name.eq_ignore_ascii_case(col_name))
10189        else {
10190            continue;
10191        };
10192        let old_v = old_row.values.get(pos);
10193        let new_v = new_row.values.get(pos);
10194        if old_v != new_v {
10195            return true;
10196        }
10197    }
10198    false
10199}
10200
10201/// v7.13.0 — evaluate every CHECK predicate on the schema against
10202/// each candidate row. Mirrors PG semantics: a `false` result
10203/// rejects the mutation; a NULL result *passes* (CHECK rejects
10204/// only on definite-false, not on unknown). mailrs round-5 G3.
10205fn enforce_check_constraints(
10206    catalog: &Catalog,
10207    table_name: &str,
10208    rows: &[alloc::vec::Vec<spg_storage::Value>],
10209) -> Result<(), EngineError> {
10210    let table = catalog.get(table_name).ok_or_else(|| {
10211        EngineError::Storage(StorageError::TableNotFound {
10212            name: table_name.into(),
10213        })
10214    })?;
10215    let schema = table.schema();
10216    if schema.checks.is_empty() {
10217        return Ok(());
10218    }
10219    let ctx = eval::EvalContext::new(&schema.columns, None);
10220    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
10221    for (i, src) in schema.checks.iter().enumerate() {
10222        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
10223            EngineError::Unsupported(alloc::format!(
10224                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
10225            ))
10226        })?;
10227        parsed.push((i, expr));
10228    }
10229    for (batch_idx, row_values) in rows.iter().enumerate() {
10230        let tmp_row = spg_storage::Row {
10231            values: row_values.clone(),
10232        };
10233        for (i, expr) in &parsed {
10234            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
10235                EngineError::Unsupported(alloc::format!(
10236                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
10237                ))
10238            })?;
10239            // PG: NULL passes (CHECK rejects on definite-false only).
10240            if matches!(v, spg_storage::Value::Bool(false)) {
10241                return Err(EngineError::Unsupported(alloc::format!(
10242                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
10243                    schema.checks[*i]
10244                )));
10245            }
10246        }
10247    }
10248    Ok(())
10249}
10250
10251fn enforce_fk_inserts(
10252    catalog: &Catalog,
10253    child_table: &str,
10254    fks: &[spg_storage::ForeignKeyConstraint],
10255    rows: &[Vec<Value>],
10256) -> Result<(), EngineError> {
10257    for fk in fks {
10258        let parent_is_self = fk.parent_table == child_table;
10259        let parent = if parent_is_self {
10260            // Self-ref: read the current state of the same table.
10261            // The mut borrow on child has been dropped by the caller.
10262            catalog.get(child_table).ok_or_else(|| {
10263                EngineError::Storage(StorageError::TableNotFound {
10264                    name: child_table.into(),
10265                })
10266            })?
10267        } else {
10268            catalog.get(&fk.parent_table).ok_or_else(|| {
10269                EngineError::Storage(StorageError::TableNotFound {
10270                    name: fk.parent_table.clone(),
10271                })
10272            })?
10273        };
10274        for (batch_idx, row_values) in rows.iter().enumerate() {
10275            // Single-column FK fast path: try the parent's BTree
10276            // index for an O(log n) lookup. Composite FKs fall back
10277            // to a parent-row scan.
10278            if fk.local_columns.len() == 1 {
10279                let v = &row_values[fk.local_columns[0]];
10280                if matches!(v, Value::Null) {
10281                    continue;
10282                }
10283                let parent_col = fk.parent_columns[0];
10284                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
10285                    EngineError::Unsupported(alloc::format!(
10286                        "FOREIGN KEY column value of type {:?} is not index-eligible",
10287                        v.data_type()
10288                    ))
10289                })?;
10290                let present_committed = parent.indices().iter().any(|idx| {
10291                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
10292                        && idx.column_position == parent_col
10293                        && idx.partial_predicate.is_none()
10294                        && !idx.lookup_eq(&key).is_empty()
10295                });
10296                // v7.6.7 self-ref widening: also accept a match
10297                // against earlier rows in this same batch when the
10298                // FK points at the table being inserted into.
10299                let present_in_batch = parent_is_self
10300                    && rows[..batch_idx]
10301                        .iter()
10302                        .any(|earlier| earlier.get(parent_col) == Some(v));
10303                if !(present_committed || present_in_batch) {
10304                    return Err(EngineError::Unsupported(alloc::format!(
10305                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
10306                        fk.parent_table,
10307                        parent
10308                            .schema()
10309                            .columns
10310                            .get(parent_col)
10311                            .map_or("?", |c| c.name.as_str()),
10312                        v,
10313                    )));
10314                }
10315            } else {
10316                // Composite FK: scan parent rows. v7.6.7 also
10317                // accepts a match against earlier rows in the same
10318                // batch (self-ref bulk-loading of hierarchies).
10319                if fk
10320                    .local_columns
10321                    .iter()
10322                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
10323                {
10324                    continue;
10325                }
10326                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
10327                let parent_match_committed = parent.rows().iter().any(|prow| {
10328                    fk.parent_columns
10329                        .iter()
10330                        .enumerate()
10331                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
10332                });
10333                let parent_match_in_batch = parent_is_self
10334                    && rows[..batch_idx].iter().any(|earlier| {
10335                        fk.parent_columns
10336                            .iter()
10337                            .enumerate()
10338                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
10339                    });
10340                if !(parent_match_committed || parent_match_in_batch) {
10341                    return Err(EngineError::Unsupported(alloc::format!(
10342                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
10343                        fk.parent_table,
10344                    )));
10345                }
10346            }
10347        }
10348    }
10349    Ok(())
10350}
10351
/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
/// DELETE on a parent. The plan is a list of these steps, stacked
/// across the FK graph by `plan_fk_parent_deletions`.
///
/// Each step targets exactly one child table and carries a single
/// kind of mutation for it.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the child table whose rows `action` applies to.
    child_table: String,
    /// What to do to that table's rows (delete, SET NULL, SET DEFAULT).
    action: FkChildAction,
}
10360
/// The mutation an `FkChildStep` applies to its child table.
///
/// In `SetNull` and `SetDefault` the vectors are parallel: entry `i`
/// of `positions` pairs with entry `i` of `columns` (and, for
/// `SetDefault`, of `defaults`) to describe one child cell.
#[derive(Debug, Clone)]
enum FkChildAction {
    /// CASCADE — remove these rows. Sorted, deduplicated positions.
    Delete { positions: Vec<usize> },
    /// SET NULL — for each (row, column) in the flat list, write
    /// NULL into that child cell. Multiple FKs on the same row may
    /// produce overlapping entries (deduped at plan time).
    SetNull {
        // Row position of each cell to NULL out.
        positions: Vec<usize>,
        // Column position of each cell, parallel to `positions`.
        columns: Vec<usize>,
    },
    /// SET DEFAULT — same shape as SetNull but writes the column's
    /// declared DEFAULT value (resolved at plan time). Columns
    /// without a DEFAULT raise an error during planning.
    SetDefault {
        // Row position of each cell to reset.
        positions: Vec<usize>,
        // Column position of each cell, parallel to `positions`.
        columns: Vec<usize>,
        // Value to write into each cell, parallel to `positions`.
        defaults: Vec<Value>,
    },
}
10381
10382/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
10383///
10384/// Walks every table in the catalog looking for FKs whose
10385/// `parent_table` is `parent_table_name`. For each such FK + each
10386/// to-be-deleted parent row:
10387///
10388///   - RESTRICT / NoAction → error, no plan returned
10389///   - CASCADE → child rows get scheduled for deletion; recursive
10390///   - SetNull → child FK column(s) scheduled to be NULL-ed.
10391///     Verified NULL-able at plan time.
10392///   - SetDefault → child FK column(s) scheduled to be reset to
10393///     their declared DEFAULT. Columns without a DEFAULT raise.
10394///
10395/// SET NULL / SET DEFAULT do NOT cascade further — the child row
10396/// stays; only one of its columns mutates.
10397fn plan_fk_parent_deletions(
10398    catalog: &Catalog,
10399    parent_table_name: &str,
10400    to_delete_positions: &[usize],
10401    to_delete_rows: &[Vec<Value>],
10402) -> Result<Vec<FkChildStep>, EngineError> {
10403    use alloc::collections::{BTreeMap, BTreeSet};
10404    if to_delete_rows.is_empty() {
10405        return Ok(Vec::new());
10406    }
10407    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
10408    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
10409    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
10410    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10411    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
10412    for &p in to_delete_positions {
10413        visited.insert((parent_table_name.to_string(), p));
10414    }
10415    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
10416        .iter()
10417        .map(|r| (parent_table_name.to_string(), r.clone()))
10418        .collect();
10419    while let Some((cur_parent, parent_row)) = work.pop() {
10420        for child_name in catalog.table_names() {
10421            let child = catalog
10422                .get(&child_name)
10423                .expect("table_names → catalog.get round-trip is total");
10424            for fk in &child.schema().foreign_keys {
10425                if fk.parent_table != cur_parent {
10426                    continue;
10427                }
10428                let parent_key: Vec<&Value> = fk
10429                    .parent_columns
10430                    .iter()
10431                    .map(|&pi| &parent_row[pi])
10432                    .collect();
10433                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
10434                    continue;
10435                }
10436                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10437                    if child_name == cur_parent
10438                        && visited.contains(&(child_name.clone(), child_row_idx))
10439                    {
10440                        continue;
10441                    }
10442                    let matches_key = fk
10443                        .local_columns
10444                        .iter()
10445                        .enumerate()
10446                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
10447                    if !matches_key {
10448                        continue;
10449                    }
10450                    match fk.on_delete {
10451                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10452                            return Err(EngineError::Unsupported(alloc::format!(
10453                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
10454                                 restricted by FK from {child_name:?}.{:?}",
10455                                fk.local_columns,
10456                            )));
10457                        }
10458                        spg_storage::FkAction::Cascade => {
10459                            if visited.insert((child_name.clone(), child_row_idx)) {
10460                                delete_plan
10461                                    .entry(child_name.clone())
10462                                    .or_default()
10463                                    .insert(child_row_idx);
10464                                work.push((child_name.clone(), child_row.values.clone()));
10465                            }
10466                        }
10467                        spg_storage::FkAction::SetNull => {
10468                            // Verify every local FK column is NULL-able.
10469                            for &li in &fk.local_columns {
10470                                let col = child.schema().columns.get(li).ok_or_else(|| {
10471                                    EngineError::Unsupported(alloc::format!(
10472                                        "FK local column {li} missing in {child_name:?}"
10473                                    ))
10474                                })?;
10475                                if !col.nullable {
10476                                    return Err(EngineError::Unsupported(alloc::format!(
10477                                        "FOREIGN KEY ON DELETE SET NULL: column \
10478                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
10479                                        col.name,
10480                                    )));
10481                                }
10482                            }
10483                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10484                            for &li in &fk.local_columns {
10485                                entry.insert((child_row_idx, li));
10486                            }
10487                        }
10488                        spg_storage::FkAction::SetDefault => {
10489                            // Resolve the DEFAULT for every local FK col.
10490                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10491                            for &li in &fk.local_columns {
10492                                let col = child.schema().columns.get(li).ok_or_else(|| {
10493                                    EngineError::Unsupported(alloc::format!(
10494                                        "FK local column {li} missing in {child_name:?}"
10495                                    ))
10496                                })?;
10497                                let default = col.default.clone().ok_or_else(|| {
10498                                    EngineError::Unsupported(alloc::format!(
10499                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
10500                                         {child_name:?}.{:?} has no DEFAULT declared",
10501                                        col.name,
10502                                    ))
10503                                })?;
10504                                entry.insert((child_row_idx, li), default);
10505                            }
10506                        }
10507                    }
10508                }
10509            }
10510        }
10511    }
10512    // Flatten the three plans into the ordered `FkChildStep` list.
10513    // Deletes are applied last per child (after any null/default
10514    // re-writes on the same child) so a child row that's both
10515    // re-written and then cascade-deleted only ends up deleted —
10516    // but in v7.6.5 SetNull/Cascade never overlap on the same row
10517    // (a single FK chooses exactly one action), so the order is
10518    // mostly a precaution.
10519    let mut steps: Vec<FkChildStep> = Vec::new();
10520    for (child_table, entries) in setnull_plan {
10521        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10522        steps.push(FkChildStep {
10523            child_table,
10524            action: FkChildAction::SetNull { positions, columns },
10525        });
10526    }
10527    for (child_table, entries) in setdefault_plan {
10528        let mut positions = Vec::with_capacity(entries.len());
10529        let mut columns = Vec::with_capacity(entries.len());
10530        let mut defaults = Vec::with_capacity(entries.len());
10531        for ((p, c), v) in entries {
10532            positions.push(p);
10533            columns.push(c);
10534            defaults.push(v);
10535        }
10536        steps.push(FkChildStep {
10537            child_table,
10538            action: FkChildAction::SetDefault {
10539                positions,
10540                columns,
10541                defaults,
10542            },
10543        });
10544    }
10545    for (child_table, positions) in delete_plan {
10546        steps.push(FkChildStep {
10547            child_table,
10548            action: FkChildAction::Delete {
10549                positions: positions.into_iter().collect(),
10550            },
10551        });
10552    }
10553    Ok(steps)
10554}
10555
10556/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
10557/// PK/UNIQUE columns. Walks every other table whose FK references
10558/// `parent_table_name`; for each FK whose parent_columns overlap a
10559/// mutated column, decides the action by `fk.on_update`.
10560///
10561///   - RESTRICT / NoAction → error if any child references the OLD
10562///     value
10563///   - CASCADE → child FK columns get rewritten to the NEW parent
10564///     value (a SetNull-style update step with the new value)
10565///   - SetNull → child FK columns set to NULL
10566///   - SetDefault → child FK columns set to declared default
10567///
10568/// `plan_with_old` is `(row_position, old_values, new_values)` so
10569/// the planner can detect "did this row's parent key actually
10570/// change?" — only rows where at least one referenced parent
10571/// column moved trigger inbound work.
10572fn plan_fk_parent_updates(
10573    catalog: &Catalog,
10574    parent_table_name: &str,
10575    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
10576) -> Result<Vec<FkChildStep>, EngineError> {
10577    use alloc::collections::BTreeMap;
10578    if plan_with_old.is_empty() {
10579        return Ok(Vec::new());
10580    }
10581    // For each child table we may touch, build per-child step
10582    // lists. UPDATE never deletes children — `delete_plan` stays
10583    // empty here but is kept structurally aligned with
10584    // `plan_fk_parent_deletions` for future use.
10585    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
10586    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
10587        BTreeMap::new();
10588    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10589    // Cascade-update plan: child_table → row_idx → col_idx → new_value
10590    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
10591
10592    for child_name in catalog.table_names() {
10593        let child = catalog
10594            .get(&child_name)
10595            .expect("table_names → catalog.get total");
10596        for fk in &child.schema().foreign_keys {
10597            if fk.parent_table != parent_table_name {
10598                continue;
10599            }
10600            for (_pos, old_row, new_row) in plan_with_old {
10601                // Did any parent FK column change?
10602                let key_changed = fk
10603                    .parent_columns
10604                    .iter()
10605                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
10606                if !key_changed {
10607                    continue;
10608                }
10609                // The OLD parent key — used to find referring children.
10610                let old_key: Vec<&Value> =
10611                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
10612                if old_key.iter().any(|v| matches!(v, Value::Null)) {
10613                    // NULL parent has no children — skip.
10614                    continue;
10615                }
10616                let new_key: Vec<&Value> =
10617                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
10618                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
10619                    // Self-ref same-row updates: a row updating its
10620                    // own PK doesn't restrict itself.
10621                    if child_name == parent_table_name
10622                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
10623                    {
10624                        continue;
10625                    }
10626                    let matches_key = fk
10627                        .local_columns
10628                        .iter()
10629                        .enumerate()
10630                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
10631                    if !matches_key {
10632                        continue;
10633                    }
10634                    match fk.on_update {
10635                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
10636                            return Err(EngineError::Unsupported(alloc::format!(
10637                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
10638                                 restricted by FK from {child_name:?}.{:?}",
10639                                fk.local_columns,
10640                            )));
10641                        }
10642                        spg_storage::FkAction::Cascade => {
10643                            // Rewrite child FK columns to new key.
10644                            let entry = cascade_plan.entry(child_name.clone()).or_default();
10645                            for (i, &li) in fk.local_columns.iter().enumerate() {
10646                                entry.insert((child_row_idx, li), new_key[i].clone());
10647                            }
10648                        }
10649                        spg_storage::FkAction::SetNull => {
10650                            for &li in &fk.local_columns {
10651                                let col = child.schema().columns.get(li).ok_or_else(|| {
10652                                    EngineError::Unsupported(alloc::format!(
10653                                        "FK local column {li} missing in {child_name:?}"
10654                                    ))
10655                                })?;
10656                                if !col.nullable {
10657                                    return Err(EngineError::Unsupported(alloc::format!(
10658                                        "FOREIGN KEY ON UPDATE SET NULL: column \
10659                                         {child_name:?}.{:?} is NOT NULL",
10660                                        col.name,
10661                                    )));
10662                                }
10663                            }
10664                            let entry = setnull_plan.entry(child_name.clone()).or_default();
10665                            for &li in &fk.local_columns {
10666                                entry.insert((child_row_idx, li));
10667                            }
10668                        }
10669                        spg_storage::FkAction::SetDefault => {
10670                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
10671                            for &li in &fk.local_columns {
10672                                let col = child.schema().columns.get(li).ok_or_else(|| {
10673                                    EngineError::Unsupported(alloc::format!(
10674                                        "FK local column {li} missing in {child_name:?}"
10675                                    ))
10676                                })?;
10677                                let default = col.default.clone().ok_or_else(|| {
10678                                    EngineError::Unsupported(alloc::format!(
10679                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
10680                                         {child_name:?}.{:?} has no DEFAULT",
10681                                        col.name,
10682                                    ))
10683                                })?;
10684                                entry.insert((child_row_idx, li), default);
10685                            }
10686                        }
10687                    }
10688                }
10689            }
10690        }
10691    }
10692    // Flatten into FkChildStep list. UPDATE doesn't produce
10693    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
10694    let mut steps: Vec<FkChildStep> = Vec::new();
10695    for (child_table, entries) in cascade_plan {
10696        let mut positions = Vec::with_capacity(entries.len());
10697        let mut columns = Vec::with_capacity(entries.len());
10698        let mut defaults = Vec::with_capacity(entries.len());
10699        for ((p, c), v) in entries {
10700            positions.push(p);
10701            columns.push(c);
10702            defaults.push(v);
10703        }
10704        // We reuse `FkChildAction::SetDefault` for cascade-update:
10705        // both shapes are "write a known value into specific cells"
10706        // — `apply_per_cell_writes` doesn't care whether the value
10707        // came from a DEFAULT declaration or a new parent key.
10708        steps.push(FkChildStep {
10709            child_table,
10710            action: FkChildAction::SetDefault {
10711                positions,
10712                columns,
10713                defaults,
10714            },
10715        });
10716    }
10717    for (child_table, entries) in setnull_plan {
10718        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
10719        steps.push(FkChildStep {
10720            child_table,
10721            action: FkChildAction::SetNull { positions, columns },
10722        });
10723    }
10724    for (child_table, entries) in setdefault_plan {
10725        let mut positions = Vec::with_capacity(entries.len());
10726        let mut columns = Vec::with_capacity(entries.len());
10727        let mut defaults = Vec::with_capacity(entries.len());
10728        for ((p, c), v) in entries {
10729            positions.push(p);
10730            columns.push(c);
10731            defaults.push(v);
10732        }
10733        steps.push(FkChildStep {
10734            child_table,
10735            action: FkChildAction::SetDefault {
10736                positions,
10737                columns,
10738                defaults,
10739            },
10740        });
10741    }
10742    let _ = delete_plan; // UPDATE never deletes children.
10743    Ok(steps)
10744}
10745
10746/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
10747/// the three action variants so the DELETE executor stays a
10748/// simple loop over the planned steps.
10749fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
10750    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
10751        EngineError::Storage(StorageError::TableNotFound {
10752            name: step.child_table.clone(),
10753        })
10754    })?;
10755    match &step.action {
10756        FkChildAction::Delete { positions } => {
10757            let _ = child.delete_rows(positions);
10758        }
10759        FkChildAction::SetNull { positions, columns } => {
10760            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
10761        }
10762        FkChildAction::SetDefault {
10763            positions,
10764            columns,
10765            defaults,
10766        } => {
10767            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
10768        }
10769    }
10770    Ok(())
10771}
10772
10773/// v7.6.5 — write new values into selected child cells via
10774/// `Table::update_row` (the catalog's existing UPDATE entry).
10775/// Groups writes by row position so multi-column updates on the
10776/// same row only call `update_row` once. `value_for(i)` produces
10777/// the new value for the i-th (position, column) entry.
10778fn apply_per_cell_writes(
10779    child: &mut spg_storage::Table,
10780    positions: &[usize],
10781    columns: &[usize],
10782    mut value_for: impl FnMut(usize) -> Value,
10783) -> Result<(), EngineError> {
10784    use alloc::collections::BTreeMap;
10785    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
10786    for i in 0..positions.len() {
10787        by_row
10788            .entry(positions[i])
10789            .or_default()
10790            .push((columns[i], value_for(i)));
10791    }
10792    for (pos, mutations) in by_row {
10793        let mut new_values = child.rows()[pos].values.clone();
10794        for (col, v) in mutations {
10795            if let Some(slot) = new_values.get_mut(col) {
10796                *slot = v;
10797            }
10798        }
10799        child
10800            .update_row(pos, new_values)
10801            .map_err(EngineError::Storage)?;
10802    }
10803    Ok(())
10804}
10805
10806fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
10807    match a {
10808        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
10809        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
10810        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
10811        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
10812        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
10813    }
10814}
10815
10816/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
10817/// default-fill. Free fn (rather than `&self`) so callers
10818/// with an active `&mut Table` borrow can still use it.
10819/// Literal defaults take the cached path (`col.default`);
10820/// runtime defaults hit `clock_fn` at each call. mailrs G4.
10821fn resolve_column_default_free(
10822    col: &ColumnSchema,
10823    clock_fn: Option<ClockFn>,
10824) -> Result<Value, EngineError> {
10825    if let Some(rt) = &col.runtime_default {
10826        return eval_runtime_default_free(rt, col.ty, clock_fn);
10827    }
10828    Ok(col.default.clone().unwrap_or(Value::Null))
10829}
10830
10831fn eval_runtime_default_free(
10832    rt: &str,
10833    ty: DataType,
10834    clock_fn: Option<ClockFn>,
10835) -> Result<Value, EngineError> {
10836    let s = rt.trim().to_ascii_lowercase();
10837    let canonical = s.trim_end_matches("()");
10838    let now_us = match clock_fn {
10839        Some(f) => f(),
10840        None => 0,
10841    };
10842    let v = match canonical {
10843        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
10844        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
10845        "current_time" | "localtime" => Value::Timestamp(now_us),
10846        other => {
10847            return Err(EngineError::Unsupported(alloc::format!(
10848                "runtime DEFAULT expression {other:?} not supported \
10849                 (v7.9.21 whitelist: now() / current_timestamp / \
10850                 current_date / current_time / localtimestamp / \
10851                 localtime)"
10852            )));
10853        }
10854    };
10855    coerce_value(v, ty, "DEFAULT", 0)
10856}
10857
10858/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
10859/// evaluation rather than being cacheable as a literal Value.
10860/// FunctionCall is the immediate case (`now()`,
10861/// `current_timestamp`). Literal expressions and simple sign-
10862/// flipped numerics still take the static-cache path.
10863fn is_runtime_default_expr(expr: &Expr) -> bool {
10864    match expr {
10865        Expr::FunctionCall { .. } => true,
10866        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
10867        _ => false,
10868    }
10869}
10870
10871fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
10872    let ty = column_type_to_data_type(c.ty);
10873    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
10874    if let Some(default_expr) = c.default {
10875        // v7.9.21 — distinguish literal defaults (evaluated once
10876        // at CREATE TABLE) from expression defaults (deferred to
10877        // INSERT). Function calls (`now()`, `current_timestamp`
10878        // — see v7.9.20 keyword promotion) take the runtime path.
10879        // Literals continue to cache. mailrs G4.
10880        if is_runtime_default_expr(&default_expr) {
10881            let display = alloc::format!("{default_expr}");
10882            schema = schema.with_runtime_default(display);
10883        } else {
10884            let raw = literal_expr_to_value(default_expr)?;
10885            let coerced = coerce_value(raw, ty, &c.name, 0)?;
10886            schema = schema.with_default(coerced);
10887        }
10888    }
10889    if c.auto_increment {
10890        // AUTO_INCREMENT only makes sense on integer-shaped columns.
10891        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
10892            return Err(EngineError::Unsupported(alloc::format!(
10893                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
10894            )));
10895        }
10896        schema = schema.with_auto_increment();
10897    }
10898    Ok(schema)
10899}
10900
/// v7.10.4 — decode a BYTEA literal. Surrounding whitespace is
/// trimmed first, then one of three forms is accepted:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
///   * `Hello\000world` (backslash escape form; `\\` for a literal
///     backslash, `\NNN` for one byte written as three octal digits
///     in the range `000`–`377`)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// A backslash that does not start a valid escape (including
/// `\189` or `\400`, which are not octal bytes) is copied through
/// as a literal byte.
///
/// # Errors
///
/// Only the hex form can fail: `"odd-length hex literal"` when the
/// digit count is odd, `"invalid hex digit"` for a non-hex character.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        if cleaned.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        return cleaned
            .as_bytes()
            .chunks_exact(2)
            .map(|pair| -> Result<u8, &'static str> {
                let hi = hex_nibble(pair[0])?;
                let lo = hex_nibble(pair[1])?;
                Ok((hi << 4) | lo)
            })
            .collect();
    }
    // Escape form or raw. Consume the input front-to-back; `\\` and
    // `\NNN` octal sequences decode, anything else is a literal byte.
    let mut rest = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(rest.len());
    loop {
        rest = match rest {
            [] => break,
            [b'\\', b'\\', tail @ ..] => {
                out.push(b'\\');
                tail
            }
            // First digit is capped at 3 so the value fits in a byte;
            // the other two must be real octal digits (0–7).
            [b'\\', hi @ b'0'..=b'3', mid @ b'0'..=b'7', lo @ b'0'..=b'7', tail @ ..] => {
                out.push(((hi - b'0') << 6) | ((mid - b'0') << 3) | (lo - b'0'));
                tail
            }
            [other, tail @ ..] => {
                out.push(*other);
                tail
            }
        };
    }
    Ok(out)
}

/// Convert one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value
/// `0..=15`; any other byte yields `Err("invalid hex digit")`.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
10964
10965/// v7.10.11 — decode a PG TEXT[] external array form
10966/// (`{a,b,NULL}` with optional double-quoted elements). The
10967/// engine takes a leading/trailing `{`/`}` and splits at commas.
10968/// Quoted elements (`"hello, world"`) preserve embedded commas;
10969/// `\\` and `\"` decode to literal backslash / quote. Plain
10970/// unquoted `NULL` (case-insensitive) maps to `None`.
10971/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
10972/// element values. Single-element-type rules:
10973///   - all NULL / all Text → TextArray
10974///   - all Int (or Int+NULL) → IntArray
10975///   - any BigInt without Text → BigIntArray (widening)
10976///   - any Text → TextArray (fallback; non-string elements
10977///     render as text)
10978fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
10979    let mut has_text = false;
10980    let mut has_bigint = false;
10981    let mut has_int = false;
10982    for v in &items {
10983        match v {
10984            Value::Null => {}
10985            Value::Text(_) | Value::Json(_) => has_text = true,
10986            Value::BigInt(_) => has_bigint = true,
10987            Value::Int(_) | Value::SmallInt(_) => has_int = true,
10988            _ => has_text = true,
10989        }
10990    }
10991    if has_text || (!has_bigint && !has_int) {
10992        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
10993            .into_iter()
10994            .map(|v| match v {
10995                Value::Null => None,
10996                Value::Text(s) | Value::Json(s) => Some(s),
10997                other => Some(alloc::format!("{other:?}")),
10998            })
10999            .collect();
11000        return Value::TextArray(out);
11001    }
11002    if has_bigint {
11003        let out: alloc::vec::Vec<Option<i64>> = items
11004            .into_iter()
11005            .map(|v| match v {
11006                Value::Null => None,
11007                Value::Int(n) => Some(i64::from(n)),
11008                Value::SmallInt(n) => Some(i64::from(n)),
11009                Value::BigInt(n) => Some(n),
11010                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
11011            })
11012            .collect();
11013        return Value::BigIntArray(out);
11014    }
11015    let out: alloc::vec::Vec<Option<i32>> = items
11016        .into_iter()
11017        .map(|v| match v {
11018            Value::Null => None,
11019            Value::Int(n) => Some(n),
11020            Value::SmallInt(n) => Some(i32::from(n)),
11021            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
11022        })
11023        .collect();
11024    Value::IntArray(out)
11025}
11026
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The input is trimmed, must be wrapped in `{`…`}`, and is split
/// at commas. Quoted elements (`"hello, world"`) preserve embedded
/// commas; inside quotes a backslash makes the next character
/// literal, so `\\` and `\"` decode to backslash / quote. Unquoted
/// elements are trimmed, and a plain unquoted `NULL`
/// (case-insensitive) maps to `None`. An empty or all-whitespace
/// body yields an empty array.
///
/// # Errors
///
/// Returns a static message when the braces are missing, a quoted
/// element is unterminated, or something other than `,` follows an
/// element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        // Quoted element.
        if i < bytes.len() && bytes[i] == b'"' {
            i += 1; // open quote
            // Collect raw bytes and decode once at the end: pushing
            // `byte as char` would turn each byte of a multi-byte
            // UTF-8 character into its own (wrong) character.
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII backslashes were dropped from valid UTF-8, so
            // this conversion is not expected to fail.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted TEXT[] element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end. The
            // slice bounds sit on ASCII bytes, so they are always
            // character boundaries.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
11091
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the literal `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty, spells
/// `NULL` in any case, or contains a comma, quote, backslash, brace,
/// or any whitespace character.
///
/// Quoting on every whitespace character (not just space and tab)
/// matters for round-tripping: `decode_text_array_literal` trims
/// unquoted elements, so an unquoted leading or trailing newline
/// would be lost on the way back in.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        match item {
            None => out.push_str("NULL"),
            Some(s) => {
                let needs_quote = s.is_empty()
                    || s.eq_ignore_ascii_case("NULL")
                    || s.chars().any(|c| {
                        matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace()
                    });
                if needs_quote {
                    out.push('"');
                    for c in s.chars() {
                        if c == '"' || c == '\\' {
                            out.push('\\');
                        }
                        out.push(c);
                    }
                    out.push('"');
                } else {
                    out.push_str(s);
                }
            }
        }
    }
    out.push('}');
    out
}
11128
/// v7.10.4 — render BYTEA bytes in the PG hex output format: a
/// `\x` prefix followed by two lowercase hex digits per byte. Used
/// by the Text-side round-trip and the wire layer's text-mode
/// encoder. An empty slice encodes as just `\x`.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut encoded = alloc::string::String::with_capacity(2 + 2 * b.len());
    encoded.push_str("\\x");
    for &byte in b {
        // High nibble first, then low nibble.
        for nibble in [byte >> 4, byte & 0x0F] {
            encoded.push(hex_digit(nibble));
        }
    }
    encoded
}

/// Lowercase hex digit for a nibble value `0..=15`; any larger
/// input maps to `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
11151
11152const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
11153    match t {
11154        ColumnTypeName::SmallInt => DataType::SmallInt,
11155        ColumnTypeName::Int => DataType::Int,
11156        ColumnTypeName::BigInt => DataType::BigInt,
11157        ColumnTypeName::Float => DataType::Float,
11158        ColumnTypeName::Text => DataType::Text,
11159        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
11160        ColumnTypeName::Char(n) => DataType::Char(n),
11161        ColumnTypeName::Bool => DataType::Bool,
11162        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
11163            dim,
11164            encoding: match encoding {
11165                SqlVecEncoding::F32 => VecEncoding::F32,
11166                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
11167                SqlVecEncoding::F16 => VecEncoding::F16,
11168            },
11169        },
11170        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
11171        ColumnTypeName::Date => DataType::Date,
11172        ColumnTypeName::Timestamp => DataType::Timestamp,
11173        ColumnTypeName::Timestamptz => DataType::Timestamptz,
11174        ColumnTypeName::Json => DataType::Json,
11175        ColumnTypeName::Jsonb => DataType::Jsonb,
11176        ColumnTypeName::Bytes => DataType::Bytes,
11177        ColumnTypeName::TextArray => DataType::TextArray,
11178        ColumnTypeName::IntArray => DataType::IntArray,
11179        ColumnTypeName::BigIntArray => DataType::BigIntArray,
11180        ColumnTypeName::TsVector => DataType::TsVector,
11181        ColumnTypeName::TsQuery => DataType::TsQuery,
11182    }
11183}
11184
11185/// Convert an INSERT VALUES expression to a storage Value. Supports literal
11186/// expressions, unary-minus over numeric literals, and pgvector-style
11187/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
11188fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
11189    match expr {
11190        Expr::Literal(l) => Ok(literal_to_value(l)),
11191        Expr::Cast { expr, target } => {
11192            let inner_value = literal_expr_to_value(*expr)?;
11193            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
11194        }
11195        Expr::Unary {
11196            op: UnOp::Neg,
11197            expr,
11198        } => match *expr {
11199            Expr::Literal(Literal::Integer(n)) => {
11200                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
11201                // — overflow on negate of i64::MIN is the one edge case.
11202                let neg = n.checked_neg().ok_or_else(|| {
11203                    EngineError::Unsupported("integer literal overflow on negation".into())
11204                })?;
11205                Ok(int_value_for(neg))
11206            }
11207            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
11208            other => Err(EngineError::Unsupported(alloc::format!(
11209                "unary minus over non-literal expression: {other:?}"
11210            ))),
11211        },
11212        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
11213        // INSERT-time. Each element must reduce to a Value through
11214        // `literal_expr_to_value`; NULL elements become `None`.
11215        // v7.11.13 — deduce shape from element values: all Int →
11216        // IntArray; any BigInt → BigIntArray (widening); any Text
11217        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
11218        // the outer Cast arm before reaching here and re-coerce.
11219        Expr::Array(items) => {
11220            let mut materialised: alloc::vec::Vec<Value> =
11221                alloc::vec::Vec::with_capacity(items.len());
11222            for elem in items {
11223                materialised.push(literal_expr_to_value(elem)?);
11224            }
11225            Ok(array_literal_widen(materialised))
11226        }
11227        other => Err(EngineError::Unsupported(alloc::format!(
11228            "non-literal INSERT value expression: {other:?}"
11229        ))),
11230    }
11231}
11232
11233fn literal_to_value(l: Literal) -> Value {
11234    match l {
11235        Literal::Integer(n) => int_value_for(n),
11236        Literal::Float(x) => Value::Float(x),
11237        Literal::String(s) => Value::Text(s),
11238        Literal::Bool(b) => Value::Bool(b),
11239        Literal::Null => Value::Null,
11240        Literal::Vector(v) => Value::Vector(v),
11241        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
11242    }
11243}
11244
11245/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
11246/// columns will still enforce the right tag downstream — this is just the
11247/// default we synthesise from an unannotated integer literal.
11248fn int_value_for(n: i64) -> Value {
11249    if let Ok(small) = i32::try_from(n) {
11250        Value::Int(small)
11251    } else {
11252        Value::BigInt(n)
11253    }
11254}
11255
11256/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
11257/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
11258/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
11259/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
11260/// `NULL` is always permitted; the nullability check happens later in storage.
11261#[allow(clippy::too_many_lines)]
11262fn coerce_value(
11263    v: Value,
11264    expected: DataType,
11265    col_name: &str,
11266    position: usize,
11267) -> Result<Value, EngineError> {
11268    if v.is_null() {
11269        return Ok(Value::Null);
11270    }
11271    let actual = v.data_type().expect("non-null");
11272    if actual == expected {
11273        return Ok(v);
11274    }
11275    let coerced = match (v, expected) {
11276        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11277        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11278        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11279        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11280            i128::from(n),
11281            precision,
11282            scale,
11283            col_name,
11284        )?),
11285        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
11286        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
11287        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
11288        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11289            i128::from(n),
11290            precision,
11291            scale,
11292            col_name,
11293        )?),
11294        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
11295        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
11296        #[allow(clippy::cast_precision_loss)]
11297        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
11298        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
11299            i128::from(n),
11300            precision,
11301            scale,
11302            col_name,
11303        )?),
11304        (Value::Float(x), DataType::Numeric { precision, scale }) => {
11305            Some(numeric_from_float(x, precision, scale, col_name)?)
11306        }
11307        // Text → DATE / TIMESTAMP: parse canonical text forms.
11308        (Value::Text(s), DataType::Date) => {
11309            let d = eval::parse_date_literal(&s).ok_or_else(|| {
11310                EngineError::Eval(EvalError::TypeMismatch {
11311                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
11312                })
11313            })?;
11314            Some(Value::Date(d))
11315        }
11316        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
11317        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
11318        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
11319        // form to the column's numeric / bool type at DEFAULT-
11320        // installation time so the storage check sees a typed
11321        // value. Parse failures fall through to TypeMismatch.
11322        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
11323        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
11324        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
11325        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
11326        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
11327            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
11328            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
11329            _ => None,
11330        },
11331        // v4.9: Text ↔ JSON coercion. No structural validation —
11332        // any text literal is accepted; the responsibility for
11333        // valid JSON lies with the producer.
11334        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
11335        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
11336        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
11337        // both JSON and JSONB on-disk as `Value::Json(String)` —
11338        // they share the underlying text payload. The cast
11339        // `'<text>'::jsonb` produces a Value::Json that needs to
11340        // satisfy a DataType::Jsonb column. Identity coerce in
11341        // both directions so JSON ↔ JSONB assignments work at all
11342        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
11343        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
11344        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
11345        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
11346        //   - Escape: `Hello\\000world`  (backslash + octal triples)
11347        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
11348        // Errors surface as TypeMismatch so the operator gets a
11349        // clear "this literal isn't a bytea literal" hint.
11350        (Value::Text(s), DataType::Bytes) => {
11351            let bytes = decode_bytea_literal(&s).map_err(|e| {
11352                EngineError::Eval(EvalError::TypeMismatch {
11353                    detail: alloc::format!(
11354                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
11355                    ),
11356                })
11357            })?;
11358            Some(Value::Bytes(bytes))
11359        }
11360        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
11361        // output (lowercase, `\x` prefix). Important when a
11362        // SELECT pulls a bytea cell through a Text column path.
11363        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
11364        // v7.10.11 — Text → TEXT[]. Decode PG's external array
11365        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
11366        // is the literal `NULL`; everything else is a quoted or
11367        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
11368        (Value::Text(s), DataType::TextArray) => {
11369            let arr = decode_text_array_literal(&s).map_err(|e| {
11370                EngineError::Eval(EvalError::TypeMismatch {
11371                    detail: alloc::format!(
11372                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
11373                    ),
11374                })
11375            })?;
11376            Some(Value::TextArray(arr))
11377        }
11378        // v7.10.11 — TEXT[] → Text round-trip uses PG's
11379        // external array form (`{a,b,NULL}`). Lets a SELECT
11380        // pull an array column through any Text-side codepath.
11381        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
11382        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
11383            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
11384                EngineError::Eval(EvalError::TypeMismatch {
11385                    detail: alloc::format!(
11386                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
11387                    ),
11388                })
11389            })?;
11390            Some(Value::Timestamp(t))
11391        }
11392        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
11393        // TIMESTAMP → day truncation).
11394        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
11395            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
11396        }
11397        // v7.9.21 — Value::Timestamp lands in either Timestamp
11398        // or Timestamptz columns; the on-disk layout is the
11399        // same i64 microseconds UTC.
11400        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
11401        (Value::Timestamp(t), DataType::Date) => {
11402            let days = t.div_euclid(86_400_000_000);
11403            i32::try_from(days).ok().map(Value::Date)
11404        }
11405        (
11406            Value::Numeric {
11407                scaled,
11408                scale: src_scale,
11409            },
11410            DataType::Numeric { precision, scale },
11411        ) => Some(numeric_rescale(
11412            scaled, src_scale, precision, scale, col_name,
11413        )?),
11414        #[allow(clippy::cast_precision_loss)]
11415        (Value::Numeric { scaled, scale }, DataType::Float) => {
11416            let mut div = 1.0_f64;
11417            for _ in 0..scale {
11418                div *= 10.0;
11419            }
11420            Some(Value::Float((scaled as f64) / div))
11421        }
11422        (Value::Numeric { scaled, scale }, DataType::Int) => {
11423            let truncated = numeric_truncate_to_integer(scaled, scale);
11424            i32::try_from(truncated).ok().map(Value::Int)
11425        }
11426        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
11427            let truncated = numeric_truncate_to_integer(scaled, scale);
11428            i64::try_from(truncated).ok().map(Value::BigInt)
11429        }
11430        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
11431            let truncated = numeric_truncate_to_integer(scaled, scale);
11432            i16::try_from(truncated).ok().map(Value::SmallInt)
11433        }
11434        // VARCHAR(n) enforces an upper bound on character count.
11435        (Value::Text(s), DataType::Varchar(max)) => {
11436            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
11437                Some(Value::Text(s))
11438            } else {
11439                return Err(EngineError::Unsupported(alloc::format!(
11440                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
11441                     {} chars",
11442                    s.chars().count()
11443                )));
11444            }
11445        }
11446        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
11447        // when the column declares `VECTOR(N) USING SQ8` and
11448        // the INSERT VALUES expression yields a raw f32 vector
11449        // (the normal pgvector-shape literal). Dim mismatch
11450        // falls through the `_ => None` arm and surfaces as
11451        // `TypeMismatch` with the expected SQ8 column type —
11452        // matching the F32 path's existing error.
11453        (
11454            Value::Vector(v),
11455            DataType::Vector {
11456                dim,
11457                encoding: VecEncoding::Sq8,
11458            },
11459        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
11460        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
11461        // columns. Bit-exact at the storage layer (modulo
11462        // half-precision rounding); no rerank pass needed at
11463        // search time.
11464        (
11465            Value::Vector(v),
11466            DataType::Vector {
11467                dim,
11468                encoding: VecEncoding::F16,
11469            },
11470        ) if v.len() == dim as usize => Some(Value::HalfVector(
11471            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
11472        )),
11473        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
11474        // is already longer we reject (PG truncates trailing-space-only;
11475        // staying strict for v1).
11476        (Value::Text(s), DataType::Char(size)) => {
11477            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
11478            if len > size {
11479                return Err(EngineError::Unsupported(alloc::format!(
11480                    "value for CHAR({size}) column `{col_name}` exceeds length: \
11481                     {len} chars"
11482                )));
11483            }
11484            let need = (size - len) as usize;
11485            let mut padded = s;
11486            padded.reserve(need);
11487            for _ in 0..need {
11488                padded.push(' ');
11489            }
11490            Some(Value::Text(padded))
11491        }
11492        _ => None,
11493    };
11494    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
11495        column: col_name.into(),
11496        expected,
11497        actual,
11498        position,
11499    }))
11500}
11501
11502/// v7.12.4 — render a function arg list into the
11503/// canonical form the storage layer caches as
11504/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
11505/// this string for both display + as a coarse signature key
11506/// for the (deferred) overload resolution v7.12.5+ adds.
11507fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
11508    use core::fmt::Write;
11509    let mut out = alloc::string::String::from("(");
11510    for (i, a) in args.iter().enumerate() {
11511        if i > 0 {
11512            out.push_str(", ");
11513        }
11514        match a.mode {
11515            spg_sql::ast::FunctionArgMode::In => {}
11516            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
11517            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
11518        }
11519        if let Some(n) = &a.name {
11520            out.push_str(n);
11521            out.push(' ');
11522        }
11523        match &a.ty {
11524            spg_sql::ast::FunctionArgType::Typed(t) => {
11525                let _ = write!(out, "{t}");
11526            }
11527            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
11528        }
11529    }
11530    out.push(')');
11531    out
11532}
11533
11534#[cfg(test)]
11535mod tests {
11536    use super::*;
11537    use alloc::vec;
11538
11539    fn unwrap_command_ok(r: &QueryResult) -> usize {
11540        match r {
11541            QueryResult::CommandOk { affected, .. } => *affected,
11542            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
11543        }
11544    }
11545
11546    #[test]
11547    fn create_table_registers_schema() {
11548        let mut e = Engine::new();
11549        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
11550            .unwrap();
11551        assert_eq!(e.catalog().table_count(), 1);
11552        let t = e.catalog().get("foo").unwrap();
11553        assert_eq!(t.schema().columns.len(), 2);
11554        assert_eq!(t.schema().columns[0].ty, DataType::Int);
11555        assert!(!t.schema().columns[0].nullable);
11556        assert_eq!(t.schema().columns[1].ty, DataType::Text);
11557    }
11558
11559    #[test]
11560    fn create_table_vector_default_is_f32_encoded() {
11561        let mut e = Engine::new();
11562        e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
11563        let t = e.catalog().get("t").unwrap();
11564        assert_eq!(
11565            t.schema().columns[0].ty,
11566            DataType::Vector {
11567                dim: 8,
11568                encoding: VecEncoding::F32,
11569            },
11570        );
11571    }
11572
11573    #[test]
11574    fn create_table_vector_using_sq8_succeeds() {
11575        // v6.0.1 step 3: the step-1 fence in `column_def_to_schema`
11576        // is lifted. CREATE TABLE persists an SQ8 column type in
11577        // the catalog; INSERT (next test) quantises raw f32 input.
11578        let mut e = Engine::new();
11579        e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
11580        let t = e.catalog().get("t").unwrap();
11581        assert_eq!(
11582            t.schema().columns[0].ty,
11583            DataType::Vector {
11584                dim: 8,
11585                encoding: VecEncoding::Sq8,
11586            },
11587        );
11588    }
11589
11590    #[test]
11591    fn insert_into_sq8_column_quantises_f32_payload() {
11592        // v6.0.1 step 3: INSERT-time `coerce_value` rewrites a raw
11593        // `Value::Vector(Vec<f32>)` literal into the column's
11594        // quantised representation. The row that lands in the
11595        // catalog must therefore hold a `Value::Sq8Vector`, not the
11596        // original f32 buffer — that's the bit that delivers the
11597        // 4× compression target.
11598        let mut e = Engine::new();
11599        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
11600        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
11601            .unwrap();
11602        let t = e.catalog().get("t").unwrap();
11603        assert_eq!(t.rows().len(), 1);
11604        match &t.rows()[0].values[0] {
11605            Value::Sq8Vector(q) => {
11606                assert_eq!(q.bytes.len(), 4);
11607                // min/max are derived from the payload: min=0.0, max=1.0.
11608                assert!((q.min - 0.0).abs() < 1e-6);
11609                assert!((q.max - 1.0).abs() < 1e-6);
11610            }
11611            other => panic!("expected Sq8Vector cell, got {other:?}"),
11612        }
11613    }
11614
11615    #[test]
11616    fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
11617        // v6.0.3: CREATE TABLE accepts USING HALF; INSERT path
11618        // converts the incoming `Value::Vector(Vec<f32>)` cell
11619        // into `Value::HalfVector(HalfVector)` via the new
11620        // `coerce_value` arm. The dequantised round-trip is
11621        // bit-exact for f16-representable values, so 0.0 / 0.25
11622        // / 0.5 / 1.0 hit their grid points exactly.
11623        let mut e = Engine::new();
11624        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
11625            .unwrap();
11626        e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
11627            .unwrap();
11628        let t = e.catalog().get("t").unwrap();
11629        assert_eq!(t.rows().len(), 1);
11630        match &t.rows()[0].values[0] {
11631            Value::HalfVector(h) => {
11632                assert_eq!(h.dim(), 4);
11633                let back = h.to_f32_vec();
11634                let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
11635                for (g, e) in back.iter().zip(expected.iter()) {
11636                    assert!(
11637                        (g - e).abs() < 1e-6,
11638                        "{g} vs {e} should be exact on f16 grid"
11639                    );
11640                }
11641            }
11642            other => panic!("expected HalfVector cell, got {other:?}"),
11643        }
11644    }
11645
11646    #[test]
11647    fn alter_index_rebuild_in_place_succeeds() {
11648        // v6.0.4: bare REBUILD (no encoding switch) walks every
11649        // row again to rebuild the NSW graph. Verifies the engine
11650        // dispatch + storage helper plumbing without changing any
11651        // cell encoding.
11652        let mut e = Engine::new();
11653        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
11654            .unwrap();
11655        for i in 0..8_i32 {
11656            #[allow(clippy::cast_precision_loss)]
11657            let base = (i as f32) * 0.1;
11658            e.execute(&alloc::format!(
11659                "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
11660                b1 = base + 0.01,
11661                b2 = base + 0.02,
11662            ))
11663            .unwrap();
11664        }
11665        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
11666        e.execute("ALTER INDEX t_idx REBUILD").unwrap();
11667        // Schema encoding stays F32 (no encoding clause).
11668        assert_eq!(
11669            e.catalog().get("t").unwrap().schema().columns[1].ty,
11670            DataType::Vector {
11671                dim: 3,
11672                encoding: VecEncoding::F32,
11673            },
11674        );
11675    }
11676
11677    #[test]
11678    fn alter_index_rebuild_with_encoding_switches_cell_type() {
11679        // v6.0.4: REBUILD WITH (encoding = SQ8) recodes every
11680        // stored cell from F32 → SQ8 + rebuilds the graph atop the
11681        // new encoding. Post-rebuild, cells must be Sq8Vector and
11682        // the schema must report encoding = Sq8.
11683        let mut e = Engine::new();
11684        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
11685            .unwrap();
11686        e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
11687            .unwrap();
11688        e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
11689        e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
11690            .unwrap();
11691        let t = e.catalog().get("t").unwrap();
11692        assert_eq!(
11693            t.schema().columns[1].ty,
11694            DataType::Vector {
11695                dim: 4,
11696                encoding: VecEncoding::Sq8,
11697            },
11698        );
11699        assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
11700    }
11701
11702    #[test]
11703    fn alter_index_rebuild_unknown_index_errors() {
11704        let mut e = Engine::new();
11705        let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
11706        assert!(
11707            matches!(
11708                &err,
11709                EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
11710            ),
11711            "got: {err}"
11712        );
11713    }
11714
11715    #[test]
11716    fn alter_index_rebuild_on_btree_index_errors() {
11717        // REBUILD on a B-tree index has no semantic meaning in
11718        // v6.0.4 — rejected at the storage layer with `Unsupported`.
11719        let mut e = Engine::new();
11720        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11721        e.execute("INSERT INTO t VALUES (1)").unwrap();
11722        e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
11723        let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
11724        assert!(
11725            matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
11726            "got: {err}"
11727        );
11728    }
11729
11730    #[test]
11731    fn prepared_insert_substitutes_placeholders() {
11732        // v6.1.1: prepare() parses once; execute_prepared() walks the
11733        // AST and replaces $1/$2 with the param Values BEFORE the
11734        // dispatch sees them. Same logical result as a simple-query
11735        // INSERT, but parse happens once per *statement*, not per
11736        // execution.
11737        let mut e = Engine::new();
11738        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
11739            .unwrap();
11740        let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
11741        for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
11742            e.execute_prepared(stmt.clone(), &[Value::Int(id), Value::Text(name.into())])
11743                .unwrap();
11744        }
11745        // Read back via simple-query SELECT.
11746        let rows_result = e.execute("SELECT id, name FROM t").unwrap();
11747        let QueryResult::Rows { rows, .. } = rows_result else {
11748            panic!("expected Rows")
11749        };
11750        assert_eq!(rows.len(), 3);
11751    }
11752
11753    #[test]
11754    fn prepared_select_with_placeholder_filters_rows() {
11755        let mut e = Engine::new();
11756        e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
11757            .unwrap();
11758        for i in 0..10_i32 {
11759            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
11760                .unwrap();
11761        }
11762        let stmt = e.prepare("SELECT id FROM t WHERE v = $1").unwrap();
11763        let QueryResult::Rows { rows, .. } = e.execute_prepared(stmt, &[Value::Int(35)]).unwrap()
11764        else {
11765            panic!("expected Rows")
11766        };
11767        // v = 35 means i*7 = 35 → i = 5.
11768        assert_eq!(rows.len(), 1);
11769        assert_eq!(rows[0].values[0], Value::Int(5));
11770    }
11771
11772    #[test]
11773    fn prepared_too_few_params_errors() {
11774        let mut e = Engine::new();
11775        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
11776        let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
11777        let err = e.execute_prepared(stmt, &[]).unwrap_err();
11778        assert!(
11779            matches!(
11780                &err,
11781                EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
11782            ),
11783            "got: {err}"
11784        );
11785    }
11786
11787    #[test]
11788    fn insert_into_half_column_dim_mismatch_errors() {
11789        let mut e = Engine::new();
11790        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
11791            .unwrap();
11792        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
11793        assert!(matches!(
11794            &err,
11795            EngineError::Storage(StorageError::TypeMismatch { .. })
11796        ));
11797    }
11798
11799    #[test]
11800    fn insert_into_sq8_column_dim_mismatch_errors() {
11801        // Dim mismatch falls through the `coerce_value` Vector→Sq8
11802        // arm's guard and surfaces as `TypeMismatch` — the same
11803        // error the F32 path produces today, so client error
11804        // handling stays uniform across encodings.
11805        let mut e = Engine::new();
11806        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
11807        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
11808        assert!(
11809            matches!(
11810                &err,
11811                EngineError::Storage(StorageError::TypeMismatch { .. })
11812            ),
11813            "got: {err}",
11814        );
11815    }
11816
11817    #[test]
11818    fn create_table_duplicate_errors() {
11819        let mut e = Engine::new();
11820        e.execute("CREATE TABLE foo (a INT)").unwrap();
11821        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
11822        assert!(matches!(
11823            err,
11824            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
11825        ));
11826    }
11827
11828    #[test]
11829    fn insert_into_unknown_table_errors() {
11830        let mut e = Engine::new();
11831        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
11832        assert!(matches!(
11833            err,
11834            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
11835        ));
11836    }
11837
11838    #[test]
11839    fn insert_happy_path_reports_one_affected() {
11840        let mut e = Engine::new();
11841        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11842        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
11843        assert_eq!(unwrap_command_ok(&r), 1);
11844        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
11845    }
11846
11847    #[test]
11848    fn insert_arity_mismatch_propagates() {
11849        let mut e = Engine::new();
11850        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
11851        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
11852        assert!(matches!(
11853            err,
11854            EngineError::Storage(StorageError::ArityMismatch { .. })
11855        ));
11856    }
11857
11858    #[test]
11859    fn insert_negative_integer_via_unary_minus() {
11860        let mut e = Engine::new();
11861        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11862        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
11863        let rows = e.catalog().get("foo").unwrap().rows();
11864        assert_eq!(rows[0].values[0], Value::Int(-7));
11865    }
11866
11867    #[test]
11868    fn insert_non_literal_expr_unsupported() {
11869        let mut e = Engine::new();
11870        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
11871        let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
11872        assert!(matches!(err, EngineError::Unsupported(_)));
11873    }
11874
11875    #[test]
11876    fn select_star_returns_all_rows_in_insertion_order() {
11877        let mut e = Engine::new();
11878        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
11879            .unwrap();
11880        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
11881        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
11882        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
11883
11884        let r = e.execute("SELECT * FROM foo").unwrap();
11885        let QueryResult::Rows { columns, rows } = r else {
11886            panic!("expected Rows")
11887        };
11888        assert_eq!(columns.len(), 2);
11889        assert_eq!(columns[0].name, "a");
11890        assert_eq!(rows.len(), 3);
11891        assert_eq!(
11892            rows[1].values,
11893            vec![Value::Int(2), Value::Text("two".into())]
11894        );
11895    }
11896
11897    #[test]
11898    fn select_star_on_empty_table_returns_zero_rows() {
11899        let mut e = Engine::new();
11900        e.execute("CREATE TABLE foo (a INT)").unwrap();
11901        let r = e.execute("SELECT * FROM foo").unwrap();
11902        match r {
11903            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
11904            QueryResult::CommandOk { .. } => panic!("expected Rows"),
11905        }
11906    }
11907
11908    // --- v0.4: WHERE + projection ------------------------------------------
11909
11910    fn make_three_row_users(e: &mut Engine) {
11911        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
11912            .unwrap();
11913        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
11914            .unwrap();
11915        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
11916            .unwrap();
11917        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
11918            .unwrap();
11919    }
11920
11921    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
11922        match r {
11923            QueryResult::Rows { columns, rows } => (columns, rows),
11924            QueryResult::CommandOk { .. } => panic!("expected Rows"),
11925        }
11926    }
11927
11928    #[test]
11929    fn where_filter_passes_only_true_rows() {
11930        let mut e = Engine::new();
11931        make_three_row_users(&mut e);
11932        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
11933        let (_, rows) = unwrap_rows(r);
11934        assert_eq!(rows.len(), 2);
11935        assert_eq!(rows[0].values[0], Value::Int(2));
11936        assert_eq!(rows[1].values[0], Value::Int(3));
11937    }
11938
11939    #[test]
11940    fn where_with_null_result_filters_out_row() {
11941        let mut e = Engine::new();
11942        make_three_row_users(&mut e);
11943        // score is NULL for bob → score > 80 is NULL → row excluded
11944        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
11945        let (_, rows) = unwrap_rows(r);
11946        assert_eq!(rows.len(), 1);
11947        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
11948    }
11949
11950    #[test]
11951    fn projection_named_columns() {
11952        let mut e = Engine::new();
11953        make_three_row_users(&mut e);
11954        let r = e.execute("SELECT name, score FROM users").unwrap();
11955        let (cols, rows) = unwrap_rows(r);
11956        assert_eq!(cols.len(), 2);
11957        assert_eq!(cols[0].name, "name");
11958        assert_eq!(cols[1].name, "score");
11959        assert_eq!(rows.len(), 3);
11960        assert_eq!(
11961            rows[0].values,
11962            vec![Value::Text("alice".into()), Value::Int(90)]
11963        );
11964    }
11965
11966    #[test]
11967    fn projection_with_column_alias() {
11968        let mut e = Engine::new();
11969        make_three_row_users(&mut e);
11970        let r = e
11971            .execute("SELECT name AS who FROM users WHERE id = 1")
11972            .unwrap();
11973        let (cols, rows) = unwrap_rows(r);
11974        assert_eq!(cols[0].name, "who");
11975        assert_eq!(rows.len(), 1);
11976        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
11977    }
11978
11979    #[test]
11980    fn qualified_column_with_table_alias_resolves() {
11981        let mut e = Engine::new();
11982        make_three_row_users(&mut e);
11983        let r = e
11984            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
11985            .unwrap();
11986        let (cols, rows) = unwrap_rows(r);
11987        assert_eq!(cols.len(), 2);
11988        assert_eq!(rows.len(), 2);
11989    }
11990
11991    #[test]
11992    fn qualified_column_with_wrong_alias_errors() {
11993        let mut e = Engine::new();
11994        make_three_row_users(&mut e);
11995        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
11996        assert!(matches!(
11997            err,
11998            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
11999        ));
12000    }
12001
12002    #[test]
12003    fn select_unknown_column_errors_in_projection() {
12004        let mut e = Engine::new();
12005        make_three_row_users(&mut e);
12006        let err = e.execute("SELECT ghost FROM users").unwrap_err();
12007        assert!(matches!(
12008            err,
12009            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
12010        ));
12011    }
12012
12013    #[test]
12014    fn where_unknown_column_errors() {
12015        let mut e = Engine::new();
12016        make_three_row_users(&mut e);
12017        let err = e
12018            .execute("SELECT * FROM users WHERE ghost = 1")
12019            .unwrap_err();
12020        assert!(matches!(
12021            err,
12022            EngineError::Eval(EvalError::ColumnNotFound { .. })
12023        ));
12024    }
12025
12026    #[test]
12027    fn expression_projection_evaluates_and_renders() {
12028        // Compound expressions in the SELECT list are evaluated per row;
12029        // the output column is typed TEXT, name defaults to the expression.
12030        let mut e = Engine::new();
12031        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
12032        e.execute("INSERT INTO t VALUES (3)").unwrap();
12033        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
12034        assert_eq!(rows.len(), 1);
12035        // The expression evaluates to integer 3; rendered as the cell value
12036        // (storage::Value::Int(3) since arithmetic kept ints).
12037        assert_eq!(rows[0].values[0], Value::Int(3));
12038    }
12039
12040    #[test]
12041    fn select_unknown_table_errors() {
12042        let mut e = Engine::new();
12043        let err = e.execute("SELECT * FROM ghost").unwrap_err();
12044        assert!(matches!(
12045            err,
12046            EngineError::Storage(StorageError::TableNotFound { .. })
12047        ));
12048    }
12049
12050    #[test]
12051    fn invalid_sql_returns_parse_error() {
12052        // v4.4: UPDATE is now real SQL, so use a true syntactic
12053        // garbage payload for the parse-error path.
12054        let mut e = Engine::new();
12055        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
12056        assert!(matches!(err, EngineError::Parse(_)));
12057    }
12058
12059    // --- v0.8 CREATE INDEX + index seek ------------------------------------
12060
12061    #[test]
12062    fn create_index_registers_on_table() {
12063        let mut e = Engine::new();
12064        make_three_row_users(&mut e);
12065        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
12066        let t = e.catalog().get("users").unwrap();
12067        assert_eq!(t.indices().len(), 1);
12068        assert_eq!(t.indices()[0].name, "by_name");
12069    }
12070
12071    #[test]
12072    fn create_index_on_unknown_table_errors() {
12073        let mut e = Engine::new();
12074        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
12075        assert!(matches!(
12076            err,
12077            EngineError::Storage(StorageError::TableNotFound { .. })
12078        ));
12079    }
12080
12081    #[test]
12082    fn create_index_on_unknown_column_errors() {
12083        let mut e = Engine::new();
12084        make_three_row_users(&mut e);
12085        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
12086        assert!(matches!(
12087            err,
12088            EngineError::Storage(StorageError::ColumnNotFound { .. })
12089        ));
12090    }
12091
12092    #[test]
12093    fn select_eq_uses_index_returns_same_rows_as_scan() {
12094        // Build two engines: one with an index, one without. Same query →
12095        // same row set (index is a planner optimisation, not a semantic
12096        // change).
12097        let mut without = Engine::new();
12098        make_three_row_users(&mut without);
12099        let mut with = Engine::new();
12100        make_three_row_users(&mut with);
12101        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
12102
12103        let q = "SELECT * FROM users WHERE id = 2";
12104        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
12105        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
12106        assert_eq!(no_idx_rows, idx_rows);
12107        assert_eq!(idx_rows.len(), 1);
12108    }
12109
12110    #[test]
12111    fn select_eq_with_no_matching_index_value_returns_empty() {
12112        let mut e = Engine::new();
12113        make_three_row_users(&mut e);
12114        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
12115        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
12116        assert_eq!(rows.len(), 0);
12117    }
12118
12119    // --- v0.9 transactions -------------------------------------------------
12120
12121    #[test]
12122    fn begin_sets_in_transaction_flag() {
12123        let mut e = Engine::new();
12124        assert!(!e.in_transaction());
12125        e.execute("BEGIN").unwrap();
12126        assert!(e.in_transaction());
12127    }
12128
12129    #[test]
12130    fn double_begin_errors() {
12131        let mut e = Engine::new();
12132        e.execute("BEGIN").unwrap();
12133        let err = e.execute("BEGIN").unwrap_err();
12134        assert_eq!(err, EngineError::TransactionAlreadyOpen);
12135    }
12136
12137    #[test]
12138    fn commit_without_begin_errors() {
12139        let mut e = Engine::new();
12140        let err = e.execute("COMMIT").unwrap_err();
12141        assert_eq!(err, EngineError::NoActiveTransaction);
12142    }
12143
12144    #[test]
12145    fn rollback_without_begin_errors() {
12146        let mut e = Engine::new();
12147        let err = e.execute("ROLLBACK").unwrap_err();
12148        assert_eq!(err, EngineError::NoActiveTransaction);
12149    }
12150
12151    #[test]
12152    fn commit_applies_shadow_to_committed_catalog() {
12153        let mut e = Engine::new();
12154        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12155        e.execute("BEGIN").unwrap();
12156        e.execute("INSERT INTO t VALUES (1)").unwrap();
12157        e.execute("INSERT INTO t VALUES (2)").unwrap();
12158        e.execute("COMMIT").unwrap();
12159        assert!(!e.in_transaction());
12160        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
12161    }
12162
12163    #[test]
12164    fn rollback_discards_shadow() {
12165        let mut e = Engine::new();
12166        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12167        e.execute("BEGIN").unwrap();
12168        e.execute("INSERT INTO t VALUES (1)").unwrap();
12169        e.execute("INSERT INTO t VALUES (2)").unwrap();
12170        e.execute("ROLLBACK").unwrap();
12171        assert!(!e.in_transaction());
12172        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
12173    }
12174
12175    #[test]
12176    fn select_during_tx_sees_uncommitted_writes_own_session() {
12177        // The shadow catalog is read by SELECTs while a TX is open — the
12178        // session can see its own pending writes.
12179        let mut e = Engine::new();
12180        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
12181        e.execute("BEGIN").unwrap();
12182        e.execute("INSERT INTO t VALUES (42)").unwrap();
12183        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
12184        assert_eq!(rows.len(), 1);
12185        assert_eq!(rows[0].values[0], Value::Int(42));
12186    }
12187
12188    #[test]
12189    fn snapshot_with_no_users_is_bare_catalog_format() {
12190        let mut e = Engine::new();
12191        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12192        let bytes = e.snapshot();
12193        assert_eq!(
12194            &bytes[..8],
12195            b"SPGDB001",
12196            "must be the bare v3.x catalog magic"
12197        );
12198        let e2 = Engine::restore_envelope(&bytes).unwrap();
12199        assert!(e2.users().is_empty());
12200        assert_eq!(e2.catalog().table_count(), 1);
12201    }
12202
12203    #[test]
12204    fn snapshot_with_users_round_trips_both_via_envelope() {
12205        let mut e = Engine::new();
12206        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12207        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
12208        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
12209            .unwrap();
12210        let bytes = e.snapshot();
12211        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
12212        let e2 = Engine::restore_envelope(&bytes).unwrap();
12213        assert_eq!(e2.users().len(), 2);
12214        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
12215        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
12216        assert_eq!(e2.verify_user("alice", "wrong"), None);
12217        assert_eq!(e2.catalog().table_count(), 1);
12218    }
12219
12220    #[test]
12221    fn ddl_inside_tx_also_rolled_back() {
12222        let mut e = Engine::new();
12223        e.execute("BEGIN").unwrap();
12224        e.execute("CREATE TABLE t (v INT)").unwrap();
12225        // Visible inside the TX.
12226        e.execute("SELECT * FROM t").unwrap();
12227        e.execute("ROLLBACK").unwrap();
12228        // Gone after rollback.
12229        let err = e.execute("SELECT * FROM t").unwrap_err();
12230        assert!(matches!(
12231            err,
12232            EngineError::Storage(StorageError::TableNotFound { .. })
12233        ));
12234    }
12235
12236    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
12237
12238    #[test]
12239    fn create_publication_lands_in_catalog() {
12240        let mut e = Engine::new();
12241        assert!(e.publications().is_empty());
12242        e.execute("CREATE PUBLICATION pub_a").unwrap();
12243        assert_eq!(e.publications().len(), 1);
12244        assert!(e.publications().contains("pub_a"));
12245    }
12246
12247    #[test]
12248    fn create_publication_duplicate_errors() {
12249        let mut e = Engine::new();
12250        e.execute("CREATE PUBLICATION pub_a").unwrap();
12251        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
12252        assert!(
12253            alloc::format!("{err:?}").contains("DuplicateName"),
12254            "got {err:?}"
12255        );
12256    }
12257
12258    #[test]
12259    fn drop_publication_silent_when_absent() {
12260        let mut e = Engine::new();
12261        // PG-compatible: DROP a publication that doesn't exist
12262        // succeeds (no-op) but reports zero affected.
12263        let r = e.execute("DROP PUBLICATION nope").unwrap();
12264        match r {
12265            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12266            other => panic!("expected CommandOk, got {other:?}"),
12267        }
12268    }
12269
12270    #[test]
12271    fn drop_publication_present_reports_one_affected() {
12272        let mut e = Engine::new();
12273        e.execute("CREATE PUBLICATION pub_a").unwrap();
12274        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
12275        match r {
12276            QueryResult::CommandOk {
12277                affected,
12278                modified_catalog,
12279            } => {
12280                assert_eq!(affected, 1);
12281                assert!(modified_catalog);
12282            }
12283            other => panic!("expected CommandOk, got {other:?}"),
12284        }
12285        assert!(e.publications().is_empty());
12286    }
12287
12288    #[test]
12289    fn publications_persist_across_snapshot_restore() {
12290        // The persist-across-restart ship-gate at the engine layer —
12291        // snapshot → restore_envelope round trip must preserve the
12292        // publication catalog. The spg-server e2e covers the
12293        // process-restart variant.
12294        let mut e = Engine::new();
12295        e.execute("CREATE PUBLICATION pub_a").unwrap();
12296        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
12297            .unwrap();
12298        let snap = e.snapshot();
12299        let e2 = Engine::restore_envelope(&snap).unwrap();
12300        assert_eq!(e2.publications().len(), 2);
12301        assert!(e2.publications().contains("pub_a"));
12302        assert!(e2.publications().contains("pub_b"));
12303    }
12304
12305    #[test]
12306    fn create_publication_allowed_inside_transaction() {
12307        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
12308        // CREATE PUBLICATION inside a TX and the auto-commit
12309        // wrap path needs the same allowance.
12310        let mut e = Engine::new();
12311        e.execute("BEGIN").unwrap();
12312        e.execute("CREATE PUBLICATION pub_a").unwrap();
12313        e.execute("COMMIT").unwrap();
12314        assert!(e.publications().contains("pub_a"));
12315    }
12316
12317    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
12318
12319    #[test]
12320    fn create_publication_for_table_list_lands_with_scope() {
12321        let mut e = Engine::new();
12322        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12323        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
12324        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
12325            .unwrap();
12326        let scope = e.publications().get("pub_a").cloned();
12327        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
12328            panic!("expected ForTables scope, got {scope:?}")
12329        };
12330        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12331    }
12332
12333    #[test]
12334    fn create_publication_all_tables_except_lands_with_scope() {
12335        let mut e = Engine::new();
12336        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
12337            .unwrap();
12338        let scope = e.publications().get("pub_a").cloned();
12339        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
12340            panic!("expected AllTablesExcept scope, got {scope:?}")
12341        };
12342        assert_eq!(ts, alloc::vec!["t3".to_string()]);
12343    }
12344
12345    #[test]
12346    fn show_publications_empty_returns_zero_rows() {
12347        let e = Engine::new();
12348        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12349        let QueryResult::Rows { rows, columns } = r else {
12350            panic!()
12351        };
12352        assert!(rows.is_empty());
12353        assert_eq!(columns.len(), 3);
12354        assert_eq!(columns[0].name, "name");
12355        assert_eq!(columns[1].name, "scope");
12356        assert_eq!(columns[2].name, "table_count");
12357    }
12358
12359    #[test]
12360    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
12361        let mut e = Engine::new();
12362        e.execute("CREATE PUBLICATION z_pub").unwrap();
12363        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
12364            .unwrap();
12365        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
12366            .unwrap();
12367        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
12368        let QueryResult::Rows { rows, .. } = r else {
12369            panic!()
12370        };
12371        assert_eq!(rows.len(), 3);
12372        // Alphabetical order: a_pub, m_pub, z_pub.
12373        let names: Vec<&str> = rows
12374            .iter()
12375            .map(|r| {
12376                if let Value::Text(s) = &r.values[0] {
12377                    s.as_str()
12378                } else {
12379                    panic!()
12380                }
12381            })
12382            .collect();
12383        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
12384        // Row 0 — a_pub scope summary + table_count = 2.
12385        match &rows[0].values[1] {
12386            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
12387            other => panic!("expected Text, got {other:?}"),
12388        }
12389        assert_eq!(rows[0].values[2], Value::Int(2));
12390        // Row 1 — m_pub.
12391        match &rows[1].values[1] {
12392            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
12393            other => panic!("expected Text, got {other:?}"),
12394        }
12395        assert_eq!(rows[1].values[2], Value::Int(1));
12396        // Row 2 — z_pub (AllTables → NULL count).
12397        match &rows[2].values[1] {
12398            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
12399            other => panic!("expected Text, got {other:?}"),
12400        }
12401        assert_eq!(rows[2].values[2], Value::Null);
12402    }
12403
12404    #[test]
12405    fn for_list_scopes_persist_across_snapshot() {
12406        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
12407        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
12408        let mut e = Engine::new();
12409        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
12410        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
12411            .unwrap();
12412        let snap = e.snapshot();
12413        let e2 = Engine::restore_envelope(&snap).unwrap();
12414        assert_eq!(e2.publications().len(), 2);
12415        let p1 = e2.publications().get("p1").cloned();
12416        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
12417            panic!("p1 scope lost: {p1:?}")
12418        };
12419        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
12420        let p2 = e2.publications().get("p2").cloned();
12421        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
12422            panic!("p2 scope lost: {p2:?}")
12423        };
12424        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
12425    }
12426
12427    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
12428
12429    #[test]
12430    fn create_subscription_lands_in_catalog_with_defaults() {
12431        let mut e = Engine::new();
12432        e.execute(
12433            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
12434        )
12435        .unwrap();
12436        let s = e.subscriptions().get("sub_a").cloned().expect("present");
12437        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
12438        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
12439        assert!(s.enabled);
12440        assert_eq!(s.last_received_pos, 0);
12441    }
12442
12443    #[test]
12444    fn create_subscription_duplicate_name_errors() {
12445        let mut e = Engine::new();
12446        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
12447            .unwrap();
12448        let err = e
12449            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
12450            .unwrap_err();
12451        assert!(
12452            alloc::format!("{err:?}").contains("DuplicateName"),
12453            "got {err:?}"
12454        );
12455    }
12456
12457    #[test]
12458    fn drop_subscription_silent_when_absent() {
12459        let mut e = Engine::new();
12460        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
12461        match r {
12462            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
12463            other => panic!("expected CommandOk, got {other:?}"),
12464        }
12465    }
12466
12467    #[test]
12468    fn subscription_advance_updates_last_pos_monotone() {
12469        let mut e = Engine::new();
12470        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
12471            .unwrap();
12472        assert!(e.subscription_advance("s", 100));
12473        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12474        assert!(e.subscription_advance("s", 50)); // stale → ignored
12475        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
12476        assert!(e.subscription_advance("s", 200));
12477        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
12478        assert!(!e.subscription_advance("missing", 1));
12479    }
12480
12481    #[test]
12482    fn show_subscriptions_returns_rows_ordered_by_name() {
12483        let mut e = Engine::new();
12484        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
12485            .unwrap();
12486        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
12487            .unwrap();
12488        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
12489        let QueryResult::Rows { rows, columns } = r else {
12490            panic!()
12491        };
12492        assert_eq!(rows.len(), 2);
12493        assert_eq!(columns.len(), 5);
12494        assert_eq!(columns[0].name, "name");
12495        assert_eq!(columns[4].name, "last_received_pos");
12496        // Alphabetical: a_sub, z_sub.
12497        let names: Vec<&str> = rows
12498            .iter()
12499            .map(|r| {
12500                if let Value::Text(s) = &r.values[0] {
12501                    s.as_str()
12502                } else {
12503                    panic!()
12504                }
12505            })
12506            .collect();
12507        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
12508        // Row 0: a_sub
12509        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
12510        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
12511        assert_eq!(rows[0].values[3], Value::Bool(true));
12512        assert_eq!(rows[0].values[4], Value::BigInt(0));
12513        // Row 1: z_sub — publications join with ", "
12514        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
12515    }
12516
12517    #[test]
12518    fn subscriptions_persist_across_snapshot_envelope_v4() {
12519        let mut e = Engine::new();
12520        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
12521            .unwrap();
12522        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
12523            .unwrap();
12524        e.subscription_advance("s2", 42);
12525        let snap = e.snapshot();
12526        let e2 = Engine::restore_envelope(&snap).unwrap();
12527        assert_eq!(e2.subscriptions().len(), 2);
12528        let s1 = e2.subscriptions().get("s1").unwrap();
12529        assert_eq!(s1.conn_str, "h=A");
12530        assert_eq!(
12531            s1.publications,
12532            alloc::vec!["p1".to_string(), "p2".to_string()]
12533        );
12534        assert_eq!(s1.last_received_pos, 0);
12535        let s2 = e2.subscriptions().get("s2").unwrap();
12536        assert_eq!(s2.last_received_pos, 42);
12537    }
12538
12539    #[test]
12540    fn v3_envelope_loads_with_empty_subscriptions() {
12541        // v3 snapshot (publications-only). Forge it by hand so we
12542        // verify v6.1.4 readers don't panic — they must surface
12543        // empty subscriptions and a populated publication table.
12544        let mut e = Engine::new();
12545        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
12546        let catalog = e.catalog.serialize();
12547        let users = crate::users::serialize_users(&e.users);
12548        let pubs = e.publications.serialize();
12549        let mut buf = Vec::new();
12550        buf.extend_from_slice(b"SPGENV01");
12551        buf.push(3u8); // v3
12552        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12553        buf.extend_from_slice(&catalog);
12554        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12555        buf.extend_from_slice(&users);
12556        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
12557        buf.extend_from_slice(&pubs);
12558        let crc = spg_crypto::crc32::crc32(&buf);
12559        buf.extend_from_slice(&crc.to_le_bytes());
12560
12561        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
12562        assert!(e2.subscriptions().is_empty());
12563        assert!(e2.publications().contains("pub_legacy"));
12564    }
12565
12566    #[test]
12567    fn create_subscription_allowed_inside_transaction() {
12568        let mut e = Engine::new();
12569        e.execute("BEGIN").unwrap();
12570        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
12571            .unwrap();
12572        e.execute("COMMIT").unwrap();
12573        assert!(e.subscriptions().contains("s"));
12574    }
12575
12576    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
12577    #[test]
12578    fn analyze_populates_histogram_bounds() {
12579        let mut e = Engine::new();
12580        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
12581            .unwrap();
12582        for i in 0..50 {
12583            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
12584                .unwrap();
12585        }
12586        e.execute("ANALYZE t").unwrap();
12587        let stats = e.statistics();
12588        let id_stats = stats.get("t", "id").unwrap();
12589        assert!(id_stats.histogram_bounds.len() >= 2);
12590        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
12591        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
12592        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
12593        assert_eq!(id_stats.n_distinct, 50);
12594    }
12595
12596    #[test]
12597    fn reanalyze_overwrites_prior_stats() {
12598        let mut e = Engine::new();
12599        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12600        for i in 0..10 {
12601            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12602                .unwrap();
12603        }
12604        e.execute("ANALYZE t").unwrap();
12605        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
12606        assert_eq!(n1, 10);
12607        for i in 10..30 {
12608            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12609                .unwrap();
12610        }
12611        e.execute("ANALYZE t").unwrap();
12612        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
12613        assert_eq!(n2, 30);
12614    }
12615
12616    #[test]
12617    fn analyze_unknown_table_errors() {
12618        let mut e = Engine::new();
12619        let err = e.execute("ANALYZE nonexistent").unwrap_err();
12620        assert!(matches!(
12621            err,
12622            EngineError::Storage(StorageError::TableNotFound { .. })
12623        ));
12624    }
12625
12626    #[test]
12627    fn bare_analyze_covers_all_user_tables() {
12628        let mut e = Engine::new();
12629        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
12630        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
12631        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
12632        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
12633        let r = e.execute("ANALYZE").unwrap();
12634        match r {
12635            QueryResult::CommandOk {
12636                affected,
12637                modified_catalog,
12638            } => {
12639                assert_eq!(affected, 2);
12640                assert!(modified_catalog);
12641            }
12642            other => panic!("expected CommandOk, got {other:?}"),
12643        }
12644        assert!(e.statistics().get("t1", "id").is_some());
12645        assert!(e.statistics().get("t2", "name").is_some());
12646    }
12647
12648    #[test]
12649    fn select_from_spg_statistic_returns_rows_per_column() {
12650        let mut e = Engine::new();
12651        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
12652            .unwrap();
12653        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
12654        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
12655        e.execute("ANALYZE t").unwrap();
12656        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
12657        let QueryResult::Rows { rows, columns } = r else {
12658            panic!()
12659        };
12660        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
12661        assert_eq!(columns.len(), 6);
12662        assert_eq!(columns[0].name, "table_name");
12663        assert_eq!(columns[4].name, "histogram_bounds");
12664        assert_eq!(columns[5].name, "cold_row_count");
12665        assert_eq!(rows.len(), 2, "one row per column of t");
12666        // Sorted by (table_name, column_name).
12667        match (&rows[0].values[0], &rows[0].values[1]) {
12668            (Value::Text(t), Value::Text(c)) => {
12669                assert_eq!(t, "t");
12670                // BTreeMap orders (table, column); columns "id" < "label".
12671                assert_eq!(c, "id");
12672            }
12673            _ => panic!(),
12674        }
12675    }
12676
12677    #[test]
12678    fn analyze_skips_vector_columns() {
12679        // Vector columns have their own stats shape (HNSW graph);
12680        // ANALYZE leaves them out of spg_statistic.
12681        let mut e = Engine::new();
12682        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
12683            .unwrap();
12684        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
12685        e.execute("ANALYZE t").unwrap();
12686        assert!(e.statistics().get("t", "id").is_some());
12687        assert!(e.statistics().get("t", "v").is_none());
12688    }
12689
12690    #[test]
12691    fn statistics_persist_across_envelope_v5_round_trip() {
12692        let mut e = Engine::new();
12693        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12694        for i in 0..20 {
12695            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12696                .unwrap();
12697        }
12698        e.execute("ANALYZE").unwrap();
12699        let snap = e.snapshot();
12700        let e2 = Engine::restore_envelope(&snap).unwrap();
12701        let s = e2.statistics().get("t", "id").unwrap();
12702        assert_eq!(s.n_distinct, 20);
12703    }
12704
12705    // ── v6.2.1 auto-analyze threshold ───────────────────────────
12706
12707    #[test]
12708    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
12709        // For a table with 0 rows then 10 inserts → modified=10,
12710        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
12711        // after the 10th INSERT the threshold is met.
12712        let mut e = Engine::new();
12713        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12714        for i in 0..9 {
12715            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12716                .unwrap();
12717        }
12718        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
12719        e.execute("INSERT INTO t VALUES (9)").unwrap();
12720        let needs = e.tables_needing_analyze();
12721        assert_eq!(needs, alloc::vec!["t".to_string()]);
12722    }
12723
12724    #[test]
12725    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
12726        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
12727        // Each new INSERT bumps both modified and row_count, so to
12728        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
12729        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
12730        // more (200 total mods, row_count=1200, threshold=120 → fire).
12731        let mut e = Engine::new();
12732        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12733        for i in 0..1000 {
12734            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12735                .unwrap();
12736        }
12737        e.execute("ANALYZE t").unwrap();
12738        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
12739        for i in 1000..1050 {
12740            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12741                .unwrap();
12742        }
12743        assert!(
12744            e.tables_needing_analyze().is_empty(),
12745            "50 inserts < threshold of ~105"
12746        );
12747        for i in 1050..1200 {
12748            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12749                .unwrap();
12750        }
12751        assert_eq!(
12752            e.tables_needing_analyze(),
12753            alloc::vec!["t".to_string()],
12754            "200 inserts > 0.1 × 1200 threshold"
12755        );
12756    }
12757
12758    #[test]
12759    fn auto_analyze_threshold_resets_after_analyze() {
12760        let mut e = Engine::new();
12761        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
12762        for i in 0..200 {
12763            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
12764                .unwrap();
12765        }
12766        assert!(!e.tables_needing_analyze().is_empty());
12767        e.execute("ANALYZE").unwrap();
12768        assert!(
12769            e.tables_needing_analyze().is_empty(),
12770            "ANALYZE must reset the counter"
12771        );
12772    }
12773
12774    #[test]
12775    fn auto_analyze_threshold_tracks_updates_and_deletes() {
12776        let mut e = Engine::new();
12777        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
12778            .unwrap();
12779        for i in 0..50 {
12780            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
12781                .unwrap();
12782        }
12783        e.execute("ANALYZE t").unwrap();
12784        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
12785        // × max(50, 100) = 10. So 25 >= 10 → trigger.
12786        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
12787        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
12788        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
12789    }
12790
12791    #[test]
12792    fn v4_envelope_loads_with_empty_statistics() {
12793        // Forge a v4 envelope by hand: catalog + users + pubs +
12794        // subs trailer, no statistics. A v6.2.0 reader must accept
12795        // it and surface an empty Statistics.
12796        let mut e = Engine::new();
12797        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
12798            .unwrap();
12799        let catalog = e.catalog.serialize();
12800        let users = crate::users::serialize_users(&e.users);
12801        let pubs = e.publications.serialize();
12802        let subs = e.subscriptions.serialize();
12803        let mut buf = Vec::new();
12804        buf.extend_from_slice(b"SPGENV01");
12805        buf.push(4u8);
12806        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12807        buf.extend_from_slice(&catalog);
12808        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12809        buf.extend_from_slice(&users);
12810        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
12811        buf.extend_from_slice(&pubs);
12812        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
12813        buf.extend_from_slice(&subs);
12814        let crc = spg_crypto::crc32::crc32(&buf);
12815        buf.extend_from_slice(&crc.to_le_bytes());
12816        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
12817        assert!(e2.statistics().is_empty());
12818    }
12819
12820    #[test]
12821    fn v1_v2_envelope_loads_with_empty_publications() {
12822        // A snapshot taken before v6.1.2 (no publication trailer,
12823        // envelope v2) must still deserialise — and the resulting
12824        // engine must report zero publications. Use the engine's own
12825        // round-trip with no publications: that emits v3 but with an
12826        // empty pubs block. Then forge a v2 envelope by hand to lock
12827        // the back-compat path.
12828        let mut e = Engine::new();
12829        // Force users to be non-empty so the snapshot takes the
12830        // envelope path rather than the bare-catalog fallback.
12831        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
12832            .unwrap();
12833
12834        // Forge an envelope v2: same shape as v3 but no pubs trailer.
12835        let catalog = e.catalog.serialize();
12836        let users = crate::users::serialize_users(&e.users);
12837        let mut buf = Vec::new();
12838        buf.extend_from_slice(b"SPGENV01");
12839        buf.push(2u8); // v2
12840        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
12841        buf.extend_from_slice(&catalog);
12842        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
12843        buf.extend_from_slice(&users);
12844        let crc = spg_crypto::crc32::crc32(&buf);
12845        buf.extend_from_slice(&crc.to_le_bytes());
12846
12847        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
12848        assert!(e2.publications().is_empty());
12849    }
12850}