Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod copy;
11pub mod describe;
12pub mod eval;
13pub mod fts;
14pub mod json;
15pub mod memoize;
16pub mod plan_cache;
17pub mod publications;
18pub mod query_stats;
19pub mod reorder;
20pub mod selectivity;
21pub mod statistics;
22pub mod subscriptions;
23pub mod triggers;
24pub mod users;
25
26pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
27
28use alloc::borrow::Cow;
29use alloc::boxed::Box;
30use alloc::collections::BTreeMap;
31use alloc::string::{String, ToString};
32use alloc::vec::Vec;
33use core::fmt;
34
35use spg_sql::ast::{
36    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
37    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
38    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
39    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
40    WindowFrame,
41};
42// v7.16.0 — re-export the parsed-statement AST so downstream
43// crates (spg-embedded → spg-sqlx) don't need a direct dep on
44// spg-sql for the prepare/bind handle.
45pub use spg_sql::ast::Statement as ParsedStatement;
46use spg_sql::parser::{self, ParseError};
47use spg_storage::{
48    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
49    TableSchema, Value, VecEncoding,
50};
51
52use crate::eval::{EvalContext, EvalError};
53
54/// Result of executing one statement.
55#[derive(Debug, Clone, PartialEq)]
56#[non_exhaustive]
57pub enum QueryResult {
58    /// DDL or DML succeeded.
59    ///
60    /// `affected` is the row count for `INSERT` and 0 elsewhere.
61    /// `modified_catalog` tells the server whether this statement
62    /// caused the *committed* catalog to change — it's the signal to
63    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
64    /// statements executed inside a transaction (those only touch the
65    /// shadow), and true for `COMMIT` and for writes outside a TX.
66    CommandOk {
67        affected: usize,
68        modified_catalog: bool,
69    },
70    /// `SELECT` returned a (possibly empty) row set.
71    Rows {
72        columns: Vec<ColumnSchema>,
73        rows: Vec<Row>,
74    },
75}
76
77/// All errors the engine can return.
78///
79/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
80/// must include a `_` arm so new variants in subsequent v7.x releases
81/// are not breaking changes.
82#[derive(Debug, Clone, PartialEq)]
83#[non_exhaustive]
84pub enum EngineError {
85    Parse(ParseError),
86    Storage(StorageError),
87    Eval(EvalError),
88    /// Front-end accepted a construct that the v0.x executor doesn't support.
89    Unsupported(String),
90    /// `BEGIN` while another transaction is already open.
91    TransactionAlreadyOpen,
92    /// `COMMIT` / `ROLLBACK` with no active transaction.
93    NoActiveTransaction,
94    /// v4.0 sentinel: `execute_readonly` got a statement that
95    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
96    /// The caller should retake the write lock and dispatch through
97    /// `execute(&mut self)` instead.
98    WriteRequired,
99    /// v4.2: a SELECT would have returned more rows than the
100    /// configured `max_query_rows` cap. Carries the cap.
101    RowLimitExceeded(usize),
102    /// v7.30.3 (mailrs round-26): a SELECT's join/filter
103    /// materialisation would have held more (approximate) heap
104    /// bytes than the configured `max_query_bytes` cap. The row
105    /// cap above counts rows; this counts bytes, because one row
106    /// can be a multi-MB mail body — 1000 fat rows pressure the
107    /// host long before any row ceiling trips. Carries the cap.
108    QueryBytesExceeded(usize),
109    /// v4.5: cooperative cancellation — the host (server's
110    /// per-query watchdog) set the cancel flag while a long-running
111    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
112    /// is discarded; the caller should surface this as a timeout
113    /// to the client.
114    Cancelled,
115}
116
117impl fmt::Display for EngineError {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        match self {
120            Self::Parse(e) => write!(f, "parse: {e}"),
121            Self::Storage(e) => write!(f, "storage: {e}"),
122            Self::Eval(e) => write!(f, "eval: {e}"),
123            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
124            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
125            Self::NoActiveTransaction => f.write_str("no active transaction"),
126            Self::WriteRequired => {
127                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
128            }
129            Self::RowLimitExceeded(n) => {
130                write!(f, "query exceeded max_query_rows={n}")
131            }
132            Self::QueryBytesExceeded(n) => {
133                write!(
134                    f,
135                    "query materialisation exceeded max_query_bytes={n} (set SPG_MAX_QUERY_BYTES to raise, 0 to disable)"
136                )
137            }
138            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
139        }
140    }
141}
142
143impl From<ParseError> for EngineError {
144    fn from(e: ParseError) -> Self {
145        Self::Parse(e)
146    }
147}
148impl From<StorageError> for EngineError {
149    fn from(e: StorageError) -> Self {
150        Self::Storage(e)
151    }
152}
153impl From<EvalError> for EngineError {
154    fn from(e: EvalError) -> Self {
155        Self::Eval(e)
156    }
157}
158
// NOTE: the paragraph below describes `Engine` (declared further down
// in this file). It is kept here as a plain comment so rustdoc does
// not attach it to `ClockFn`.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. When no clock is installed (`None`),
/// `NOW()` / `CURRENT_*` raise `Unsupported`.
pub type ClockFn = fn() -> i64;
168
/// Function pointer yielding 16 cryptographically random bytes.
///
/// As with `ClockFn`, the `no_std` engine cannot read /dev/urandom on
/// its own, so the host (`spg-server`) injects an OS-backed source.
/// When none is installed (`None`), SQL-driven `CREATE USER` falls back
/// to a deterministic salt derived from the username — acceptable in
/// tests; the server always installs a real RNG, so production paths
/// never take the fallback.
pub type SaltFn = fn() -> [u8; 16];
176
/// v7.17.0 Phase 2.3 — monotonic time source for deadline-aware
/// cancellation (PG `statement_timeout`). Returns microseconds
/// since some host-stable monotonic origin (typically the first
/// call into `Instant::now()` on the server). The engine never
/// calls `Instant::now()` directly so the crate stays `#![no_std]`.
pub type MonotonicNowFn = fn() -> u64;

/// Absolute deadline paired with the clock it is measured against.
/// Stored inside [`CancelToken`] when the host wires a deadline.
#[derive(Debug, Clone, Copy)]
struct Deadline {
    /// Host-supplied monotonic clock (microseconds).
    now_fn: MonotonicNowFn,
    /// Absolute deadline in `now_fn()` units (microseconds).
    deadline_us: u64,
}

/// v4.5 cooperative cancellation token. A long-running SELECT /
/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
/// and bails with `EngineError::Cancelled`. The host
/// (`spg-server`) creates an `AtomicBool` per query, spawns a
/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
///
/// `CancelToken::none()` is a no-op — used by the legacy `execute`
/// and `execute_readonly` entry points so existing callers don't
/// change.
#[derive(Debug, Clone, Copy)]
pub struct CancelToken<'a> {
    /// Host-owned cancel flag; `None` when no flag is attached.
    flag: Option<&'a core::sync::atomic::AtomicBool>,
    // v7.17.0 Phase 2.3 — when set, every existing `cancel.check()`
    // checkpoint also fires `EngineError::Cancelled` once
    // `(now_fn)() >= deadline_us`. No new check sites, no thread
    // spawn per query — the monotonic now-fn read is a vDSO
    // `clock_gettime(CLOCK_MONOTONIC)` (~20ns) and only runs when
    // the host actually wired a deadline (statement_timeout > 0).
    deadline: Option<Deadline>,
}
212
213impl<'a> CancelToken<'a> {
214    #[must_use]
215    pub const fn none() -> Self {
216        Self {
217            flag: None,
218            deadline: None,
219        }
220    }
221
222    #[must_use]
223    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
224        Self {
225            flag: Some(f),
226            deadline: None,
227        }
228    }
229
230    /// v7.17.0 Phase 2.3 — attach a monotonic deadline. `now_fn`
231    /// must return microseconds since a stable origin; the token
232    /// trips when `now_fn() >= deadline_us`. Compose with
233    /// `from_flag(...)` when both a watchdog flag and a per-statement
234    /// timeout are in play (e.g. server-wide `SPG_QUERY_TIMEOUT_MS`
235    /// plus session `statement_timeout`); the tighter of the two
236    /// wins by virtue of either signaling first.
237    #[must_use]
238    pub const fn with_deadline(mut self, now_fn: MonotonicNowFn, deadline_us: u64) -> Self {
239        self.deadline = Some(Deadline {
240            now_fn,
241            deadline_us,
242        });
243        self
244    }
245
246    #[must_use]
247    pub fn is_cancelled(self) -> bool {
248        if self
249            .flag
250            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
251        {
252            return true;
253        }
254        // Deadline check is the second branch so the "no timeout"
255        // hot path (`deadline: None`) elides the now-fn call —
256        // predicted-not-taken on the SLO INSERT loop.
257        if let Some(d) = self.deadline
258            && (d.now_fn)() >= d.deadline_us
259        {
260            return true;
261        }
262        false
263    }
264
265    /// Returns `Err(Cancelled)` if the token has been tripped.
266    /// Used at row-loop checkpoints to bail cooperatively without
267    /// scattering raw `is_cancelled` checks across the executor.
268    #[inline]
269    pub fn check(self) -> Result<(), EngineError> {
270        if self.is_cancelled() {
271            Err(EngineError::Cancelled)
272        } else {
273            Ok(())
274        }
275    }
276}
277
278// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
279// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
280//
281// Wraps a catalog blob + a user blob behind a small header so the
282// server can persist both atomically without inventing a new file.
283// Bare catalog blobs (v3.x) still load via `restore_envelope` since
284// the magic check fails fast and the function falls back to
285// `Catalog::deserialize`.
286//
287// Layout — v1 (v4.1, no CRC):
288//   [8 bytes magic "SPGENV01"]
289//   [u8 version = 1]
290//   [u32 catalog_len][catalog bytes]
291//   [u32 users_len][users bytes]
292//
293// Layout — v2 (v4.37, CRC32 of body):
294//   [8 bytes magic "SPGENV01"]
295//   [u8 version = 2]
296//   [u32 catalog_len][catalog bytes]
297//   [u32 users_len][users bytes]
298//   [u32 crc32]                      ← CRC32 of every byte before it.
299//
300// Layout — v3 (v6.1.2, publications trailer):
301//   [8 bytes magic "SPGENV01"]
302//   [u8 version = 3]
303//   [u32 catalog_len][catalog bytes]
304//   [u32 users_len][users bytes]
305//   [u32 pubs_len][publications bytes]
306//   [u32 crc32]
307//
308// Layout — v4 (v6.1.4, subscriptions trailer):
309//   [8 bytes magic "SPGENV01"]
310//   [u8 version = 4]
311//   [u32 catalog_len][catalog bytes]
312//   [u32 users_len][users bytes]
313//   [u32 pubs_len][publications bytes]
314//   [u32 subs_len][subscriptions bytes]
315//   [u32 crc32]
316//
317// Layout — v5 (v6.2.0, statistics trailer):
318//   [8 bytes magic "SPGENV01"]
319//   [u8 version = 5]
320//   [u32 catalog_len][catalog bytes]
321//   [u32 users_len][users bytes]
322//   [u32 pubs_len][publications bytes]
323//   [u32 subs_len][subscriptions bytes]
324//   [u32 stats_len][statistics bytes]      ← NEW
325//   [u32 crc32]
326//
327// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
328// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
329// statistics; v3 loads with empty subscriptions + statistics; v4
330// loads with empty statistics; v5 deserialises all three. Older
331// SPG versions reading a v5 envelope fall through the version
332// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
333// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
334
/// Eight-byte magic that opens every snapshot envelope.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// v4.1 layout: catalog + users, no CRC.
const ENVELOPE_VERSION_V1: u8 = 1;
/// v4.37 layout: v1 plus a trailing CRC32 of the body.
const ENVELOPE_VERSION_V2: u8 = 2;
/// v6.1.2 layout: adds the publications section.
const ENVELOPE_VERSION_V3: u8 = 3;
/// v6.1.4 layout: adds the subscriptions section.
const ENVELOPE_VERSION_V4: u8 = 4;
/// v6.2.0 layout: adds the statistics section.
const ENVELOPE_VERSION_V5: u8 = 5;
341
342fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
343    let mut out = Vec::with_capacity(
344        8 + 1
345            + 4
346            + catalog.len()
347            + 4
348            + users.len()
349            + 4
350            + pubs.len()
351            + 4
352            + subs.len()
353            + 4
354            + stats.len()
355            + 4,
356    );
357    out.extend_from_slice(ENVELOPE_MAGIC);
358    out.push(ENVELOPE_VERSION_V5);
359    out.extend_from_slice(
360        &u32::try_from(catalog.len())
361            .expect("≤ 4G catalog")
362            .to_le_bytes(),
363    );
364    out.extend_from_slice(catalog);
365    out.extend_from_slice(
366        &u32::try_from(users.len())
367            .expect("≤ 4G users")
368            .to_le_bytes(),
369    );
370    out.extend_from_slice(users);
371    out.extend_from_slice(
372        &u32::try_from(pubs.len())
373            .expect("≤ 4G publications")
374            .to_le_bytes(),
375    );
376    out.extend_from_slice(pubs);
377    out.extend_from_slice(
378        &u32::try_from(subs.len())
379            .expect("≤ 4G subscriptions")
380            .to_le_bytes(),
381    );
382    out.extend_from_slice(subs);
383    out.extend_from_slice(
384        &u32::try_from(stats.len())
385            .expect("≤ 4G statistics")
386            .to_le_bytes(),
387    );
388    out.extend_from_slice(stats);
389    let crc = spg_crypto::crc32::crc32(&out);
390    out.extend_from_slice(&crc.to_le_bytes());
391    out
392}
393
/// Outcome of envelope parsing, as produced by `split_envelope`.
enum EnvelopeParse<'a> {
    /// The buffer is not a recognised envelope; the caller falls back
    /// to treating it as a bare catalog blob (preserves v3.x
    /// readability).
    Bare,
    /// The sections of a valid envelope, borrowed from the input
    /// buffer. Optional sections are `None` when the envelope version
    /// predates them.
    Pair {
        catalog: &'a [u8],
        users: &'a [u8],
        /// Present for v3, v4 and v5 envelopes.
        publications: Option<&'a [u8]>,
        /// Present for v4 and v5 envelopes.
        subscriptions: Option<&'a [u8]>,
        /// Present for v5 envelopes.
        statistics: Option<&'a [u8]>,
    },
    /// A CRC-carrying envelope (v2 and later) whose stored CRC32 does
    /// not match the one computed over the body.
    CrcMismatch {
        /// CRC32 stored in the envelope trailer.
        expected: u32,
        /// CRC32 computed over the bytes preceding the trailer.
        computed: u32,
    },
}
414
415/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
416/// `Bare` for a buffer that doesn't look like an envelope (v3.x
417/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
418/// whose trailing CRC32 doesn't match the body.
419fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
420    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
421        return EnvelopeParse::Bare;
422    }
423    let version = buf[8];
424    if !matches!(
425        version,
426        ENVELOPE_VERSION_V1
427            | ENVELOPE_VERSION_V2
428            | ENVELOPE_VERSION_V3
429            | ENVELOPE_VERSION_V4
430            | ENVELOPE_VERSION_V5
431    ) {
432        return EnvelopeParse::Bare;
433    }
434    let mut p = 9usize;
435    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
436        return EnvelopeParse::Bare;
437    };
438    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
439        return EnvelopeParse::Bare;
440    };
441    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
442    p += 4;
443    if p + cat_len + 4 > buf.len() {
444        return EnvelopeParse::Bare;
445    }
446    let catalog = &buf[p..p + cat_len];
447    p += cat_len;
448    let Some(user_len_bytes) = buf.get(p..p + 4) else {
449        return EnvelopeParse::Bare;
450    };
451    let Ok(user_len_arr) = user_len_bytes.try_into() else {
452        return EnvelopeParse::Bare;
453    };
454    let user_len = u32::from_le_bytes(user_len_arr) as usize;
455    p += 4;
456    if p + user_len > buf.len() {
457        return EnvelopeParse::Bare;
458    }
459    let users = &buf[p..p + user_len];
460    p += user_len;
461    let publications = if matches!(
462        version,
463        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
464    ) {
465        // [u32 pubs_len][publications bytes]
466        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
467            return EnvelopeParse::Bare;
468        };
469        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
470            return EnvelopeParse::Bare;
471        };
472        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
473        p += 4;
474        if p + pubs_len > buf.len() {
475            return EnvelopeParse::Bare;
476        }
477        let pubs_slice = &buf[p..p + pubs_len];
478        p += pubs_len;
479        Some(pubs_slice)
480    } else {
481        None
482    };
483    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
484        // [u32 subs_len][subscriptions bytes]
485        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
486            return EnvelopeParse::Bare;
487        };
488        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
489            return EnvelopeParse::Bare;
490        };
491        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
492        p += 4;
493        if p + subs_len > buf.len() {
494            return EnvelopeParse::Bare;
495        }
496        let subs_slice = &buf[p..p + subs_len];
497        p += subs_len;
498        Some(subs_slice)
499    } else {
500        None
501    };
502    let statistics = if version == ENVELOPE_VERSION_V5 {
503        // [u32 stats_len][statistics bytes]
504        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
505            return EnvelopeParse::Bare;
506        };
507        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
508            return EnvelopeParse::Bare;
509        };
510        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
511        p += 4;
512        if p + stats_len > buf.len() {
513            return EnvelopeParse::Bare;
514        }
515        let stats_slice = &buf[p..p + stats_len];
516        p += stats_len;
517        Some(stats_slice)
518    } else {
519        None
520    };
521    if matches!(
522        version,
523        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
524    ) {
525        if p + 4 != buf.len() {
526            return EnvelopeParse::Bare;
527        }
528        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
529            return EnvelopeParse::Bare;
530        };
531        let expected = u32::from_le_bytes(crc_arr);
532        let computed = spg_crypto::crc32::crc32(&buf[..p]);
533        if expected != computed {
534            return EnvelopeParse::CrcMismatch { expected, computed };
535        }
536    } else if p != buf.len() {
537        // v1: must end exactly at the users section.
538        return EnvelopeParse::Bare;
539    }
540    EnvelopeParse::Pair {
541        catalog,
542        users,
543        publications,
544        subscriptions,
545        statistics,
546    }
547}
548
/// v4.41.1 opaque transaction handle. Handed out by
/// `Engine::alloc_tx_id` and threaded through `Engine::execute_in` so
/// dispatch can tell which in-flight TX a statement belongs to.
///
/// v4.41.1 keeps at most one active slot at runtime (dispatch holds
/// `engine.write()` across the wrap, same as v4.34); the map shape
/// exists so v4.42 can turn on N in-flight implicit TXs without
/// reshuffling the engine internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);

/// Reserved slot used by `Engine::execute(sql)` — the legacy
/// single-global-shadow path that every legacy caller (engine
/// self-tests, spg-cli, spg-embedded, startup replay) goes through
/// implicitly. Handles from `alloc_tx_id` start at 1.
pub const IMPLICIT_TX: TxId = TxId(0);
564
/// v6.7.3 — default segment-size threshold (4 MiB) for `COMPACT COLD
/// SEGMENTS` when the statement gives no explicit target. A segment
/// is eligible to merge when its `OwnedSegment::bytes().len()` is
/// **strictly** below this value. spg-server overrides it from
/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES`.
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 << 20;
571
572/// Per-slot transaction state. Held inside `tx_catalogs[tx_id]` for the
573/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
574/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
575/// rolls back (slot removed, catalog discarded).
576#[derive(Debug, Default, Clone)]
577struct TxState {
578    /// The TX's shadow copy of the catalog. Started as a clone of
579    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
580    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
581    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
582    catalog: Catalog,
583    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
584    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
585    /// `ROLLBACK TO <name>` restores from the entry and pops everything
586    /// after it; `RELEASE <name>` discards the entry and everything
587    /// after; COMMIT/ROLLBACK clears the whole stack.
588    savepoints: Vec<(String, Catalog)>,
589}
590
591/// v7.11.0 — frozen read-only view of the engine's committed state.
592/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
593/// catalog, statistics, clock function, and row-cap config — the
594/// four fields the `execute_readonly` path actually reads. Cheap to
595/// `Clone` (each clone shares the underlying `PersistentVec` row
596/// storage; only the trie root pointers copy). Send + Sync so a
597/// snapshot can be moved across `tokio::task::spawn_blocking`
598/// boundaries without coordination.
599///
600/// The contract: a snapshot reflects the engine's state at the
601/// moment `clone_snapshot()` returned. Subsequent writes to the
602/// engine are NOT visible. Callers who need fresher data take a
603/// new snapshot.
604#[derive(Debug, Clone)]
605pub struct CatalogSnapshot {
606    catalog: Catalog,
607    statistics: statistics::Statistics,
608    clock: Option<ClockFn>,
609    max_query_rows: Option<usize>,
610}
611
612#[derive(Debug, Default)]
613pub struct Engine {
614    /// Committed catalog — what survives `Engine::snapshot()` and what
615    /// outside-TX `SELECT`s read.
616    catalog: Catalog,
617    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
618    /// v4.41.1 runtime invariant: at most one entry (single-writer
619    /// model unchanged). v4.42 will let dispatch hold multiple entries
620    /// concurrently for group commit + engine MVCC.
621    tx_catalogs: BTreeMap<TxId, TxState>,
622    /// Which slot the next exec_* call should mutate. Set by
623    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
624    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
625    /// write goes straight against `catalog`).
626    current_tx: Option<TxId>,
627    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
628    /// reserved for `IMPLICIT_TX`.
629    next_tx_id: u64,
630    /// v7.22 (round-13 T3) — session string-literal dialect. `false`
631    /// (default) = PG semantics (backslash literal, `''` escape);
632    /// `true` = MySQL semantics (`\'` etc.). Flipped by the
633    /// deterministic session signals each dump emits: `SET sql_mode`
634    /// (only MySQL clients/dumps send it) turns it on,
635    /// `SET standard_conforming_strings = on` (every pg_dump
636    /// preamble) turns it off. The plan cache is cleared on every
637    /// flip — the same SQL text lexes differently per dialect.
638    backslash_escapes: bool,
639    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
640    /// / `CURRENT_DATE`. Set by the host environment.
641    clock: Option<ClockFn>,
642    /// v4.1 cryptographic RNG for per-user password salt. Set by the
643    /// host. `None` means SQL-driven `CREATE USER` uses a
644    /// deterministic fallback — see `SaltFn`.
645    salt_fn: Option<SaltFn>,
646    /// v4.2 per-query row cap. `None` = unlimited. When set, a
647    /// SELECT that materialises more than `n` rows returns
648    /// `EngineError::RowLimitExceeded`. Enforced before the result
649    /// is shaped into wire frames so a runaway scan can't blow the
650    /// server's heap.
651    max_query_rows: Option<usize>,
652    /// v7.30.3 (mailrs round-26) per-query byte cap on join/filter
653    /// materialisation. `None` = unlimited. Approximate net
654    /// accounting (Value heap payloads + per-cell enum overhead)
655    /// charged at every point the join pipeline clones rows;
656    /// crossing the cap raises `EngineError::QueryBytesExceeded`
657    /// instead of pressuring the host into reclaim livelock. The
658    /// host wires this to `SPG_MAX_QUERY_BYTES` (embed defaults it
659    /// ON; the server keeps its allocator-precise budget as the
660    /// outer layer).
661    max_query_bytes: Option<usize>,
662    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
663    /// the server decides what that means at the auth boundary
664    /// (open mode vs legacy single-password mode). User CRUD goes
665    /// through `create_user`/`drop_user`/`verify_user`; persistence
666    /// rides the snapshot envelope alongside the catalog.
667    users: UserStore,
668    /// v6.1.2 logical-replication publication catalog. Empty until
669    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
670    /// trailer (see `build_envelope`).
671    publications: publications::Publications,
672    /// v6.1.4 logical-replication subscription catalog. Empty until
673    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
674    /// trailer.
675    subscriptions: subscriptions::Subscriptions,
676    /// v6.2.0 — per-column statistics for the cost-based optimizer.
677    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
678    /// table. Persistence rides the v5 envelope trailer.
679    statistics: statistics::Statistics,
680    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
681    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
682    /// the snapshot envelope (rebuilt on demand after restart).
683    plan_cache: plan_cache::PlanCache,
684    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
685    /// surfaced via `spg_stat_query` virtual table. Updated by the
686    /// `execute_*` paths after a successful execute.
687    query_stats: query_stats::QueryStats,
688    /// v6.5.2 — connection-state provider callback. spg-server
689    /// registers a function at startup that snapshots its
690    /// per-pgwire-connection registry into `ActivityRow`s; engine
691    /// reads through it on every `SELECT * FROM spg_stat_activity`.
692    /// `None` ⇒ no-data (returns empty rows; matches the no_std
693    /// embedded callers that don't run pgwire).
694    activity_provider: Option<ActivityProvider>,
695    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
696    /// activity_provider: spg-server registers both at startup;
697    /// engine reads through on `SELECT * FROM spg_audit_chain` and
698    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
699    audit_chain_provider: Option<AuditChainProvider>,
700    audit_verifier: Option<AuditVerifier>,
701    /// v6.5.6 — slow-query log threshold in microseconds. When set,
702    /// every successful execute whose elapsed exceeds the threshold
703    /// gets fed to the registered slow-query log callback (so
704    /// spg-server can emit a structured log line). Default `None`
705    /// = no slow-query logging.
706    slow_query_threshold_us: Option<u64>,
707    slow_query_logger: Option<SlowQueryLogger>,
708    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
709    /// Only `default_text_search_config` is consumed by the engine
710    /// today (the FTS function dispatcher reads it when
711    /// `to_tsvector(text)` is called without an explicit config).
712    /// All other names are accepted + recorded so PG-dump output
713    /// loads, but have no behavioural effect.
714    session_params: BTreeMap<String, String>,
715    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
716    /// Each time the engine executes a `DeferredEmbeddedStmt` it
717    /// increments this; the recursive `execute_stmt_with_cancel`
718    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
719    /// to bound runaway cascades (trigger A's UPDATE on table B
720    /// fires trigger B which UPDATEs table A which fires trigger
721    /// A again…). Reset to 0 once the original DML returns.
722    trigger_recursion_depth: u32,
723    /// v7.14.0 — when `SET FOREIGN_KEY_CHECKS=0` is in effect
724    /// (mysqldump preamble), the FK existence + arity check at
725    /// CREATE TABLE time is deferred. FKs referencing a
726    /// not-yet-existing parent land in `pending_foreign_keys`
727    /// keyed by child table; `SET FOREIGN_KEY_CHECKS=1` drains
728    /// the queue and resolves each FK against the now-complete
729    /// catalog. Empty by default; the queue is drained on every
730    /// `RESET ALL` too.
731    foreign_key_checks: bool,
732    /// v7.16.2 — true on the temp Engine an outer
733    /// `exec_select_with_meta_views` builds, telling that
734    /// temp engine "stop short-circuiting into the meta-view
735    /// path — your catalog already has the materialised
736    /// tables; just run the regular SELECT." Without this we'd
737    /// infinite-loop since the meta-view name (e.g.
738    /// `__spg_info_columns`) still triggers
739    /// `select_references_meta_view`.
740    meta_views_materialised: bool,
741    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
742}
743
/// v7.12.7 — upper bound on how deeply trigger-emitted embedded SQL
/// may nest. A depth of 16 is far beyond what a normal trigger graph
/// needs, yet still stops an infinite trigger loop from wedging the
/// engine.
const MAX_TRIGGER_RECURSION: u32 = 16;
748
/// v6.5.6 — signature of the slow-query log callback. The engine
/// invokes it with `(sql, elapsed_us)` once for each successful
/// execute whose elapsed time crosses the configured threshold.
pub type SlowQueryLogger = fn(&str, u64);
753
754/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
755/// state. Round-trips through `Engine::execute` to recreate the
756/// same schema (sans data + indexes — indexes are emitted as a
757/// separate `CREATE INDEX` chain in `spg_database_ddl`).
758fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
759    let mut out = alloc::format!("CREATE TABLE {name} (");
760    for (i, col) in columns.iter().enumerate() {
761        if i > 0 {
762            out.push_str(", ");
763        }
764        out.push_str(&col.name);
765        out.push(' ');
766        out.push_str(&render_data_type(col.ty));
767        if !col.nullable {
768            out.push_str(" NOT NULL");
769        }
770        if col.auto_increment {
771            out.push_str(" AUTO_INCREMENT");
772        }
773    }
774    out.push(')');
775    out
776}
777
778fn render_data_type(ty: DataType) -> String {
779    match ty {
780        DataType::SmallInt => "SMALLINT".into(),
781        DataType::Int => "INT".into(),
782        DataType::BigInt => "BIGINT".into(),
783        DataType::Float => "FLOAT".into(),
784        DataType::Text => "TEXT".into(),
785        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
786        DataType::Char(n) => alloc::format!("CHAR({n})"),
787        DataType::Bool => "BOOL".into(),
788        DataType::Vector { dim, encoding } => match encoding {
789            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
790            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
791            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
792        },
793        DataType::Numeric { precision, scale } => {
794            alloc::format!("NUMERIC({precision},{scale})")
795        }
796        DataType::Date => "DATE".into(),
797        DataType::Timestamp => "TIMESTAMP".into(),
798        DataType::Interval => "INTERVAL".into(),
799        DataType::Json => "JSON".into(),
800        DataType::Jsonb => "JSONB".into(),
801        DataType::Timestamptz => "TIMESTAMPTZ".into(),
802        DataType::Bytes => "BYTEA".into(),
803        DataType::TextArray => "TEXT[]".into(),
804        DataType::IntArray => "INT[]".into(),
805        DataType::BigIntArray => "BIGINT[]".into(),
806        DataType::TsVector => "TSVECTOR".into(),
807        DataType::TsQuery => "TSQUERY".into(),
808        DataType::Uuid => "UUID".into(),
809        DataType::Time => "TIME".into(),
810        DataType::Year => "YEAR".into(),
811        DataType::TimeTz => "TIMETZ".into(),
812        DataType::Money => "MONEY".into(),
813        DataType::Range(k) => k.keyword().into(),
814        DataType::Hstore => "HSTORE".into(),
815        DataType::IntArray2D => "INT[][]".into(),
816        DataType::BigIntArray2D => "BIGINT[][]".into(),
817        DataType::TextArray2D => "TEXT[][]".into(),
818    }
819}
820
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
///
/// NOTE(review): rows are produced outside this file (through an
/// `ActivityProvider`). Field notes below that the original docs do
/// not state are inferred from the field names and should be
/// confirmed against the producer.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    /// Session identifier (presumably the backend/connection pid — confirm).
    pub pid: u32,
    /// User the session belongs to (presumably the login name — confirm).
    pub user: String,
    /// Start timestamp; the `_us` suffix suggests microseconds — confirm
    /// the epoch and whether it is session or statement start.
    pub started_at_us: i64,
    /// SQL text reported as currently executing for the session.
    pub current_sql: String,
    /// What the session is waiting on, if anything (format defined by the producer).
    pub wait_event: String,
    /// Elapsed time; the `_us` suffix suggests microseconds — confirm.
    pub elapsed_us: i64,
    /// Whether the session reports an open transaction.
    pub in_transaction: bool,
    /// v7.17 Phase 2.4 — startup-param `application_name` (or the
    /// last value the client sent via `SET application_name = '...'`).
    /// Empty when the client never declared one.
    pub application_name: String,
}
838
/// v6.5.2 — provider callback type. Fresh snapshot returned each
/// call; engine doesn't cache the slice.
///
/// A plain `fn` pointer taking no arguments and returning an owned
/// `Vec<ActivityRow>`, so the provider must obtain its data from
/// state it can reach without captures.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
842
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
///
/// NOTE(review): field notes below are inferred from the field
/// names; the rows are built outside this file — confirm against
/// the `AuditEntry` producer.
#[derive(Debug, Clone)]
pub struct AuditRow {
    /// Sequence number of the entry within the chain.
    pub seq: i64,
    /// Entry timestamp; the `_ms` suffix suggests milliseconds — confirm the epoch.
    pub ts_ms: i64,
    /// Hash of the previous entry, presumably hex-encoded text.
    pub prev_hash_hex: String,
    /// Hash of this entry, presumably hex-encoded text.
    pub entry_hash_hex: String,
    /// SQL text recorded for the entry.
    pub sql: String,
}
853
/// v6.5.3 — chain-table provider. spg-server registers a fn
/// pointer that snapshots the audit log as owned `AuditRow`s.
/// Paired with [`AuditVerifier`], which verifies the same log.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log and returns
/// `(verified_count, broken_at_seq)` — `broken_at_seq` is `-1` on
/// a clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
860
861impl Engine {
862    pub fn new() -> Self {
863        Self {
864            catalog: Catalog::new(),
865            tx_catalogs: BTreeMap::new(),
866            current_tx: None,
867            backslash_escapes: false,
868            next_tx_id: 1,
869            clock: None,
870            salt_fn: None,
871            max_query_rows: None,
872            max_query_bytes: None,
873            users: UserStore::new(),
874            publications: publications::Publications::new(),
875            subscriptions: subscriptions::Subscriptions::new(),
876            statistics: statistics::Statistics::new(),
877            plan_cache: plan_cache::PlanCache::new(),
878            query_stats: query_stats::QueryStats::new(),
879            activity_provider: None,
880            audit_chain_provider: None,
881            audit_verifier: None,
882            slow_query_threshold_us: None,
883            slow_query_logger: None,
884            session_params: BTreeMap::new(),
885            trigger_recursion_depth: 0,
886            foreign_key_checks: true,
887            meta_views_materialised: false,
888            pending_foreign_keys: Vec::new(),
889        }
890    }
891
892    /// v7.11.0 — clone the engine's committed catalog + read-time
893    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
894    /// backed by `PersistentVec`; cloning is O(log n) per table).
895    /// Subsequent writes to this engine are invisible to the
896    /// snapshot; the snapshot is self-contained and can be moved
897    /// to another thread for concurrent `execute_readonly_on_snapshot`
898    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
899    /// and any other read-fanout pattern.
900    #[must_use]
901    pub fn clone_snapshot(&self) -> CatalogSnapshot {
902        CatalogSnapshot {
903            catalog: self.active_catalog().clone(),
904            statistics: self.statistics.clone(),
905            clock: self.clock,
906            max_query_rows: self.max_query_rows,
907        }
908    }
909
910    /// v7.11.1 — execute a read-only SQL statement against a
911    /// `CatalogSnapshot` without touching this engine. Same
912    /// semantics as `execute_readonly` but parameterised on the
913    /// snapshot's catalog. Reject DDL/DML the same way
914    /// `execute_readonly` does. Static-on-Self so the caller can
915    /// dispatch without holding an `Engine` borrow alongside the
916    /// snapshot.
917    pub fn execute_readonly_on_snapshot(
918        snapshot: &CatalogSnapshot,
919        sql: &str,
920    ) -> Result<QueryResult, EngineError> {
921        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
922    }
923
924    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
925    /// cancellation. Builds a transient `Engine` over the snapshot
926    /// state, runs `execute_readonly_with_cancel`, drops. The
927    /// transient engine is cheap to construct (no I/O; everything
928    /// is just struct moves) and lets the existing read path stay
929    /// untouched.
930    pub fn execute_readonly_on_snapshot_with_cancel(
931        snapshot: &CatalogSnapshot,
932        sql: &str,
933        cancel: CancelToken<'_>,
934    ) -> Result<QueryResult, EngineError> {
935        let transient = Engine {
936            catalog: snapshot.catalog.clone(),
937            statistics: snapshot.statistics.clone(),
938            clock: snapshot.clock,
939            max_query_rows: snapshot.max_query_rows,
940            ..Engine::default()
941        };
942        transient.execute_readonly_with_cancel(sql, cancel)
943    }
944
945    /// v7.18 — execute a previously-prepared `Statement` against a
946    /// `CatalogSnapshot` in read-only mode. Mirror of
947    /// [`Engine::execute_prepared`] for the fan-out read path:
948    /// substitutes `Expr::Placeholder(n)` nodes from `params`, then
949    /// dispatches through [`Engine::execute_readonly_stmt_with_cancel`]
950    /// (writes / DDL hit `EngineError::WriteRequired`). Static-on-Self
951    /// so multiple readonly threads can dispatch against the same
952    /// snapshot concurrently without an `Engine` borrow.
953    ///
954    /// **Schema drift contract**. The `Statement` was prepared against
955    /// some prior catalog. If the snapshot's catalog has since
956    /// diverged (DDL renamed / dropped a referenced column / table),
957    /// execution surfaces the normal `EngineError` — same shape as
958    /// PG's "cached plan must not change result type". Caller decides
959    /// whether to re-prepare; engine does NOT auto-retry.
960    pub fn execute_readonly_prepared_on_snapshot(
961        snapshot: &CatalogSnapshot,
962        stmt: Statement,
963        params: &[Value],
964    ) -> Result<QueryResult, EngineError> {
965        Self::execute_readonly_prepared_on_snapshot_with_cancel(
966            snapshot,
967            stmt,
968            params,
969            CancelToken::none(),
970        )
971    }
972
973    /// v7.18 — cancellable variant of
974    /// [`Engine::execute_readonly_prepared_on_snapshot`].
975    pub fn execute_readonly_prepared_on_snapshot_with_cancel(
976        snapshot: &CatalogSnapshot,
977        mut stmt: Statement,
978        params: &[Value],
979        cancel: CancelToken<'_>,
980    ) -> Result<QueryResult, EngineError> {
981        cancel.check()?;
982        substitute_placeholders(&mut stmt, params)?;
983        let transient = Engine {
984            catalog: snapshot.catalog.clone(),
985            statistics: snapshot.statistics.clone(),
986            clock: snapshot.clock,
987            max_query_rows: snapshot.max_query_rows,
988            ..Engine::default()
989        };
990        transient.execute_readonly_stmt_with_cancel(stmt, cancel)
991    }
992
993    /// v7.18 — describe a prepared `Statement` against a
994    /// `CatalogSnapshot`. Same `(parameter_oids, output_columns)`
995    /// shape as [`Engine::describe_prepared`]; resolves names
996    /// against the snapshot's catalog instead of `self`. Pure
997    /// function — no engine state read.
998    pub fn describe_prepared_on_snapshot(
999        snapshot: &CatalogSnapshot,
1000        stmt: &Statement,
1001    ) -> (Vec<u32>, Vec<ColumnSchema>) {
1002        describe::describe_prepared(stmt, &snapshot.catalog)
1003    }
1004
1005    /// v7.18 — does this SQL string classify as read-only? Parses
1006    /// `sql` with the engine parser and consults
1007    /// `Statement::is_readonly()`. A parse error returns `false`
1008    /// (route to the writer path so the user sees the canonical
1009    /// parse error from the writer's simple-query dispatch).
1010    /// Static-on-Self so the spg-sqlx connection layer can ask
1011    /// without an `Engine` borrow.
1012    #[must_use]
1013    pub fn is_readonly_sql(sql: &str) -> bool {
1014        parser::parse_statement(sql)
1015            .as_ref()
1016            .map(spg_sql::ast::Statement::is_readonly)
1017            .unwrap_or(false)
1018    }
1019
1020    /// v7.18 — parse + plan a SQL string against a
1021    /// `CatalogSnapshot`. Mirror of [`Engine::prepare`] for the
1022    /// readonly fan-out path: applies the same prepare-time
1023    /// transforms (clock rewrite, `GROUP BY ALL` expansion, ORDER
1024    /// BY position resolve, cost-based JOIN reorder) but resolves
1025    /// catalog + statistics against the snapshot, not a live
1026    /// engine. Static-on-Self — `AsyncReadHandle::prepare` calls
1027    /// this without taking the writer lock so multiple read
1028    /// handles can prepare concurrently against frozen views.
1029    ///
1030    /// # Errors
1031    /// Propagates [`ParseError`] from the parser. Schema
1032    /// validation deferred to execute time, same as
1033    /// [`Engine::prepare`].
1034    pub fn prepare_on_snapshot(
1035        snapshot: &CatalogSnapshot,
1036        sql: &str,
1037    ) -> Result<Statement, ParseError> {
1038        let mut stmt = parser::parse_statement(sql)?;
1039        let now_micros = snapshot.clock.map(|f| f());
1040        rewrite_clock_calls(&mut stmt, now_micros);
1041        if let Statement::Select(s) = &mut stmt {
1042            expand_group_by_all(s);
1043            resolve_order_by_position(s);
1044            reorder::reorder_joins(s, &snapshot.catalog, &snapshot.statistics);
1045        }
1046        Ok(stmt)
1047    }
1048
1049    /// Construct an engine restored from a previously-snapshotted catalog
1050    /// (see `snapshot()`).
1051    pub fn restore(catalog: Catalog) -> Self {
1052        Self {
1053            catalog,
1054            tx_catalogs: BTreeMap::new(),
1055            current_tx: None,
1056            backslash_escapes: false,
1057            next_tx_id: 1,
1058            clock: None,
1059            salt_fn: None,
1060            max_query_rows: None,
1061            max_query_bytes: None,
1062            users: UserStore::new(),
1063            publications: publications::Publications::new(),
1064            subscriptions: subscriptions::Subscriptions::new(),
1065            statistics: statistics::Statistics::new(),
1066            plan_cache: plan_cache::PlanCache::new(),
1067            query_stats: query_stats::QueryStats::new(),
1068            activity_provider: None,
1069            audit_chain_provider: None,
1070            audit_verifier: None,
1071            slow_query_threshold_us: None,
1072            slow_query_logger: None,
1073            session_params: BTreeMap::new(),
1074            trigger_recursion_depth: 0,
1075            foreign_key_checks: true,
1076            meta_views_materialised: false,
1077            pending_foreign_keys: Vec::new(),
1078        }
1079    }
1080
1081    /// Restore an engine + user table from a v4.1 envelope produced
1082    /// by `snapshot_with_users()`. Falls back to plain catalog-only
1083    /// restore if the envelope magic isn't present (so v3.x snapshot
1084    /// files still load). v6.1.2 adds the optional publications
1085    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
1086    /// empty publication table.
1087    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
1088        match split_envelope(buf) {
1089            EnvelopeParse::Pair {
1090                catalog: catalog_bytes,
1091                users: user_bytes,
1092                publications: pub_bytes,
1093                subscriptions: sub_bytes,
1094                statistics: stats_bytes,
1095            } => {
1096                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
1097                let users = users::deserialize_users(user_bytes)
1098                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
1099                let publications = match pub_bytes {
1100                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
1101                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
1102                    })?,
1103                    None => publications::Publications::new(),
1104                };
1105                let subscriptions = match sub_bytes {
1106                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
1107                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
1108                    })?,
1109                    None => subscriptions::Subscriptions::new(),
1110                };
1111                let statistics = match stats_bytes {
1112                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
1113                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
1114                    })?,
1115                    None => statistics::Statistics::new(),
1116                };
1117                Ok(Self {
1118                    catalog,
1119                    tx_catalogs: BTreeMap::new(),
1120                    current_tx: None,
1121                    backslash_escapes: false,
1122                    next_tx_id: 1,
1123                    clock: None,
1124                    salt_fn: None,
1125                    max_query_rows: None,
1126                    max_query_bytes: None,
1127                    users,
1128                    publications,
1129                    subscriptions,
1130                    statistics,
1131                    plan_cache: plan_cache::PlanCache::new(),
1132                    query_stats: query_stats::QueryStats::new(),
1133                    activity_provider: None,
1134                    audit_chain_provider: None,
1135                    audit_verifier: None,
1136                    slow_query_threshold_us: None,
1137                    slow_query_logger: None,
1138                    session_params: BTreeMap::new(),
1139                    trigger_recursion_depth: 0,
1140                    foreign_key_checks: true,
1141                    meta_views_materialised: false,
1142                    pending_foreign_keys: Vec::new(),
1143                })
1144            }
1145            EnvelopeParse::CrcMismatch { expected, computed } => {
1146                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1147                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
1148                ))))
1149            }
1150            EnvelopeParse::Bare => {
1151                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
1152                Ok(Self::restore(catalog))
1153            }
1154        }
1155    }
1156
    /// Borrow the engine's user store (read-only).
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
1160
1161    /// `salt` is supplied by the caller (the host has a random
1162    /// source; the engine is `no_std`). Caller should pass a fresh
1163    /// 16-byte random value per user.
1164    pub fn create_user(
1165        &mut self,
1166        name: &str,
1167        password: &str,
1168        role: Role,
1169        salt: [u8; 16],
1170    ) -> Result<(), UserError> {
1171        self.users.create(name, password, role, salt)?;
1172        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
1173        // auth can verify without re-running PBKDF2 per attempt.
1174        // Uses a fresh salt from the host RNG (falls back to a
1175        // deterministic per-username salt when no RNG is wired, same
1176        // as the legacy hash path).
1177        let scram_salt = self.salt_fn.map_or_else(
1178            || {
1179                let mut s = [0u8; users::SCRAM_SALT_LEN];
1180                let digest = spg_crypto::hash(name.as_bytes());
1181                // Use bytes 16..32 of BLAKE3 so we don't reuse the
1182                // exact same fallback salt as the BLAKE3 hash path.
1183                s.copy_from_slice(&digest[16..32]);
1184                s
1185            },
1186            |f| f(),
1187        );
1188        self.users
1189            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
1190        Ok(())
1191    }
1192
    /// Remove user `name` from the engine's user store.
    ///
    /// # Errors
    /// Returns whatever `UserError` the store's `drop` reports.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
1196
    /// Check `password` for user `name` against the user store.
    ///
    /// Forwards to the store's `verify`; the result is `Some(role)`
    /// or `None` exactly as the store returns it (presumably `None`
    /// for an unknown user or a wrong password — confirm in
    /// `users::UserStore::verify`).
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
1200
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    ///
    /// Consumes and returns the engine; any previously attached
    /// clock is replaced.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
1208
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    ///
    /// Consumes and returns the engine; any previously attached
    /// salt source is replaced. `create_user` calls it to obtain the
    /// SCRAM salt.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
1216
    /// Builder: cap the number of rows a single SELECT may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
    /// the bound is checked inside the executor so a runaway
    /// catalog scan can't allocate millions of rows before the
    /// server gets a chance to reject the result.
    ///
    /// Consumes and returns the engine; `n` replaces any earlier cap.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
1227
    /// Builder: cap the approximate heap bytes a single SELECT's
    /// join/filter materialisation may hold. Exceeding the cap
    /// raises `EngineError::QueryBytesExceeded`. Rows are the wrong
    /// unit when one row carries a multi-MB body (mailrs round-26:
    /// 1000-row batches of full mail text walked a 15 GiB host into
    /// reclaim livelock without ever tripping a row ceiling).
    ///
    /// Consumes and returns the engine; `n` replaces any earlier cap.
    #[must_use]
    pub const fn with_max_query_bytes(mut self, n: usize) -> Self {
        self.max_query_bytes = Some(n);
        self
    }
1239
    /// The *committed* catalog. Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
    /// the shadow. Tests that inspect outside-TX state should use this.
    ///
    /// This always borrows `self.catalog` directly; it never looks
    /// at `tx_catalogs` / `current_tx`.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
1246
1247    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1248    /// adds the rule that an open TX's shadow is never snapshotted — only the
1249    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1250    /// when there are users to persist; an empty user table snapshots as the
1251    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1252    /// adds publications to the envelope condition: either non-empty
1253    /// users OR non-empty publications now triggers the envelope path.
1254    pub fn snapshot(&self) -> Vec<u8> {
1255        if self.users.is_empty()
1256            && self.publications.is_empty()
1257            && self.subscriptions.is_empty()
1258            && self.statistics.is_empty()
1259        {
1260            self.catalog.serialize()
1261        } else {
1262            build_envelope(
1263                &self.catalog.serialize(),
1264                &users::serialize_users(&self.users),
1265                &self.publications.serialize(),
1266                &self.subscriptions.serialize(),
1267                &self.statistics.serialize(),
1268            )
1269        }
1270    }
1271
    /// True when at least one TX slot is in flight. v4.41.1 runtime
    /// invariant: at most one slot active at a time (dispatch holds
    /// `engine.write()` across the entire wrap). v4.42 will let this
    /// return true with multiple slots concurrently.
    ///
    /// Implemented as "`tx_catalogs` is non-empty"; `current_tx` is
    /// not consulted.
    pub fn in_transaction(&self) -> bool {
        !self.tx_catalogs.is_empty()
    }
1279
1280    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1281    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1282    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1283    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1284    /// sequentially under a single `engine.write()` so each task's
1285    /// mutations accumulate into shared state, then either keeps the
1286    /// accumulated state (fsync OK) or restores the pre-image via
1287    /// `replace_catalog` (fsync err).
1288    pub fn alloc_tx_id(&mut self) -> TxId {
1289        let id = TxId(self.next_tx_id);
1290        self.next_tx_id = self.next_tx_id.saturating_add(1);
1291        id
1292    }
1293
    /// v4.42 — atomically replace the live catalog. Used by the
    /// commit-barrier leader to roll back a group whose batched
    /// fsync failed: the leader snapshots `engine.catalog().clone()`
    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
    /// at group start, sequentially applies each task's BEGIN+sql+
    /// COMMIT under the same write lock to accumulate mutations
    /// into shared state, batches the WAL bytes, fsyncs once, and
    /// on failure calls this with the pre-image to undo every
    /// task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    ///
    /// The body is a single assignment to `self.catalog`; the
    /// previous catalog value is dropped.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        self.catalog = catalog;
    }
1316
    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
    /// so tests + the spg-server freezer can drive a freeze without
    /// reaching into the private `active_catalog_mut`. v6.7.4
    /// parallel freezer will build on this surface.
    ///
    /// Marks the table's cached `cold_row_count` stale because the
    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
    ///
    /// `table_name`, `index_name` and `max_rows` are forwarded
    /// unchanged to the catalog call.
    ///
    /// # Errors
    /// A storage failure from the freeze is returned as
    /// `EngineError::Storage`; in that case the stale-marking step
    /// is not reached.
    pub fn freeze_oldest_to_cold(
        &mut self,
        table_name: &str,
        index_name: &str,
        max_rows: usize,
    ) -> Result<spg_storage::FreezeReport, EngineError> {
        let report = self
            .active_catalog_mut()
            .freeze_oldest_to_cold(table_name, index_name, max_rows)
            .map_err(EngineError::Storage)?;
        // Only reached on success; a table that cannot be looked up
        // is silently skipped.
        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
            t.mark_cold_row_count_stale();
        }
        Ok(report)
    }
1339
1340    /// v6.7.5 — public shim used by the spg-server follower's
1341    /// segment-forwarding receiver. Registers a cold-tier segment
1342    /// at a specific id (the master's id, as transmitted on the
1343    /// wire) so the follower's BTree-Cold locators stay byte-
1344    /// identical with the master's. Wraps
1345    /// `Catalog::load_segment_bytes_at` under the standard
1346    /// clone-mutate-replace pattern.
1347    ///
1348    /// Returns `Ok(())` on success **and** on the "slot already
1349    /// occupied" case — a follower mid-reconnect may receive a
1350    /// segment chunk for a segment_id it already has on disk
1351    /// (forwarded last session); the caller should treat that
1352    /// path as a no-op rather than a fatal error.
1353    pub fn receive_cold_segment(
1354        &mut self,
1355        segment_id: u32,
1356        bytes: Vec<u8>,
1357    ) -> Result<(), EngineError> {
1358        let mut new_cat = self.catalog.clone();
1359        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1360            Ok(()) => {
1361                self.replace_catalog(new_cat);
1362                Ok(())
1363            }
1364            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1365            Err(e) => Err(EngineError::Storage(e)),
1366        }
1367    }
1368
1369    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1370    /// driving every BTree index on every user table. Returns one
1371    /// `(table, index, report)` triple for each merge that
1372    /// actually happened (no-op (table, index) pairs are filtered
1373    /// out so callers can size persist-side work to the live
1374    /// merges). Caller is responsible for persisting each
1375    /// `report.merged_segment_bytes` and updating the on-disk
1376    /// segment registry; engine layer is no_std and never
1377    /// touches disk.
1378    ///
1379    /// Marks every touched table's cached `cold_row_count` stale
1380    /// — compaction GC'd some shadowed rows, so the count must be
1381    /// re-derived on the next ANALYZE.
1382    pub fn compact_cold_segments_with_target(
1383        &mut self,
1384        target_segment_bytes: u64,
1385    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1386        let table_names = self.active_catalog().table_names();
1387        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1388        for tname in table_names {
1389            if is_internal_table_name(&tname) {
1390                continue;
1391            }
1392            let idx_names: Vec<String> = {
1393                let Some(t) = self.active_catalog().get(&tname) else {
1394                    continue;
1395                };
1396                t.indices()
1397                    .iter()
1398                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1399                    .map(|i| i.name.clone())
1400                    .collect()
1401            };
1402            for iname in idx_names {
1403                let report = self
1404                    .active_catalog_mut()
1405                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1406                    .map_err(EngineError::Storage)?;
1407                if report.merged_segment_id.is_some() {
1408                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1409                        t.mark_cold_row_count_stale();
1410                    }
1411                    reports.push((tname.clone(), iname, report));
1412                }
1413            }
1414        }
1415        Ok(reports)
1416    }
1417
1418    fn active_catalog(&self) -> &Catalog {
1419        match self.current_tx {
1420            Some(t) => self
1421                .tx_catalogs
1422                .get(&t)
1423                .map_or(&self.catalog, |s| &s.catalog),
1424            None => &self.catalog,
1425        }
1426    }
1427
1428    /// v7.12.4 — snapshot every row-level trigger on `table` that
1429    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1430    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1431    /// referenced function definition. Returned as a vec of owned
1432    /// `FunctionDef` so the row-write loop can fire them without
1433    /// holding a borrow on the catalog (which would conflict with
1434    /// the table.insert / update_row / delete mutable borrows).
1435    /// v7.16.2 — top-level DO block executor. Walks the
1436    /// PlPgSqlBlock via [`triggers::execute_do_block_top_level`],
1437    /// then runs each collected EmbeddedSql statement through
1438    /// the engine's regular execute path (NOT deferred — DO is
1439    /// outside any row-write borrow). Errors from any step
1440    /// abort the block and propagate verbatim.
1441    /// v7.16.2 — resolve every subquery inside a PlPgSqlBlock's
1442    /// expression slots so the downstream trigger-flavoured
1443    /// evaluator (which expects pre-resolved Expr::Literal /
1444    /// Binary chains) doesn't trip on raw Exists/ScalarSubquery
1445    /// nodes. Walks IF conditions, Assign values, RAISE args.
1446    /// EmbeddedSql statements re-enter the engine for execution
1447    /// later so their subqueries get the normal SELECT-side
1448    /// resolution.
1449    fn resolve_plpgsql_block_subqueries(
1450        &self,
1451        block: &mut spg_sql::ast::PlPgSqlBlock,
1452        cancel: CancelToken<'_>,
1453    ) -> Result<(), EngineError> {
1454        for d in &mut block.declarations {
1455            if let Some(e) = &mut d.default {
1456                self.resolve_expr_subqueries(e, cancel)?;
1457            }
1458        }
1459        self.resolve_plpgsql_stmts_subqueries(&mut block.statements, cancel)
1460    }
1461
1462    fn resolve_plpgsql_stmts_subqueries(
1463        &self,
1464        stmts: &mut [spg_sql::ast::PlPgSqlStmt],
1465        cancel: CancelToken<'_>,
1466    ) -> Result<(), EngineError> {
1467        use spg_sql::ast::PlPgSqlStmt;
1468        for stmt in stmts {
1469            match stmt {
1470                PlPgSqlStmt::Assign { value, .. } => {
1471                    self.resolve_expr_subqueries(value, cancel)?;
1472                }
1473                PlPgSqlStmt::Return(spg_sql::ast::ReturnTarget::Expr(e)) => {
1474                    self.resolve_expr_subqueries(e, cancel)?;
1475                }
1476                PlPgSqlStmt::Return(_) => {}
1477                PlPgSqlStmt::If {
1478                    branches,
1479                    else_branch,
1480                } => {
1481                    for (cond, body) in branches.iter_mut() {
1482                        self.resolve_expr_subqueries(cond, cancel)?;
1483                        self.resolve_plpgsql_stmts_subqueries(body, cancel)?;
1484                    }
1485                    self.resolve_plpgsql_stmts_subqueries(else_branch, cancel)?;
1486                }
1487                PlPgSqlStmt::Raise { args, .. } => {
1488                    for a in args {
1489                        self.resolve_expr_subqueries(a, cancel)?;
1490                    }
1491                }
1492                PlPgSqlStmt::EmbeddedSql(_) => {
1493                    // Embedded SQL goes back through execute_stmt
1494                    // _with_cancel which runs the SELECT-side
1495                    // resolver itself; nothing to do here.
1496                }
1497                PlPgSqlStmt::SelectInto { body, .. } => {
1498                    // SELECT INTO runs through Engine::execute
1499                    // when reached, so subquery resolution
1500                    // happens via the normal SELECT-side path.
1501                    // Still walk for nested subqueries inside
1502                    // the SELECT body so eval doesn't trip.
1503                    self.resolve_select_subqueries(body, cancel)?;
1504                }
1505            }
1506        }
1507        Ok(())
1508    }
1509
1510    fn exec_do_block(
1511        &mut self,
1512        body: spg_sql::ast::PlPgSqlBlock,
1513    ) -> Result<QueryResult, EngineError> {
1514        // v7.16.2 — pre-resolve every subquery the body's
1515        // expressions reach. `eval::eval_expr` errors on
1516        // unresolved Exists/ScalarSubquery/InSubquery; the
1517        // top-level SELECT path runs `resolve_select_subqueries`
1518        // for the caller — for plpgsql we have to do the
1519        // equivalent before the body walker runs. Catches the
1520        // mailrs idiom `IF EXISTS (SELECT 1 FROM
1521        // information_schema.columns WHERE …) THEN …`.
1522        let mut body = body;
1523        self.resolve_plpgsql_block_subqueries(&mut body, CancelToken::none())?;
1524        let dts = self
1525            .session_param("default_text_search_config")
1526            .map(String::from);
1527        // v7.16.2 — SELECT … INTO resolver. The walker calls
1528        // this synchronously when it hits a SelectInto stmt
1529        // so the IF / locals scope sees the result before the
1530        // next statement. Body walks for trigger paths (no
1531        // resolver) error loudly on SelectInto.
1532        // SAFETY: the closure shares this engine borrow with
1533        // the walker, but the walker only borrows for the
1534        // duration of `execute_do_block_top_level` and doesn't
1535        // reach back into the engine through any other path —
1536        // so the recursive `&mut` is sound. We use a `RefCell`
1537        // for interior mutability since the closure is
1538        // Fn-shaped.
1539        let engine_cell = core::cell::RefCell::new(&mut *self);
1540        let resolver_fn =
1541            |stmt: &spg_sql::ast::Statement| -> Result<Value, triggers::TriggerError> {
1542                let mut eng = engine_cell.borrow_mut();
1543                let r = eng
1544                    .execute_stmt_with_cancel(stmt.clone(), CancelToken::none())
1545                    .map_err(|e| triggers::TriggerError::EvalFailed {
1546                        function: "DO".into(),
1547                        cause: eval::EvalError::TypeMismatch {
1548                            detail: alloc::format!("SELECT … INTO failed: {e}"),
1549                        },
1550                    })?;
1551                match r {
1552                    QueryResult::Rows { rows, .. } => match rows.into_iter().next() {
1553                        Some(row) => Ok(row.values.into_iter().next().unwrap_or(Value::Null)),
1554                        None => Ok(Value::Null),
1555                    },
1556                    _ => Err(triggers::TriggerError::EvalFailed {
1557                        function: "DO".into(),
1558                        cause: eval::EvalError::TypeMismatch {
1559                            detail: "SELECT … INTO body must be a SELECT".into(),
1560                        },
1561                    }),
1562                }
1563            };
1564        let collected =
1565            triggers::execute_do_block_top_level(&body, dts.as_deref(), Some(&resolver_fn))
1566                .map_err(|e| {
1567                    EngineError::Storage(StorageError::Corrupt(alloc::format!("DO: {e}")))
1568                })?;
1569        // engine_cell goes out of scope here, releasing the &mut self borrow
1570        // Run each embedded statement against the engine. The
1571        // statements were already substitute-walked for NEW/OLD/
1572        // locals (those evaluate to engine literals before they
1573        // land here) so dispatch is plain execute_stmt_with_cancel.
1574        for stmt in collected {
1575            // v7.16.2 — preserve current_tx wrap so an outer
1576            // BEGIN/COMMIT around a DO block keeps the
1577            // EmbeddedSql writes inside that same tx slot.
1578            self.execute_stmt_with_cancel(stmt, CancelToken::none())?;
1579        }
1580        Ok(QueryResult::CommandOk {
1581            affected: 0,
1582            modified_catalog: !self.in_transaction(),
1583        })
1584    }
1585
1586    fn snapshot_row_triggers(
1587        &self,
1588        table: &str,
1589        event: &str,
1590        timing: &str,
1591    ) -> Vec<spg_storage::FunctionDef> {
1592        let cat = self.active_catalog();
1593        cat.triggers()
1594            .iter()
1595            .filter(|t| {
1596                // v7.16.1 — skip disabled triggers (mailrs
1597                // round-9 A.2.b — pg_dump --disable-triggers).
1598                t.enabled
1599                    && t.table == table
1600                    && t.timing.eq_ignore_ascii_case(timing)
1601                    && t.for_each.eq_ignore_ascii_case("row")
1602                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1603            })
1604            .filter_map(|t| cat.functions().get(&t.function).cloned())
1605            .collect()
1606    }
1607
1608    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1609    /// function with its `UPDATE OF cols` filter (mailrs round-5
1610    /// G7). Empty filter Vec means "fire unconditionally", matching
1611    /// the v7.12 behaviour.
1612    fn snapshot_update_row_triggers(
1613        &self,
1614        table: &str,
1615        timing: &str,
1616    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1617        let cat = self.active_catalog();
1618        cat.triggers()
1619            .iter()
1620            .filter(|t| {
1621                // v7.16.1 — skip disabled triggers.
1622                t.enabled
1623                    && t.table == table
1624                    && t.timing.eq_ignore_ascii_case(timing)
1625                    && t.for_each.eq_ignore_ascii_case("row")
1626                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1627            })
1628            .filter_map(|t| {
1629                cat.functions()
1630                    .get(&t.function)
1631                    .cloned()
1632                    .map(|fd| (fd, t.update_columns.clone()))
1633            })
1634            .collect()
1635    }
1636
1637    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1638    /// Called by the INSERT / UPDATE / DELETE executors after
1639    /// their main row-write loop returns. Each statement runs
1640    /// inside the same cancel scope as the firing DML and bumps
1641    /// the recursion counter; nested embedded SQL beyond
1642    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1643    /// a trigger-graph cycle surfaces as a query failure instead
1644    /// of stack-blowing the engine.
1645    fn execute_deferred_trigger_stmts(
1646        &mut self,
1647        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1648        cancel: CancelToken<'_>,
1649    ) -> Result<(), EngineError> {
1650        for d in deferred {
1651            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1652                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1653                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1654                     {:?} would push past the {} cap — check for trigger cycles)",
1655                    self.trigger_recursion_depth,
1656                    d.function,
1657                    MAX_TRIGGER_RECURSION,
1658                ))));
1659            }
1660            self.trigger_recursion_depth += 1;
1661            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1662            self.trigger_recursion_depth -= 1;
1663            res?;
1664        }
1665        Ok(())
1666    }
1667
1668    fn active_catalog_mut(&mut self) -> &mut Catalog {
1669        let tx = self.current_tx;
1670        match tx {
1671            Some(t) => match self.tx_catalogs.get_mut(&t) {
1672                Some(s) => &mut s.catalog,
1673                None => &mut self.catalog,
1674            },
1675            None => &mut self.catalog,
1676        }
1677    }
1678
1679    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1680    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1681    /// every other statement, so the caller can fall through to the
1682    /// `&mut self` `execute` path under a write lock. Engine state is
1683    /// not mutated even on the success path (`rewrite_clock_calls`
1684    /// and `resolve_order_by_position` both mutate the locally-owned
1685    /// AST, not `self`).
1686    ///
1687    /// **v4.0 concurrency**: this is the entry point the server takes
1688    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1689    /// parallel without serialising on a single mutex.
1690    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1691        self.execute_readonly_with_cancel(sql, CancelToken::none())
1692    }
1693
1694    /// v4.5 — read path with cooperative cancellation. Token's
1695    /// `is_cancelled` is checked at the start (so a watchdog that
1696    /// already fired returns Cancelled immediately) and at row-loop
1697    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1698    /// don't bother checking.
1699    pub fn execute_readonly_with_cancel(
1700        &self,
1701        sql: &str,
1702        cancel: CancelToken<'_>,
1703    ) -> Result<QueryResult, EngineError> {
1704        cancel.check()?;
1705        let mut stmt = parser::parse_statement_with(sql, self.backslash_escapes)?;
1706        let now_micros = self.clock.map(|f| f());
1707        rewrite_clock_calls(&mut stmt, now_micros);
1708        if let Statement::Select(s) = &mut stmt {
1709            resolve_order_by_position(s);
1710            // v6.2.3 — cost-based JOIN reorder (read path).
1711            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1712        }
1713        self.execute_readonly_stmt_with_cancel(stmt, cancel)
1714    }
1715
1716    /// v7.18 — readonly dispatch on a pre-parsed `Statement`.
1717    /// Internal helper shared by the SQL-string path
1718    /// ([`Engine::execute_readonly_with_cancel`]) and the prepared-
1719    /// statement path ([`Engine::execute_readonly_prepared_on_snapshot_with_cancel`]).
1720    /// Statement-level transforms (clock rewrite, ORDER BY position,
1721    /// JOIN reorder, placeholder substitution) are the caller's
1722    /// responsibility — this helper assumes the AST is already
1723    /// execution-ready. Writes / DDL hit
1724    /// [`EngineError::WriteRequired`] the same way the SQL path does.
1725    fn execute_readonly_stmt_with_cancel(
1726        &self,
1727        stmt: Statement,
1728        cancel: CancelToken<'_>,
1729    ) -> Result<QueryResult, EngineError> {
1730        let result = match stmt {
1731            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1732            Statement::ShowTables => Ok(self.exec_show_tables()),
1733            Statement::ShowDatabases => Ok(self.exec_show_databases()),
1734            Statement::ShowCreateTable(name) => self.exec_show_create_table(&name),
1735            Statement::ShowIndexes(name) => self.exec_show_indexes(&name),
1736            Statement::ShowStatus => Ok(self.exec_show_status()),
1737            Statement::ShowVariables => Ok(self.exec_show_variables()),
1738            Statement::ShowProcesslist => Ok(self.exec_show_processlist()),
1739            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1740            Statement::ShowUsers => Ok(self.exec_show_users()),
1741            Statement::ShowPublications => Ok(self.exec_show_publications()),
1742            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1743            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1744                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1745            )),
1746            Statement::Explain(e) => self.exec_explain(&e, cancel),
1747            _ => Err(EngineError::WriteRequired),
1748        };
1749        self.enforce_row_limit(result)
1750    }
1751
1752    /// v4.2: cap result-set size. Applied after the executor
1753    /// materialises rows but before they leave the engine — wrapping
1754    /// every Rows-returning exec_* function would scatter the check.
1755    fn enforce_row_limit(
1756        &self,
1757        result: Result<QueryResult, EngineError>,
1758    ) -> Result<QueryResult, EngineError> {
1759        if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1760            && rows.len() > cap
1761        {
1762            return Err(EngineError::RowLimitExceeded(cap));
1763        }
1764        result
1765    }
1766
1767    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1768        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1769    }
1770
1771    /// v4.5 — write path with cooperative cancellation. Same dispatch
1772    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1773    /// a separate entry point for backward-compat with the v4.5
1774    /// public API.
1775    pub fn execute_with_cancel(
1776        &mut self,
1777        sql: &str,
1778        cancel: CancelToken<'_>,
1779    ) -> Result<QueryResult, EngineError> {
1780        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1781    }
1782
1783    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1784    /// slot identified by `tx_id` so spg-server dispatch can scope
1785    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1786    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1787    /// every other caller (engine self-tests, replay, spg-embedded)
1788    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1789    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1790        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1791    }
1792
1793    /// v4.41.1 write path with cooperative cancellation + explicit TX
1794    /// scope. Sets `self.current_tx` for the duration of the call so
1795    /// every `exec_*` helper transparently sees its TX's shadow
1796    /// catalog and savepoint stack; restores on exit so the field is
1797    /// only valid mid-call (no leakage across calls).
1798    pub fn execute_in_with_cancel(
1799        &mut self,
1800        sql: &str,
1801        tx_id: TxId,
1802        cancel: CancelToken<'_>,
1803    ) -> Result<QueryResult, EngineError> {
1804        let saved = self.current_tx;
1805        self.current_tx = Some(tx_id);
1806        let result = self.execute_inner_with_cancel(sql, cancel);
1807        self.current_tx = saved;
1808        result
1809    }
1810
1811    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1812    /// resulting [`Statement`] can be cached and re-executed via
1813    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1814    /// the simple-query path would synthesise internally (clock
1815    /// rewrites + ORDER BY position-ref resolution applied at
1816    /// prepare time, since both are session-independent). The
1817    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1818    /// nodes; they're resolved to concrete values per-call by
1819    /// `execute_prepared`'s substitution walk.
1820    ///
1821    /// Pgwire's `Parse` (P) message lands here.
1822    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1823        let mut stmt = parser::parse_statement_with(sql, self.backslash_escapes)?;
1824        let now_micros = self.clock.map(|f| f());
1825        rewrite_clock_calls(&mut stmt, now_micros);
1826        if let Statement::Select(s) = &mut stmt {
1827            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1828            // SELECT-list item BEFORE position / alias resolution so
1829            // downstream passes see the explicit list.
1830            expand_group_by_all(s);
1831            resolve_order_by_position(s);
1832            // v6.2.3 — cost-based JOIN reorder. No-op for
1833            // single-table FROMs or any non-INNER join shape.
1834            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1835        }
1836        Ok(stmt)
1837    }
1838
1839    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1840    /// the plan cache on hit, runs the full `prepare()` path on miss
1841    /// and inserts the resulting plan before returning. Skipping the
1842    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1843    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1844    ///
1845    /// Returns a cloned `Statement` (not a borrow) because the
1846    /// pgwire layer owns its `PreparedStmt` map per-session and the
1847    /// engine-level cache must stay available for other sessions.
1848    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1849    /// it replaces.
1850    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1851        // v6.3.1 — version-aware lookup. If the cached plan was
1852        // prepared before the most recent ANALYZE, evict and replan.
1853        let current_version = self.statistics.version();
1854        if let Some(plan) = self.plan_cache.get(sql) {
1855            if plan.statistics_version == current_version {
1856                return Ok(plan.stmt.clone());
1857            }
1858            // Stale entry — fall through to evict + re-prepare.
1859        }
1860        self.plan_cache.evict(sql);
1861        let stmt = self.prepare(sql)?;
1862        let source_tables = plan_cache::collect_source_tables(&stmt);
1863        let plan = plan_cache::PreparedPlan {
1864            stmt: stmt.clone(),
1865            statistics_version: current_version,
1866            source_tables,
1867            describe_columns: alloc::vec::Vec::new(),
1868        };
1869        self.plan_cache.insert(String::from(sql), plan);
1870        Ok(stmt)
1871    }
1872
1873    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1874    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1875        &self.plan_cache
1876    }
1877
1878    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1879    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1880        &mut self.plan_cache
1881    }
1882
1883    /// v6.3.3 — Describe a prepared `Statement` without executing.
1884    /// Returns `(parameter_oids, output_columns)`. Empty
1885    /// `output_columns` means the statement has no row-producing
1886    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1887    /// — pgwire layer maps that to a `NoData` reply.
1888    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1889        describe::describe_prepared(stmt, self.active_catalog())
1890    }
1891
1892    /// v6.1.1 — execute a [`Statement`] previously returned by
1893    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1894    /// nodes for the corresponding [`Value`] in `params` (1-based
1895    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1896    /// are decoded into typed `Value`s by the pgwire layer before
1897    /// this call so the resulting AST hits the same execution
1898    /// path as a simple query — no SQL re-parse.
1899    ///
1900    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1901    pub fn execute_prepared(
1902        &mut self,
1903        stmt: Statement,
1904        params: &[Value],
1905    ) -> Result<QueryResult, EngineError> {
1906        self.execute_prepared_with_cancel(stmt, params, CancelToken::none())
1907    }
1908
1909    /// v7.17.0 Phase 2.3 — prepared-statement entry that honors a
1910    /// caller-supplied `CancelToken`. Mirrors `execute_prepared`'s
1911    /// `current_tx` save/restore so the extended-query path stays
1912    /// transactionally consistent with the simple-query path.
1913    pub fn execute_prepared_with_cancel(
1914        &mut self,
1915        mut stmt: Statement,
1916        params: &[Value],
1917        cancel: CancelToken<'_>,
1918    ) -> Result<QueryResult, EngineError> {
1919        substitute_placeholders(&mut stmt, params)?;
1920        // v7.16.0 — set `current_tx` for the duration of the
1921        // dispatch so the `exec_*` helpers see the right TX
1922        // slot (matches what `execute_in_with_cancel` does for
1923        // simple-query). Pre-v7.16 the simple-query path
1924        // worked because every public entry point routed
1925        // through `execute_in_with_cancel`; the prepared path
1926        // skipped the wrap and so its INSERTs/UPDATEs landed
1927        // in the no-tx default slot, silently invisible to a
1928        // BEGIN/COMMIT-bracketed flow. Caught by spg-sqlx's
1929        // first transaction-visibility test.
1930        let saved = self.current_tx;
1931        self.current_tx = Some(IMPLICIT_TX);
1932        let result = self.execute_stmt_with_cancel(stmt, cancel);
1933        self.current_tx = saved;
1934        result
1935    }
1936
1937    fn execute_inner_with_cancel(
1938        &mut self,
1939        sql: &str,
1940        cancel: CancelToken<'_>,
1941    ) -> Result<QueryResult, EngineError> {
1942        cancel.check()?;
1943        let stmt = self.prepare(sql)?;
1944        // v6.5.1 — wrap the executor with a wall-clock window so we
1945        // can record into spg_stat_query. Skip when the engine has
1946        // no clock attached (no_std embedded callers).
1947        let start_us = self.clock.map(|f| f());
1948        let result = self.execute_stmt_with_cancel(stmt, cancel);
1949        if let (Some(t0), Ok(_)) = (start_us, &result) {
1950            let now = self.clock.map_or(t0, |f| f());
1951            let elapsed = now.saturating_sub(t0).max(0) as u64;
1952            self.query_stats.record(sql, elapsed, now as u64);
1953            // v6.5.6 — slow-query log: fire callback when elapsed
1954            // exceeds the configured floor.
1955            if let (Some(threshold), Some(logger)) =
1956                (self.slow_query_threshold_us, self.slow_query_logger)
1957                && elapsed >= threshold
1958            {
1959                logger(sql, elapsed);
1960            }
1961        }
1962        result
1963    }
1964
1965    fn execute_stmt_with_cancel(
1966        &mut self,
1967        stmt: Statement,
1968        cancel: CancelToken<'_>,
1969    ) -> Result<QueryResult, EngineError> {
1970        cancel.check()?;
1971        // v7.17.0 Phase 1.1 — pre-resolve nextval / currval /
1972        // setval calls in the statement tree. Walks SELECT
1973        // projection, INSERT VALUES, UPDATE SET, DELETE WHERE,
1974        // and DEFAULT exprs; replaces sequence FunctionCall
1975        // nodes with concrete Literal values minted against the
1976        // catalog. This is the only place that mutates sequence
1977        // state from a SELECT-shaped path (exec_select_cancel is
1978        // `&self` and can't reach the catalog mutably).
1979        //
1980        // Fast-path: when no sequences exist anywhere in the
1981        // catalog (the typical hot-path INSERT load), skip the
1982        // walker entirely. Single map-emptiness check on the
1983        // catalog beats walking every expression on every call.
1984        let mut stmt = stmt;
1985        // v7.17 dump-compat — the fast-path check
1986        // `sequences().is_empty()` skips pre-resolve when no
1987        // sequence exists in the *currently active* catalog
1988        // snapshot. The committed catalog or the implicit-TX
1989        // catalog may legitimately disagree on this between
1990        // CREATE SEQUENCE and a later setval(): always run the
1991        // resolver — the walk is O(expr-count) and dwarfed by
1992        // the parse cost we just paid.
1993        self.pre_resolve_sequence_calls_in_statement(&mut stmt)?;
1994        let result = match stmt {
1995            Statement::CreateTable(s) => self.exec_create_table(s),
1996            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
1997            // CommandOk with affected=0; modified_catalog=false so
1998            // the WAL doesn't grow a useless entry. mailrs F3.
1999            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
2000                affected: 0,
2001                modified_catalog: false,
2002            }),
2003            // v7.16.2 — DO $$ ... $$ block. mailrs round-10 A.2
2004            // — the pre-v7.9.27 no-op SILENTLY swallowed every
2005            // mailrs migrate-038/-040/-042 idempotent rename
2006            // (the IF EXISTS … THEN ALTER … END block never
2007            // ran). v7.16.2 dispatches to exec_do_block which
2008            // runs the PlPgSqlBlock at top level via the same
2009            // execute_stmts machinery the trigger executor
2010            // uses (NEW=None, OLD=None — DO blocks have no
2011            // row context).
2012            Statement::DoBlock(body) => self.exec_do_block(body),
2013            // v7.14.0 — empty-statement no-op for pg_dump /
2014            // mysqldump preamble lines that collapse to nothing
2015            // after comment-stripping.
2016            Statement::Empty => Ok(QueryResult::CommandOk {
2017                affected: 0,
2018                modified_catalog: false,
2019            }),
2020            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
2021            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
2022            Statement::CreateIndex(s) => self.exec_create_index(s),
2023            Statement::Insert(s) => self.exec_insert(s),
2024            Statement::Update(mut s) => {
2025                // Materialise uncorrelated subqueries in SET / WHERE
2026                // before the row walk — the SELECT path has done this
2027                // since v4.10; UPDATE gained it for mailrs's
2028                // `UPDATE … WHERE id IN (SELECT … FOR UPDATE SKIP
2029                // LOCKED)` claim pattern (embed round-12).
2030                for (_, e) in &mut s.assignments {
2031                    self.resolve_expr_subqueries(e, cancel)?;
2032                }
2033                if let Some(w) = &mut s.where_ {
2034                    self.resolve_expr_subqueries(w, cancel)?;
2035                }
2036                self.exec_update_cancel(&s, cancel)
2037            }
2038            Statement::Delete(mut s) => {
2039                if let Some(w) = &mut s.where_ {
2040                    self.resolve_expr_subqueries(w, cancel)?;
2041                }
2042                self.exec_delete_cancel(&s, cancel)
2043            }
2044            Statement::Merge(s) => self.exec_merge_cancel(&s, cancel),
2045            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
2046            Statement::Begin => self.exec_begin(),
2047            Statement::Commit => self.exec_commit(),
2048            Statement::Rollback => self.exec_rollback(),
2049            Statement::Savepoint(name) => self.exec_savepoint(name),
2050            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
2051            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
2052            Statement::ShowTables => Ok(self.exec_show_tables()),
2053            Statement::ShowDatabases => Ok(self.exec_show_databases()),
2054            Statement::ShowCreateTable(name) => self.exec_show_create_table(&name),
2055            Statement::ShowIndexes(name) => self.exec_show_indexes(&name),
2056            Statement::ShowStatus => Ok(self.exec_show_status()),
2057            Statement::ShowVariables => Ok(self.exec_show_variables()),
2058            Statement::ShowProcesslist => Ok(self.exec_show_processlist()),
2059            Statement::ShowColumns(table) => self.exec_show_columns(&table),
2060            Statement::ShowUsers => Ok(self.exec_show_users()),
2061            Statement::ShowPublications => Ok(self.exec_show_publications()),
2062            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
2063            Statement::CreateUser(s) => self.exec_create_user(&s),
2064            Statement::DropUser(name) => self.exec_drop_user(&name),
2065            Statement::Explain(e) => self.exec_explain(&e, cancel),
2066            Statement::AlterIndex(s) => self.exec_alter_index(s),
2067            Statement::AlterTable(s) => self.exec_alter_table(s),
2068            Statement::CreatePublication(s) => self.exec_create_publication(s),
2069            Statement::DropPublication(name) => self.exec_drop_publication(&name),
2070            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
2071            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
2072            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
2073            // which lives in spg-server's ServerState. The engine
2074            // surfaces a clear error; the server-layer dispatch
2075            // intercepts the SQL before it reaches the engine on
2076            // a server build, so this arm only fires for
2077            // engine-only callers (spg-embedded, lib tests).
2078            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
2079                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
2080            )),
2081            // v6.2.0 — ANALYZE recomputes per-column histograms.
2082            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
2083            // v6.7.3 — COMPACT COLD SEGMENTS.
2084            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
2085            // v7.12.1 — SET / RESET session parameter. Engine
2086            // tracks the value in `session_params`; FTS dispatcher
2087            // reads `default_text_search_config`. Everything else
2088            // is a recorded no-op (PG dump compat).
2089            Statement::SetParameter { name, value } => {
2090                self.set_session_param(name, value);
2091                Ok(QueryResult::CommandOk {
2092                    affected: 0,
2093                    modified_catalog: false,
2094                })
2095            }
2096            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
2097            // through `set_session_param` so engine-known params
2098            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
2099            // effect; unknown pairs (including `@VAR` LHS from the
2100            // mysqldump preamble) are recorded then ignored.
2101            Statement::SetParameterList(pairs) => {
2102                for (name, value) in pairs {
2103                    self.set_session_param(name, value);
2104                }
2105                Ok(QueryResult::CommandOk {
2106                    affected: 0,
2107                    modified_catalog: false,
2108                })
2109            }
2110            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
2111            // for the PL/pgSQL trigger surface. exec_* methods are
2112            // defined alongside the existing CREATE handlers below.
2113            Statement::CreateFunction(s) => self.exec_create_function(s),
2114            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
2115            Statement::DropTrigger {
2116                name,
2117                table,
2118                if_exists,
2119            } => self.exec_drop_trigger(&name, &table, if_exists),
2120            Statement::DropFunction { name, if_exists } => {
2121                self.exec_drop_function(&name, if_exists)
2122            }
2123            Statement::CreateSequence(s) => self.exec_create_sequence(s),
2124            Statement::AlterSequence(s) => self.exec_alter_sequence(s),
2125            Statement::DropSequence { names, if_exists } => {
2126                self.exec_drop_sequence(&names, if_exists)
2127            }
2128            Statement::CreateView(s) => self.exec_create_view(s),
2129            Statement::DropView { names, if_exists } => self.exec_drop_view(&names, if_exists),
2130            Statement::CreateMaterializedView(s) => self.exec_create_materialized_view(s),
2131            Statement::RefreshMaterializedView { name, with_data } => {
2132                self.exec_refresh_materialized_view(&name, with_data)
2133            }
2134            Statement::DropMaterializedView { names, if_exists } => {
2135                self.exec_drop_materialized_view(&names, if_exists)
2136            }
2137            Statement::CreateType(s) => self.exec_create_type(s),
2138            Statement::DropType { names, if_exists } => self.exec_drop_type(&names, if_exists),
2139            Statement::CreateDomain(s) => self.exec_create_domain(s),
2140            Statement::DropDomain { names, if_exists } => self.exec_drop_domain(&names, if_exists),
2141            Statement::CreateSchema {
2142                name,
2143                if_not_exists,
2144            } => self.exec_create_schema(name, if_not_exists),
2145            Statement::DropSchema { names, if_exists } => self.exec_drop_schema(&names, if_exists),
2146            Statement::ResetParameter(target) => {
2147                match target {
2148                    None => self.session_params.clear(),
2149                    Some(name) => {
2150                        self.session_params.remove(&name.to_ascii_lowercase());
2151                    }
2152                }
2153                Ok(QueryResult::CommandOk {
2154                    affected: 0,
2155                    modified_catalog: false,
2156                })
2157            }
2158        };
2159        self.enforce_row_limit(result)
2160    }
2161
2162    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
2163    /// surface as `EngineError::Unsupported` so the existing PG-wire
2164    /// error mapping stays uniform; the message carries the name so
2165    /// operators can grep replication-log noise. Inside-transaction
2166    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
2167    /// stance) — replication-catalog mutation is a connection-level
2168    /// administrative op, not a transactional one.
2169    fn exec_create_publication(
2170        &mut self,
2171        s: CreatePublicationStatement,
2172    ) -> Result<QueryResult, EngineError> {
2173        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
2174        // was over-cautious: it also blocked the auto-commit wrap
2175        // path (which begins an internal TX around every WAL-
2176        // logged statement). PG itself allows CREATE PUBLICATION
2177        // inside a transaction (it rolls back with the TX).
2178        self.publications
2179            .create(s.name, s.scope)
2180            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
2181        Ok(QueryResult::CommandOk {
2182            affected: 1,
2183            modified_catalog: true,
2184        })
2185    }
2186
2187    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
2188    /// no-op when the publication doesn't exist (returns `affected=0`
2189    /// in that case so the wire-level command tag distinguishes
2190    /// "dropped" from "no-op", though both succeed).
2191    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2192        let removed = self.publications.drop(name);
2193        Ok(QueryResult::CommandOk {
2194            affected: usize::from(removed),
2195            modified_catalog: removed,
2196        })
2197    }
2198
    /// v6.1.2 — read access to the publication catalog.
    ///
    /// Returns a shared borrow of the engine's `publications` field;
    /// nothing is copied. Used by the v6.1.5 publisher-side WAL
    /// filter, by `SHOW PUBLICATIONS` (v6.1.3+), and by e2e tests that
    /// need to assert state without going through the wire.
    pub const fn publications(&self) -> &publications::Publications {
        &self.publications
    }
2206
2207    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
2208    /// `enabled = true` and `last_received_pos = 0` for a freshly-
2209    /// created subscription. The actual worker thread is spawned
2210    /// by spg-server once the engine returns success.
2211    fn exec_create_subscription(
2212        &mut self,
2213        s: CreateSubscriptionStatement,
2214    ) -> Result<QueryResult, EngineError> {
2215        // See exec_create_publication — the in_transaction gate
2216        // was over-cautious; the auto-commit wrap path holds an
2217        // internal TX that this check was incorrectly blocking.
2218        let sub = subscriptions::Subscription {
2219            conn_str: s.conn_str,
2220            publications: s.publications,
2221            enabled: true,
2222            last_received_pos: 0,
2223        };
2224        self.subscriptions
2225            .create(s.name, sub)
2226            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
2227        Ok(QueryResult::CommandOk {
2228            affected: 1,
2229            modified_catalog: true,
2230        })
2231    }
2232
2233    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
2234    /// doesn't exist (PG-compatible). The associated worker is
2235    /// torn down by spg-server when it observes the catalog
2236    /// change at the next snapshot or via the engine's
2237    /// subscriptions accessor (the worker polls the catalog on
2238    /// reconnect; v6.1.5's filter-side will tighten this to an
2239    /// explicit signal).
2240    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2241        let removed = self.subscriptions.drop(name);
2242        Ok(QueryResult::CommandOk {
2243            affected: usize::from(removed),
2244            modified_catalog: removed,
2245        })
2246    }
2247
    /// v6.1.4 — read access to the subscription catalog.
    ///
    /// Returns a shared borrow of the engine's `subscriptions` field;
    /// nothing is copied. Used by the subscription worker (to read its
    /// own row: publications + last applied position), by
    /// `SHOW SUBSCRIPTIONS`, and by e2e tests asserting state directly.
    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
        &self.subscriptions
    }
2255
    /// v6.1.4 — write access to a subscription's `last_received_pos`.
    ///
    /// Forwards `name` and `pos` to the subscription catalog's
    /// `update_last_received_pos` and returns its result unchanged.
    /// The worker calls this after each apply batch (under the
    /// engine's write-lock). A `false` return means the subscription
    /// was dropped between when the worker received the record and
    /// when this call landed.
    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
        self.subscriptions.update_last_received_pos(name, pos)
    }
2264
2265    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
2266    /// `(name, conn_str, publications, enabled, last_received_pos)`
2267    /// ordered by subscription name. The `publications` column is
2268    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
2269    /// callers wanting structured access read `Engine::subscriptions`.
2270    fn exec_show_subscriptions(&self) -> QueryResult {
2271        let columns = alloc::vec![
2272            ColumnSchema::new("name", DataType::Text, false),
2273            ColumnSchema::new("conn_str", DataType::Text, false),
2274            ColumnSchema::new("publications", DataType::Text, false),
2275            ColumnSchema::new("enabled", DataType::Bool, false),
2276            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
2277        ];
2278        let rows: Vec<Row> = self
2279            .subscriptions
2280            .iter()
2281            .map(|(name, sub)| {
2282                Row::new(alloc::vec![
2283                    Value::Text(name.clone()),
2284                    Value::Text(sub.conn_str.clone()),
2285                    Value::Text(sub.publications.join(", ")),
2286                    Value::Bool(sub.enabled),
2287                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
2288                ])
2289            })
2290            .collect();
2291        QueryResult::Rows { columns, rows }
2292    }
2293
2294    /// v6.2.0 — materialise `spg_statistic` rows. One row per
2295    /// `(table, column)` pair tracked in `Statistics`, with
2296    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
2297    /// the same canonical form vector literals use for round-trip.
2298    fn exec_spg_statistic(&self) -> QueryResult {
2299        let columns = alloc::vec![
2300            ColumnSchema::new("table_name", DataType::Text, false),
2301            ColumnSchema::new("column_name", DataType::Text, false),
2302            ColumnSchema::new("null_frac", DataType::Float, false),
2303            ColumnSchema::new("n_distinct", DataType::BigInt, false),
2304            ColumnSchema::new("histogram_bounds", DataType::Text, false),
2305            // v6.7.0 — appended column (v6.2.0 stability contract
2306            // allows APPEND to spg_statistic, not reorder/rename).
2307            // Reports the cached per-table cold-row count; same
2308            // value across every column row of the same table.
2309            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
2310        ];
2311        let rows: Vec<Row> = self
2312            .statistics
2313            .iter()
2314            .map(|((t, c), s)| {
2315                let cold = self
2316                    .catalog
2317                    .get(t)
2318                    .map_or(0, |table| table.cold_row_count());
2319                Row::new(alloc::vec![
2320                    Value::Text(t.clone()),
2321                    Value::Text(c.clone()),
2322                    Value::Float(f64::from(s.null_frac)),
2323                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
2324                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
2325                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
2326                ])
2327            })
2328            .collect();
2329        QueryResult::Rows { columns, rows }
2330    }
2331
2332    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
2333    /// per subscription with `(name, conn_str, publications,
2334    /// last_received_pos, enabled)`. Surface mirrors
2335    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
2336    /// shape so it composes with SELECT clauses (WHERE, projection
2337    /// onto specific columns, etc).
2338    fn exec_spg_stat_replication(&self) -> QueryResult {
2339        let columns = alloc::vec![
2340            ColumnSchema::new("name", DataType::Text, false),
2341            ColumnSchema::new("conn_str", DataType::Text, false),
2342            ColumnSchema::new("publications", DataType::Text, false),
2343            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
2344            ColumnSchema::new("enabled", DataType::Bool, false),
2345        ];
2346        let rows: Vec<Row> = self
2347            .subscriptions
2348            .iter()
2349            .map(|(name, sub)| {
2350                Row::new(alloc::vec![
2351                    Value::Text(name.clone()),
2352                    Value::Text(sub.conn_str.clone()),
2353                    Value::Text(sub.publications.join(",")),
2354                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
2355                    Value::Bool(sub.enabled),
2356                ])
2357            })
2358            .collect();
2359        QueryResult::Rows { columns, rows }
2360    }
2361
2362    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
2363    /// cold-tier segment with `(segment_id, num_rows, num_pages,
2364    /// total_bytes)`.
2365    ///
2366    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
2367    /// carve-out. Walks every user table's BTree indices to find
2368    /// which table's Cold locators point at each segment. Empty
2369    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
2370    /// before any index registered a locator). The walk is
2371    /// O(tables × indices × keys); cached per call, not across
2372    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
2373    fn exec_spg_stat_segment(&self) -> QueryResult {
2374        let columns = alloc::vec![
2375            ColumnSchema::new("segment_id", DataType::BigInt, false),
2376            ColumnSchema::new("table_name", DataType::Text, false),
2377            ColumnSchema::new("num_rows", DataType::BigInt, false),
2378            ColumnSchema::new("num_pages", DataType::BigInt, false),
2379            ColumnSchema::new("total_bytes", DataType::BigInt, false),
2380        ];
2381        // v6.7.0 — build a segment_id → table_name map by walking
2382        // every user table's BTree indices once. O(tables × indices
2383        // × keys) for the v6.5.0 carve-out resolution; acceptable
2384        // because spg_stat_segment is operator-facing (not on a
2385        // hot-loop path).
2386        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
2387        for tname in self.catalog.table_names() {
2388            if is_internal_table_name(&tname) {
2389                continue;
2390            }
2391            let Some(t) = self.catalog.get(&tname) else {
2392                continue;
2393            };
2394            for idx in t.indices() {
2395                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
2396                    for (_, locs) in map.iter() {
2397                        for loc in locs {
2398                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
2399                                segment_owners
2400                                    .entry(*segment_id)
2401                                    .or_insert_with(|| tname.clone());
2402                            }
2403                        }
2404                    }
2405                }
2406            }
2407        }
2408        let rows: Vec<Row> = self
2409            .catalog
2410            .cold_segment_ids_global()
2411            .iter()
2412            .filter_map(|&id| {
2413                let seg = self.catalog.cold_segment(id)?;
2414                let meta = seg.meta();
2415                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
2416                Some(Row::new(alloc::vec![
2417                    Value::BigInt(i64::from(id)),
2418                    Value::Text(owner),
2419                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
2420                    Value::BigInt(i64::from(meta.num_pages)),
2421                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
2422                ]))
2423            })
2424            .collect();
2425        QueryResult::Rows { columns, rows }
2426    }
2427
2428    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
2429    /// distinct SQL text recorded since the engine booted, capped
2430    /// at `QUERY_STATS_MAX` (1024). Columns:
2431    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
2432    /// mean_us = total_us / exec_count (saturating).
2433    fn exec_spg_stat_query(&self) -> QueryResult {
2434        let columns = alloc::vec![
2435            ColumnSchema::new("sql", DataType::Text, false),
2436            ColumnSchema::new("exec_count", DataType::BigInt, false),
2437            ColumnSchema::new("total_us", DataType::BigInt, false),
2438            ColumnSchema::new("mean_us", DataType::BigInt, false),
2439            ColumnSchema::new("max_us", DataType::BigInt, false),
2440            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
2441        ];
2442        let rows: Vec<Row> = self
2443            .query_stats
2444            .snapshot()
2445            .into_iter()
2446            .map(|(sql, s)| {
2447                let mean = if s.exec_count == 0 {
2448                    0
2449                } else {
2450                    s.total_us / s.exec_count
2451                };
2452                Row::new(alloc::vec![
2453                    Value::Text(sql),
2454                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
2455                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
2456                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
2457                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
2458                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
2459                ])
2460            })
2461            .collect();
2462        QueryResult::Rows { columns, rows }
2463    }
2464
    /// v6.5.2 — register a connection-state provider (builder style).
    ///
    /// Consumes the engine, stores `f` as the activity provider
    /// (replacing any previous one) and returns the engine. spg-server
    /// calls this at startup with a function that snapshots its
    /// per-pgwire-connection registry; the engine reads through the
    /// callback on `SELECT * FROM spg_stat_activity`.
    #[must_use]
    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
        self.activity_provider = Some(f);
        self
    }
2474
    /// v6.5.3 — register audit chain provider + verifier (builder
    /// style).
    ///
    /// Consumes the engine, stores `chain` as the audit-chain provider
    /// and `verify` as the audit verifier (replacing any previous
    /// ones) and returns the engine. Both are always set together.
    #[must_use]
    pub const fn with_audit_providers(
        mut self,
        chain: AuditChainProvider,
        verify: AuditVerifier,
    ) -> Self {
        self.audit_chain_provider = Some(chain);
        self.audit_verifier = Some(verify);
        self
    }
2486
    /// v6.5.6 — register a slow-query log callback (builder style).
    ///
    /// Consumes the engine, stores `threshold_us` and `logger`
    /// (replacing any previous values) and returns the engine.
    /// `threshold_us` is the floor in microseconds; only executions
    /// above the floor fire the callback. spg-server wires this from
    /// `SPG_SLOW_QUERY_THRESHOLD_MS` (default 100 ms).
    #[must_use]
    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
        self.slow_query_threshold_us = Some(threshold_us);
        self.slow_query_logger = Some(logger);
        self
    }
2497
    /// v6.5.6 — operator knob for the plan cache cap.
    ///
    /// Forwards `n` to the plan cache's `set_max_entries`. spg-server
    /// reads the `SPG_PLAN_CACHE_MAX` env var at startup and uses this
    /// to override the compile-time default of 256.
    pub fn set_plan_cache_max(&mut self, n: usize) {
        self.plan_cache.set_max_entries(n);
    }
2504
2505    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
2506    /// snapshot from the registered `ActivityProvider`. Returns an
2507    /// empty result set when no provider is registered (the no_std
2508    /// embedded path with no pgwire layer).
2509    fn exec_spg_stat_activity(&self) -> QueryResult {
2510        let columns = alloc::vec![
2511            ColumnSchema::new("pid", DataType::Int, false),
2512            ColumnSchema::new("user", DataType::Text, false),
2513            ColumnSchema::new("started_at_us", DataType::BigInt, false),
2514            ColumnSchema::new("current_sql", DataType::Text, false),
2515            ColumnSchema::new("wait_event", DataType::Text, false),
2516            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
2517            ColumnSchema::new("in_transaction", DataType::Bool, false),
2518            ColumnSchema::new("application_name", DataType::Text, false),
2519        ];
2520        let rows: Vec<Row> = self
2521            .activity_provider
2522            .map(|f| f())
2523            .unwrap_or_default()
2524            .into_iter()
2525            .map(|r| {
2526                Row::new(alloc::vec![
2527                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2528                    Value::Text(r.user),
2529                    Value::BigInt(r.started_at_us),
2530                    Value::Text(r.current_sql),
2531                    Value::Text(r.wait_event),
2532                    Value::BigInt(r.elapsed_us),
2533                    Value::Bool(r.in_transaction),
2534                    Value::Text(r.application_name),
2535                ])
2536            })
2537            .collect();
2538        QueryResult::Rows { columns, rows }
2539    }
2540
2541    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2542    /// table with `(table_name, ddl)`. Reconstructed from catalog
2543    /// state on demand.
2544    fn exec_spg_table_ddl(&self) -> QueryResult {
2545        let columns = alloc::vec![
2546            ColumnSchema::new("table_name", DataType::Text, false),
2547            ColumnSchema::new("ddl", DataType::Text, false),
2548        ];
2549        let rows: Vec<Row> = self
2550            .catalog
2551            .table_names()
2552            .into_iter()
2553            .filter(|n| !is_internal_table_name(n))
2554            .filter_map(|name| {
2555                let table = self.catalog.get(&name)?;
2556                let ddl = render_create_table(&name, &table.schema().columns);
2557                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2558            })
2559            .collect();
2560        QueryResult::Rows { columns, rows }
2561    }
2562
2563    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2564    /// with `(role_name, ddl)`. Password is redacted (matches the
2565    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2566    fn exec_spg_role_ddl(&self) -> QueryResult {
2567        let columns = alloc::vec![
2568            ColumnSchema::new("role_name", DataType::Text, false),
2569            ColumnSchema::new("ddl", DataType::Text, false),
2570        ];
2571        let rows: Vec<Row> = self
2572            .users
2573            .iter()
2574            .map(|(name, rec)| {
2575                let ddl = alloc::format!(
2576                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2577                    rec.role.as_str(),
2578                );
2579                Row::new(alloc::vec![
2580                    Value::Text(String::from(name)),
2581                    Value::Text(ddl)
2582                ])
2583            })
2584            .collect();
2585        QueryResult::Rows { columns, rows }
2586    }
2587
2588    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2589    /// `ddl` column concatenates every user table's CREATE +
2590    /// every role's CREATE in deterministic catalog order. Suitable
2591    /// for piping back through `Engine::execute` to recreate a
2592    /// schema-equivalent database.
2593    fn exec_spg_database_ddl(&self) -> QueryResult {
2594        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2595        let mut out = String::new();
2596        for (name, rec) in self.users.iter() {
2597            out.push_str(&alloc::format!(
2598                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2599                rec.role.as_str(),
2600            ));
2601        }
2602        for name in self.catalog.table_names() {
2603            if is_internal_table_name(&name) {
2604                continue;
2605            }
2606            if let Some(table) = self.catalog.get(&name) {
2607                out.push_str(&render_create_table(&name, &table.schema().columns));
2608                out.push_str(";\n");
2609            }
2610        }
2611        QueryResult::Rows {
2612            columns,
2613            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2614        }
2615    }
2616
2617    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2618    /// snapshot from the registered provider; empty when no
2619    /// provider is set.
2620    fn exec_spg_audit_chain(&self) -> QueryResult {
2621        let columns = alloc::vec![
2622            ColumnSchema::new("seq", DataType::BigInt, false),
2623            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2624            ColumnSchema::new("prev_hash", DataType::Text, false),
2625            ColumnSchema::new("entry_hash", DataType::Text, false),
2626            ColumnSchema::new("sql", DataType::Text, false),
2627        ];
2628        let rows: Vec<Row> = self
2629            .audit_chain_provider
2630            .map(|f| f())
2631            .unwrap_or_default()
2632            .into_iter()
2633            .map(|r| {
2634                Row::new(alloc::vec![
2635                    Value::BigInt(r.seq),
2636                    Value::BigInt(r.ts_ms),
2637                    Value::Text(r.prev_hash_hex),
2638                    Value::Text(r.entry_hash_hex),
2639                    Value::Text(r.sql),
2640                ])
2641            })
2642            .collect();
2643        QueryResult::Rows { columns, rows }
2644    }
2645
2646    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2647    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2648    /// on a clean chain. Returns one row with both values 0 when
2649    /// no verifier is registered (no-data fallback for embedded
2650    /// callers).
2651    fn exec_spg_audit_verify(&self) -> QueryResult {
2652        let columns = alloc::vec![
2653            ColumnSchema::new("verified_count", DataType::BigInt, false),
2654            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2655        ];
2656        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2657        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2658        QueryResult::Rows {
2659            columns,
2660            rows: alloc::vec![row],
2661        }
2662    }
2663
    /// v6.5.1 — shared-borrow accessor for the per-query statistics
    /// store. Used by tests and by the v6.5.6 ops resets.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
2668
    /// v6.5.1 — mutable accessor for the per-query statistics store
    /// (clear, etc). Requires exclusive access to the engine.
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
2673
    /// v6.2.0 — read access to the per-column statistics table.
    ///
    /// Returns a shared borrow of the engine's `statistics` field;
    /// nothing is copied. Used by the planner (v6.2.2 selectivity
    /// functions read this), by `SELECT * FROM spg_statistic`, and by
    /// e2e tests.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
2680
2681    /// v6.2.1 — return tables whose modified-row count crossed the
2682    /// auto-analyze threshold since the last ANALYZE on that table.
2683    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2684    /// ANALYZE)` — combines PG-style fractional + absolute lower
2685    /// bound so a fresh / tiny table doesn't get hammered on every
2686    /// INSERT.
2687    ///
2688    /// Designed to be cheap: walks every user table's
2689    /// `Catalog::table_names()` + reads `statistics::modified_
2690    /// since_last_analyze()` (BTreeMap lookup). The background
2691    /// worker calls this under `engine.read()` then drops the lock
2692    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2693    pub fn tables_needing_analyze(&self) -> Vec<String> {
2694        const MIN_ROWS: u64 = 100;
2695        let mut out = Vec::new();
2696        for name in self.catalog.table_names() {
2697            if is_internal_table_name(&name) {
2698                continue;
2699            }
2700            let Some(table) = self.catalog.get(&name) else {
2701                continue;
2702            };
2703            let row_count = table.rows().len() as u64;
2704            let modified = self.statistics.modified_since_last_analyze(&name);
2705            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2706            // computed in integer arithmetic so spg-engine stays
2707            // no_std without pulling in libm. `(n + 9) / 10` is
2708            // `ceil(n / 10)` for non-negative `n`.
2709            let base = row_count.max(MIN_ROWS);
2710            let threshold = base.saturating_add(9) / 10;
2711            if modified >= threshold {
2712                out.push(name);
2713            }
2714        }
2715        out
2716    }
2717
2718    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2719    /// every user table; `ANALYZE <name>` re-stats one. For each
2720    /// target table, single-pass scan + per-column histogram +
2721    /// `null_frac` + `n_distinct`. Replaces the table's prior
2722    /// stats; resets the modified-row counter.
2723    ///
2724    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2725    /// can add reservoir sampling at the > 100 K-row mark; not a
2726    /// scope blocker for the current commit since rows ≤ 100 K
2727    /// analyse in milliseconds.
2728    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2729        let names: Vec<String> = if let Some(name) = target {
2730            // Verify the table exists; surface a clear error if not.
2731            if self.catalog.get(name).is_none() {
2732                return Err(EngineError::Storage(StorageError::TableNotFound {
2733                    name: name.to_string(),
2734                }));
2735            }
2736            alloc::vec![name.to_string()]
2737        } else {
2738            self.catalog
2739                .table_names()
2740                .into_iter()
2741                .filter(|n| !is_internal_table_name(n))
2742                .collect()
2743        };
2744        let mut analysed = 0usize;
2745        for table_name in &names {
2746            self.analyze_one_table(table_name)?;
2747            analysed += 1;
2748        }
2749        // v6.3.1 — plan cache invalidation. Bump stats version so
2750        // future lookups see the new generation, and selectively
2751        // evict every plan whose `source_tables` overlap with the
2752        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2753        // whole cache.
2754        if analysed > 0 {
2755            self.statistics.bump_version();
2756            if target.is_some() {
2757                for t in &names {
2758                    self.plan_cache.evict_referencing(t);
2759                }
2760            } else {
2761                self.plan_cache.clear();
2762            }
2763        }
2764        Ok(QueryResult::CommandOk {
2765            affected: analysed,
2766            modified_catalog: true,
2767        })
2768    }
2769
2770    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2771    /// engine-layer compaction shim with the default
2772    /// 4 MiB segment-size threshold. spg-server intercepts the
2773    /// SQL before it reaches the engine on a server build —
2774    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2775    /// `Engine::compact_cold_segments_with_target` directly with
2776    /// the env value, and persists every merged segment to
2777    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2778    /// are case-folded to lowercase to match PG; values keep their
2779    /// caller-supplied form so observability paths see what was
2780    /// requested. Only `default_text_search_config` is consulted by
2781    /// the engine today.
2782    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2783        let normalised = match value {
2784            spg_sql::ast::SetValue::String(s) => s,
2785            spg_sql::ast::SetValue::Ident(s) => s,
2786            spg_sql::ast::SetValue::Number(s) => s,
2787            spg_sql::ast::SetValue::Default => String::new(),
2788        };
2789        let key = name.to_ascii_lowercase();
2790        // v7.14.0 — mysqldump preamble emits
2791        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2792        // order despite cross-table FK references; the closing
2793        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2794        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2795        // session-variable-aware path). Match both shapes.
2796        // Also accept PG's `session_replication_role = 'replica'`
2797        // which suppresses trigger + FK enforcement during a
2798        // logical replication apply (pg_dump preserves this for
2799        // schema-only mode but it shows up in some restores).
2800        let value_off = matches!(
2801            normalised.to_ascii_lowercase().as_str(),
2802            "0" | "off" | "false"
2803        );
2804        let value_on = matches!(
2805            normalised.to_ascii_lowercase().as_str(),
2806            "1" | "on" | "true"
2807        );
2808        if key == "foreign_key_checks"
2809            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2810        {
2811            if value_off || key == "session_replication_role" {
2812                self.foreign_key_checks = false;
2813            } else if value_on
2814                || (key == "session_replication_role" && normalised.eq_ignore_ascii_case("origin"))
2815            {
2816                self.foreign_key_checks = true;
2817                // Drain pending FK queue against the now-complete
2818                // catalog. Errors here surface as the SET reply —
2819                // caller knows enabling checks revealed orphans.
2820                let _ = self.drain_pending_foreign_keys();
2821            }
2822        }
2823        // v7.22 (round-13 T3) — string-literal dialect signals.
2824        // `SET sql_mode = …` is something only MySQL clients and
2825        // mysqldump preambles emit → MySQL escape semantics.
2826        // `SET standard_conforming_strings = on|off` is PG's own
2827        // switch for exactly this behaviour (every pg_dump preamble
2828        // sets it to on). The same SQL text lexes differently per
2829        // dialect, so a flip invalidates the plan cache.
2830        let new_escapes = if key == "sql_mode" {
2831            Some(true)
2832        } else if key == "standard_conforming_strings" {
2833            Some(value_off)
2834        } else {
2835            None
2836        };
2837        if let Some(flag) = new_escapes
2838            && flag != self.backslash_escapes
2839        {
2840            self.backslash_escapes = flag;
2841            self.plan_cache.clear();
2842        }
2843        self.session_params.insert(key, normalised);
2844    }
2845
2846    /// v7.14.0 — resolve every queued FK whose installation was
2847    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2848    /// `set_session_param` when checks flip back on and by the
2849    /// drop-import release gate. Each FK is resolved against the
2850    /// current catalog; remaining missing-parent errors propagate
2851    /// up so the caller knows the import was incomplete.
2852    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2853        let pending = core::mem::take(&mut self.pending_foreign_keys);
2854        for (child, fk) in pending {
2855            // Resolve against the current catalog. Skip silently
2856            // when the child table itself was dropped between
2857            // queue + drain.
2858            let cols_snapshot = match self.active_catalog().get(&child) {
2859                Some(t) => t.schema().columns.clone(),
2860                None => continue,
2861            };
2862            let storage_fk =
2863                resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2864            let table = self
2865                .active_catalog_mut()
2866                .get_mut(&child)
2867                .expect("checked above");
2868            table.schema_mut().foreign_keys.push(storage_fk);
2869        }
2870        Ok(())
2871    }
2872
2873    /// v7.12.1 — read a session parameter set via `SET`. Used by
2874    /// the FTS function dispatcher to resolve the default config
2875    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2876    #[must_use]
2877    pub fn session_param(&self, name: &str) -> Option<&str> {
2878        self.session_params
2879            .get(&name.to_ascii_lowercase())
2880            .map(String::as_str)
2881    }
2882
2883    /// v7.12.1 — build an `EvalContext` chained with the session's
2884    /// `default_text_search_config`. Engine-internal callers use
2885    /// this instead of `EvalContext::new` so the FTS function
2886    /// dispatcher sees the SET configuration.
2887    fn ev_ctx<'a>(
2888        &'a self,
2889        columns: &'a [ColumnSchema],
2890        alias: Option<&'a str>,
2891    ) -> EvalContext<'a> {
2892        EvalContext::new(columns, alias)
2893            .with_default_text_search_config(self.session_param("default_text_search_config"))
2894    }
2895
2896    /// `<db>.spg/segments/`. This arm only fires for engine-only
2897    /// callers (spg-embedded, lib tests); in that mode merged
2898    /// segments live in memory and are dropped at process exit.
2899    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2900        let target = COMPACTION_TARGET_DEFAULT_BYTES;
2901        let reports = self.compact_cold_segments_with_target(target)?;
2902        let columns = alloc::vec![
2903            ColumnSchema::new("table_name", DataType::Text, false),
2904            ColumnSchema::new("index_name", DataType::Text, false),
2905            ColumnSchema::new("sources_merged", DataType::BigInt, false),
2906            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2907            ColumnSchema::new("merged_rows", DataType::BigInt, false),
2908            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2909            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2910        ];
2911        let rows: Vec<Row> = reports
2912            .into_iter()
2913            .map(|(tname, iname, report)| {
2914                Row::new(alloc::vec![
2915                    Value::Text(tname),
2916                    Value::Text(iname),
2917                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2918                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2919                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2920                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
2921                    Value::BigInt(
2922                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2923                    ),
2924                ])
2925            })
2926            .collect();
2927        Ok(QueryResult::Rows { columns, rows })
2928    }
2929
2930    /// Walk a single table's rows once and (re-)populate per-column
2931    /// stats. Drops the existing stats for `table` first so columns
2932    /// that have been DROP-ed between ANALYZEs don't leave stale
2933    /// rows.
2934    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2935        let table = self.catalog.get(table_name).ok_or_else(|| {
2936            EngineError::Storage(StorageError::TableNotFound {
2937                name: table_name.to_string(),
2938            })
2939        })?;
2940        let schema = table.schema().clone();
2941        let row_count = table.rows().len();
2942        // For each column, collect (sorted) non-NULL textual values
2943        // + count NULLs; then ask `statistics::build_histogram` to
2944        // produce the 101 bounds and `estimate_n_distinct` the
2945        // distinct count.
2946        self.statistics.clear_table(table_name);
2947        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2948            // v6.2.0 skip: vector columns have their own stats
2949            // shape (HNSW graph topology). v6.2 deliberation #1.
2950            if matches!(col_schema.ty, DataType::Vector { .. }) {
2951                continue;
2952            }
2953            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2954            let mut nulls: u64 = 0;
2955            for row in table.rows() {
2956                match row.values.get(col_pos) {
2957                    Some(Value::Null) | None => nulls += 1,
2958                    Some(v) => non_null_values.push(v.clone()),
2959                }
2960            }
2961            // Sort by type-aware ordering (Int as int, Text as
2962            // lex, etc.) so histogram bounds reflect the column's
2963            // natural order — not lexicographic on the string
2964            // representation, which would put "9" after "49".
2965            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2966            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
2967            let null_frac = if row_count == 0 {
2968                0.0
2969            } else {
2970                #[allow(clippy::cast_precision_loss)]
2971                let f = nulls as f32 / row_count as f32;
2972                f
2973            };
2974            let n_distinct = statistics::estimate_n_distinct(&non_null);
2975            let histogram_bounds = statistics::build_histogram(&non_null);
2976            self.statistics.set(
2977                table_name.to_string(),
2978                col_schema.name.clone(),
2979                statistics::ColumnStats {
2980                    null_frac,
2981                    n_distinct,
2982                    histogram_bounds,
2983                },
2984            );
2985        }
2986        self.statistics.reset_modified(table_name);
2987        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
2988        // BTree indices and count Cold locators (MAX across
2989        // indices); store the result on the table. Surfaced via
2990        // `spg_statistic.cold_row_count` (new column) and
2991        // `spg_stat_segment.table_name` (new column).
2992        let cold_count = {
2993            let table = self
2994                .active_catalog()
2995                .get(table_name)
2996                .expect("table still present");
2997            table.count_cold_locators()
2998        };
2999        let table_mut = self
3000            .active_catalog_mut()
3001            .get_mut(table_name)
3002            .expect("table still present");
3003        table_mut.set_cold_row_count(cold_count);
3004        Ok(())
3005    }
3006
3007    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
3008    /// `(name, scope, table_count)` ordered by publication name.
3009    ///   - `scope` is the human-readable string:
3010    ///       `"FOR ALL TABLES"` /
3011    ///       `"FOR TABLE t1, t2"` /
3012    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
3013    ///   - `table_count` is NULL for `AllTables`, the list length
3014    ///     otherwise. NULLability lets clients distinguish "publish
3015    ///     everything" from "publish exactly 0 tables" (the v6.1.3
3016    ///     parser forbids the empty list, but the column shape is
3017    ///     ready for the v6.1.5 publisher-side semantics).
3018    fn exec_show_publications(&self) -> QueryResult {
3019        let columns = alloc::vec![
3020            ColumnSchema::new("name", DataType::Text, false),
3021            ColumnSchema::new("scope", DataType::Text, false),
3022            ColumnSchema::new("table_count", DataType::Int, true),
3023        ];
3024        let rows: Vec<Row> = self
3025            .publications
3026            .iter()
3027            .map(|(name, scope)| {
3028                let (scope_str, count_val) = match scope {
3029                    spg_sql::ast::PublicationScope::AllTables => {
3030                        ("FOR ALL TABLES".to_string(), Value::Null)
3031                    }
3032                    spg_sql::ast::PublicationScope::ForTables(ts) => (
3033                        alloc::format!("FOR TABLE {}", ts.join(", ")),
3034                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
3035                    ),
3036                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
3037                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
3038                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
3039                    ),
3040                };
3041                Row::new(alloc::vec![
3042                    Value::Text(name.clone()),
3043                    Value::Text(scope_str),
3044                    count_val,
3045                ])
3046            })
3047            .collect();
3048        QueryResult::Rows { columns, rows }
3049    }
3050
3051    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
3052    fn exec_show_users(&self) -> QueryResult {
3053        let columns = alloc::vec![
3054            ColumnSchema::new("name", DataType::Text, false),
3055            ColumnSchema::new("role", DataType::Text, false),
3056        ];
3057        let rows: Vec<Row> = self
3058            .users
3059            .iter()
3060            .map(|(name, rec)| {
3061                Row::new(alloc::vec![
3062                    Value::Text(name.to_string()),
3063                    Value::Text(rec.role.as_str().to_string()),
3064                ])
3065            })
3066            .collect();
3067        QueryResult::Rows { columns, rows }
3068    }
3069
3070    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
3071        if self.in_transaction() {
3072            return Err(EngineError::Unsupported(
3073                "CREATE USER is not allowed inside a transaction".into(),
3074            ));
3075        }
3076        let role = users::Role::parse(&s.role).ok_or_else(|| {
3077            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
3078        })?;
3079        // Prefer the host-injected RNG. Falls back to a deterministic
3080        // salt derived from the username only when no RNG is wired —
3081        // acceptable for tests; the server always installs one.
3082        let salt = self.salt_fn.map_or_else(
3083            || {
3084                let mut s_bytes = [0u8; 16];
3085                let digest = spg_crypto::hash(s.name.as_bytes());
3086                s_bytes.copy_from_slice(&digest[..16]);
3087                s_bytes
3088            },
3089            |f| f(),
3090        );
3091        self.users
3092            .create(&s.name, &s.password, role, salt)
3093            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
3094        Ok(QueryResult::CommandOk {
3095            affected: 1,
3096            modified_catalog: true,
3097        })
3098    }
3099
3100    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3101        if self.in_transaction() {
3102            return Err(EngineError::Unsupported(
3103                "DROP USER is not allowed inside a transaction".into(),
3104            ));
3105        }
3106        self.users
3107            .drop(name)
3108            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
3109        Ok(QueryResult::CommandOk {
3110            affected: 1,
3111            modified_catalog: true,
3112        })
3113    }
3114
3115    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
3116    /// function metadata in the catalog. PL/pgSQL bodies are
3117    /// already parsed by the SQL parser; we re-canonicalise the
3118    /// body to source text for storage (the executor re-parses
3119    /// it at trigger fire time — see the trigger fire path).
3120    fn exec_create_function(
3121        &mut self,
3122        s: spg_sql::ast::CreateFunctionStatement,
3123    ) -> Result<QueryResult, EngineError> {
3124        let args_repr = render_function_args(&s.args);
3125        let returns = match &s.returns {
3126            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
3127            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
3128            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
3129            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
3130        };
3131        let body_text = match &s.body {
3132            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
3133            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
3134        };
3135        let def = spg_storage::FunctionDef {
3136            name: s.name.clone(),
3137            args_repr,
3138            returns,
3139            language: s.language.clone(),
3140            body: body_text,
3141        };
3142        self.active_catalog_mut()
3143            .create_function(def, s.or_replace)
3144            .map_err(EngineError::Storage)?;
3145        Ok(QueryResult::CommandOk {
3146            affected: 0,
3147            modified_catalog: true,
3148        })
3149    }
3150
3151    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
3152    /// function must already exist in the catalog (forward
3153    /// references defer to a later release). Persists the
3154    /// trigger metadata for the row-write hooks below to consult.
3155    fn exec_create_trigger(
3156        &mut self,
3157        s: spg_sql::ast::CreateTriggerStatement,
3158    ) -> Result<QueryResult, EngineError> {
3159        let timing = match s.timing {
3160            spg_sql::ast::TriggerTiming::Before => "BEFORE",
3161            spg_sql::ast::TriggerTiming::After => "AFTER",
3162            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
3163        };
3164        let events: Vec<alloc::string::String> = s
3165            .events
3166            .iter()
3167            .map(|e| match e {
3168                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
3169                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
3170                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
3171                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
3172            })
3173            .collect();
3174        let for_each = match s.for_each {
3175            spg_sql::ast::TriggerForEach::Row => "ROW",
3176            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
3177        };
3178        let def = spg_storage::TriggerDef {
3179            name: s.name.clone(),
3180            table: s.table.clone(),
3181            timing: alloc::string::String::from(timing),
3182            events,
3183            for_each: alloc::string::String::from(for_each),
3184            function: s.function.clone(),
3185            update_columns: s.update_columns.clone(),
3186            // v7.16.1 — every trigger is born enabled. Toggled
3187            // by ALTER TABLE … { ENABLE | DISABLE } TRIGGER.
3188            enabled: true,
3189        };
3190        self.active_catalog_mut()
3191            .create_trigger(def, s.or_replace)
3192            .map_err(EngineError::Storage)?;
3193        Ok(QueryResult::CommandOk {
3194            affected: 0,
3195            modified_catalog: true,
3196        })
3197    }
3198
3199    fn exec_drop_trigger(
3200        &mut self,
3201        name: &str,
3202        table: &str,
3203        if_exists: bool,
3204    ) -> Result<QueryResult, EngineError> {
3205        let removed = self.active_catalog_mut().drop_trigger(name, table);
3206        if !removed && !if_exists {
3207            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3208                alloc::format!("trigger {name:?} on {table:?} does not exist"),
3209            )));
3210        }
3211        Ok(QueryResult::CommandOk {
3212            affected: usize::from(removed),
3213            modified_catalog: removed,
3214        })
3215    }
3216
3217    fn exec_drop_function(
3218        &mut self,
3219        name: &str,
3220        if_exists: bool,
3221    ) -> Result<QueryResult, EngineError> {
3222        let removed = self.active_catalog_mut().drop_function(name);
3223        if !removed && !if_exists {
3224            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3225                alloc::format!("function {name:?} does not exist"),
3226            )));
3227        }
3228        Ok(QueryResult::CommandOk {
3229            affected: usize::from(removed),
3230            modified_catalog: removed,
3231        })
3232    }
3233
3234    /// v7.17.0 — `CREATE SEQUENCE` engine path. Resolves
3235    /// `min_value` / `max_value` / `start` against PG defaults
3236    /// when omitted, then installs the SequenceDef in the catalog.
3237    fn exec_create_sequence(
3238        &mut self,
3239        s: spg_sql::ast::CreateSequenceStatement,
3240    ) -> Result<QueryResult, EngineError> {
3241        use spg_sql::ast::{SeqBound, SequenceDataType as AstDt};
3242        use spg_storage::{SequenceDataType, SequenceDef};
3243        let dt = match s.data_type {
3244            None => SequenceDataType::BigInt,
3245            Some(AstDt::SmallInt) => SequenceDataType::SmallInt,
3246            Some(AstDt::Int) => SequenceDataType::Int,
3247            Some(AstDt::BigInt) => SequenceDataType::BigInt,
3248        };
3249        let increment = s.options.increment.unwrap_or(1);
3250        if increment == 0 {
3251            return Err(EngineError::Unsupported(
3252                "INCREMENT must not be zero".into(),
3253            ));
3254        }
3255        let (def_min, def_max) = dt.default_bounds(increment > 0);
3256        let min_value = match s.options.min_value {
3257            None | Some(SeqBound::NoBound) => def_min,
3258            Some(SeqBound::Value(n)) => n,
3259        };
3260        let max_value = match s.options.max_value {
3261            None | Some(SeqBound::NoBound) => def_max,
3262            Some(SeqBound::Value(n)) => n,
3263        };
3264        if min_value > max_value {
3265            return Err(EngineError::Unsupported(alloc::format!(
3266                "MINVALUE ({min_value}) must be <= MAXVALUE ({max_value})"
3267            )));
3268        }
3269        let start = s
3270            .options
3271            .start
3272            .unwrap_or(if increment > 0 { min_value } else { max_value });
3273        if start < min_value || start > max_value {
3274            return Err(EngineError::Unsupported(alloc::format!(
3275                "START WITH ({start}) is outside MINVALUE..MAXVALUE ({min_value}..{max_value})"
3276            )));
3277        }
3278        let cache = s.options.cache.unwrap_or(1);
3279        if cache < 1 {
3280            return Err(EngineError::Unsupported("CACHE must be >= 1".into()));
3281        }
3282        let cycle = s.options.cycle.unwrap_or(false);
3283        let owned_by = match s.options.owned_by {
3284            None | Some(spg_sql::ast::SequenceOwnedBy::None) => None,
3285            Some(spg_sql::ast::SequenceOwnedBy::Column { table, column }) => Some((table, column)),
3286        };
3287        let def = SequenceDef {
3288            name: s.name.clone(),
3289            data_type: dt,
3290            start,
3291            increment,
3292            min_value,
3293            max_value,
3294            cache,
3295            cycle,
3296            owned_by,
3297            last_value: start,
3298            is_called: false,
3299        };
3300        self.active_catalog_mut()
3301            .create_sequence(def, s.if_not_exists)
3302            .map_err(EngineError::Storage)?;
3303        Ok(QueryResult::CommandOk {
3304            affected: 0,
3305            modified_catalog: !self.in_transaction(),
3306        })
3307    }
3308
3309    /// v7.17.0 — `ALTER SEQUENCE` engine path. Re-uses the catalog
3310    /// `alter_sequence` merge helper.
3311    fn exec_alter_sequence(
3312        &mut self,
3313        s: spg_sql::ast::AlterSequenceStatement,
3314    ) -> Result<QueryResult, EngineError> {
3315        use spg_sql::ast::SeqBound;
3316        // v7.29 (round-23a) - implicit serial sequences materialise
3317        // on first address, ALTER SEQUENCE included.
3318        self.ensure_implicit_sequence(&s.name);
3319        let cat = self.active_catalog_mut();
3320        if !cat.sequences().contains_key(&s.name) {
3321            if s.if_exists {
3322                return Ok(QueryResult::CommandOk {
3323                    affected: 0,
3324                    modified_catalog: false,
3325                });
3326            }
3327            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3328                alloc::format!("sequence {:?} does not exist", s.name),
3329            )));
3330        }
3331        let min_value = match s.options.min_value {
3332            None => None,
3333            Some(SeqBound::NoBound) => None, // NO MINVALUE → keep current
3334            Some(SeqBound::Value(n)) => Some(n),
3335        };
3336        let max_value = match s.options.max_value {
3337            None => None,
3338            Some(SeqBound::NoBound) => None,
3339            Some(SeqBound::Value(n)) => Some(n),
3340        };
3341        let owned_by = s.options.owned_by.map(|ob| match ob {
3342            spg_sql::ast::SequenceOwnedBy::None => None,
3343            spg_sql::ast::SequenceOwnedBy::Column { table, column } => Some((table, column)),
3344        });
3345        cat.alter_sequence(
3346            &s.name,
3347            s.options.increment,
3348            min_value,
3349            max_value,
3350            s.options.start,
3351            s.options.restart,
3352            s.options.cache,
3353            s.options.cycle,
3354            owned_by,
3355        )
3356        .map_err(EngineError::Storage)?;
3357        Ok(QueryResult::CommandOk {
3358            affected: 0,
3359            modified_catalog: !self.in_transaction(),
3360        })
3361    }
3362
3363    /// v7.17.0 Phase 1.1 — walk a Statement tree and pre-resolve
3364    /// any sequence FunctionCall nodes inside its Expr slots.
3365    /// Delegates per-statement-kind: SELECT projection +
3366    /// WHERE, INSERT VALUES, UPDATE SET, DELETE WHERE.
3367    fn pre_resolve_sequence_calls_in_statement(
3368        &mut self,
3369        stmt: &mut Statement,
3370    ) -> Result<(), EngineError> {
3371        match stmt {
3372            Statement::Select(s) => self.pre_resolve_sequence_calls_in_select(s),
3373            Statement::Insert(s) => {
3374                for tuple in &mut s.rows {
3375                    for cell in tuple.iter_mut() {
3376                        self.resolve_sequence_calls_in_expr(cell)?;
3377                    }
3378                }
3379                Ok(())
3380            }
3381            Statement::Update(s) => {
3382                for (_col, expr) in &mut s.assignments {
3383                    self.resolve_sequence_calls_in_expr(expr)?;
3384                }
3385                if let Some(w) = &mut s.where_ {
3386                    self.resolve_sequence_calls_in_expr(w)?;
3387                }
3388                Ok(())
3389            }
3390            Statement::Delete(s) => {
3391                if let Some(w) = &mut s.where_ {
3392                    self.resolve_sequence_calls_in_expr(w)?;
3393                }
3394                Ok(())
3395            }
3396            _ => Ok(()),
3397        }
3398    }
3399
3400    fn pre_resolve_sequence_calls_in_select(
3401        &mut self,
3402        s: &mut spg_sql::ast::SelectStatement,
3403    ) -> Result<(), EngineError> {
3404        for item in &mut s.items {
3405            match item {
3406                spg_sql::ast::SelectItem::Expr { expr, .. } => {
3407                    self.resolve_sequence_calls_in_expr(expr)?;
3408                }
3409                spg_sql::ast::SelectItem::Wildcard => {}
3410            }
3411        }
3412        if let Some(w) = &mut s.where_ {
3413            self.resolve_sequence_calls_in_expr(w)?;
3414        }
3415        Ok(())
3416    }
3417
3418    /// v7.17.0 Phase 1.1 — walk an Expr tree and pre-resolve any
3419    /// `nextval(name)` / `currval(name)` / `setval(name, value[,
3420    /// is_called])` FunctionCall nodes by calling the catalog and
3421    /// replacing the node with the resulting `Expr::Literal`.
3422    /// Used by INSERT VALUES / UPDATE SET / DEFAULT eval so the
3423    /// row-eval path sees pre-computed sequence values instead of
3424    /// needing mutable catalog access mid-eval.
3425    #[allow(clippy::too_many_lines)]
3426    fn resolve_sequence_calls_in_expr(&mut self, expr: &mut Expr) -> Result<(), EngineError> {
3427        match expr {
3428            Expr::Literal(_) | Expr::Column(_) | Expr::Placeholder(_) => Ok(()),
3429            Expr::FunctionCall { name, args } => {
3430                // Descend first so nested calls — e.g.
3431                // setval('seq', currval('other')) — resolve
3432                // innermost-first.
3433                for a in args.iter_mut() {
3434                    self.resolve_sequence_calls_in_expr(a)?;
3435                }
3436                let lc = name.to_ascii_lowercase();
3437                if lc == "nextval" || lc == "currval" || lc == "setval" {
3438                    let v = self.eval_sequence_call(&lc, args)?;
3439                    *expr = Expr::Literal(value_to_literal(v));
3440                } else if lc == "pg_get_serial_sequence" && args.len() == 2 {
3441                    // v7.29 (round-23a) — resolves to the implicit
3442                    // sequence name so the pg_dump idiom
3443                    // `setval(pg_get_serial_sequence('t','c'), n)`
3444                    // works (the setval arm receives a literal).
3445                    let lit = |e: &Expr| -> Option<String> {
3446                        match e {
3447                            Expr::Literal(spg_sql::ast::Literal::String(v)) => {
3448                                let t = v.strip_prefix("public.").unwrap_or(v).trim_matches('"');
3449                                Some(t.to_string())
3450                            }
3451                            _ => None,
3452                        }
3453                    };
3454                    if let (Some(t), Some(c)) = (lit(&args[0]), lit(&args[1])) {
3455                        let is_serial = self.active_catalog().get(&t).is_some_and(|tb| {
3456                            tb.schema()
3457                                .columns
3458                                .iter()
3459                                .any(|col| col.name == c && col.auto_increment)
3460                        });
3461                        *expr = if is_serial {
3462                            Expr::Literal(spg_sql::ast::Literal::String(alloc::format!(
3463                                "public.{t}_{c}_seq"
3464                            )))
3465                        } else {
3466                            Expr::Literal(spg_sql::ast::Literal::Null)
3467                        };
3468                    }
3469                }
3470                Ok(())
3471            }
3472            Expr::Binary { lhs, rhs, .. } => {
3473                self.resolve_sequence_calls_in_expr(lhs)?;
3474                self.resolve_sequence_calls_in_expr(rhs)
3475            }
3476            Expr::Unary { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3477            Expr::Cast { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3478            Expr::IsNull { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3479            Expr::Like { expr, pattern, .. } => {
3480                self.resolve_sequence_calls_in_expr(expr)?;
3481                self.resolve_sequence_calls_in_expr(pattern)
3482            }
3483            Expr::Extract { source, .. } => self.resolve_sequence_calls_in_expr(source),
3484            Expr::Array(items) => {
3485                for it in items.iter_mut() {
3486                    self.resolve_sequence_calls_in_expr(it)?;
3487                }
3488                Ok(())
3489            }
3490            // Window / subquery / etc — sequence calls inside these
3491            // are uncommon and require separate row-eval; leave
3492            // untouched for now and rely on the eval-time error
3493            // (no sequence_resolver attached).
3494            _ => Ok(()),
3495        }
3496    }
3497
3498    /// v7.29 (mailrs round-23a) — SERIAL/BIGSERIAL columns get their
3499    /// PG-style implicit sequence `<table>_<column>_seq` ON FIRST
3500    /// ADDRESS rather than at CREATE TABLE time, so pre-7.29 data
3501    /// directories gain addressability without a storage migration.
3502    /// The sequence is born synced to the column's current MAX so
3503    /// `nextval` immediately after creation continues the series.
3504    fn ensure_implicit_sequence(&mut self, seq_name: &str) {
3505        if self.active_catalog().sequences().contains_key(seq_name) {
3506            return;
3507        }
3508        let Some(rest) = seq_name.strip_suffix("_seq") else {
3509            return;
3510        };
3511        let mut found: Option<(String, String, i64)> = None;
3512        for tname in self.active_catalog().table_names() {
3513            let Some(table) = self.active_catalog().get(&tname) else {
3514                continue;
3515            };
3516            for (i, col) in table.schema().columns.iter().enumerate() {
3517                if col.auto_increment && alloc::format!("{tname}_{}", col.name) == rest {
3518                    let next = table.next_auto_value(i).unwrap_or(1);
3519                    found = Some((tname.clone(), col.name.clone(), next - 1));
3520                    break;
3521                }
3522            }
3523            if found.is_some() {
3524                break;
3525            }
3526        }
3527        let Some((tname, cname, last)) = found else {
3528            return;
3529        };
3530        let def = spg_storage::SequenceDef {
3531            name: seq_name.to_string(),
3532            data_type: spg_storage::SequenceDataType::BigInt,
3533            start: 1,
3534            increment: 1,
3535            min_value: 1,
3536            max_value: i64::MAX,
3537            cache: 1,
3538            cycle: false,
3539            owned_by: Some((tname, cname)),
3540            last_value: last.max(0),
3541            is_called: last > 0,
3542        };
3543        let _ = self.active_catalog_mut().create_sequence(def, true);
3544    }
3545
3546    /// v7.17.0 Phase 1.1 — evaluate a single nextval/currval/
3547    /// setval call. `args` are already pre-resolved Expr nodes
3548    /// (literals) — we extract their constant values.
3549    fn eval_sequence_call(&mut self, op: &str, args: &[Expr]) -> Result<Value, EngineError> {
3550        if args.is_empty() {
3551            return Err(EngineError::Unsupported(alloc::format!(
3552                "{op}() takes at least one argument"
3553            )));
3554        }
3555        let seq_name = match &args[0] {
3556            Expr::Literal(spg_sql::ast::Literal::String(s)) => {
3557                // v7.17 dump-compat — pg_dump emits sequence
3558                // names schema-qualified (`'public.posts_id_seq'`).
3559                // SPG is single-schema; strip a leading
3560                // `public.` / `pg_catalog.` so the catalog lookup
3561                // matches the bare-name CREATE SEQUENCE used.
3562                let trimmed = s
3563                    .strip_prefix("public.")
3564                    .or_else(|| s.strip_prefix("pg_catalog."))
3565                    .unwrap_or(s);
3566                trimmed.to_string()
3567            }
3568            // v7.17 dump-compat — pg_dump also emits
3569            // `nextval('public.posts_id_seq'::regclass)`
3570            // where the cast wraps the literal. Peel the cast
3571            // and continue.
3572            Expr::Cast { expr, .. } => {
3573                if let Expr::Literal(spg_sql::ast::Literal::String(s)) = expr.as_ref() {
3574                    let trimmed = s
3575                        .strip_prefix("public.")
3576                        .or_else(|| s.strip_prefix("pg_catalog."))
3577                        .unwrap_or(s);
3578                    trimmed.to_string()
3579                } else {
3580                    return Err(EngineError::Unsupported(alloc::format!(
3581                        "{op}() first argument must be a literal sequence name"
3582                    )));
3583                }
3584            }
3585            other => {
3586                return Err(EngineError::Unsupported(alloc::format!(
3587                    "{op}() first argument must be a literal sequence name, got {other:?}"
3588                )));
3589            }
3590        };
3591        self.ensure_implicit_sequence(&seq_name);
3592        match op {
3593            "nextval" => {
3594                let v = self
3595                    .active_catalog_mut()
3596                    .sequence_next_value(&seq_name)
3597                    .map_err(EngineError::Storage)?;
3598                Ok(Value::BigInt(v))
3599            }
3600            "currval" => {
3601                let v = self
3602                    .active_catalog()
3603                    .sequence_current_value(&seq_name)
3604                    .map_err(EngineError::Storage)?;
3605                Ok(Value::BigInt(v))
3606            }
3607            "setval" => {
3608                if args.len() < 2 || args.len() > 3 {
3609                    return Err(EngineError::Unsupported(alloc::format!(
3610                        "setval() takes 2 or 3 arguments, got {}",
3611                        args.len()
3612                    )));
3613                }
3614                let value = match &args[1] {
3615                    Expr::Literal(spg_sql::ast::Literal::Integer(n)) => *n,
3616                    other => {
3617                        return Err(EngineError::Unsupported(alloc::format!(
3618                            "setval() value argument must be a literal integer, got {other:?}"
3619                        )));
3620                    }
3621                };
3622                let is_called = if args.len() == 3 {
3623                    match &args[2] {
3624                        Expr::Literal(spg_sql::ast::Literal::Bool(b)) => *b,
3625                        other => {
3626                            return Err(EngineError::Unsupported(alloc::format!(
3627                                "setval() is_called argument must be a literal BOOL, got {other:?}"
3628                            )));
3629                        }
3630                    }
3631                } else {
3632                    true
3633                };
3634                let v = self
3635                    .active_catalog_mut()
3636                    .sequence_set_value(&seq_name, value, is_called)
3637                    .map_err(EngineError::Storage)?;
3638                Ok(Value::BigInt(v))
3639            }
3640            other => Err(EngineError::Unsupported(alloc::format!(
3641                "unknown sequence op {other:?}"
3642            ))),
3643        }
3644    }
3645
3646    /// v7.17.0 Phase 1.2 — find every catalog VIEW referenced in
3647    /// the SELECT's FROM / JOIN graph, re-parse each view's body
3648    /// source, and prepend it as a synthetic CTE on the
3649    /// returned SelectStatement. Returns `None` when no view
3650    /// references are found (caller proceeds with the original
3651    /// statement); returns `Some(rewritten)` otherwise (caller
3652    /// re-runs exec_select_cancel on the rewritten form so the
3653    /// regular CTE materialiser handles it).
3654    fn expand_views_in_select(
3655        &self,
3656        stmt: &SelectStatement,
3657    ) -> Result<Option<SelectStatement>, EngineError> {
3658        let cat = self.active_catalog();
3659        let mut referenced: Vec<String> = Vec::new();
3660        if let Some(from) = &stmt.from {
3661            collect_view_refs(&from.primary, cat, &mut referenced);
3662            for j in &from.joins {
3663                collect_view_refs(&j.table, cat, &mut referenced);
3664            }
3665        }
3666        // Don't expand a view name that's already shadowed by a
3667        // CTE on the same SELECT — the CTE wins per PG.
3668        referenced.retain(|n| !stmt.ctes.iter().any(|c| c.name == *n));
3669        if referenced.is_empty() {
3670            return Ok(None);
3671        }
3672        let mut new_ctes: Vec<spg_sql::ast::Cte> = Vec::with_capacity(referenced.len());
3673        for name in &referenced {
3674            let view = cat.views().get(name).ok_or_else(|| {
3675                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
3676                    "view {name:?} disappeared mid-expansion"
3677                )))
3678            })?;
3679            let parsed = spg_sql::parser::parse_statement(&view.body).map_err(|e| {
3680                EngineError::Unsupported(alloc::format!("view {name:?} body re-parse failed: {e}"))
3681            })?;
3682            let Statement::Select(body) = parsed else {
3683                return Err(EngineError::Unsupported(alloc::format!(
3684                    "view {name:?} body is not a SELECT (catalog corruption)"
3685                )));
3686            };
3687            new_ctes.push(spg_sql::ast::Cte {
3688                name: name.clone(),
3689                body,
3690                recursive: false,
3691                column_overrides: view.columns.clone(),
3692            });
3693        }
3694        let mut out = stmt.clone();
3695        // Prepend so view CTEs are visible to caller-supplied CTEs.
3696        new_ctes.extend(out.ctes);
3697        out.ctes = new_ctes;
3698        Ok(Some(out))
3699    }
3700
3701    /// v7.17.0 Phase 1.2 — `CREATE VIEW` engine path. Stores the
3702    /// Display-rendered body verbatim in the catalog; SELECT-from-
3703    /// view at exec time re-parses + prepends as a synthetic CTE.
3704    fn exec_create_view(
3705        &mut self,
3706        s: spg_sql::ast::CreateViewStatement,
3707    ) -> Result<QueryResult, EngineError> {
3708        // Render the SELECT body to canonical form so the catalog
3709        // round-trips a deterministic source (no whitespace /
3710        // comment surprises in the on-disk snapshot).
3711        let body_repr = alloc::format!("{}", spg_sql::ast::Statement::Select(s.body));
3712        let def = spg_storage::ViewDef {
3713            name: s.name.clone(),
3714            columns: s.columns,
3715            body: body_repr,
3716        };
3717        self.active_catalog_mut()
3718            .create_view(def, s.or_replace, s.if_not_exists)
3719            .map_err(EngineError::Storage)?;
3720        Ok(QueryResult::CommandOk {
3721            affected: 0,
3722            modified_catalog: !self.in_transaction(),
3723        })
3724    }
3725
3726    /// v7.17.0 Phase 1.4 — `CREATE TYPE name AS ENUM (…)` engine
3727    /// path. Registers the enum in the catalog with order-
3728    /// preserving labels. PG semantics: CREATE TYPE errors if the
3729    /// name is taken (no IF NOT EXISTS).
3730    fn exec_create_type(
3731        &mut self,
3732        s: spg_sql::ast::CreateTypeStatement,
3733    ) -> Result<QueryResult, EngineError> {
3734        // Name-collision check against tables / sequences / views /
3735        // materialized views.
3736        let cat = self.active_catalog();
3737        if cat.get(&s.name).is_some() {
3738            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3739                alloc::format!("type {:?} would shadow an existing table", s.name),
3740            )));
3741        }
3742        if cat.sequences().contains_key(&s.name) {
3743            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3744                alloc::format!("type {:?} would shadow an existing sequence", s.name),
3745            )));
3746        }
3747        if cat.views().contains_key(&s.name) {
3748            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3749                alloc::format!("type {:?} would shadow an existing view", s.name),
3750            )));
3751        }
3752        let def = match s.kind {
3753            spg_sql::ast::TypeKind::Enum { labels } => {
3754                if labels.is_empty() {
3755                    return Err(EngineError::Unsupported(
3756                        "CREATE TYPE … AS ENUM requires at least one label".into(),
3757                    ));
3758                }
3759                // Reject duplicate labels per PG.
3760                for i in 0..labels.len() {
3761                    for j in (i + 1)..labels.len() {
3762                        if labels[i] == labels[j] {
3763                            return Err(EngineError::Unsupported(alloc::format!(
3764                                "CREATE TYPE {:?}: duplicate ENUM label {:?}",
3765                                s.name,
3766                                labels[i]
3767                            )));
3768                        }
3769                    }
3770                }
3771                spg_storage::EnumDef {
3772                    name: s.name.clone(),
3773                    labels,
3774                }
3775            }
3776        };
3777        self.active_catalog_mut()
3778            .create_enum_type(def)
3779            .map_err(EngineError::Storage)?;
3780        Ok(QueryResult::CommandOk {
3781            affected: 0,
3782            modified_catalog: !self.in_transaction(),
3783        })
3784    }
3785
3786    /// v7.17.0 Phase 1.5 — `CREATE DOMAIN name AS base [DEFAULT
3787    /// expr] [NOT NULL] [CHECK (expr)]*` engine path. Stores the
3788    /// base type + Display-rendered CHECK / DEFAULT sources so
3789    /// INSERT/UPDATE on bound columns can re-eval the checks.
3790    fn exec_create_domain(
3791        &mut self,
3792        s: spg_sql::ast::CreateDomainStatement,
3793    ) -> Result<QueryResult, EngineError> {
3794        let cat = self.active_catalog();
3795        if cat.domain_types().contains_key(&s.name) {
3796            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3797                alloc::format!("domain {:?} already exists", s.name),
3798            )));
3799        }
3800        if cat.get(&s.name).is_some()
3801            || cat.sequences().contains_key(&s.name)
3802            || cat.views().contains_key(&s.name)
3803            || cat.enum_types().contains_key(&s.name)
3804        {
3805            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3806                alloc::format!("domain {:?} would shadow an existing object", s.name),
3807            )));
3808        }
3809        let base_type = column_type_to_data_type(s.base_type);
3810        let default = s.default.as_ref().map(|e| alloc::format!("{e}"));
3811        let checks = s
3812            .checks
3813            .iter()
3814            .map(|e| alloc::format!("{e}"))
3815            .collect::<Vec<_>>();
3816        let def = spg_storage::DomainDef {
3817            name: s.name.clone(),
3818            base_type,
3819            nullable: !s.not_null,
3820            default,
3821            checks,
3822        };
3823        self.active_catalog_mut()
3824            .create_domain_type(def)
3825            .map_err(EngineError::Storage)?;
3826        Ok(QueryResult::CommandOk {
3827            affected: 0,
3828            modified_catalog: !self.in_transaction(),
3829        })
3830    }
3831
3832    /// v7.17.0 Phase 1.5 — `DROP DOMAIN [IF EXISTS] names`.
3833    fn exec_drop_domain(
3834        &mut self,
3835        names: &[String],
3836        if_exists: bool,
3837    ) -> Result<QueryResult, EngineError> {
3838        let mut removed = 0usize;
3839        for name in names {
3840            let was_present = self.active_catalog_mut().drop_domain_type(name);
3841            if was_present {
3842                removed += 1;
3843            } else if !if_exists {
3844                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3845                    alloc::format!("domain {name:?} does not exist"),
3846                )));
3847            }
3848        }
3849        Ok(QueryResult::CommandOk {
3850            affected: removed,
3851            modified_catalog: removed > 0 && !self.in_transaction(),
3852        })
3853    }
3854
3855    /// v7.17.0 Phase 1.6 — `CREATE SCHEMA [IF NOT EXISTS] name`.
3856    /// Registers the schema in the catalog. Schema-qualified
3857    /// table references continue to strip the prefix at lookup
3858    /// time (prefix routing, not isolation — see project-next-
3859    /// docket for the v7.18+ real-isolation tracking).
3860    fn exec_create_schema(
3861        &mut self,
3862        name: String,
3863        if_not_exists: bool,
3864    ) -> Result<QueryResult, EngineError> {
3865        self.active_catalog_mut()
3866            .create_schema(name, if_not_exists)
3867            .map_err(EngineError::Storage)?;
3868        Ok(QueryResult::CommandOk {
3869            affected: 0,
3870            modified_catalog: !self.in_transaction(),
3871        })
3872    }
3873
3874    /// v7.17.0 Phase 1.6 — `DROP SCHEMA [IF EXISTS] names`.
3875    /// Built-in schemas always reject the drop with a clear
3876    /// error.
3877    fn exec_drop_schema(
3878        &mut self,
3879        names: &[String],
3880        if_exists: bool,
3881    ) -> Result<QueryResult, EngineError> {
3882        let mut removed = 0usize;
3883        for name in names {
3884            let was_present = self
3885                .active_catalog_mut()
3886                .drop_schema(name)
3887                .map_err(EngineError::Storage)?;
3888            if was_present {
3889                removed += 1;
3890            } else if !if_exists {
3891                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3892                    alloc::format!("schema {name:?} does not exist"),
3893                )));
3894            }
3895        }
3896        Ok(QueryResult::CommandOk {
3897            affected: removed,
3898            modified_catalog: removed > 0 && !self.in_transaction(),
3899        })
3900    }
3901
3902    /// v7.17.0 Phase 1.4 — `DROP TYPE [IF EXISTS] names`. Only
3903    /// ENUM types are catalogued today; other types silently
3904    /// no-op even outside IF EXISTS to mirror the prior
3905    /// "everything's text" lax stance.
3906    fn exec_drop_type(
3907        &mut self,
3908        names: &[String],
3909        if_exists: bool,
3910    ) -> Result<QueryResult, EngineError> {
3911        let mut removed = 0usize;
3912        for name in names {
3913            let was_present = self.active_catalog_mut().drop_enum_type(name);
3914            if was_present {
3915                removed += 1;
3916            } else if !if_exists {
3917                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3918                    alloc::format!("type {name:?} does not exist"),
3919                )));
3920            }
3921        }
3922        Ok(QueryResult::CommandOk {
3923            affected: removed,
3924            modified_catalog: removed > 0 && !self.in_transaction(),
3925        })
3926    }
3927
3928    /// v7.17.0 Phase 1.3 — `CREATE MATERIALIZED VIEW` engine path.
3929    /// Materialises the body at CREATE time (unless WITH NO DATA),
3930    /// stores the result as a regular `Table`, and registers the
3931    /// body source in the catalog so REFRESH can re-run it.
3932    fn exec_create_materialized_view(
3933        &mut self,
3934        s: spg_sql::ast::CreateMaterializedViewStatement,
3935    ) -> Result<QueryResult, EngineError> {
3936        // Name-collision check (table / view / sequence / mat-view).
3937        let cat = self.active_catalog();
3938        if cat.materialized_views().contains_key(&s.name) || cat.get(&s.name).is_some() {
3939            if s.if_not_exists {
3940                return Ok(QueryResult::CommandOk {
3941                    affected: 0,
3942                    modified_catalog: false,
3943                });
3944            }
3945            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3946                alloc::format!("materialized view {:?} already exists", s.name),
3947            )));
3948        }
3949        if cat.views().contains_key(&s.name) {
3950            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3951                alloc::format!(
3952                    "materialized view {:?} would shadow an existing view",
3953                    s.name
3954                ),
3955            )));
3956        }
3957        if cat.sequences().contains_key(&s.name) {
3958            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3959                alloc::format!(
3960                    "materialized view {:?} would shadow an existing sequence",
3961                    s.name
3962                ),
3963            )));
3964        }
3965        // Render the body to canonical form for the registry.
3966        let body_repr = alloc::format!("{}", spg_sql::ast::Statement::Select(s.body.clone()));
3967        // Execute the body to learn the columns. With WITH DATA we
3968        // also materialise the rows; with WITH NO DATA we only need
3969        // the schema, so re-use a LIMIT 0 wrap to keep the column
3970        // inference path uniform without paying for the rows.
3971        let result = self.exec_select_cancel(&s.body, CancelToken::none())?;
3972        let (mut cols, rows) = match result {
3973            QueryResult::Rows { columns, rows } => (columns, rows),
3974            other => {
3975                return Err(EngineError::Unsupported(alloc::format!(
3976                    "CREATE MATERIALIZED VIEW body did not return rows: {other:?}"
3977                )));
3978            }
3979        };
3980        // Apply the column-rename list per PG semantics.
3981        if !s.columns.is_empty() {
3982            if s.columns.len() != cols.len() {
3983                return Err(EngineError::Unsupported(alloc::format!(
3984                    "CREATE MATERIALIZED VIEW {:?}: column list has {} names but body returns {}",
3985                    s.name,
3986                    s.columns.len(),
3987                    cols.len()
3988                )));
3989            }
3990            for (c, name) in cols.iter_mut().zip(s.columns.iter()) {
3991                c.name.clone_from(name);
3992            }
3993        }
3994        // Promote any synthetic-Text projections to their actual
3995        // observed types so the backing table accepts the rows.
3996        cols = infer_column_types(&cols, &rows);
3997        let schema = spg_storage::TableSchema::new(s.name.clone(), cols);
3998        let cat = self.active_catalog_mut();
3999        cat.create_table(schema).map_err(EngineError::Storage)?;
4000        if s.with_data {
4001            let table = cat
4002                .get_mut(&s.name)
4003                .expect("just-created materialized-view backing table must exist");
4004            for row in rows {
4005                table.insert(row).map_err(EngineError::Storage)?;
4006            }
4007        }
4008        cat.register_materialized_view(s.name.clone(), body_repr);
4009        Ok(QueryResult::CommandOk {
4010            affected: 0,
4011            modified_catalog: !self.in_transaction(),
4012        })
4013    }
4014
4015    /// v7.17.0 Phase 1.3 — `REFRESH MATERIALIZED VIEW name [WITH
4016    /// [NO] DATA]`. Looks up the source, re-runs it, replaces the
4017    /// backing table's rows.
4018    fn exec_refresh_materialized_view(
4019        &mut self,
4020        name: &str,
4021        with_data: bool,
4022    ) -> Result<QueryResult, EngineError> {
4023        let source = self
4024            .active_catalog()
4025            .materialized_views()
4026            .get(name)
4027            .cloned()
4028            .ok_or_else(|| {
4029                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
4030                    "materialized view {name:?} does not exist"
4031                )))
4032            })?;
4033        // Wipe the existing rows first (PG truncates the matview
4034        // and rebuilds; we approximate with an empty INSERT loop).
4035        {
4036            let cat = self.active_catalog_mut();
4037            let table = cat.get_mut(name).ok_or_else(|| {
4038                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
4039                    "materialized view {name:?} backing table missing"
4040                )))
4041            })?;
4042            table.truncate();
4043        }
4044        if !with_data {
4045            return Ok(QueryResult::CommandOk {
4046                affected: 0,
4047                modified_catalog: !self.in_transaction(),
4048            });
4049        }
4050        let parsed = spg_sql::parser::parse_statement(&source).map_err(|e| {
4051            EngineError::Unsupported(alloc::format!(
4052                "materialized view {name:?} body re-parse failed: {e}"
4053            ))
4054        })?;
4055        let Statement::Select(body) = parsed else {
4056            return Err(EngineError::Unsupported(alloc::format!(
4057                "materialized view {name:?} body is not a SELECT (catalog corruption)"
4058            )));
4059        };
4060        let rows = match self.exec_select_cancel(&body, CancelToken::none())? {
4061            QueryResult::Rows { rows, .. } => rows,
4062            other => {
4063                return Err(EngineError::Unsupported(alloc::format!(
4064                    "REFRESH MATERIALIZED VIEW {name:?} body did not return rows: {other:?}"
4065                )));
4066            }
4067        };
4068        let cat = self.active_catalog_mut();
4069        let table = cat.get_mut(name).expect("backing table verified above");
4070        let affected = rows.len();
4071        for row in rows {
4072            table.insert(row).map_err(EngineError::Storage)?;
4073        }
4074        Ok(QueryResult::CommandOk {
4075            affected,
4076            modified_catalog: !self.in_transaction(),
4077        })
4078    }
4079
4080    /// v7.17.0 Phase 1.3 — `DROP MATERIALIZED VIEW [IF EXISTS]
4081    /// names`. Drops the backing table + unregisters the source.
4082    fn exec_drop_materialized_view(
4083        &mut self,
4084        names: &[String],
4085        if_exists: bool,
4086    ) -> Result<QueryResult, EngineError> {
4087        let mut removed = 0usize;
4088        for name in names {
4089            let was_present = self
4090                .active_catalog_mut()
4091                .drop_materialized_view_source(name);
4092            if was_present {
4093                // Drop the backing table too.
4094                self.active_catalog_mut().drop_table(name);
4095                removed += 1;
4096            } else if !if_exists {
4097                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4098                    alloc::format!("materialized view {name:?} does not exist"),
4099                )));
4100            }
4101        }
4102        Ok(QueryResult::CommandOk {
4103            affected: removed,
4104            modified_catalog: removed > 0 && !self.in_transaction(),
4105        })
4106    }
4107
4108    /// v7.17.0 Phase 1.2 — `DROP VIEW [IF EXISTS] name [, name…]`.
4109    fn exec_drop_view(
4110        &mut self,
4111        names: &[String],
4112        if_exists: bool,
4113    ) -> Result<QueryResult, EngineError> {
4114        let mut removed = 0usize;
4115        for name in names {
4116            let was_present = self.active_catalog_mut().drop_view(name);
4117            if !was_present && !if_exists {
4118                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4119                    alloc::format!("view {name:?} does not exist"),
4120                )));
4121            }
4122            if was_present {
4123                removed += 1;
4124            }
4125        }
4126        Ok(QueryResult::CommandOk {
4127            affected: removed,
4128            modified_catalog: removed > 0 && !self.in_transaction(),
4129        })
4130    }
4131
4132    /// v7.17.0 — `DROP SEQUENCE [IF EXISTS] name [, name…]`.
4133    fn exec_drop_sequence(
4134        &mut self,
4135        names: &[String],
4136        if_exists: bool,
4137    ) -> Result<QueryResult, EngineError> {
4138        let mut removed = 0usize;
4139        for name in names {
4140            let was_present = self.active_catalog_mut().drop_sequence(name);
4141            if !was_present && !if_exists {
4142                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4143                    alloc::format!("sequence {name:?} does not exist"),
4144                )));
4145            }
4146            if was_present {
4147                removed += 1;
4148            }
4149        }
4150        Ok(QueryResult::CommandOk {
4151            affected: removed,
4152            modified_catalog: removed > 0 && !self.in_transaction(),
4153        })
4154    }
4155
4156    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
4157    /// Filter pass uses the same WHERE eval as `exec_select`. Per
4158    /// matched row, evaluate each RHS expression against the *old*
4159    /// row, then call `Table::update_row` which rebuilds indices.
4160    /// Indexed columns are correctly reflected because rebuild
4161    /// happens after the cell rewrite.
4162    fn exec_update_cancel(
4163        &mut self,
4164        stmt: &spg_sql::ast::UpdateStatement,
4165        cancel: CancelToken<'_>,
4166    ) -> Result<QueryResult, EngineError> {
4167        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
4168        // session FTS config before the table mut-borrow opens (the
4169        // INSERT path uses the same pattern). Empty vecs are the
4170        // common "no triggers on this table" fast path.
4171        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
4172        // cols` filter. The filter is paired with each function so
4173        // the per-row fire loop can skip when no listed column
4174        // actually differs between OLD and NEW.
4175        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
4176        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
4177        let trigger_session_cfg: Option<String> = self
4178            .session_params
4179            .get("default_text_search_config")
4180            .cloned();
4181        // v5.2.3: if the WHERE is a PK equality and matches a cold-
4182        // tier row, promote it back to the hot tier *before* the
4183        // hot-row walk. The promote pushes the row to the end of
4184        // `table.rows`, where the upcoming SET-evaluation loop will
4185        // pick it up and apply the assignments. Lookups for the key
4186        // never observe a gap because `promote_cold_row` inserts the
4187        // hot row before retiring the cold locator.
4188        if let Some(w) = &stmt.where_ {
4189            let schema_cols = self
4190                .active_catalog()
4191                .get(&stmt.table)
4192                .ok_or_else(|| {
4193                    EngineError::Storage(StorageError::TableNotFound {
4194                        name: stmt.table.clone(),
4195                    })
4196                })?
4197                .schema()
4198                .columns
4199                .clone();
4200            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
4201                && let Some(idx_name) = self
4202                    .active_catalog()
4203                    .get(&stmt.table)
4204                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
4205            {
4206                // Promote may be a no-op (key is hot-only or absent);
4207                // we don't care about the return value here — the
4208                // subsequent hot walk will either match or not.
4209                let _ = self
4210                    .active_catalog_mut()
4211                    .promote_cold_row(&stmt.table, &idx_name, &key);
4212            }
4213        }
4214
4215        // v7.12.1 — cache session FTS config before the table
4216        // mut-borrow (same reason as exec_delete).
4217        let ts_cfg: Option<String> = self
4218            .session_param("default_text_search_config")
4219            .map(String::from);
4220        // v7.17.0 Phase 2.1 — snapshot the clock pointer before
4221        // we hold the catalog mutably so ON UPDATE runtime
4222        // overrides see the engine wall clock.
4223        let clock_for_on_update = self.clock;
4224        let table = self
4225            .active_catalog_mut()
4226            .get_mut(&stmt.table)
4227            .ok_or_else(|| {
4228                EngineError::Storage(StorageError::TableNotFound {
4229                    name: stmt.table.clone(),
4230                })
4231            })?;
4232        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
4233        // Resolve each SET target to a column position once, validate
4234        // up front so a typo'd column doesn't leave a partial mutation
4235        // behind.
4236        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
4237        for (col, expr) in &stmt.assignments {
4238            let pos = schema_cols
4239                .iter()
4240                .position(|c| c.name == *col)
4241                .ok_or_else(|| {
4242                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
4243                })?;
4244            targets.push((pos, expr));
4245        }
4246        // v7.17.0 Phase 2.1 — for every column with an
4247        // `ON UPDATE CURRENT_TIMESTAMP` binding that the caller
4248        // did NOT explicitly set, schedule an automatic override.
4249        // Reuses `eval_runtime_default_free` so the same
4250        // canonical runtime-expression whitelist (now /
4251        // current_timestamp / current_date / …) governs both
4252        // DEFAULT and ON UPDATE.
4253        let mut on_update_overrides: Vec<(usize, String)> = Vec::new();
4254        for (i, col) in schema_cols.iter().enumerate() {
4255            if targets.iter().any(|(p, _)| *p == i) {
4256                continue;
4257            }
4258            if let Some(src) = &col.on_update_runtime {
4259                on_update_overrides.push((i, src.clone()));
4260            }
4261        }
4262        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
4263            .with_default_text_search_config(ts_cfg.as_deref());
4264        // Walk candidate rows, evaluate WHERE then SET
4265        // expressions. We gather (position, new_values) tuples
4266        // first and apply them afterwards so the WHERE/RHS
4267        // evaluation reads the original row state — matches PG
4268        // semantics (UPDATE doesn't see its own writes).
4269        //
4270        // v7.20 P4 — index seek: a single-column equality WHERE
4271        // on an indexed column narrows the walk from
4272        // O(table.rows()) to O(matches). The full WHERE still
4273        // re-evaluates per candidate (the seek may be an
4274        // over-approximation under AND-composites), so semantics
4275        // are unchanged. profile: the bench's `UPDATE … WHERE
4276        // id = $1` on a 5 000-row table was a ~1.3 ms full scan
4277        // per statement; with the seek it's ~2 µs.
4278        let seek_positions: Option<Vec<usize>> = stmt
4279            .where_
4280            .as_ref()
4281            .and_then(|w| try_index_seek_positions(w, &schema_cols, table, stmt.table.as_str()));
4282        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
4283        let candidate_positions: Vec<usize> = match &seek_positions {
4284            Some(list) => list.clone(),
4285            None => (0..table.row_count()).collect(),
4286        };
4287        for (loop_n, &i) in candidate_positions.iter().enumerate() {
4288            // v4.5: cooperative cancel checkpoint every 256 rows so
4289            // a runaway UPDATE without WHERE doesn't drag past the
4290            // server's query-timeout watchdog.
4291            if loop_n.is_multiple_of(256) {
4292                cancel.check()?;
4293            }
4294            let Some(row) = table.rows().get(i) else {
4295                continue;
4296            };
4297            if let Some(w) = &stmt.where_ {
4298                let cond = eval::eval_expr(w, row, &ctx)?;
4299                if !matches!(cond, Value::Bool(true)) {
4300                    continue;
4301                }
4302            }
4303            let mut new_vals = row.values.clone();
4304            for (pos, expr) in &targets {
4305                let v = eval::eval_expr(expr, row, &ctx)?;
4306                let coerced = coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
4307                check_unsigned_range(&coerced, &schema_cols[*pos], *pos)?;
4308                new_vals[*pos] = coerced;
4309            }
4310            // v7.17.0 Phase 2.1 — apply ON UPDATE overrides for
4311            // any column the SET clause didn't touch.
4312            for (pos, src) in &on_update_overrides {
4313                let v = eval_runtime_default_free(src, schema_cols[*pos].ty, clock_for_on_update)?;
4314                new_vals[*pos] = v;
4315            }
4316            planned.push((i, new_vals));
4317        }
4318        // planned must stay position-sorted: downstream passes
4319        // (FK pairing, trigger walks, the apply loop) iterate it
4320        // assuming ascending row order, which the full-scan path
4321        // guaranteed implicitly.
4322        planned.sort_by_key(|(i, _)| *i);
4323        // v7.6.6 — capture pre-update row values for the FK
4324        // enforcement passes below. `planned` carries new values
4325        // only; pair them with the old row.
4326        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
4327            .iter()
4328            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
4329            .collect();
4330        let self_fks = table.schema().foreign_keys.clone();
4331        // v7.12.5 — `affected` is computed post-BEFORE-trigger
4332        // below (triggers may RETURN NULL to skip individual
4333        // rows). The pre-trigger len shape is no longer accurate.
4334        // Release mutable borrow on `table` for the FK passes.
4335        let _ = table;
4336        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
4337        // local FK columns changed, the new value must exist in the
4338        // parent.
4339        if !self_fks.is_empty() {
4340            let new_rows: Vec<Vec<Value>> = planned
4341                .iter()
4342                .map(|(_pos, new_vals)| new_vals.clone())
4343                .collect();
4344            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
4345        }
4346        // v7.13.0 — CHECK constraint enforcement on UPDATE
4347        // (mailrs round-5 G3). Predicates evaluated against the
4348        // candidate post-UPDATE row; false rejects the UPDATE.
4349        {
4350            let new_rows: Vec<Vec<Value>> = planned
4351                .iter()
4352                .map(|(_pos, new_vals)| new_vals.clone())
4353                .collect();
4354            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
4355        }
4356        // v7.6.6 — Stage 2b: inbound FK check. For every row that
4357        // changed value in a column that *some other table* uses as
4358        // a FK parent column, react per `on_update` action.
4359        let child_plan =
4360            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
4361        // Stage 3a — apply each child-side action.
4362        for step in &child_plan {
4363            apply_fk_child_step(self.active_catalog_mut(), step)?;
4364        }
4365        // Stage 3b — apply the original UPDATE.
4366        let table = self
4367            .active_catalog_mut()
4368            .get_mut(&stmt.table)
4369            .ok_or_else(|| {
4370                EngineError::Storage(StorageError::TableNotFound {
4371                    name: stmt.table.clone(),
4372                })
4373            })?;
4374        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
4375        // around the apply loop. BEFORE sees NEW=candidate +
4376        // OLD=current; may rewrite NEW or RETURN NULL to skip.
4377        // AFTER sees NEW=post-write + OLD=pre-write (both read-
4378        // only).
4379        //
4380        // Filter `planned` through the BEFORE pass first so the
4381        // RETURNING snapshot reflects what actually got written
4382        // (triggers may rewrite cells, including a cancellation).
4383        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
4384        // v7.12.7 — embedded SQL queue.
4385        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4386        for (pos, new_vals) in &planned {
4387            let old_row = table.rows()[*pos].clone();
4388            let mut new_row = Row::new(new_vals.clone());
4389            let mut skip = false;
4390            for (fd, filter) in &before_update_triggers {
4391                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
4392                // G7). Skip this trigger when the filter is set and
4393                // no listed column actually differs between OLD and
4394                // NEW for this row.
4395                if !filter.is_empty()
4396                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
4397                {
4398                    continue;
4399                }
4400                let (outcome, deferred) = triggers::fire_row_trigger(
4401                    fd,
4402                    Some(new_row.clone()),
4403                    Some(&old_row),
4404                    &stmt.table,
4405                    &schema_cols,
4406                    &[],
4407                    trigger_session_cfg.as_deref(),
4408                    false,
4409                )
4410                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4411                deferred_embedded.extend(deferred);
4412                match outcome {
4413                    triggers::TriggerOutcome::Row(r) => new_row = r,
4414                    triggers::TriggerOutcome::Skip => {
4415                        skip = true;
4416                        break;
4417                    }
4418                }
4419            }
4420            if !skip {
4421                applied_after_before.push((*pos, new_row, old_row));
4422            }
4423        }
4424        // v7.9.4 — snapshot post-update values for RETURNING (post-
4425        // BEFORE-trigger because triggers can rewrite cells).
4426        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
4427            applied_after_before
4428                .iter()
4429                .map(|(_pos, new_row, _old)| new_row.values.clone())
4430                .collect()
4431        } else {
4432            Vec::new()
4433        };
4434        let affected = applied_after_before.len();
4435        // Apply, then fire AFTER triggers per row. AFTER runs read-
4436        // only against the freshly-written row; v7.12.4-shape
4437        // assignment errors with a clear message.
4438        for (pos, new_row, old_row) in applied_after_before {
4439            table.update_row(pos, new_row.values.clone())?;
4440            for (fd, filter) in &after_update_triggers {
4441                if !filter.is_empty()
4442                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
4443                {
4444                    continue;
4445                }
4446                let (_outcome, deferred) = triggers::fire_row_trigger(
4447                    fd,
4448                    Some(new_row.clone()),
4449                    Some(&old_row),
4450                    &stmt.table,
4451                    &schema_cols,
4452                    &[],
4453                    trigger_session_cfg.as_deref(),
4454                    true,
4455                )
4456                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4457                deferred_embedded.extend(deferred);
4458            }
4459        }
4460        let _ = table;
4461        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
4462        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
4463        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
4464        if !self.in_transaction() && affected > 0 {
4465            self.statistics
4466                .record_modifications(&stmt.table, affected as u64);
4467        }
4468        // v7.9.4 — RETURNING projection.
4469        if let Some(items) = &stmt.returning {
4470            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
4471        }
4472        Ok(QueryResult::CommandOk {
4473            affected,
4474            modified_catalog: !self.in_transaction(),
4475        })
4476    }
4477
4478    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
4479    /// positions then delegates to `Table::delete_rows` (single index
4480    /// rebuild for the batch).
4481    /// v7.17.0 Phase 3.P0-42 — SQL:2003 / PG 15+ `MERGE` execution.
4482    ///
4483    /// Semantics:
4484    ///   * Resolve `target` and `source` tables (catalog reads).
4485    ///   * Build a combined `(target_alias.col, source_alias.col)`
4486    ///     schema so the ON / WHEN AND / SET / VALUES expressions
4487    ///     resolve through the standard qualifier-aware resolver.
4488    ///   * Pass 1: walk every source row × every target hot row,
4489    ///     evaluate ON, then pick the first WHEN clause that fits
4490    ///     (`Matched` if any target row matched, `NotMatched`
4491    ///     otherwise; AND-condition must hold). Collect the action
4492    ///     plan as `(deletes, updates, inserts)` so the apply pass
4493    ///     reads the original target row state.
4494    ///   * Pass 2: apply the plan against the target's mutable row
4495    ///     vector. Deletes execute by index in descending order so
4496    ///     earlier indices remain stable; updates next; inserts
4497    ///     last (matching PG's "INSERT branch sees the post-delete
4498    ///     state" behaviour for the common upsert shape).
4499    ///
4500    /// v7.17 simplifications (documented limitations):
4501    ///   * No triggers / WAL plumbing (MVP); MERGE rows don't fire
4502    ///     INSERT / UPDATE / DELETE row triggers in v7.17.
4503    ///   * No cardinality check (PG-canonical: "MERGE command
4504    ///     cannot affect row a second time" — SPG silently applies
4505    ///     the last action for a target row covered twice).
4506    ///   * Source must be a catalog-resolvable table (no subquery
4507    ///     source); RETURNING / BY SOURCE / BY TARGET unsupported.
4508    fn exec_merge_cancel(
4509        &mut self,
4510        stmt: &spg_sql::ast::MergeStatement,
4511        cancel: CancelToken<'_>,
4512    ) -> Result<QueryResult, EngineError> {
4513        let target_alias = stmt
4514            .target_alias
4515            .clone()
4516            .unwrap_or_else(|| stmt.target.clone());
4517        let source_alias = stmt
4518            .source_alias
4519            .clone()
4520            .unwrap_or_else(|| stmt.source.clone());
4521        let (target_cols, target_rows_snapshot) = {
4522            let t = self.active_catalog().get(&stmt.target).ok_or_else(|| {
4523                EngineError::Storage(StorageError::TableNotFound {
4524                    name: stmt.target.clone(),
4525                })
4526            })?;
4527            (
4528                t.schema().columns.clone(),
4529                t.rows().iter().cloned().collect::<Vec<Row>>(),
4530            )
4531        };
4532        let (source_cols, source_rows) = {
4533            let s = self.active_catalog().get(&stmt.source).ok_or_else(|| {
4534                EngineError::Storage(StorageError::TableNotFound {
4535                    name: stmt.source.clone(),
4536                })
4537            })?;
4538            (
4539                s.schema().columns.clone(),
4540                s.rows().iter().cloned().collect::<Vec<Row>>(),
4541            )
4542        };
4543        // Composite schema: target_alias.col ... source_alias.col ...
4544        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
4545        for col in &target_cols {
4546            combined_schema.push(ColumnSchema::new(
4547                alloc::format!("{target_alias}.{}", col.name),
4548                col.ty,
4549                col.nullable,
4550            ));
4551        }
4552        for col in &source_cols {
4553            combined_schema.push(ColumnSchema::new(
4554                alloc::format!("{source_alias}.{}", col.name),
4555                col.ty,
4556                col.nullable,
4557            ));
4558        }
4559        let combined_ctx = EvalContext::new(&combined_schema, None);
4560        // Source-only context for WHEN NOT MATCHED actions (no
4561        // matched target row exists — the source-side qualified
4562        // columns must still resolve).
4563        let mut source_only_schema: Vec<ColumnSchema> = Vec::new();
4564        for col in &target_cols {
4565            source_only_schema.push(ColumnSchema::new(
4566                alloc::format!("{target_alias}.{}", col.name),
4567                col.ty,
4568                col.nullable,
4569            ));
4570        }
4571        for col in &source_cols {
4572            source_only_schema.push(ColumnSchema::new(
4573                alloc::format!("{source_alias}.{}", col.name),
4574                col.ty,
4575                col.nullable,
4576            ));
4577        }
4578        let source_only_ctx = EvalContext::new(&source_only_schema, None);
4579        let target_arity = target_cols.len();
4580        let source_arity = source_cols.len();
4581
4582        // Resolve INSERT column positions once (validate names).
4583        // For each clause that's an INSERT, map column names → target positions.
4584        let mut delete_indices: Vec<usize> = Vec::new();
4585        let mut updates: Vec<(usize, Vec<Value>)> = Vec::new();
4586        let mut inserts: Vec<Vec<Value>> = Vec::new();
4587        let mut affected: usize = 0;
4588
4589        for (src_idx, src_row) in source_rows.iter().enumerate() {
4590            if src_idx.is_multiple_of(256) {
4591                cancel.check()?;
4592            }
4593            // Find every matched target index (per the ON predicate).
4594            let mut matched_targets: Vec<usize> = Vec::new();
4595            for (t_idx, t_row) in target_rows_snapshot.iter().enumerate() {
4596                let mut combined_vals = t_row.values.clone();
4597                combined_vals.extend(src_row.values.iter().cloned());
4598                let combined_row = Row::new(combined_vals);
4599                let cond = eval::eval_expr(&stmt.on, &combined_row, &combined_ctx)?;
4600                if matches!(cond, Value::Bool(true)) {
4601                    matched_targets.push(t_idx);
4602                }
4603            }
4604            let is_matched = !matched_targets.is_empty();
4605            // Pick the first WHEN clause whose kind agrees with
4606            // `is_matched` and whose AND condition (if any) holds.
4607            // AND condition for MATCHED: evaluated against the
4608            // first matched target row × source. For NOT MATCHED:
4609            // evaluated with target side NULL-padded.
4610            let fired_clause = stmt.clauses.iter().find(|c| {
4611                let kind_ok = match c.matched {
4612                    spg_sql::ast::MergeMatched::Matched => is_matched,
4613                    spg_sql::ast::MergeMatched::NotMatched => !is_matched,
4614                };
4615                if !kind_ok {
4616                    return false;
4617                }
4618                let Some(cond_expr) = &c.condition else {
4619                    return true;
4620                };
4621                let row = if is_matched {
4622                    let t = &target_rows_snapshot[matched_targets[0]];
4623                    let mut vals = t.values.clone();
4624                    vals.extend(src_row.values.iter().cloned());
4625                    Row::new(vals)
4626                } else {
4627                    let mut vals: Vec<Value> = (0..target_arity).map(|_| Value::Null).collect();
4628                    vals.extend(src_row.values.iter().cloned());
4629                    Row::new(vals)
4630                };
4631                let ctx_ref = if is_matched {
4632                    &combined_ctx
4633                } else {
4634                    &source_only_ctx
4635                };
4636                matches!(
4637                    eval::eval_expr(cond_expr, &row, ctx_ref),
4638                    Ok(Value::Bool(true))
4639                )
4640            });
4641            let Some(clause) = fired_clause else { continue };
4642            match &clause.action {
4643                spg_sql::ast::MergeAction::DoNothing => {}
4644                spg_sql::ast::MergeAction::Delete => {
4645                    for &t_idx in &matched_targets {
4646                        if !delete_indices.contains(&t_idx) {
4647                            delete_indices.push(t_idx);
4648                            affected += 1;
4649                        }
4650                    }
4651                }
4652                spg_sql::ast::MergeAction::Update { assignments } => {
4653                    // Pre-resolve SET targets to target column positions.
4654                    let mut planned_sets: Vec<(usize, &Expr)> =
4655                        Vec::with_capacity(assignments.len());
4656                    for (col, expr) in assignments {
4657                        let pos =
4658                            target_cols
4659                                .iter()
4660                                .position(|c| c.name == *col)
4661                                .ok_or_else(|| {
4662                                    EngineError::Eval(EvalError::ColumnNotFound {
4663                                        name: col.clone(),
4664                                    })
4665                                })?;
4666                        planned_sets.push((pos, expr));
4667                    }
4668                    for &t_idx in &matched_targets {
4669                        let t_row = &target_rows_snapshot[t_idx];
4670                        let mut new_values = t_row.values.clone();
4671                        let mut combined_vals = t_row.values.clone();
4672                        combined_vals.extend(src_row.values.iter().cloned());
4673                        let combined_row = Row::new(combined_vals);
4674                        for (pos, expr) in &planned_sets {
4675                            let raw = eval::eval_expr(expr, &combined_row, &combined_ctx)?;
4676                            let coerced = coerce_value(
4677                                raw,
4678                                target_cols[*pos].ty,
4679                                &target_cols[*pos].name,
4680                                *pos,
4681                            )?;
4682                            new_values[*pos] = coerced;
4683                        }
4684                        updates.push((t_idx, new_values));
4685                        affected += 1;
4686                    }
4687                }
4688                spg_sql::ast::MergeAction::Insert { columns, values } => {
4689                    // For INSERT NOT MATCHED, target side is NULL-padded.
4690                    let mut vals: Vec<Value> = (0..target_arity).map(|_| Value::Null).collect();
4691                    vals.extend(src_row.values.iter().cloned());
4692                    let synth_row = Row::new(vals);
4693                    let mut new_row_values: Vec<Value> =
4694                        (0..target_arity).map(|_| Value::Null).collect();
4695                    for (col, expr) in columns.iter().zip(values.iter()) {
4696                        let pos =
4697                            target_cols
4698                                .iter()
4699                                .position(|c| c.name == *col)
4700                                .ok_or_else(|| {
4701                                    EngineError::Eval(EvalError::ColumnNotFound {
4702                                        name: col.clone(),
4703                                    })
4704                                })?;
4705                        let raw = eval::eval_expr(expr, &synth_row, &source_only_ctx)?;
4706                        let coerced =
4707                            coerce_value(raw, target_cols[pos].ty, &target_cols[pos].name, pos)?;
4708                        new_row_values[pos] = coerced;
4709                    }
4710                    inserts.push(new_row_values);
4711                    affected += 1;
4712                }
4713            }
4714        }
4715        let _ = source_arity; // captured for symmetry; cancellation cost negligible.
4716
4717        // Apply the plan to the target table.
4718        let table = self
4719            .active_catalog_mut()
4720            .get_mut(&stmt.target)
4721            .ok_or_else(|| {
4722                EngineError::Storage(StorageError::TableNotFound {
4723                    name: stmt.target.clone(),
4724                })
4725            })?;
4726        // Apply updates first (in-place), then deletes (one batch),
4727        // then inserts. The storage API uses `update_row(pos,
4728        // new_values)`, `delete_rows(&[positions])`, and `insert(row)`.
4729        for (idx, new_vals) in &updates {
4730            table
4731                .update_row(*idx, new_vals.clone())
4732                .map_err(EngineError::Storage)?;
4733        }
4734        if !delete_indices.is_empty() {
4735            table.delete_rows(&delete_indices);
4736        }
4737        for vals in inserts {
4738            table.insert(Row::new(vals)).map_err(EngineError::Storage)?;
4739        }
4740        Ok(QueryResult::CommandOk {
4741            affected,
4742            modified_catalog: affected > 0,
4743        })
4744    }
4745
4746    fn exec_delete_cancel(
4747        &mut self,
4748        stmt: &spg_sql::ast::DeleteStatement,
4749        cancel: CancelToken<'_>,
4750    ) -> Result<QueryResult, EngineError> {
4751        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
4752        // session FTS config before the mut borrow (same shape as
4753        // INSERT / UPDATE).
4754        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
4755        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
4756        let trigger_session_cfg: Option<String> = self
4757            .session_params
4758            .get("default_text_search_config")
4759            .cloned();
4760        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
4761        // locator for the key. The cold row body stays in the
4762        // segment (becoming shadowed garbage that a future
4763        // compaction pass reclaims) but the index no longer
4764        // resolves it. The shadow count contributes to the
4765        // affected total; the subsequent hot walk handles any hot
4766        // rows for the same key.
4767        let mut cold_shadow_count: usize = 0;
4768        if let Some(w) = &stmt.where_ {
4769            let schema_cols = self
4770                .active_catalog()
4771                .get(&stmt.table)
4772                .ok_or_else(|| {
4773                    EngineError::Storage(StorageError::TableNotFound {
4774                        name: stmt.table.clone(),
4775                    })
4776                })?
4777                .schema()
4778                .columns
4779                .clone();
4780            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
4781                && let Some(idx_name) = self
4782                    .active_catalog()
4783                    .get(&stmt.table)
4784                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
4785            {
4786                cold_shadow_count = self
4787                    .active_catalog_mut()
4788                    .shadow_cold_row(&stmt.table, &idx_name, &key)
4789                    .unwrap_or(0);
4790            }
4791        }
4792
4793        // v7.12.1 — cache the session FTS config as an owned
4794        // String before the mutable table borrow below; the
4795        // ctx-builder then references it via `as_deref` so the
4796        // immutable read of `session_params` doesn't conflict
4797        // with the mut borrow chain.
4798        let ts_cfg: Option<String> = self
4799            .session_param("default_text_search_config")
4800            .map(String::from);
4801        let table = self
4802            .active_catalog_mut()
4803            .get_mut(&stmt.table)
4804            .ok_or_else(|| {
4805                EngineError::Storage(StorageError::TableNotFound {
4806                    name: stmt.table.clone(),
4807                })
4808            })?;
4809        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
4810        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
4811            .with_default_text_search_config(ts_cfg.as_deref());
4812        let mut positions: Vec<usize> = Vec::new();
4813        // v7.6.3 — collect every to-delete row's full Value tuple
4814        // alongside its position, so the FK enforcement pass can
4815        // run after the mut borrow drops.
4816        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
4817        // v7.20 P4 — index seek (same shape as exec_update_cancel):
4818        // an equality WHERE on an indexed column narrows the walk
4819        // to the matching hot positions; the full WHERE still
4820        // re-evaluates per candidate. Downstream passes assume
4821        // ascending position order, so the seek result is sorted.
4822        let seek_positions: Option<Vec<usize>> = stmt
4823            .where_
4824            .as_ref()
4825            .and_then(|w| try_index_seek_positions(w, &schema_cols, table, stmt.table.as_str()));
4826        let candidate_positions: Vec<usize> = match seek_positions {
4827            Some(mut list) => {
4828                list.sort_unstable();
4829                list
4830            }
4831            None => (0..table.row_count()).collect(),
4832        };
4833        for (loop_n, &i) in candidate_positions.iter().enumerate() {
4834            if loop_n.is_multiple_of(256) {
4835                cancel.check()?;
4836            }
4837            let Some(row) = table.rows().get(i) else {
4838                continue;
4839            };
4840            let keep = if let Some(w) = &stmt.where_ {
4841                let cond = eval::eval_expr(w, row, &ctx)?;
4842                !matches!(cond, Value::Bool(true))
4843            } else {
4844                false
4845            };
4846            if !keep {
4847                positions.push(i);
4848                to_delete_rows.push(row.values.clone());
4849            }
4850        }
4851        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
4852        // catalog. Release the mut borrow and run reverse-scan
4853        // against every child table whose FK targets this table.
4854        // RESTRICT / NoAction raise an error; CASCADE returns a
4855        // cascade plan that stage 3 applies after the primary delete.
4856        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
4857        let _ = table;
4858        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
4859        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
4860        // = proceed, RETURN NULL = skip the row entirely. The
4861        // filter must run BEFORE the FK cascade plan so cascaded
4862        // child rows track the trigger's skip-decision on the
4863        // parent.
4864        // v7.12.7 — embedded SQL queue.
4865        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4866        if !before_delete_triggers.is_empty() {
4867            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
4868            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
4869            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
4870                let old_row = Row::new(old_vals.clone());
4871                let mut cancel_this = false;
4872                for fd in &before_delete_triggers {
4873                    let (outcome, deferred) = triggers::fire_row_trigger(
4874                        fd,
4875                        None,
4876                        Some(&old_row),
4877                        &stmt.table,
4878                        &schema_cols,
4879                        &[],
4880                        trigger_session_cfg.as_deref(),
4881                        false,
4882                    )
4883                    .map_err(|e| {
4884                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
4885                    })?;
4886                    deferred_embedded.extend(deferred);
4887                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
4888                        cancel_this = true;
4889                        break;
4890                    }
4891                }
4892                if !cancel_this {
4893                    filtered_positions.push(*pos);
4894                    filtered_old_rows.push(old_vals.clone());
4895                }
4896            }
4897            positions = filtered_positions;
4898            to_delete_rows = filtered_old_rows;
4899        }
4900        let cascade_plan = plan_fk_parent_deletions(
4901            self.active_catalog(),
4902            &stmt.table,
4903            &positions,
4904            &to_delete_rows,
4905        )?;
4906        // Stage 3a — apply each FK child step (SET NULL / SET
4907        // DEFAULT / CASCADE delete) before deleting the parent.
4908        // The plan is already ordered: nulls/defaults first, then
4909        // cascade deletes (so a row mutated and later deleted
4910        // surfaces as deleted — though v7.6.5 doesn't produce
4911        // that overlap today).
4912        for step in &cascade_plan {
4913            apply_fk_child_step(self.active_catalog_mut(), step)?;
4914        }
4915        // Stage 3b — actually delete the original target rows.
4916        let table = self
4917            .active_catalog_mut()
4918            .get_mut(&stmt.table)
4919            .ok_or_else(|| {
4920                EngineError::Storage(StorageError::TableNotFound {
4921                    name: stmt.table.clone(),
4922                })
4923            })?;
4924        let affected = table.delete_rows(&positions) + cold_shadow_count;
4925        let _ = table;
4926        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
4927        // with NEW=None / OLD=pre-delete row (each from the
4928        // already-snapshotted to_delete_rows). Return value is
4929        // ignored (matches PG AFTER semantics).
4930        if !after_delete_triggers.is_empty() {
4931            for old_vals in &to_delete_rows {
4932                let old_row = Row::new(old_vals.clone());
4933                for fd in &after_delete_triggers {
4934                    let (_outcome, deferred) = triggers::fire_row_trigger(
4935                        fd,
4936                        None,
4937                        Some(&old_row),
4938                        &stmt.table,
4939                        &schema_cols,
4940                        &[],
4941                        trigger_session_cfg.as_deref(),
4942                        true,
4943                    )
4944                    .map_err(|e| {
4945                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
4946                    })?;
4947                    deferred_embedded.extend(deferred);
4948                }
4949            }
4950        }
4951        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
4952        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
4953        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
4954        if !self.in_transaction() && affected > 0 {
4955            self.statistics
4956                .record_modifications(&stmt.table, affected as u64);
4957        }
4958        // v7.9.4 — RETURNING projection over the soon-to-be-gone
4959        // rows. `to_delete_rows` was snapshotted in stage 1 before
4960        // mutation, so the projection sees the pre-delete state
4961        // (matches PG semantics: DELETE RETURNING returns the row
4962        // as it was just before removal).
4963        if let Some(items) = &stmt.returning {
4964            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
4965        }
4966        Ok(QueryResult::CommandOk {
4967            affected,
4968            modified_catalog: !self.in_transaction(),
4969        })
4970    }
4971
4972    /// `SHOW TABLES` — one row per table in the active catalog.
4973    /// Column name is `name` so result-set consumers can downstream
4974    /// `SELECT name FROM ...` style logic if needed.
4975    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
4976    /// `QUERY PLAN` text table — first line names the top operator
4977    /// (Scan / Aggregate / Window / etc.), indented children list
4978    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
4979    /// shape, and any active index hits. `ANALYZE` execs the inner
4980    /// SELECT and appends actual-row + elapsed-micros annotations.
4981    #[allow(clippy::format_push_string)]
4982    fn exec_explain(
4983        &self,
4984        e: &spg_sql::ast::ExplainStatement,
4985        cancel: CancelToken<'_>,
4986    ) -> Result<QueryResult, EngineError> {
4987        let mut lines = Vec::<String>::new();
4988        explain_select(&e.inner, self, 0, &mut lines);
4989        if e.suggest {
4990            // v6.8.3 — index advisor. Walks the SELECT's FROM
4991            // tables + WHERE column refs; for each (table, column)
4992            // pair that lacks an index, append a SUGGEST line with
4993            // a copy-pastable `CREATE INDEX` statement. This is a
4994            // pure-syntax heuristic — no cardinality estimation —
4995            // matching the v6.8.3 design intent of "tell the
4996            // operator where indexes are missing", not "give the
4997            // mathematically optimal index set".
4998            let suggestions = build_index_suggestions(&e.inner, self);
4999            for s in suggestions {
5000                lines.push(s);
5001            }
5002        } else if e.analyze {
5003            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
5004            // with `(rows=N)` where the row count is computable
5005            // without re-executing the full query:
5006            //   - Top-level operator (first non-indented line):
5007            //     rows = final result.len()
5008            //   - "From: <table> [full scan]" lines: rows =
5009            //     table.rows().len() (catalog read; no execution)
5010            //   - "From: <table> [index seek]": indeterminate —
5011            //     the index step would need re-execution; v6.2.5
5012            //     adds per-operator wall-clock + hot/cold rows
5013            //     instrumentation that makes this concrete.
5014            //   - Everything else: marked `(—)` so the surface
5015            //     stays well-defined without silently dropping
5016            //     stats. v6.2.5 fills in via inline executor
5017            //     instrumentation.
5018            // Total elapsed lands on a trailing `Total: …` line.
5019            let started = self.clock.map(|f| f());
5020            let exec = self.exec_select_cancel(&e.inner, cancel)?;
5021            let elapsed_micros = match (self.clock, started) {
5022                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
5023                _ => None,
5024            };
5025            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
5026                rows.len()
5027            } else {
5028                0
5029            };
5030            annotate_explain_lines(&mut lines, row_count, self);
5031            let mut total = alloc::format!("Total: rows={row_count}");
5032            if let Some(us) = elapsed_micros {
5033                total.push_str(&alloc::format!(" elapsed={us}us"));
5034            }
5035            lines.push(total);
5036        }
5037        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
5038        let rows: Vec<Row> = lines
5039            .into_iter()
5040            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
5041            .collect();
5042        Ok(QueryResult::Rows { columns, rows })
5043    }
5044
5045    fn exec_show_tables(&self) -> QueryResult {
5046        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
5047        let rows: Vec<Row> = self
5048            .active_catalog()
5049            .table_names()
5050            .into_iter()
5051            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
5052            .collect();
5053        QueryResult::Rows { columns, rows }
5054    }
5055
5056    /// v7.17.0 Phase 3.P0-59 — `SHOW CREATE TABLE <t>`. Synthesise
5057    /// a minimal MySQL-flavoured CREATE TABLE DDL from the
5058    /// catalog's TableSchema so mysqldump round-trips load against
5059    /// SPG without splitting init scripts.
5060    fn exec_show_create_table(&self, name: &str) -> Result<QueryResult, EngineError> {
5061        let t = self.active_catalog().get(name).ok_or_else(|| {
5062            EngineError::Storage(StorageError::TableNotFound { name: name.into() })
5063        })?;
5064        let cols: Vec<String> = t
5065            .schema()
5066            .columns
5067            .iter()
5068            .map(|c| {
5069                let ty = render_data_type(c.ty);
5070                let nullable = if c.nullable { "" } else { " NOT NULL" };
5071                alloc::format!("  `{}` {}{}", c.name, ty, nullable)
5072            })
5073            .collect();
5074        let mut body = cols.join(",\n");
5075        // Append UNIQUE / PRIMARY KEY clauses.
5076        for uc in &t.schema().uniqueness_constraints {
5077            let col_names: Vec<String> = uc
5078                .columns
5079                .iter()
5080                .map(|&p| {
5081                    t.schema().columns.get(p).map_or_else(
5082                        || alloc::format!("col{p}"),
5083                        |c| alloc::format!("`{}`", c.name),
5084                    )
5085                })
5086                .collect();
5087            let kw = if uc.is_primary_key {
5088                "PRIMARY KEY"
5089            } else {
5090                "UNIQUE KEY"
5091            };
5092            body.push_str(",\n  ");
5093            body.push_str(&alloc::format!("{kw} ({})", col_names.join(", ")));
5094        }
5095        // Foreign keys.
5096        for fk in &t.schema().foreign_keys {
5097            let local: Vec<String> = fk
5098                .local_columns
5099                .iter()
5100                .map(|&p| {
5101                    t.schema().columns.get(p).map_or_else(
5102                        || alloc::format!("col{p}"),
5103                        |c| alloc::format!("`{}`", c.name),
5104                    )
5105                })
5106                .collect();
5107            let parent_cols: Vec<String> =
5108                if let Some(parent) = self.active_catalog().get(&fk.parent_table) {
5109                    fk.parent_columns
5110                        .iter()
5111                        .map(|&p| {
5112                            parent.schema().columns.get(p).map_or_else(
5113                                || alloc::format!("col{p}"),
5114                                |c| alloc::format!("`{}`", c.name),
5115                            )
5116                        })
5117                        .collect()
5118                } else {
5119                    fk.parent_columns
5120                        .iter()
5121                        .map(|p| alloc::format!("col{p}"))
5122                        .collect()
5123                };
5124            body.push_str(",\n  ");
5125            body.push_str(&alloc::format!(
5126                "FOREIGN KEY ({}) REFERENCES `{}` ({})",
5127                local.join(", "),
5128                fk.parent_table,
5129                parent_cols.join(", ")
5130            ));
5131        }
5132        let ddl = alloc::format!(
5133            "CREATE TABLE `{}` (\n{}\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4",
5134            name,
5135            body
5136        );
5137        let columns = alloc::vec![
5138            ColumnSchema::new("Table", DataType::Text, false),
5139            ColumnSchema::new("Create Table", DataType::Text, false),
5140        ];
5141        let rows = alloc::vec![Row::new(alloc::vec![
5142            Value::Text(name.into()),
5143            Value::Text(ddl),
5144        ])];
5145        Ok(QueryResult::Rows { columns, rows })
5146    }
5147
5148    /// v7.17.0 Phase 3.P0-60 — `SHOW INDEXES FROM <t>`. MySQL
5149    /// surface returns one row per (index × column) with 14
5150    /// columns; v7.17 ships the columns admin probes actually
5151    /// filter on: Table, Non_unique, Key_name, Seq_in_index,
5152    /// Column_name, Null, Index_type.
5153    fn exec_show_indexes(&self, name: &str) -> Result<QueryResult, EngineError> {
5154        let t = self.active_catalog().get(name).ok_or_else(|| {
5155            EngineError::Storage(StorageError::TableNotFound { name: name.into() })
5156        })?;
5157        let columns = alloc::vec![
5158            ColumnSchema::new("Table", DataType::Text, false),
5159            ColumnSchema::new("Non_unique", DataType::Int, false),
5160            ColumnSchema::new("Key_name", DataType::Text, false),
5161            ColumnSchema::new("Seq_in_index", DataType::Int, false),
5162            ColumnSchema::new("Column_name", DataType::Text, false),
5163            ColumnSchema::new("Null", DataType::Text, false),
5164            ColumnSchema::new("Index_type", DataType::Text, false),
5165        ];
5166        let mut rows: Vec<Row> = Vec::new();
5167        for idx in t.indices() {
5168            let col = t
5169                .schema()
5170                .columns
5171                .get(idx.column_position)
5172                .map_or("?".into(), |c| c.name.clone());
5173            let nullable = t
5174                .schema()
5175                .columns
5176                .get(idx.column_position)
5177                .map_or(true, |c| c.nullable);
5178            rows.push(Row::new(alloc::vec![
5179                Value::Text(name.into()),
5180                Value::Int(i32::from(!idx.is_unique)),
5181                Value::Text(idx.name.clone()),
5182                Value::Int(1),
5183                Value::Text(col),
5184                Value::Text(if nullable {
5185                    "YES".into()
5186                } else {
5187                    String::new()
5188                }),
5189                Value::Text("BTREE".into()),
5190            ]));
5191        }
5192        Ok(QueryResult::Rows { columns, rows })
5193    }
5194
5195    /// v7.17.0 Phase 3.P0-61 — `SHOW STATUS`. Returns canonical
5196    /// MySQL server-status counters (2-column `(Variable_name,
5197    /// Value)`).
5198    fn exec_show_status(&self) -> QueryResult {
5199        let columns = alloc::vec![
5200            ColumnSchema::new("Variable_name", DataType::Text, false),
5201            ColumnSchema::new("Value", DataType::Text, false),
5202        ];
5203        let pairs: &[(&str, &str)] = &[
5204            ("Uptime", "0"),
5205            ("Threads_connected", "1"),
5206            ("Threads_running", "1"),
5207            ("Questions", "0"),
5208            ("Slow_queries", "0"),
5209            ("Opened_tables", "0"),
5210            ("Innodb_buffer_pool_pages_total", "0"),
5211        ];
5212        let rows: Vec<Row> = pairs
5213            .iter()
5214            .map(|(k, v)| {
5215                Row::new(alloc::vec![
5216                    Value::Text((*k).into()),
5217                    Value::Text((*v).into())
5218                ])
5219            })
5220            .collect();
5221        QueryResult::Rows { columns, rows }
5222    }
5223
5224    /// v7.17.0 Phase 3.P0-61 — `SHOW VARIABLES`. Returns server-side
5225    /// variables MySQL/MariaDB clients probe at connect time.
5226    fn exec_show_variables(&self) -> QueryResult {
5227        let columns = alloc::vec![
5228            ColumnSchema::new("Variable_name", DataType::Text, false),
5229            ColumnSchema::new("Value", DataType::Text, false),
5230        ];
5231        let mut rows: Vec<Row> = Vec::new();
5232        let canonical: &[(&str, &str)] = &[
5233            ("version", "8.0.35-spg"),
5234            ("version_comment", "SPG dual-stack engine"),
5235            ("character_set_server", "utf8mb4"),
5236            ("collation_server", "utf8mb4_0900_ai_ci"),
5237            ("max_allowed_packet", "67108864"),
5238            ("autocommit", "ON"),
5239            ("sql_mode", "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"),
5240            ("time_zone", "SYSTEM"),
5241            ("transaction_isolation", "REPEATABLE-READ"),
5242        ];
5243        for &(k, v) in canonical {
5244            rows.push(Row::new(alloc::vec![
5245                Value::Text(k.into()),
5246                Value::Text(v.into()),
5247            ]));
5248        }
5249        // Session-set parameters surface here too.
5250        for (k, v) in &self.session_params {
5251            if !canonical.iter().any(|(n, _)| (*n).eq_ignore_ascii_case(k)) {
5252                rows.push(Row::new(alloc::vec![
5253                    Value::Text(k.clone()),
5254                    Value::Text(v.clone()),
5255                ]));
5256            }
5257        }
5258        QueryResult::Rows { columns, rows }
5259    }
5260
5261    /// v7.17.0 Phase 3.P0-62 — `SHOW PROCESSLIST`. SPG is
5262    /// single-process so the surface returns one synthetic row
5263    /// describing the current connection (Id, User, Host, db,
5264    /// Command, Time, State, Info).
5265    fn exec_show_processlist(&self) -> QueryResult {
5266        let columns = alloc::vec![
5267            ColumnSchema::new("Id", DataType::Int, false),
5268            ColumnSchema::new("User", DataType::Text, false),
5269            ColumnSchema::new("Host", DataType::Text, false),
5270            ColumnSchema::new("db", DataType::Text, true),
5271            ColumnSchema::new("Command", DataType::Text, false),
5272            ColumnSchema::new("Time", DataType::Int, false),
5273            ColumnSchema::new("State", DataType::Text, true),
5274            ColumnSchema::new("Info", DataType::Text, true),
5275        ];
5276        let rows = alloc::vec![Row::new(alloc::vec![
5277            Value::Int(1),
5278            Value::Text("postgres".into()),
5279            Value::Text("localhost".into()),
5280            Value::Text("postgres".into()),
5281            Value::Text("Query".into()),
5282            Value::Int(0),
5283            Value::Text("executing".into()),
5284            Value::Text("SHOW PROCESSLIST".into()),
5285        ])];
5286        QueryResult::Rows { columns, rows }
5287    }
5288
5289    /// v7.17.0 Phase 3.P0-58 — `SHOW DATABASES` / `SHOW SCHEMAS`.
5290    /// SPG is single-database so the result is the canonical MySQL
5291    /// set every mysql/MariaDB client expects at connect time:
5292    /// `information_schema`, `mysql`, `performance_schema`, `sys`,
5293    /// plus a `postgres` slot so dual-stack callers find their
5294    /// PG-compatible database too.
5295    fn exec_show_databases(&self) -> QueryResult {
5296        let columns = alloc::vec![ColumnSchema::new("Database", DataType::Text, false)];
5297        let names = [
5298            "information_schema",
5299            "mysql",
5300            "performance_schema",
5301            "sys",
5302            "postgres",
5303        ];
5304        let rows: Vec<Row> = names
5305            .iter()
5306            .map(|n| Row::new(alloc::vec![Value::Text((*n).into())]))
5307            .collect();
5308        QueryResult::Rows { columns, rows }
5309    }
5310
5311    /// `SHOW COLUMNS FROM <table>` — one row per column with the
5312    /// declared name, SQL type rendering, and nullability flag.
5313    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
5314        let table =
5315            self.active_catalog()
5316                .get(table_name)
5317                .ok_or_else(|| StorageError::TableNotFound {
5318                    name: table_name.into(),
5319                })?;
5320        let columns = alloc::vec![
5321            ColumnSchema::new("name", DataType::Text, false),
5322            ColumnSchema::new("type", DataType::Text, false),
5323            ColumnSchema::new("nullable", DataType::Bool, false),
5324        ];
5325        let rows: Vec<Row> = table
5326            .schema()
5327            .columns
5328            .iter()
5329            .map(|c| {
5330                Row::new(alloc::vec![
5331                    Value::Text(c.name.clone()),
5332                    Value::Text(alloc::format!("{}", c.ty)),
5333                    Value::Bool(c.nullable),
5334                ])
5335            })
5336            .collect();
5337        Ok(QueryResult::Rows { columns, rows })
5338    }
5339
5340    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
5341        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5342        if self.tx_catalogs.contains_key(&tx_id) {
5343            return Err(EngineError::TransactionAlreadyOpen);
5344        }
5345        self.tx_catalogs.insert(
5346            tx_id,
5347            TxState {
5348                catalog: self.catalog.clone(),
5349                savepoints: Vec::new(),
5350            },
5351        );
5352        Ok(QueryResult::CommandOk {
5353            affected: 0,
5354            modified_catalog: false,
5355        })
5356    }
5357
5358    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
5359        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5360        let state = self
5361            .tx_catalogs
5362            .remove(&tx_id)
5363            .ok_or(EngineError::NoActiveTransaction)?;
5364        self.catalog = state.catalog;
5365        // All savepoints become permanent at COMMIT and the stack
5366        // resets for the next TX (`state.savepoints` is discarded with
5367        // `state`).
5368        Ok(QueryResult::CommandOk {
5369            affected: 0,
5370            modified_catalog: true,
5371        })
5372    }
5373
5374    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
5375        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5376        if self.tx_catalogs.remove(&tx_id).is_none() {
5377            return Err(EngineError::NoActiveTransaction);
5378        }
5379        // savepoints discarded with the TxState
5380        Ok(QueryResult::CommandOk {
5381            affected: 0,
5382            modified_catalog: false,
5383        })
5384    }
5385
5386    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
5387        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5388        let state = self
5389            .tx_catalogs
5390            .get_mut(&tx_id)
5391            .ok_or(EngineError::NoActiveTransaction)?;
5392        // PG re-uses an existing savepoint name by dropping the older
5393        // entry and pushing a fresh one — match that behaviour so
5394        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
5395        state.savepoints.retain(|(n, _)| n != &name);
5396        let snapshot = state.catalog.clone();
5397        state.savepoints.push((name, snapshot));
5398        Ok(QueryResult::CommandOk {
5399            affected: 0,
5400            modified_catalog: false,
5401        })
5402    }
5403
5404    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
5405        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5406        let state = self
5407            .tx_catalogs
5408            .get_mut(&tx_id)
5409            .ok_or(EngineError::NoActiveTransaction)?;
5410        let pos = state
5411            .savepoints
5412            .iter()
5413            .rposition(|(n, _)| n == name)
5414            .ok_or_else(|| {
5415                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
5416            })?;
5417        // The savepoint stays on the stack (PG semantics): a later
5418        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
5419        // after it is discarded.
5420        let snapshot = state.savepoints[pos].1.clone();
5421        state.savepoints.truncate(pos + 1);
5422        state.catalog = snapshot;
5423        Ok(QueryResult::CommandOk {
5424            affected: 0,
5425            modified_catalog: false,
5426        })
5427    }
5428
5429    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
5430        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5431        let state = self
5432            .tx_catalogs
5433            .get_mut(&tx_id)
5434            .ok_or(EngineError::NoActiveTransaction)?;
5435        let pos = state
5436            .savepoints
5437            .iter()
5438            .rposition(|(n, _)| n == name)
5439            .ok_or_else(|| {
5440                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
5441            })?;
5442        // RELEASE keeps the work since the savepoint, just discards the
5443        // bookmark plus everything nested under it.
5444        state.savepoints.truncate(pos);
5445        Ok(QueryResult::CommandOk {
5446            affected: 0,
5447            modified_catalog: false,
5448        })
5449    }
5450
5451    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
5452    /// (encoding = …)]`. Walks every table in the active catalog
5453    /// looking for an index matching `stmt.name`, then delegates the
5454    /// rebuild (including any encoding switch) to
5455    /// `Table::rebuild_nsw_index`. The "live" non-blocking
5456    /// optimisation is v6.0.4.1 / v6.1.x territory.
5457    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
5458    /// arm. Currently the only setting is `hot_tier_bytes`; later
5459    /// v6.7.x can extend `AlterTableTarget` without touching this
5460    /// arm structure.
5461    fn exec_alter_table(
5462        &mut self,
5463        s: spg_sql::ast::AlterTableStatement,
5464    ) -> Result<QueryResult, EngineError> {
5465        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
5466        // On first error the statement aborts; subactions already
5467        // applied stay (no transactional rollback in v7.13 — wrap in
5468        // BEGIN/COMMIT if atomicity matters).
5469        let table_name = s.name.clone();
5470        for target in s.targets {
5471            self.exec_alter_table_subaction(&table_name, target)?;
5472        }
5473        Ok(QueryResult::CommandOk {
5474            affected: 0,
5475            modified_catalog: !self.in_transaction(),
5476        })
5477    }
5478
5479    fn exec_alter_table_subaction(
5480        &mut self,
5481        table_name_outer: &str,
5482        target: spg_sql::ast::AlterTableTarget,
5483    ) -> Result<(), EngineError> {
5484        // Inner helper retains the s.name closure shape; alias to `s`
5485        // for minimal diff against the v7.13.0 body.
5486        struct S<'a> {
5487            name: &'a str,
5488        }
5489        let s = S {
5490            name: table_name_outer,
5491        };
5492        match target {
5493            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
5494                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5495                    EngineError::Storage(StorageError::TableNotFound {
5496                        name: s.name.into(),
5497                    })
5498                })?;
5499                table.schema_mut().hot_tier_bytes = Some(n);
5500            }
5501            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
5502                // v7.6.8 — resolve FK against the live catalog first
5503                // (validates parent table, columns, indices). Then
5504                // verify every existing row in the child table
5505                // satisfies the new constraint. Then install it.
5506                let cols_snapshot = self
5507                    .active_catalog()
5508                    .get(s.name)
5509                    .ok_or_else(|| {
5510                        EngineError::Storage(StorageError::TableNotFound {
5511                            name: s.name.into(),
5512                        })
5513                    })?
5514                    .schema()
5515                    .columns
5516                    .clone();
5517                let storage_fk =
5518                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
5519                // Verify existing rows. Treat them as a virtual
5520                // INSERT batch — reusing the v7.6.2 enforce helper.
5521                let existing_rows: Vec<Vec<Value>> = self
5522                    .active_catalog()
5523                    .get(s.name)
5524                    .expect("checked above")
5525                    .rows()
5526                    .iter()
5527                    .map(|r| r.values.clone())
5528                    .collect();
5529                enforce_fk_inserts(
5530                    self.active_catalog(),
5531                    s.name,
5532                    core::slice::from_ref(&storage_fk),
5533                    &existing_rows,
5534                )?;
5535                // Reject duplicate constraint name.
5536                let table = self
5537                    .active_catalog_mut()
5538                    .get_mut(s.name)
5539                    .expect("checked above");
5540                if let Some(name) = &storage_fk.name
5541                    && table
5542                        .schema()
5543                        .foreign_keys
5544                        .iter()
5545                        .any(|f| f.name.as_ref() == Some(name))
5546                {
5547                    return Err(EngineError::Unsupported(alloc::format!(
5548                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
5549                    )));
5550                }
5551                table.schema_mut().foreign_keys.push(storage_fk);
5552            }
5553            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
5554                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5555                    EngineError::Storage(StorageError::TableNotFound {
5556                        name: s.name.into(),
5557                    })
5558                })?;
5559                let fks = &mut table.schema_mut().foreign_keys;
5560                let before = fks.len();
5561                fks.retain(|f| f.name.as_ref() != Some(&name));
5562                if fks.len() == before && !if_exists {
5563                    return Err(EngineError::Unsupported(alloc::format!(
5564                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
5565                        s.name
5566                    )));
5567                }
5568                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
5569            }
5570            spg_sql::ast::AlterTableTarget::AddColumn {
5571                column,
5572                if_not_exists,
5573            } => {
5574                // v7.13.0 — mailrs round-5 G1. Append-only column add
5575                // with back-fill of the DEFAULT (or NULL) into every
5576                // existing row. Column positions don't shift, so we
5577                // skip index rebuild.
5578                let clock = self.clock;
5579                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5580                    EngineError::Storage(StorageError::TableNotFound {
5581                        name: s.name.into(),
5582                    })
5583                })?;
5584                if table
5585                    .schema()
5586                    .columns
5587                    .iter()
5588                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
5589                {
5590                    if if_not_exists {
5591                        return Ok(());
5592                    }
5593                    return Err(EngineError::Unsupported(alloc::format!(
5594                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
5595                        column.name,
5596                        s.name
5597                    )));
5598                }
5599                let col_name = column.name.clone();
5600                let nullable = column.nullable;
5601                let has_default = column.default.is_some() || column.auto_increment;
5602                let col_schema = column_def_to_schema(column)?;
5603                let row_count = table.row_count();
5604                // Compute the back-fill value. Literal / runtime DEFAULT
5605                // funnels through the same resolver that INSERT uses
5606                // (v7.9.21 `resolve_column_default_free`). NULL when
5607                // the column is nullable and has no DEFAULT. NOT NULL
5608                // without DEFAULT errors when the table has existing
5609                // rows — same as PG.
5610                let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
5611                    resolve_column_default_free(&col_schema, clock)?
5612                } else if nullable || row_count == 0 {
5613                    Value::Null
5614                } else {
5615                    return Err(EngineError::Unsupported(alloc::format!(
5616                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
5617                         when the table has existing rows"
5618                    )));
5619                };
5620                table.add_column(col_schema, fill_value);
5621            }
5622            spg_sql::ast::AlterTableTarget::AlterColumnType {
5623                column,
5624                new_type,
5625                using,
5626            } => {
5627                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
5628                // row's column value (either through the USING
5629                // expression if supplied, or as a direct CAST of
5630                // the existing value) and re-coerce to the new
5631                // type. Indices on the column get rebuilt.
5632                let new_data_type = column_type_to_data_type(new_type);
5633                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5634                    EngineError::Storage(StorageError::TableNotFound {
5635                        name: s.name.into(),
5636                    })
5637                })?;
5638                let col_pos = table
5639                    .schema()
5640                    .columns
5641                    .iter()
5642                    .position(|c| c.name.eq_ignore_ascii_case(&column))
5643                    .ok_or_else(|| {
5644                        EngineError::Unsupported(alloc::format!(
5645                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
5646                            s.name
5647                        ))
5648                    })?;
5649                let schema_cols = table.schema().columns.clone();
5650                let ctx = eval::EvalContext::new(&schema_cols, None);
5651                let mut new_values: alloc::vec::Vec<Value> =
5652                    alloc::vec::Vec::with_capacity(table.row_count());
5653                for row in table.rows().iter() {
5654                    let raw = match &using {
5655                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
5656                            EngineError::Unsupported(alloc::format!(
5657                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
5658                            ))
5659                        })?,
5660                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
5661                    };
5662                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
5663                    new_values.push(coerced);
5664                }
5665                table.schema_mut().columns[col_pos].ty = new_data_type;
5666                for (i, v) in new_values.into_iter().enumerate() {
5667                    let mut row_values = table
5668                        .rows()
5669                        .get(i)
5670                        .expect("bounds-checked above")
5671                        .values
5672                        .clone();
5673                    row_values[col_pos] = v;
5674                    table.update_row(i, row_values)?;
5675                }
5676            }
5677            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
5678                // v7.14.0 — pg_dump emits PKs as a separate
5679                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
5680                // For PRIMARY KEY / UNIQUE, install a UC entry
5681                // and the implicit BTree index on the leading
5682                // column. CHECK: append predicate to schema.
5683                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5684                    EngineError::Storage(StorageError::TableNotFound {
5685                        name: s.name.into(),
5686                    })
5687                })?;
5688                let is_pk = matches!(tc, spg_sql::ast::TableConstraint::PrimaryKey { .. });
5689                // v7.22 (mailrs round-13 gap 6) — carry the parsed
5690                // NULLS NOT DISTINCT flag through the ALTER path;
5691                // it was hardcoded false here while the CREATE
5692                // TABLE path honoured it since v7.13.
5693                let nnd = matches!(
5694                    tc,
5695                    spg_sql::ast::TableConstraint::Unique {
5696                        nulls_not_distinct: true,
5697                        ..
5698                    }
5699                );
5700                match tc {
5701                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
5702                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
5703                        let positions: Vec<usize> = columns
5704                            .iter()
5705                            .map(|c| {
5706                                table
5707                                    .schema()
5708                                    .columns
5709                                    .iter()
5710                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
5711                                    .ok_or_else(|| {
5712                                        EngineError::Unsupported(alloc::format!(
5713                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
5714                                            s.name
5715                                        ))
5716                                    })
5717                            })
5718                            .collect::<Result<Vec<_>, _>>()?;
5719                        // Skip if an equivalent UC is already there
5720                        // (idempotent — pg_dump's PK + a prior inline
5721                        // PK shouldn't double-install).
5722                        let already = table
5723                            .schema()
5724                            .uniqueness_constraints
5725                            .iter()
5726                            .any(|u| u.columns == positions);
5727                        if !already {
5728                            table.schema_mut().uniqueness_constraints.push(
5729                                spg_storage::UniquenessConstraint {
5730                                    is_primary_key: is_pk,
5731                                    columns: positions.clone(),
5732                                    nulls_not_distinct: nnd,
5733                                },
5734                            );
5735                            // PK implies NOT NULL on referenced cols.
5736                            if is_pk {
5737                                for p in &positions {
5738                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
5739                                        c.nullable = false;
5740                                    }
5741                                }
5742                            }
5743                            // Add a BTree index on the leading
5744                            // column for INSERT-side enforcement.
5745                            let leading = &columns[0];
5746                            let already_idx = table.indices().iter().any(|idx| {
5747                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
5748                                    && table.schema().columns[idx.column_position].name == *leading
5749                            });
5750                            if !already_idx {
5751                                let suffix = if is_pk { "pkey" } else { "key" };
5752                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
5753                                let _ = table.add_index(idx_name, leading);
5754                            }
5755                        }
5756                    }
5757                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
5758                        table.schema_mut().checks.push(alloc::format!("{expr}"));
5759                    }
5760                    spg_sql::ast::TableConstraint::Index { name, columns } => {
5761                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
5762                        // mysqldump occasionally emits this
5763                        // post-CREATE-TABLE shape; build a BTree
5764                        // on the leading column using the
5765                        // user-supplied or synthesised name.
5766                        let leading = &columns[0];
5767                        let already_idx = table.indices().iter().any(|idx| {
5768                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
5769                                && table.schema().columns[idx.column_position].name == *leading
5770                        });
5771                        if !already_idx {
5772                            let idx_name = name
5773                                .clone()
5774                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
5775                            let _ = table.add_index(idx_name, leading);
5776                        }
5777                    }
5778                    spg_sql::ast::TableConstraint::FulltextIndex { name, columns } => {
5779                        // v7.17.0 Phase 2.2 — ALTER TABLE ADD
5780                        // FULLTEXT KEY (cols). Builds one
5781                        // fulltext-GIN per named column so MATCH
5782                        // AGAINST gets a real inverted index.
5783                        // Multi-column declarations expand to
5784                        // per-column GINs (the leading column
5785                        // drives MATCH AGAINST planning).
5786                        for (k, col) in columns.iter().enumerate() {
5787                            let already_idx = table.indices().iter().any(|idx| {
5788                                matches!(idx.kind, spg_storage::IndexKind::GinFulltext(_))
5789                                    && table.schema().columns[idx.column_position].name == *col
5790                            });
5791                            if already_idx {
5792                                continue;
5793                            }
5794                            let idx_name = match (&name, columns.len(), k) {
5795                                (Some(n), 1, _) => n.clone(),
5796                                (Some(n), _, k) => alloc::format!("{n}_{k}"),
5797                                (None, _, _) => {
5798                                    alloc::format!("{}_{col}_ftidx", s.name)
5799                                }
5800                            };
5801                            let _ = table.add_gin_fulltext_index(idx_name, col);
5802                        }
5803                    }
5804                }
5805            }
5806            spg_sql::ast::AlterTableTarget::DropColumn {
5807                column,
5808                if_exists,
5809                cascade,
5810            } => {
5811                // v7.13.3 — mailrs round-7 S8. Remove the column +
5812                // every row's value at that position; drop any index
5813                // on the column. RESTRICT (default) rejects when an
5814                // FK on this table or partial-index predicate
5815                // references the column; CASCADE removes those
5816                // dependents first.
5817                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5818                    EngineError::Storage(StorageError::TableNotFound {
5819                        name: s.name.into(),
5820                    })
5821                })?;
5822                let col_pos = match table
5823                    .schema()
5824                    .columns
5825                    .iter()
5826                    .position(|c| c.name.eq_ignore_ascii_case(&column))
5827                {
5828                    Some(p) => p,
5829                    None => {
5830                        if if_exists {
5831                            return Ok(());
5832                        }
5833                        return Err(EngineError::Unsupported(alloc::format!(
5834                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
5835                            s.name
5836                        )));
5837                    }
5838                };
5839                // Dependent check: FKs whose local columns include
5840                // col_pos. CASCADE drops them; otherwise reject.
5841                let dependent_fks: Vec<usize> = table
5842                    .schema()
5843                    .foreign_keys
5844                    .iter()
5845                    .enumerate()
5846                    .filter_map(|(i, fk)| {
5847                        if fk.local_columns.contains(&col_pos) {
5848                            Some(i)
5849                        } else {
5850                            None
5851                        }
5852                    })
5853                    .collect();
5854                if !dependent_fks.is_empty() && !cascade {
5855                    return Err(EngineError::Unsupported(alloc::format!(
5856                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
5857                         use DROP COLUMN ... CASCADE to remove them"
5858                    )));
5859                }
5860                // CASCADE the FK removals first.
5861                if cascade {
5862                    // Drop in reverse so indices stay valid.
5863                    let mut sorted = dependent_fks.clone();
5864                    sorted.sort();
5865                    sorted.reverse();
5866                    let fks = &mut table.schema_mut().foreign_keys;
5867                    for i in sorted {
5868                        fks.remove(i);
5869                    }
5870                }
5871                // Drop the column. New helper on Table does the
5872                // row + schema + index shift atomically.
5873                table.drop_column(col_pos);
5874            }
5875            spg_sql::ast::AlterTableTarget::SetTriggerEnabled { which, enabled } => {
5876                // v7.16.1 — mailrs round-9 A.2.b. pg_dump
5877                // --disable-triggers wraps each table's data
5878                // block with `ALTER TABLE … DISABLE TRIGGER ALL`
5879                // / `… ENABLE TRIGGER ALL`. Toggle the enabled
5880                // flag on every matching trigger so the row-
5881                // write paths skip them; the catalog snapshot
5882                // persists the new state across restarts.
5883                let table_name = s.name.to_string();
5884                let trigs = self.active_catalog_mut().triggers_mut();
5885                let mut touched = false;
5886                for t in trigs.iter_mut() {
5887                    if !t.table.eq_ignore_ascii_case(&table_name) {
5888                        continue;
5889                    }
5890                    match &which {
5891                        spg_sql::ast::TriggerSelector::All => {
5892                            t.enabled = enabled;
5893                            touched = true;
5894                        }
5895                        spg_sql::ast::TriggerSelector::Named(name) => {
5896                            if t.name.eq_ignore_ascii_case(name) {
5897                                t.enabled = enabled;
5898                                touched = true;
5899                            }
5900                        }
5901                    }
5902                }
5903                // PG semantics: `ALL` on a table with no
5904                // triggers is a no-op (no error). A `Named`
5905                // form pointing at a non-existent trigger
5906                // raises in PG; v7.16.1 also raises so we
5907                // don't silently lose state.
5908                if !touched {
5909                    if let spg_sql::ast::TriggerSelector::Named(name) = &which {
5910                        return Err(EngineError::Unsupported(alloc::format!(
5911                            "ALTER TABLE {table_name:?} {} TRIGGER {name:?}: no such trigger on table",
5912                            if enabled { "ENABLE" } else { "DISABLE" },
5913                        )));
5914                    }
5915                }
5916            }
5917            spg_sql::ast::AlterTableTarget::SetColumnAutoIncrement { column, seq_name } => {
5918                // pg_dump's identity form names an IMPLICIT sequence
5919                // (`… AS IDENTITY ( SEQUENCE NAME s … )`) that never
5920                // gets its own CREATE SEQUENCE statement, while the
5921                // data section still calls `setval(s, …)`. Make the
5922                // sequence exist (idempotent) so those calls land.
5923                if let Some(seq) = seq_name {
5924                    let _ = self.exec_create_sequence(spg_sql::ast::CreateSequenceStatement {
5925                        name: seq,
5926                        if_not_exists: true,
5927                        temporary: false,
5928                        data_type: None,
5929                        options: spg_sql::ast::SequenceOptions::default(),
5930                    })?;
5931                }
5932                // v7.22 (round-13 T2) — pg_dump's serial/identity
5933                // spellings (`SET DEFAULT nextval(…)` / `ADD
5934                // GENERATED … AS IDENTITY`) lower here: flip the
5935                // column's auto-increment flag so post-import
5936                // INSERTs without an explicit value keep numbering
5937                // (max+1 semantics; the dump's setval() calls are
5938                // no-ops by construction).
5939                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5940                    EngineError::Storage(StorageError::TableNotFound {
5941                        name: s.name.into(),
5942                    })
5943                })?;
5944                let pos = table
5945                    .schema()
5946                    .columns
5947                    .iter()
5948                    .position(|c| c.name.eq_ignore_ascii_case(&column))
5949                    .ok_or_else(|| {
5950                        EngineError::Unsupported(alloc::format!(
5951                            "ALTER COLUMN {column:?}: no such column on {:?}",
5952                            s.name
5953                        ))
5954                    })?;
5955                let col = &table.schema().columns[pos];
5956                if !matches!(
5957                    col.ty,
5958                    spg_storage::DataType::SmallInt
5959                        | spg_storage::DataType::Int
5960                        | spg_storage::DataType::BigInt
5961                ) {
5962                    return Err(EngineError::Unsupported(alloc::format!(
5963                        "auto-increment applies to integer columns only ({column:?} is {:?})",
5964                        col.ty
5965                    )));
5966                }
5967                table.schema_mut().columns[pos].auto_increment = true;
5968            }
5969            spg_sql::ast::AlterTableTarget::RenameTable { new } => {
5970                // v7.16.2 — table-level rename (mailrs round-10
5971                // A.5 — used by migrate-042's `ALTER TABLE
5972                // contacts RENAME TO email_contacts`). Storage
5973                // helper updates the schema + by_name index +
5974                // dangling FK / trigger references in one
5975                // atomic step.
5976                let old = s.name.to_string();
5977                self.active_catalog_mut()
5978                    .rename_table(&old, &new)
5979                    .map_err(EngineError::Storage)?;
5980            }
5981            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
5982                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
5983                // new`. Rename the column in the schema; rewrite
5984                // every stored source string on this table that
5985                // references it as a (potentially-qualified)
5986                // column identifier: CHECK predicates, partial-
5987                // index predicates, runtime DEFAULT expressions.
5988                // Then walk catalog triggers on this table and
5989                // patch any `UPDATE OF` column list. Function and
5990                // trigger bodies are NOT auto-rewritten — that
5991                // surface is dynamic SQL territory; users update
5992                // those separately (matches PG plpgsql behavior:
5993                // a column rename invalidates name-referencing
5994                // plpgsql at call time, not rename time).
5995                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5996                    EngineError::Storage(StorageError::TableNotFound {
5997                        name: s.name.into(),
5998                    })
5999                })?;
6000                let col_pos = table
6001                    .schema()
6002                    .columns
6003                    .iter()
6004                    .position(|c| c.name.eq_ignore_ascii_case(&old))
6005                    .ok_or_else(|| {
6006                        EngineError::Unsupported(alloc::format!(
6007                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
6008                            s.name
6009                        ))
6010                    })?;
6011                // Reject same-name (case-insensitive) collision.
6012                if table
6013                    .schema()
6014                    .columns
6015                    .iter()
6016                    .enumerate()
6017                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
6018                {
6019                    return Err(EngineError::Unsupported(alloc::format!(
6020                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
6021                        s.name
6022                    )));
6023                }
6024                // Schema rename first — even idempotent same-name
6025                // rename (`ALTER TABLE t RENAME a TO a`) needs to
6026                // be a no-op, not an error.
6027                if old.eq_ignore_ascii_case(&new) {
6028                    return Ok(());
6029                }
6030                table.rename_column(col_pos, &new);
6031                // Rewrite per-column runtime_default sources on
6032                // every column of this table — a DEFAULT expression
6033                // on column X may reference column Y by name (rare,
6034                // but legal in PG when the value is supplied via a
6035                // function that takes the row).
6036                let n_cols = table.schema().columns.len();
6037                for i in 0..n_cols {
6038                    let rt = table.schema().columns[i].runtime_default.clone();
6039                    if let Some(src) = rt {
6040                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
6041                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
6042                    }
6043                }
6044                // Rewrite table-level CHECK predicates.
6045                let checks = table.schema().checks.clone();
6046                let mut new_checks = Vec::with_capacity(checks.len());
6047                for chk in checks {
6048                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
6049                }
6050                table.schema_mut().checks = new_checks;
6051                // Rewrite per-index partial_predicate sources.
6052                let n_idx = table.indices().len();
6053                for i in 0..n_idx {
6054                    let pred = table.indices()[i].partial_predicate.clone();
6055                    if let Some(src) = pred {
6056                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
6057                        // SAFETY: indices_mut would be cleanest, but
6058                        // partial_predicate is the only mutable field
6059                        // here; reach in via the public mut accessor.
6060                        table.set_partial_predicate(i, Some(rewritten));
6061                    }
6062                }
6063                // Walk catalog triggers; patch `update_columns` on
6064                // triggers attached to this table.
6065                let table_name = s.name.to_string();
6066                for trig in self.active_catalog_mut().triggers_mut() {
6067                    if !trig.table.eq_ignore_ascii_case(&table_name) {
6068                        continue;
6069                    }
6070                    for c in &mut trig.update_columns {
6071                        if c.eq_ignore_ascii_case(&old) {
6072                            *c = new.clone();
6073                        }
6074                    }
6075                }
6076            }
6077        }
6078        Ok(())
6079    }
6080
6081    fn exec_alter_index(
6082        &mut self,
6083        stmt: spg_sql::ast::AlterIndexStatement,
6084    ) -> Result<QueryResult, EngineError> {
6085        // Translate the optional SQL-side encoding choice into the
6086        // storage-side enum; the same SqlVecEncoding -> VecEncoding
6087        // bridge `column_type_to_data_type` uses.
6088        let spg_sql::ast::AlterIndexStatement {
6089            name: idx_name,
6090            target,
6091        } = stmt;
6092        // v7.16.2 — RENAME TO branch (mailrs round-10 migrate-042).
6093        // IF EXISTS makes a missing index a no-op rather than an
6094        // error, mirroring PG semantics.
6095        if let spg_sql::ast::AlterIndexTarget::Rename { new, if_exists } = target {
6096            let renamed = self.active_catalog_mut().rename_index(&idx_name, &new);
6097            return match renamed {
6098                Ok(()) => Ok(QueryResult::CommandOk {
6099                    affected: 0,
6100                    modified_catalog: !self.in_transaction(),
6101                }),
6102                Err(StorageError::IndexNotFound { .. }) if if_exists => {
6103                    Ok(QueryResult::CommandOk {
6104                        affected: 0,
6105                        modified_catalog: false,
6106                    })
6107                }
6108                Err(e) => Err(EngineError::Storage(e)),
6109            };
6110        }
6111        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target else {
6112            unreachable!("Rename branch returned above");
6113        };
6114        let target = encoding.map(|e| match e {
6115            SqlVecEncoding::F32 => VecEncoding::F32,
6116            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
6117            SqlVecEncoding::F16 => VecEncoding::F16,
6118        });
6119        // Linear scan: index names are globally unique within a
6120        // catalog (enforced by add_nsw_index_inner) so the first
6121        // match is the only one. Save the table name to avoid
6122        // borrowing while we then take a mut borrow.
6123        let table_name = {
6124            let cat = self.active_catalog();
6125            let mut found: Option<String> = None;
6126            for tname in cat.table_names() {
6127                if let Some(t) = cat.get(&tname)
6128                    && t.indices().iter().any(|i| i.name == idx_name)
6129                {
6130                    found = Some(tname);
6131                    break;
6132                }
6133            }
6134            found.ok_or_else(|| {
6135                EngineError::Storage(StorageError::IndexNotFound {
6136                    name: idx_name.clone(),
6137                })
6138            })?
6139        };
6140        let table = self
6141            .active_catalog_mut()
6142            .get_mut(&table_name)
6143            .expect("table found above");
6144        table.rebuild_nsw_index(&idx_name, target)?;
6145        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
6146        // changes cost characteristics; evict any cached plans.
6147        self.plan_cache.evict_referencing(&table_name);
6148        Ok(QueryResult::CommandOk {
6149            affected: 0,
6150            modified_catalog: !self.in_transaction(),
6151        })
6152    }
6153
6154    fn exec_create_index(
6155        &mut self,
6156        stmt: CreateIndexStatement,
6157    ) -> Result<QueryResult, EngineError> {
6158        let table = self
6159            .active_catalog_mut()
6160            .get_mut(&stmt.table)
6161            .ok_or_else(|| {
6162                EngineError::Storage(StorageError::TableNotFound {
6163                    name: stmt.table.clone(),
6164                })
6165            })?;
6166        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
6167        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
6168            return Ok(QueryResult::CommandOk {
6169                affected: 0,
6170                modified_catalog: false,
6171            });
6172        }
6173        // v7.9.14 — multi-column index parses through; engine
6174        // builds a single-column BTree on the leading column only.
6175        // The extras live on the AST so spg-server's dispatcher
6176        // can emit a PG-wire NoticeResponse / log line. Composite
6177        // BTree keys land in v7.10.
6178        let _ = &stmt.extra_columns; // intentional drop on engine side
6179        let table_name = stmt.table.clone();
6180        // v6.8.0 — resolve INCLUDE column names to positions. Done
6181        // before `add_index` so a typo error surfaces before any
6182        // catalog mutation lands.
6183        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
6184            Vec::new()
6185        } else {
6186            let schema = table.schema();
6187            stmt.included_columns
6188                .iter()
6189                .map(|c| {
6190                    schema.column_position(c).ok_or_else(|| {
6191                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
6192                    })
6193                })
6194                .collect::<Result<Vec<_>, _>>()?
6195        };
6196        match stmt.method {
6197            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
6198            IndexMethod::Hnsw => {
6199                if !included_positions.is_empty() {
6200                    return Err(EngineError::Unsupported(
6201                        "INCLUDE columns are not supported on HNSW indexes".into(),
6202                    ));
6203                }
6204                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
6205            }
6206            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
6207            IndexMethod::Brin => {
6208                if !included_positions.is_empty() {
6209                    return Err(EngineError::Unsupported(
6210                        "INCLUDE columns are not supported on BRIN indexes".into(),
6211                    ));
6212                }
6213                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
6214            }
6215            // v7.12.3 — GIN inverted index. Real posting-list-backed
6216            // GIN when the indexed column is `tsvector`; falls back
6217            // to a BTree on the leading column for any other column
6218            // type so v7.9.26b's `pg_dump` compatibility (GIN on
6219            // JSONB etc. silently loading as BTree) is preserved.
6220            // Operators see the real GIN only where it matters; old
6221            // schemas keep loading.
6222            IndexMethod::Gin => {
6223                if !included_positions.is_empty() {
6224                    return Err(EngineError::Unsupported(
6225                        "INCLUDE columns are not supported on GIN indexes".into(),
6226                    ));
6227                }
6228                let col_pos = table
6229                    .schema()
6230                    .column_position(&stmt.column)
6231                    .ok_or_else(|| {
6232                        EngineError::Storage(StorageError::ColumnNotFound {
6233                            column: stmt.column.clone(),
6234                        })
6235                    })?;
6236                let col_ty = table.schema().columns[col_pos].ty;
6237                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
6238                // column dispatches to the real trigram-shingle
6239                // GIN build (LIKE / similarity acceleration).
6240                // Other GIN opclasses fall through to the regular
6241                // tsvector-vs-BTree split below.
6242                let is_trgm = stmt
6243                    .opclass
6244                    .as_deref()
6245                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
6246                if is_trgm
6247                    && matches!(
6248                        col_ty,
6249                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
6250                    )
6251                {
6252                    table
6253                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
6254                        .map_err(EngineError::Storage)?;
6255                } else if col_ty == spg_storage::DataType::TsVector {
6256                    table
6257                        .add_gin_index(stmt.name.clone(), &stmt.column)
6258                        .map_err(EngineError::Storage)?;
6259                } else {
6260                    // v7.9.26b BTree fallback — the catalog still
6261                    // gets an index entry on the leading column so
6262                    // pg_dump scripts that name GIN on JSONB / etc.
6263                    // load clean; query-time gain stays opt-in for
6264                    // tsvector callers.
6265                    table.add_index(stmt.name.clone(), &stmt.column)?;
6266                }
6267            }
6268        }
6269        if !included_positions.is_empty()
6270            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
6271        {
6272            idx.included_columns = included_positions;
6273        }
6274        // v6.8.1 — persist partial-index predicate. Stored as the
6275        // expression's Display form so the catalog snapshot stays
6276        // pure (storage has no spg-sql dependency). The runtime
6277        // maintenance path treats partial indexes identically to
6278        // full indexes for v6.8.1 (over-maintenance is safe; the
6279        // planner-side "use partial when query WHERE implies the
6280        // predicate" pass is STABILITY carve-out).
6281        if let Some(pred_expr) = &stmt.partial_predicate {
6282            let canonical = pred_expr.to_string();
6283            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
6284            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
6285            // routinely to slim trigram indexes. SPG now persists
6286            // the predicate for GIN / BRIN / HNSW the same way it
6287            // already does for BTree — same v6.8.1 "over-maintain
6288            // is safe; planner-side partial routing is STABILITY
6289            // carve-out" semantics. HNSW carries an additional
6290            // caveat: the predicate isn't applied at index build
6291            // time (would require per-row eval inside the NSW
6292            // construction loop), so the index oversamples; query
6293            // time the WHERE clause still filters correctly.
6294            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6295                idx.partial_predicate = Some(canonical);
6296            }
6297        }
6298        // v6.8.2 — persist expression index key. Same Display-form
6299        // storage; the runtime maintenance pass evaluates each
6300        // row's expression to derive the index key, but for v6.8.2
6301        // the engine falls through to the bare-column-reference
6302        // path and the expression is preserved for format-layer
6303        // round-trip + future planner work. Carved-out in
6304        // STABILITY § "Out of v6.8".
6305        if let Some(key_expr) = &stmt.expression {
6306            if matches!(
6307                stmt.method,
6308                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
6309            ) {
6310                return Err(EngineError::Unsupported(
6311                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
6312                ));
6313            }
6314            let canonical = key_expr.to_string();
6315            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6316                idx.expression = Some(canonical);
6317            }
6318        }
6319        // v7.9.29 — persist `is_unique` flag on the storage Index.
6320        // Combined with `partial_predicate`, INSERT enforcement
6321        // checks that no other row whose predicate evaluates true
6322        // shares the same indexed key. Parser already rejected
6323        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
6324        // For multi-column UNIQUE INDEX the extras matter (the
6325        // full tuple is the uniqueness key), so resolve them to
6326        // column positions and persist on the index too.
6327        if stmt.is_unique {
6328            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
6329            for col_name in &stmt.extra_columns {
6330                let pos = table
6331                    .schema()
6332                    .columns
6333                    .iter()
6334                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
6335                    .ok_or_else(|| {
6336                        EngineError::Unsupported(alloc::format!(
6337                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
6338                            stmt.name,
6339                            stmt.table
6340                        ))
6341                    })?;
6342                extra_positions.push(pos);
6343            }
6344            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6345                idx.is_unique = true;
6346                idx.extra_column_positions = extra_positions;
6347            }
6348            // At index-creation time, check the existing rows for
6349            // pre-existing duplicates that would have violated the
6350            // new constraint — otherwise CREATE UNIQUE INDEX would
6351            // silently leave duplicates in place.
6352            let snapshot_indices = table.indices().to_vec();
6353            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
6354                table.rows().iter().cloned().collect();
6355            let snapshot_schema = table.schema().clone();
6356            let idx_ref = snapshot_indices
6357                .iter()
6358                .find(|i| i.name == stmt.name)
6359                .expect("just-added index");
6360            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
6361        }
6362        // v6.3.1 — adding an index can change the optimal plan for
6363        // any cached query that references this table.
6364        self.plan_cache.evict_referencing(&table_name);
6365        Ok(QueryResult::CommandOk {
6366            affected: 0,
6367            modified_catalog: !self.in_transaction(),
6368        })
6369    }
6370
6371    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
6372    /// for `CREATE TABLE IF NOT EXISTS` when the table already
6373    /// exists. Adds missing columns + inline FKs from the new
6374    /// definition; existing columns / constraints stay untouched.
6375    /// New columns with a `NOT NULL` declaration without a
6376    /// `DEFAULT` are reported as a clear error rather than
6377    /// silently dropped — this is the "fail loud on real
6378    /// incompatibility, fail silent on schema-superset" tradeoff.
6379    fn reconcile_table_if_not_exists(
6380        &mut self,
6381        stmt: CreateTableStatement,
6382    ) -> Result<QueryResult, EngineError> {
6383        let table_name = stmt.name.clone();
6384        let clock = self.clock;
6385        let existing_col_names: alloc::collections::BTreeSet<String> = self
6386            .active_catalog()
6387            .get(&table_name)
6388            .expect("checked above")
6389            .schema()
6390            .columns
6391            .iter()
6392            .map(|c| c.name.to_ascii_lowercase())
6393            .collect();
6394        let row_count = self
6395            .active_catalog()
6396            .get(&table_name)
6397            .expect("checked above")
6398            .row_count();
6399        // Collect missing column defs in source order.
6400        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
6401            .columns
6402            .iter()
6403            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
6404            .cloned()
6405            .collect();
6406        for col_def in new_columns {
6407            let col_name = col_def.name.clone();
6408            let nullable = col_def.nullable;
6409            let has_default = col_def.default.is_some() || col_def.auto_increment;
6410            let col_schema = column_def_to_schema(col_def)?;
6411            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
6412                resolve_column_default_free(&col_schema, clock)?
6413            } else if nullable || row_count == 0 {
6414                Value::Null
6415            } else {
6416                return Err(EngineError::Unsupported(alloc::format!(
6417                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
6418                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
6419                )));
6420            };
6421            let table = self
6422                .active_catalog_mut()
6423                .get_mut(&table_name)
6424                .expect("checked above");
6425            table.add_column(col_schema, fill_value);
6426        }
6427        // Resolve any newly-added inline FKs (column-level
6428        // REFERENCES forms) and install. Skip FKs whose local
6429        // columns we didn't have in the existing table.
6430        let table_cols_now = self
6431            .active_catalog()
6432            .get(&table_name)
6433            .expect("checked above")
6434            .schema()
6435            .columns
6436            .clone();
6437        for fk in stmt.foreign_keys {
6438            // Only install FKs whose every local column resolves
6439            // — older catalogs may have a column the new FK
6440            // references but not the column the new FK declares.
6441            let all_resolved = fk.columns.iter().all(|c| {
6442                table_cols_now
6443                    .iter()
6444                    .any(|sc| sc.name.eq_ignore_ascii_case(c))
6445            });
6446            if !all_resolved {
6447                continue;
6448            }
6449            let already_present = {
6450                let table = self
6451                    .active_catalog()
6452                    .get(&table_name)
6453                    .expect("checked above");
6454                table.schema().foreign_keys.iter().any(|f| {
6455                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
6456                        && f.local_columns.len() == fk.columns.len()
6457                })
6458            };
6459            if already_present {
6460                continue;
6461            }
6462            let storage_fk =
6463                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
6464            let table = self
6465                .active_catalog_mut()
6466                .get_mut(&table_name)
6467                .expect("checked above");
6468            table.schema_mut().foreign_keys.push(storage_fk);
6469        }
6470        Ok(QueryResult::CommandOk {
6471            affected: 0,
6472            modified_catalog: !self.in_transaction(),
6473        })
6474    }
6475
6476    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
6477    fn exec_drop_table(
6478        &mut self,
6479        names: Vec<String>,
6480        if_exists: bool,
6481    ) -> Result<QueryResult, EngineError> {
6482        for name in names {
6483            let dropped = self.active_catalog_mut().drop_table(&name);
6484            if !dropped && !if_exists {
6485                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
6486            }
6487        }
6488        Ok(QueryResult::CommandOk {
6489            affected: 0,
6490            modified_catalog: !self.in_transaction(),
6491        })
6492    }
6493
6494    /// v7.14.0 — DROP INDEX handler.
6495    fn exec_drop_index(
6496        &mut self,
6497        name: String,
6498        if_exists: bool,
6499    ) -> Result<QueryResult, EngineError> {
6500        let dropped = self.active_catalog_mut().drop_named_index(&name);
6501        if !dropped && !if_exists {
6502            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
6503        }
6504        Ok(QueryResult::CommandOk {
6505            affected: 0,
6506            modified_catalog: !self.in_transaction(),
6507        })
6508    }
6509
6510    fn exec_create_table(
6511        &mut self,
6512        stmt: CreateTableStatement,
6513    ) -> Result<QueryResult, EngineError> {
6514        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
6515            // v7.16.2 — PG-strict silent no-op (mailrs round-10
6516            // surfaced this). v7.13.3's "reconcile by adding
6517            // missing columns" was friendly for mailrs round-7
6518            // where init-schema's `contacts` and migrate-023's
6519            // CardDAV `contacts` collided; but it ALSO silently
6520            // added columns to existing tables when later
6521            // migrations had a duplicate `CREATE TABLE IF NOT
6522            // EXISTS <t> (different-shape-cols)` shape. mailrs's
6523            // migrate-030 has exactly that — re-declares
6524            // system_config with `key` even though init-schema
6525            // already created it with `config_key`. PG's silent
6526            // no-op leaves system_config at `config_key`;
6527            // v7.13.3 added a phantom `key` column that then
6528            // tripped migrate-040's idempotent rename guard.
6529            // mailrs v1.7.106 ships the proper PG-style
6530            // contacts rename via DO + IF EXISTS, so SPG can
6531            // revert to PG-strict here without re-breaking the
6532            // round-7 case.
6533            return Ok(QueryResult::CommandOk {
6534                affected: 0,
6535                modified_catalog: false,
6536            });
6537        }
6538        let table_name = stmt.name.clone();
6539        // v7.9.13 — pluck the names of any columns marked
6540        // `PRIMARY KEY` inline so the post-create-table pass can
6541        // build an implicit BTree index. mailrs F1.
6542        let inline_pk_columns: Vec<String> = stmt
6543            .columns
6544            .iter()
6545            .filter(|c| c.is_primary_key)
6546            .map(|c| c.name.clone())
6547            .collect();
6548        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
6549        // and UNIQUE (a, b, ...). Each builds a BTree index on the
6550        // leading column (the existing single-column storage tier)
6551        // and registers a UniquenessConstraint on the schema for
6552        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
6553        let cols = stmt
6554            .columns
6555            .into_iter()
6556            .map(column_def_to_schema)
6557            .collect::<Result<Vec<_>, _>>()?;
6558        // v7.17.0 Phase 1.4 + 1.5 — classify every raw
6559        // user_type_ref (parked as user_enum_type by
6560        // column_def_to_schema) into either an enum binding or a
6561        // domain binding. For domains, also rewrite the column's
6562        // base DataType from the placeholder Text to the domain's
6563        // declared base. Unknown idents are still a hard error
6564        // here (same as Phase 1.4) so silent acceptance never
6565        // happens.
6566        let mut cols = cols;
6567        for col in cols.iter_mut() {
6568            let Some(name) = col.user_enum_type.take() else {
6569                continue;
6570            };
6571            let cat = self.active_catalog();
6572            if cat.enum_types().contains_key(&name) {
6573                col.user_enum_type = Some(name);
6574                continue;
6575            }
6576            if let Some(dom) = cat.domain_types().get(&name) {
6577                col.ty = dom.base_type;
6578                col.user_domain_type = Some(name);
6579                if !dom.nullable {
6580                    col.nullable = false;
6581                }
6582                continue;
6583            }
6584            return Err(EngineError::Unsupported(alloc::format!(
6585                "column {:?}: unknown column type {:?} (not a built-in, ENUM, or DOMAIN)",
6586                col.name,
6587                name
6588            )));
6589        }
6590        for tc in &stmt.table_constraints {
6591            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
6592                for col_name in columns {
6593                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
6594                        col.nullable = false;
6595                    }
6596                }
6597            }
6598        }
6599        // v7.6.1 — resolve every FK in the statement against the
6600        // already-known catalog. Validates: parent table exists,
6601        // parent column names exist, arity matches, parent columns
6602        // have a PK / UNIQUE index. Self-referencing FKs (parent
6603        // table == this table) resolve against the column list we
6604        // just built — they don't need the catalog yet.
6605        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
6606            Vec::with_capacity(stmt.foreign_keys.len());
6607        for fk in stmt.foreign_keys {
6608            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
6609            // (mysqldump preamble + bulk imports), defer FK
6610            // resolution if the parent table isn't in the catalog
6611            // yet. The FK is queued and resolved when checks flip
6612            // back on. Self-references stay in-band (the parent is
6613            // the same as the child we're building).
6614            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
6615            if !self.foreign_key_checks
6616                && needs_parent
6617                && self.active_catalog().get(&fk.parent_table).is_none()
6618            {
6619                self.pending_foreign_keys.push((table_name.clone(), fk));
6620                continue;
6621            }
6622            fks.push(resolve_foreign_key(
6623                &table_name,
6624                &cols,
6625                fk,
6626                self.active_catalog(),
6627            )?);
6628        }
6629        let mut schema = TableSchema::new(table_name.clone(), cols);
6630        schema.foreign_keys = fks;
6631        // v7.9.19 — translate AST table_constraints to storage
6632        // UniquenessConstraints (column name → position) so the
6633        // INSERT enforcement helper sees positions directly.
6634        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
6635        let mut check_exprs: Vec<String> = Vec::new();
6636        for tc in &stmt.table_constraints {
6637            let (is_pk, names, nnd) = match tc {
6638                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
6639                    (true, columns.clone(), false)
6640                }
6641                spg_sql::ast::TableConstraint::Unique {
6642                    columns,
6643                    nulls_not_distinct,
6644                    ..
6645                } => (false, columns.clone(), *nulls_not_distinct),
6646                spg_sql::ast::TableConstraint::Check { expr, .. } => {
6647                    // v7.13.0 — collect CHECK predicate sources;
6648                    // they get attached to the schema below.
6649                    check_exprs.push(alloc::format!("{expr}"));
6650                    continue;
6651                }
6652                // v7.15.0 — plain `KEY (cols)` from MySQL inline
6653                // is NOT a uniqueness constraint; skip the UC
6654                // build path entirely. The BTree index lands in
6655                // the post-create loop below alongside the PK/UQ
6656                // implicit indexes.
6657                spg_sql::ast::TableConstraint::Index { .. } => continue,
6658                // v7.17.0 Phase 2.2 — MySQL FULLTEXT KEY is not
6659                // a uniqueness constraint either; its GIN gets
6660                // built in the post-create loop below.
6661                spg_sql::ast::TableConstraint::FulltextIndex { .. } => continue,
6662            };
6663            let mut positions = Vec::with_capacity(names.len());
6664            for n in &names {
6665                let pos = schema
6666                    .columns
6667                    .iter()
6668                    .position(|c| c.name == *n)
6669                    .ok_or_else(|| {
6670                        EngineError::Unsupported(alloc::format!(
6671                            "table constraint references unknown column {n:?}"
6672                        ))
6673                    })?;
6674                positions.push(pos);
6675            }
6676            uc_storage.push(spg_storage::UniquenessConstraint {
6677                is_primary_key: is_pk,
6678                columns: positions,
6679                nulls_not_distinct: nnd,
6680            });
6681        }
6682        // v7.24 (round-16 collateral) — inline `PRIMARY KEY` column
6683        // constraints used to build only the implicit BTree index;
6684        // uniqueness was NEVER registered, so duplicate keys were
6685        // silently accepted (table-level PRIMARY KEY did enforce).
6686        // Register the same UniquenessConstraint the table-level
6687        // form gets, unless one already covers the column set.
6688        if !inline_pk_columns.is_empty() {
6689            let mut positions = Vec::with_capacity(inline_pk_columns.len());
6690            for n in &inline_pk_columns {
6691                if let Some(pos) = schema.columns.iter().position(|c| c.name == *n) {
6692                    positions.push(pos);
6693                }
6694            }
6695            if !uc_storage
6696                .iter()
6697                .any(|uc| uc.is_primary_key || uc.columns == positions)
6698            {
6699                uc_storage.push(spg_storage::UniquenessConstraint {
6700                    is_primary_key: true,
6701                    columns: positions,
6702                    nulls_not_distinct: false,
6703                });
6704            }
6705        }
6706        schema.uniqueness_constraints = uc_storage.clone();
6707        schema.checks = check_exprs;
6708        self.active_catalog_mut().create_table(schema)?;
6709        // v7.9.13 — implicit BTree per inline PK column +
6710        // v7.9.19 — implicit BTree on the leading column of every
6711        // table-level PRIMARY KEY / UNIQUE constraint.
6712        let table = self
6713            .active_catalog_mut()
6714            .get_mut(&table_name)
6715            .expect("just created");
6716        for (i, col_name) in inline_pk_columns.iter().enumerate() {
6717            let idx_name = if inline_pk_columns.len() == 1 {
6718                alloc::format!("{table_name}_pkey")
6719            } else {
6720                alloc::format!("{table_name}_pkey_{i}")
6721            };
6722            if let Err(e) = table.add_index(idx_name, col_name) {
6723                return Err(EngineError::Storage(e));
6724            }
6725        }
6726        for (i, tc) in stmt.table_constraints.iter().enumerate() {
6727            // v7.17.0 Phase 2.2 — FULLTEXT KEY lands a real
6728            // tsvector-GIN per declared column instead of the
6729            // BTree the PK / UQ / KEY paths build. Branch early
6730            // so the BTree loop never sees the FULLTEXT shape.
6731            if let spg_sql::ast::TableConstraint::FulltextIndex { name, columns } = tc {
6732                for (k, col) in columns.iter().enumerate() {
6733                    let already = table.indices().iter().any(|idx| {
6734                        matches!(idx.kind, spg_storage::IndexKind::GinFulltext(_))
6735                            && table.schema().columns[idx.column_position].name == *col
6736                    });
6737                    if already {
6738                        continue;
6739                    }
6740                    let idx_name = match (name.as_ref(), columns.len(), k) {
6741                        (Some(n), 1, _) => n.clone(),
6742                        (Some(n), _, k) => alloc::format!("{n}_{k}"),
6743                        (None, _, _) => {
6744                            alloc::format!("{table_name}_{col}_ftidx")
6745                        }
6746                    };
6747                    if let Err(e) = table.add_gin_fulltext_index(idx_name, col) {
6748                        return Err(EngineError::Storage(e));
6749                    }
6750                }
6751                continue;
6752            }
6753            // v7.15.0 — plain KEY/INDEX rides this same loop so
6754            // the implicit BTree gets built. It carries its own
6755            // user-supplied name; PK/UQ still synthesise.
6756            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
6757                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
6758                    ("pkey", columns, None)
6759                }
6760                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
6761                spg_sql::ast::TableConstraint::Index { name, columns } => {
6762                    ("idx", columns, name.as_ref())
6763                }
6764                spg_sql::ast::TableConstraint::Check { .. } => continue,
6765                // Handled by the early-branch above.
6766                spg_sql::ast::TableConstraint::FulltextIndex { .. } => continue,
6767            };
6768            let leading = &names[0];
6769            // Skip if a same-column BTree already exists (e.g.
6770            // inline PK on the leading column).
6771            let already = table.indices().iter().any(|idx| {
6772                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
6773                    && table.schema().columns[idx.column_position].name == *leading
6774            });
6775            if already {
6776                continue;
6777            }
6778            let idx_name = if let Some(n) = explicit_name {
6779                n.clone()
6780            } else if names.len() == 1 {
6781                alloc::format!("{table_name}_{leading}_{suffix}")
6782            } else {
6783                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
6784            };
6785            if let Err(e) = table.add_index(idx_name, leading) {
6786                return Err(EngineError::Storage(e));
6787            }
6788        }
6789        Ok(QueryResult::CommandOk {
6790            affected: 0,
6791            modified_catalog: !self.in_transaction(),
6792        })
6793    }
6794
6795    fn exec_insert(&mut self, mut stmt: InsertStatement) -> Result<QueryResult, EngineError> {
6796        // v7.17.0 Phase 1.1 — pre-resolve any nextval / currval /
6797        // setval calls against the catalog before the row loop. We
6798        // walk each tuple expression and replace matching
6799        // FunctionCall nodes with their concrete Literal. This
6800        // keeps `literal_expr_to_value` free of `&mut self` and
6801        // lets multi-row INSERT VALUES (… nextval('seq') …)
6802        // mint a separate sequence value per row.
6803        for tuple in &mut stmt.rows {
6804            for cell in tuple.iter_mut() {
6805                self.resolve_sequence_calls_in_expr(cell)?;
6806            }
6807        }
6808        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
6809        // round-5 G4). Execute the inner SELECT first, then route
6810        // back through the regular VALUES code path with the
6811        // materialised rows.
6812        if let Some(select) = stmt.select_source.clone() {
6813            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
6814            let rows = match select_result {
6815                QueryResult::Rows { rows, .. } => rows,
6816                other => {
6817                    return Err(EngineError::Unsupported(alloc::format!(
6818                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
6819                    )));
6820                }
6821            };
6822            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
6823            for row in rows {
6824                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
6825                for v in row.values {
6826                    tuple.push(value_to_literal_expr_permissive(v)?);
6827                }
6828                materialised.push(tuple);
6829            }
6830            let recurse = InsertStatement {
6831                table: stmt.table,
6832                columns: stmt.columns,
6833                rows: materialised,
6834                select_source: None,
6835                on_conflict: stmt.on_conflict,
6836                returning: stmt.returning,
6837            };
6838            return self.exec_insert(recurse);
6839        }
6840        // v7.9.21 — snapshot the clock fn pointer before the mut
6841        // borrow on the catalog opens; runtime DEFAULT eval needs
6842        // it inside the row hot loop.
6843        let clock = self.clock;
6844        // v7.12.4 — snapshot row-level triggers + their referenced
6845        // functions before the mut borrow on the catalog opens.
6846        // Cloned out so the row hot loop can fire them without
6847        // re-borrowing the catalog (which would conflict with
6848        // table.insert's mutable borrow).
6849        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
6850        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
6851        let trigger_session_cfg: Option<alloc::string::String> = self
6852            .session_params
6853            .get("default_text_search_config")
6854            .cloned();
6855        // v7.17.0 Phase 1.4 — snapshot the enum label lookup BEFORE
6856        // opening the mutable borrow on the table below. We need
6857        // catalog-level read access (enum_types lives at the
6858        // catalog level, not the table) and the upcoming mutable
6859        // borrow shadows it.
6860        let pre_borrow_column_meta: Vec<ColumnSchema> = {
6861            let preview_table = self.active_catalog().get(&stmt.table).ok_or_else(|| {
6862                EngineError::Storage(StorageError::TableNotFound {
6863                    name: stmt.table.clone(),
6864                })
6865            })?;
6866            preview_table.schema().columns.clone()
6867        };
6868        let enum_label_lookup: alloc::collections::BTreeMap<usize, Vec<String>> =
6869            pre_borrow_column_meta
6870                .iter()
6871                .enumerate()
6872                .filter_map(|(i, col)| {
6873                    // v7.17.0 Phase 3.P0-36 — MySQL inline ENUM
6874                    // variant lists take priority over the PG
6875                    // catalog enum_types lookup (they're
6876                    // column-local and authoritative when set).
6877                    if let Some(inline) = &col.inline_enum_variants {
6878                        return Some((i, inline.clone()));
6879                    }
6880                    col.user_enum_type.as_ref().and_then(|ename| {
6881                        self.active_catalog()
6882                            .enum_types()
6883                            .get(ename)
6884                            .map(|e| (i, e.labels.clone()))
6885                    })
6886                })
6887                .collect();
6888        // v7.17.0 Phase 3.P0-37 — MySQL inline SET variant lists.
6889        // Distinct from enum_label_lookup: SET validates that
6890        // every comma-separated token is in the variant list, and
6891        // canonicalises the cell to definition-order de-duped text.
6892        let set_variant_lookup: alloc::collections::BTreeMap<usize, Vec<String>> =
6893            pre_borrow_column_meta
6894                .iter()
6895                .enumerate()
6896                .filter_map(|(i, col)| col.inline_set_variants.as_ref().map(|vs| (i, vs.clone())))
6897                .collect();
6898        // v7.29 (round-23a) - when the column's implicit sequence
6899        // exists (born on first nextval/setval address), a setval
6900        // above the table MAX moves the next auto-assigned id:
6901        // assign from max(table_max + 1, last_value + 1). Tables
6902        // whose sequence was never addressed keep the bare max+1
6903        // path (identical pre-7.29 behaviour, no lookup cost
6904        // beyond one map probe per auto column per statement).
6905        let mut seq_floors: alloc::collections::BTreeMap<usize, i64> =
6906            alloc::collections::BTreeMap::new();
6907        for (i, col) in pre_borrow_column_meta.iter().enumerate() {
6908            if col.auto_increment
6909                && let Some(sd) = self.active_catalog().sequences().get(&alloc::format!(
6910                    "{}_{}_seq",
6911                    stmt.table,
6912                    col.name
6913                ))
6914            {
6915                // is_called=false (fresh RESTART / setval(_, false))
6916                // means the NEXT value is last_value itself.
6917                let floor = if sd.is_called {
6918                    sd.last_value + 1
6919                } else {
6920                    sd.last_value
6921                };
6922                seq_floors.insert(i, floor);
6923            }
6924        }
6925        let table = self
6926            .active_catalog_mut()
6927            .get_mut(&stmt.table)
6928            .ok_or_else(|| {
6929                EngineError::Storage(StorageError::TableNotFound {
6930                    name: stmt.table.clone(),
6931                })
6932            })?;
6933        // v3.1.5: clone the columns vector only (not the whole
6934        // TableSchema — saves one String alloc for the table name).
6935        // We need an owned snapshot because we'll call `table.insert`
6936        // (mutable borrow on `table`) inside the row loop while
6937        // reading schema fields.
6938        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
6939        let schema_cols_len = column_meta.len();
6940        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
6941        // column `c` is filled from the `j`-th tuple slot; `None` means
6942        // "fill with NULL". Validated once and reused for every row.
6943        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
6944            None => None, // 1-1 mapping, fast path
6945            Some(cols) => {
6946                let mut map = alloc::vec![None; schema_cols_len];
6947                for (j, name) in cols.iter().enumerate() {
6948                    let idx = column_meta
6949                        .iter()
6950                        .position(|c| c.name == *name)
6951                        .ok_or_else(|| {
6952                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
6953                        })?;
6954                    if map[idx].is_some() {
6955                        return Err(EngineError::Storage(StorageError::ArityMismatch {
6956                            expected: schema_cols_len,
6957                            actual: cols.len(),
6958                        }));
6959                    }
6960                    map[idx] = Some(j);
6961                }
6962                // Omitted columns must either be nullable, carry a
6963                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
6964                // omissions up front so the WAL stays clean.
6965                for (i, col) in column_meta.iter().enumerate() {
6966                    if map[i].is_none()
6967                        && !col.nullable
6968                        && col.default.is_none()
6969                        && col.runtime_default.is_none()
6970                        && !col.auto_increment
6971                    {
6972                        return Err(EngineError::Storage(StorageError::NullInNotNull {
6973                            column: col.name.clone(),
6974                        }));
6975                    }
6976                }
6977                Some(map)
6978            }
6979        };
6980        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
6981        // v7.6.2 — snapshot this table's FK list before the
6982        // mutable-borrow window so we can run parent lookups
6983        // against the immutable catalog after parsing. Empty vec is
6984        // the no-FK fast path; clone cost is O(fks * arity) which
6985        // is < 100 ns for typical schemas.
6986        let fks = table.schema().foreign_keys.clone();
6987        let mut affected = 0usize;
6988        // Stage 1 — parse + AUTO_INC + coerce all rows under the
6989        // single mutable borrow.
6990        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
6991        // v7.24 (round-16 collateral) — statement-scoped serial
6992        // cursors. next_auto_value() is a max+1 scan over COMMITTED
6993        // rows; multi-row `INSERT … VALUES (…),(…)` computed it per
6994        // tuple BEFORE any insertion, so every row drew the SAME id
6995        // (then sailed through, compounding with the inline-PK
6996        // enforcement gap). First use per column seeds from the
6997        // table; subsequent rows increment.
6998        let mut auto_cursors: alloc::collections::BTreeMap<usize, i64> =
6999            alloc::collections::BTreeMap::new();
7000        for tuple in stmt.rows {
7001            if tuple.len() != expected_tuple_len {
7002                return Err(EngineError::Storage(StorageError::ArityMismatch {
7003                    expected: expected_tuple_len,
7004                    actual: tuple.len(),
7005                }));
7006            }
7007            // Fast path: no column-list permutation → tuple slot j
7008            // maps to schema column j. We can zip schema with tuple
7009            // and skip the `raw_tuple` staging allocation entirely.
7010            let values: Vec<Value> = if let Some(map) = &tuple_pos {
7011                // Permuted path: still need raw_tuple to index by `map[i]`.
7012                let raw_tuple: Vec<Value> = tuple
7013                    .into_iter()
7014                    .map(literal_expr_to_value)
7015                    .collect::<Result<_, _>>()?;
7016                let mut out = Vec::with_capacity(schema_cols_len);
7017                for (i, col) in column_meta.iter().enumerate() {
7018                    let mut raw = match map[i] {
7019                        Some(j) => raw_tuple[j].clone(),
7020                        None => resolve_column_default_free(col, clock)?,
7021                    };
7022                    if col.auto_increment && raw.is_null() {
7023                        let next = match auto_cursors.get(&i) {
7024                            Some(n) => *n,
7025                            None => {
7026                                let base = table.next_auto_value(i).ok_or_else(|| {
7027                                    EngineError::Unsupported(alloc::format!(
7028                                        "AUTO_INCREMENT applies to integer columns only (column `{}`)",
7029                                        col.name
7030                                    ))
7031                                })?;
7032                                base.max(seq_floors.get(&i).copied().unwrap_or(i64::MIN))
7033                            }
7034                        };
7035                        auto_cursors.insert(i, next + 1);
7036                        raw = Value::BigInt(next);
7037                    }
7038                    let coerced = coerce_value(raw, col.ty, &col.name, i)?;
7039                    enforce_enum_label(&enum_label_lookup, i, &col.name, &coerced)?;
7040                    let coerced =
7041                        canonicalize_set_value(&set_variant_lookup, i, &col.name, coerced)?;
7042                    check_unsigned_range(&coerced, col, i)?;
7043                    out.push(coerced);
7044                }
7045                out
7046            } else {
7047                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
7048                let mut out = Vec::with_capacity(schema_cols_len);
7049                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
7050                    let mut raw = literal_expr_to_value(expr)?;
7051                    if col.auto_increment && raw.is_null() {
7052                        let next = match auto_cursors.get(&i) {
7053                            Some(n) => *n,
7054                            None => {
7055                                let base = table.next_auto_value(i).ok_or_else(|| {
7056                                    EngineError::Unsupported(alloc::format!(
7057                                        "AUTO_INCREMENT applies to integer columns only (column `{}`)",
7058                                        col.name
7059                                    ))
7060                                })?;
7061                                base.max(seq_floors.get(&i).copied().unwrap_or(i64::MIN))
7062                            }
7063                        };
7064                        auto_cursors.insert(i, next + 1);
7065                        raw = Value::BigInt(next);
7066                    }
7067                    let coerced = coerce_value(raw, col.ty, &col.name, i)?;
7068                    enforce_enum_label(&enum_label_lookup, i, &col.name, &coerced)?;
7069                    let coerced =
7070                        canonicalize_set_value(&set_variant_lookup, i, &col.name, coerced)?;
7071                    check_unsigned_range(&coerced, col, i)?;
7072                    out.push(coerced);
7073                }
7074                out
7075            };
7076            all_values.push(values);
7077        }
7078        // Stage 2 — FK enforcement on the immutable catalog.
7079        // Non-lexical lifetimes release the mutable borrow on
7080        // `table` here since stage 1 was the last use. The
7081        // parent-table lookup runs before any row is committed.
7082        let uniqueness = table.schema().uniqueness_constraints.clone();
7083        let _ = table;
7084        if !fks.is_empty() {
7085            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
7086        }
7087        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
7088        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
7089        // NOTE (mailrs embed round-12): UNIQUE / PRIMARY KEY and
7090        // UNIQUE INDEX enforcement moved BELOW the ON CONFLICT
7091        // resolution pass. Running them first made every
7092        // `ON CONFLICT … DO UPDATE` upsert fail with a uniqueness
7093        // violation before the conflict handler could route the row
7094        // to an UPDATE — PG resolves the conflict action first and
7095        // only errors on rows no arbiter matched.
7096        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
7097        //   - `DO NOTHING` filters `all_values` to non-conflicting
7098        //     rows + drops within-batch duplicates.
7099        //   - `DO UPDATE SET …` ALSO filters, but for each
7100        //     conflicting row it queues an UPDATE on the existing
7101        //     row using the incoming row's values as `EXCLUDED.*`.
7102        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
7103        let mut skipped_count = 0usize;
7104        if let Some(clause) = &stmt.on_conflict {
7105            let (conflict_cols, conflict_nnd) = resolve_on_conflict_columns(
7106                self.active_catalog(),
7107                &stmt.table,
7108                clause.target_columns.as_slice(),
7109            )?;
7110            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
7111            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
7112            for values in all_values {
7113                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
7114                // SQL spec: NULL in any conflict column means "no
7115                // conflict possible" (NULL ≠ NULL for uniqueness) —
7116                // UNLESS the constraint says NULLS NOT DISTINCT
7117                // (v7.29; mailrs migrate-013 replays its seed row
7118                // ('super', NULL) under exactly that declaration).
7119                let has_null_key =
7120                    !conflict_nnd && key_tuple.iter().any(|v| matches!(v, Value::Null));
7121                let collides_with_table = !has_null_key
7122                    && on_conflict_keys_exist(
7123                        self.active_catalog(),
7124                        &stmt.table,
7125                        &conflict_cols,
7126                        &key_tuple,
7127                    );
7128                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
7129                let collides_with_batch =
7130                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
7131                let collides = collides_with_table || collides_with_batch;
7132                match (&clause.action, collides) {
7133                    (_, false) => {
7134                        seen_keys.push(key_tuple_owned);
7135                        kept.push(values);
7136                    }
7137                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
7138                        skipped_count += 1;
7139                    }
7140                    (
7141                        spg_sql::ast::OnConflictAction::Update {
7142                            assignments,
7143                            where_,
7144                        },
7145                        true,
7146                    ) => {
7147                        if !collides_with_table {
7148                            skipped_count += 1;
7149                            continue;
7150                        }
7151                        let target_pos = lookup_row_position_by_keys(
7152                            self.active_catalog(),
7153                            &stmt.table,
7154                            &conflict_cols,
7155                            &key_tuple,
7156                        )
7157                        .ok_or_else(|| {
7158                            EngineError::Unsupported(
7159                                "ON CONFLICT DO UPDATE: conflict detected but row \
7160                                 position could not be resolved (cold-tier row?)"
7161                                    .into(),
7162                            )
7163                        })?;
7164                        let updated = apply_on_conflict_assignments(
7165                            self.active_catalog(),
7166                            &stmt.table,
7167                            target_pos,
7168                            &values,
7169                            assignments,
7170                            where_.as_ref(),
7171                        )?;
7172                        if let Some(new_row) = updated {
7173                            pending_updates.push((target_pos, new_row));
7174                        } else {
7175                            skipped_count += 1;
7176                        }
7177                    }
7178                }
7179            }
7180            all_values = kept;
7181        }
7182        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
7183        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
7184        // Both run on the post-ON-CONFLICT row set: conflicting rows
7185        // already left `all_values` (DO NOTHING drop / DO UPDATE
7186        // reroute), so what remains must be genuinely unique.
7187        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
7188        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
7189        // Stage 3 — insert all rows under a fresh mutable borrow.
7190        let table = self
7191            .active_catalog_mut()
7192            .get_mut(&stmt.table)
7193            .ok_or_else(|| {
7194                EngineError::Storage(StorageError::TableNotFound {
7195                    name: stmt.table.clone(),
7196                })
7197            })?;
7198        // v7.9.4 — keep RETURNING projection rows separate per
7199        // INSERT and per UPDATE branch so DO UPDATE pushes the new
7200        // post-update state, not the incoming-only values.
7201        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
7202        // v7.12.7 — collect embedded SQL emitted by any trigger
7203        // fire across the row loop; engine drains the queue after
7204        // the table mut borrow drops.
7205        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
7206        'rowloop: for values in all_values {
7207            let mut row = Row::new(values);
7208            // v7.12.4 — BEFORE INSERT row-level triggers. Each
7209            // trigger may rewrite NEW cells (e.g. populate
7210            // `search_vector := to_tsvector(...)`) and may return
7211            // NULL to skip the row entirely.
7212            for fd in &before_insert_triggers {
7213                let (outcome, deferred) = triggers::fire_row_trigger(
7214                    fd,
7215                    Some(row.clone()),
7216                    None,
7217                    &stmt.table,
7218                    &column_meta,
7219                    &[],
7220                    trigger_session_cfg.as_deref(),
7221                    false,
7222                )
7223                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
7224                deferred_embedded.extend(deferred);
7225                match outcome {
7226                    triggers::TriggerOutcome::Row(r) => row = r,
7227                    triggers::TriggerOutcome::Skip => continue 'rowloop,
7228                }
7229            }
7230            if stmt.returning.is_some() {
7231                returning_rows.push(row.values.clone());
7232            }
7233            // v7.12.4 — clone for the AFTER trigger view; insert
7234            // moves the row into the table.
7235            let inserted = row.clone();
7236            table.insert(row)?;
7237            affected += 1;
7238            // v7.12.4 — AFTER INSERT row-level triggers fire post-
7239            // write. Return value is ignored (PG semantics); we
7240            // surface any error from the body up to the caller.
7241            for fd in &after_insert_triggers {
7242                let (_outcome, deferred) = triggers::fire_row_trigger(
7243                    fd,
7244                    Some(inserted.clone()),
7245                    None,
7246                    &stmt.table,
7247                    &column_meta,
7248                    &[],
7249                    trigger_session_cfg.as_deref(),
7250                    true,
7251                )
7252                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
7253                deferred_embedded.extend(deferred);
7254            }
7255        }
7256        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
7257        // in the conflict-resolution pass. update_row handles
7258        // index maintenance + body re-encoding.
7259        for (pos, new_row) in pending_updates {
7260            if stmt.returning.is_some() {
7261                returning_rows.push(new_row.clone());
7262            }
7263            table.update_row(pos, new_row)?;
7264            affected += 1;
7265        }
7266        let _ = skipped_count;
7267        // v7.12.7 — drop the table mut borrow and drain any
7268        // trigger-emitted embedded SQL queued during this INSERT.
7269        // The borrow has to release first because each deferred
7270        // stmt may UPDATE / INSERT / DELETE the same (or another)
7271        // table — including, in principle, this one.
7272        let _ = table;
7273        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
7274        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
7275        // up in the table after this statement (insert or
7276        // post-update on conflict).
7277        if let Some(items) = &stmt.returning {
7278            return self.build_returning_rows(&stmt.table, items, returning_rows);
7279        }
7280        // v6.2.1 — auto-analyze: track per-table modified-row
7281        // counter so the background sweep can decide when to
7282        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
7283        // — one BTreeMap entry update per INSERT batch.
7284        if !self.in_transaction() && affected > 0 {
7285            self.statistics
7286                .record_modifications(&stmt.table, affected as u64);
7287        }
7288        Ok(QueryResult::CommandOk {
7289            affected,
7290            modified_catalog: !self.in_transaction(),
7291        })
7292    }
7293
7294    /// v4.5: SELECT with cooperative cancellation. The token is
7295    /// honoured between UNION peers and inside the bare-SELECT row
7296    /// loop; HNSW kNN graph walks and the aggregate executor don't
7297    /// honour it yet (deferred — those paths bound their work
7298    /// internally by `LIMIT k` and `GROUP BY` cardinality).
7299    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
7300    /// by id, decodes each row body against the table's current
7301    /// schema, applies the SELECT's projection + optional WHERE +
7302    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
7303    /// / ORDER BY are unsupported on this path (STABILITY carve-
7304    /// out); operators wanting them should restore the segment
7305    /// into a regular table first.
7306    fn exec_select_as_of_segment(
7307        &self,
7308        stmt: &SelectStatement,
7309        from: &spg_sql::ast::FromClause,
7310        segment_id: u32,
7311    ) -> Result<QueryResult, EngineError> {
7312        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
7313        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
7314        if !from.joins.is_empty()
7315            || stmt.group_by.is_some()
7316            || stmt.having.is_some()
7317            || !stmt.unions.is_empty()
7318            || !stmt.order_by.is_empty()
7319            || stmt.offset.is_some()
7320            || stmt.distinct
7321            || aggregate::uses_aggregate(stmt)
7322        {
7323            return Err(EngineError::Unsupported(
7324                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
7325                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
7326                    .into(),
7327            ));
7328        }
7329        let table = self
7330            .active_catalog()
7331            .get(&from.primary.name)
7332            .ok_or_else(|| StorageError::TableNotFound {
7333                name: from.primary.name.clone(),
7334            })?;
7335        let schema = table.schema().clone();
7336        let schema_cols = &schema.columns;
7337        let alias = from
7338            .primary
7339            .alias
7340            .as_deref()
7341            .unwrap_or(from.primary.name.as_str());
7342        let ctx = EvalContext::new(schema_cols, Some(alias));
7343        let seg = self
7344            .active_catalog()
7345            .cold_segment(segment_id)
7346            .ok_or_else(|| {
7347                EngineError::Unsupported(alloc::format!(
7348                    "AS OF SEGMENT: cold segment {segment_id} not registered"
7349                ))
7350            })?;
7351        let mut out_rows: Vec<Row> = Vec::new();
7352        let mut limit_remaining: Option<usize> =
7353            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
7354        for (_key, body) in seg.scan() {
7355            let (row, _consumed) =
7356                spg_storage::decode_row_body_dense(&body, &schema, seg.codec_version())
7357                    .map_err(EngineError::Storage)?;
7358            if let Some(where_expr) = &stmt.where_ {
7359                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
7360                if !matches!(cond, Value::Bool(true)) {
7361                    continue;
7362                }
7363            }
7364            // Projection.
7365            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
7366            out_rows.push(projected);
7367            if let Some(rem) = limit_remaining.as_mut() {
7368                if *rem == 0 {
7369                    out_rows.pop();
7370                    break;
7371                }
7372                *rem -= 1;
7373            }
7374        }
7375        // Output column schema: derive from SELECT items.
7376        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
7377        Ok(QueryResult::Rows {
7378            columns,
7379            rows: out_rows,
7380        })
7381    }
7382
7383    /// v6.10.2 — simple-path WHERE eval that doesn't go through
7384    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
7385    /// scan paths predicate against a snapshot frozen segment, no
7386    /// cross-row state.
7387    fn eval_expr_simple(
7388        &self,
7389        expr: &Expr,
7390        row: &Row,
7391        ctx: &EvalContext,
7392    ) -> Result<Value, EngineError> {
7393        let cancel = CancelToken::none();
7394        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
7395    }
7396
7397    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
7398    /// Given the table name, the user-supplied projection items,
7399    /// and the mutated rows (post-insert / post-update values, or
7400    /// pre-delete snapshot), build a `QueryResult::Rows` whose
7401    /// schema describes the projected columns. Mailrs migration
7402    /// blocker #1.
7403    fn build_returning_rows(
7404        &self,
7405        table_name: &str,
7406        items: &[SelectItem],
7407        mutated_rows: Vec<Vec<Value>>,
7408    ) -> Result<QueryResult, EngineError> {
7409        let table = self.active_catalog().get(table_name).ok_or_else(|| {
7410            EngineError::Storage(StorageError::TableNotFound {
7411                name: table_name.into(),
7412            })
7413        })?;
7414        let schema_cols = table.schema().columns.clone();
7415        let columns = self.derive_output_columns(items, &schema_cols, table_name);
7416        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
7417        for values in mutated_rows {
7418            let row = Row::new(values);
7419            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
7420            out_rows.push(projected);
7421        }
7422        Ok(QueryResult::Rows {
7423            columns,
7424            rows: out_rows,
7425        })
7426    }
7427
7428    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
7429    /// `SelectItem::Wildcard` to all schema columns and
7430    /// `SelectItem::Expr` via the regular eval path.
7431    fn project_row_simple(
7432        &self,
7433        row: &Row,
7434        items: &[SelectItem],
7435        schema_cols: &[ColumnSchema],
7436        alias: &str,
7437    ) -> Result<Row, EngineError> {
7438        let ctx = EvalContext::new(schema_cols, Some(alias));
7439        let cancel = CancelToken::none();
7440        let mut out_vals = Vec::new();
7441        for item in items {
7442            match item {
7443                SelectItem::Wildcard => {
7444                    out_vals.extend(row.values.iter().cloned());
7445                }
7446                SelectItem::Expr { expr, .. } => {
7447                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
7448                    out_vals.push(v);
7449                }
7450            }
7451        }
7452        Ok(Row::new(out_vals))
7453    }
7454
7455    /// v6.10.2 — derive the output `ColumnSchema` list for an
7456    /// AS OF SEGMENT projection. Wildcards take the full schema;
7457    /// expressions take the alias if present or a synthetic
7458    /// `?column?` (PG convention) otherwise.
7459    fn derive_output_columns(
7460        &self,
7461        items: &[SelectItem],
7462        schema_cols: &[ColumnSchema],
7463        _alias: &str,
7464    ) -> Vec<ColumnSchema> {
7465        let mut out = Vec::new();
7466        for item in items {
7467            match item {
7468                SelectItem::Wildcard => {
7469                    out.extend(schema_cols.iter().cloned());
7470                }
7471                SelectItem::Expr { expr, alias } => {
7472                    // Bare column references inherit the schema
7473                    // column's name + type — PG names `RETURNING id`
7474                    // "id" and types it BIGINT, and the sqlx embed
7475                    // path type-checks RowDescription against the
7476                    // Rust target (mailrs embed round-12).
7477                    if let Expr::Column(col) = expr
7478                        && let Some(sc) = schema_cols.iter().find(|c| c.name == col.name)
7479                    {
7480                        let name = alias.clone().unwrap_or_else(|| sc.name.clone());
7481                        out.push(ColumnSchema::new(name, sc.ty, sc.nullable));
7482                        continue;
7483                    }
7484                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
7485                    // Default to Text; the caller's row values
7486                    // carry the actual type. v6.10.2 scope.
7487                    out.push(ColumnSchema::new(name, DataType::Text, true));
7488                }
7489            }
7490        }
7491        out
7492    }
7493
7494    fn exec_select_cancel(
7495        &self,
7496        stmt: &SelectStatement,
7497        cancel: CancelToken<'_>,
7498    ) -> Result<QueryResult, EngineError> {
7499        cancel.check()?;
7500        // v7.17.0 Phase 1.2 — user-defined VIEW expansion. If the
7501        // FROM / JOIN graph references any catalogued view name,
7502        // re-parse the view body and prepend it as a synthetic
7503        // CTE. Recurses on views-in-views via the regular CTE
7504        // dispatch below. Fast-path: skip the walker entirely when
7505        // the catalog has no views (the typical OLTP load).
7506        if !self.active_catalog().views().is_empty() {
7507            if let Some(rewritten) = self.expand_views_in_select(stmt)? {
7508                return self.exec_select_cancel(&rewritten, cancel);
7509            }
7510        }
7511        // v7.16.2 — information_schema / pg_catalog virtual
7512        // views (mailrs round-10 A.3). If the SELECT touches a
7513        // synthetic meta-table name (`__spg_info_*` /
7514        // `__spg_pg_*` — produced by the parser for
7515        // `information_schema.X` / `pg_catalog.X`), clone the
7516        // catalog, materialise the requested view as a real
7517        // temporary table, and re-execute against an enriched
7518        // engine. Same pattern as `exec_with_ctes` for CTEs.
7519        if !self.meta_views_materialised && select_references_meta_view(stmt) {
7520            return self.exec_select_with_meta_views(stmt, cancel);
7521        }
7522        // v6.10.2 — cold-tier time-travel short-circuit. When the
7523        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
7524        // dedicated cold-segment scan instead of the regular
7525        // hot+index path. The scope is intentionally narrow for
7526        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
7527        // optionally with a single-column-equality WHERE. JOINs /
7528        // aggregates / ORDER BY / subqueries on top of a time-
7529        // travelled scan are STABILITY § "Out of v6.10".
7530        if let Some(from) = &stmt.from
7531            && let Some(seg_id) = from.primary.as_of_segment
7532        {
7533            return self.exec_select_as_of_segment(stmt, from, seg_id);
7534        }
7535        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
7536        // pre-CTE because they don't read from the catalog and
7537        // shouldn't participate in regular FROM resolution.
7538        if let Some(from) = &stmt.from
7539            && from.joins.is_empty()
7540            && stmt.where_.is_none()
7541            && stmt.group_by.is_none()
7542            && stmt.having.is_none()
7543            && stmt.unions.is_empty()
7544            && stmt.order_by.is_empty()
7545            && stmt.limit.is_none()
7546            && stmt.offset.is_none()
7547            && !stmt.distinct
7548            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
7549        {
7550            let lower = from.primary.name.to_ascii_lowercase();
7551            match lower.as_str() {
7552                "spg_statistic" => return Ok(self.exec_spg_statistic()),
7553                // v6.5.0 — observability v2 virtual tables.
7554                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
7555                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
7556                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
7557                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
7558                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
7559                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
7560                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
7561                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
7562                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
7563                _ => {}
7564            }
7565        }
7566        // v4.11: CTEs materialise into a temporary enriched catalog
7567        // *before* anything else — the body SELECT can then refer
7568        // to CTE names via the regular FROM-clause resolution.
7569        // Uncorrelated only: each CTE body runs once against the
7570        // current catalog, not against later CTEs' results (left-
7571        // to-right materialisation would relax this, but we keep
7572        // it simple for v4.11 MVP).
7573        if !stmt.ctes.is_empty() {
7574            return self.exec_with_ctes(stmt, cancel);
7575        }
7576        // v4.10: subqueries (uncorrelated) are resolved here, before
7577        // the executor sees the row loop. We clone the statement so
7578        // we can mutate without disturbing the caller's AST — most
7579        // queries pass through with no subquery nodes and the clone
7580        // is cheap; with subqueries the materialisation cost
7581        // dominates anyway.
7582        let mut stmt_owned;
7583        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
7584            stmt_owned = stmt.clone();
7585            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
7586            &stmt_owned
7587        } else {
7588            stmt
7589        };
7590        if stmt_ref.unions.is_empty() {
7591            return self.exec_bare_select_cancel(stmt_ref, cancel);
7592        }
7593        // UNION path: clone-strip the head into a bare block (its own
7594        // DISTINCT and any inner ORDER BY are dropped by parser rule —
7595        // the wrapper SelectStatement carries them), execute, then chain
7596        // peers with left-associative dedup semantics.
7597        let mut head = stmt_ref.clone();
7598        head.unions = Vec::new();
7599        head.order_by = Vec::new();
7600        head.limit = None;
7601        let QueryResult::Rows { columns, mut rows } =
7602            self.exec_bare_select_cancel(&head, cancel)?
7603        else {
7604            unreachable!("bare SELECT cannot return CommandOk")
7605        };
7606        for (kind, peer) in &stmt_ref.unions {
7607            let QueryResult::Rows {
7608                columns: peer_cols,
7609                rows: peer_rows,
7610            } = self.exec_bare_select_cancel(peer, cancel)?
7611            else {
7612                unreachable!("bare SELECT cannot return CommandOk")
7613            };
7614            if peer_cols.len() != columns.len() {
7615                return Err(EngineError::Unsupported(alloc::format!(
7616                    "UNION arity mismatch: head has {} columns, peer has {}",
7617                    columns.len(),
7618                    peer_cols.len()
7619                )));
7620            }
7621            rows.extend(peer_rows);
7622            if matches!(kind, UnionKind::Distinct) {
7623                rows = dedup_rows(rows);
7624            }
7625        }
7626        // ORDER BY at the top of a UNION applies to the combined result.
7627        // Eval against the projected schema (NOT the source table).
7628        if !stmt.order_by.is_empty() {
7629            let synth_ctx = EvalContext::new(&columns, None);
7630            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
7631            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
7632            for r in rows {
7633                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
7634                tagged.push((keys, r));
7635            }
7636            sort_by_keys(&mut tagged, &descs);
7637            rows = tagged.into_iter().map(|(_, r)| r).collect();
7638        }
7639        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
7640        Ok(QueryResult::Rows { columns, rows })
7641    }
7642
7643    #[allow(clippy::too_many_lines)]
7644    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
7645    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
7646    /// Synthesises a single-column virtual table whose column type
7647    /// is TEXT and whose rows are the array elements. Routes
7648    /// through the regular projection / WHERE / ORDER BY / LIMIT
7649    /// machinery so set-returning UNNEST composes naturally with
7650    /// the rest of the SELECT surface.
7651    fn exec_select_unnest(
7652        &self,
7653        stmt: &SelectStatement,
7654        primary: &TableRef,
7655        cancel: CancelToken<'_>,
7656    ) -> Result<QueryResult, EngineError> {
7657        let expr = primary
7658            .unnest_expr
7659            .as_deref()
7660            .expect("caller guards unnest_expr.is_some()");
7661        // Evaluate the array expression once. Empty schema / empty
7662        // row — uncorrelated UNNEST cannot reference outer columns.
7663        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
7664        let ctx = EvalContext::new(&empty_schema, None);
7665        let dummy_row = Row::new(alloc::vec::Vec::new());
7666        // v7.11.13 — unnest dispatches per array element type so
7667        // INT[] / BIGINT[] surface their PG types in projection.
7668        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
7669            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
7670                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
7671                Value::TextArray(items) => {
7672                    let rows = items
7673                        .into_iter()
7674                        .map(|item| {
7675                            Row::new(alloc::vec![match item {
7676                                Some(s) => Value::Text(s),
7677                                None => Value::Null,
7678                            }])
7679                        })
7680                        .collect();
7681                    (DataType::Text, rows)
7682                }
7683                Value::IntArray(items) => {
7684                    let rows = items
7685                        .into_iter()
7686                        .map(|item| {
7687                            Row::new(alloc::vec![match item {
7688                                Some(n) => Value::Int(n),
7689                                None => Value::Null,
7690                            }])
7691                        })
7692                        .collect();
7693                    (DataType::Int, rows)
7694                }
7695                Value::BigIntArray(items) => {
7696                    let rows = items
7697                        .into_iter()
7698                        .map(|item| {
7699                            Row::new(alloc::vec![match item {
7700                                Some(n) => Value::BigInt(n),
7701                                None => Value::Null,
7702                            }])
7703                        })
7704                        .collect();
7705                    (DataType::BigInt, rows)
7706                }
7707                other => {
7708                    return Err(EngineError::Unsupported(alloc::format!(
7709                        "unnest() expects an array argument, got {:?}",
7710                        other.data_type()
7711                    )));
7712                }
7713            };
7714        let alias = primary
7715            .alias
7716            .clone()
7717            .unwrap_or_else(|| "unnest".to_string());
7718        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
7719        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
7720        // first entry overrides the projected column's name.
7721        // Without the column list, fall back to the table alias
7722        // (pre-v7.13.2 behaviour).
7723        let col_name = primary
7724            .unnest_column_aliases
7725            .first()
7726            .cloned()
7727            .unwrap_or_else(|| alias.clone());
7728        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
7729        let schema_cols = alloc::vec![col_schema.clone()];
7730        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
7731        // Apply WHERE.
7732        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
7733            let mut out = alloc::vec::Vec::with_capacity(rows.len());
7734            for row in rows {
7735                cancel.check()?;
7736                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
7737                if matches!(v, Value::Bool(true)) {
7738                    out.push(row);
7739                }
7740            }
7741            out
7742        } else {
7743            rows
7744        };
7745        // v7.17.0 Phase 3.P0-48 — aggregate dispatch over the
7746        // unnest source. Same routing the relational scan path
7747        // already takes — without it `SELECT COUNT(*) FROM
7748        // unnest(ARRAY[…])` either errored at projection time or
7749        // returned the wrong shape.
7750        if aggregate::uses_aggregate(stmt) {
7751            // v7.29 — a per-query memo so correlated scalar
7752            // subqueries batch-evaluate once (group map) instead of
7753            // executing per group.
7754            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
7755            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
7756                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
7757                    .map_err(|err| match err {
7758                        EngineError::Eval(ev) => ev,
7759                        other => eval::EvalError::TypeMismatch {
7760                            detail: alloc::format!("{other}"),
7761                        },
7762                    })
7763            };
7764            let filtered_refs: alloc::vec::Vec<&Row> = filtered.iter().collect();
7765            let mut agg = aggregate::run(
7766                stmt,
7767                &filtered_refs,
7768                &schema_cols,
7769                Some(&alias),
7770                Some(&agg_correlated),
7771            )?;
7772            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
7773            return Ok(QueryResult::Rows {
7774                columns: agg.columns,
7775                rows: agg.rows,
7776            });
7777        }
7778        // Projection.
7779        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
7780        let mut projected_rows: alloc::vec::Vec<Row> =
7781            alloc::vec::Vec::with_capacity(filtered.len());
7782        // v7.19 P5 — Set-Returning-Function in projection
7783        // position (PG `SELECT unnest(arr) FROM t` shape). When a
7784        // SELECT item evaluates to a top-level unnest(arr) call,
7785        // expand it: for each input row, evaluate the array, emit
7786        // one output row per element, broadcasting non-SRF
7787        // projections from the same input row. Multi-SRF + LCM
7788        // padding stays a documented carve-out; mailrs uses
7789        // single-SRF for redirect_uris.
7790        let srf_position = projection.iter().position(|p| is_top_level_unnest(&p.expr));
7791        if let Some(srf_idx) = srf_position {
7792            let srf_arg = top_level_unnest_arg(&projection[srf_idx].expr)
7793                .expect("checked by is_top_level_unnest above");
7794            for row in &filtered {
7795                let arr_val =
7796                    eval::eval_expr(srf_arg, row, &scan_ctx).map_err(EngineError::Eval)?;
7797                let elements = array_value_to_elements(&arr_val)?;
7798                // Empty array → zero rows for this input row (PG
7799                // semantics: `SELECT unnest('{}'::int[])` returns
7800                // 0 rows, not a single NULL row).
7801                for elem in elements {
7802                    let mut vals = alloc::vec::Vec::with_capacity(projection.len());
7803                    for (i, p) in projection.iter().enumerate() {
7804                        if i == srf_idx {
7805                            vals.push(elem.clone());
7806                        } else {
7807                            vals.push(
7808                                eval::eval_expr(&p.expr, row, &scan_ctx)
7809                                    .map_err(EngineError::Eval)?,
7810                            );
7811                        }
7812                    }
7813                    projected_rows.push(Row::new(vals));
7814                }
7815            }
7816        } else {
7817            // v7.24 (round-16 B) — select-list subqueries resolve
7818            // per row (correlated-aware; plain exprs take the fast
7819            // path inside).
7820            let mut proj_memo = memoize::MemoizeCache::default();
7821            for row in &filtered {
7822                let mut vals = alloc::vec::Vec::with_capacity(projection.len());
7823                for p in &projection {
7824                    vals.push(self.eval_expr_with_correlated(
7825                        &p.expr,
7826                        row,
7827                        &scan_ctx,
7828                        cancel,
7829                        Some(&mut proj_memo),
7830                    )?);
7831                }
7832                projected_rows.push(Row::new(vals));
7833            }
7834        }
7835        // ORDER BY / LIMIT — apply on the projected rows (cheap;
7836        // unnest result sets are small by design).
7837        let columns: alloc::vec::Vec<ColumnSchema> = projection
7838            .iter()
7839            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
7840            .collect();
7841        // Re-evaluate ORDER BY against the source schema (pre-projection
7842        // so col refs by name still resolve through `scan_ctx`).
7843        if !stmt.order_by.is_empty() {
7844            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
7845                .iter()
7846                .enumerate()
7847                .map(|(i, r)| -> Result<_, EngineError> {
7848                    let keys: Result<Vec<Value>, EngineError> = stmt
7849                        .order_by
7850                        .iter()
7851                        .map(|ob| {
7852                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
7853                        })
7854                        .collect();
7855                    Ok((i, keys?))
7856                })
7857                .collect::<Result<_, _>>()?;
7858            indexed.sort_by(|a, b| {
7859                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
7860                    let o = &stmt.order_by[idx];
7861                    let cmp = order_by_value_cmp(o.desc, o.nulls_first, ka, kb);
7862                    if cmp != core::cmp::Ordering::Equal {
7863                        return cmp;
7864                    }
7865                }
7866                core::cmp::Ordering::Equal
7867            });
7868            projected_rows = indexed
7869                .into_iter()
7870                .map(|(i, _)| projected_rows[i].clone())
7871                .collect();
7872        }
7873        // LIMIT / OFFSET — apply at the tail.
7874        if let Some(offset) = stmt.offset_literal() {
7875            let off = (offset as usize).min(projected_rows.len());
7876            projected_rows.drain(..off);
7877        }
7878        if let Some(limit) = stmt.limit_literal() {
7879            projected_rows.truncate(limit as usize);
7880        }
7881        Ok(QueryResult::Rows {
7882            columns,
7883            rows: projected_rows,
7884        })
7885    }
7886
7887    /// v7.17.0 Phase 3.10 — `FROM generate_series(start, stop [,
7888    /// step])` set-returning source. Mirrors `exec_select_unnest`'s
7889    /// shape: evaluate the arg list once against an empty row,
7890    /// materialise the row stream by stepping start → stop, then
7891    /// route through the standard WHERE / projection / ORDER BY /
7892    /// LIMIT pipeline. Two arg-type combos in v7.17:
7893    ///   * integer / integer [/ integer] — SmallInt, Int, BigInt
7894    ///     (widened to BigInt internally; step defaults to 1)
7895    ///   * timestamp / timestamp / interval — date-range
7896    ///     iteration (mailrs's daily-report pattern)
7897    fn exec_select_generate_series(
7898        &self,
7899        stmt: &SelectStatement,
7900        primary: &TableRef,
7901        cancel: CancelToken<'_>,
7902    ) -> Result<QueryResult, EngineError> {
7903        let args = primary
7904            .generate_series_args
7905            .as_ref()
7906            .expect("caller guards generate_series_args.is_some()");
7907        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
7908        let ctx = EvalContext::new(&empty_schema, None);
7909        let dummy_row = Row::new(alloc::vec::Vec::new());
7910        let mut arg_values: alloc::vec::Vec<Value> = alloc::vec::Vec::with_capacity(args.len());
7911        for a in args {
7912            arg_values.push(eval::eval_expr(a, &dummy_row, &ctx).map_err(EngineError::Eval)?);
7913        }
7914        // Dispatch on the start value's shape. Reject mixed-shape
7915        // calls early (e.g. start = timestamp, stop = integer) so
7916        // the caller gets a clean error rather than a panic.
7917        let (elem_dtype, rows) = match arg_values.as_slice() {
7918            [Value::Timestamp(start), Value::Timestamp(stop), step] => {
7919                let interval_step = match step {
7920                    Value::Interval { .. } => step.clone(),
7921                    other => {
7922                        return Err(EngineError::Unsupported(alloc::format!(
7923                            "generate_series(timestamp, timestamp, …): \
7924                             step must be INTERVAL, got {:?}",
7925                            other.data_type()
7926                        )));
7927                    }
7928                };
7929                let rows = generate_series_timestamps(*start, *stop, interval_step, &cancel)?;
7930                (DataType::Timestamp, rows)
7931            }
7932            [start, stop, step]
7933                if value_is_integer(start) && value_is_integer(stop) && value_is_integer(step) =>
7934            {
7935                let s = value_to_i64(start);
7936                let e = value_to_i64(stop);
7937                let st = value_to_i64(step);
7938                let rows = generate_series_integers(s, e, st, &cancel)?;
7939                (DataType::BigInt, rows)
7940            }
7941            [start, stop] if value_is_integer(start) && value_is_integer(stop) => {
7942                let s = value_to_i64(start);
7943                let e = value_to_i64(stop);
7944                let rows = generate_series_integers(s, e, 1, &cancel)?;
7945                (DataType::BigInt, rows)
7946            }
7947            _ => {
7948                return Err(EngineError::Unsupported(alloc::format!(
7949                    "generate_series(): v7.17 supports integer or (timestamp, timestamp, interval) \
7950                     argument shapes; got {:?}",
7951                    arg_values
7952                        .iter()
7953                        .map(|v| v.data_type())
7954                        .collect::<alloc::vec::Vec<_>>()
7955                )));
7956            }
7957        };
7958        let alias = primary
7959            .alias
7960            .clone()
7961            .unwrap_or_else(|| "generate_series".to_string());
7962        let col_name = alias.clone();
7963        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
7964        let schema_cols = alloc::vec![col_schema.clone()];
7965        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
7966        // WHERE.
7967        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
7968            let mut out = alloc::vec::Vec::with_capacity(rows.len());
7969            for row in rows {
7970                cancel.check()?;
7971                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
7972                if matches!(v, Value::Bool(true)) {
7973                    out.push(row);
7974                }
7975            }
7976            out
7977        } else {
7978            rows
7979        };
7980        // v7.17.0 Phase 3.P0-48 — aggregate dispatch for set-
7981        // returning sources. When the SELECT projection contains
7982        // aggregate functions (COUNT/SUM/MIN/MAX/AVG/string_agg/
7983        // …) we route the filtered row stream through the same
7984        // aggregate executor the relational scan path uses, so
7985        // `SELECT COUNT(*) FROM generate_series(1, 100)` returns
7986        // a single 100 row instead of erroring at projection
7987        // time. GROUP BY / HAVING / ORDER BY over the aggregate
7988        // output all ride through `aggregate::run`.
7989        if aggregate::uses_aggregate(stmt) {
7990            // v7.29 — a per-query memo so correlated scalar
7991            // subqueries batch-evaluate once (group map) instead of
7992            // executing per group.
7993            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
7994            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
7995                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
7996                    .map_err(|err| match err {
7997                        EngineError::Eval(ev) => ev,
7998                        other => eval::EvalError::TypeMismatch {
7999                            detail: alloc::format!("{other}"),
8000                        },
8001                    })
8002            };
8003            let filtered_refs: alloc::vec::Vec<&Row> = filtered.iter().collect();
8004            let mut agg = aggregate::run(
8005                stmt,
8006                &filtered_refs,
8007                &schema_cols,
8008                Some(&alias),
8009                Some(&agg_correlated),
8010            )?;
8011            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
8012            return Ok(QueryResult::Rows {
8013                columns: agg.columns,
8014                rows: agg.rows,
8015            });
8016        }
8017        // Projection.
8018        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
8019        let mut projected_rows: alloc::vec::Vec<Row> =
8020            alloc::vec::Vec::with_capacity(filtered.len());
8021        let mut proj_memo = memoize::MemoizeCache::default();
8022        for row in &filtered {
8023            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
8024            for p in &projection {
8025                // v7.24 (round-16 B) — correlated-aware.
8026                vals.push(self.eval_expr_with_correlated(
8027                    &p.expr,
8028                    row,
8029                    &scan_ctx,
8030                    cancel,
8031                    Some(&mut proj_memo),
8032                )?);
8033            }
8034            projected_rows.push(Row::new(vals));
8035        }
8036        let columns: alloc::vec::Vec<ColumnSchema> = projection
8037            .iter()
8038            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
8039            .collect();
8040        // ORDER BY against the source schema.
8041        if !stmt.order_by.is_empty() {
8042            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
8043                .iter()
8044                .enumerate()
8045                .map(|(i, r)| -> Result<_, EngineError> {
8046                    let keys: Result<Vec<Value>, EngineError> = stmt
8047                        .order_by
8048                        .iter()
8049                        .map(|ob| {
8050                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
8051                        })
8052                        .collect();
8053                    Ok((i, keys?))
8054                })
8055                .collect::<Result<_, _>>()?;
8056            indexed.sort_by(|a, b| {
8057                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
8058                    let o = &stmt.order_by[idx];
8059                    let cmp = order_by_value_cmp(o.desc, o.nulls_first, ka, kb);
8060                    if cmp != core::cmp::Ordering::Equal {
8061                        return cmp;
8062                    }
8063                }
8064                core::cmp::Ordering::Equal
8065            });
8066            projected_rows = indexed
8067                .into_iter()
8068                .map(|(i, _)| projected_rows[i].clone())
8069                .collect();
8070        }
8071        if let Some(offset) = stmt.offset_literal() {
8072            let off = (offset as usize).min(projected_rows.len());
8073            projected_rows.drain(..off);
8074        }
8075        if let Some(limit) = stmt.limit_literal() {
8076            projected_rows.truncate(limit as usize);
8077        }
8078        Ok(QueryResult::Rows {
8079            columns,
8080            rows: projected_rows,
8081        })
8082    }
8083
8084    fn exec_bare_select_cancel(
8085        &self,
8086        stmt: &SelectStatement,
8087        cancel: CancelToken<'_>,
8088    ) -> Result<QueryResult, EngineError> {
8089        // v7.17.0 Phase 3.P0-49 — `FETCH FIRST N ROWS WITH TIES`
8090        // is meaningless without an ORDER BY; PG raises a hard
8091        // error and SPG mirrors the surface so the same DDL/app
8092        // path behaves identically on cutover.
8093        check_with_ties_requires_order_by(stmt)?;
8094        // v7.16.2 — same meta-view dispatch as
8095        // `exec_select_cancel`, applied here too because
8096        // `subquery_replacement` enters this function directly
8097        // for Exists / ScalarSubquery / InSubquery resolution
8098        // (bypassing the top-level entry to avoid double
8099        // subquery walking). Without this dispatch the subquery
8100        // hits `__spg_info_columns` and reports TableNotFound.
8101        if !self.meta_views_materialised && select_references_meta_view(stmt) {
8102            return self.exec_select_with_meta_views(stmt, cancel);
8103        }
8104        // v4.12: window-function path. When the projection contains
8105        // any `name(args) OVER (...)` we route to the dedicated
8106        // executor — partition + sort + per-row window value before
8107        // the regular projection.
8108        if select_has_window(stmt) {
8109            return self.exec_select_with_window(stmt, cancel);
8110        }
8111        // Constant SELECT (no FROM) — evaluate each item once against an
8112        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
8113        // `SELECT '7'::INT`. Column references will surface as
8114        // ColumnNotFound on eval since the schema is empty.
8115        let Some(from) = &stmt.from else {
8116            let empty_schema: Vec<ColumnSchema> = Vec::new();
8117            let ctx = self.ev_ctx(&empty_schema, None);
8118            let projection = build_projection(&stmt.items, &empty_schema, "")?;
8119            let dummy_row = Row::new(Vec::new());
8120            let mut values = Vec::with_capacity(projection.len());
8121            for p in &projection {
8122                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
8123            }
8124            let columns: Vec<ColumnSchema> = projection
8125                .into_iter()
8126                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
8127                .collect();
8128            return Ok(QueryResult::Rows {
8129                columns,
8130                rows: alloc::vec![Row::new(values)],
8131            });
8132        };
8133        // Multi-table FROM (one or more joined peers) goes through the
8134        // nested-loop join executor. Single-table FROM stays on the
8135        // existing scan + index-seek path.
8136        if !from.joins.is_empty() {
8137            return self.exec_joined_select(stmt, from, cancel);
8138        }
8139        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
8140        // single-column table at SELECT entry by evaluating the
8141        // expression once against the empty row (UNNEST is
8142        // uncorrelated in v7.11; correlated / LATERAL unnest is a
8143        // v7.12 carve-out). Build a virtual `Table` in a heap-only
8144        // catalog, then route to the regular scan path.
8145        if from.primary.unnest_expr.is_some() {
8146            return self.exec_select_unnest(stmt, &from.primary, cancel);
8147        }
8148        // v7.17.0 Phase 3.10 — `FROM generate_series(start, stop
8149        // [, step])` set-returning source. Dispatch mirrors UNNEST:
8150        // materialise the row stream from a single eval pass, then
8151        // run the regular projection / WHERE / ORDER BY / LIMIT
8152        // pipeline over the synthetic single-column table.
8153        if from.primary.generate_series_args.is_some() {
8154            return self.exec_select_generate_series(stmt, &from.primary, cancel);
8155        }
8156        let primary = &from.primary;
8157        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
8158            StorageError::TableNotFound {
8159                name: primary.name.clone(),
8160            }
8161        })?;
8162        let schema_cols = &table.schema().columns;
8163        // The qualifier accepted on column refs is the alias (if any) else the
8164        // bare table name.
8165        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
8166        let ctx = self.ev_ctx(schema_cols, Some(alias));
8167
8168        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
8169        // WHERE and an NSW index on `col` skips the full scan. The
8170        // walk returns rows already in ascending-distance order, so
8171        // ORDER BY / LIMIT are honoured implicitly.
8172        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
8173            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
8174        }
8175
8176        // Index seek: if WHERE is `col = literal` (or commuted) and the
8177        // referenced column has an index, dispatch each locator through
8178        // the catalog (hot tier → borrow, cold tier → page-read +
8179        // decode) and iterate just those rows. Otherwise fall back to a
8180        // full scan over the hot tier (cold-tier rows are only reached
8181        // via index seek in v5.1 — full table scans against cold-tier
8182        // data ship in v5.2 with the freezer's per-segment scan API).
8183        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
8184            // BTree / col=literal seek first — covers the v7.11.3 multi-
8185            // column AND case and the leading-column equality lookup.
8186            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
8187                .or_else(|| {
8188                    // v7.12.3 — GIN-accelerated `WHERE col @@
8189                    // tsquery` when the column has a `USING gin`
8190                    // index. Returns an over-approximate candidate
8191                    // set; the WHERE re-eval loop below verifies
8192                    // the full `@@` predicate per row.
8193                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
8194                })
8195                .or_else(|| {
8196                    // v7.15.0 — trigram-GIN-accelerated
8197                    // `WHERE col LIKE / ILIKE '<pat>'` when the
8198                    // column has a `gin_trgm_ops` GIN index.
8199                    // Over-approximate candidate set; the WHERE
8200                    // re-eval verifies the LIKE per row.
8201                    try_trgm_seek(w, schema_cols, table, alias)
8202                })
8203        });
8204
8205        // Aggregate path: filter rows first, then hand off to the
8206        // aggregate executor which does its own projection + ORDER BY.
8207        if aggregate::uses_aggregate(stmt) {
8208            let mut filtered: Vec<&Row> = Vec::new();
8209            // v6.2.6 — Memoize: per-query LRU cache for correlated
8210            // scalar subqueries. Fresh per row-loop entry so each
8211            // SELECT execution gets an isolated cache.
8212            let mut memo = memoize::MemoizeCache::new();
8213            if let Some(rows) = &indexed_rows {
8214                for cow in rows {
8215                    let row = cow.as_ref();
8216                    if let Some(where_expr) = &stmt.where_ {
8217                        let cond = self.eval_expr_with_correlated(
8218                            where_expr,
8219                            row,
8220                            &ctx,
8221                            cancel,
8222                            Some(&mut memo),
8223                        )?;
8224                        if !matches!(cond, Value::Bool(true)) {
8225                            continue;
8226                        }
8227                    }
8228                    filtered.push(row);
8229                }
8230            } else {
8231                for i in 0..table.row_count() {
8232                    let row = &table.rows()[i];
8233                    if let Some(where_expr) = &stmt.where_ {
8234                        let cond = self.eval_expr_with_correlated(
8235                            where_expr,
8236                            row,
8237                            &ctx,
8238                            cancel,
8239                            Some(&mut memo),
8240                        )?;
8241                        if !matches!(cond, Value::Bool(true)) {
8242                            continue;
8243                        }
8244                    }
8245                    filtered.push(row);
8246                }
8247            }
8248            // v7.29 — a per-query memo so correlated scalar
8249            // subqueries batch-evaluate once (group map) instead of
8250            // executing per group.
8251            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
8252            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
8253                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
8254                    .map_err(|err| match err {
8255                        EngineError::Eval(ev) => ev,
8256                        other => eval::EvalError::TypeMismatch {
8257                            detail: alloc::format!("{other}"),
8258                        },
8259                    })
8260            };
8261            let mut agg = aggregate::run(
8262                stmt,
8263                &filtered,
8264                schema_cols,
8265                Some(alias),
8266                Some(&agg_correlated),
8267            )?;
8268            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
8269            return Ok(QueryResult::Rows {
8270                columns: agg.columns,
8271                rows: agg.rows,
8272            });
8273        }
8274
8275        let projection = build_projection(&stmt.items, schema_cols, alias)?;
8276        // v7.19 P5 — single-table SELECT path for SRF
8277        // `SELECT unnest(arr) FROM t` shape. Detect a top-level
8278        // unnest in the projection list. When present, the
8279        // per-row processor emits one output row per array
8280        // element (broadcasting non-SRF projections from the
8281        // same input row). Empty / NULL arrays emit zero rows
8282        // for that input — PG semantics.
8283        let srf_position = projection.iter().position(|p| is_top_level_unnest(&p.expr));
8284
8285        // Materialise the filter pass into `(order_key, projected_row)`
8286        // tuples. The order key is `None` when there's no ORDER BY clause.
8287        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
8288        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
8289        let mut memo = memoize::MemoizeCache::new();
8290        // Inline the per-row work in a closure so the indexed and full-
8291        // scan branches share the body.
8292        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
8293            if loop_idx.is_multiple_of(256) {
8294                cancel.check()?;
8295            }
8296            if let Some(where_expr) = &stmt.where_ {
8297                let cond =
8298                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
8299                if !matches!(cond, Value::Bool(true)) {
8300                    return Ok(());
8301                }
8302            }
8303            let order_keys = if stmt.order_by.is_empty() {
8304                Vec::new()
8305            } else {
8306                build_order_keys(&stmt.order_by, row, &ctx)?
8307            };
8308            if let Some(srf_idx) = srf_position {
8309                let srf_arg = top_level_unnest_arg(&projection[srf_idx].expr)
8310                    .expect("checked by is_top_level_unnest above");
8311                let arr_val = eval::eval_expr(srf_arg, row, &ctx)?;
8312                let elements = array_value_to_elements(&arr_val)?;
8313                for elem in elements {
8314                    let mut values = Vec::with_capacity(projection.len());
8315                    for (i, p) in projection.iter().enumerate() {
8316                        if i == srf_idx {
8317                            values.push(elem.clone());
8318                        } else {
8319                            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
8320                        }
8321                    }
8322                    tagged.push((order_keys.clone(), Row::new(values)));
8323                }
8324            } else {
8325                let mut values = Vec::with_capacity(projection.len());
8326                for p in &projection {
8327                    // v7.24 (round-16 B) — correlated-aware.
8328                    values.push(self.eval_expr_with_correlated(&p.expr, row, &ctx, cancel, None)?);
8329                }
8330                tagged.push((order_keys, Row::new(values)));
8331            }
8332            Ok(())
8333        };
8334        if let Some(rows) = &indexed_rows {
8335            for (loop_idx, cow) in rows.iter().enumerate() {
8336                process_row(cow.as_ref(), loop_idx)?;
8337            }
8338        } else {
8339            for i in 0..table.row_count() {
8340                process_row(&table.rows()[i], i)?;
8341            }
8342        }
8343
8344        if !stmt.order_by.is_empty() {
8345            // Partial-sort fast path: when LIMIT is small relative to
8346            // the row count, select_nth_unstable + sort just the
8347            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
8348            // requires the full sort because de-dup happens after.
8349            // WITH TIES likewise needs the full sort so the tie
8350            // extension can scan past `limit` to find rows that
8351            // share the last-kept row's key.
8352            let keep = if stmt.distinct || stmt.limit_with_ties {
8353                None
8354            } else {
8355                stmt.limit_literal()
8356                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
8357            };
8358            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
8359            partial_sort_tagged(&mut tagged, keep, &descs);
8360        }
8361
8362        // v7.17.0 Phase 3.P0-49 — `FETCH FIRST … WITH TIES` extends
8363        // past the truncated tail through every row that shares the
8364        // last-kept row's ORDER BY key. The tie check uses the
8365        // already-computed `(order_keys, row)` pairs so it matches
8366        // the sort comparator exactly. DISTINCT + WITH TIES falls
8367        // through to the no-ties path (PG also disallows their
8368        // combination; SPG silently drops the tie extension here so
8369        // the customer doesn't see a hard error mid-query — the
8370        // user-visible result is still correct, just narrower).
8371        let output_rows: Vec<Row> = if stmt.limit_with_ties && !stmt.distinct {
8372            apply_offset_and_limit_tagged(
8373                &mut tagged,
8374                stmt.offset_literal(),
8375                stmt.limit_literal(),
8376                true,
8377            );
8378            tagged.into_iter().map(|(_, r)| r).collect()
8379        } else {
8380            let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
8381            if stmt.distinct {
8382                output_rows = dedup_rows(output_rows);
8383            }
8384            apply_offset_and_limit(
8385                &mut output_rows,
8386                stmt.offset_literal(),
8387                stmt.limit_literal(),
8388            );
8389            output_rows
8390        };
8391
8392        let columns: Vec<ColumnSchema> = projection
8393            .into_iter()
8394            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
8395            .collect();
8396
8397        Ok(QueryResult::Rows {
8398            columns,
8399            rows: output_rows,
8400        })
8401    }
8402
8403    /// Multi-table SELECT executor (one or more JOIN peers).
8404    ///
8405    /// v1.10 builds the joined row set up-front via nested-loop joins,
8406    /// then runs WHERE + projection + ORDER BY against the combined
8407    /// rows. No index seek. Aggregates and DISTINCT still work because
8408    /// the executor delegates projection through the same shared paths.
8409    #[allow(clippy::too_many_lines)]
8410    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
8411    /// owned (rows, schema) pair. Catalog tables clone their hot
8412    /// rows + schema; UNNEST table refs evaluate their array
8413    /// expression once and synthesise a single-column row set
8414    /// using the same dispatch as `exec_select_unnest`. Used by
8415    /// the joined-select path so UNNEST can appear in any FROM
8416    /// position, not just as the primary.
8417    fn materialise_table_ref(
8418        &self,
8419        tref: &TableRef,
8420    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
8421        if let Some(expr) = tref.unnest_expr.as_deref() {
8422            let empty_schema: Vec<ColumnSchema> = Vec::new();
8423            let ctx = EvalContext::new(&empty_schema, None);
8424            let dummy_row = Row::new(Vec::new());
8425            let (elem_dtype, rows) =
8426                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
8427                    Value::Null => (DataType::Text, Vec::new()),
8428                    Value::TextArray(items) => (
8429                        DataType::Text,
8430                        items
8431                            .into_iter()
8432                            .map(|item| {
8433                                Row::new(alloc::vec![match item {
8434                                    Some(s) => Value::Text(s),
8435                                    None => Value::Null,
8436                                }])
8437                            })
8438                            .collect(),
8439                    ),
8440                    Value::IntArray(items) => (
8441                        DataType::Int,
8442                        items
8443                            .into_iter()
8444                            .map(|item| {
8445                                Row::new(alloc::vec![match item {
8446                                    Some(n) => Value::Int(n),
8447                                    None => Value::Null,
8448                                }])
8449                            })
8450                            .collect(),
8451                    ),
8452                    Value::BigIntArray(items) => (
8453                        DataType::BigInt,
8454                        items
8455                            .into_iter()
8456                            .map(|item| {
8457                                Row::new(alloc::vec![match item {
8458                                    Some(n) => Value::BigInt(n),
8459                                    None => Value::Null,
8460                                }])
8461                            })
8462                            .collect(),
8463                    ),
8464                    other => {
8465                        return Err(EngineError::Unsupported(alloc::format!(
8466                            "unnest() expects an array argument, got {:?}",
8467                            other.data_type()
8468                        )));
8469                    }
8470                };
8471            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
8472            let col_name = tref.unnest_column_aliases.first().cloned().unwrap_or(alias);
8473            return Ok((
8474                rows,
8475                alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)],
8476            ));
8477        }
8478        let table =
8479            self.active_catalog()
8480                .get(&tref.name)
8481                .ok_or_else(|| StorageError::TableNotFound {
8482                    name: tref.name.clone(),
8483                })?;
8484        let rows: Vec<Row> = table.rows().iter().cloned().collect();
8485        let cols = table.schema().columns.clone();
8486        Ok((rows, cols))
8487    }
8488
8489    /// v7.28 (round-22) — materialise a plain table ref with
8490    /// single-table predicates pushed BELOW the clone: an indexed
8491    /// `col = literal` narrows to the matching row ids before any
8492    /// row is cloned, the rest filter linearly. A correlated
8493    /// subquery body like `… JOIN messages m2 ON …
8494    /// WHERE m2.thread_id = '<outer>'` runs per GROUP — without
8495    /// this it cloned + scanned the full 24k-row table 23.5k times.
8496    /// Falls back to the plain path for non-table refs.
8497    fn materialise_table_ref_filtered(
8498        &self,
8499        tref: &TableRef,
8500        preds: &[&Expr],
8501    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
8502        if preds.is_empty()
8503            || tref.unnest_expr.is_some()
8504            || tref.lateral_subquery.is_some()
8505            || tref.as_of_segment.is_some()
8506        {
8507            return self.materialise_table_ref(tref);
8508        }
8509        let Some(table) = self.active_catalog().get(&tref.name) else {
8510            return self.materialise_table_ref(tref);
8511        };
8512        let cols = table.schema().columns.clone();
8513        let alias = tref.alias.as_deref().unwrap_or(tref.name.as_str());
8514        // Index seek on the first `col = literal` predicate with a
8515        // BTree on that column.
8516        let mut seeded: Option<Vec<usize>> = None;
8517        for p in preds {
8518            if let Expr::Binary {
8519                lhs,
8520                op: spg_sql::ast::BinOp::Eq,
8521                rhs,
8522            } = p
8523            {
8524                let pair = match (lhs.as_ref(), rhs.as_ref()) {
8525                    (Expr::Column(c), Expr::Literal(l)) | (Expr::Literal(l), Expr::Column(c)) => {
8526                        Some((c, l))
8527                    }
8528                    _ => None,
8529                };
8530                if let Some((c, l)) = pair
8531                    && c.qualifier
8532                        .as_deref()
8533                        .is_none_or(|q| q.eq_ignore_ascii_case(alias))
8534                    && let Some(pos) = cols.iter().position(|s| s.name == c.name)
8535                    && let Some(idx) = table.index_on(pos)
8536                    && let Some(key) = spg_storage::IndexKey::from_value(&eval::literal_to_value(l))
8537                {
8538                    let mut ids = Vec::new();
8539                    let mut all_hot = true;
8540                    for loc in idx.lookup_eq(&key) {
8541                        match *loc {
8542                            spg_storage::RowLocator::Hot(i) => ids.push(i),
8543                            spg_storage::RowLocator::Cold { .. } => {
8544                                all_hot = false;
8545                                break;
8546                            }
8547                        }
8548                    }
8549                    if all_hot {
8550                        seeded = Some(ids);
8551                        break;
8552                    }
8553                }
8554            }
8555        }
8556        let ctx = EvalContext::new(&cols, Some(alias));
8557        let mut out: Vec<Row> = Vec::new();
8558        let push_if = |row: &Row, out: &mut Vec<Row>| -> Result<(), EngineError> {
8559            for p in preds {
8560                let v = eval::eval_expr(p, row, &ctx).map_err(EngineError::Eval)?;
8561                if !matches!(v, Value::Bool(true)) {
8562                    return Ok(());
8563                }
8564            }
8565            out.push(row.clone());
8566            Ok(())
8567        };
8568        match seeded {
8569            Some(ids) => {
8570                for i in ids {
8571                    if let Some(row) = table.rows().get(i) {
8572                        push_if(row, &mut out)?;
8573                    }
8574                }
8575            }
8576            None => {
8577                for row in table.rows().iter() {
8578                    push_if(row, &mut out)?;
8579                }
8580            }
8581        }
8582        Ok((out, cols))
8583    }
8584
8585    /// v7.17.0 Phase 3.P0-43 — materialise a `FROM` with one or more
8586    /// JOINs into `(combined_schema, filtered_rows)`. The combined
8587    /// schema uses composite `alias.col` column names so the
8588    /// qualifier-aware column resolver finds every join peer by
8589    /// exact match; the filtered rows are the join cross-product
8590    /// after the optional WHERE clause is applied.
8591    ///
8592    /// Shared by `exec_joined_select` and the JOIN branch of
8593    /// `exec_select_with_window`; both paths used to inline the
8594    /// same nested-loop logic and the window path rejected JOIN
8595    /// outright.
8596    /// v7.28 (round-22) — resolve a Column reference against a
8597    /// composite ("alias.col") schema slice. Bare names match a
8598    /// unique ".col" suffix.
8599    fn composite_col_pos(schema: &[ColumnSchema], c: &spg_sql::ast::ColumnName) -> Option<usize> {
8600        if let Some(q) = &c.qualifier {
8601            let composite = alloc::format!("{q}.{}", c.name);
8602            return schema.iter().position(|s| s.name == composite);
8603        }
8604        let suffix = alloc::format!(".{}", c.name);
8605        let mut hits = schema
8606            .iter()
8607            .enumerate()
8608            .filter(|(_, s)| s.name.ends_with(&suffix) || s.name == c.name);
8609        let first = hits.next();
8610        if hits.next().is_some() {
8611            return None; // ambiguous — leave to the residual evaluator
8612        }
8613        first.map(|(i, _)| i)
8614    }
8615
8616    /// v7.28 (round-22) — resolve a Column against ONE peer's own
8617    /// columns (right side of a join): `alias.col` or a bare name.
8618    fn peer_col_pos(
8619        peer_alias: &str,
8620        peer_cols: &[ColumnSchema],
8621        c: &spg_sql::ast::ColumnName,
8622    ) -> Option<usize> {
8623        if let Some(q) = &c.qualifier
8624            && !q.eq_ignore_ascii_case(peer_alias)
8625        {
8626            return None;
8627        }
8628        peer_cols.iter().position(|s| s.name == c.name)
8629    }
8630
8631    /// v7.28 (round-22) — drop the VALUES of columns the statement
8632    /// never references (schema and positions stay; the value
8633    /// becomes NULL, so a 30 KB body column costs nothing through
8634    /// the join pipeline instead of being cloned per row).
8635    fn null_out_unreferenced(
8636        rows: &mut [Row],
8637        cols: &[ColumnSchema],
8638        alias: &str,
8639        needed: &alloc::collections::BTreeSet<(String, String)>,
8640    ) {
8641        let keep: Vec<bool> = cols
8642            .iter()
8643            .map(|c| needed.contains(&(alias.to_string(), c.name.clone())))
8644            .collect();
8645        if keep.iter().all(|k| *k) {
8646            return;
8647        }
8648        for row in rows.iter_mut() {
8649            for (i, k) in keep.iter().enumerate() {
8650                if !*k && i < row.values.len() {
8651                    row.values[i] = Value::Null;
8652                }
8653            }
8654        }
8655    }
8656
8657    fn build_joined_filtered_rows(
8658        &self,
8659        from: &FromClause,
8660        where_: Option<&Expr>,
8661        cancel: CancelToken<'_>,
8662        needed: Option<&alloc::collections::BTreeSet<(String, String)>>,
8663        budget: &mut ByteBudget,
8664    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
8665        let primary_alias = from
8666            .primary
8667            .alias
8668            .as_deref()
8669            .unwrap_or(from.primary.name.as_str())
8670            .to_string();
8671        // v7.28 (round-22) — single-table predicate pushdown. WHERE
8672        // conjuncts whose every column is QUALIFIED with one table's
8673        // alias filter that table BEFORE the join (with an index
8674        // seek when one matches `col = literal`). Only the primary
8675        // and INNER peers are eligible — pre-filtering a LEFT peer
8676        // would change which rows NULL-extend. Pushed conjuncts stay
8677        // in WHERE too (idempotent), so correctness never depends on
8678        // the pushdown.
8679        let mut primary_preds: Vec<&Expr> = Vec::new();
8680        let mut peer_preds: Vec<Vec<&Expr>> = alloc::vec![Vec::new(); from.joins.len()];
8681        if let Some(w) = where_ {
8682            for sub in reorder::split_and_conjunctions(w) {
8683                if expr_has_subquery(sub) || aggregate::contains_aggregate(sub) {
8684                    continue;
8685                }
8686                let mut quals: Vec<&str> = Vec::new();
8687                let mut all_qualified = true;
8688                collect_column_qualifiers(sub, &mut quals, &mut all_qualified);
8689                if !all_qualified || quals.is_empty() {
8690                    continue;
8691                }
8692                let q0 = quals[0];
8693                if !quals.iter().all(|q| q.eq_ignore_ascii_case(q0)) {
8694                    continue;
8695                }
8696                if q0.eq_ignore_ascii_case(&primary_alias) {
8697                    primary_preds.push(sub);
8698                    continue;
8699                }
8700                for (i, j) in from.joins.iter().enumerate() {
8701                    if matches!(j.kind, JoinKind::Inner)
8702                        && j.table.lateral_subquery.is_none()
8703                        && q0.eq_ignore_ascii_case(
8704                            j.table.alias.as_deref().unwrap_or(j.table.name.as_str()),
8705                        )
8706                    {
8707                        peer_preds[i].push(sub);
8708                        break;
8709                    }
8710                }
8711            }
8712        }
8713        // v7.28 (round-22) — table-order swap: when the primary has
8714        // no pushed predicate but an INNER peer does, start from the
8715        // filtered peer instead. Equi-joins commute; output columns
8716        // resolve by composite name, so downstream projection is
8717        // order-independent. (A correlated subquery body like
8718        // `FROM email_analysis e2 JOIN messages m2 … WHERE
8719        // m2.thread_id = '<outer>'` otherwise clones the whole
8720        // unfiltered primary once per outer group.)
8721        let mut from_owned;
8722        let mut from = from;
8723        // Safety: swapping reorders which table joins FIRST, so it is
8724        // only legal when the FIRST join's ON references no table
8725        // beyond {primary, first peer} (a later peer's ON may name
8726        // the original primary, which must already be in the
8727        // combined row when that peer joins). Restrict to i == 0 AND
8728        // an ON whose qualifiers all live in those two tables.
8729        if primary_preds.is_empty()
8730            && let Some(j0) = from.joins.first()
8731            && matches!(j0.kind, JoinKind::Inner)
8732            && j0.table.lateral_subquery.is_none()
8733            && !peer_preds[0].is_empty()
8734        {
8735            let peer_alias = j0.table.alias.as_deref().unwrap_or(j0.table.name.as_str());
8736            let on_safe = j0.on.as_ref().is_some_and(|on| {
8737                let mut quals: Vec<&str> = Vec::new();
8738                let mut all_q = true;
8739                collect_column_qualifiers(on, &mut quals, &mut all_q);
8740                all_q
8741                    && quals.iter().all(|q| {
8742                        q.eq_ignore_ascii_case(&primary_alias) || q.eq_ignore_ascii_case(peer_alias)
8743                    })
8744            });
8745            if on_safe {
8746                from_owned = from.clone();
8747                core::mem::swap(&mut from_owned.primary, &mut from_owned.joins[0].table);
8748                primary_preds = peer_preds[0].drain(..).collect();
8749                from = &from_owned;
8750            }
8751        }
8752        let primary_alias = from
8753            .primary
8754            .alias
8755            .as_deref()
8756            .unwrap_or(from.primary.name.as_str())
8757            .to_string();
8758        let (mut primary_rows, primary_cols) =
8759            self.materialise_table_ref_filtered(&from.primary, &primary_preds)?;
8760        if let Some(needed) = needed {
8761            Self::null_out_unreferenced(&mut primary_rows, &primary_cols, &primary_alias, needed);
8762        }
8763        // v7.30.3 (round-26) — byte budget: charge every clone the
8764        // pipeline makes; release a stage's input when the next
8765        // stage replaces it (net live bytes, not cumulative churn).
8766        let mut working_bytes = approx_rows_bytes(&primary_rows);
8767        budget.charge(working_bytes)?;
8768        // v7.17.0 Phase 3.P0-41 — LATERAL peers can't be
8769        // pre-materialised because their rows depend on outer
8770        // columns. For each peer, build either an eager
8771        // (rows, schema) pair or a "lateral" sentinel carrying
8772        // just the schema and the inner SELECT to re-run per
8773        // outer row.
8774        #[allow(clippy::type_complexity)]
8775        let mut joined: Vec<JoinedPeer<'_>> = Vec::new();
8776        for j in &from.joins {
8777            let a = j
8778                .table
8779                .alias
8780                .as_deref()
8781                .unwrap_or(j.table.name.as_str())
8782                .to_string();
8783            if let Some(inner_box) = &j.table.lateral_subquery {
8784                // Probe schema by running the inner SELECT against a
8785                // NULL-padded outer context. The probe gives us the
8786                // projection's column shape; rows materialise per
8787                // left-row below.
8788                let schema = self.lateral_probe_schema(inner_box)?;
8789                joined.push(JoinedPeer {
8790                    eager_rows: None,
8791                    cols: schema,
8792                    alias: a,
8793                    kind: j.kind,
8794                    on: j.on.as_ref(),
8795                    lateral: Some(inner_box.as_ref()),
8796                    join_table: None,
8797                });
8798            } else {
8799                let pidx = from
8800                    .joins
8801                    .iter()
8802                    .position(|jj| core::ptr::eq(jj, j))
8803                    .unwrap_or(0);
8804                // v7.28 - defer materialisation for plain tables with
8805                // no pushed predicate: the index-nested-loop path may
8806                // avoid cloning the table entirely.
8807                let plain = j.table.unnest_expr.is_none() && j.table.as_of_segment.is_none();
8808                if plain
8809                    && peer_preds[pidx].is_empty()
8810                    && let Some(t) = self.active_catalog().get(&j.table.name)
8811                {
8812                    joined.push(JoinedPeer {
8813                        eager_rows: None,
8814                        cols: t.schema().columns.clone(),
8815                        alias: a,
8816                        kind: j.kind,
8817                        on: j.on.as_ref(),
8818                        lateral: None,
8819                        join_table: Some(j.table.name.clone()),
8820                    });
8821                    continue;
8822                }
8823                let (mut rows, cols) =
8824                    self.materialise_table_ref_filtered(&j.table, &peer_preds[pidx])?;
8825                if let Some(needed) = needed {
8826                    Self::null_out_unreferenced(&mut rows, &cols, &a, needed);
8827                }
8828                budget.charge(approx_rows_bytes(&rows))?;
8829                joined.push(JoinedPeer {
8830                    eager_rows: Some(rows),
8831                    cols,
8832                    alias: a,
8833                    kind: j.kind,
8834                    on: j.on.as_ref(),
8835                    lateral: None,
8836                    join_table: Some(j.table.name.clone()),
8837                });
8838            }
8839        }
8840        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
8841        for col in &primary_cols {
8842            combined_schema.push(ColumnSchema::new(
8843                alloc::format!("{primary_alias}.{}", col.name),
8844                col.ty,
8845                col.nullable,
8846            ));
8847        }
8848        for peer in &joined {
8849            for col in &peer.cols {
8850                combined_schema.push(ColumnSchema::new(
8851                    alloc::format!("{}.{}", peer.alias, col.name),
8852                    col.ty,
8853                    col.nullable,
8854                ));
8855            }
8856        }
8857        let ctx = EvalContext::new(&combined_schema, None);
8858        // v7.28 (round-22) - intermediate-row ceiling: a join whose
8859        // working set explodes errors instead of eating the host
8860        // (mailrs watched RSS climb to 7 GiB of 15 before a manual
8861        // restart). The ceiling is per join STAGE, not per query.
8862        const MAX_JOIN_INTERMEDIATE_ROWS: usize = 4_000_000;
8863        let mut working: Vec<Row> = primary_rows;
8864        // Track the per-row width consumed by the outer left side so
8865        // each lateral evaluation sees the correct schema slice.
8866        let mut consumed_cols = primary_cols.len();
8867        for peer in &joined {
8868            if working.len() > MAX_JOIN_INTERMEDIATE_ROWS {
8869                return Err(EngineError::Unsupported(alloc::format!(
8870                    "join intermediate result exceeds {MAX_JOIN_INTERMEDIATE_ROWS} rows ({} so far) - add join predicates",
8871                    working.len()
8872                )));
8873            }
8874            let right_arity = peer.cols.len();
8875            let mut next: Vec<Row> = Vec::new();
8876            let mut next_bytes = 0usize;
8877            // v7.28 (round-22) — hash equi-join. The old path CLONED
8878            // the full combined row for EVERY (left, right) pair and
8879            // then evaluated ON — O(L×R) row materialisations (a
8880            // 24k × 6k LEFT JOIN = 1.5e8 multi-KB clones; the inbox
8881            // query never returned). Extract `left_col = right_col`
8882            // conjuncts from ON, build a hash on the (smaller,
8883            // already-materialised) right side, and only materialise
8884            // matching pairs. Residual ON conjuncts evaluate on the
8885            // candidates. NULL keys never match (SQL equality).
8886            let mut eq_pairs: Vec<(usize, usize)> = Vec::new(); // (left combined pos, right peer pos)
8887            let mut residual: Vec<&Expr> = Vec::new();
8888            if let (Some(on_expr), None) = (peer.on, peer.lateral) {
8889                for sub in reorder::split_and_conjunctions(on_expr) {
8890                    let mut matched = None;
8891                    if let Expr::Binary {
8892                        lhs,
8893                        op: spg_sql::ast::BinOp::Eq,
8894                        rhs,
8895                    } = sub
8896                        && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
8897                    {
8898                        let left_slice = &combined_schema[..consumed_cols];
8899                        if let (Some(l), Some(r)) = (
8900                            Self::composite_col_pos(left_slice, a),
8901                            Self::peer_col_pos(&peer.alias, &peer.cols, b),
8902                        ) {
8903                            matched = Some((l, r));
8904                        } else if let (Some(l), Some(r)) = (
8905                            Self::composite_col_pos(left_slice, b),
8906                            Self::peer_col_pos(&peer.alias, &peer.cols, a),
8907                        ) {
8908                            matched = Some((l, r));
8909                        }
8910                    }
8911                    match matched {
8912                        Some(pair) => eq_pairs.push(pair),
8913                        None => residual.push(sub),
8914                    }
8915                }
8916            }
8917            // v7.28 (round-22) - index-nested-loop: when the working
8918            // set is small and the peer's join column has a BTree,
8919            // seek per left row instead of materialising the whole
8920            // peer table (a correlated subquery body otherwise
8921            // clones the full table once per outer group).
8922            const INL_MAX_LEFT: usize = 1024;
8923            if let Some(tname) = &peer.join_table
8924                && peer.eager_rows.is_none()
8925                && !eq_pairs.is_empty()
8926                && working.len() <= INL_MAX_LEFT
8927                && let Some(table) = self.active_catalog().get(tname)
8928                && let Some(idx) = peer
8929                    .cols
8930                    .iter()
8931                    .position(|c| c.name == peer.cols[eq_pairs[0].1].name)
8932                    .and_then(|pos| table.index_on(pos))
8933            {
8934                let (lpos0, _) = eq_pairs[0];
8935                for left in &working {
8936                    cancel.check()?;
8937                    let mut left_matched = false;
8938                    let key_v = left.values.get(lpos0).cloned().unwrap_or(Value::Null);
8939                    if !matches!(key_v, Value::Null)
8940                        && let Some(key) = spg_storage::IndexKey::from_value(&key_v)
8941                    {
8942                        for loc in idx.lookup_eq(&key) {
8943                            let right = match *loc {
8944                                spg_storage::RowLocator::Hot(i) => match table.rows().get(i) {
8945                                    Some(r) => r,
8946                                    None => continue,
8947                                },
8948                                spg_storage::RowLocator::Cold { .. } => continue,
8949                            };
8950                            // Remaining eq pairs + residual ON check on
8951                            // the candidate only.
8952                            let mut ok = true;
8953                            for (lp, rp) in eq_pairs.iter().skip(1) {
8954                                let lv = left.values.get(*lp);
8955                                let rv = right.values.get(*rp);
8956                                let eq = match (lv, rv) {
8957                                    (Some(a), Some(b)) => {
8958                                        !matches!(a, Value::Null)
8959                                            && !matches!(b, Value::Null)
8960                                            && value_cmp(a, b) == core::cmp::Ordering::Equal
8961                                    }
8962                                    _ => false,
8963                                };
8964                                if !eq {
8965                                    ok = false;
8966                                    break;
8967                                }
8968                            }
8969                            if !ok {
8970                                continue;
8971                            }
8972                            let mut combined_vals = left.values.clone();
8973                            combined_vals.extend(right.values.iter().cloned());
8974                            let combined = Row::new(combined_vals);
8975                            let keep = if residual.is_empty() {
8976                                true
8977                            } else {
8978                                let mut k = true;
8979                                for r in &residual {
8980                                    let cond = self.eval_expr_with_correlated(
8981                                        r, &combined, &ctx, cancel, None,
8982                                    )?;
8983                                    if !matches!(cond, Value::Bool(true)) {
8984                                        k = false;
8985                                        break;
8986                                    }
8987                                }
8988                                k
8989                            };
8990                            if keep {
8991                                let b = approx_row_bytes(&combined);
8992                                budget.charge(b)?;
8993                                next_bytes += b;
8994                                next.push(combined);
8995                                left_matched = true;
8996                            }
8997                        }
8998                    }
8999                    if !left_matched && matches!(peer.kind, JoinKind::Left) {
9000                        let mut combined_vals = left.values.clone();
9001                        for _ in 0..right_arity {
9002                            combined_vals.push(Value::Null);
9003                        }
9004                        let nulled = Row::new(combined_vals);
9005                        let b = approx_row_bytes(&nulled);
9006                        budget.charge(b)?;
9007                        next_bytes += b;
9008                        next.push(nulled);
9009                    }
9010                }
9011                working = next;
9012                budget.release(working_bytes);
9013                working_bytes = next_bytes;
9014                consumed_cols += right_arity;
9015                continue;
9016            }
9017            // Deferred peer that didn't take the INL path: materialise
9018            // now (no pushed predicate, full table).
9019            let lazy_rows: Option<Vec<Row>> = if peer.eager_rows.is_none() && peer.lateral.is_none()
9020            {
9021                let tname = peer.join_table.as_deref().unwrap_or("");
9022                let mut rows: Vec<Row> = self
9023                    .active_catalog()
9024                    .get(tname)
9025                    .map(|t| t.rows().iter().cloned().collect())
9026                    .unwrap_or_default();
9027                if let Some(needed) = needed {
9028                    Self::null_out_unreferenced(&mut rows, &peer.cols, &peer.alias, needed);
9029                }
9030                budget.charge(approx_rows_bytes(&rows))?;
9031                Some(rows)
9032            } else {
9033                None
9034            };
9035            let eager_view: Option<&Vec<Row>> = peer.eager_rows.as_ref().or(lazy_rows.as_ref());
9036            if !eq_pairs.is_empty() && peer.lateral.is_none() {
9037                let rights = eager_view.expect("non-lateral peer eager");
9038                // v7.29 - hashbrown over BTreeMap: the ordered map
9039                // paid O(log n) string comparisons per insert/probe
9040                // (24k-row build sides spent ~100 ms in it).
9041                let mut table: hashbrown::HashMap<String, Vec<usize>> =
9042                    hashbrown::HashMap::with_capacity(rights.len());
9043                let mut keybuf: Vec<Value> = Vec::with_capacity(eq_pairs.len());
9044                'build: for (ri, right) in rights.iter().enumerate() {
9045                    keybuf.clear();
9046                    for (_, rpos) in &eq_pairs {
9047                        let v = right.values.get(*rpos).cloned().unwrap_or(Value::Null);
9048                        if matches!(v, Value::Null) {
9049                            continue 'build;
9050                        }
9051                        keybuf.push(v);
9052                    }
9053                    table
9054                        .entry(aggregate::encode_key(&keybuf))
9055                        .or_default()
9056                        .push(ri);
9057                }
9058                for left in &working {
9059                    cancel.check()?;
9060                    let mut left_matched = false;
9061                    keybuf.clear();
9062                    let mut left_has_null = false;
9063                    for (lpos, _) in &eq_pairs {
9064                        let v = left.values.get(*lpos).cloned().unwrap_or(Value::Null);
9065                        if matches!(v, Value::Null) {
9066                            left_has_null = true;
9067                            break;
9068                        }
9069                        keybuf.push(v);
9070                    }
9071                    if !left_has_null
9072                        && let Some(cands) = table.get(&aggregate::encode_key(&keybuf))
9073                    {
9074                        for &ri in cands {
9075                            let right = &rights[ri];
9076                            let mut combined_vals = left.values.clone();
9077                            combined_vals.extend(right.values.iter().cloned());
9078                            let combined = Row::new(combined_vals);
9079                            let keep = if residual.is_empty() {
9080                                true
9081                            } else {
9082                                let mut ok = true;
9083                                for r in &residual {
9084                                    let cond = self.eval_expr_with_correlated(
9085                                        r, &combined, &ctx, cancel, None,
9086                                    )?;
9087                                    if !matches!(cond, Value::Bool(true)) {
9088                                        ok = false;
9089                                        break;
9090                                    }
9091                                }
9092                                ok
9093                            };
9094                            if keep {
9095                                let b = approx_row_bytes(&combined);
9096                                budget.charge(b)?;
9097                                next_bytes += b;
9098                                next.push(combined);
9099                                left_matched = true;
9100                            }
9101                        }
9102                    }
9103                    if !left_matched && matches!(peer.kind, JoinKind::Left) {
9104                        let mut combined_vals = left.values.clone();
9105                        for _ in 0..right_arity {
9106                            combined_vals.push(Value::Null);
9107                        }
9108                        let nulled = Row::new(combined_vals);
9109                        let b = approx_row_bytes(&nulled);
9110                        budget.charge(b)?;
9111                        next_bytes += b;
9112                        next.push(nulled);
9113                    }
9114                }
9115                working = next;
9116                budget.release(working_bytes);
9117                working_bytes = next_bytes;
9118                consumed_cols += right_arity;
9119                debug_assert!(consumed_cols <= combined_schema.len());
9120                continue;
9121            }
9122            // Fallback: nested loop (lateral peers, non-equi ON).
9123            for left in &working {
9124                cancel.check()?;
9125                let mut left_matched = false;
9126                let per_left_rrows: alloc::borrow::Cow<'_, [Row]> = match peer.lateral {
9127                    Some(inner) => {
9128                        // Substitute outer columns and run the inner
9129                        // SELECT against the current left row's slice
9130                        // of the combined schema.
9131                        let outer_schema = &combined_schema[..consumed_cols];
9132                        let rows = self.materialise_lateral_for_outer(inner, outer_schema, left)?;
9133                        alloc::borrow::Cow::Owned(rows)
9134                    }
9135                    None => {
9136                        let r = eager_view.expect("non-lateral peer eager");
9137                        alloc::borrow::Cow::Borrowed(r.as_slice())
9138                    }
9139                };
9140                for right in per_left_rrows.as_ref() {
9141                    let mut combined_vals = left.values.clone();
9142                    combined_vals.extend(right.values.iter().cloned());
9143                    let combined = Row::new(combined_vals);
9144                    let keep = if let Some(on_expr) = peer.on {
9145                        // v7.24.1 — correlated-aware (subqueries in
9146                        // ON referencing earlier join columns).
9147                        let cond =
9148                            self.eval_expr_with_correlated(on_expr, &combined, &ctx, cancel, None)?;
9149                        matches!(cond, Value::Bool(true))
9150                    } else {
9151                        true
9152                    };
9153                    if keep {
9154                        let b = approx_row_bytes(&combined);
9155                        budget.charge(b)?;
9156                        next_bytes += b;
9157                        next.push(combined);
9158                        left_matched = true;
9159                    }
9160                }
9161                if !left_matched && matches!(peer.kind, JoinKind::Left) {
9162                    let mut combined_vals = left.values.clone();
9163                    for _ in 0..right_arity {
9164                        combined_vals.push(Value::Null);
9165                    }
9166                    let nulled = Row::new(combined_vals);
9167                    let b = approx_row_bytes(&nulled);
9168                    budget.charge(b)?;
9169                    next_bytes += b;
9170                    next.push(nulled);
9171                }
9172            }
9173            working = next;
9174            budget.release(working_bytes);
9175            working_bytes = next_bytes;
9176            if working.len() > MAX_JOIN_INTERMEDIATE_ROWS {
9177                return Err(EngineError::Unsupported(alloc::format!(
9178                    "join intermediate result exceeds {MAX_JOIN_INTERMEDIATE_ROWS} rows ({} so far) - add join predicates",
9179                    working.len()
9180                )));
9181            }
9182            consumed_cols += right_arity;
9183            debug_assert!(consumed_cols <= combined_schema.len());
9184        }
9185        let mut filtered: Vec<Row> = Vec::new();
9186        // v7.24 (round-16 B) — the joined WHERE filter ran the plain
9187        // row evaluator, so a correlated EXISTS/IN/scalar subquery
9188        // under a JOIN hit "subquery reached row eval". Route through
9189        // the correlated-aware evaluator (memoized, same as the
9190        // single-table path).
9191        let mut memo = memoize::MemoizeCache::default();
9192        for row in working {
9193            if let Some(where_expr) = where_ {
9194                let cond = self.eval_expr_with_correlated(
9195                    where_expr,
9196                    &row,
9197                    &ctx,
9198                    cancel,
9199                    Some(&mut memo),
9200                )?;
9201                if !matches!(cond, Value::Bool(true)) {
9202                    continue;
9203                }
9204            }
9205            filtered.push(row);
9206        }
9207        Ok((combined_schema, filtered))
9208    }
9209
9210    /// v7.17.0 Phase 3.P0-41 — probe a LATERAL subquery's projection
9211    /// schema by running it once with a NULL-padded outer context.
9212    /// The probe never materialises real outer rows; it just executes
9213    /// the inner SELECT with `outer_alias.col` references substituted
9214    /// to NULL so the projection's type inference is exercised.
9215    fn lateral_probe_schema(
9216        &self,
9217        inner: &SelectStatement,
9218    ) -> Result<Vec<ColumnSchema>, EngineError> {
9219        // Substitute every qualified column reference whose qualifier
9220        // does NOT match an in-subquery FROM alias with NULL. The
9221        // safest probe is to walk the inner SELECT and replace any
9222        // `<qual>.<col>` whose qual isn't bound inside the subquery
9223        // with a Null literal. For the v7.17 probe we just run the
9224        // unmodified subquery and surface the columns; if it fails
9225        // (e.g. references an outer column the probe can't resolve),
9226        // we synthesise a best-effort schema from the SELECT items
9227        // by inferring a single Text-typed column per projection.
9228        match self.execute_readonly_select_for_lateral_probe(inner) {
9229            Ok(QueryResult::Rows { columns, .. }) => Ok(columns),
9230            // Best-effort fallback: each SELECT item becomes a TEXT
9231            // column. Real schemas only differ when the inner SELECT
9232            // references outer columns at projection-time; those
9233            // queries surface via the substitution path during
9234            // per-row execution and still return the right values.
9235            _ => {
9236                let mut out: Vec<ColumnSchema> = Vec::new();
9237                for (i, item) in inner.items.iter().enumerate() {
9238                    let name = match item {
9239                        SelectItem::Expr { alias: Some(a), .. } => a.clone(),
9240                        SelectItem::Expr { expr, .. } => synth_lateral_col_name(expr, i),
9241                        SelectItem::Wildcard => alloc::format!("col{i}"),
9242                    };
9243                    out.push(ColumnSchema::new(name, DataType::Text, true));
9244                }
9245                Ok(out)
9246            }
9247        }
9248    }
9249
9250    /// v7.17.0 Phase 3.P0-41 — try the inner LATERAL subquery against
9251    /// the engine in read-only mode for schema-probe purposes. Failure
9252    /// is expected when the subquery references an outer column the
9253    /// probe can't resolve; the caller falls back to a best-effort
9254    /// schema based on the SELECT items.
9255    fn execute_readonly_select_for_lateral_probe(
9256        &self,
9257        inner: &SelectStatement,
9258    ) -> Result<QueryResult, EngineError> {
9259        self.exec_bare_select_cancel(inner, CancelToken::none())
9260    }
9261
9262    /// v7.17.0 Phase 3.P0-41 — materialise a LATERAL subquery's rows
9263    /// for one outer-row context. Walks the inner SELECT, replaces
9264    /// every `<outer_alias>.<col>` reference whose alias appears in
9265    /// the outer schema with the literal value from the outer row,
9266    /// then runs the rewritten SELECT against the engine.
9267    fn materialise_lateral_for_outer(
9268        &self,
9269        inner: &SelectStatement,
9270        outer_schema: &[ColumnSchema],
9271        outer_row: &Row,
9272    ) -> Result<Vec<Row>, EngineError> {
9273        let mut substituted = inner.clone();
9274        substitute_outer_columns_multi(&mut substituted, outer_row, outer_schema);
9275        let result = self.exec_bare_select_cancel(&substituted, CancelToken::none())?;
9276        match result {
9277            QueryResult::Rows { rows, .. } => Ok(rows),
9278            _ => Err(EngineError::Unsupported(
9279                "LATERAL subquery must be a SELECT (cannot be a write statement)".into(),
9280            )),
9281        }
9282    }
9283
    /// v7.30.3 (mailrs round-26) — bounded execution for the backfill
    /// shape that walked prod into reclaim livelock:
    ///
    ///   SELECT … FROM big b JOIN small s ON b.k = s.k
    ///   WHERE … ORDER BY … LIMIT n
    ///
    /// The general join path materialises the FULL join+filter result
    /// (≈2× the table's fat columns on a fresh backfill scan) before
    /// LIMIT truncates to n rows. Here the primary streams row-by-row
    /// against a hash of the materialised peer, and accepted rows feed
    /// a keep = LIMIT+OFFSET bounded top-N heap — peak memory scales
    /// with the answer, not the table. Returns Ok(None) when the shape
    /// doesn't qualify; the caller falls through to the general path,
    /// which the byte budget guards.
    ///
    /// The shape qualifies only when all of the following hold: a
    /// literal LIMIT (and, if an OFFSET is present, a literal one);
    /// no DISTINCT / GROUP BY / HAVING / aggregate calls; exactly one
    /// INNER join carrying an ON clause; neither table ref has an
    /// `unnest_expr`, `lateral_subquery` or `as_of_segment`; both
    /// names resolve in the active catalog; and ON contributes at
    /// least one `primary_col = peer_col` equality to hash on.
    ///
    /// # Errors
    ///
    /// Propagates cancellation (`cancel.check()`), byte-budget
    /// overruns from `ByteBudget::charge`, and any error raised while
    /// materialising the peer, evaluating ON residuals / WHERE /
    /// ORDER BY keys, or building and evaluating the projection.
    fn try_streamed_inner_join_topn(
        &self,
        stmt: &SelectStatement,
        from: &FromClause,
        cancel: CancelToken<'_>,
    ) -> Result<Option<QueryResult>, EngineError> {
        // Shape gate — any bail lands on the general path.
        let Some(limit) = stmt.limit_literal() else {
            return Ok(None);
        };
        // An OFFSET that is present but not a literal cannot be folded
        // into the `keep` bound below.
        if stmt.offset.is_some() && stmt.offset_literal().is_none() {
            return Ok(None);
        }
        if stmt.distinct
            || stmt.group_by.is_some()
            || stmt.having.is_some()
            || aggregate::uses_aggregate(stmt)
        {
            return Ok(None);
        }
        if from.joins.len() != 1 {
            return Ok(None);
        }
        let j = &from.joins[0];
        if !matches!(j.kind, JoinKind::Inner) {
            return Ok(None);
        }
        let plain = |t: &TableRef| {
            t.unnest_expr.is_none() && t.lateral_subquery.is_none() && t.as_of_segment.is_none()
        };
        if !plain(&from.primary) || !plain(&j.table) {
            return Ok(None);
        }
        let Some(on_expr) = j.on.as_ref() else {
            return Ok(None);
        };
        // Plain catalog tables only — views / virtual tables keep the
        // general path's materialise_table_ref fallback.
        let Some(primary_table) = self.active_catalog().get(&from.primary.name) else {
            return Ok(None);
        };
        if self.active_catalog().get(&j.table.name).is_none() {
            return Ok(None);
        }
        // Each side is addressed by its alias, defaulting to the
        // table name when no alias was written.
        let primary_alias = from
            .primary
            .alias
            .as_deref()
            .unwrap_or(from.primary.name.as_str())
            .to_string();
        let peer_alias = j
            .table
            .alias
            .as_deref()
            .unwrap_or(j.table.name.as_str())
            .to_string();
        let mut needed = alloc::collections::BTreeSet::new();
        let prunable = collect_qualified_refs(stmt, &mut needed).is_some();
        // Peer side: materialise + prune exactly like the general
        // path; the budget still guards a degenerately fat peer.
        let mut budget = ByteBudget::new(self.max_query_bytes);
        let (mut peer_rows, peer_cols) = self.materialise_table_ref_filtered(&j.table, &[])?;
        if prunable {
            Self::null_out_unreferenced(&mut peer_rows, &peer_cols, &peer_alias, &needed);
        }
        budget.charge(approx_rows_bytes(&peer_rows))?;
        // Combined schema: primary columns first, then peer columns,
        // each renamed to `<alias>.<column>`.
        let primary_cols = primary_table.schema().columns.clone();
        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
        for col in &primary_cols {
            combined_schema.push(ColumnSchema::new(
                alloc::format!("{primary_alias}.{}", col.name),
                col.ty,
                col.nullable,
            ));
        }
        for col in &peer_cols {
            combined_schema.push(ColumnSchema::new(
                alloc::format!("{peer_alias}.{}", col.name),
                col.ty,
                col.nullable,
            ));
        }
        let ctx = EvalContext::new(&combined_schema, None);
        // Hash-joinable left = right equality pairs from ON; anything
        // else stays as a residual conjunct on the candidate row.
        let left_arity = primary_cols.len();
        let mut eq_pairs: Vec<(usize, usize)> = Vec::new();
        let mut residual: Vec<&Expr> = Vec::new();
        for sub in reorder::split_and_conjunctions(on_expr) {
            let mut matched = None;
            if let Expr::Binary {
                lhs,
                op: spg_sql::ast::BinOp::Eq,
                rhs,
            } = sub
                && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
            {
                // Try both operand orders: (primary = peer) first,
                // then (peer = primary).
                let left_slice = &combined_schema[..left_arity];
                if let (Some(l), Some(r)) = (
                    Self::composite_col_pos(left_slice, a),
                    Self::peer_col_pos(&peer_alias, &peer_cols, b),
                ) {
                    matched = Some((l, r));
                } else if let (Some(l), Some(r)) = (
                    Self::composite_col_pos(left_slice, b),
                    Self::peer_col_pos(&peer_alias, &peer_cols, a),
                ) {
                    matched = Some((l, r));
                }
            }
            match matched {
                Some(pair) => eq_pairs.push(pair),
                None => residual.push(sub),
            }
        }
        if eq_pairs.is_empty() {
            return Ok(None); // nested-loop shapes stay on the general path
        }
        // Hash the peer on the equality key (NULL keys never match).
        let mut htable: hashbrown::HashMap<String, Vec<usize>> =
            hashbrown::HashMap::with_capacity(peer_rows.len());
        let mut keybuf: Vec<Value> = Vec::with_capacity(eq_pairs.len());
        'build: for (ri, right) in peer_rows.iter().enumerate() {
            keybuf.clear();
            for (_, rpos) in &eq_pairs {
                let v = right.values.get(*rpos).cloned().unwrap_or(Value::Null);
                if matches!(v, Value::Null) {
                    continue 'build;
                }
                keybuf.push(v);
            }
            htable
                .entry(aggregate::encode_key(&keybuf))
                .or_default()
                .push(ri);
        }
        // Streamed twin of null_out_unreferenced: clone only the
        // referenced primary columns into each candidate row.
        let keep_mask: Vec<bool> = primary_cols
            .iter()
            .map(|c| !prunable || needed.contains(&(primary_alias.clone(), c.name.clone())))
            .collect();
        // Rows the sink must retain: LIMIT + OFFSET (saturating),
        // because OFFSET is only applied after the scan finishes.
        let keep = (limit as usize).saturating_add(stmt.offset_literal().map_or(0, |o| o as usize));
        let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
        let mut where_memo = memoize::MemoizeCache::default();
        let mut heap: alloc::collections::BinaryHeap<TopNEntry> =
            alloc::collections::BinaryHeap::new();
        let mut plain_sink: Vec<Row> = Vec::new();
        // Production counter stored in each heap entry as its `seq`.
        let mut seq: u64 = 0;
        'scan: for left in primary_table.rows().iter() {
            cancel.check()?;
            // keep == 0 (LIMIT 0 with no OFFSET): no row can be kept,
            // so stop before doing any per-row work.
            if keep == 0 {
                break 'scan;
            }
            keybuf.clear();
            let mut left_has_null = false;
            for (lpos, _) in &eq_pairs {
                let v = left.values.get(*lpos).cloned().unwrap_or(Value::Null);
                if matches!(v, Value::Null) {
                    left_has_null = true;
                    break;
                }
                keybuf.push(v);
            }
            // A NULL in the probe key never matches, mirroring the
            // build side.
            if left_has_null {
                continue;
            }
            let Some(cands) = htable.get(&aggregate::encode_key(&keybuf)) else {
                continue;
            };
            for &ri in cands {
                let right = &peer_rows[ri];
                let mut combined_vals: Vec<Value> =
                    Vec::with_capacity(left_arity + peer_cols.len());
                for (i, v) in left.values.iter().enumerate() {
                    combined_vals.push(if keep_mask.get(i).copied().unwrap_or(true) {
                        v.clone()
                    } else {
                        Value::Null
                    });
                }
                combined_vals.extend(right.values.iter().cloned());
                let combined = Row::new(combined_vals);
                // Non-equality ON conjuncts: all must evaluate to TRUE.
                let mut ok = true;
                for r in &residual {
                    let cond = self.eval_expr_with_correlated(r, &combined, &ctx, cancel, None)?;
                    if !matches!(cond, Value::Bool(true)) {
                        ok = false;
                        break;
                    }
                }
                if !ok {
                    continue;
                }
                if let Some(w) = stmt.where_.as_ref() {
                    let cond = self.eval_expr_with_correlated(
                        w,
                        &combined,
                        &ctx,
                        cancel,
                        Some(&mut where_memo),
                    )?;
                    if !matches!(cond, Value::Bool(true)) {
                        continue;
                    }
                }
                if stmt.order_by.is_empty() {
                    // No ORDER BY: the first `keep` accepted rows are
                    // the answer, so the scan stops once the sink is
                    // full.
                    budget.charge(approx_row_bytes(&combined))?;
                    plain_sink.push(combined);
                    if plain_sink.len() >= keep {
                        break 'scan;
                    }
                } else {
                    // DESC keys are negated so the heap compares every
                    // key ascending.
                    let raw = build_order_keys(&stmt.order_by, &combined, &ctx)?;
                    let keys: Vec<f64> = raw
                        .into_iter()
                        .enumerate()
                        .map(|(i, k)| {
                            if descs.get(i).copied().unwrap_or(false) {
                                -k
                            } else {
                                k
                            }
                        })
                        .collect();
                    let entry = TopNEntry {
                        keys,
                        seq,
                        row: combined,
                    };
                    seq += 1;
                    if heap.len() < keep {
                        budget.charge(approx_row_bytes(&entry.row))?;
                        heap.push(entry);
                    } else if let Some(top) = heap.peek()
                        && entry < *top
                    {
                        // Heap is full: admit the candidate only when
                        // it sorts before the current worst kept row,
                        // releasing the evicted row's bytes first.
                        if let Some(evicted) = heap.pop() {
                            budget.release(approx_row_bytes(&evicted.row));
                        }
                        budget.charge(approx_row_bytes(&entry.row))?;
                        heap.push(entry);
                    }
                }
            }
        }
        let mut output: Vec<Row> = if stmt.order_by.is_empty() {
            plain_sink
        } else {
            heap.into_sorted_vec().into_iter().map(|e| e.row).collect()
        };
        // OFFSET / LIMIT trim the (at most `keep`) survivors before
        // the projection is evaluated.
        apply_offset_and_limit(&mut output, stmt.offset_literal(), stmt.limit_literal());
        let projection = build_projection(&stmt.items, &combined_schema, "")?;
        let mut proj_memo = memoize::MemoizeCache::default();
        let mut rows: Vec<Row> = Vec::with_capacity(output.len());
        for row in &output {
            let mut values = Vec::with_capacity(projection.len());
            for p in &projection {
                values.push(self.eval_expr_with_correlated(
                    &p.expr,
                    row,
                    &ctx,
                    cancel,
                    Some(&mut proj_memo),
                )?);
            }
            rows.push(Row::new(values));
        }
        let columns: Vec<ColumnSchema> = projection
            .into_iter()
            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
            .collect();
        Ok(Some(QueryResult::Rows { columns, rows }))
    }
9572
9573    fn exec_joined_select(
9574        &self,
9575        stmt: &SelectStatement,
9576        from: &FromClause,
9577        cancel: CancelToken<'_>,
9578    ) -> Result<QueryResult, EngineError> {
9579        // v7.30.3 (mailrs round-26) — the bounded single-join path
9580        // first; peak memory scales with LIMIT instead of the table.
9581        if let Some(out) = self.try_streamed_inner_join_topn(stmt, from, cancel)? {
9582            return Ok(out);
9583        }
9584        // v7.17.0 Phase 3.P0-43 + P0-41 — delegate the join +
9585        // WHERE materialisation to the shared helper so the LATERAL
9586        // / UNNEST / regular-catalog paths route through one place.
9587        // (`build_joined_filtered_rows` carries LATERAL support as
9588        // of Phase 3.P0-41.) Downstream we still handle aggregate /
9589        // projection / ORDER BY / DISTINCT / LIMIT inline because
9590        // those depend on the SelectStatement's items list.
9591        let mut budget = ByteBudget::new(self.max_query_bytes);
9592        let (combined_schema, filtered) = {
9593            let mut needed = alloc::collections::BTreeSet::new();
9594            let prunable = collect_qualified_refs(stmt, &mut needed).is_some();
9595            self.build_joined_filtered_rows(
9596                from,
9597                stmt.where_.as_ref(),
9598                cancel,
9599                if prunable { Some(&needed) } else { None },
9600                &mut budget,
9601            )?
9602        };
9603        let ctx = EvalContext::new(&combined_schema, None);
9604        // Aggregate path: handle GROUP BY / aggregate calls over the
9605        // joined+filtered rows.
9606        if aggregate::uses_aggregate(stmt) {
9607            let refs: Vec<&Row> = filtered.iter().collect();
9608            // v7.29 — a per-query memo so correlated scalar
9609            // subqueries batch-evaluate once (group map) instead of
9610            // executing per group.
9611            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
9612            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
9613                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
9614                    .map_err(|err| match err {
9615                        EngineError::Eval(ev) => ev,
9616                        other => eval::EvalError::TypeMismatch {
9617                            detail: alloc::format!("{other}"),
9618                        },
9619                    })
9620            };
9621            let mut agg =
9622                aggregate::run(stmt, &refs, &combined_schema, None, Some(&agg_correlated))?;
9623            apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
9624            return Ok(QueryResult::Rows {
9625                columns: agg.columns,
9626                rows: agg.rows,
9627            });
9628        }
9629
9630        let projection = build_projection(&stmt.items, &combined_schema, "")?;
9631        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
9632        let mut proj_memo = memoize::MemoizeCache::default();
9633        for row in &filtered {
9634            let mut values = Vec::with_capacity(projection.len());
9635            for p in &projection {
9636                // v7.24 (round-16 B) — select-list subqueries under a
9637                // JOIN go through the correlated-aware evaluator too.
9638                values.push(self.eval_expr_with_correlated(
9639                    &p.expr,
9640                    row,
9641                    &ctx,
9642                    cancel,
9643                    Some(&mut proj_memo),
9644                )?);
9645            }
9646            let order_keys = if stmt.order_by.is_empty() {
9647                Vec::new()
9648            } else {
9649                build_order_keys(&stmt.order_by, row, &ctx)?
9650            };
9651            let out_row = Row::new(values);
9652            budget.charge(approx_row_bytes(&out_row))?;
9653            tagged.push((order_keys, out_row));
9654        }
9655        if !stmt.order_by.is_empty() {
9656            let keep = if stmt.distinct {
9657                None
9658            } else {
9659                stmt.limit_literal()
9660                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
9661            };
9662            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
9663            partial_sort_tagged(&mut tagged, keep, &descs);
9664        }
9665        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
9666        if stmt.distinct {
9667            output_rows = dedup_rows(output_rows);
9668        }
9669        apply_offset_and_limit(
9670            &mut output_rows,
9671            stmt.offset_literal(),
9672            stmt.limit_literal(),
9673        );
9674        let columns: Vec<ColumnSchema> = projection
9675            .into_iter()
9676            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
9677            .collect();
9678        Ok(QueryResult::Rows {
9679            columns,
9680            rows: output_rows,
9681        })
9682    }
9683}
9684
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
#[derive(Debug, Clone)]
struct ProjectedItem {
    // Expression evaluated once per output row to produce the cell.
    expr: Expr,
    // Name given to the output `ColumnSchema` for this item.
    output_name: String,
    // Data type reported in the output `ColumnSchema`.
    ty: DataType,
    // Nullability reported in the output `ColumnSchema`.
    nullable: bool,
}
9694
9695/// v7.30.3 (mailrs round-26) — approximate heap bytes held by one
9696/// `Value`. Fat payloads (text / json / bytea / vectors / arrays)
9697/// dominate; fixed-size variants count 0 here because the per-cell
9698/// enum overhead is charged separately in `approx_row_bytes`. An
9699/// under-estimate is acceptable — the budget is a host-pressure
9700/// guard, not an exact meter.
9701fn approx_value_bytes(v: &Value) -> usize {
9702    match v {
9703        Value::Text(s) | Value::Json(s) => s.len(),
9704        Value::Bytes(b) => b.len(),
9705        Value::Vector(v) => v.len() * 4,
9706        Value::TextArray(a) => a
9707            .iter()
9708            .map(|o| o.as_ref().map_or(0, String::len) + 8)
9709            .sum(),
9710        Value::IntArray(a) => a.len() * 8,
9711        _ => 0,
9712    }
9713}
9714
9715/// Approximate heap bytes held by one materialised `Row`: per-cell
9716/// enum slots plus fat payloads.
9717fn approx_row_bytes(row: &Row) -> usize {
9718    row.values.len() * core::mem::size_of::<Value>()
9719        + row.values.iter().map(approx_value_bytes).sum::<usize>()
9720}
9721
9722/// v7.30.3 (mailrs round-26) — per-query byte budget for join/filter
9723/// materialisation. Net accounting: stages charge what they clone and
9724/// release what they free (`working` is released when the next stage
9725/// replaces it), so the meter tracks live bytes, not cumulative
9726/// churn. `limit = usize::MAX` when the budget is disabled keeps the
9727/// hot path branch-free apart from one saturating add + compare.
9728struct ByteBudget {
9729    limit: usize,
9730    used: usize,
9731}
9732
9733impl ByteBudget {
9734    const fn new(limit: Option<usize>) -> Self {
9735        Self {
9736            limit: match limit {
9737                Some(n) => n,
9738                None => usize::MAX,
9739            },
9740            used: 0,
9741        }
9742    }
9743
9744    fn charge(&mut self, n: usize) -> Result<(), EngineError> {
9745        self.used = self.used.saturating_add(n);
9746        if self.used > self.limit {
9747            return Err(EngineError::QueryBytesExceeded(self.limit));
9748        }
9749        Ok(())
9750    }
9751
9752    fn release(&mut self, n: usize) {
9753        self.used = self.used.saturating_sub(n);
9754    }
9755}
9756
9757/// Sum `approx_row_bytes` over a freshly materialised row set.
9758fn approx_rows_bytes(rows: &[Row]) -> usize {
9759    rows.iter().map(approx_row_bytes).sum()
9760}
9761
9762/// v7.30.3 (mailrs round-26) — bounded top-N sink entry for the
9763/// streamed single-join path. `keys` carry per-key DESC pre-encoded
9764/// by negation, so ordering is plain ascending lexicographic (the
9765/// negation commutes with `cmp_multi_key`'s per-key reverse,
9766/// including the ±INF NULL placements `build_order_keys` emits).
9767/// `seq` is production order: ties keep the earliest-produced rows,
9768/// matching what the general path's stable in-budget sort yields.
9769/// The `BinaryHeap` is a max-heap, so `peek()` is the worst kept row.
9770struct TopNEntry {
9771    keys: Vec<f64>,
9772    seq: u64,
9773    row: Row,
9774}
9775
9776impl TopNEntry {
9777    fn cmp_keys(a: &[f64], b: &[f64]) -> core::cmp::Ordering {
9778        for (ka, kb) in a.iter().zip(b.iter()) {
9779            let ord = ka.partial_cmp(kb).unwrap_or(core::cmp::Ordering::Equal);
9780            if ord != core::cmp::Ordering::Equal {
9781                return ord;
9782            }
9783        }
9784        core::cmp::Ordering::Equal
9785    }
9786}
9787
9788impl PartialEq for TopNEntry {
9789    fn eq(&self, other: &Self) -> bool {
9790        self.cmp(other) == core::cmp::Ordering::Equal
9791    }
9792}
9793impl Eq for TopNEntry {}
9794impl PartialOrd for TopNEntry {
9795    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
9796        Some(self.cmp(other))
9797    }
9798}
9799impl Ord for TopNEntry {
9800    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
9801        Self::cmp_keys(&self.keys, &other.keys).then(self.seq.cmp(&other.seq))
9802    }
9803}
9804
9805/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
9806/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
9807/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
9808/// the spec's "two NULLs are not distinct"; the second is a tolerated
9809/// quirk for v1 (no NaN literals are reachable from the SQL surface).
9810fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
9811    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
9812    for r in rows {
9813        if !out.iter().any(|seen| seen == &r) {
9814            out.push(r);
9815        }
9816    }
9817    out
9818}
9819
9820/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
9821/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
9822/// order via the byte values; vectors are not sortable.
9823fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
9824    match v {
9825        Value::Null => Ok(f64::INFINITY),
9826        Value::SmallInt(n) => Ok(f64::from(*n)),
9827        Value::Int(n) => Ok(f64::from(*n)),
9828        Value::Date(d) => Ok(f64::from(*d)),
9829        #[allow(clippy::cast_precision_loss)]
9830        Value::Timestamp(t) => Ok(*t as f64),
9831        // v7.17.0 Phase 3.P0-32 — PG TIME ordered by underlying
9832        // i64 microseconds (matches wall-clock ordering).
9833        #[allow(clippy::cast_precision_loss)]
9834        Value::Time(us) => Ok(*us as f64),
9835        // v7.17.0 Phase 3.P0-33 — MySQL YEAR ordered by underlying
9836        // u16 (matches calendar ordering; zero-year sentinel
9837        // sorts before 1901).
9838        Value::Year(y) => Ok(f64::from(*y)),
9839        // v7.17.0 Phase 3.P0-34 — PG TIMETZ ordered by the
9840        // UTC-equivalent microseconds (local wall - offset). Two
9841        // values for the same physical instant in different zones
9842        // sort equal — matches PG TIMETZ index behaviour.
9843        #[allow(clippy::cast_precision_loss)]
9844        Value::TimeTz { us, offset_secs } => Ok((us - i64::from(*offset_secs) * 1_000_000) as f64),
9845        // v7.17.0 Phase 3.P0-35 — PG MONEY ordered by i64 cents.
9846        #[allow(clippy::cast_precision_loss)]
9847        Value::Money(c) => Ok(*c as f64),
9848        // v7.17.0 Phase 3.P0-38 — range ordering is not supported
9849        // in v7.17.0 (needs lex-then-inclusivity tiebreak).
9850        Value::Range { .. } => Err(EngineError::Unsupported(
9851            "ORDER BY of a range value is not supported in v7.17.0".into(),
9852        )),
9853        // v7.17.0 Phase 3.P0-39 — hstore is not orderable.
9854        Value::Hstore(_) => Err(EngineError::Unsupported(
9855            "ORDER BY of a hstore value is not supported".into(),
9856        )),
9857        // v7.17.0 Phase 3.P0-40 — 2D arrays not orderable.
9858        Value::IntArray2D(_) | Value::BigIntArray2D(_) | Value::TextArray2D(_) => Err(
9859            EngineError::Unsupported("ORDER BY of a 2D array is not supported in v7.17.0".into()),
9860        ),
9861        #[allow(clippy::cast_precision_loss)]
9862        Value::Numeric { scaled, scale } => {
9863            // Scaled integer / 10^scale, computed via f64 for sort
9864            // ordering only. Precision losses here only matter for
9865            // ORDER BY tie-breaks well past 15 significant digits.
9866            // `f64::powi` lives in std; we hand-roll the loop so the
9867            // no_std engine crate doesn't need it.
9868            let mut divisor = 1.0_f64;
9869            for _ in 0..*scale {
9870                divisor *= 10.0;
9871            }
9872            Ok((*scaled as f64) / divisor)
9873        }
9874        #[allow(clippy::cast_precision_loss)]
9875        Value::BigInt(n) => Ok(*n as f64),
9876        Value::Float(x) => Ok(*x),
9877        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
9878        Value::Text(s) => {
9879            // Lex order by codepoints — good enough for ORDER BY name.
9880            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
9881            // partial_cmp Equal. v1.x can swap in a real string comparator.
9882            let mut key: u64 = 0;
9883            for &b in s.as_bytes().iter().take(8) {
9884                key = (key << 8) | u64::from(b);
9885            }
9886            #[allow(clippy::cast_precision_loss)]
9887            Ok(key as f64)
9888        }
9889        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
9890            Err(EngineError::Unsupported(
9891                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
9892            ))
9893        }
9894        Value::Interval { .. } => Err(EngineError::Unsupported(
9895            "ORDER BY of an INTERVAL is not supported in v2.11 \
9896             (months vs micros has no single canonical ordering)"
9897                .into(),
9898        )),
9899        Value::Json(_) => Err(EngineError::Unsupported(
9900            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
9901        )),
9902        // v7.5.0 — Value is #[non_exhaustive]; future variants need
9903        // an explicit ORDER BY mapping. Surface as Unsupported until
9904        // engine support is added.
9905        _ => Err(EngineError::Unsupported(
9906            "ORDER BY of this value type is not supported".into(),
9907        )),
9908    }
9909}
9910
9911/// Try to plan a WHERE clause as an equality lookup against an existing
9912/// index. Returns the candidate row indices on success; `None` means the
9913/// caller should fall back to a full scan.
9914///
9915/// v0.8 recognises a single top-level `col = literal` (in either operand
9916/// order). AND chains and range scans land in later milestones.
9917/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
9918/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
9919/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
9920/// present, the planner does an "over-fetch and filter" pass — it
9921/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
9922/// against each, and trims back to `k`. Returns the row indices in
9923/// ascending-distance order when the plan applies.
9924fn try_nsw_knn(
9925    stmt: &SelectStatement,
9926    table: &Table,
9927    schema_cols: &[ColumnSchema],
9928    table_alias: &str,
9929) -> Option<Vec<usize>> {
9930    if stmt.distinct {
9931        return None;
9932    }
9933    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
9934    if limit == 0 {
9935        return None;
9936    }
9937    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
9938    // distance metric. Multi-key ORDER BY falls through to the
9939    // generic sort path.
9940    if stmt.order_by.len() != 1 {
9941        return None;
9942    }
9943    let order = &stmt.order_by[0];
9944    // NSW kNN returns rows ascending by distance — DESC inverts the
9945    // natural order, so the planner can't handle it without a sort
9946    // pass. Fall back to the generic ORDER BY path.
9947    if order.desc {
9948        return None;
9949    }
9950    let Expr::Binary { lhs, op, rhs } = &order.expr else {
9951        return None;
9952    };
9953    let metric = match op {
9954        BinOp::L2Distance => spg_storage::NswMetric::L2,
9955        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
9956        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
9957        _ => return None,
9958    };
9959    // Accept both `col <op> literal` and `literal <op> col`.
9960    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
9961        (lhs.as_ref(), rhs.as_ref())
9962    else {
9963        return None;
9964    };
9965    if let Some(q) = &col.qualifier
9966        && q != table_alias
9967    {
9968        return None;
9969    }
9970    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
9971    let query = literal_to_vector(literal)?;
9972    let idx = spg_storage::nsw_index_on(table, col_pos)?;
9973    if let Some(where_expr) = &stmt.where_ {
9974        // Over-fetch and filter. The factor (10×) is a heuristic that
9975        // covers typical selectivity for the corpus tests; v2.x will
9976        // make it configurable.
9977        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
9978        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
9979        let ctx = EvalContext::new(schema_cols, Some(table_alias));
9980        let mut kept: Vec<usize> = Vec::with_capacity(limit);
9981        for i in candidates {
9982            let row = &table.rows()[i];
9983            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
9984            if matches!(cond, Value::Bool(true)) {
9985                kept.push(i);
9986                if kept.len() >= limit {
9987                    break;
9988                }
9989            }
9990        }
9991        Some(kept)
9992    } else {
9993        Some(spg_storage::nsw_query(
9994            table, &idx.name, &query, limit, metric,
9995        ))
9996    }
9997}
9998
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections.
///
/// `try_nsw_knn` clamps its computed over-fetch size from below with
/// this value before asking the NSW graph for candidates.
const NSW_OVER_FETCH_FLOOR: usize = 32;
10003
10004/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
10005/// `None` for anything we can't fold at plan time.
10006fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
10007    match e {
10008        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
10009        Expr::Cast { expr, .. } => literal_to_vector(expr),
10010        _ => None,
10011    }
10012}
10013
10014/// Materialise rows in a planner-supplied order (used by the NSW path)
10015/// without re-running ORDER BY. The projection + LIMIT slot mirror the
10016/// equivalent block in `exec_bare_select`.
10017fn materialise_in_order(
10018    stmt: &SelectStatement,
10019    table: &Table,
10020    schema_cols: &[ColumnSchema],
10021    table_alias: &str,
10022    ordered_rows: &[usize],
10023) -> Result<QueryResult, EngineError> {
10024    let ctx = EvalContext::new(schema_cols, Some(table_alias));
10025    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
10026    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
10027    for &i in ordered_rows {
10028        let row = &table.rows()[i];
10029        let mut values = Vec::with_capacity(projection.len());
10030        for p in &projection {
10031            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
10032        }
10033        output_rows.push(Row::new(values));
10034    }
10035    apply_offset_and_limit(
10036        &mut output_rows,
10037        stmt.offset_literal(),
10038        stmt.limit_literal(),
10039    );
10040    let columns: Vec<ColumnSchema> = projection
10041        .into_iter()
10042        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
10043        .collect();
10044    Ok(QueryResult::Rows {
10045        columns,
10046        rows: output_rows,
10047    })
10048}
10049
10050/// v7.20 P4 — hot-row POSITION seek for the mutation paths
10051/// (UPDATE / DELETE index their planned writes by position in
10052/// `table.rows()`, so the Cow-row shape `try_index_seek`
10053/// returns doesn't fit). Same top-level-AND recursion and
10054/// col=literal resolution; the caller re-applies the full WHERE
10055/// to every returned row so the index only narrows candidates.
10056///
10057/// Returns `None` (→ caller full-scans) when no equality leaf
10058/// hits an index OR any matching locator lives in the cold tier
10059/// — the mutation paths operate on hot rows, and the PK
10060/// promote-then-walk upstream already handles the
10061/// cold-single-row case.
10062fn try_index_seek_positions(
10063    where_expr: &Expr,
10064    schema_cols: &[ColumnSchema],
10065    table: &Table,
10066    table_alias: &str,
10067) -> Option<Vec<usize>> {
10068    if let Expr::Binary {
10069        lhs,
10070        op: BinOp::And,
10071        rhs,
10072    } = where_expr
10073    {
10074        if let Some(p) = try_index_seek_positions(lhs, schema_cols, table, table_alias) {
10075            return Some(p);
10076        }
10077        return try_index_seek_positions(rhs, schema_cols, table, table_alias);
10078    }
10079    let Expr::Binary {
10080        lhs,
10081        op: BinOp::Eq,
10082        rhs,
10083    } = where_expr
10084    else {
10085        return None;
10086    };
10087    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10088        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10089    let idx = table.index_on(col_pos)?;
10090    let key = IndexKey::from_value(&value)?;
10091    let locators = idx.lookup_eq(&key);
10092    let mut out = Vec::with_capacity(locators.len());
10093    for loc in locators {
10094        match *loc {
10095            spg_storage::RowLocator::Hot(i) => out.push(i),
10096            spg_storage::RowLocator::Cold { .. } => return None,
10097        }
10098    }
10099    Some(out)
10100}
10101
10102fn try_index_seek<'a>(
10103    where_expr: &Expr,
10104    schema_cols: &[ColumnSchema],
10105    catalog: &'a Catalog,
10106    table: &'a Table,
10107    table_alias: &str,
10108) -> Option<Vec<Cow<'a, Row>>> {
10109    // v7.11.3 — recurse through top-level `AND` so a PG-style
10110    // composite predicate like `WHERE id = 1 AND created_at > $1`
10111    // still hits the index on `id`. The caller re-applies the
10112    // full WHERE expression to each returned row, so dropping the
10113    // residual conjuncts here is correct — the index just narrows
10114    // the candidate set.
10115    if let Expr::Binary {
10116        lhs,
10117        op: BinOp::And,
10118        rhs,
10119    } = where_expr
10120    {
10121        // Try LHS first (typical convention: leading equality on
10122        // the indexed column comes first in user-written SQL).
10123        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
10124            return Some(rows);
10125        }
10126        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
10127    }
10128    let Expr::Binary {
10129        lhs,
10130        op: BinOp::Eq,
10131        rhs,
10132    } = where_expr
10133    else {
10134        return None;
10135    };
10136    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10137        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10138    let idx = table.index_on(col_pos)?;
10139    let key = IndexKey::from_value(&value)?;
10140    let locators = idx.lookup_eq(&key);
10141    let table_name = table.schema().name.as_str();
10142    // v5.1: each locator dispatches to either the hot tier (zero-
10143    // copy borrow of `table.rows()[i]`) or a cold-tier segment
10144    // (one page read + dense row decode, ~µs scale). Cold rows are
10145    // returned as `Cow::Owned` so the caller's `&Row` iteration
10146    // doesn't see a tier distinction; pre-freezer (no cold
10147    // segments loaded) every locator is `Hot` and every entry is
10148    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
10149    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
10150    for loc in locators {
10151        match *loc {
10152            spg_storage::RowLocator::Hot(i) => {
10153                if let Some(row) = table.rows().get(i) {
10154                    out.push(Cow::Borrowed(row));
10155                }
10156            }
10157            spg_storage::RowLocator::Cold { segment_id, .. } => {
10158                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
10159                    out.push(Cow::Owned(row));
10160                }
10161            }
10162        }
10163    }
10164    Some(out)
10165}
10166
10167/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
10168///
10169/// Recurses through top-level `AND` like [`try_index_seek`] so a
10170/// composite predicate `WHERE search_vector @@ q AND id > $1` still
10171/// hits the GIN index on `search_vector` — the caller re-applies the
10172/// full WHERE expression to each returned candidate, so dropping the
10173/// `id > $1` residual here stays semantically correct.
10174///
10175/// Returns `None` when:
10176///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
10177///   - the RHS can't be const-evaluated to a `Value::TsQuery`
10178///     (typically because it references row columns);
10179///   - the resolved `TsQuery` uses query shapes the MVP doesn't
10180///     accelerate (`Not`, `Phrase` — those fall through to full scan).
10181///
10182/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
10183/// the full `@@` predicate per row, so an over-approximate candidate
10184/// set is safe.
10185fn try_gin_seek<'a>(
10186    where_expr: &Expr,
10187    schema_cols: &[ColumnSchema],
10188    catalog: &'a Catalog,
10189    table: &'a Table,
10190    table_alias: &str,
10191    ctx: &eval::EvalContext<'_>,
10192) -> Option<Vec<Cow<'a, Row>>> {
10193    if let Expr::Binary {
10194        lhs,
10195        op: BinOp::And,
10196        rhs,
10197    } = where_expr
10198    {
10199        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
10200            return Some(rows);
10201        }
10202        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
10203    }
10204    // v7.17.0 Phase 3.P0-44 — MySQL `MATCH(col1, col2) AGAINST (...)`
10205    // desugars into `(to_tsvector(col1) @@ q) OR (to_tsvector(col2) @@ q)`
10206    // in the parser. To accelerate the multi-column case, walk OR the same
10207    // way we walk AND: only emit a candidate set if BOTH sides can seek
10208    // (otherwise the OR result is unbounded and we must fall through to
10209    // the full scan). Candidates are union'd; the caller's WHERE re-eval
10210    // verifies the full predicate per row, so duplicates / supersets stay
10211    // semantically safe.
10212    if let Expr::Binary {
10213        lhs,
10214        op: BinOp::Or,
10215        rhs,
10216    } = where_expr
10217    {
10218        let left = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx)?;
10219        let right = try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx)?;
10220        let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(left.len() + right.len());
10221        out.extend(left);
10222        out.extend(right);
10223        return Some(out);
10224    }
10225    let Expr::Binary {
10226        lhs,
10227        op: BinOp::TsMatch,
10228        rhs,
10229    } = where_expr
10230    else {
10231        return None;
10232    };
10233    // Either side can be the column; pgvector idiom (`vec @@ q`)
10234    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
10235    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
10236    // tables resolve `q` to a Column too.
10237    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
10238        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
10239    // v7.17.0 Phase 3.P0-44 — MySQL `FULLTEXT KEY` builds a
10240    // `IndexKind::GinFulltext` posting list (Phase 2.2). It shares
10241    // the same `gin_lookup_word` shape as the tsvector-typed GIN,
10242    // so the MATCH-AGAINST `@@` predicate (desugared by the parser
10243    // into `to_tsvector(col) @@ plainto_tsquery('term')`) routes
10244    // through the same candidate-set seek.
10245    let idx = table
10246        .indices()
10247        .iter()
10248        .find(|i| i.column_position == col_pos && (i.is_gin() || i.is_gin_fulltext()))?;
10249    let candidates = gin_query_candidates(idx, &query)?;
10250    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
10251    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
10252    for loc in candidates {
10253        match loc {
10254            spg_storage::RowLocator::Hot(i) => {
10255                if let Some(row) = table.rows().get(i) {
10256                    out.push(Cow::Borrowed(row));
10257                }
10258            }
10259            // GIN cold-tier rows in the MVP: skipped, matching the
10260            // full-scan `@@` path which itself only iterates
10261            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
10262            // scan-time materialisation for `@@`, the parallel
10263            // resolution lands here; until then both paths see the
10264            // same hot-only candidate set so correctness is preserved.
10265            spg_storage::RowLocator::Cold { .. } => {}
10266        }
10267    }
10268    Some(out)
10269}
10270
10271/// v7.15.0 — trigram-GIN-accelerated candidate seek for
10272/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
10273/// the column has a `gin_trgm_ops` GIN index.
10274///
10275/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
10276/// 'foo%' AND id > 1`) still hit the trigram index; the caller
10277/// re-evaluates the full WHERE per candidate row, so dropping
10278/// non-LIKE conjuncts here stays semantically correct.
10279///
10280/// Returns `None` when:
10281///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
10282///     indexed column;
10283///   - the pattern's literal runs are too short to constrain
10284///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
10285///   - the pattern doesn't const-evaluate to a TEXT.
10286fn try_trgm_seek<'a>(
10287    where_expr: &Expr,
10288    schema_cols: &[ColumnSchema],
10289    table: &'a Table,
10290    table_alias: &str,
10291) -> Option<Vec<Cow<'a, Row>>> {
10292    if let Expr::Binary {
10293        lhs,
10294        op: BinOp::And,
10295        rhs,
10296    } = where_expr
10297    {
10298        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
10299            return Some(rows);
10300        }
10301        return try_trgm_seek(rhs, schema_cols, table, table_alias);
10302    }
10303    // LIKE node is what carries the column reference + pattern.
10304    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
10305    // through `Expr::Like { expr, pattern, negated }`. The trigram
10306    // index posting-list keys are already lower-cased and
10307    // case-folded, so we only need the pattern's literal runs.
10308    let Expr::Like { expr, pattern, .. } = where_expr else {
10309        return None;
10310    };
10311    // Column side.
10312    let Expr::Column(c) = expr.as_ref() else {
10313        return None;
10314    };
10315    if let Some(q) = &c.qualifier
10316        && q != table_alias
10317    {
10318        return None;
10319    }
10320    let col_pos = schema_cols
10321        .iter()
10322        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
10323    // Index must exist on that column AND be a trigram-GIN.
10324    let idx = table
10325        .indices()
10326        .iter()
10327        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
10328    // Pattern side must be a literal TEXT — anything else (column
10329    // ref, function call, parameter that hasn't been bound yet)
10330    // falls through to full scan.
10331    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
10332        return None;
10333    };
10334    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
10335    // Intersect every trigram's posting list. Empty intersection
10336    // → empty candidate set (caller short-circuits its row loop).
10337    let mut iter = trigrams.iter();
10338    let first = iter.next()?;
10339    let mut acc: Vec<spg_storage::RowLocator> = {
10340        let mut v = idx.gin_trgm_lookup(first).to_vec();
10341        v.sort_by_key(locator_sort_key);
10342        v.dedup_by_key(|l| locator_sort_key(l));
10343        v
10344    };
10345    for tri in iter {
10346        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
10347        next.sort_by_key(locator_sort_key);
10348        next.dedup_by_key(|l| locator_sort_key(l));
10349        // Sorted-merge intersection.
10350        let mut merged: Vec<spg_storage::RowLocator> =
10351            Vec::with_capacity(acc.len().min(next.len()));
10352        let (mut i, mut j) = (0usize, 0usize);
10353        while i < acc.len() && j < next.len() {
10354            let lk = locator_sort_key(&acc[i]);
10355            let rk = locator_sort_key(&next[j]);
10356            match lk.cmp(&rk) {
10357                core::cmp::Ordering::Less => i += 1,
10358                core::cmp::Ordering::Greater => j += 1,
10359                core::cmp::Ordering::Equal => {
10360                    merged.push(acc[i]);
10361                    i += 1;
10362                    j += 1;
10363                }
10364            }
10365        }
10366        acc = merged;
10367        if acc.is_empty() {
10368            break;
10369        }
10370    }
10371    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
10372    for loc in acc {
10373        if let spg_storage::RowLocator::Hot(i) = loc
10374            && let Some(row) = table.rows().get(i)
10375        {
10376            out.push(Cow::Borrowed(row));
10377        }
10378        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
10379    }
10380    Some(out)
10381}
10382
10383/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
10384/// the binary is a column reference to a GIN-indexed tsvector column
10385/// and the other side const-evaluates to a `Value::TsQuery`. Returns
10386/// `None` if the column reference is for the wrong table alias, or if
10387/// the RHS expression depends on row data.
10388fn resolve_gin_col_query(
10389    col_side: &Expr,
10390    query_side: &Expr,
10391    schema_cols: &[ColumnSchema],
10392    table_alias: &str,
10393    ctx: &eval::EvalContext<'_>,
10394) -> Option<(usize, spg_storage::TsQueryAst)> {
10395    // v7.17.0 Phase 3.P0-44 — the MATCH AGAINST desugar wraps the
10396    // column in `to_tsvector('simple', col)`, so we peel that wrapper
10397    // before the column lookup. Direct `col @@ tsquery` paths (the
10398    // tsvector-typed v7.12 surface) skip the wrapper entirely.
10399    let column = match col_side {
10400        Expr::Column(c) => c,
10401        Expr::FunctionCall { name, args }
10402            if name.eq_ignore_ascii_case("to_tsvector") && !args.is_empty() =>
10403        {
10404            // PG `to_tsvector` accepts either `to_tsvector(col)` or
10405            // `to_tsvector(config, col)`. In both shapes the column
10406            // we care about is the final argument.
10407            if let Expr::Column(c) = args.last().unwrap() {
10408                c
10409            } else {
10410                return None;
10411            }
10412        }
10413        _ => return None,
10414    };
10415    let c = column;
10416    if let Some(q) = &c.qualifier
10417        && q != table_alias
10418    {
10419        return None;
10420    }
10421    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
10422    // Const-evaluate the query side with an empty row — fails fast
10423    // (with a `ColumnNotFound` / similar) if the expression actually
10424    // depends on row data, which is exactly the bail signal we want.
10425    let empty_row = Row::new(Vec::new());
10426    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
10427    let Value::TsQuery(q) = v else { return None };
10428    Some((pos, q))
10429}
10430
10431/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
10432/// to produce a candidate row-locator set. Returns `None` for query
10433/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
10434/// full scan since their semantics need either complementation across
10435/// the whole row set or positional verification beyond what the
10436/// posting list carries).
10437///
10438/// Candidate sets are over-approximate — the caller re-applies the
10439/// full `@@` predicate per row, so reporting "row was in some
10440/// posting list" without verifying positions / weights stays correct.
10441fn gin_query_candidates(
10442    idx: &spg_storage::Index,
10443    query: &spg_storage::TsQueryAst,
10444) -> Option<Vec<spg_storage::RowLocator>> {
10445    use spg_storage::TsQueryAst;
10446    match query {
10447        TsQueryAst::Term { word, .. } => {
10448            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
10449            v.sort_by_key(locator_sort_key);
10450            v.dedup_by_key(|l| locator_sort_key(l));
10451            Some(v)
10452        }
10453        TsQueryAst::And(l, r) => {
10454            let mut left = gin_query_candidates(idx, l)?;
10455            let mut right = gin_query_candidates(idx, r)?;
10456            left.sort_by_key(locator_sort_key);
10457            right.sort_by_key(locator_sort_key);
10458            // Sorted-merge intersection.
10459            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
10460            let (mut i, mut j) = (0usize, 0usize);
10461            while i < left.len() && j < right.len() {
10462                let lk = locator_sort_key(&left[i]);
10463                let rk = locator_sort_key(&right[j]);
10464                match lk.cmp(&rk) {
10465                    core::cmp::Ordering::Less => i += 1,
10466                    core::cmp::Ordering::Greater => j += 1,
10467                    core::cmp::Ordering::Equal => {
10468                        out.push(left[i]);
10469                        i += 1;
10470                        j += 1;
10471                    }
10472                }
10473            }
10474            Some(out)
10475        }
10476        TsQueryAst::Or(l, r) => {
10477            let mut out = gin_query_candidates(idx, l)?;
10478            out.extend(gin_query_candidates(idx, r)?);
10479            out.sort_by_key(locator_sort_key);
10480            out.dedup_by_key(|l| locator_sort_key(l));
10481            Some(out)
10482        }
10483        // Not / Phrase bail to full scan in the MVP. Not needs
10484        // complementation against the whole row set (not represented
10485        // in the posting-list view); Phrase needs positional
10486        // verification beyond what `word → rows` carries.
10487        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
10488    }
10489}
10490
10491/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
10492/// inside the GIN intersection / union loops. Hot rows order by their
10493/// row index; Cold rows order after all Hot rows, then by
10494/// `(segment_id, the cold sub-key)`.
10495fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
10496    match *l {
10497        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
10498        spg_storage::RowLocator::Cold {
10499            segment_id,
10500            page_offset,
10501        } => (1, u64::from(segment_id), u64::from(page_offset)),
10502    }
10503}
10504
10505/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
10506/// is a simple `col = literal` predicate suitable for a `BTree` index
10507/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
10508/// decide whether a write touches a cold-tier row (which requires
10509/// promote-on-write / shadow-on-delete) before falling through to
10510/// the hot-tier row walk.
10511///
10512/// Returns `None` for any predicate shape the planner can't push
10513/// down to an index seek — complex WHERE clauses always take the
10514/// hot-only path (cold rows are immutable to non-indexed writes
10515/// until a future scan-fanout sub-version).
10516fn try_pk_predicate(
10517    where_expr: &Expr,
10518    schema_cols: &[ColumnSchema],
10519    table_alias: &str,
10520) -> Option<(usize, IndexKey)> {
10521    let Expr::Binary {
10522        lhs,
10523        op: BinOp::Eq,
10524        rhs,
10525    } = where_expr
10526    else {
10527        return None;
10528    };
10529    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10530        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10531    let key = IndexKey::from_value(&value)?;
10532    Some((col_pos, key))
10533}
10534
10535fn resolve_col_literal_pair(
10536    col_side: &Expr,
10537    lit_side: &Expr,
10538    schema_cols: &[ColumnSchema],
10539    table_alias: &str,
10540) -> Option<(usize, Value)> {
10541    let Expr::Column(c) = col_side else {
10542        return None;
10543    };
10544    if let Some(q) = &c.qualifier
10545        && q != table_alias
10546    {
10547        return None;
10548    }
10549    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
10550    let Expr::Literal(l) = lit_side else {
10551        return None;
10552    };
10553    let v = match l {
10554        Literal::Integer(n) => {
10555            if let Ok(small) = i32::try_from(*n) {
10556                Value::Int(small)
10557            } else {
10558                Value::BigInt(*n)
10559            }
10560        }
10561        Literal::Float(x) => Value::Float(*x),
10562        Literal::String(s) => Value::Text(s.clone()),
10563        Literal::Bool(b) => Value::Bool(*b),
10564        Literal::Null => Value::Null,
10565        // Vector, array and Interval literals can't be used as B-tree
10566        // index keys. Tell the planner to fall back to full-scan.
10567        Literal::Vector(_)
10568        | Literal::Interval { .. }
10569        | Literal::TextArray(_)
10570        | Literal::IntArray(_)
10571        | Literal::BigIntArray(_) => return None,
10572    };
10573    Some((pos, v))
10574}
10575
10576/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
10577/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
10578/// `EngineError` so the projection-build path keeps `UnknownQualifier`
10579/// vs `ColumnNotFound` distinct.
10580fn resolve_projection_column<'a>(
10581    c: &ColumnName,
10582    schema_cols: &'a [ColumnSchema],
10583    table_alias: &str,
10584) -> Result<&'a ColumnSchema, EngineError> {
10585    if let Some(q) = &c.qualifier {
10586        let composite = alloc::format!("{q}.{name}", name = c.name);
10587        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
10588            return Ok(s);
10589        }
10590        // Single-table case: the qualifier may equal the active alias —
10591        // then look for the bare column name.
10592        if q == table_alias
10593            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
10594        {
10595            return Ok(s);
10596        }
10597        // For multi-table schemas the qualifier is unknown only if no
10598        // column bears the "<q>." prefix. For single-table, the alias
10599        // mismatch alone is enough.
10600        let prefix = alloc::format!("{q}.");
10601        let qualifier_known =
10602            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
10603        if !qualifier_known {
10604            return Err(EngineError::Eval(EvalError::UnknownQualifier {
10605                qualifier: q.clone(),
10606            }));
10607        }
10608        return Err(EngineError::Eval(EvalError::ColumnNotFound {
10609            name: c.name.clone(),
10610        }));
10611    }
10612    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
10613        return Ok(s);
10614    }
10615    let suffix = alloc::format!(".{name}", name = c.name);
10616    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
10617    let first = matches.next();
10618    let extra = matches.next();
10619    match (first, extra) {
10620        (Some(s), None) => Ok(s),
10621        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
10622            detail: alloc::format!("ambiguous column reference: {}", c.name),
10623        })),
10624        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
10625            name: c.name.clone(),
10626        })),
10627    }
10628}
10629
10630fn build_projection(
10631    items: &[SelectItem],
10632    schema_cols: &[ColumnSchema],
10633    table_alias: &str,
10634) -> Result<Vec<ProjectedItem>, EngineError> {
10635    let mut out = Vec::new();
10636    for item in items {
10637        match item {
10638            SelectItem::Wildcard => {
10639                for col in schema_cols {
10640                    out.push(ProjectedItem {
10641                        expr: Expr::Column(ColumnName {
10642                            qualifier: None,
10643                            name: col.name.clone(),
10644                        }),
10645                        output_name: col.name.clone(),
10646                        ty: col.ty,
10647                        nullable: col.nullable,
10648                    });
10649                }
10650            }
10651            SelectItem::Expr { expr, alias } => {
10652                // Plain column ref keeps full schema info (real type +
10653                // nullability). For compound expressions try the
10654                // describe-side function-return-type table first
10655                // (e.g. `SELECT now()` → Timestamptz, `SELECT
10656                // concat(…)` → Text). Falls back to nullable Text
10657                // for shapes the describe path can't resolve.
10658                if let Expr::Column(c) = expr {
10659                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
10660                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
10661                    out.push(ProjectedItem {
10662                        expr: expr.clone(),
10663                        output_name,
10664                        ty: sch.ty,
10665                        nullable: sch.nullable,
10666                    });
10667                } else if let Some(shape) = describe::describe_expr(expr, schema_cols) {
10668                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
10669                    out.push(ProjectedItem {
10670                        expr: expr.clone(),
10671                        output_name,
10672                        ty: shape.ty,
10673                        nullable: shape.nullable,
10674                    });
10675                } else {
10676                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
10677                    out.push(ProjectedItem {
10678                        expr: expr.clone(),
10679                        output_name,
10680                        ty: DataType::Text,
10681                        nullable: true,
10682                    });
10683                }
10684            }
10685        }
10686    }
10687    Ok(out)
10688}
10689
10690/// Promote an integer to a NUMERIC value at the requested scale.
10691/// Rejects values that, after scaling, would overflow the column's
10692/// precision budget.
10693fn numeric_from_integer(
10694    n: i128,
10695    precision: u8,
10696    scale: u8,
10697    col_name: &str,
10698) -> Result<Value, EngineError> {
10699    let factor = pow10_i128(scale);
10700    let scaled = n.checked_mul(factor).ok_or_else(|| {
10701        EngineError::Unsupported(alloc::format!(
10702            "integer overflow scaling value for column `{col_name}` to scale {scale}"
10703        ))
10704    })?;
10705    check_precision(scaled, precision, col_name)?;
10706    Ok(Value::Numeric { scaled, scale })
10707}
10708
10709/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
10710/// then verifies the result fits the column's precision.
10711#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
10712fn numeric_from_float(
10713    x: f64,
10714    precision: u8,
10715    scale: u8,
10716    col_name: &str,
10717) -> Result<Value, EngineError> {
10718    if !x.is_finite() {
10719        return Err(EngineError::Unsupported(alloc::format!(
10720            "cannot store non-finite float in NUMERIC column `{col_name}`"
10721        )));
10722    }
10723    let mut factor = 1.0_f64;
10724    for _ in 0..scale {
10725        factor *= 10.0;
10726    }
10727    // Round half-away-from-zero by biasing then casting (`as i128`
10728    // truncates toward zero, so the bias + truncation gives the
10729    // desired rounding). `f64::floor` / `ceil` live in std; we don't
10730    // need them — the cast handles the truncation step.
10731    let shifted = x * factor;
10732    let biased = if shifted >= 0.0 {
10733        shifted + 0.5
10734    } else {
10735        shifted - 0.5
10736    };
10737    // Range-check before casting back to i128 — the cast itself is
10738    // saturating in Rust, which would silently truncate huge inputs.
10739    if !(-1e38..=1e38).contains(&biased) {
10740        return Err(EngineError::Unsupported(alloc::format!(
10741            "value {x} overflows NUMERIC range for column `{col_name}`"
10742        )));
10743    }
10744    let scaled = biased as i128;
10745    check_precision(scaled, precision, col_name)?;
10746    Ok(Value::Numeric { scaled, scale })
10747}
10748
/// v7.17.0 Phase 3.P0-67 — parse PG-canonical decimal text into
/// `(mantissa: i128, source_scale: u8)`.
///
/// Accepted shape (after trimming surrounding whitespace): an optional
/// `+` / `-`, then ASCII digits with at most one `.`; either side of
/// the dot may be empty, but not both. The scale is the number of
/// digits after the dot.
///
/// Returns `None` for anything else — scientific notation, embedded
/// spaces, thousand separators, more than 255 fractional digits, or a
/// mantissa that does not fit an `i128`. `coerce_value` turns that
/// `None` into a TypeMismatch error.
fn parse_numeric_text(s: &str) -> Option<(i128, u8)> {
    let trimmed = s.trim();
    let (negative, unsigned) = if let Some(tail) = trimmed.strip_prefix('-') {
        (true, tail)
    } else if let Some(tail) = trimmed.strip_prefix('+') {
        (false, tail)
    } else {
        (false, trimmed)
    };
    // Covers both the empty string and a bare sign.
    if unsigned.is_empty() {
        return None;
    }
    let (int_part, frac_part) = unsigned.split_once('.').unwrap_or((unsigned, ""));
    // A lone "." carries no digits at all.
    if int_part.is_empty() && frac_part.is_empty() {
        return None;
    }
    // Digits only on both sides. This is also what rejects exponent
    // markers (`e` / `E`), a second dot, inner whitespace and
    // separators — none of them are ASCII digits.
    let digits_only = |part: &str| part.bytes().all(|b| b.is_ascii_digit());
    if !digits_only(int_part) || !digits_only(frac_part) {
        return None;
    }
    let scale = u8::try_from(frac_part.len()).ok()?;
    // Fold the digits straight into the mantissa. Negative inputs are
    // accumulated downwards so that `i128::MIN` stays representable.
    let mantissa = int_part
        .bytes()
        .chain(frac_part.bytes())
        .try_fold(0_i128, |acc, b| {
            let digit = i128::from(b - b'0');
            let widened = acc.checked_mul(10)?;
            if negative {
                widened.checked_sub(digit)
            } else {
                widened.checked_add(digit)
            }
        })?;
    Some((mantissa, scale))
}
10809
10810/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
10811/// multiplies by 10; going down rounds half-away-from-zero.
10812fn numeric_rescale(
10813    scaled: i128,
10814    src_scale: u8,
10815    precision: u8,
10816    dst_scale: u8,
10817    col_name: &str,
10818) -> Result<Value, EngineError> {
10819    let new_scaled = if dst_scale >= src_scale {
10820        let bump = pow10_i128(dst_scale - src_scale);
10821        scaled.checked_mul(bump).ok_or_else(|| {
10822            EngineError::Unsupported(alloc::format!(
10823                "overflow rescaling NUMERIC for column `{col_name}`"
10824            ))
10825        })?
10826    } else {
10827        let drop = pow10_i128(src_scale - dst_scale);
10828        let half = drop / 2;
10829        if scaled >= 0 {
10830            (scaled + half) / drop
10831        } else {
10832            (scaled - half) / drop
10833        }
10834    };
10835    check_precision(new_scaled, precision, col_name)?;
10836    Ok(Value::Numeric {
10837        scaled: new_scaled,
10838        scale: dst_scale,
10839    })
10840}
10841
/// Drop the fractional part of a scaled integer, returning the integer
/// portion (toward zero). Used for NUMERIC → INT casts.
///
/// `scaled` is the mantissa and `scale` the number of fractional
/// digits it carries, i.e. the value is `scaled / 10^scale`.
const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
    // 10^38 is the largest power of ten an i128 can hold. For deeper
    // scales the divisor would overflow, but the answer is already
    // known: every i128 magnitude is below 10^39, so nothing is left
    // in front of the decimal point.
    const MAX_POW10_EXP: u8 = 38;
    if scale > MAX_POW10_EXP {
        return 0;
    }
    // `as` is a lossless u8 → u32 widening (`From` is not usable in a
    // `const fn`). Scale 0 divides by 1 and returns `scaled` as-is.
    scaled / 10_i128.pow(scale as u32)
}
10851
10852/// Verify a scaled NUMERIC value fits the column's declared precision.
10853/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
10854/// skip the check there.
10855fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
10856    if precision == 0 {
10857        return Ok(());
10858    }
10859    let limit = pow10_i128(precision);
10860    if scaled.unsigned_abs() >= limit.unsigned_abs() {
10861        return Err(EngineError::Unsupported(alloc::format!(
10862            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
10863        )));
10864    }
10865    Ok(())
10866}
10867
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// Only exponents up to 38 are representable: `10^39` exceeds
/// `i128::MAX`, so larger `p` overflows the multiplication. Callers
/// must keep `p` within that range.
const fn pow10_i128_const(p: u8) -> i128 {
    // `as` is a lossless u8 → u32 widening (`From` is not usable in a
    // `const fn`).
    10_i128.pow(p as u32)
}

/// Non-`const` entry point for `10^p`; same contract as
/// `pow10_i128_const`, which it forwards to.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
10881
10882/// Walk a parsed `Statement`, swapping any `NOW()` /
10883/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
10884/// literal cast that wraps the engine's per-statement clock reading.
10885/// When `now_micros` is `None`, calls stay as-is and surface as
10886/// `unknown function` at eval time — keeps the error path explicit.
10887/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
10888/// replace every subquery node with a materialised literal. SPG
10889/// only supports uncorrelated subqueries — the inner SELECT does
10890/// not see outer-row columns, so the result is the same for every
10891/// outer row and can be evaluated once.
10892///
10893/// Returns the rewritten statement; the caller passes this to the
10894/// regular row-loop executor which no longer sees Subquery nodes
10895/// in its tree.
10896impl Engine {
10897    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
10898    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
10899    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
10900    /// 1. Apply the WHERE filter.
10901    /// 2. For each unique `WindowFunction` node in the projection,
10902    ///    partition + sort, compute the per-row value.
10903    /// 3. Append the window values as synthetic columns (`__win_N`)
10904    ///    to the row schema.
10905    /// 4. Rewrite the projection to read those columns.
10906    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
10907    #[allow(
10908        clippy::too_many_lines,
10909        clippy::type_complexity,
10910        clippy::needless_range_loop
10911    )] // window-eval is one cohesive pipe; splitting fragments
10912    fn exec_select_with_window(
10913        &self,
10914        stmt: &SelectStatement,
10915        cancel: CancelToken<'_>,
10916    ) -> Result<QueryResult, EngineError> {
10917        let from = stmt.from.as_ref().ok_or_else(|| {
10918            EngineError::Unsupported("window functions require a FROM clause".into())
10919        })?;
10920        // v7.17.0 Phase 3.P0-43 — JOIN + window functions. Phase
10921        // 3.6 rejected this combination outright ("queued for
10922        // v5.x"); P0-43 materialises the join + WHERE through the
10923        // existing nested-loop helper and runs the window pipeline
10924        // on the joined row set with the combined `alias.col`
10925        // schema. The window expressions resolve through the
10926        // qualifier-aware column resolver same as the aggregate /
10927        // projection paths on JOIN.
10928        let (schema_cols_owned, alias_opt): (Vec<ColumnSchema>, Option<&str>);
10929        let filtered: Vec<Row>;
10930        if from.joins.is_empty() {
10931            let primary = &from.primary;
10932            let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
10933                StorageError::TableNotFound {
10934                    name: primary.name.clone(),
10935                }
10936            })?;
10937            let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
10938            schema_cols_owned = table.schema().columns.clone();
10939            alias_opt = Some(alias);
10940            // Materialise WHERE-filtered rows owned so the JOIN
10941            // and single-table paths share a single downstream
10942            // shape. The clone is cheap relative to the window
10943            // computation that follows.
10944            let ctx = self.ev_ctx(&schema_cols_owned, alias_opt);
10945            let mut owned: Vec<Row> = Vec::new();
10946            for (i, row) in table.rows().iter().enumerate() {
10947                if i.is_multiple_of(256) {
10948                    cancel.check()?;
10949                }
10950                if let Some(w) = &stmt.where_ {
10951                    let cond = eval::eval_expr(w, row, &ctx)?;
10952                    if !matches!(cond, Value::Bool(true)) {
10953                        continue;
10954                    }
10955                }
10956                owned.push(row.clone());
10957            }
10958            filtered = owned;
10959        } else {
10960            let (combined_schema, rows) = self.build_joined_filtered_rows(
10961                from,
10962                stmt.where_.as_ref(),
10963                cancel,
10964                None,
10965                &mut ByteBudget::new(self.max_query_bytes),
10966            )?;
10967            schema_cols_owned = combined_schema;
10968            alias_opt = None;
10969            filtered = rows;
10970        }
10971        let schema_cols = &schema_cols_owned;
10972        let ctx = self.ev_ctx(schema_cols, alias_opt);
10973        let alias = alias_opt.unwrap_or("");
10974        let n_rows = filtered.len();
10975        // Borrow refs into the owned row vec once so the downstream
10976        // `compute_window_partition` call (which takes `&[&Row]`) and
10977        // the per-row eval loops share a single backing buffer.
10978        let filtered_refs: Vec<&Row> = filtered.iter().collect();
10979
10980        // 2) Collect unique window function nodes from projection.
10981        let mut window_nodes: Vec<Expr> = Vec::new();
10982        for item in &stmt.items {
10983            if let SelectItem::Expr { expr, .. } = item {
10984                collect_window_nodes(expr, &mut window_nodes);
10985            }
10986        }
10987
10988        // 3) For each window, compute per-row value.
10989        // Index: same order as window_nodes; for row i, win_vals[w][i].
10990        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
10991        for wnode in &window_nodes {
10992            let Expr::WindowFunction {
10993                name,
10994                args,
10995                partition_by,
10996                order_by,
10997                frame,
10998                null_treatment,
10999            } = wnode
11000            else {
11001                unreachable!("collect_window_nodes pushes only WindowFunction");
11002            };
11003            // Compute (partition_key, order_key, original_index) for each row.
11004            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)> =
11005                Vec::with_capacity(n_rows);
11006            for (i, row) in filtered.iter().enumerate() {
11007                let pkey: Vec<Value> = partition_by
11008                    .iter()
11009                    .map(|p| eval::eval_expr(p, row, &ctx))
11010                    .collect::<Result<_, _>>()?;
11011                let okey: Vec<(Value, bool, Option<bool>)> = order_by
11012                    .iter()
11013                    .map(|(e, desc, nf)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc, *nf)))
11014                    .collect::<Result<_, _>>()?;
11015                indexed.push((pkey, okey, i));
11016            }
11017            // Sort by (partition_key, order_key). Partition key uses
11018            // a stable encoded form; order key respects ASC/DESC.
11019            indexed.sort_by(|a, b| {
11020                let p_cmp = partition_key_cmp(&a.0, &b.0);
11021                if p_cmp != core::cmp::Ordering::Equal {
11022                    return p_cmp;
11023                }
11024                order_key_cmp(&a.1, &b.1)
11025            });
11026            // Per-partition compute.
11027            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
11028            let mut p_start = 0;
11029            while p_start < indexed.len() {
11030                let mut p_end = p_start + 1;
11031                while p_end < indexed.len()
11032                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
11033                        == core::cmp::Ordering::Equal
11034                {
11035                    p_end += 1;
11036                }
11037                // Compute the function within this partition slice.
11038                compute_window_partition(
11039                    name,
11040                    args,
11041                    !order_by.is_empty(),
11042                    frame.as_ref(),
11043                    *null_treatment,
11044                    &indexed[p_start..p_end],
11045                    &filtered_refs,
11046                    &ctx,
11047                    &mut out_vals,
11048                )?;
11049                p_start = p_end;
11050            }
11051            win_vals.push(out_vals);
11052        }
11053
11054        // 4) Build extended schema: original columns + synthetic.
11055        let mut ext_cols = schema_cols.clone();
11056        for i in 0..window_nodes.len() {
11057            ext_cols.push(ColumnSchema::new(
11058                alloc::format!("__win_{i}"),
11059                DataType::Text, // type doesn't matter for projection eval
11060                true,
11061            ));
11062        }
11063        // 5) Build extended rows: each row gets its window values appended.
11064        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
11065        for i in 0..n_rows {
11066            let mut values = filtered[i].values.clone();
11067            for w in 0..window_nodes.len() {
11068                values.push(win_vals[w][i].clone());
11069            }
11070            ext_rows.push(Row::new(values));
11071        }
11072        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
11073        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
11074        for item in &stmt.items {
11075            let new_item = match item {
11076                SelectItem::Wildcard => SelectItem::Wildcard,
11077                SelectItem::Expr { expr, alias } => {
11078                    let mut e = expr.clone();
11079                    rewrite_window_to_columns(&mut e, &window_nodes);
11080                    SelectItem::Expr {
11081                        expr: e,
11082                        alias: alias.clone(),
11083                    }
11084                }
11085            };
11086            rewritten_items.push(new_item);
11087        }
11088
11089        // 7) Project into final rows. JOIN case uses None so the
11090        // qualifier check in `resolve_column` falls through to the
11091        // composite `alias.col` schema lookup; single-table case
11092        // keeps the bare alias so `bare_col` resolution still
11093        // works for the projection's per-row column references.
11094        let ext_ctx = EvalContext::new(&ext_cols, alias_opt);
11095        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
11096        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
11097        for (i, row) in ext_rows.iter().enumerate() {
11098            if i.is_multiple_of(256) {
11099                cancel.check()?;
11100            }
11101            let mut values = Vec::with_capacity(projection.len());
11102            for p in &projection {
11103                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
11104            }
11105            let order_keys = if stmt.order_by.is_empty() {
11106                Vec::new()
11107            } else {
11108                let mut keys = Vec::with_capacity(stmt.order_by.len());
11109                for o in &stmt.order_by {
11110                    let mut e = o.expr.clone();
11111                    rewrite_window_to_columns(&mut e, &window_nodes);
11112                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
11113                    keys.push(value_to_order_key(&key)?);
11114                }
11115                keys
11116            };
11117            tagged.push((order_keys, Row::new(values)));
11118        }
11119        // ORDER BY + LIMIT/OFFSET on the projected rows.
11120        if !stmt.order_by.is_empty() {
11121            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
11122            sort_by_keys(&mut tagged, &descs);
11123        }
11124        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
11125        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
11126        let final_cols: Vec<ColumnSchema> = projection
11127            .into_iter()
11128            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
11129            .collect();
11130        Ok(QueryResult::Rows {
11131            columns: final_cols,
11132            rows: out_rows,
11133        })
11134    }
11135
11136    /// v4.11: materialise each CTE into a temp table inside a
11137    /// cloned catalog, then run the body SELECT against a fresh
11138    /// engine instance that owns the enriched catalog. The clone
11139    /// is moderately expensive — only paid by CTE-bearing queries.
11140    /// Subqueries inside CTE bodies / the main body resolve as
11141    /// usual; `clock_fn` is propagated so `NOW()` lines up.
11142    /// v7.16.2 — mailrs round-10 A.3. Materialise the
11143    /// `information_schema.*` / `pg_catalog.*` virtual views
11144    /// the SELECT references, then re-execute the SELECT
11145    /// against an enriched catalog where those views are real
11146    /// tables. Same pattern as `exec_with_ctes`. The temp
11147    /// engine carries `meta_views_materialised = true` so its
11148    /// own meta-dispatch short-circuits — without that we'd
11149    /// infinite-recurse since the temp catalog's view name
11150    /// still starts with `__spg_info_` and re-triggers the
11151    /// check.
11152    fn exec_select_with_meta_views(
11153        &self,
11154        stmt: &SelectStatement,
11155        cancel: CancelToken<'_>,
11156    ) -> Result<QueryResult, EngineError> {
11157        let mut needed: alloc::collections::BTreeSet<String> = alloc::collections::BTreeSet::new();
11158        collect_meta_view_names(stmt, &mut needed);
11159        let mut catalog = self.active_catalog().clone();
11160        for view in &needed {
11161            if catalog.get(view).is_some() {
11162                continue;
11163            }
11164            match view.as_str() {
11165                "__spg_info_columns" => {
11166                    let (schema, rows) = synth_information_schema_columns(self.active_catalog());
11167                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11168                }
11169                "__spg_info_tables" => {
11170                    let (schema, rows) = synth_information_schema_tables(self.active_catalog());
11171                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11172                }
11173                "__spg_pg_class" => {
11174                    let (schema, rows) = synth_pg_class(self.active_catalog());
11175                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11176                }
11177                "__spg_pg_attribute" => {
11178                    let (schema, rows) = synth_pg_attribute(self.active_catalog());
11179                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11180                }
11181                // v7.17.0 Phase 3.P0-50 — pg_catalog.pg_type for
11182                // sqlx / SQLAlchemy / Diesel / pgAdmin lookups.
11183                "__spg_pg_type" => {
11184                    let (schema, rows) = synth_pg_type(self.active_catalog());
11185                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11186                }
11187                // v7.17.0 Phase 3.P0-51 — pg_catalog.pg_proc for
11188                // function-name introspection (ORM / pgAdmin).
11189                "__spg_pg_proc" => {
11190                    let (schema, rows) = synth_pg_proc(self.active_catalog());
11191                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11192                }
11193                // v7.24 (round-16 D) — pg_catalog.pg_trigger. The
11194                // round-16 "why doesn't prod fire the trigger"
11195                // question was unanswerable because triggers had NO
11196                // introspection surface; tgname/tgenabled plus the
11197                // pragmatic relname/timing/events/function columns
11198                // make "is it registered and enabled" a one-liner.
11199                "__spg_pg_trigger" => {
11200                    let (schema, rows) = synth_pg_trigger(self.active_catalog());
11201                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11202                }
11203                // v7.17.0 Phase 3.P0-52 — pg_catalog.pg_namespace
11204                // (schema list for admin tools' tree views).
11205                "__spg_pg_namespace" => {
11206                    let (schema, rows) = synth_pg_namespace(self.active_catalog());
11207                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11208                }
11209                // v7.17.0 Phase 3.P0-53 — pg_catalog.pg_indexes view
11210                // for pgAdmin / DataGrip "indexes per table" listings.
11211                "__spg_pg_indexes" => {
11212                    let (schema, rows) = synth_pg_indexes(self.active_catalog());
11213                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11214                }
11215                // v7.17.0 Phase 3.P0-53 — pg_catalog.pg_index (raw)
11216                // for index introspection by ORM compilers.
11217                "__spg_pg_index" => {
11218                    let (schema, rows) = synth_pg_index_raw(self.active_catalog());
11219                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11220                }
11221                // v7.17.0 Phase 3.P0-54 — pg_catalog.pg_constraint
11222                // for FK / UNIQUE / PK / CHECK introspection.
11223                "__spg_pg_constraint" => {
11224                    let (schema, rows) = synth_pg_constraint(self.active_catalog());
11225                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11226                }
11227                // v7.17.0 Phase 3.P0-55 — pg_catalog.pg_database /
11228                // pg_roles / pg_user. SPG is single-database so
11229                // pg_database surfaces just `postgres`; pg_roles
11230                // / pg_user walk the engine's UserStore.
11231                "__spg_pg_database" => {
11232                    let (schema, rows) = synth_pg_database(self.active_catalog());
11233                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11234                }
11235                "__spg_pg_roles" | "__spg_pg_user" => {
11236                    let (schema, rows) = synth_pg_roles(self);
11237                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11238                }
11239                // v7.17.0 Phase 3.P0-56 — pg_catalog.pg_views. PG's
11240                // pg_views surfaces every CREATE VIEW result; SPG
11241                // ships one row per declared view from the catalog.
11242                "__spg_pg_views" => {
11243                    let (schema, rows) = synth_pg_views(self.active_catalog());
11244                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11245                }
11246                // v7.17.0 Phase 3.P0-56 — pg_catalog.pg_matviews.
11247                // SPG has no materialised view surface yet so the
11248                // table shares pg_views's schema but stays empty.
11249                "__spg_pg_matviews" => {
11250                    let (schema, _) = synth_pg_views(self.active_catalog());
11251                    materialise_meta_view(&mut catalog, view, schema, Vec::new())?;
11252                }
11253                // pg_catalog.pg_extension — native capability list
11254                // (mailrs embed round-12).
11255                "__spg_pg_extension" => {
11256                    let (schema, rows) = synth_pg_extension();
11257                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11258                }
11259                // v7.17.0 Phase 3.P0-57 — pg_catalog.pg_settings.
11260                "__spg_pg_settings" => {
11261                    let (schema, rows) = synth_pg_settings(self);
11262                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11263                }
11264                // v7.17.0 Phase 3.P0-63 — information_schema.KEY_COLUMN_USAGE.
11265                "__spg_info_key_column_usage" => {
11266                    let (schema, rows) = synth_info_key_column_usage(self.active_catalog());
11267                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11268                }
11269                // v7.17.0 Phase 3.P0-64 — information_schema.REFERENTIAL_CONSTRAINTS.
11270                "__spg_info_referential_constraints" => {
11271                    let (schema, rows) = synth_info_referential_constraints(self.active_catalog());
11272                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11273                }
11274                // v7.17.0 Phase 3.P0-64 — information_schema.STATISTICS.
11275                "__spg_info_statistics" => {
11276                    let (schema, rows) = synth_info_statistics(self.active_catalog());
11277                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11278                }
11279                // v7.17.0 Phase 3.P0-64 — information_schema.ROUTINES.
11280                "__spg_info_routines" => {
11281                    let (schema, rows) = synth_info_routines();
11282                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11283                }
11284                // v7.17.0 Phase 3.P0-65 — mysql.user / mysql.db.
11285                "__spg_mysql_user" => {
11286                    let (schema, rows) = synth_mysql_user(self);
11287                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11288                }
11289                "__spg_mysql_db" => {
11290                    let (schema, rows) = synth_mysql_db();
11291                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11292                }
11293                _ => {
11294                    return Err(EngineError::Unsupported(alloc::format!(
11295                        "meta view {view:?} is not yet materialisable; \
11296                         v7.16.2 covers information_schema.columns / .tables \
11297                         and pg_catalog.pg_class / pg_attribute; \
11298                         v7.17.0 P0-50..P0-57 add pg_type / pg_proc / pg_namespace / \
11299                         pg_indexes / pg_index / pg_constraint / pg_database / pg_roles / \
11300                         pg_user / pg_views / pg_matviews / pg_settings"
11301                    )));
11302                }
11303            }
11304        }
11305        let mut temp = Engine::restore(catalog);
11306        if let Some(c) = self.clock {
11307            temp = temp.with_clock(c);
11308        }
11309        if let Some(f) = self.salt_fn {
11310            temp = temp.with_salt_fn(f);
11311        }
11312        temp.meta_views_materialised = true;
11313        temp.exec_select_cancel(stmt, cancel)
11314    }
11315
11316    fn exec_with_ctes(
11317        &self,
11318        stmt: &SelectStatement,
11319        cancel: CancelToken<'_>,
11320    ) -> Result<QueryResult, EngineError> {
11321        cancel.check()?;
11322        let mut catalog = self.active_catalog().clone();
11323        for cte in &stmt.ctes {
11324            if catalog.get(&cte.name).is_some() {
11325                return Err(EngineError::Unsupported(alloc::format!(
11326                    "CTE name {:?} shadows an existing table; rename the CTE",
11327                    cte.name
11328                )));
11329            }
11330            let (columns, rows) = if cte.recursive {
11331                self.materialise_recursive_cte(cte, &catalog, cancel)?
11332            } else {
11333                // v7.25 (round-17) — run the body against the
11334                // ACCUMULATED catalog so a CTE can reference every
11335                // CTE declared before it (`WITH a AS (…), b AS
11336                // (SELECT … FROM a)`). Executing on `self` lost the
11337                // already-materialised CTE tables.
11338                let mut cte_engine = Engine::restore(catalog.clone());
11339                if let Some(c) = self.clock {
11340                    cte_engine = cte_engine.with_clock(c);
11341                }
11342                if let Some(f) = self.salt_fn {
11343                    cte_engine = cte_engine.with_salt_fn(f);
11344                }
11345                let body_result = cte_engine.exec_select_cancel(&cte.body, cancel)?;
11346                let QueryResult::Rows { columns, rows } = body_result else {
11347                    return Err(EngineError::Unsupported(alloc::format!(
11348                        "CTE {:?} body did not return rows",
11349                        cte.name
11350                    )));
11351                };
11352                (columns, rows)
11353            };
11354            // v4.22: the projection builder labels any non-column
11355            // expression as Text — including literal SELECT 1.
11356            // Promote each column's type to whatever the rows
11357            // actually carry so the CTE storage table accepts them.
11358            let inferred = infer_column_types(&columns, &rows);
11359            let mut columns = inferred;
11360            // v4.22: apply optional `WITH name(a, b, c)` overrides.
11361            if !cte.column_overrides.is_empty() {
11362                if cte.column_overrides.len() != columns.len() {
11363                    return Err(EngineError::Unsupported(alloc::format!(
11364                        "CTE {:?} column list has {} names but body returns {} columns",
11365                        cte.name,
11366                        cte.column_overrides.len(),
11367                        columns.len()
11368                    )));
11369                }
11370                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
11371                    col.name.clone_from(name);
11372                }
11373            }
11374            let schema = TableSchema::new(cte.name.clone(), columns);
11375            catalog.create_table(schema).map_err(EngineError::Storage)?;
11376            let table = catalog
11377                .get_mut(&cte.name)
11378                .expect("just-created CTE table must exist");
11379            for row in rows {
11380                table.insert(row).map_err(EngineError::Storage)?;
11381            }
11382        }
11383        // Strip CTEs from the body before running on the temp engine
11384        // so we don't recurse forever.
11385        let mut body = stmt.clone();
11386        body.ctes = Vec::new();
11387        let mut temp = Engine::restore(catalog);
11388        if let Some(c) = self.clock {
11389            temp = temp.with_clock(c);
11390        }
11391        if let Some(f) = self.salt_fn {
11392            temp = temp.with_salt_fn(f);
11393        }
11394        temp.exec_select_cancel(&body, cancel)
11395    }
11396
11397    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
11398    /// UNION (or UNION ALL) of an anchor that does not reference
11399    /// the CTE name, and one or more recursive terms that do. The
11400    /// anchor runs first; each subsequent iteration runs the
11401    /// recursive term against a temp catalog where the CTE name is
11402    /// bound to the *previous* iteration's output. Iteration stops
11403    /// when the recursive term yields no rows; UNION (DISTINCT)
11404    /// deduplicates against the accumulated result, UNION ALL does
11405    /// not. A hard cap on total rows prevents runaway queries.
11406    #[allow(clippy::too_many_lines)]
11407    fn materialise_recursive_cte(
11408        &self,
11409        cte: &spg_sql::ast::Cte,
11410        base_catalog: &Catalog,
11411        cancel: CancelToken<'_>,
11412    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
11413        const MAX_TOTAL_ROWS: usize = 1_000_000;
11414        const MAX_ITERATIONS: usize = 100_000;
11415        cancel.check()?;
11416        if cte.body.unions.is_empty() {
11417            return Err(EngineError::Unsupported(alloc::format!(
11418                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
11419                cte.name
11420            )));
11421        }
11422        // Anchor: the body's leading SELECT, with unions stripped.
11423        let mut anchor = cte.body.clone();
11424        let union_terms = core::mem::take(&mut anchor.unions);
11425        anchor.ctes = Vec::new();
11426        // Anchor must not reference the CTE name.
11427        if select_refers_to(&anchor, &cte.name) {
11428            return Err(EngineError::Unsupported(alloc::format!(
11429                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
11430                cte.name
11431            )));
11432        }
11433        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
11434        let QueryResult::Rows {
11435            columns: anchor_cols,
11436            rows: anchor_rows,
11437        } = anchor_result
11438        else {
11439            return Err(EngineError::Unsupported(alloc::format!(
11440                "WITH RECURSIVE {:?}: anchor did not return rows",
11441                cte.name
11442            )));
11443        };
11444        // The projection builder labels non-column expressions Text;
11445        // refine column types from the anchor's actual values so the
11446        // intermediate iter-catalog tables accept them.
11447        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
11448        if !cte.column_overrides.is_empty() {
11449            if cte.column_overrides.len() != columns.len() {
11450                return Err(EngineError::Unsupported(alloc::format!(
11451                    "CTE {:?} column list has {} names but anchor returns {} columns",
11452                    cte.name,
11453                    cte.column_overrides.len(),
11454                    columns.len()
11455                )));
11456            }
11457            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
11458                col.name.clone_from(name);
11459            }
11460        }
11461        let mut all_rows: Vec<Row> = anchor_rows.clone();
11462        let mut working_set: Vec<Row> = anchor_rows;
11463        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
11464        // Track at least one "all UNION ALL" flag — if every union
11465        // kind is ALL we skip the dedup step (faster + matches PG).
11466        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
11467        if !all_union_all {
11468            for r in &all_rows {
11469                seen.insert(encode_row_key(r));
11470            }
11471        }
11472        for iter in 0..MAX_ITERATIONS {
11473            cancel.check()?;
11474            if working_set.is_empty() {
11475                break;
11476            }
11477            // Build a fresh catalog: base + CTE bound to working_set.
11478            let mut iter_catalog = base_catalog.clone();
11479            let schema = TableSchema::new(cte.name.clone(), columns.clone());
11480            iter_catalog
11481                .create_table(schema)
11482                .map_err(EngineError::Storage)?;
11483            {
11484                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
11485                for row in &working_set {
11486                    table.insert(row.clone()).map_err(EngineError::Storage)?;
11487                }
11488            }
11489            let mut iter_engine = Engine::restore(iter_catalog);
11490            if let Some(c) = self.clock {
11491                iter_engine = iter_engine.with_clock(c);
11492            }
11493            if let Some(f) = self.salt_fn {
11494                iter_engine = iter_engine.with_salt_fn(f);
11495            }
11496            // Run each recursive term in sequence and collect new rows.
11497            let mut next_set: Vec<Row> = Vec::new();
11498            for (_, term) in &union_terms {
11499                let mut term = term.clone();
11500                term.ctes = Vec::new();
11501                let r = iter_engine.exec_select_cancel(&term, cancel)?;
11502                let QueryResult::Rows {
11503                    columns: rc,
11504                    rows: rs,
11505                } = r
11506                else {
11507                    return Err(EngineError::Unsupported(alloc::format!(
11508                        "WITH RECURSIVE {:?}: recursive term did not return rows",
11509                        cte.name
11510                    )));
11511                };
11512                if rc.len() != columns.len() {
11513                    return Err(EngineError::Unsupported(alloc::format!(
11514                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
11515                        cte.name,
11516                        rc.len(),
11517                        columns.len()
11518                    )));
11519                }
11520                for row in rs {
11521                    if !all_union_all {
11522                        let key = encode_row_key(&row);
11523                        if !seen.insert(key) {
11524                            continue;
11525                        }
11526                    }
11527                    next_set.push(row);
11528                }
11529            }
11530            if next_set.is_empty() {
11531                break;
11532            }
11533            all_rows.extend(next_set.iter().cloned());
11534            working_set = next_set;
11535            if all_rows.len() > MAX_TOTAL_ROWS {
11536                return Err(EngineError::Unsupported(alloc::format!(
11537                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
11538                    cte.name
11539                )));
11540            }
11541            if iter + 1 == MAX_ITERATIONS {
11542                return Err(EngineError::Unsupported(alloc::format!(
11543                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
11544                    cte.name
11545                )));
11546            }
11547        }
11548        Ok((columns, all_rows))
11549    }
11550
11551    fn resolve_select_subqueries(
11552        &self,
11553        stmt: &mut SelectStatement,
11554        cancel: CancelToken<'_>,
11555    ) -> Result<(), EngineError> {
11556        for item in &mut stmt.items {
11557            if let SelectItem::Expr { expr, .. } = item {
11558                self.resolve_expr_subqueries(expr, cancel)?;
11559            }
11560        }
11561        if let Some(w) = &mut stmt.where_ {
11562            self.resolve_expr_subqueries(w, cancel)?;
11563        }
11564        // v7.24.1 — JOIN ON conditions can carry subqueries too;
11565        // they were never walked, so even an UNCORRELATED subquery
11566        // in ON hit "subquery reached row eval".
11567        if let Some(from) = &mut stmt.from {
11568            for j in &mut from.joins {
11569                if let Some(on) = &mut j.on {
11570                    self.resolve_expr_subqueries(on, cancel)?;
11571                }
11572            }
11573        }
11574        if let Some(gs) = &mut stmt.group_by {
11575            for g in gs {
11576                self.resolve_expr_subqueries(g, cancel)?;
11577            }
11578        }
11579        if let Some(h) = &mut stmt.having {
11580            self.resolve_expr_subqueries(h, cancel)?;
11581        }
11582        for o in &mut stmt.order_by {
11583            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
11584        }
11585        for (_, peer) in &mut stmt.unions {
11586            self.resolve_select_subqueries(peer, cancel)?;
11587        }
11588        Ok(())
11589    }
11590
11591    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
11592    fn resolve_expr_subqueries(
11593        &self,
11594        e: &mut Expr,
11595        cancel: CancelToken<'_>,
11596    ) -> Result<(), EngineError> {
11597        // Replace-on-this-node cases first.
11598        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
11599            *e = replacement;
11600            return Ok(());
11601        }
11602        match e {
11603            Expr::AggregateOrdered { call, order_by, .. } => {
11604                self.resolve_expr_subqueries(call, cancel)?;
11605                for o in order_by.iter_mut() {
11606                    self.resolve_expr_subqueries(&mut o.expr, cancel)?;
11607                }
11608            }
11609            Expr::Binary { lhs, rhs, .. } => {
11610                self.resolve_expr_subqueries(lhs, cancel)?;
11611                self.resolve_expr_subqueries(rhs, cancel)?;
11612            }
11613            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
11614                self.resolve_expr_subqueries(expr, cancel)?;
11615            }
11616            Expr::FunctionCall { args, .. } => {
11617                for a in args {
11618                    self.resolve_expr_subqueries(a, cancel)?;
11619                }
11620            }
11621            Expr::Like { expr, pattern, .. } => {
11622                self.resolve_expr_subqueries(expr, cancel)?;
11623                self.resolve_expr_subqueries(pattern, cancel)?;
11624            }
11625            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
11626            // v4.12 window functions — recurse into args + ORDER BY
11627            // + PARTITION BY in case they carry inner subqueries.
11628            Expr::WindowFunction {
11629                args,
11630                partition_by,
11631                order_by,
11632                ..
11633            } => {
11634                for a in args {
11635                    self.resolve_expr_subqueries(a, cancel)?;
11636                }
11637                for p in partition_by {
11638                    self.resolve_expr_subqueries(p, cancel)?;
11639                }
11640                for (e, _, _) in order_by {
11641                    self.resolve_expr_subqueries(e, cancel)?;
11642                }
11643            }
11644            // Subquery nodes are handled in subquery_replacement
11645            // (which returned None — defensive no-op); Literal /
11646            // Column are leaves.
11647            Expr::ScalarSubquery(_)
11648            | Expr::Exists { .. }
11649            | Expr::InSubquery { .. }
11650            | Expr::Literal(_)
11651            | Expr::Placeholder(_)
11652            | Expr::Column(_) => {}
11653            // v7.30.2 — list elements can carry scalar subqueries
11654            // (`x IN (1, (SELECT …))`).
11655            Expr::InList { expr, list, .. } => {
11656                self.resolve_expr_subqueries(expr, cancel)?;
11657                for item in list {
11658                    self.resolve_expr_subqueries(item, cancel)?;
11659                }
11660            }
11661            // v7.10.10 — recurse children.
11662            Expr::Array(items) => {
11663                for elem in items {
11664                    self.resolve_expr_subqueries(elem, cancel)?;
11665                }
11666            }
11667            Expr::ArraySubscript { target, index } => {
11668                self.resolve_expr_subqueries(target, cancel)?;
11669                self.resolve_expr_subqueries(index, cancel)?;
11670            }
11671            Expr::AnyAll { expr, array, .. } => {
11672                self.resolve_expr_subqueries(expr, cancel)?;
11673                self.resolve_expr_subqueries(array, cancel)?;
11674            }
11675            Expr::Case {
11676                operand,
11677                branches,
11678                else_branch,
11679            } => {
11680                if let Some(o) = operand {
11681                    self.resolve_expr_subqueries(o, cancel)?;
11682                }
11683                for (w, t) in branches {
11684                    self.resolve_expr_subqueries(w, cancel)?;
11685                    self.resolve_expr_subqueries(t, cancel)?;
11686                }
11687                if let Some(e) = else_branch {
11688                    self.resolve_expr_subqueries(e, cancel)?;
11689                }
11690            }
11691        }
11692        Ok(())
11693    }
11694
11695    /// v4.23: per-row eval that handles correlated subqueries.
11696    /// Equivalent to `eval::eval_expr` when the expression has no
11697    /// subqueries; otherwise clones the expression, substitutes
11698    /// outer-row columns into each surviving subquery node, runs
11699    /// the inner SELECT, and replaces the node with the literal
11700    /// result. Only the WHERE-filter call sites use this path so
11701    /// the uncorrelated fast path is preserved everywhere else.
11702    fn eval_expr_with_correlated(
11703        &self,
11704        expr: &Expr,
11705        row: &Row,
11706        ctx: &EvalContext<'_>,
11707        cancel: CancelToken<'_>,
11708        mut memo: Option<&mut memoize::MemoizeCache>,
11709    ) -> Result<Value, EngineError> {
11710        // v7.30.2 (mailrs round-25) — the has-subquery walk is
11711        // O(tree) and a materialised `IN (…)` list makes the tree
11712        // huge; cache the answer per expression address so the
11713        // per-row dispatch stops re-walking 24k list elements.
11714        let has_subq = if let Some(m) = memo.as_deref_mut() {
11715            let key = core::ptr::from_ref::<Expr>(expr) as usize;
11716            match m.has_subquery.get(&key) {
11717                Some(b) => *b,
11718                None => {
11719                    let b = expr_has_subquery(expr);
11720                    m.has_subquery.insert(key, b);
11721                    b
11722                }
11723            }
11724        } else {
11725            expr_has_subquery(expr)
11726        };
11727        if !has_subq {
11728            // A large materialised `IN (…)` list inside the WHERE
11729            // makes the plain eval O(rows × list); route through the
11730            // per-query membership set (built once, keyed by node
11731            // address) when one is reachable on the AND spine.
11732            if let Some(m) = memo.as_deref_mut()
11733                && expr_may_use_in_set(expr)
11734            {
11735                return eval_with_in_sets(expr, row, ctx, m);
11736            }
11737            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
11738        }
11739        // v7.29 (3c) - per-expression plan: the batch maps for this
11740        // host expression's scalar subqueries are looked up by the
11741        // expression's ADDRESS (stable across the row loop), so the
11742        // hot path does zero AST formatting. Building the plan (and
11743        // its Display-keyed group maps) happens once per expression.
11744        if let Some(m) = memo.as_deref_mut() {
11745            let key = core::ptr::from_ref::<Expr>(expr) as usize;
11746            // Plan hit: skip the collection walk entirely (it ran
11747            // once per group otherwise - 70k walks per inbox query).
11748            // The memo is per-query and host expressions outlive it,
11749            // so an address that hit once stays valid.
11750            let plan_hit = m.expr_plans.contains_key(&key);
11751            let mut subs: Vec<&SelectStatement> = Vec::new();
11752            if !plan_hit {
11753                collect_scalar_subqueries(expr, &mut subs);
11754            }
11755            if !plan_hit && !subs.is_empty() {
11756                let mut plan: Vec<Option<alloc::rc::Rc<memoize::GroupMap>>> =
11757                    Vec::with_capacity(subs.len());
11758                for sub in &subs {
11759                    let repr = alloc::format!("{sub}");
11760                    if !m.group_maps.contains_key(&repr) {
11761                        let built = self
11762                            .try_batch_correlated_scalar(sub, cancel)?
11763                            .map(alloc::rc::Rc::new);
11764                        m.group_maps.insert(repr.clone(), built);
11765                    }
11766                    plan.push(m.group_maps.get(&repr).cloned().flatten());
11767                }
11768                let mut template = expr.clone();
11769                hollow_scalar_subqueries(&mut template);
11770                m.expr_plans.insert(key, (subs.len(), plan, template));
11771            }
11772            if let Some((_, plan, template)) = m.expr_plans.get(&key)
11773                && !plan.is_empty()
11774                && plan.iter().all(|p| p.is_some())
11775            {
11776                // Fast path: every scalar subquery resolves via its
11777                // map; clone the HOLLOW template (subquery bodies
11778                // emptied at plan time - cloning full subquery ASTs
11779                // per row was the dominant malloc load), splice map
11780                // values, eval. Exists/IN subqueries (if any) still
11781                // drop to the resolver.
11782                let plan = plan.clone();
11783                let mut e = template.clone();
11784                let mut idx = 0usize;
11785                let ok = splice_planned_subqueries(&mut e, &plan, &mut idx, row, ctx)?;
11786                if ok {
11787                    if expr_has_subquery(&e) {
11788                        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
11789                    }
11790                    return eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval);
11791                }
11792            }
11793        }
11794        let mut e = expr.clone();
11795        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
11796        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
11797    }
11798
11799    fn resolve_correlated_in_expr(
11800        &self,
11801        e: &mut Expr,
11802        row: &Row,
11803        ctx: &EvalContext<'_>,
11804        cancel: CancelToken<'_>,
11805        mut memo: Option<&mut memoize::MemoizeCache>,
11806    ) -> Result<(), EngineError> {
11807        match e {
11808            Expr::AggregateOrdered { call, order_by, .. } => {
11809                self.resolve_correlated_in_expr(call, row, ctx, cancel, memo.as_deref_mut())?;
11810                for o in order_by.iter_mut() {
11811                    self.resolve_correlated_in_expr(
11812                        &mut o.expr,
11813                        row,
11814                        ctx,
11815                        cancel,
11816                        memo.as_deref_mut(),
11817                    )?;
11818                }
11819            }
11820            Expr::ScalarSubquery(inner) => {
11821                // v7.29 (round-22 phase 3) — batch path first: a
11822                // correlated scalar of the `inner_col = outer_col
11823                // [ORDER BY … LIMIT 1]` shape evaluates ONCE as a
11824                // grouped scan; per-row resolution becomes a map
11825                // lookup. 23.5k per-group executions (~900 ms) became
11826                // one scan + lookups.
11827                if memo.is_some() {
11828                    let repr = alloc::format!("{}", **inner);
11829                    let entry_known = memo
11830                        .as_ref()
11831                        .is_some_and(|m| m.group_maps.contains_key(&repr));
11832                    if !entry_known {
11833                        let built = self
11834                            .try_batch_correlated_scalar(inner, cancel)?
11835                            .map(alloc::rc::Rc::new);
11836                        if let Some(m) = memo.as_deref_mut() {
11837                            m.group_maps.insert(repr.clone(), built);
11838                        }
11839                    }
11840                    if let Some(m) = memo.as_deref_mut()
11841                        && let Some(Some(gm)) = m.group_maps.get(&repr)
11842                    {
11843                        let (outer_col, map) = gm.as_ref();
11844                        let key_v = eval::eval_expr(&Expr::Column(outer_col.clone()), row, ctx)
11845                            .map_err(EngineError::Eval)?;
11846                        let v = if matches!(key_v, Value::Null) {
11847                            Value::Null
11848                        } else {
11849                            map.get(&aggregate::encode_key(core::slice::from_ref(&key_v)))
11850                                .cloned()
11851                                .unwrap_or(Value::Null)
11852                        };
11853                        *e = value_to_literal_expr(v)?;
11854                        return Ok(());
11855                    }
11856                }
11857                // v6.2.6 — Memoize: build the cache key from the
11858                // pre-substitution subquery repr + the outer row's
11859                // values. Two outer rows with identical correlated
11860                // values hit the same entry.
11861                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
11862                    subquery_repr: alloc::format!("{}", **inner),
11863                    outer_values: row.values.clone(),
11864                });
11865                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
11866                    && let Some(cached) = cache.get(k)
11867                {
11868                    *e = value_to_literal_expr(cached)?;
11869                    return Ok(());
11870                }
11871                let mut s = (**inner).clone();
11872                substitute_outer_columns(&mut s, row, ctx);
11873                let r = self.exec_select_cancel(&s, cancel)?;
11874                let QueryResult::Rows { rows, .. } = r else {
11875                    return Err(EngineError::Unsupported(
11876                        "scalar subquery: inner did not return rows".into(),
11877                    ));
11878                };
11879                let value = match rows.as_slice() {
11880                    [] => Value::Null,
11881                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
11882                    _ => {
11883                        return Err(EngineError::Unsupported(alloc::format!(
11884                            "scalar subquery returned {} rows; expected 0 or 1",
11885                            rows.len()
11886                        )));
11887                    }
11888                };
11889                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
11890                    cache.insert(k, value.clone());
11891                }
11892                *e = value_to_literal_expr(value)?;
11893            }
11894            Expr::Exists { subquery, negated } => {
11895                let mut s = (**subquery).clone();
11896                substitute_outer_columns(&mut s, row, ctx);
11897                let r = self.exec_select_cancel(&s, cancel)?;
11898                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
11899                let bit = if *negated { !exists } else { exists };
11900                *e = Expr::Literal(Literal::Bool(bit));
11901            }
11902            Expr::InSubquery {
11903                expr: lhs,
11904                subquery,
11905                negated,
11906            } => {
11907                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
11908                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
11909                let mut s = (**subquery).clone();
11910                substitute_outer_columns(&mut s, row, ctx);
11911                let r = self.exec_select_cancel(&s, cancel)?;
11912                let QueryResult::Rows { columns, rows, .. } = r else {
11913                    return Err(EngineError::Unsupported(
11914                        "IN-subquery: inner did not return rows".into(),
11915                    ));
11916                };
11917                if columns.len() != 1 {
11918                    return Err(EngineError::Unsupported(alloc::format!(
11919                        "IN-subquery must project exactly one column; got {}",
11920                        columns.len()
11921                    )));
11922                }
11923                let mut found = false;
11924                let mut any_null = false;
11925                for r0 in rows {
11926                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
11927                    if v.is_null() {
11928                        any_null = true;
11929                        continue;
11930                    }
11931                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
11932                        found = true;
11933                        break;
11934                    }
11935                }
11936                let bit = if found {
11937                    !*negated
11938                } else if any_null {
11939                    return Err(EngineError::Unsupported(
11940                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
11941                    ));
11942                } else {
11943                    *negated
11944                };
11945                *e = Expr::Literal(Literal::Bool(bit));
11946            }
11947            Expr::Binary { lhs, rhs, .. } => {
11948                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
11949                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
11950            }
11951            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
11952                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
11953            }
11954            Expr::Like { expr, pattern, .. } => {
11955                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
11956                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
11957            }
11958            Expr::FunctionCall { args, .. } => {
11959                for a in args {
11960                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
11961                }
11962            }
11963            Expr::Extract { source, .. } => {
11964                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
11965            }
11966            Expr::WindowFunction { .. }
11967            | Expr::Literal(_)
11968            | Expr::Placeholder(_)
11969            | Expr::Column(_) => {}
11970            // v7.10.10 — recurse children.
11971            Expr::Array(items) => {
11972                for elem in items {
11973                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
11974                }
11975            }
11976            Expr::ArraySubscript { target, index } => {
11977                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
11978                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
11979            }
11980            Expr::AnyAll { expr, array, .. } => {
11981                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
11982                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
11983            }
11984            Expr::InList { expr, list, .. } => {
11985                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
11986                for item in list {
11987                    self.resolve_correlated_in_expr(item, row, ctx, cancel, memo.as_deref_mut())?;
11988                }
11989            }
11990            Expr::Case {
11991                operand,
11992                branches,
11993                else_branch,
11994            } => {
11995                if let Some(o) = operand {
11996                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
11997                }
11998                for (w, t) in branches {
11999                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
12000                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
12001                }
12002                if let Some(e) = else_branch {
12003                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
12004                }
12005            }
12006        }
12007        Ok(())
12008    }
12009
12010    fn subquery_replacement(
12011        &self,
12012        e: &Expr,
12013        cancel: CancelToken<'_>,
12014    ) -> Result<Option<Expr>, EngineError> {
12015        match e {
12016            Expr::ScalarSubquery(inner) => {
12017                let mut s = (**inner).clone();
12018                // Recurse into the inner SELECT first so nested
12019                // subqueries materialise bottom-up.
12020                self.resolve_select_subqueries(&mut s, cancel)?;
12021                let r = match self.exec_bare_select_cancel(&s, cancel) {
12022                    Ok(r) => r,
12023                    Err(e) if is_correlation_error(&e) => return Ok(None),
12024                    Err(e) => return Err(e),
12025                };
12026                let QueryResult::Rows { rows, .. } = r else {
12027                    return Err(EngineError::Unsupported(
12028                        "scalar subquery: inner statement did not return rows".into(),
12029                    ));
12030                };
12031                let value = match rows.as_slice() {
12032                    [] => Value::Null,
12033                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
12034                    _ => {
12035                        return Err(EngineError::Unsupported(alloc::format!(
12036                            "scalar subquery returned {} rows; expected 0 or 1",
12037                            rows.len()
12038                        )));
12039                    }
12040                };
12041                Ok(Some(value_to_literal_expr(value)?))
12042            }
12043            Expr::Exists { subquery, negated } => {
12044                let mut s = (**subquery).clone();
12045                self.resolve_select_subqueries(&mut s, cancel)?;
12046                let r = match self.exec_bare_select_cancel(&s, cancel) {
12047                    Ok(r) => r,
12048                    Err(e) if is_correlation_error(&e) => return Ok(None),
12049                    Err(e) => return Err(e),
12050                };
12051                let exists = match r {
12052                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
12053                    QueryResult::CommandOk { .. } => false,
12054                };
12055                let bit = if *negated { !exists } else { exists };
12056                Ok(Some(Expr::Literal(Literal::Bool(bit))))
12057            }
12058            Expr::InSubquery {
12059                expr,
12060                subquery,
12061                negated,
12062            } => {
12063                let mut s = (**subquery).clone();
12064                self.resolve_select_subqueries(&mut s, cancel)?;
12065                let r = match self.exec_bare_select_cancel(&s, cancel) {
12066                    Ok(r) => r,
12067                    Err(e) if is_correlation_error(&e) => return Ok(None),
12068                    Err(e) => return Err(e),
12069                };
12070                let QueryResult::Rows { columns, rows, .. } = r else {
12071                    return Err(EngineError::Unsupported(
12072                        "IN-subquery: inner statement did not return rows".into(),
12073                    ));
12074                };
12075                if columns.len() != 1 {
12076                    return Err(EngineError::Unsupported(alloc::format!(
12077                        "IN-subquery must project exactly one column; got {}",
12078                        columns.len()
12079                    )));
12080                }
12081                // v7.30.2 (mailrs round-25) — flat InList, NOT an OR-Eq
12082                // chain: chain depth scaled with the inner result's ROW
12083                // COUNT, so one 24k-match search overflowed the worker
12084                // stack (recursive eval + recursive Box drop) and
12085                // aborted the embedding host process.
12086                let mut list: Vec<Expr> = Vec::with_capacity(rows.len());
12087                for row in rows {
12088                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
12089                    list.push(value_to_literal_expr(v)?);
12090                }
12091                Ok(Some(Expr::InList {
12092                    expr: expr.clone(),
12093                    list,
12094                    negated: *negated,
12095                }))
12096            }
12097            _ => Ok(None),
12098        }
12099    }
12100}
12101
12102// ---- v4.12 window-function helpers ----
12103// The (partition-key, order-key, original-index) tuple shape used
12104// across these helpers is intrinsic to the planner. Factoring it
12105// into a typedef adds indirection without making the code clearer,
12106// so several lints are allowed inline on the affected functions
12107// rather than module-wide.
12108
12109/// v4.22: cheap structural scan for `FROM <name>` (qualified or
12110/// not) inside a SELECT — used to verify the anchor of a WITH
12111/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
12112/// FROM joins, subqueries, and unions.
12113fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
12114    if let Some(from) = &stmt.from
12115        && from_refers_to(from, target)
12116    {
12117        return true;
12118    }
12119    for (_, peer) in &stmt.unions {
12120        if select_refers_to(peer, target) {
12121            return true;
12122        }
12123    }
12124    for item in &stmt.items {
12125        if let SelectItem::Expr { expr, .. } = item
12126            && expr_refers_to(expr, target)
12127        {
12128            return true;
12129        }
12130    }
12131    if let Some(w) = &stmt.where_
12132        && expr_refers_to(w, target)
12133    {
12134        return true;
12135    }
12136    false
12137}
12138
12139fn from_refers_to(from: &FromClause, target: &str) -> bool {
12140    if from.primary.name.eq_ignore_ascii_case(target) {
12141        return true;
12142    }
12143    from.joins
12144        .iter()
12145        .any(|j| j.table.name.eq_ignore_ascii_case(target))
12146}
12147
12148/// v7.28 (round-22) — collect every QUALIFIED column referenced
12149/// anywhere in a SELECT (subquery bodies included). Returns None
12150/// when a wildcard or a bare column name makes static attribution
12151/// unsafe — callers then keep every column.
12152fn collect_qualified_refs(
12153    stmt: &SelectStatement,
12154    out: &mut alloc::collections::BTreeSet<(String, String)>,
12155) -> Option<()> {
12156    for item in &stmt.items {
12157        match item {
12158            SelectItem::Wildcard => return None,
12159            SelectItem::Expr { expr, .. } => collect_qualified_refs_expr(expr, out)?,
12160        }
12161    }
12162    if let Some(w) = &stmt.where_ {
12163        collect_qualified_refs_expr(w, out)?;
12164    }
12165    if let Some(from) = &stmt.from {
12166        for j in &from.joins {
12167            if let Some(on) = &j.on {
12168                collect_qualified_refs_expr(on, out)?;
12169            }
12170            if j.table.lateral_subquery.is_some() {
12171                return None;
12172            }
12173        }
12174    }
12175    if let Some(gs) = &stmt.group_by {
12176        for g in gs {
12177            collect_qualified_refs_expr(g, out)?;
12178        }
12179    }
12180    if let Some(h) = &stmt.having {
12181        collect_qualified_refs_expr(h, out)?;
12182    }
12183    for o in &stmt.order_by {
12184        collect_qualified_refs_expr(&o.expr, out)?;
12185    }
12186    for (_, peer) in &stmt.unions {
12187        collect_qualified_refs(peer, out)?;
12188    }
12189    for cte in &stmt.ctes {
12190        collect_qualified_refs(&cte.body, out)?;
12191    }
12192    Some(())
12193}
12194
12195fn collect_qualified_refs_expr(
12196    e: &Expr,
12197    out: &mut alloc::collections::BTreeSet<(String, String)>,
12198) -> Option<()> {
12199    // Two passes so the column and subquery visitors don't both
12200    // capture `out` mutably.
12201    let mut cols: Vec<spg_sql::ast::ColumnName> = Vec::new();
12202    let mut subs: Vec<&SelectStatement> = Vec::new();
12203    visit_expr_columns_and_subqueries(
12204        e,
12205        &mut |c: &spg_sql::ast::ColumnName| cols.push(c.clone()),
12206        &mut |sub| subs.push(sub),
12207    );
12208    for c in cols {
12209        match c.qualifier {
12210            Some(q) => {
12211                out.insert((q, c.name));
12212            }
12213            None => return None,
12214        }
12215    }
12216    for sub in subs {
12217        collect_qualified_refs(sub, out)?;
12218    }
12219    Some(())
12220}
12221
12222/// Immutable walk over an Expr visiting every Column and every
12223/// nested SelectStatement (v7.28).
12224fn visit_expr_columns_and_subqueries<'a>(
12225    e: &'a Expr,
12226    on_col: &mut impl FnMut(&'a spg_sql::ast::ColumnName),
12227    on_sub: &mut impl FnMut(&'a SelectStatement),
12228) {
12229    match e {
12230        Expr::Column(c) => on_col(c),
12231        Expr::ScalarSubquery(s) => on_sub(s),
12232        Expr::Exists { subquery, .. } => on_sub(subquery),
12233        Expr::InSubquery { expr, subquery, .. } => {
12234            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12235            on_sub(subquery);
12236        }
12237        Expr::Binary { lhs, rhs, .. } => {
12238            visit_expr_columns_and_subqueries(lhs, on_col, on_sub);
12239            visit_expr_columns_and_subqueries(rhs, on_col, on_sub);
12240        }
12241        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12242            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12243        }
12244        Expr::Like { expr, pattern, .. } => {
12245            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12246            visit_expr_columns_and_subqueries(pattern, on_col, on_sub);
12247        }
12248        Expr::FunctionCall { args, .. } => {
12249            for a in args {
12250                visit_expr_columns_and_subqueries(a, on_col, on_sub);
12251            }
12252        }
12253        Expr::AggregateOrdered { call, order_by, .. } => {
12254            visit_expr_columns_and_subqueries(call, on_col, on_sub);
12255            for o in order_by {
12256                visit_expr_columns_and_subqueries(&o.expr, on_col, on_sub);
12257            }
12258        }
12259        Expr::Case {
12260            operand,
12261            branches,
12262            else_branch,
12263        } => {
12264            if let Some(op) = operand {
12265                visit_expr_columns_and_subqueries(op, on_col, on_sub);
12266            }
12267            for (w, t) in branches {
12268                visit_expr_columns_and_subqueries(w, on_col, on_sub);
12269                visit_expr_columns_and_subqueries(t, on_col, on_sub);
12270            }
12271            if let Some(eb) = else_branch {
12272                visit_expr_columns_and_subqueries(eb, on_col, on_sub);
12273            }
12274        }
12275        Expr::ArraySubscript { target, index } => {
12276            visit_expr_columns_and_subqueries(target, on_col, on_sub);
12277            visit_expr_columns_and_subqueries(index, on_col, on_sub);
12278        }
12279        Expr::Literal(_) | Expr::Placeholder(_) => {}
12280        // Exotic nodes (window etc.) — visit nothing extra; their
12281        // columns are caught when the caller bails on bare names
12282        // elsewhere, and window queries skip pruning entirely at
12283        // the call sites.
12284        _ => {
12285            // Exotic node (window function etc.): report an
12286            // unattributable marker so callers disable pruning.
12287            static BAIL: spg_sql::ast::ColumnName = spg_sql::ast::ColumnName {
12288                qualifier: None,
12289                name: String::new(),
12290            };
12291            on_col(&BAIL);
12292        }
12293    }
12294}
12295
12296/// v7.28 (round-22) — collect every Column qualifier in an expr;
12297/// `all_qualified` flips false on any bare column (those can't be
12298/// attributed to one table safely, so the pushdown skips them).
12299fn collect_column_qualifiers<'e>(e: &'e Expr, out: &mut Vec<&'e str>, all_qualified: &mut bool) {
12300    if let Expr::Column(c) = e {
12301        match &c.qualifier {
12302            Some(q) => out.push(q.as_str()),
12303            None => *all_qualified = false,
12304        }
12305        return;
12306    }
12307    // Reuse the canonical immutable walk via describe's walker shape:
12308    // recurse the common containers.
12309    match e {
12310        Expr::Binary { lhs, rhs, .. } => {
12311            collect_column_qualifiers(lhs, out, all_qualified);
12312            collect_column_qualifiers(rhs, out, all_qualified);
12313        }
12314        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12315            collect_column_qualifiers(expr, out, all_qualified);
12316        }
12317        Expr::Like { expr, pattern, .. } => {
12318            collect_column_qualifiers(expr, out, all_qualified);
12319            collect_column_qualifiers(pattern, out, all_qualified);
12320        }
12321        Expr::FunctionCall { args, .. } => {
12322            for a in args {
12323                collect_column_qualifiers(a, out, all_qualified);
12324            }
12325        }
12326        Expr::Literal(_) | Expr::Placeholder(_) => {}
12327        // Anything exotic (CASE, subquery, window, arrays…):
12328        // conservatively mark unattributable.
12329        _ => *all_qualified = false,
12330    }
12331}
12332
12333fn expr_refers_to(e: &Expr, target: &str) -> bool {
12334    match e {
12335        Expr::AggregateOrdered { call, order_by, .. } => {
12336            expr_refers_to(call, target) || order_by.iter().any(|o| expr_refers_to(&o.expr, target))
12337        }
12338        Expr::ScalarSubquery(s) => select_refers_to(s, target),
12339        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
12340            select_refers_to(subquery, target)
12341        }
12342        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
12343        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12344            expr_refers_to(expr, target)
12345        }
12346        Expr::Like { expr, pattern, .. } => {
12347            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
12348        }
12349        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
12350        Expr::Extract { source, .. } => expr_refers_to(source, target),
12351        Expr::WindowFunction {
12352            args,
12353            partition_by,
12354            order_by,
12355            ..
12356        } => {
12357            args.iter().any(|a| expr_refers_to(a, target))
12358                || partition_by.iter().any(|p| expr_refers_to(p, target))
12359                || order_by.iter().any(|(o, _, _)| expr_refers_to(o, target))
12360        }
12361        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
12362        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
12363        Expr::InList { expr, list, .. } => {
12364            expr_refers_to(expr, target) || list.iter().any(|e| expr_refers_to(e, target))
12365        }
12366        Expr::ArraySubscript { target: t, index } => {
12367            expr_refers_to(t, target) || expr_refers_to(index, target)
12368        }
12369        Expr::AnyAll { expr, array, .. } => {
12370            expr_refers_to(expr, target) || expr_refers_to(array, target)
12371        }
12372        Expr::Case {
12373            operand,
12374            branches,
12375            else_branch,
12376        } => {
12377            operand
12378                .as_deref()
12379                .is_some_and(|o| expr_refers_to(o, target))
12380                || branches
12381                    .iter()
12382                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
12383                || else_branch
12384                    .as_deref()
12385                    .is_some_and(|e| expr_refers_to(e, target))
12386        }
12387    }
12388}
12389
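// NOTE(review): orphaned comment — the helper it describes is not
// defined at this position. Kept as a plain `//` comment so it no
// longer merges into the rustdoc of `pg_data_type_text` below.
//
// v4.22: pick more specific column types from observed rows when
// the projection builder defaulted to Text (the v1.x behavior for
// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
// land an Int column in the CTE storage table rather than failing
// the insert with "expected TEXT, got INT".
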
12395/// v7.16.2 — map an SPG [`DataType`] to the PG-canonical
12396/// `information_schema.columns.data_type` text. Covers the
12397/// values mailrs's migrations probe (`'ARRAY'`, `'integer'`,
12398/// `'text'`, …). Unknown variants fall back to the SPG name
12399/// downcased — better than panicking on a future DataType.
12400fn pg_data_type_text(ty: DataType) -> alloc::string::String {
12401    let s = match ty {
12402        DataType::Int => "integer",
12403        DataType::BigInt => "bigint",
12404        DataType::SmallInt => "smallint",
12405        DataType::Float => "double precision",
12406        DataType::Bool => "boolean",
12407        DataType::Text => "text",
12408        DataType::Varchar(_) => "character varying",
12409        DataType::Date => "date",
12410        DataType::Timestamp => "timestamp without time zone",
12411        DataType::Timestamptz => "timestamp with time zone",
12412        DataType::Json => "jsonb",
12413        DataType::Bytes => "bytea",
12414        DataType::TextArray | DataType::IntArray | DataType::BigIntArray => "ARRAY",
12415        DataType::TsVector => "tsvector",
12416        DataType::TsQuery => "tsquery",
12417        DataType::Vector { .. } => "USER-DEFINED",
12418        // Non-exhaustive — fall back to "USER-DEFINED" the way
12419        // PG labels any pg_type it doesn't recognise.
12420        _ => "USER-DEFINED",
12421    };
12422    alloc::string::String::from(s)
12423}
12424
12425/// v7.16.2 — synthesise `information_schema.columns`. mailrs
12426/// queries are of shape `SELECT 1 FROM information_schema.columns
12427/// WHERE table_name = … AND column_name = … AND data_type = …` —
12428/// the v7.16.2 view returns the columns mailrs probes; broader
12429/// PG-spec parity (ordinal_position, is_nullable, character_
12430/// maximum_length, udt_name, …) lands as needed.
12431fn synth_information_schema_columns(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12432    let schema = alloc::vec![
12433        ColumnSchema::new("table_catalog", DataType::Text, false),
12434        ColumnSchema::new("table_schema", DataType::Text, false),
12435        ColumnSchema::new("table_name", DataType::Text, false),
12436        ColumnSchema::new("column_name", DataType::Text, false),
12437        ColumnSchema::new("ordinal_position", DataType::Int, false),
12438        ColumnSchema::new("is_nullable", DataType::Text, false),
12439        ColumnSchema::new("data_type", DataType::Text, false),
12440    ];
12441    let mut rows: Vec<Row> = Vec::new();
12442    for tname in cat.table_names() {
12443        let Some(t) = cat.get(&tname) else { continue };
12444        for (i, col) in t.schema().columns.iter().enumerate() {
12445            #[allow(clippy::cast_possible_wrap)]
12446            let ordinal = (i + 1) as i32;
12447            rows.push(Row::new(alloc::vec![
12448                Value::Text("spg".into()),
12449                Value::Text("public".into()),
12450                Value::Text(tname.clone()),
12451                Value::Text(col.name.clone()),
12452                Value::Int(ordinal),
12453                Value::Text(if col.nullable {
12454                    "YES".into()
12455                } else {
12456                    "NO".into()
12457                }),
12458                Value::Text(pg_data_type_text(col.ty)),
12459            ]));
12460        }
12461    }
12462    (schema, rows)
12463}
12464
12465/// v7.16.2 — synthesise `information_schema.tables`.
12466fn synth_information_schema_tables(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12467    let schema = alloc::vec![
12468        ColumnSchema::new("table_catalog", DataType::Text, false),
12469        ColumnSchema::new("table_schema", DataType::Text, false),
12470        ColumnSchema::new("table_name", DataType::Text, false),
12471        ColumnSchema::new("table_type", DataType::Text, false),
12472    ];
12473    let mut rows: Vec<Row> = Vec::new();
12474    for tname in cat.table_names() {
12475        rows.push(Row::new(alloc::vec![
12476            Value::Text("spg".into()),
12477            Value::Text("public".into()),
12478            Value::Text(tname.clone()),
12479            Value::Text("BASE TABLE".into()),
12480        ]));
12481    }
12482    (schema, rows)
12483}
12484
12485/// v7.16.2 — synthesise `pg_catalog.pg_class`. Minimum shape
12486/// for psql `\d` / ORM probes: `relname` + `relkind`. Each
12487/// user table emits one row.
12488fn synth_pg_class(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12489    let schema = alloc::vec![
12490        ColumnSchema::new("relname", DataType::Text, false),
12491        ColumnSchema::new("relkind", DataType::Text, false),
12492        ColumnSchema::new("relnamespace", DataType::BigInt, false),
12493    ];
12494    let mut rows: Vec<Row> = Vec::new();
12495    for tname in cat.table_names() {
12496        rows.push(Row::new(alloc::vec![
12497            Value::Text(tname.clone()),
12498            Value::Text("r".into()),
12499            Value::BigInt(2200), // PG's `public` namespace OID
12500        ]));
12501    }
12502    (schema, rows)
12503}
12504
12505/// v7.16.2 — synthesise `pg_catalog.pg_attribute`. Minimum
12506/// shape: `attrelid` (text — SPG has no OID), `attname`,
12507/// `attnum`, `atttypid` (text), `attnotnull`.
12508fn synth_pg_attribute(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12509    let schema = alloc::vec![
12510        ColumnSchema::new("attrelid", DataType::Text, false),
12511        ColumnSchema::new("attname", DataType::Text, false),
12512        ColumnSchema::new("attnum", DataType::Int, false),
12513        ColumnSchema::new("atttypid", DataType::Text, false),
12514        ColumnSchema::new("attnotnull", DataType::Bool, false),
12515    ];
12516    let mut rows: Vec<Row> = Vec::new();
12517    for tname in cat.table_names() {
12518        let Some(t) = cat.get(&tname) else { continue };
12519        for (i, col) in t.schema().columns.iter().enumerate() {
12520            #[allow(clippy::cast_possible_wrap)]
12521            let ordinal = (i + 1) as i32;
12522            rows.push(Row::new(alloc::vec![
12523                Value::Text(tname.clone()),
12524                Value::Text(col.name.clone()),
12525                Value::Int(ordinal),
12526                Value::Text(pg_data_type_text(col.ty)),
12527                Value::Bool(!col.nullable),
12528            ]));
12529        }
12530    }
12531    (schema, rows)
12532}
12533
12534/// v7.17.0 Phase 3.P0-50 — synthesise `pg_catalog.pg_type`. The
12535/// returned rows cover every built-in scalar / array type sqlx,
12536/// SQLAlchemy, Diesel and pgAdmin look up at compile / connect
12537/// time. PG-canonical schema columns we expose:
12538///   * oid           — type OID (the lookup key sqlx uses)
12539///   * typname       — canonical type name (`int4`, `text`, …)
12540///   * typlen        — width in bytes (-1 for var-length)
12541///   * typtype       — `b`ase / `c`omposite / `e`num / etc.
12542///   * typcategory   — PG type category single-char
12543///   * typelem       — element OID for arrays (0 otherwise)
12544///   * typarray      — array-type OID (0 if no array type)
12545///   * typnamespace  — schema OID (always `public` = 2200)
12546///
12547/// Other pg_type columns (typowner, typinput/typoutput, etc.)
12548/// land in follow-up work — sqlx encoders don't query them at
12549/// connect time.
12550fn synth_pg_type(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12551    let schema = alloc::vec![
12552        ColumnSchema::new("oid", DataType::BigInt, false),
12553        ColumnSchema::new("typname", DataType::Text, false),
12554        ColumnSchema::new("typlen", DataType::SmallInt, false),
12555        ColumnSchema::new("typtype", DataType::Text, false),
12556        ColumnSchema::new("typcategory", DataType::Text, false),
12557        ColumnSchema::new("typelem", DataType::BigInt, false),
12558        ColumnSchema::new("typarray", DataType::BigInt, false),
12559        ColumnSchema::new("typnamespace", DataType::BigInt, false),
12560    ];
12561    // (oid, name, len, type, cat, elem, array_oid). PG OID
12562    // numbers come straight from `pg_type.dat`.
12563    let scalars: &[(i64, &str, i16, &str, &str, i64, i64)] = &[
12564        // bool
12565        (16, "bool", 1, "b", "B", 0, 1000),
12566        (17, "bytea", -1, "b", "U", 0, 1001),
12567        (18, "char", 1, "b", "S", 0, 1002),
12568        (19, "name", 64, "b", "S", 0, 1003),
12569        (20, "int8", 8, "b", "N", 0, 1016),
12570        (21, "int2", 2, "b", "N", 0, 1005),
12571        (23, "int4", 4, "b", "N", 0, 1007),
12572        (24, "regproc", 4, "b", "N", 0, 1008),
12573        (25, "text", -1, "b", "S", 0, 1009),
12574        (26, "oid", 4, "b", "N", 0, 1028),
12575        (114, "json", -1, "b", "U", 0, 199),
12576        (142, "xml", -1, "b", "U", 0, 143),
12577        (700, "float4", 4, "b", "N", 0, 1021),
12578        (701, "float8", 8, "b", "N", 0, 1022),
12579        (650, "cidr", -1, "b", "I", 0, 651),
12580        (869, "inet", -1, "b", "I", 0, 1041),
12581        (829, "macaddr", 6, "b", "U", 0, 1040),
12582        (1042, "bpchar", -1, "b", "S", 0, 1014),
12583        (1043, "varchar", -1, "b", "S", 0, 1015),
12584        (1082, "date", 4, "b", "D", 0, 1182),
12585        (1083, "time", 8, "b", "D", 0, 1183),
12586        (1114, "timestamp", 8, "b", "D", 0, 1115),
12587        (1184, "timestamptz", 8, "b", "D", 0, 1185),
12588        (1186, "interval", 16, "b", "T", 0, 1187),
12589        (1266, "timetz", 12, "b", "D", 0, 1270),
12590        (1700, "numeric", -1, "b", "N", 0, 1231),
12591        (790, "money", 8, "b", "N", 0, 791),
12592        (2950, "uuid", 16, "b", "U", 0, 2951),
12593        (3802, "jsonb", -1, "b", "U", 0, 3807),
12594        (3614, "tsvector", -1, "b", "U", 0, 3643),
12595        (3615, "tsquery", -1, "b", "U", 0, 3645),
12596        // hstore + range types — typcategory 'U' (user) / 'R' (range).
12597        (3908, "tstzrange", -1, "r", "R", 0, 3909),
12598        (3910, "tsrange", -1, "r", "R", 0, 3911),
12599        (3904, "int4range", -1, "r", "R", 0, 3905),
12600        (3926, "int8range", -1, "r", "R", 0, 3927),
12601        (3906, "numrange", -1, "r", "R", 0, 3907),
12602        (3912, "daterange", -1, "r", "R", 0, 3913),
12603    ];
12604    // Array companion types share the typelem / typcategory='A'.
12605    // We emit just the array OIDs the scalars reference.
12606    let arrays: &[(i64, &str, i64)] = &[
12607        (1000, "_bool", 16),
12608        (1001, "_bytea", 17),
12609        (1002, "_char", 18),
12610        (1003, "_name", 19),
12611        (1016, "_int8", 20),
12612        (1005, "_int2", 21),
12613        (1007, "_int4", 23),
12614        (1008, "_regproc", 24),
12615        (1009, "_text", 25),
12616        (1028, "_oid", 26),
12617        (199, "_json", 114),
12618        (143, "_xml", 142),
12619        (1021, "_float4", 700),
12620        (1022, "_float8", 701),
12621        (651, "_cidr", 650),
12622        (1041, "_inet", 869),
12623        (1040, "_macaddr", 829),
12624        (1014, "_bpchar", 1042),
12625        (1015, "_varchar", 1043),
12626        (1182, "_date", 1082),
12627        (1183, "_time", 1083),
12628        (1115, "_timestamp", 1114),
12629        (1185, "_timestamptz", 1184),
12630        (1187, "_interval", 1186),
12631        (1270, "_timetz", 1266),
12632        (1231, "_numeric", 1700),
12633        (791, "_money", 790),
12634        (2951, "_uuid", 2950),
12635        (3807, "_jsonb", 3802),
12636        (3643, "_tsvector", 3614),
12637        (3645, "_tsquery", 3615),
12638    ];
12639    let mut rows: Vec<Row> = Vec::with_capacity(scalars.len() + arrays.len());
12640    for &(oid, name, len, ty, cat, elem, arr) in scalars {
12641        rows.push(Row::new(alloc::vec![
12642            Value::BigInt(oid),
12643            Value::Text(name.into()),
12644            Value::SmallInt(len),
12645            Value::Text(ty.into()),
12646            Value::Text(cat.into()),
12647            Value::BigInt(elem),
12648            Value::BigInt(arr),
12649            Value::BigInt(2200),
12650        ]));
12651    }
12652    for &(oid, name, elem) in arrays {
12653        rows.push(Row::new(alloc::vec![
12654            Value::BigInt(oid),
12655            Value::Text(name.into()),
12656            Value::SmallInt(-1),
12657            Value::Text("b".into()),
12658            Value::Text("A".into()),
12659            Value::BigInt(elem),
12660            Value::BigInt(0),
12661            Value::BigInt(2200),
12662        ]));
12663    }
12664    (schema, rows)
12665}
12666
// NOTE(review): the comment block below documents `synth_pg_proc`,
// which is defined after `synth_pg_trigger`. It is kept as a plain
// `//` comment so it no longer merges into the rustdoc of
// `synth_pg_trigger`.
//
// v7.17.0 Phase 3.P0-51 — synthesise `pg_catalog.pg_proc`. ORM /
// pgAdmin probes look up functions by name; SPG synthesises rows
// for the built-in scalar functions / aggregates / window funcs
// the engine actually dispatches. SPG has no user-defined
// functions yet so the table is a stable static list.
//
// Schema columns exposed:
//   * oid (BigInt) — function OID from PG's pg_proc.dat
//   * proname (Text) — function name (lowercase)
//   * pronamespace (BigInt) — 11 (`pg_catalog`)
//   * prokind (Text) — 'f' function, 'a' aggregate, 'w' window
//   * pronargs (Int) — declared arg count (-1 for variadic)
//   * prorettype (BigInt) — return type OID (matches synth_pg_type)

12680/// v7.24 (round-16 D) — synthesise `pg_catalog.pg_trigger` from the
12681/// live catalog. PG-shaped core columns (tgname, tgenabled with
12682/// 'O'/'D') plus pragmatic text columns PG keeps relational
12683/// (relname, timing, events, function) so health checks don't need
12684/// oid joins.
12685fn synth_pg_trigger(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12686    let schema = alloc::vec![
12687        ColumnSchema::new("tgname", DataType::Text, false),
12688        ColumnSchema::new("relname", DataType::Text, false),
12689        ColumnSchema::new("tgenabled", DataType::Text, false),
12690        ColumnSchema::new("timing", DataType::Text, false),
12691        ColumnSchema::new("events", DataType::Text, false),
12692        ColumnSchema::new("function", DataType::Text, false),
12693    ];
12694    let rows: Vec<Row> = cat
12695        .triggers()
12696        .iter()
12697        .map(|t| {
12698            Row::new(alloc::vec![
12699                Value::Text(t.name.clone()),
12700                Value::Text(t.table.clone()),
12701                Value::Text(if t.enabled { "O".into() } else { "D".into() }),
12702                Value::Text(t.timing.clone()),
12703                Value::Text(t.events.join(" OR ")),
12704                Value::Text(t.function.clone()),
12705            ])
12706        })
12707        .collect();
12708    (schema, rows)
12709}
12710
12711fn synth_pg_proc(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12712    let schema = alloc::vec![
12713        ColumnSchema::new("oid", DataType::BigInt, false),
12714        ColumnSchema::new("proname", DataType::Text, false),
12715        ColumnSchema::new("pronamespace", DataType::BigInt, false),
12716        ColumnSchema::new("prokind", DataType::Text, false),
12717        ColumnSchema::new("pronargs", DataType::Int, false),
12718        ColumnSchema::new("prorettype", DataType::BigInt, false),
12719    ];
12720    // (oid, name, kind, nargs, rettype). OIDs taken from PG's
12721    // pg_proc.dat for the common subset.
12722    let funcs: &[(i64, &str, &str, i32, i64)] = &[
12723        // Scalar functions.
12724        (1318, "length", "f", 1, 23),
12725        (871, "upper", "f", 1, 25),
12726        (870, "lower", "f", 1, 25),
12727        (936, "substring", "f", 3, 25),
12728        (937, "substring", "f", 2, 25),
12729        (3055, "btrim", "f", 1, 25),
12730        (885, "btrim", "f", 2, 25),
12731        (3056, "ltrim", "f", 1, 25),
12732        (875, "ltrim", "f", 2, 25),
12733        (3057, "rtrim", "f", 1, 25),
12734        (876, "rtrim", "f", 2, 25),
12735        (1397, "abs", "f", 1, 23),
12736        (1396, "abs", "f", 1, 20),
12737        (1606, "round", "f", 1, 1700),
12738        (1707, "round", "f", 2, 1700),
12739        (2308, "ceil", "f", 1, 701),
12740        (2309, "ceiling", "f", 1, 701),
12741        (2310, "floor", "f", 1, 701),
12742        (1376, "sqrt", "f", 1, 701),
12743        (1369, "ln", "f", 1, 701),
12744        (1373, "exp", "f", 1, 701),
12745        (1368, "power", "f", 2, 701),
12746        (2228, "random", "f", 0, 701),
12747        // Date / time.
12748        (1299, "now", "f", 0, 1184),
12749        (1274, "current_timestamp", "f", 0, 1184),
12750        (1140, "current_date", "f", 0, 1082),
12751        (2050, "current_time", "f", 0, 1083),
12752        (1158, "date_trunc", "f", 2, 1184),
12753        (1171, "date_part", "f", 2, 701),
12754        (1172, "age", "f", 1, 1186),
12755        (936, "to_char", "f", 2, 25),
12756        // Session / introspection.
12757        (861, "current_database", "f", 0, 19),
12758        (745, "current_user", "f", 0, 19),
12759        (745, "session_user", "f", 0, 19),
12760        (1402, "current_schema", "f", 0, 19),
12761        // String concat / format.
12762        (3058, "concat", "f", -1, 25),
12763        (3059, "concat_ws", "f", -1, 25),
12764        (3539, "format", "f", -1, 25),
12765        // Type introspection.
12766        (2877, "pg_typeof", "f", 1, 2206),
12767        // JSON.
12768        (3198, "json_build_object", "f", -1, 114),
12769        (3199, "jsonb_build_object", "f", -1, 3802),
12770        (3271, "json_build_array", "f", -1, 114),
12771        (3272, "jsonb_build_array", "f", -1, 3802),
12772        // UUID.
12773        (3253, "gen_random_uuid", "f", 0, 2950),
12774        (3252, "uuid_generate_v4", "f", 0, 2950),
12775        // Aggregates.
12776        (2147, "count", "a", 0, 20),
12777        (2803, "count", "a", -1, 20),
12778        (2116, "max", "a", 1, 23),
12779        (2132, "min", "a", 1, 23),
12780        (2108, "sum", "a", 1, 20),
12781        (2100, "avg", "a", 1, 1700),
12782        (2517, "string_agg", "a", 2, 25),
12783        (2747, "array_agg", "a", 1, 1009),
12784        (2517, "bool_and", "a", 1, 16),
12785        (2518, "bool_or", "a", 1, 16),
12786        (2519, "every", "a", 1, 16),
12787        // Window functions.
12788        (3100, "row_number", "w", 0, 20),
12789        (3101, "rank", "w", 0, 20),
12790        (3102, "dense_rank", "w", 0, 20),
12791        (3103, "percent_rank", "w", 0, 701),
12792        (3104, "cume_dist", "w", 0, 701),
12793        (3105, "lag", "w", -1, 2283),
12794        (3106, "lead", "w", -1, 2283),
12795        (3107, "first_value", "w", 1, 2283),
12796        (3108, "last_value", "w", 1, 2283),
12797        (3109, "nth_value", "w", 2, 2283),
12798    ];
12799    let mut rows: Vec<Row> = Vec::with_capacity(funcs.len());
12800    for &(oid, name, kind, nargs, rettype) in funcs {
12801        rows.push(Row::new(alloc::vec![
12802            Value::BigInt(oid),
12803            Value::Text(name.into()),
12804            Value::BigInt(11),
12805            Value::Text(kind.into()),
12806            Value::Int(nargs),
12807            Value::BigInt(rettype),
12808        ]));
12809    }
12810    (schema, rows)
12811}
12812
12813/// v7.17.0 Phase 3.P0-65 — synthesise `mysql.user`. MySQL admin
12814/// queries (`SELECT user, host FROM mysql.user`) probe this at
12815/// connect time to list accounts. SPG ships one row per
12816/// UserStore entry plus a synthetic `root` superuser row for
12817/// MySQL bootstrap compat.
12818fn synth_mysql_user(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
12819    let schema = alloc::vec![
12820        ColumnSchema::new("user", DataType::Text, false),
12821        ColumnSchema::new("host", DataType::Text, false),
12822        ColumnSchema::new("select_priv", DataType::Text, false),
12823    ];
12824    let mut rows: Vec<Row> = Vec::new();
12825    rows.push(Row::new(alloc::vec![
12826        Value::Text("root".into()),
12827        Value::Text("localhost".into()),
12828        Value::Text("Y".into()),
12829    ]));
12830    for (name, _) in engine.users.iter() {
12831        if name != "root" {
12832            rows.push(Row::new(alloc::vec![
12833                Value::Text(name.to_string()),
12834                Value::Text("%".into()),
12835                Value::Text("Y".into()),
12836            ]));
12837        }
12838    }
12839    (schema, rows)
12840}
12841
12842/// v7.17.0 Phase 3.P0-65 — synthesise `mysql.db`. The
12843/// per-database privileges table. SPG is single-database so the
12844/// table surfaces one row per declared user with full privileges
12845/// on the canonical `postgres` database.
12846fn synth_mysql_db() -> (Vec<ColumnSchema>, Vec<Row>) {
12847    let schema = alloc::vec![
12848        ColumnSchema::new("host", DataType::Text, false),
12849        ColumnSchema::new("db", DataType::Text, false),
12850        ColumnSchema::new("user", DataType::Text, false),
12851        ColumnSchema::new("select_priv", DataType::Text, false),
12852    ];
12853    let rows = alloc::vec![Row::new(alloc::vec![
12854        Value::Text("localhost".into()),
12855        Value::Text("postgres".into()),
12856        Value::Text("root".into()),
12857        Value::Text("Y".into()),
12858    ])];
12859    (schema, rows)
12860}
12861
12862/// v7.17.0 Phase 3.P0-63 — synthesise
12863/// `information_schema.KEY_COLUMN_USAGE`. ORM migration tools
12864/// (Alembic, Sequelize, TypeORM) walk this view to discover FK
12865/// relationships in MySQL-flavoured introspection queries.
12866///
12867/// Schema columns exposed:
12868///   * CONSTRAINT_NAME (Text)
12869///   * TABLE_NAME (Text)
12870///   * COLUMN_NAME (Text)
12871///   * ORDINAL_POSITION (Int)
12872///   * REFERENCED_TABLE_NAME (Text) — empty for non-FK rows
12873///   * REFERENCED_COLUMN_NAME (Text) — empty for non-FK rows
12874fn synth_info_key_column_usage(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12875    let schema = alloc::vec![
12876        ColumnSchema::new("constraint_name", DataType::Text, false),
12877        ColumnSchema::new("table_name", DataType::Text, false),
12878        ColumnSchema::new("column_name", DataType::Text, false),
12879        ColumnSchema::new("ordinal_position", DataType::Int, false),
12880        ColumnSchema::new("referenced_table_name", DataType::Text, false),
12881        ColumnSchema::new("referenced_column_name", DataType::Text, false),
12882    ];
12883    let mut rows: Vec<Row> = Vec::new();
12884    for tname in cat.table_names() {
12885        let Some(t) = cat.get(&tname) else { continue };
12886        let cols = &t.schema().columns;
12887        let col_name_at = |pos: usize| -> String {
12888            cols.get(pos)
12889                .map_or_else(|| alloc::format!("col{pos}"), |c| c.name.clone())
12890        };
12891        // FKs.
12892        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
12893            let conname = fk
12894                .name
12895                .clone()
12896                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
12897            for (i, (&local, &parent)) in fk
12898                .local_columns
12899                .iter()
12900                .zip(fk.parent_columns.iter())
12901                .enumerate()
12902            {
12903                let parent_name = cat
12904                    .get(&fk.parent_table)
12905                    .and_then(|pt| pt.schema().columns.get(parent).map(|c| c.name.clone()))
12906                    .unwrap_or_else(|| alloc::format!("col{parent}"));
12907                #[allow(clippy::cast_possible_wrap)]
12908                let ordinal = (i + 1) as i32;
12909                rows.push(Row::new(alloc::vec![
12910                    Value::Text(conname.clone()),
12911                    Value::Text(tname.clone()),
12912                    Value::Text(col_name_at(local)),
12913                    Value::Int(ordinal),
12914                    Value::Text(fk.parent_table.clone()),
12915                    Value::Text(parent_name),
12916                ]));
12917            }
12918        }
12919        // PK / composite UC entries.
12920        for (ci, uc) in t.schema().uniqueness_constraints.iter().enumerate() {
12921            let conname = if uc.is_primary_key {
12922                alloc::format!("{}_pkey", tname)
12923            } else {
12924                alloc::format!("{}_uniq{ci}", tname)
12925            };
12926            for (i, &local) in uc.columns.iter().enumerate() {
12927                #[allow(clippy::cast_possible_wrap)]
12928                let ordinal = (i + 1) as i32;
12929                rows.push(Row::new(alloc::vec![
12930                    Value::Text(conname.clone()),
12931                    Value::Text(tname.clone()),
12932                    Value::Text(col_name_at(local)),
12933                    Value::Int(ordinal),
12934                    Value::Text(String::new()),
12935                    Value::Text(String::new()),
12936                ]));
12937            }
12938        }
12939    }
12940    (schema, rows)
12941}
12942
12943/// v7.17.0 Phase 3.P0-64 — synthesise
12944/// `information_schema.REFERENTIAL_CONSTRAINTS`. One row per FK.
12945fn synth_info_referential_constraints(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12946    let schema = alloc::vec![
12947        ColumnSchema::new("constraint_name", DataType::Text, false),
12948        ColumnSchema::new("table_name", DataType::Text, false),
12949        ColumnSchema::new("referenced_table_name", DataType::Text, false),
12950        ColumnSchema::new("update_rule", DataType::Text, false),
12951        ColumnSchema::new("delete_rule", DataType::Text, false),
12952    ];
12953    fn rule_name(a: spg_storage::FkAction) -> &'static str {
12954        match a {
12955            spg_storage::FkAction::Cascade => "CASCADE",
12956            spg_storage::FkAction::SetNull => "SET NULL",
12957            spg_storage::FkAction::SetDefault => "SET DEFAULT",
12958            spg_storage::FkAction::Restrict => "RESTRICT",
12959            spg_storage::FkAction::NoAction => "NO ACTION",
12960        }
12961    }
12962    let mut rows: Vec<Row> = Vec::new();
12963    for tname in cat.table_names() {
12964        let Some(t) = cat.get(&tname) else { continue };
12965        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
12966            let conname = fk
12967                .name
12968                .clone()
12969                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
12970            rows.push(Row::new(alloc::vec![
12971                Value::Text(conname),
12972                Value::Text(tname.clone()),
12973                Value::Text(fk.parent_table.clone()),
12974                Value::Text(rule_name(fk.on_update).into()),
12975                Value::Text(rule_name(fk.on_delete).into()),
12976            ]));
12977        }
12978    }
12979    (schema, rows)
12980}
12981
12982/// v7.17.0 Phase 3.P0-64 — synthesise `information_schema.STATISTICS`.
12983/// One row per (index × column) — admin tools walk this to
12984/// surface index-cardinality estimates.
12985fn synth_info_statistics(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12986    let schema = alloc::vec![
12987        ColumnSchema::new("table_name", DataType::Text, false),
12988        ColumnSchema::new("index_name", DataType::Text, false),
12989        ColumnSchema::new("column_name", DataType::Text, false),
12990        ColumnSchema::new("seq_in_index", DataType::Int, false),
12991        ColumnSchema::new("non_unique", DataType::Int, false),
12992        ColumnSchema::new("index_type", DataType::Text, false),
12993    ];
12994    let mut rows: Vec<Row> = Vec::new();
12995    for tname in cat.table_names() {
12996        let Some(t) = cat.get(&tname) else { continue };
12997        for idx in t.indices() {
12998            let col = t
12999                .schema()
13000                .columns
13001                .get(idx.column_position)
13002                .map_or("?".into(), |c| c.name.clone());
13003            rows.push(Row::new(alloc::vec![
13004                Value::Text(tname.clone()),
13005                Value::Text(idx.name.clone()),
13006                Value::Text(col),
13007                Value::Int(1),
13008                Value::Int(i32::from(!idx.is_unique)),
13009                Value::Text("BTREE".into()),
13010            ]));
13011        }
13012    }
13013    (schema, rows)
13014}
13015
13016/// v7.17.0 Phase 3.P0-64 — synthesise `information_schema.ROUTINES`.
13017/// SPG has no user-defined functions in v7.17 so the surface is
13018/// always empty; admin tools just need the table to exist.
13019fn synth_info_routines() -> (Vec<ColumnSchema>, Vec<Row>) {
13020    let schema = alloc::vec![
13021        ColumnSchema::new("routine_name", DataType::Text, false),
13022        ColumnSchema::new("routine_type", DataType::Text, false),
13023        ColumnSchema::new("data_type", DataType::Text, false),
13024    ];
13025    (schema, Vec::new())
13026}
13027
13028/// v7.17.0 Phase 3.P0-54 — synthesise `pg_catalog.pg_constraint`.
13029/// ORM compilers (Diesel, sea-orm) and admin tools probe this for
13030/// FK / UNIQUE / PK / CHECK definitions to surface relationship
13031/// graphs and validation rules. SPG ships one row per
13032/// uniqueness constraint + foreign key declared in the catalog.
13033///
13034/// Schema columns exposed:
13035///   * conname (Text) — constraint name (synthetic when anonymous)
13036///   * contype (Text) — `p` PK, `u` UNIQUE, `f` FK, `c` CHECK
13037///   * conrelid (Text) — owner table name
13038///   * confrelid (Text) — referenced parent table (FK only;
13039///     empty string otherwise)
13040///   * conkey (Text) — comma-separated column names
13041///   * confkey (Text) — comma-separated parent column names (FK only)
13042fn synth_pg_constraint(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13043    let schema = alloc::vec![
13044        ColumnSchema::new("conname", DataType::Text, false),
13045        ColumnSchema::new("contype", DataType::Text, false),
13046        ColumnSchema::new("conrelid", DataType::Text, false),
13047        ColumnSchema::new("confrelid", DataType::Text, false),
13048        ColumnSchema::new("conkey", DataType::Text, false),
13049        ColumnSchema::new("confkey", DataType::Text, false),
13050    ];
13051    let mut rows: Vec<Row> = Vec::new();
13052    for tname in cat.table_names() {
13053        let Some(t) = cat.get(&tname) else { continue };
13054        let cols = &t.schema().columns;
13055        let col_name_at = |pos: usize| -> String {
13056            cols.get(pos)
13057                .map_or_else(|| alloc::format!("col{pos}"), |c| c.name.clone())
13058        };
13059        // Uniqueness constraints (composite UNIQUE / PRIMARY KEY).
13060        for (ci, uc) in t.schema().uniqueness_constraints.iter().enumerate() {
13061            let kind = if uc.is_primary_key { "p" } else { "u" };
13062            let conname = if uc.is_primary_key {
13063                alloc::format!("{}_pkey", tname)
13064            } else {
13065                alloc::format!("{}_uniq{ci}", tname)
13066            };
13067            let conkey: Vec<String> = uc.columns.iter().map(|&p| col_name_at(p)).collect();
13068            rows.push(Row::new(alloc::vec![
13069                Value::Text(conname),
13070                Value::Text(kind.into()),
13071                Value::Text(tname.clone()),
13072                Value::Text(String::new()),
13073                Value::Text(conkey.join(",")),
13074                Value::Text(String::new()),
13075            ]));
13076        }
13077        // Single-column PK / UNIQUE indexes that have no
13078        // matching entry in `uniqueness_constraints` (the engine
13079        // creates only the BTree index for the bare-column case;
13080        // composite forms ride the UC path above).
13081        for idx in t.indices() {
13082            if !idx.is_unique {
13083                continue;
13084            }
13085            let is_primary = idx.name.ends_with("_pkey");
13086            let conname = idx.name.clone();
13087            let kind = if is_primary { "p" } else { "u" };
13088            let col_name = col_name_at(idx.column_position);
13089            // Skip if already emitted via the UC loop above (same
13090            // tuple shape — single-column).
13091            let already = t
13092                .schema()
13093                .uniqueness_constraints
13094                .iter()
13095                .any(|uc| uc.columns.len() == 1 && uc.columns[0] == idx.column_position);
13096            if already {
13097                continue;
13098            }
13099            rows.push(Row::new(alloc::vec![
13100                Value::Text(conname),
13101                Value::Text(kind.into()),
13102                Value::Text(tname.clone()),
13103                Value::Text(String::new()),
13104                Value::Text(col_name),
13105                Value::Text(String::new()),
13106            ]));
13107        }
13108        // Foreign keys.
13109        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
13110            let conname = fk
13111                .name
13112                .clone()
13113                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
13114            let conkey: Vec<String> = fk.local_columns.iter().map(|&p| col_name_at(p)).collect();
13115            // Parent column names: look up the parent table's
13116            // schema if it exists; otherwise emit positions.
13117            let confkey: Vec<String> = if let Some(parent) = cat.get(&fk.parent_table) {
13118                fk.parent_columns
13119                    .iter()
13120                    .map(|&p| {
13121                        parent
13122                            .schema()
13123                            .columns
13124                            .get(p)
13125                            .map_or_else(|| alloc::format!("col{p}"), |c| c.name.clone())
13126                    })
13127                    .collect()
13128            } else {
13129                fk.parent_columns
13130                    .iter()
13131                    .map(|p| alloc::format!("col{p}"))
13132                    .collect()
13133            };
13134            rows.push(Row::new(alloc::vec![
13135                Value::Text(conname),
13136                Value::Text("f".into()),
13137                Value::Text(tname.clone()),
13138                Value::Text(fk.parent_table.clone()),
13139                Value::Text(conkey.join(",")),
13140                Value::Text(confkey.join(",")),
13141            ]));
13142        }
13143    }
13144    (schema, rows)
13145}
13146
13147/// v7.17.0 Phase 3.P0-55 — synthesise `pg_catalog.pg_database`.
13148/// SPG is single-database so we surface a single row keyed on the
13149/// canonical `postgres` database name (matching what every PG
13150/// admin tool's startup screen expects to find).
13151fn synth_pg_database(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13152    let schema = alloc::vec![
13153        ColumnSchema::new("oid", DataType::BigInt, false),
13154        ColumnSchema::new("datname", DataType::Text, false),
13155        ColumnSchema::new("datdba", DataType::BigInt, false),
13156        ColumnSchema::new("encoding", DataType::Int, false),
13157        ColumnSchema::new("datcollate", DataType::Text, false),
13158    ];
13159    let rows = alloc::vec![Row::new(alloc::vec![
13160        Value::BigInt(16384),
13161        Value::Text("postgres".into()),
13162        Value::BigInt(10),
13163        Value::Int(6), // UTF8
13164        Value::Text("en_US.UTF-8".into()),
13165    ])];
13166    (schema, rows)
13167}
13168
13169/// v7.17.0 Phase 3.P0-55 — synthesise `pg_catalog.pg_roles`. PG's
13170/// pg_roles is a view over pg_authid showing all roles. SPG ships
13171/// one row per declared user from the engine's UserStore so admin
13172/// tool startup screens can populate.
13173fn synth_pg_roles(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
13174    let schema = alloc::vec![
13175        ColumnSchema::new("oid", DataType::BigInt, false),
13176        ColumnSchema::new("rolname", DataType::Text, false),
13177        ColumnSchema::new("rolsuper", DataType::Bool, false),
13178        ColumnSchema::new("rolinherit", DataType::Bool, false),
13179        ColumnSchema::new("rolcanlogin", DataType::Bool, false),
13180    ];
13181    let mut rows: Vec<Row> = Vec::new();
13182    let oid: i64 = 10;
13183    for (i, (name, _)) in engine.users.iter().enumerate() {
13184        rows.push(Row::new(alloc::vec![
13185            Value::BigInt(oid + (i as i64) + 1),
13186            Value::Text(name.to_string()),
13187            Value::Bool(false),
13188            Value::Bool(true),
13189            Value::Bool(true),
13190        ]));
13191    }
13192    // Always include `postgres` as the bootstrap superuser if not
13193    // already present — admin tools probe for it.
13194    if !rows
13195        .iter()
13196        .any(|r| matches!(&r.values[1], Value::Text(s) if s == "postgres"))
13197    {
13198        rows.insert(
13199            0,
13200            Row::new(alloc::vec![
13201                Value::BigInt(10),
13202                Value::Text("postgres".into()),
13203                Value::Bool(true),
13204                Value::Bool(true),
13205                Value::Bool(true),
13206            ]),
13207        );
13208    }
13209    (schema, rows)
13210}
13211
13212/// v7.17.0 Phase 3.P0-56 — synthesise `pg_catalog.pg_views`. PG's
13213/// pg_views is a view listing every catalog view; SPG ships one
13214/// row per declared view + its definition text.
13215/// Synthesise `pg_catalog.pg_extension`. SPG ships its "extension"
13216/// surfaces natively (vector, pg_trgm, plpgsql-shaped DO blocks), so
13217/// the table lists those as installed — `SELECT … FROM pg_extension
13218/// WHERE extname = 'vector'` probes from PG clients (mailrs embed
13219/// round-12) answer truthfully about capability presence.
13220fn synth_pg_extension() -> (Vec<ColumnSchema>, Vec<Row>) {
13221    let schema = alloc::vec![
13222        ColumnSchema::new("oid", DataType::BigInt, false),
13223        ColumnSchema::new("extname", DataType::Text, false),
13224        ColumnSchema::new("extversion", DataType::Text, false),
13225        ColumnSchema::new("extnamespace", DataType::Text, false),
13226    ];
13227    let exts: &[(&str, &str)] = &[("plpgsql", "1.0"), ("vector", "0.8.0"), ("pg_trgm", "1.6")];
13228    let rows = exts
13229        .iter()
13230        .enumerate()
13231        .map(|(i, (name, ver))| {
13232            Row::new(alloc::vec![
13233                Value::BigInt(16384 + i as i64),
13234                Value::Text((*name).into()),
13235                Value::Text((*ver).into()),
13236                Value::Text("pg_catalog".into()),
13237            ])
13238        })
13239        .collect();
13240    (schema, rows)
13241}
13242
13243fn synth_pg_views(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13244    let schema = alloc::vec![
13245        ColumnSchema::new("schemaname", DataType::Text, false),
13246        ColumnSchema::new("viewname", DataType::Text, false),
13247        ColumnSchema::new("definition", DataType::Text, false),
13248    ];
13249    let mut rows: Vec<Row> = Vec::new();
13250    for (name, def) in cat.views() {
13251        rows.push(Row::new(alloc::vec![
13252            Value::Text("public".into()),
13253            Value::Text(name.clone()),
13254            Value::Text(def.body.clone()),
13255        ]));
13256    }
13257    (schema, rows)
13258}
13259
13260/// v7.17.0 Phase 3.P0-57 — synthesise `pg_catalog.pg_settings`. ORM
13261/// connection-checkers (sqlx pre-flight, Diesel migrator) and admin
13262/// tools read `pg_settings` to discover server-side configuration.
13263/// SPG surfaces every session_param + a small set of canonical PG
13264/// defaults so the pre-flight queries match.
13265fn synth_pg_settings(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
13266    let schema = alloc::vec![
13267        ColumnSchema::new("name", DataType::Text, false),
13268        ColumnSchema::new("setting", DataType::Text, false),
13269        ColumnSchema::new("category", DataType::Text, false),
13270    ];
13271    let mut rows: Vec<Row> = Vec::new();
13272    // Canonical defaults every admin tool expects to find.
13273    let defaults: &[(&str, &str, &str)] = &[
13274        ("server_version", "16.0 (spg)", "Preset Options"),
13275        ("server_encoding", "UTF8", "Client Connection Defaults"),
13276        ("client_encoding", "UTF8", "Client Connection Defaults"),
13277        ("DateStyle", "ISO, MDY", "Client Connection Defaults"),
13278        ("TimeZone", "UTC", "Client Connection Defaults"),
13279        ("standard_conforming_strings", "on", "Compatibility"),
13280        ("integer_datetimes", "on", "Compatibility"),
13281        ("max_connections", "100", "Connections and Authentication"),
13282    ];
13283    for &(name, val, cat) in defaults {
13284        rows.push(Row::new(alloc::vec![
13285            Value::Text(name.into()),
13286            Value::Text(val.into()),
13287            Value::Text(cat.into()),
13288        ]));
13289    }
13290    // Session-set params override the static defaults.
13291    for (k, v) in &engine.session_params {
13292        if !defaults
13293            .iter()
13294            .any(|(n, _, _)| (*n).eq_ignore_ascii_case(k))
13295        {
13296            rows.push(Row::new(alloc::vec![
13297                Value::Text(k.clone()),
13298                Value::Text(v.clone()),
13299                Value::Text("Session".into()),
13300            ]));
13301        }
13302    }
13303    (schema, rows)
13304}
13305
13306/// v7.17.0 Phase 3.P0-53 — synthesise `pg_catalog.pg_indexes`.
13307/// PG's pg_indexes is a real view on pg_index + pg_class + pg_attribute.
13308/// SPG ships it as a synthesised flat table so admin tools (pgAdmin,
13309/// DataGrip) can list indexes by tablename without joining four catalogs.
13310///
13311/// Schema columns exposed:
13312///   * schemaname (Text) — always `public`
13313///   * tablename (Text)
13314///   * indexname (Text)
13315///   * indexdef (Text) — best-effort CREATE INDEX DDL
13316fn synth_pg_indexes(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13317    let schema = alloc::vec![
13318        ColumnSchema::new("schemaname", DataType::Text, false),
13319        ColumnSchema::new("tablename", DataType::Text, false),
13320        ColumnSchema::new("indexname", DataType::Text, false),
13321        ColumnSchema::new("indexdef", DataType::Text, false),
13322    ];
13323    let mut rows: Vec<Row> = Vec::new();
13324    for tname in cat.table_names() {
13325        let Some(t) = cat.get(&tname) else { continue };
13326        for idx in t.indices() {
13327            let col_name = t
13328                .schema()
13329                .columns
13330                .get(idx.column_position)
13331                .map_or("?".into(), |c| c.name.clone());
13332            let unique_kw = if idx.is_unique { "UNIQUE " } else { "" };
13333            let indexdef = alloc::format!(
13334                "CREATE {unique_kw}INDEX {} ON public.{} ({})",
13335                idx.name,
13336                tname,
13337                col_name
13338            );
13339            rows.push(Row::new(alloc::vec![
13340                Value::Text("public".into()),
13341                Value::Text(tname.clone()),
13342                Value::Text(idx.name.clone()),
13343                Value::Text(indexdef),
13344            ]));
13345        }
13346    }
13347    (schema, rows)
13348}
13349
13350/// v7.17.0 Phase 3.P0-53 — synthesise `pg_catalog.pg_index`. The
13351/// "raw" pg_index catalog used by PG-internal tooling for index
13352/// flags and ordinal information. SPG ships the columns ORM probes
13353/// actually filter on.
13354///
13355/// Schema columns exposed:
13356///   * indexrelid (BigInt) — index OID (synthetic = position+1)
13357///   * indrelid (BigInt) — table OID (synthetic = position+1)
13358///   * indnatts (Int) — number of indexed columns
13359///   * indisunique (Bool)
13360///   * indisprimary (Bool)
13361fn synth_pg_index_raw(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13362    let schema = alloc::vec![
13363        ColumnSchema::new("indexrelid", DataType::BigInt, false),
13364        ColumnSchema::new("indrelid", DataType::BigInt, false),
13365        ColumnSchema::new("indnatts", DataType::Int, false),
13366        ColumnSchema::new("indisunique", DataType::Bool, false),
13367        ColumnSchema::new("indisprimary", DataType::Bool, false),
13368    ];
13369    let mut rows: Vec<Row> = Vec::new();
13370    let mut idx_oid: i64 = 100_000;
13371    for (table_idx, tname) in cat.table_names().iter().enumerate() {
13372        let Some(t) = cat.get(tname) else { continue };
13373        for idx in t.indices() {
13374            idx_oid += 1;
13375            #[allow(clippy::cast_possible_wrap)]
13376            let nattrs = (1 + idx.extra_column_positions.len()) as i32;
13377            // is_primary: SPG / PG flag the primary via the
13378            // index name convention `<table>_pkey`.
13379            let is_primary = idx.name.ends_with("_pkey");
13380            rows.push(Row::new(alloc::vec![
13381                Value::BigInt(idx_oid),
13382                Value::BigInt((table_idx + 1) as i64),
13383                Value::Int(nattrs),
13384                Value::Bool(idx.is_unique),
13385                Value::Bool(is_primary),
13386            ]));
13387        }
13388    }
13389    (schema, rows)
13390}
13391
13392/// v7.17.0 Phase 3.P0-52 — synthesise `pg_catalog.pg_namespace`.
13393/// SPG is single-schema so we expose the canonical PG schemas:
13394/// `public` (user-facing), `pg_catalog` (built-in), and
13395/// `information_schema` (PG meta).
13396fn synth_pg_namespace(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13397    let schema = alloc::vec![
13398        ColumnSchema::new("oid", DataType::BigInt, false),
13399        ColumnSchema::new("nspname", DataType::Text, false),
13400        ColumnSchema::new("nspowner", DataType::BigInt, false),
13401    ];
13402    let rows = alloc::vec![
13403        Row::new(alloc::vec![
13404            Value::BigInt(11),
13405            Value::Text("pg_catalog".into()),
13406            Value::BigInt(10),
13407        ]),
13408        Row::new(alloc::vec![
13409            Value::BigInt(2200),
13410            Value::Text("public".into()),
13411            Value::BigInt(10),
13412        ]),
13413        Row::new(alloc::vec![
13414            Value::BigInt(13000),
13415            Value::Text("information_schema".into()),
13416            Value::BigInt(10),
13417        ]),
13418    ];
13419    (schema, rows)
13420}
13421
13422/// v7.16.2 — drop the synthesised meta view into the enriched
13423/// catalog so the regular FROM-resolution path can see it.
13424fn materialise_meta_view(
13425    catalog: &mut Catalog,
13426    name: &str,
13427    columns: Vec<ColumnSchema>,
13428    rows: Vec<Row>,
13429) -> Result<(), EngineError> {
13430    let schema = TableSchema::new(name.to_string(), columns);
13431    catalog.create_table(schema).map_err(EngineError::Storage)?;
13432    let table = catalog
13433        .get_mut(name)
13434        .expect("just-created meta view must exist");
13435    for row in rows {
13436        table.insert(row).map_err(EngineError::Storage)?;
13437    }
13438    Ok(())
13439}
13440
13441/// v7.16.2 — true when the SELECT statement references any
13442/// `__spg_info_*` or `__spg_pg_*` synthetic table name (the
13443/// parser produces these for `information_schema.X` /
13444/// `pg_catalog.X`). Used by `exec_select_cancel` to short-
13445/// circuit into the meta-view materialisation path.
13446/// v7.17.0 Phase 1.2 — append the names of any catalog-known
13447/// views referenced by `tref` to `into`. Helper for
13448/// `Engine::expand_views_in_select`. A view that's been already
13449/// materialised as a table (e.g. via the synthetic CTE pass for
13450/// SELECT FROM v) is skipped — the table form wins so the
13451/// recursive exec_select_cancel call inside exec_with_ctes
13452/// doesn't re-expand and trigger the CTE-shadow guard.
13453fn collect_view_refs(
13454    tref: &spg_sql::ast::TableRef,
13455    cat: &spg_storage::Catalog,
13456    into: &mut Vec<String>,
13457) {
13458    if cat.views().contains_key(&tref.name)
13459        && cat.get(&tref.name).is_none()
13460        && !into.iter().any(|n| n == &tref.name)
13461    {
13462        into.push(tref.name.clone());
13463    }
13464}
13465
13466fn select_references_meta_view(stmt: &SelectStatement) -> bool {
13467    fn is_meta(name: &str) -> bool {
13468        name.starts_with("__spg_info_")
13469            || name.starts_with("__spg_pg_")
13470            || name.starts_with("__spg_mysql_")
13471    }
13472    if let Some(from) = &stmt.from {
13473        if is_meta(&from.primary.name) {
13474            return true;
13475        }
13476        for j in &from.joins {
13477            if is_meta(&j.table.name) {
13478                return true;
13479            }
13480        }
13481    }
13482    for cte in &stmt.ctes {
13483        if select_references_meta_view(&cte.body) {
13484            return true;
13485        }
13486    }
13487    false
13488}
13489
13490/// v7.16.2 — collect every meta-view name a SELECT touches.
13491/// Returns a deduplicated, sorted list. Caller materialises
13492/// each one into the enriched catalog before re-running the
13493/// SELECT. Walks JOINs, CTEs, and the primary FROM.
13494fn collect_meta_view_names(
13495    stmt: &SelectStatement,
13496    into: &mut alloc::collections::BTreeSet<String>,
13497) {
13498    fn is_meta(name: &str) -> bool {
13499        name.starts_with("__spg_info_")
13500            || name.starts_with("__spg_pg_")
13501            || name.starts_with("__spg_mysql_")
13502    }
13503    if let Some(from) = &stmt.from {
13504        if is_meta(&from.primary.name) {
13505            into.insert(from.primary.name.clone());
13506        }
13507        for j in &from.joins {
13508            if is_meta(&j.table.name) {
13509                into.insert(j.table.name.clone());
13510            }
13511        }
13512    }
13513    for cte in &stmt.ctes {
13514        collect_meta_view_names(&cte.body, into);
13515    }
13516}
13517
13518fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
13519    let mut out = columns.to_vec();
13520    for (col_idx, col) in out.iter_mut().enumerate() {
13521        if col.ty != DataType::Text {
13522            continue;
13523        }
13524        let mut inferred: Option<DataType> = None;
13525        let mut all_null = true;
13526        for row in rows {
13527            let Some(v) = row.values.get(col_idx) else {
13528                continue;
13529            };
13530            let ty = match v {
13531                Value::Null => continue,
13532                Value::SmallInt(_) => DataType::SmallInt,
13533                Value::Int(_) => DataType::Int,
13534                Value::BigInt(_) => DataType::BigInt,
13535                Value::Float(_) => DataType::Float,
13536                Value::Bool(_) => DataType::Bool,
13537                Value::Vector(_) => DataType::Vector {
13538                    dim: 0,
13539                    encoding: VecEncoding::F32,
13540                },
13541                _ => DataType::Text,
13542            };
13543            all_null = false;
13544            inferred = Some(match inferred {
13545                None => ty,
13546                Some(prev) if prev == ty => prev,
13547                Some(_) => DataType::Text,
13548            });
13549        }
13550        if let Some(t) = inferred {
13551            col.ty = t;
13552            col.nullable = true;
13553        } else if all_null {
13554            col.nullable = true;
13555        }
13556    }
13557    out
13558}
13559
13560/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
13561/// Lines are pushed into `out`; `depth` controls indentation. We
13562/// describe the rewritten SELECT — what the executor *would* do —
13563/// using the engine handle to spot indexed lookups and table shapes.
13564#[allow(clippy::too_many_lines, clippy::format_push_string)]
13565/// v6.2.4 — Walk every line of the rendered plan tree and append
13566/// per-operator stats. Lines that name a known operator get
13567/// `(rows=N)` (`actual_rows` of the top-level operator equals the
13568/// final result row count; scans report their catalog row count
13569/// as the rows-considered metric). Other lines — Filter / Join /
13570/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
13571/// complete-by-construction; v6.2.5 fills these in via inline
13572/// executor counters.
13573/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
13574/// `(table, column)` pair the query touches via WHERE / JOIN
13575/// that doesn't already have an index on the owning table.
13576/// Walks the SELECT's FROM clauses + WHERE expression tree;
13577/// returns one line per missing index. Deterministic order:
13578/// FROM-clause iteration order, then column-reference walk
13579/// order inside each WHERE. Each suggestion is a copy-pastable
13580/// DDL string.
13581fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
13582    use alloc::collections::BTreeSet;
13583    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
13584    let mut out: Vec<String> = Vec::new();
13585    let cat = engine.active_catalog();
13586    // Build a (table, qualifier-or-alias) list from the FROM clause
13587    // so unqualified column refs in WHERE resolve to the correct
13588    // table.
13589    let Some(from) = &stmt.from else {
13590        return out;
13591    };
13592    let mut tables: Vec<String> = Vec::new();
13593    tables.push(from.primary.name.clone());
13594    for j in &from.joins {
13595        tables.push(j.table.name.clone());
13596    }
13597    // Collect column refs from the WHERE expression. JOIN ON
13598    // predicates also feed in.
13599    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
13600    if let Some(w) = &stmt.where_ {
13601        collect_column_refs(w, &mut col_refs);
13602    }
13603    for j in &from.joins {
13604        if let Some(on) = &j.on {
13605            collect_column_refs(on, &mut col_refs);
13606        }
13607    }
13608    for cn in &col_refs {
13609        // Resolve owner table: explicit qualifier first, else
13610        // first table in FROM that has a column of this name.
13611        let owner: Option<String> = if let Some(q) = &cn.qualifier {
13612            tables.iter().find(|t| t == &q).cloned()
13613        } else {
13614            tables.iter().find_map(|t| {
13615                cat.get(t).and_then(|tbl| {
13616                    if tbl.schema().column_position(&cn.name).is_some() {
13617                        Some(t.clone())
13618                    } else {
13619                        None
13620                    }
13621                })
13622            })
13623        };
13624        let Some(owner) = owner else {
13625            continue;
13626        };
13627        let Some(tbl) = cat.get(&owner) else {
13628            continue;
13629        };
13630        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
13631            continue;
13632        };
13633        // Skip if any BTree index already covers this column as
13634        // its key.
13635        let already_indexed = tbl.indices().iter().any(|i| {
13636            matches!(i.kind, spg_storage::IndexKind::BTree(_))
13637                && i.column_position == col_pos
13638                && i.expression.is_none()
13639                && i.partial_predicate.is_none()
13640        });
13641        if already_indexed {
13642            continue;
13643        }
13644        if seen.insert((owner.clone(), cn.name.clone())) {
13645            out.push(alloc::format!(
13646                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
13647                owner,
13648                cn.name,
13649                owner,
13650                cn.name
13651            ));
13652        }
13653    }
13654    out
13655}
13656
13657/// Walks an `Expr` and pushes every `ColumnName` it references.
13658/// Order is depth-first, left-to-right.
13659fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
13660    match expr {
13661        Expr::Column(cn) => out.push(cn.clone()),
13662        Expr::FunctionCall { args, .. } => {
13663            for a in args {
13664                collect_column_refs(a, out);
13665            }
13666        }
13667        Expr::Binary { lhs, rhs, .. } => {
13668            collect_column_refs(lhs, out);
13669            collect_column_refs(rhs, out);
13670        }
13671        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
13672        _ => {}
13673    }
13674}
13675
13676fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
13677    let catalog = engine.active_catalog();
13678    let cold_ids = catalog.cold_segment_ids_global();
13679    let any_cold = !cold_ids.is_empty();
13680    let cold_ids_repr = if any_cold {
13681        let mut s = alloc::string::String::from("[");
13682        for (i, id) in cold_ids.iter().enumerate() {
13683            if i > 0 {
13684                s.push(',');
13685            }
13686            s.push_str(&alloc::format!("{id}"));
13687        }
13688        s.push(']');
13689        s
13690    } else {
13691        alloc::string::String::new()
13692    };
13693    for (idx, line) in lines.iter_mut().enumerate() {
13694        let trimmed = line.trim_start();
13695        let is_top_level = idx == 0;
13696        if is_top_level {
13697            line.push_str(&alloc::format!(" (rows={total_rows})"));
13698            continue;
13699        }
13700        if let Some(rest) = trimmed.strip_prefix("From: ") {
13701            let (name, scan_kind) = match rest.split_once(" [") {
13702                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
13703                None => (rest.trim(), ""),
13704            };
13705            let bare = name.split_whitespace().next().unwrap_or(name);
13706            let hot = catalog.get(bare).map(|t| t.rows().len());
13707            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
13708            // cold-tier segment the scan COULD have walked. v6.2.x
13709            // can tighten to per-table by walking the table's
13710            // BTree-index cold locators.
13711            let annot = match (hot, scan_kind) {
13712                (Some(h), "full scan") => {
13713                    let mut s = alloc::format!(" (hot_rows={h}");
13714                    if any_cold {
13715                        s.push_str(&alloc::format!(
13716                            ", cold_tier=present, cold_segments={cold_ids_repr}"
13717                        ));
13718                    }
13719                    s.push(')');
13720                    s
13721                }
13722                (Some(h), "index seek") => {
13723                    let mut s = alloc::format!(" (hot_rows≤{h}");
13724                    if any_cold {
13725                        s.push_str(&alloc::format!(
13726                            ", cold_tier=present, cold_segments={cold_ids_repr}"
13727                        ));
13728                    }
13729                    s.push(')');
13730                    s
13731                }
13732                _ => " (rows=—)".to_string(),
13733            };
13734            line.push_str(&annot);
13735            continue;
13736        }
13737        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
13738        line.push_str(" (rows=—)");
13739    }
13740}
13741
13742fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
13743    let pad = "  ".repeat(depth);
13744    // 1) Top-level operator label.
13745    let top = if !stmt.ctes.is_empty() {
13746        if stmt.ctes.iter().any(|c| c.recursive) {
13747            "CTEScan (WITH RECURSIVE)"
13748        } else {
13749            "CTEScan (WITH)"
13750        }
13751    } else if !stmt.unions.is_empty() {
13752        "UnionScan"
13753    } else if select_has_window(stmt) {
13754        "WindowAgg"
13755    } else if aggregate::uses_aggregate(stmt) {
13756        "Aggregate"
13757    } else if stmt.distinct {
13758        "Distinct"
13759    } else if stmt.from.is_some() {
13760        "TableScan"
13761    } else {
13762        "Result"
13763    };
13764    out.push(alloc::format!("{pad}{top}"));
13765    let child = "  ".repeat(depth + 1);
13766    // 2) CTE bodies.
13767    for cte in &stmt.ctes {
13768        let head = if cte.recursive {
13769            alloc::format!("{child}CTE (recursive): {}", cte.name)
13770        } else {
13771            alloc::format!("{child}CTE: {}", cte.name)
13772        };
13773        out.push(head);
13774        explain_select(&cte.body, engine, depth + 2, out);
13775    }
13776    // 3) FROM details — primary table + joins, index hits.
13777    if let Some(from) = &stmt.from {
13778        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
13779        if let Some(alias) = &from.primary.alias {
13780            tag.push_str(&alloc::format!(" AS {alias}"));
13781        }
13782        // Try to detect an index-seek opportunity on WHERE against
13783        // the primary table — same heuristic the executor uses.
13784        if let Some(w) = &stmt.where_
13785            && let Some(table) = engine.active_catalog().get(&from.primary.name)
13786        {
13787            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
13788            let cols = &table.schema().columns;
13789            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
13790                tag.push_str(" [index seek]");
13791            } else {
13792                tag.push_str(" [full scan]");
13793            }
13794        } else {
13795            tag.push_str(" [full scan]");
13796        }
13797        out.push(tag);
13798        for j in &from.joins {
13799            let kind = match j.kind {
13800                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
13801                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
13802                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
13803            };
13804            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
13805            if let Some(alias) = &j.table.alias {
13806                s.push_str(&alloc::format!(" AS {alias}"));
13807            }
13808            if j.on.is_some() {
13809                s.push_str(" (ON …)");
13810            }
13811            out.push(s);
13812        }
13813    }
13814    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
13815    if let Some(w) = &stmt.where_ {
13816        let mut s = alloc::format!("{child}Filter: {w}");
13817        if expr_has_subquery(w) {
13818            s.push_str(" [subquery]");
13819        }
13820        out.push(s);
13821    }
13822    if let Some(gs) = &stmt.group_by {
13823        let mut parts = Vec::new();
13824        for g in gs {
13825            parts.push(alloc::format!("{g}"));
13826        }
13827        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
13828    }
13829    if let Some(h) = &stmt.having {
13830        out.push(alloc::format!("{child}Having: {h}"));
13831    }
13832    for o in &stmt.order_by {
13833        let dir = if o.desc { "DESC" } else { "ASC" };
13834        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
13835    }
13836    if let Some(lim) = stmt.limit {
13837        out.push(alloc::format!("{child}Limit: {lim}"));
13838    }
13839    if let Some(off) = stmt.offset {
13840        out.push(alloc::format!("{child}Offset: {off}"));
13841    }
13842    // 5) Projection — collapse Wildcard or render N items.
13843    if stmt
13844        .items
13845        .iter()
13846        .any(|it| matches!(it, SelectItem::Wildcard))
13847    {
13848        out.push(alloc::format!("{child}Project: *"));
13849    } else {
13850        out.push(alloc::format!(
13851            "{child}Project: {} item(s)",
13852            stmt.items.len()
13853        ));
13854    }
13855    // 6) Recurse into UNION peers.
13856    for (kind, peer) in &stmt.unions {
13857        let label = match kind {
13858            UnionKind::All => "UNION ALL",
13859            UnionKind::Distinct => "UNION",
13860        };
13861        out.push(alloc::format!("{child}{label}"));
13862        explain_select(peer, engine, depth + 2, out);
13863    }
13864}
13865
13866/// v4.23: recognise the engine errors that indicate the inner
13867/// SELECT couldn't be evaluated in isolation because it references
13868/// an outer column — used by `subquery_replacement` to skip
13869/// materialisation and let row-eval handle it instead.
13870fn is_correlation_error(e: &EngineError) -> bool {
13871    matches!(
13872        e,
13873        EngineError::Eval(
13874            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
13875        )
13876    )
13877}
13878
// NOTE(review): the v4.23 paragraph below describes a substitution
// function (it talks about walking every Expr in `stmt`), not this
// struct. It is kept as a plain comment so it stays out of
// `JoinedPeer`'s rustdoc — confirm which function it belongs to.
//
// v4.23: walk every Expr in `stmt` and replace each Column ref
// that targets the outer scope (qualifier matches the outer
// table alias) with a Literal carrying the outer row's value.
// Conservative: only qualified refs are substituted, so the user
// must write `outer_alias.col` to reference an outer column. This
// matches PG's lexical scoping for correlated subqueries and
// avoids accidentally rebinding inner columns of the same name.

/// v7.17.0 Phase 3.P0-41 — LATERAL peer descriptor. Either eagerly
/// materialised (every regular table / unnest / generate_series) or
/// lateral (subquery re-evaluated per outer row).
struct JoinedPeer<'a> {
    /// Pre-computed rows of the peer. Presumably `Some` for an eagerly
    /// materialised peer and `None` for a lateral one — TODO confirm
    /// against the join loop that builds this struct.
    eager_rows: Option<Vec<Row>>,
    /// Column schema of the rows this peer contributes.
    cols: Vec<ColumnSchema>,
    /// Name the peer is addressed by in the query (presumably the
    /// explicit alias, else the table name — verify at the build site).
    alias: String,
    /// Join flavour connecting this peer to what precedes it.
    kind: JoinKind,
    /// Borrowed `ON` predicate, when the join has one.
    on: Option<&'a Expr>,
    /// Borrowed LATERAL subquery body; presumably `Some` exactly when
    /// the peer is re-evaluated per outer row — TODO confirm.
    lateral: Option<&'a SelectStatement>,
    /// v7.28 (round-22) — plain-table name for the index-nested-loop
    /// path. None for unnest/lateral.
    join_table: Option<String>,
}
13900
13901/// v7.17.0 Phase 3.P0-41 — synthesise a column name for a LATERAL
13902/// projection item that has no explicit alias. PG names anonymous
13903/// projection items by the function call's name or by `column<i>`.
13904/// SPG mirrors the latter (lower-overhead than walking arbitrary
13905/// Expr shapes) so the probe-schema fallback path produces stable
13906/// names for the lateral peer's columns.
13907fn synth_lateral_col_name(expr: &Expr, idx: usize) -> String {
13908    match expr {
13909        // Bare column reference — use the column's own name.
13910        Expr::Column(c) => c.name.clone(),
13911        // Function call — use the function name (PG canonical:
13912        // `count` / `max` / `lower` …).
13913        Expr::FunctionCall { name, .. } => name.clone(),
13914        // Cast — drill into the inner expression.
13915        Expr::Cast { expr: inner, .. } => synth_lateral_col_name(inner, idx),
13916        // Everything else falls back to PG's `column<N>` placeholder.
13917        _ => alloc::format!("column{}", idx + 1),
13918    }
13919}
13920
/// v7.17.0 Phase 3.P0-41 — substitute every `<alias>.<col>` Expr
/// reference whose `<alias>.<col>` exists in the outer composite
/// schema with the matching value from the outer row, so outer
/// references inside a LATERAL subquery resolve before execution.
///
/// This is a thin entry point: all the work is delegated to
/// `substitute_outer_in_select`, which rewrites `stmt` in place
/// (projection items, WHERE, GROUP BY, HAVING, ORDER BY and UNION
/// peers are among the clauses it visits). Only the expression shapes
/// handled by `substitute_outer_in_expr` are descended into.
///
/// * `stmt` — the LATERAL subquery body, mutated in place.
/// * `outer_row` — the current outer row supplying the values.
/// * `outer_schema` — schema of `outer_row`; column names are expected
///   in composite `<alias>.<col>` form.
fn substitute_outer_columns_multi(
    stmt: &mut SelectStatement,
    outer_row: &Row,
    outer_schema: &[ColumnSchema],
) {
    substitute_outer_in_select(stmt, outer_row, outer_schema);
}
13934
13935fn substitute_outer_in_select(
13936    stmt: &mut SelectStatement,
13937    outer_row: &Row,
13938    outer_schema: &[ColumnSchema],
13939) {
13940    for item in &mut stmt.items {
13941        if let SelectItem::Expr { expr, .. } = item {
13942            substitute_outer_in_expr(expr, outer_row, outer_schema);
13943        }
13944    }
13945    if let Some(w) = &mut stmt.where_ {
13946        substitute_outer_in_expr(w, outer_row, outer_schema);
13947    }
13948    if let Some(gs) = &mut stmt.group_by {
13949        for g in gs {
13950            substitute_outer_in_expr(g, outer_row, outer_schema);
13951        }
13952    }
13953    if let Some(h) = &mut stmt.having {
13954        substitute_outer_in_expr(h, outer_row, outer_schema);
13955    }
13956    for o in &mut stmt.order_by {
13957        substitute_outer_in_expr(&mut o.expr, outer_row, outer_schema);
13958    }
13959    for (_, peer) in &mut stmt.unions {
13960        substitute_outer_in_select(peer, outer_row, outer_schema);
13961    }
13962}
13963
13964fn substitute_outer_in_expr(e: &mut Expr, outer_row: &Row, outer_schema: &[ColumnSchema]) {
13965    if let Expr::Column(c) = e
13966        && let Some(qual) = &c.qualifier
13967    {
13968        let composite = alloc::format!("{qual}.{}", c.name);
13969        if let Some(idx) = outer_schema
13970            .iter()
13971            .position(|sc| sc.name.eq_ignore_ascii_case(&composite))
13972        {
13973            let v = outer_row.values.get(idx).cloned().unwrap_or(Value::Null);
13974            if let Ok(lit) = value_to_literal_expr(v) {
13975                *e = lit;
13976                return;
13977            }
13978        }
13979    }
13980    match e {
13981        Expr::Binary { lhs, rhs, .. } => {
13982            substitute_outer_in_expr(lhs, outer_row, outer_schema);
13983            substitute_outer_in_expr(rhs, outer_row, outer_schema);
13984        }
13985        Expr::Unary { expr: inner, .. } => {
13986            substitute_outer_in_expr(inner, outer_row, outer_schema);
13987        }
13988        Expr::FunctionCall { args, .. } => {
13989            for a in args {
13990                substitute_outer_in_expr(a, outer_row, outer_schema);
13991            }
13992        }
13993        Expr::Cast { expr: inner, .. } => {
13994            substitute_outer_in_expr(inner, outer_row, outer_schema);
13995        }
13996        Expr::Case {
13997            operand,
13998            branches,
13999            else_branch,
14000        } => {
14001            if let Some(op) = operand {
14002                substitute_outer_in_expr(op, outer_row, outer_schema);
14003            }
14004            for (cond, val) in branches {
14005                substitute_outer_in_expr(cond, outer_row, outer_schema);
14006                substitute_outer_in_expr(val, outer_row, outer_schema);
14007            }
14008            if let Some(e) = else_branch {
14009                substitute_outer_in_expr(e, outer_row, outer_schema);
14010            }
14011        }
14012        _ => {}
14013    }
14014}
14015
14016impl Engine {
14017    /// v7.29 (round-22 phase 3) — try to batch-evaluate a correlated
14018    /// scalar subquery of the shape
14019    ///   (SELECT expr FROM … WHERE inner_preds AND inner_col = outer_col
14020    ///    [ORDER BY o [DESC]] [LIMIT 1])
14021    /// by running the subquery ONCE without the correlation and
14022    /// folding rows into a key→value map (group top-1 when ordered).
14023    /// Returns None when the shape doesn't qualify; correctness then
14024    /// falls back to per-row execution.
14025    fn try_batch_correlated_scalar(
14026        &self,
14027        inner: &SelectStatement,
14028        cancel: CancelToken<'_>,
14029    ) -> Result<Option<memoize::GroupMap>, EngineError> {
14030        use spg_sql::ast::{BinOp, SelectItem as SI};
14031        if !inner.ctes.is_empty()
14032            || !inner.unions.is_empty()
14033            || inner.group_by.is_some()
14034            || inner.having.is_some()
14035            || inner.distinct
14036            || inner.items.len() != 1
14037            || inner.order_by.len() > 1
14038            || inner.offset.is_some()
14039        {
14040            return Ok(None);
14041        }
14042        // LIMIT must be absent or literally 1 (top-1 semantics).
14043        if let Some(le) = inner.limit
14044            && le.as_literal() != Some(1)
14045        {
14046            return Ok(None);
14047        }
14048        let Some(from) = &inner.from else {
14049            return Ok(None);
14050        };
14051        if from.primary.lateral_subquery.is_some() || from.primary.unnest_expr.is_some() {
14052            return Ok(None);
14053        }
14054        // Inner alias set.
14055        let mut inner_aliases: Vec<String> = Vec::new();
14056        inner_aliases.push(
14057            from.primary
14058                .alias
14059                .clone()
14060                .unwrap_or_else(|| from.primary.name.clone()),
14061        );
14062        for j in &from.joins {
14063            if j.table.lateral_subquery.is_some() || j.table.unnest_expr.is_some() {
14064                return Ok(None);
14065            }
14066            inner_aliases.push(
14067                j.table
14068                    .alias
14069                    .clone()
14070                    .unwrap_or_else(|| j.table.name.clone()),
14071            );
14072        }
14073        let is_inner = |c: &spg_sql::ast::ColumnName| -> bool {
14074            match &c.qualifier {
14075                Some(q) => inner_aliases.iter().any(|a| a.eq_ignore_ascii_case(q)),
14076                None => false,
14077            }
14078        };
14079        let is_outer = |c: &spg_sql::ast::ColumnName| -> bool {
14080            match &c.qualifier {
14081                Some(q) => !inner_aliases.iter().any(|a| a.eq_ignore_ascii_case(q)),
14082                // Synthetic group columns arrive bare after the
14083                // aggregate rewrite.
14084                None => c.name.starts_with("__grp_") || c.name.starts_with("__agg_"),
14085            }
14086        };
14087        // Every expression OTHER than the correlation conjunct must be
14088        // fully inner (qualified to inner aliases).
14089        let all_inner = |e: &Expr| -> bool {
14090            let mut cols: Vec<spg_sql::ast::ColumnName> = Vec::new();
14091            let mut subs: Vec<&SelectStatement> = Vec::new();
14092            visit_expr_columns_and_subqueries(e, &mut |c| cols.push(c.clone()), &mut |sub| {
14093                subs.push(sub)
14094            });
14095            subs.is_empty() && cols.iter().all(|c| is_inner(c) && !c.name.is_empty())
14096        };
14097        let Some(w) = &inner.where_ else {
14098            return Ok(None);
14099        };
14100        let conjuncts = reorder::split_and_conjunctions(w);
14101        let mut corr: Option<(spg_sql::ast::ColumnName, spg_sql::ast::ColumnName)> = None; // (inner, outer)
14102        let mut rest: Vec<&Expr> = Vec::new();
14103        for c in conjuncts {
14104            if let Expr::Binary {
14105                lhs,
14106                op: BinOp::Eq,
14107                rhs,
14108            } = c
14109                && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
14110            {
14111                let pair = if is_inner(a) && is_outer(b) {
14112                    Some((a.clone(), b.clone()))
14113                } else if is_inner(b) && is_outer(a) {
14114                    Some((b.clone(), a.clone()))
14115                } else {
14116                    None
14117                };
14118                if let Some(p) = pair {
14119                    if corr.is_some() {
14120                        return Ok(None); // more than one correlation
14121                    }
14122                    corr = Some(p);
14123                    continue;
14124                }
14125            }
14126            if !all_inner(c) {
14127                return Ok(None);
14128            }
14129            rest.push(c);
14130        }
14131        let Some((inner_col, outer_col)) = corr else {
14132            return Ok(None);
14133        };
14134        let SI::Expr { expr: out_expr, .. } = &inner.items[0] else {
14135            return Ok(None);
14136        };
14137        if !all_inner(out_expr) {
14138            return Ok(None);
14139        }
14140        let order = inner.order_by.first();
14141        if let Some(o) = order
14142            && !all_inner(&o.expr)
14143        {
14144            return Ok(None);
14145        }
14146        // Build the batch statement: SELECT inner_col, [order], expr
14147        // FROM … WHERE rest — no correlation, no order, no limit.
14148        let mut batch = inner.clone();
14149        batch.limit = None;
14150        batch.offset = None;
14151        batch.order_by = Vec::new();
14152        batch.where_ = rest
14153            .iter()
14154            .map(|e| (*e).clone())
14155            .reduce(|a, b| Expr::Binary {
14156                lhs: alloc::boxed::Box::new(a),
14157                op: BinOp::And,
14158                rhs: alloc::boxed::Box::new(b),
14159            });
14160        let mut items: Vec<SI> = alloc::vec![SI::Expr {
14161            expr: Expr::Column(inner_col),
14162            alias: None,
14163        }];
14164        if let Some(o) = order {
14165            items.push(SI::Expr {
14166                expr: o.expr.clone(),
14167                alias: None,
14168            });
14169        }
14170        items.push(SI::Expr {
14171            expr: out_expr.clone(),
14172            alias: None,
14173        });
14174        batch.items = items;
14175        let r = self.exec_select_cancel(&batch, cancel)?;
14176        let QueryResult::Rows { rows, .. } = r else {
14177            return Ok(None);
14178        };
14179        let has_order = order.is_some();
14180        let (desc, nf) = order
14181            .map(|o| (o.desc, o.nulls_first))
14182            .unwrap_or((false, None));
14183        let mut best: alloc::collections::BTreeMap<String, (Option<Value>, Value)> =
14184            alloc::collections::BTreeMap::new();
14185        for row in rows {
14186            let key_v = row.values.first().cloned().unwrap_or(Value::Null);
14187            if matches!(key_v, Value::Null) {
14188                continue;
14189            }
14190            let key = aggregate::encode_key(core::slice::from_ref(&key_v));
14191            let (ord_v, out_v) = if has_order {
14192                (
14193                    Some(row.values.get(1).cloned().unwrap_or(Value::Null)),
14194                    row.values.get(2).cloned().unwrap_or(Value::Null),
14195                )
14196            } else {
14197                (None, row.values.get(1).cloned().unwrap_or(Value::Null))
14198            };
14199            match best.get(&key) {
14200                None => {
14201                    best.insert(key, (ord_v, out_v));
14202                }
14203                Some((cur_ord, _)) if has_order => {
14204                    // The sorted-first row wins: candidate beats the
14205                    // incumbent when it compares LESS under the key's
14206                    // ordering.
14207                    let cand = ord_v.clone().unwrap_or(Value::Null);
14208                    let cur = cur_ord.clone().unwrap_or(Value::Null);
14209                    if order_by_value_cmp(desc, nf, &cand, &cur) == core::cmp::Ordering::Less {
14210                        best.insert(key, (ord_v, out_v));
14211                    }
14212                }
14213                Some(_) => {} // unordered: first row stands (any row is valid)
14214            }
14215        }
14216        let map = best.into_iter().map(|(k, (_, v))| (k, v)).collect();
14217        Ok(Some((outer_col, map)))
14218    }
14219}
14220
14221/// v7.29 (3c) — pre-order collection of SCALAR subquery nodes in a
14222/// host expression (no descent into subquery bodies). The splice
14223/// walk below uses the same order; the pair must stay in lockstep.
14224fn collect_scalar_subqueries<'a>(e: &'a Expr, out: &mut Vec<&'a SelectStatement>) {
14225    match e {
14226        Expr::ScalarSubquery(s) => out.push(s),
14227        Expr::Exists { .. } | Expr::InSubquery { .. } => {}
14228        Expr::Binary { lhs, rhs, .. } => {
14229            collect_scalar_subqueries(lhs, out);
14230            collect_scalar_subqueries(rhs, out);
14231        }
14232        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14233            collect_scalar_subqueries(expr, out);
14234        }
14235        Expr::Like { expr, pattern, .. } => {
14236            collect_scalar_subqueries(expr, out);
14237            collect_scalar_subqueries(pattern, out);
14238        }
14239        Expr::FunctionCall { args, .. } => {
14240            for a in args {
14241                collect_scalar_subqueries(a, out);
14242            }
14243        }
14244        Expr::AggregateOrdered { call, order_by, .. } => {
14245            collect_scalar_subqueries(call, out);
14246            for o in order_by {
14247                collect_scalar_subqueries(&o.expr, out);
14248            }
14249        }
14250        Expr::Case {
14251            operand,
14252            branches,
14253            else_branch,
14254        } => {
14255            if let Some(op) = operand {
14256                collect_scalar_subqueries(op, out);
14257            }
14258            for (w, t) in branches {
14259                collect_scalar_subqueries(w, out);
14260                collect_scalar_subqueries(t, out);
14261            }
14262            if let Some(eb) = else_branch {
14263                collect_scalar_subqueries(eb, out);
14264            }
14265        }
14266        Expr::ArraySubscript { target, index } => {
14267            collect_scalar_subqueries(target, out);
14268            collect_scalar_subqueries(index, out);
14269        }
14270        Expr::InList { expr, list, .. } => {
14271            collect_scalar_subqueries(expr, out);
14272            for item in list {
14273                collect_scalar_subqueries(item, out);
14274            }
14275        }
14276        _ => {}
14277    }
14278}
14279
14280/// v7.29 (3d) — empty every scalar-subquery BODY in a host
14281/// expression (node kept so the splice pre-order still matches).
14282fn hollow_scalar_subqueries(e: &mut Expr) {
14283    match e {
14284        Expr::ScalarSubquery(s) => {
14285            let hollow = SelectStatement {
14286                items: Vec::new(),
14287                ..SelectStatement::default()
14288            };
14289            **s = hollow;
14290        }
14291        Expr::Exists { .. } | Expr::InSubquery { .. } => {}
14292        Expr::Binary { lhs, rhs, .. } => {
14293            hollow_scalar_subqueries(lhs);
14294            hollow_scalar_subqueries(rhs);
14295        }
14296        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14297            hollow_scalar_subqueries(expr);
14298        }
14299        Expr::Like { expr, pattern, .. } => {
14300            hollow_scalar_subqueries(expr);
14301            hollow_scalar_subqueries(pattern);
14302        }
14303        Expr::FunctionCall { args, .. } => {
14304            for a in args.iter_mut() {
14305                hollow_scalar_subqueries(a);
14306            }
14307        }
14308        Expr::AggregateOrdered { call, order_by, .. } => {
14309            hollow_scalar_subqueries(call);
14310            for o in order_by.iter_mut() {
14311                hollow_scalar_subqueries(&mut o.expr);
14312            }
14313        }
14314        Expr::Case {
14315            operand,
14316            branches,
14317            else_branch,
14318        } => {
14319            if let Some(op) = operand {
14320                hollow_scalar_subqueries(op);
14321            }
14322            for (w, t) in branches.iter_mut() {
14323                hollow_scalar_subqueries(w);
14324                hollow_scalar_subqueries(t);
14325            }
14326            if let Some(eb) = else_branch {
14327                hollow_scalar_subqueries(eb);
14328            }
14329        }
14330        Expr::ArraySubscript { target, index } => {
14331            hollow_scalar_subqueries(target);
14332            hollow_scalar_subqueries(index);
14333        }
14334        Expr::InList { expr, list, .. } => {
14335            hollow_scalar_subqueries(expr);
14336            for item in list.iter_mut() {
14337                hollow_scalar_subqueries(item);
14338            }
14339        }
14340        _ => {}
14341    }
14342}
14343
14344/// v7.29 (3c) — splice the i-th scalar subquery's batched value into
14345/// the cloned tree (same pre-order as collect_scalar_subqueries).
14346/// Returns Ok(false) if a literal conversion fails (caller falls
14347/// back to the resolver path).
14348fn splice_planned_subqueries(
14349    e: &mut Expr,
14350    plan: &[Option<alloc::rc::Rc<memoize::GroupMap>>],
14351    idx: &mut usize,
14352    row: &Row,
14353    ctx: &EvalContext<'_>,
14354) -> Result<bool, EngineError> {
14355    match e {
14356        Expr::ScalarSubquery(_) => {
14357            let Some(Some(gm)) = plan.get(*idx) else {
14358                return Ok(false);
14359            };
14360            *idx += 1;
14361            let (outer_col, map) = gm.as_ref();
14362            let key_v = eval::eval_expr(&Expr::Column(outer_col.clone()), row, ctx)
14363                .map_err(EngineError::Eval)?;
14364            let v = if matches!(key_v, Value::Null) {
14365                Value::Null
14366            } else {
14367                map.get(&aggregate::encode_key(core::slice::from_ref(&key_v)))
14368                    .cloned()
14369                    .unwrap_or(Value::Null)
14370            };
14371            *e = value_to_literal_expr(v)?;
14372            Ok(true)
14373        }
14374        Expr::Exists { .. } | Expr::InSubquery { .. } => Ok(true),
14375        Expr::Binary { lhs, rhs, .. } => Ok(splice_planned_subqueries(lhs, plan, idx, row, ctx)?
14376            && splice_planned_subqueries(rhs, plan, idx, row, ctx)?),
14377        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14378            splice_planned_subqueries(expr, plan, idx, row, ctx)
14379        }
14380        Expr::Like { expr, pattern, .. } => {
14381            Ok(splice_planned_subqueries(expr, plan, idx, row, ctx)?
14382                && splice_planned_subqueries(pattern, plan, idx, row, ctx)?)
14383        }
14384        Expr::FunctionCall { args, .. } => {
14385            for a in args.iter_mut() {
14386                if !splice_planned_subqueries(a, plan, idx, row, ctx)? {
14387                    return Ok(false);
14388                }
14389            }
14390            Ok(true)
14391        }
14392        Expr::AggregateOrdered { call, order_by, .. } => {
14393            if !splice_planned_subqueries(call, plan, idx, row, ctx)? {
14394                return Ok(false);
14395            }
14396            for o in order_by.iter_mut() {
14397                if !splice_planned_subqueries(&mut o.expr, plan, idx, row, ctx)? {
14398                    return Ok(false);
14399                }
14400            }
14401            Ok(true)
14402        }
14403        Expr::Case {
14404            operand,
14405            branches,
14406            else_branch,
14407        } => {
14408            if let Some(op) = operand {
14409                if !splice_planned_subqueries(op, plan, idx, row, ctx)? {
14410                    return Ok(false);
14411                }
14412            }
14413            for (w, t) in branches.iter_mut() {
14414                if !splice_planned_subqueries(w, plan, idx, row, ctx)?
14415                    || !splice_planned_subqueries(t, plan, idx, row, ctx)?
14416                {
14417                    return Ok(false);
14418                }
14419            }
14420            if let Some(eb) = else_branch {
14421                if !splice_planned_subqueries(eb, plan, idx, row, ctx)? {
14422                    return Ok(false);
14423                }
14424            }
14425            Ok(true)
14426        }
14427        Expr::ArraySubscript { target, index } => {
14428            Ok(splice_planned_subqueries(target, plan, idx, row, ctx)?
14429                && splice_planned_subqueries(index, plan, idx, row, ctx)?)
14430        }
14431        Expr::InList { expr, list, .. } => {
14432            if !splice_planned_subqueries(expr, plan, idx, row, ctx)? {
14433                return Ok(false);
14434            }
14435            for item in list.iter_mut() {
14436                if !splice_planned_subqueries(item, plan, idx, row, ctx)? {
14437                    return Ok(false);
14438                }
14439            }
14440            Ok(true)
14441        }
14442        _ => Ok(true),
14443    }
14444}
14445
14446/// v7.30.2 (mailrs round-25) — minimum element count before an
14447/// all-literal `IN` list gets a per-query membership set. Below
14448/// this the linear scan wins on build cost.
14449const INLIST_SET_THRESHOLD: usize = 64;
14450
14451/// Cheap pre-check: is a set-eligible `IN` list reachable on the
14452/// AND spine of this expression? Anything else keeps the plain
14453/// `eval_expr` path untouched.
14454fn expr_may_use_in_set(e: &Expr) -> bool {
14455    match e {
14456        Expr::InList { list, .. } => list.len() >= INLIST_SET_THRESHOLD,
14457        Expr::Binary {
14458            lhs,
14459            op: BinOp::And,
14460            rhs,
14461        } => expr_may_use_in_set(lhs) || expr_may_use_in_set(rhs),
14462        _ => false,
14463    }
14464}
14465
14466/// Analyse an `IN` list for set eligibility: every element a literal,
14467/// all of one family (integer or string, NULLs tracked separately).
14468fn build_in_list_set(list: &[Expr]) -> Option<memoize::InListSetEntry> {
14469    let mut has_null = false;
14470    let mut ints: alloc::collections::BTreeSet<i64> = alloc::collections::BTreeSet::new();
14471    let mut texts: alloc::collections::BTreeSet<String> = alloc::collections::BTreeSet::new();
14472    for item in list {
14473        let Expr::Literal(lit) = item else {
14474            return None;
14475        };
14476        match lit {
14477            Literal::Null => has_null = true,
14478            Literal::Integer(i) => {
14479                ints.insert(*i);
14480            }
14481            Literal::String(s) => {
14482                texts.insert(s.clone());
14483            }
14484            _ => return None,
14485        }
14486        if !ints.is_empty() && !texts.is_empty() {
14487            return None;
14488        }
14489    }
14490    let set = if !ints.is_empty() {
14491        memoize::InListSet::Int(ints)
14492    } else if !texts.is_empty() {
14493        memoize::InListSet::Text(texts)
14494    } else {
14495        return None;
14496    };
14497    Some(memoize::InListSetEntry { set, has_null })
14498}
14499
14500/// Subquery-free eval that serves large all-literal `IN` lists from
14501/// a per-query membership set (cached in the memo by node address).
14502/// Walks only the AND spine; every other node — and every needle
14503/// whose runtime family doesn't match the set — falls through to
14504/// `eval_expr`, so coercion and error semantics stay identical.
14505fn eval_with_in_sets(
14506    e: &Expr,
14507    row: &Row,
14508    ctx: &EvalContext<'_>,
14509    m: &mut memoize::MemoizeCache,
14510) -> Result<Value, EngineError> {
14511    match e {
14512        Expr::Binary {
14513            lhs,
14514            op: BinOp::And,
14515            rhs,
14516        } => {
14517            // Mirror eval_expr: both sides evaluate (no short
14518            // circuit), then SQL three-valued AND.
14519            let l = eval_with_in_sets(lhs, row, ctx, m)?;
14520            let r = eval_with_in_sets(rhs, row, ctx, m)?;
14521            eval::and_3vl(l, r).map_err(EngineError::Eval)
14522        }
14523        Expr::InList {
14524            expr: lhs,
14525            list,
14526            negated,
14527        } if list.len() >= INLIST_SET_THRESHOLD => {
14528            let key = core::ptr::from_ref::<Expr>(e) as usize;
14529            let Some(entry) = m
14530                .in_sets
14531                .entry(key)
14532                .or_insert_with(|| build_in_list_set(list))
14533            else {
14534                return eval::eval_expr(e, row, ctx).map_err(EngineError::Eval);
14535            };
14536            let needle = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
14537            let contained = match (&needle, &entry.set) {
14538                // Non-empty list + NULL needle → NULL (negation of
14539                // NULL is still NULL).
14540                (Value::Null, _) => return Ok(Value::Null),
14541                (Value::SmallInt(n), memoize::InListSet::Int(s)) => s.contains(&i64::from(*n)),
14542                (Value::Int(n), memoize::InListSet::Int(s)) => s.contains(&i64::from(*n)),
14543                (Value::BigInt(n), memoize::InListSet::Int(s)) => s.contains(n),
14544                (Value::Text(t), memoize::InListSet::Text(s)) => s.contains(t.as_str()),
14545                // Cross-family needle (e.g. Float vs integer list):
14546                // keep apply_binary's coercion / error behaviour.
14547                _ => return eval::eval_expr(e, row, ctx).map_err(EngineError::Eval),
14548            };
14549            let inner = if contained {
14550                Value::Bool(true)
14551            } else if entry.has_null {
14552                Value::Null
14553            } else {
14554                Value::Bool(false)
14555            };
14556            Ok(match (negated, inner) {
14557                (true, Value::Bool(b)) => Value::Bool(!b),
14558                (_, v) => v,
14559            })
14560        }
14561        _ => eval::eval_expr(e, row, ctx).map_err(EngineError::Eval),
14562    }
14563}
14564
14565fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
14566    // v7.24 (round-16 B) — joined outer contexts carry no single
14567    // table alias; their schemas use composite "alias.column" names
14568    // instead. Pass an unmatchable alias and let the composite
14569    // lookup in substitute_in_expr do the work (a correlated EXISTS
14570    // under a JOIN previously skipped substitution entirely and
14571    // died with "unknown table qualifier").
14572    let outer_alias = ctx.table_alias.unwrap_or("");
14573    substitute_in_select(stmt, row, ctx, outer_alias);
14574}
14575
14576fn substitute_in_select(
14577    stmt: &mut SelectStatement,
14578    row: &Row,
14579    ctx: &EvalContext<'_>,
14580    outer_alias: &str,
14581) {
14582    for item in &mut stmt.items {
14583        if let SelectItem::Expr { expr, .. } = item {
14584            substitute_in_expr(expr, row, ctx, outer_alias);
14585        }
14586    }
14587    if let Some(w) = &mut stmt.where_ {
14588        substitute_in_expr(w, row, ctx, outer_alias);
14589    }
14590    if let Some(gs) = &mut stmt.group_by {
14591        for g in gs {
14592            substitute_in_expr(g, row, ctx, outer_alias);
14593        }
14594    }
14595    if let Some(h) = &mut stmt.having {
14596        substitute_in_expr(h, row, ctx, outer_alias);
14597    }
14598    for o in &mut stmt.order_by {
14599        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
14600    }
14601    for (_, peer) in &mut stmt.unions {
14602        substitute_in_select(peer, row, ctx, outer_alias);
14603    }
14604}
14605
14606fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
14607    // v7.25.2 (round-19 A) — bare synthetic columns. The aggregate
14608    // rewriter replaces group-key references INSIDE subquery bodies
14609    // with `__grp_N` so a correlated subquery in a GROUP BY select
14610    // list can resolve against the synthesised group row. The names
14611    // are engine-generated, so they can't shadow user columns.
14612    if let Expr::Column(c) = e
14613        && c.qualifier.is_none()
14614        && (c.name.starts_with("__grp_") || c.name.starts_with("__agg_"))
14615        && let Some(idx) = ctx.columns.iter().position(|sc| sc.name == c.name)
14616    {
14617        let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
14618        if let Ok(lit) = value_to_literal_expr(v) {
14619            *e = lit;
14620            return;
14621        }
14622    }
14623    if let Expr::Column(c) = e
14624        && let Some(qual) = &c.qualifier
14625    {
14626        // Look up the column's index in the outer schema: plain name
14627        // when the qualifier is the outer table's alias, composite
14628        // "alias.column" for joined outer schemas (v7.24).
14629        let idx = if !outer_alias.is_empty() && qual.eq_ignore_ascii_case(outer_alias) {
14630            ctx.columns
14631                .iter()
14632                .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
14633        } else {
14634            None
14635        }
14636        .or_else(|| {
14637            let composite = alloc::format!("{qual}.{name}", name = c.name);
14638            ctx.columns
14639                .iter()
14640                .position(|sc| sc.name.eq_ignore_ascii_case(&composite))
14641        });
14642        if let Some(idx) = idx {
14643            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
14644            if let Ok(lit) = value_to_literal_expr(v) {
14645                *e = lit;
14646                return;
14647            }
14648        }
14649    }
14650    match e {
14651        Expr::AggregateOrdered { call, order_by, .. } => {
14652            substitute_in_expr(call, row, ctx, outer_alias);
14653            for o in order_by.iter_mut() {
14654                substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
14655            }
14656        }
14657        Expr::Binary { lhs, rhs, .. } => {
14658            substitute_in_expr(lhs, row, ctx, outer_alias);
14659            substitute_in_expr(rhs, row, ctx, outer_alias);
14660        }
14661        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14662            substitute_in_expr(expr, row, ctx, outer_alias);
14663        }
14664        Expr::Like { expr, pattern, .. } => {
14665            substitute_in_expr(expr, row, ctx, outer_alias);
14666            substitute_in_expr(pattern, row, ctx, outer_alias);
14667        }
14668        Expr::FunctionCall { args, .. } => {
14669            for a in args {
14670                substitute_in_expr(a, row, ctx, outer_alias);
14671            }
14672        }
14673        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
14674        Expr::WindowFunction {
14675            args,
14676            partition_by,
14677            order_by,
14678            ..
14679        } => {
14680            for a in args {
14681                substitute_in_expr(a, row, ctx, outer_alias);
14682            }
14683            for p in partition_by {
14684                substitute_in_expr(p, row, ctx, outer_alias);
14685            }
14686            for (o, _, _) in order_by {
14687                substitute_in_expr(o, row, ctx, outer_alias);
14688            }
14689        }
14690        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
14691        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
14692            substitute_in_select(subquery, row, ctx, outer_alias);
14693        }
14694        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
14695        Expr::Array(items) => {
14696            for elem in items {
14697                substitute_in_expr(elem, row, ctx, outer_alias);
14698            }
14699        }
14700        Expr::ArraySubscript { target, index } => {
14701            substitute_in_expr(target, row, ctx, outer_alias);
14702            substitute_in_expr(index, row, ctx, outer_alias);
14703        }
14704        Expr::AnyAll { expr, array, .. } => {
14705            substitute_in_expr(expr, row, ctx, outer_alias);
14706            substitute_in_expr(array, row, ctx, outer_alias);
14707        }
14708        Expr::InList { expr, list, .. } => {
14709            substitute_in_expr(expr, row, ctx, outer_alias);
14710            for item in list {
14711                substitute_in_expr(item, row, ctx, outer_alias);
14712            }
14713        }
14714        Expr::Case {
14715            operand,
14716            branches,
14717            else_branch,
14718        } => {
14719            if let Some(o) = operand {
14720                substitute_in_expr(o, row, ctx, outer_alias);
14721            }
14722            for (w, t) in branches {
14723                substitute_in_expr(w, row, ctx, outer_alias);
14724                substitute_in_expr(t, row, ctx, outer_alias);
14725            }
14726            if let Some(e) = else_branch {
14727                substitute_in_expr(e, row, ctx, outer_alias);
14728            }
14729        }
14730    }
14731}
14732
14733/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
14734/// dedup inside the recursive iteration. Crude but deterministic
14735/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
14736fn encode_row_key(row: &Row) -> Vec<u8> {
14737    let mut out = Vec::new();
14738    for v in &row.values {
14739        let s = alloc::format!("{v:?}|");
14740        out.extend_from_slice(s.as_bytes());
14741    }
14742    out
14743}
14744
14745fn select_has_window(stmt: &SelectStatement) -> bool {
14746    for item in &stmt.items {
14747        if let SelectItem::Expr { expr, .. } = item
14748            && expr_has_window(expr)
14749        {
14750            return true;
14751        }
14752    }
14753    false
14754}
14755
14756fn expr_has_window(e: &Expr) -> bool {
14757    match e {
14758        Expr::WindowFunction { .. } => true,
14759        Expr::AggregateOrdered { call, order_by, .. } => {
14760            expr_has_window(call) || order_by.iter().any(|o| expr_has_window(&o.expr))
14761        }
14762        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
14763        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14764            expr_has_window(expr)
14765        }
14766        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
14767        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
14768        Expr::Extract { source, .. } => expr_has_window(source),
14769        Expr::ScalarSubquery(_)
14770        | Expr::Exists { .. }
14771        | Expr::InSubquery { .. }
14772        | Expr::Literal(_)
14773        | Expr::Placeholder(_)
14774        | Expr::Column(_) => false,
14775        Expr::Array(items) => items.iter().any(expr_has_window),
14776        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
14777        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
14778        Expr::InList { expr, list, .. } => {
14779            expr_has_window(expr) || list.iter().any(expr_has_window)
14780        }
14781        Expr::Case {
14782            operand,
14783            branches,
14784            else_branch,
14785        } => {
14786            operand.as_deref().is_some_and(expr_has_window)
14787                || branches
14788                    .iter()
14789                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
14790                || else_branch.as_deref().is_some_and(expr_has_window)
14791        }
14792    }
14793}
14794
14795fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
14796    if let Expr::WindowFunction { .. } = e {
14797        // Deduplicate by structural equality on the expression
14798        // (cheap because window args + partition + order are
14799        // small). Without dedup we'd recompute identical windows
14800        // once per occurrence in the projection.
14801        if !out.iter().any(|x| x == e) {
14802            out.push(e.clone());
14803        }
14804        return;
14805    }
14806    match e {
14807        // Already handled by the early-return at the top.
14808        Expr::WindowFunction { .. } => unreachable!(),
14809        Expr::Binary { lhs, rhs, .. } => {
14810            collect_window_nodes(lhs, out);
14811            collect_window_nodes(rhs, out);
14812        }
14813        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14814            collect_window_nodes(expr, out);
14815        }
14816        Expr::FunctionCall { args, .. } => {
14817            for a in args {
14818                collect_window_nodes(a, out);
14819            }
14820        }
14821        Expr::Like { expr, pattern, .. } => {
14822            collect_window_nodes(expr, out);
14823            collect_window_nodes(pattern, out);
14824        }
14825        Expr::Extract { source, .. } => collect_window_nodes(source, out),
14826        _ => {}
14827    }
14828}
14829
14830fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
14831    if let Expr::WindowFunction { .. } = e
14832        && let Some(idx) = window_nodes.iter().position(|w| w == e)
14833    {
14834        *e = Expr::Column(spg_sql::ast::ColumnName {
14835            qualifier: None,
14836            name: alloc::format!("__win_{idx}"),
14837        });
14838        return;
14839    }
14840    match e {
14841        Expr::Binary { lhs, rhs, .. } => {
14842            rewrite_window_to_columns(lhs, window_nodes);
14843            rewrite_window_to_columns(rhs, window_nodes);
14844        }
14845        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14846            rewrite_window_to_columns(expr, window_nodes);
14847        }
14848        Expr::FunctionCall { args, .. } => {
14849            for a in args {
14850                rewrite_window_to_columns(a, window_nodes);
14851            }
14852        }
14853        Expr::Like { expr, pattern, .. } => {
14854            rewrite_window_to_columns(expr, window_nodes);
14855            rewrite_window_to_columns(pattern, window_nodes);
14856        }
14857        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
14858        _ => {}
14859    }
14860}
14861
14862/// Total order over partition-key tuples. NULL sorts as the
14863/// lowest value (matches the `<` partial order's NULL-last
14864/// behaviour with `INFINITY` flipped).
14865fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
14866    for (x, y) in a.iter().zip(b.iter()) {
14867        let c = value_cmp(x, y);
14868        if c != core::cmp::Ordering::Equal {
14869            return c;
14870        }
14871    }
14872    a.len().cmp(&b.len())
14873}
14874
14875fn order_key_cmp(
14876    a: &[(Value, bool, Option<bool>)],
14877    b: &[(Value, bool, Option<bool>)],
14878) -> core::cmp::Ordering {
14879    // v7.24.1 — per-key DESC + effective NULLS placement (shared
14880    // contract with order_by_value_cmp).
14881    for ((va, desc, nf), (vb, _, _)) in a.iter().zip(b.iter()) {
14882        let c = order_by_value_cmp(*desc, *nf, va, vb);
14883        if c != core::cmp::Ordering::Equal {
14884            return c;
14885        }
14886    }
14887    a.len().cmp(&b.len())
14888}
14889
14890/// v7.17.0 Phase 3.10 — true when the Value is one of the
14891/// integer-shaped variants `generate_series` accepts as a start
14892/// / stop / step component. Float / NUMERIC are rejected — PG's
14893/// `generate_series(numeric, numeric)` overload is out of v7.17
14894/// scope.
14895const fn value_is_integer(v: &Value) -> bool {
14896    matches!(v, Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_))
14897}
14898
14899/// v7.17.0 Phase 3.10 — widen any integer-shaped Value to i64 for
14900/// the generate_series iteration loop. Non-integer inputs panic;
14901/// caller guards via `value_is_integer`.
14902const fn value_to_i64(v: &Value) -> i64 {
14903    match v {
14904        Value::SmallInt(n) => *n as i64,
14905        Value::Int(n) => *n as i64,
14906        Value::BigInt(n) => *n,
14907        _ => panic!("value_to_i64 called on non-integer Value"),
14908    }
14909}
14910
14911/// v7.17.0 Phase 3.10 — integer-mode generate_series materialiser.
14912/// Step direction follows the sign: positive step iterates upward
14913/// (stops when current > stop); negative iterates downward; zero
14914/// errors. Caller-facing row stream is `BigInt`-typed so a single
14915/// projection schema covers SmallInt / Int / BigInt callers.
14916fn generate_series_integers(
14917    start: i64,
14918    stop: i64,
14919    step: i64,
14920    cancel: &CancelToken<'_>,
14921) -> Result<alloc::vec::Vec<Row>, EngineError> {
14922    if step == 0 {
14923        return Err(EngineError::Unsupported(
14924            "generate_series(): step argument cannot be zero".into(),
14925        ));
14926    }
14927    let mut out = alloc::vec::Vec::new();
14928    let mut cur = start;
14929    // Hard cap to keep a runaway call from eating all memory. PG
14930    // has no such cap but does honour query timeout; SPG's cancel
14931    // token will fire too — this is a defense-in-depth backstop.
14932    const MAX_ROWS: usize = 10_000_000;
14933    loop {
14934        cancel.check()?;
14935        if step > 0 && cur > stop {
14936            break;
14937        }
14938        if step < 0 && cur < stop {
14939            break;
14940        }
14941        out.push(Row::new(alloc::vec![Value::BigInt(cur)]));
14942        if out.len() > MAX_ROWS {
14943            return Err(EngineError::Unsupported(alloc::format!(
14944                "generate_series(): exceeded {MAX_ROWS} rows; \
14945                 narrow start/stop or use a larger step"
14946            )));
14947        }
14948        cur = match cur.checked_add(step) {
14949            Some(n) => n,
14950            None => break,
14951        };
14952    }
14953    Ok(out)
14954}
14955
14956/// v7.17.0 Phase 3.10 — timestamp-mode generate_series. step is a
14957/// `Value::Interval { months, micros }` per the caller's guard;
14958/// each iteration adds the interval via `apply_binary_interval`
14959/// so month-shifting handles short-month rollover (PG semantics).
14960fn generate_series_timestamps(
14961    start: i64,
14962    stop: i64,
14963    step: Value,
14964    cancel: &CancelToken<'_>,
14965) -> Result<alloc::vec::Vec<Row>, EngineError> {
14966    let (months, micros) = match &step {
14967        Value::Interval { months, micros } => (*months, *micros),
14968        _ => unreachable!("caller guards step.is_interval"),
14969    };
14970    if months == 0 && micros == 0 {
14971        return Err(EngineError::Unsupported(
14972            "generate_series(): INTERVAL step cannot be zero".into(),
14973        ));
14974    }
14975    let ascending = months > 0 || micros > 0;
14976    let mut out = alloc::vec::Vec::new();
14977    let mut cur = Value::Timestamp(start);
14978    const MAX_ROWS: usize = 10_000_000;
14979    loop {
14980        cancel.check()?;
14981        let cur_t = match cur {
14982            Value::Timestamp(t) => t,
14983            _ => unreachable!("loop invariant: cur is Timestamp"),
14984        };
14985        if ascending && cur_t > stop {
14986            break;
14987        }
14988        if !ascending && cur_t < stop {
14989            break;
14990        }
14991        out.push(Row::new(alloc::vec![Value::Timestamp(cur_t)]));
14992        if out.len() > MAX_ROWS {
14993            return Err(EngineError::Unsupported(alloc::format!(
14994                "generate_series(): exceeded {MAX_ROWS} rows; \
14995                 narrow start/stop or use a larger step"
14996            )));
14997        }
14998        let next = eval::apply_binary_interval(
14999            spg_sql::ast::BinOp::Add,
15000            &cur,
15001            &Value::Interval { months, micros },
15002        )
15003        .map_err(EngineError::Eval)?;
15004        cur = match next {
15005            Some(v) => v,
15006            None => break,
15007        };
15008    }
15009    Ok(out)
15010}
15011
15012#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
15013/// v7.24 (round-16 A) — per-key ORDER BY comparator honouring DESC
15014/// and the effective NULLS placement (explicit NULLS FIRST/LAST,
15015/// else the PG default: NULLS LAST for ASC, NULLS FIRST for DESC).
15016/// NULL placement is absolute — it does not flip with DESC.
15017pub(crate) fn order_by_value_cmp(
15018    desc: bool,
15019    nulls_first: Option<bool>,
15020    a: &Value,
15021    b: &Value,
15022) -> core::cmp::Ordering {
15023    use core::cmp::Ordering;
15024    let nf = nulls_first.unwrap_or(desc);
15025    match (matches!(a, Value::Null), matches!(b, Value::Null)) {
15026        (true, true) => Ordering::Equal,
15027        (true, false) => {
15028            if nf {
15029                Ordering::Less
15030            } else {
15031                Ordering::Greater
15032            }
15033        }
15034        (false, true) => {
15035            if nf {
15036                Ordering::Greater
15037            } else {
15038                Ordering::Less
15039            }
15040        }
15041        (false, false) => {
15042            let c = value_cmp(a, b);
15043            if desc { c.reverse() } else { c }
15044        }
15045    }
15046}
15047
15048fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
15049    use core::cmp::Ordering;
15050    match (a, b) {
15051        (Value::Null, Value::Null) => Ordering::Equal,
15052        (Value::Null, _) => Ordering::Less,
15053        (_, Value::Null) => Ordering::Greater,
15054        (Value::Int(x), Value::Int(y)) => x.cmp(y),
15055        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
15056        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
15057        (Value::Text(x), Value::Text(y)) => x.cmp(y),
15058        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
15059        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
15060        (Value::Date(x), Value::Date(y)) => x.cmp(y),
15061        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
15062        // Cross-type compare: fall back to the debug rendering —
15063        // same-partition is the goal, exact order is irrelevant.
15064        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
15065    }
15066}
15067
15068/// Compute the window function's per-row output for one partition.
15069/// `slice` has (partition key, order key, original-row-index)
15070/// tuples already sorted by order key. `filtered_rows` is the
15071/// full row list indexed by original-row-index. `out_vals` is
15072/// the destination, also indexed by original-row-index.
15073#[allow(
15074    clippy::too_many_arguments,
15075    clippy::cast_possible_truncation,
15076    clippy::cast_possible_wrap,
15077    clippy::cast_precision_loss,
15078    clippy::cast_sign_loss,
15079    clippy::doc_markdown,
15080    clippy::too_many_lines,
15081    clippy::type_complexity,
15082    clippy::match_same_arms
15083)]
15084fn compute_window_partition(
15085    name: &str,
15086    args: &[Expr],
15087    ordered: bool,
15088    frame: Option<&WindowFrame>,
15089    null_treatment: spg_sql::ast::NullTreatment,
15090    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
15091    filtered_rows: &[&Row],
15092    ctx: &EvalContext<'_>,
15093    out_vals: &mut [Value],
15094) -> Result<(), EngineError> {
15095    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
15096    let lower = name.to_ascii_lowercase();
15097    match lower.as_str() {
15098        "row_number" => {
15099            for (rank, (_, _, idx)) in slice.iter().enumerate() {
15100                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
15101            }
15102            Ok(())
15103        }
15104        "rank" => {
15105            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15106            let mut current_rank: i64 = 1;
15107            for (i, (_, okey, idx)) in slice.iter().enumerate() {
15108                if let Some(p) = prev_key
15109                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
15110                {
15111                    current_rank = (i + 1) as i64;
15112                }
15113                if prev_key.is_none() {
15114                    current_rank = 1;
15115                }
15116                out_vals[*idx] = Value::BigInt(current_rank);
15117                prev_key = Some(okey.as_slice());
15118            }
15119            Ok(())
15120        }
15121        "dense_rank" => {
15122            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15123            let mut current_rank: i64 = 0;
15124            for (_, okey, idx) in slice {
15125                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
15126                    current_rank += 1;
15127                }
15128                out_vals[*idx] = Value::BigInt(current_rank);
15129                prev_key = Some(okey.as_slice());
15130            }
15131            Ok(())
15132        }
15133        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
15134            // Pre-evaluate the function arg per row in the slice
15135            // (count_star has no arg).
15136            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
15137                slice.iter().map(|_| Value::Null).collect()
15138            } else {
15139                slice
15140                    .iter()
15141                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15142                    .collect::<Result<_, _>>()
15143                    .map_err(EngineError::Eval)?
15144            };
15145            // v4.20: pick the effective frame. Explicit frame
15146            // overrides the implicit default (running for ordered,
15147            // whole-partition for unordered).
15148            let eff = effective_frame(frame, ordered)?;
15149            #[allow(clippy::needless_range_loop)]
15150            for i in 0..slice.len() {
15151                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
15152                let mut sum: f64 = 0.0;
15153                let mut count: i64 = 0;
15154                let mut min_v: Option<f64> = None;
15155                let mut max_v: Option<f64> = None;
15156                let mut row_count: i64 = 0;
15157                if lo <= hi {
15158                    for j in lo..=hi {
15159                        let v = &arg_values[j];
15160                        match lower.as_str() {
15161                            "count_star" => row_count += 1,
15162                            "count" => {
15163                                if !v.is_null() {
15164                                    count += 1;
15165                                }
15166                            }
15167                            _ => {
15168                                if let Some(x) = value_to_f64(v) {
15169                                    sum += x;
15170                                    count += 1;
15171                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
15172                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
15173                                }
15174                            }
15175                        }
15176                    }
15177                }
15178                let value = match lower.as_str() {
15179                    "count_star" => Value::BigInt(row_count),
15180                    "count" => Value::BigInt(count),
15181                    "sum" => Value::Float(sum),
15182                    "avg" => {
15183                        if count == 0 {
15184                            Value::Null
15185                        } else {
15186                            Value::Float(sum / count as f64)
15187                        }
15188                    }
15189                    "min" => min_v.map_or(Value::Null, Value::Float),
15190                    "max" => max_v.map_or(Value::Null, Value::Float),
15191                    _ => unreachable!(),
15192                };
15193                let (_, _, idx) = &slice[i];
15194                out_vals[*idx] = value;
15195            }
15196            Ok(())
15197        }
15198        "lag" | "lead" => {
15199            // lag(expr [, offset [, default]])
15200            // lead(expr [, offset [, default]])
15201            if args.is_empty() {
15202                return Err(EngineError::Unsupported(alloc::format!(
15203                    "{lower}() requires at least one argument"
15204                )));
15205            }
15206            let offset: i64 = if args.len() >= 2 {
15207                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
15208                    .map_err(EngineError::Eval)?;
15209                match v {
15210                    Value::SmallInt(n) => i64::from(n),
15211                    Value::Int(n) => i64::from(n),
15212                    Value::BigInt(n) => n,
15213                    _ => {
15214                        return Err(EngineError::Unsupported(alloc::format!(
15215                            "{lower}() offset must be integer"
15216                        )));
15217                    }
15218                }
15219            } else {
15220                1
15221            };
15222            let default: Value = if args.len() >= 3 {
15223                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
15224                    .map_err(EngineError::Eval)?
15225            } else {
15226                Value::Null
15227            };
15228            let values: Vec<Value> = slice
15229                .iter()
15230                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15231                .collect::<Result<_, _>>()
15232                .map_err(EngineError::Eval)?;
15233            let n = slice.len();
15234            for (i, (_, _, idx)) in slice.iter().enumerate() {
15235                let signed_offset = if lower == "lag" { -offset } else { offset };
15236                let v = if ignore_nulls {
15237                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
15238                    // skipping NULL values; the `offset`-th non-NULL
15239                    // encountered is the result.
15240                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
15241                    let needed: i64 = signed_offset.abs();
15242                    if needed == 0 {
15243                        values[i].clone()
15244                    } else {
15245                        let mut j: i64 = i as i64;
15246                        let mut hits: i64 = 0;
15247                        let mut found: Option<Value> = None;
15248                        loop {
15249                            j += step;
15250                            if j < 0 || j >= n as i64 {
15251                                break;
15252                            }
15253                            #[allow(clippy::cast_sign_loss)]
15254                            let v = &values[j as usize];
15255                            if !v.is_null() {
15256                                hits += 1;
15257                                if hits == needed {
15258                                    found = Some(v.clone());
15259                                    break;
15260                                }
15261                            }
15262                        }
15263                        found.unwrap_or_else(|| default.clone())
15264                    }
15265                } else {
15266                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
15267                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
15268                        default.clone()
15269                    } else {
15270                        #[allow(clippy::cast_sign_loss)]
15271                        {
15272                            values[target_signed as usize].clone()
15273                        }
15274                    }
15275                };
15276                out_vals[*idx] = v;
15277            }
15278            Ok(())
15279        }
15280        "first_value" | "last_value" | "nth_value" => {
15281            if args.is_empty() {
15282                return Err(EngineError::Unsupported(alloc::format!(
15283                    "{lower}() requires at least one argument"
15284                )));
15285            }
15286            let values: Vec<Value> = slice
15287                .iter()
15288                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15289                .collect::<Result<_, _>>()
15290                .map_err(EngineError::Eval)?;
15291            let nth: usize = if lower == "nth_value" {
15292                if args.len() < 2 {
15293                    return Err(EngineError::Unsupported(
15294                        "nth_value() requires (expr, n)".into(),
15295                    ));
15296                }
15297                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
15298                    .map_err(EngineError::Eval)?;
15299                let raw = match v {
15300                    Value::SmallInt(n) => i64::from(n),
15301                    Value::Int(n) => i64::from(n),
15302                    Value::BigInt(n) => n,
15303                    _ => {
15304                        return Err(EngineError::Unsupported(
15305                            "nth_value() n must be integer".into(),
15306                        ));
15307                    }
15308                };
15309                if raw < 1 {
15310                    return Err(EngineError::Unsupported(
15311                        "nth_value() n must be >= 1".into(),
15312                    ));
15313                }
15314                #[allow(clippy::cast_sign_loss)]
15315                {
15316                    raw as usize
15317                }
15318            } else {
15319                0
15320            };
15321            let eff = effective_frame(frame, ordered)?;
15322            for i in 0..slice.len() {
15323                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
15324                let (_, _, idx) = &slice[i];
15325                let v = if lo > hi {
15326                    Value::Null
15327                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
15328                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
15329                    // selecting the boundary value within the frame.
15330                    if lower == "first_value" {
15331                        (lo..=hi)
15332                            .find_map(|j| {
15333                                let v = &values[j];
15334                                (!v.is_null()).then(|| v.clone())
15335                            })
15336                            .unwrap_or(Value::Null)
15337                    } else {
15338                        (lo..=hi)
15339                            .rev()
15340                            .find_map(|j| {
15341                                let v = &values[j];
15342                                (!v.is_null()).then(|| v.clone())
15343                            })
15344                            .unwrap_or(Value::Null)
15345                    }
15346                } else {
15347                    match lower.as_str() {
15348                        "first_value" => values[lo].clone(),
15349                        "last_value" => values[hi].clone(),
15350                        "nth_value" => {
15351                            let pos = lo + nth - 1;
15352                            if pos > hi {
15353                                Value::Null
15354                            } else {
15355                                values[pos].clone()
15356                            }
15357                        }
15358                        _ => unreachable!(),
15359                    }
15360                };
15361                out_vals[*idx] = v;
15362            }
15363            Ok(())
15364        }
15365        "ntile" => {
15366            if args.is_empty() {
15367                return Err(EngineError::Unsupported(
15368                    "ntile(n) requires an integer argument".into(),
15369                ));
15370            }
15371            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
15372                .map_err(EngineError::Eval)?;
15373            let bucket_count: i64 = match v {
15374                Value::SmallInt(n) => i64::from(n),
15375                Value::Int(n) => i64::from(n),
15376                Value::BigInt(n) => n,
15377                _ => {
15378                    return Err(EngineError::Unsupported(
15379                        "ntile() argument must be integer".into(),
15380                    ));
15381                }
15382            };
15383            if bucket_count < 1 {
15384                return Err(EngineError::Unsupported(
15385                    "ntile() argument must be >= 1".into(),
15386                ));
15387            }
15388            #[allow(clippy::cast_sign_loss)]
15389            let buckets = bucket_count as usize;
15390            let n = slice.len();
15391            // Each bucket gets `base` rows; the first `extras` buckets
15392            // get one extra. PG semantics.
15393            let base = n / buckets;
15394            let extras = n % buckets;
15395            let mut bucket: usize = 1;
15396            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
15397            let mut buckets_with_extra_remaining = extras;
15398            for (_, _, idx) in slice {
15399                if remaining_in_bucket == 0 {
15400                    bucket += 1;
15401                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
15402                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
15403                        base + 1
15404                    } else {
15405                        base
15406                    };
15407                    // Edge: if base==0 and extras==0, all rows fit;
15408                    // shouldn't reach here, but guard anyway.
15409                    if remaining_in_bucket == 0 {
15410                        remaining_in_bucket = 1;
15411                    }
15412                }
15413                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
15414                remaining_in_bucket -= 1;
15415            }
15416            Ok(())
15417        }
15418        "percent_rank" => {
15419            // (rank - 1) / (n - 1) where rank is the standard RANK().
15420            // Single-row partitions get 0.
15421            let n = slice.len();
15422            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15423            let mut current_rank: i64 = 1;
15424            for (i, (_, okey, idx)) in slice.iter().enumerate() {
15425                if let Some(p) = prev_key
15426                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
15427                {
15428                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
15429                }
15430                if prev_key.is_none() {
15431                    current_rank = 1;
15432                }
15433                #[allow(clippy::cast_precision_loss)]
15434                let pr = if n <= 1 {
15435                    0.0
15436                } else {
15437                    (current_rank - 1) as f64 / (n - 1) as f64
15438                };
15439                out_vals[*idx] = Value::Float(pr);
15440                prev_key = Some(okey.as_slice());
15441            }
15442            Ok(())
15443        }
15444        "cume_dist" => {
15445            // # rows up to and including this row's peer group / n.
15446            let n = slice.len();
15447            // First pass: find peer-group-end rank for each row.
15448            for i in 0..slice.len() {
15449                let peer_end = peer_group_end(slice, i);
15450                #[allow(clippy::cast_precision_loss)]
15451                let cd = (peer_end + 1) as f64 / n as f64;
15452                let (_, _, idx) = &slice[i];
15453                out_vals[*idx] = Value::Float(cd);
15454            }
15455            Ok(())
15456        }
15457        other => Err(EngineError::Unsupported(alloc::format!(
15458            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
15459        ))),
15460    }
15461}
15462
15463/// v4.20: resolve the user-provided frame down to a normalised
15464/// `(kind, start, end)`. `None` means default — derive from
15465/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
15466/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
15467/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
15468/// end → CURRENT ROW per the PG spec.
15469fn effective_frame(
15470    frame: Option<&WindowFrame>,
15471    ordered: bool,
15472) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
15473    match frame {
15474        None => {
15475            if ordered {
15476                Ok((
15477                    FrameKind::Range,
15478                    FrameBound::UnboundedPreceding,
15479                    FrameBound::CurrentRow,
15480                ))
15481            } else {
15482                Ok((
15483                    FrameKind::Rows,
15484                    FrameBound::UnboundedPreceding,
15485                    FrameBound::UnboundedFollowing,
15486                ))
15487            }
15488        }
15489        Some(fr) => {
15490            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
15491            // Reject start > end (a few impossible combinations).
15492            if matches!(fr.start, FrameBound::UnboundedFollowing)
15493                || matches!(end, FrameBound::UnboundedPreceding)
15494            {
15495                return Err(EngineError::Unsupported(alloc::format!(
15496                    "invalid frame: start={:?} end={:?}",
15497                    fr.start,
15498                    end
15499                )));
15500            }
15501            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
15502            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
15503            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
15504            // implemented in v4.20.
15505            if fr.kind == FrameKind::Range
15506                && (matches!(
15507                    fr.start,
15508                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
15509                ) || matches!(
15510                    end,
15511                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
15512                ))
15513            {
15514                return Err(EngineError::Unsupported(
15515                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
15516                ));
15517            }
15518            Ok((fr.kind, fr.start.clone(), end))
15519        }
15520    }
15521}
15522
15523/// Compute `(lo, hi)` row-index bounds inside the partition slice
15524/// for the row at position `i`. Inclusive, clamped to
15525/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
15526#[allow(clippy::type_complexity)]
15527fn frame_bounds_for_row(
15528    eff: &(FrameKind, FrameBound, FrameBound),
15529    i: usize,
15530    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
15531) -> (usize, usize) {
15532    let (kind, start, end) = eff;
15533    let n = slice.len();
15534    let last = n.saturating_sub(1);
15535    let (mut lo, mut hi) = match kind {
15536        FrameKind::Rows => {
15537            let lo = match start {
15538                FrameBound::UnboundedPreceding => 0,
15539                FrameBound::OffsetPreceding(k) => {
15540                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
15541                    i.saturating_sub(k)
15542                }
15543                FrameBound::CurrentRow => i,
15544                FrameBound::OffsetFollowing(k) => {
15545                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
15546                    i.saturating_add(k).min(last)
15547                }
15548                FrameBound::UnboundedFollowing => last,
15549            };
15550            let hi = match end {
15551                FrameBound::UnboundedPreceding => 0,
15552                FrameBound::OffsetPreceding(k) => {
15553                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
15554                    i.saturating_sub(k)
15555                }
15556                FrameBound::CurrentRow => i,
15557                FrameBound::OffsetFollowing(k) => {
15558                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
15559                    i.saturating_add(k).min(last)
15560                }
15561                FrameBound::UnboundedFollowing => last,
15562            };
15563            (lo, hi)
15564        }
15565        FrameKind::Range => {
15566            // RANGE bounds are peer-aware. With only UNBOUNDED and
15567            // CURRENT ROW supported (rejected at effective_frame for
15568            // explicit offsets), the start/end map to the
15569            // partition's full extent at the same-order-key peer
15570            // group boundary.
15571            let lo = match start {
15572                FrameBound::UnboundedPreceding => 0,
15573                FrameBound::CurrentRow => peer_group_start(slice, i),
15574                FrameBound::UnboundedFollowing => last,
15575                _ => unreachable!("offset bounds rejected for RANGE"),
15576            };
15577            let hi = match end {
15578                FrameBound::UnboundedPreceding => 0,
15579                FrameBound::CurrentRow => peer_group_end(slice, i),
15580                FrameBound::UnboundedFollowing => last,
15581                _ => unreachable!("offset bounds rejected for RANGE"),
15582            };
15583            (lo, hi)
15584        }
15585    };
15586    if hi >= n {
15587        hi = last;
15588    }
15589    if lo >= n {
15590        lo = last;
15591    }
15592    (lo, hi)
15593}
15594
15595/// Find the inclusive index of the first row with the same ORDER
15596/// BY key as `slice[i]`. Slice is already sorted by partition then
15597/// order, so peers are contiguous.
15598#[allow(clippy::type_complexity)]
15599fn peer_group_start(
15600    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
15601    i: usize,
15602) -> usize {
15603    let key = &slice[i].1;
15604    let mut j = i;
15605    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
15606        j -= 1;
15607    }
15608    j
15609}
15610
15611/// Find the inclusive index of the last row with the same ORDER
15612/// BY key as `slice[i]`.
15613#[allow(clippy::type_complexity)]
15614fn peer_group_end(
15615    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
15616    i: usize,
15617) -> usize {
15618    let key = &slice[i].1;
15619    let mut j = i;
15620    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
15621        j += 1;
15622    }
15623    j
15624}
15625
15626fn value_to_f64(v: &Value) -> Option<f64> {
15627    match v {
15628        Value::SmallInt(n) => Some(f64::from(*n)),
15629        Value::Int(n) => Some(f64::from(*n)),
15630        #[allow(clippy::cast_precision_loss)]
15631        Value::BigInt(n) => Some(*n as f64),
15632        Value::Float(x) => Some(*x),
15633        _ => None,
15634    }
15635}
15636
15637/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
15638/// projection / `order_by` — saves cloning the AST when there are
15639/// none (the common case).
15640fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
15641    let mut any = false;
15642    for item in &stmt.items {
15643        if let SelectItem::Expr { expr, .. } = item {
15644            any = any || expr_has_subquery(expr);
15645        }
15646    }
15647    if let Some(w) = &stmt.where_ {
15648        any = any || expr_has_subquery(w);
15649    }
15650    if let Some(h) = &stmt.having {
15651        any = any || expr_has_subquery(h);
15652    }
15653    for o in &stmt.order_by {
15654        any = any || expr_has_subquery(&o.expr);
15655    }
15656    for (_, peer) in &stmt.unions {
15657        any = any || expr_tree_has_subquery(peer);
15658    }
15659    any
15660}
15661
15662pub(crate) fn expr_has_subquery(e: &Expr) -> bool {
15663    match e {
15664        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
15665        Expr::AggregateOrdered { call, order_by, .. } => {
15666            expr_has_subquery(call) || order_by.iter().any(|o| expr_has_subquery(&o.expr))
15667        }
15668        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
15669        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
15670            expr_has_subquery(expr)
15671        }
15672        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
15673        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
15674        Expr::Extract { source, .. } => expr_has_subquery(source),
15675        Expr::WindowFunction {
15676            args,
15677            partition_by,
15678            order_by,
15679            ..
15680        } => {
15681            args.iter().any(expr_has_subquery)
15682                || partition_by.iter().any(expr_has_subquery)
15683                || order_by.iter().any(|(e, _, _)| expr_has_subquery(e))
15684        }
15685        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
15686        Expr::Array(items) => items.iter().any(expr_has_subquery),
15687        Expr::ArraySubscript { target, index } => {
15688            expr_has_subquery(target) || expr_has_subquery(index)
15689        }
15690        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
15691        Expr::InList { expr, list, .. } => {
15692            expr_has_subquery(expr) || list.iter().any(expr_has_subquery)
15693        }
15694        Expr::Case {
15695            operand,
15696            branches,
15697            else_branch,
15698        } => {
15699            operand.as_deref().is_some_and(expr_has_subquery)
15700                || branches
15701                    .iter()
15702                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
15703                || else_branch.as_deref().is_some_and(expr_has_subquery)
15704        }
15705    }
15706}
15707
15708/// v4.10 helper: materialise a runtime `Value` back into an AST
15709/// `Expr::Literal` for the subquery-rewrite path. Supports the
15710/// types `Literal` can represent (Integer / Float / Text / Bool /
15711/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
15712/// would lose precision through Literal and aren't supported in
15713/// uncorrelated-subquery results; they error with a clear hint.
15714fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
15715    let lit = match v {
15716        Value::Null => Literal::Null,
15717        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
15718        Value::Int(n) => Literal::Integer(i64::from(n)),
15719        Value::BigInt(n) => Literal::Integer(n),
15720        Value::Float(x) => Literal::Float(x),
15721        Value::Text(s) | Value::Json(s) => Literal::String(s),
15722        Value::Bool(b) => Literal::Bool(b),
15723        other => {
15724            return Err(EngineError::Unsupported(alloc::format!(
15725                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
15726                other.data_type()
15727            )));
15728        }
15729    };
15730    Ok(Expr::Literal(lit))
15731}
15732
15733/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
15734/// round-5 G4). Covers the most common `Value` variants. Types
15735/// that need lossy textual round-trip (BYTEA, arrays, ts*)
15736/// surface as an Unsupported error so the caller can add a cast
15737/// in the inner SELECT.
15738fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
15739    let lit = match v {
15740        Value::Null => Literal::Null,
15741        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
15742        Value::Int(n) => Literal::Integer(i64::from(n)),
15743        Value::BigInt(n) => Literal::Integer(n),
15744        Value::Float(x) => Literal::Float(x),
15745        Value::Text(s) | Value::Json(s) => Literal::String(s),
15746        Value::Bool(b) => Literal::Bool(b),
15747        Value::Vector(xs) => Literal::Vector(xs),
15748        // Date / Timestamp / Timestamptz / Numeric round-trip
15749        // through a TEXT literal that `coerce_value` re-parses
15750        // against the target column type.
15751        Value::Date(days) => {
15752            let micros = (i64::from(days)) * 86_400_000_000;
15753            Literal::String(format_timestamp_micros_as_date(micros))
15754        }
15755        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
15756        Value::Numeric { scaled, scale } => Literal::String(format_numeric(scaled, scale)),
15757        other => {
15758            return Err(EngineError::Unsupported(alloc::format!(
15759                "INSERT … SELECT cannot materialise value of type {:?}; \
15760                 add an explicit CAST in the inner SELECT",
15761                other.data_type()
15762            )));
15763        }
15764    };
15765    Ok(Expr::Literal(lit))
15766}
15767
15768fn format_timestamp_micros(us: i64) -> String {
15769    // Same Y/M/D split used by the wire layer; epoch-relative.
15770    let days = us.div_euclid(86_400_000_000);
15771    let intra_day = us.rem_euclid(86_400_000_000);
15772    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
15773    let secs = intra_day / 1_000_000;
15774    let us_rem = intra_day % 1_000_000;
15775    let h = (secs / 3600) % 24;
15776    let m = (secs / 60) % 60;
15777    let s = secs % 60;
15778    if us_rem == 0 {
15779        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
15780    } else {
15781        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
15782    }
15783}
15784
15785fn format_timestamp_micros_as_date(us: i64) -> String {
15786    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
15787    // Gregorian conversion used by spg-engine's date helpers.
15788    let days = us.div_euclid(86_400_000_000);
15789    // 1970-01-01 = JDN 2440588.
15790    let jdn = days + 2_440_588;
15791    let (y, mo, d) = jdn_to_ymd(jdn);
15792    alloc::format!("{y:04}-{mo:02}-{d:02}")
15793}
15794
/// Splits a Julian Day Number into `(year, month, day)` using the
/// integer-only Fliegel & Van Flandern (1968) algorithm. Intended
/// for positive JDNs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    // Each step peels one component off the running remainder; the
    // intermediate names mirror the order of the published formula.
    let shifted = jdn + 68569;
    let n = (4 * shifted) / 146_097;
    let rem_a = shifted - (146_097 * n + 3) / 4;
    let i = (4000 * (rem_a + 1)) / 1_461_001;
    let rem_b = rem_a - (1461 * i) / 4 + 31;
    let j = (80 * rem_b) / 2447;
    let carry = j / 11;
    (
        100 * (n - 49) + i + carry,
        (j + 2 - 12 * carry) as u32,
        (rem_b - (2447 * j) / 80) as u32,
    )
}
15809
/// Renders a fixed-point value (`scaled` × 10^-`scale`) as decimal
/// text: an optional `-`, the whole part, and — when `scale` is
/// non-zero — a `.` followed by exactly `scale` fractional digits.
///
/// `scale` is a `u8`, so it can exceed the largest power of ten a
/// `u128` holds (10^38). In that case the magnitude is necessarily
/// smaller than 10^`scale`, so the whole part is `0` and the entire
/// magnitude is the zero-padded fraction.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let abs = scaled.unsigned_abs();
    // `checked_pow` rather than `pow`: 10^39 and above overflow u128.
    let (whole, frac) = match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => (abs / divisor, abs % divisor),
        None => (0, abs),
    };
    let sign = if scaled < 0 { "-" } else { "" };
    alloc::format!("{sign}{whole}.{frac:0width$}", width = usize::from(scale))
}
15821
// NOTE: detached v6.1.1 doc paragraph. It describes the
// placeholder-substitution walker (`substitute_placeholders` /
// `substitute_expr`), not the function that follows, so it is kept
// as a plain comment to stay out of that function's rustdoc.
//
// v6.1.1 — walk the prepared `Statement` AST and replace every
// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
// params[n-1]))`. The dispatch downstream sees a `Statement`
// indistinguishable from a simple-query parse, so the exec path
// stays unchanged.
//
// Errors fall into one shape: a `$N` references past the bound
// `params.len()`. Out-of-range happens when the Bind didn't
// supply enough values; pgwire surfaces this as a protocol error
// to the client.

15832/// v7.15.0 — rewrite every (potentially-qualified) column
15833/// identifier matching `old` to `new` in a stored SQL source
15834/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
15835/// CHECK predicate sources, partial-index predicate sources,
15836/// and runtime DEFAULT expression sources before they get
15837/// re-parsed on the next INSERT/UPDATE.
15838///
15839/// Round-trips through the parser, so the rewritten output is
15840/// the canonical Display form (matches what the engine stores
15841/// for fresh predicates). If the source doesn't parse, surfaces
15842/// the parse error — the invariant that stored predicates are
15843/// in canonical Display form means a parse failure here is a
15844/// real bug, not a user mistake to swallow.
15845fn rewrite_column_in_source(
15846    src: &str,
15847    old: &str,
15848    new: &str,
15849) -> Result<alloc::string::String, EngineError> {
15850    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
15851        EngineError::Unsupported(alloc::format!(
15852            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
15853             failed to parse for rewrite ({e})"
15854        ))
15855    })?;
15856    rewrite_column_in_expr(&mut expr, old, new);
15857    Ok(alloc::format!("{expr}"))
15858}
15859
15860/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
15861/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
15862/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
15863/// gets rewritten because the AST has no way to tell us this
15864/// predicate is on table T versus table T2 — predicate sources
15865/// in SPG are always scoped to the owning table, so any
15866/// qualifier present is either redundant or wrong.
15867fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
15868    match e {
15869        Expr::AggregateOrdered { call, order_by, .. } => {
15870            rewrite_column_in_expr(call, old, new);
15871            for o in order_by.iter_mut() {
15872                rewrite_column_in_expr(&mut o.expr, old, new);
15873            }
15874        }
15875        Expr::Column(c) => {
15876            if c.name.eq_ignore_ascii_case(old) {
15877                c.name = new.to_string();
15878            }
15879        }
15880        Expr::Binary { lhs, rhs, .. } => {
15881            rewrite_column_in_expr(lhs, old, new);
15882            rewrite_column_in_expr(rhs, old, new);
15883        }
15884        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
15885            rewrite_column_in_expr(expr, old, new);
15886        }
15887        Expr::FunctionCall { args, .. } => {
15888            for a in args {
15889                rewrite_column_in_expr(a, old, new);
15890            }
15891        }
15892        Expr::Like { expr, pattern, .. } => {
15893            rewrite_column_in_expr(expr, old, new);
15894            rewrite_column_in_expr(pattern, old, new);
15895        }
15896        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
15897        Expr::WindowFunction {
15898            args,
15899            partition_by,
15900            order_by,
15901            ..
15902        } => {
15903            for a in args {
15904                rewrite_column_in_expr(a, old, new);
15905            }
15906            for p in partition_by {
15907                rewrite_column_in_expr(p, old, new);
15908            }
15909            for (o, _, _) in order_by {
15910                rewrite_column_in_expr(o, old, new);
15911            }
15912        }
15913        Expr::Array(items) => {
15914            for elem in items {
15915                rewrite_column_in_expr(elem, old, new);
15916            }
15917        }
15918        Expr::ArraySubscript { target, index } => {
15919            rewrite_column_in_expr(target, old, new);
15920            rewrite_column_in_expr(index, old, new);
15921        }
15922        Expr::AnyAll { expr, array, .. } => {
15923            rewrite_column_in_expr(expr, old, new);
15924            rewrite_column_in_expr(array, old, new);
15925        }
15926        Expr::InList { expr, list, .. } => {
15927            rewrite_column_in_expr(expr, old, new);
15928            for item in list {
15929                rewrite_column_in_expr(item, old, new);
15930            }
15931        }
15932        Expr::Case {
15933            operand,
15934            branches,
15935            else_branch,
15936        } => {
15937            if let Some(o) = operand {
15938                rewrite_column_in_expr(o, old, new);
15939            }
15940            for (w, t) in branches {
15941                rewrite_column_in_expr(w, old, new);
15942                rewrite_column_in_expr(t, old, new);
15943            }
15944            if let Some(e) = else_branch {
15945                rewrite_column_in_expr(e, old, new);
15946            }
15947        }
15948        // Stored predicate sources never contain subqueries —
15949        // CHECK / partial-index / runtime_default are all scalar.
15950        // If a future feature changes that, recurse here.
15951        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
15952        Expr::Literal(_) | Expr::Placeholder(_) => {}
15953    }
15954}
15955
15956/// v7.16.0 — walks a parsed statement and replaces every
15957/// `Expr::Placeholder(N)` with the corresponding `params[N-1]`
15958/// re-encoded as an `Expr::Literal`. Used internally by
15959/// `Engine::execute_prepared` AND surfaced for the spg-embedded
15960/// WAL path (which needs the bind-final AST so replay sees a
15961/// simple-query-shaped statement, not a `$1`-shaped one). Errors
15962/// when a placeholder references an index past the params slice.
15963pub fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
15964    match stmt {
15965        Statement::Select(s) => substitute_select(s, params)?,
15966        Statement::Insert(ins) => {
15967            for row in &mut ins.rows {
15968                for e in row {
15969                    substitute_expr(e, params)?;
15970                }
15971            }
15972            // ON CONFLICT DO UPDATE assignments / WHERE can carry
15973            // placeholders too (`… DO UPDATE SET reason = $2` —
15974            // mailrs embed round-12).
15975            if let Some(clause) = &mut ins.on_conflict
15976                && let spg_sql::ast::OnConflictAction::Update {
15977                    assignments,
15978                    where_,
15979                } = &mut clause.action
15980            {
15981                for (_, e) in assignments.iter_mut() {
15982                    substitute_expr(e, params)?;
15983                }
15984                if let Some(w) = where_ {
15985                    substitute_expr(w, params)?;
15986                }
15987            }
15988        }
15989        Statement::Update(u) => {
15990            for (_, e) in &mut u.assignments {
15991                substitute_expr(e, params)?;
15992            }
15993            if let Some(w) = &mut u.where_ {
15994                substitute_expr(w, params)?;
15995            }
15996        }
15997        Statement::Delete(d) => {
15998            if let Some(w) = &mut d.where_ {
15999                substitute_expr(w, params)?;
16000            }
16001        }
16002        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
16003        // Other statements (CREATE / BEGIN / SHOW / …) have no
16004        // expression slots; no walk needed.
16005        _ => {}
16006    }
16007    Ok(())
16008}
16009
16010/// v7.25.1 (mailrs round-18) — THE canonical mutable traversal of
16011/// every expression slot in a SelectStatement, including every
16012/// nested SelectStatement (CTE bodies, UNION peers, LATERAL derived
16013/// tables) and the JOIN ON conditions. Round-12 #7b and round-18
16014/// were both "a hand-rolled Select walker forgot one subtree";
16015/// every whole-statement rewrite pass (placeholders, clock) must go
16016/// through here so a new AST slot only needs adding once.
16017/// Expression-INTERNAL recursion (into subquery nodes inside an
16018/// Expr) stays the visitor's own responsibility.
16019pub(crate) fn walk_select_exprs_mut(
16020    s: &mut SelectStatement,
16021    f: &mut impl FnMut(&mut Expr) -> Result<(), EngineError>,
16022) -> Result<(), EngineError> {
16023    for cte in &mut s.ctes {
16024        walk_select_exprs_mut(&mut cte.body, f)?;
16025    }
16026    for item in &mut s.items {
16027        if let SelectItem::Expr { expr, .. } = item {
16028            f(expr)?;
16029        }
16030    }
16031    if let Some(from) = &mut s.from {
16032        if let Some(sub) = &mut from.primary.lateral_subquery {
16033            walk_select_exprs_mut(sub, f)?;
16034        }
16035        for j in &mut from.joins {
16036            if let Some(sub) = &mut j.table.lateral_subquery {
16037                walk_select_exprs_mut(sub, f)?;
16038            }
16039            if let Some(on) = &mut j.on {
16040                f(on)?;
16041            }
16042        }
16043    }
16044    if let Some(w) = &mut s.where_ {
16045        f(w)?;
16046    }
16047    if let Some(gs) = &mut s.group_by {
16048        for g in gs {
16049            f(g)?;
16050        }
16051    }
16052    if let Some(h) = &mut s.having {
16053        f(h)?;
16054    }
16055    for o in &mut s.order_by {
16056        f(&mut o.expr)?;
16057    }
16058    for (_, peer) in &mut s.unions {
16059        walk_select_exprs_mut(peer, f)?;
16060    }
16061    Ok(())
16062}
16063
16064fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
16065    walk_select_exprs_mut(s, &mut |e| substitute_expr(e, params))?;
16066    // v7.25.1 — LIMIT/OFFSET placeholders inside CTE bodies and
16067    // UNION peers resolve through their own recursion (the walker
16068    // above only visits Expr slots), so handle them per nested
16069    // statement here.
16070    for cte in &mut s.ctes {
16071        resolve_limit_offset_placeholders(&mut cte.body, params)?;
16072    }
16073    for (_, peer) in &mut s.unions {
16074        resolve_limit_offset_placeholders(peer, params)?;
16075    }
16076    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
16077    // mailrs H2. After this pass each LIMIT/OFFSET that was a
16078    // Placeholder is rewritten to Literal so the existing
16079    // `LimitExpr::as_literal` path consumes a concrete u32.
16080    if let Some(le) = s.limit {
16081        s.limit = Some(resolve_limit_placeholder(le, params)?);
16082    }
16083    if let Some(le) = s.offset {
16084        s.offset = Some(resolve_limit_placeholder(le, params)?);
16085    }
16086    Ok(())
16087}
16088
16089/// v7.25.1 — recursive LIMIT/OFFSET placeholder resolution for
16090/// nested statements (CTE bodies / UNION peers).
16091fn resolve_limit_offset_placeholders(
16092    s: &mut SelectStatement,
16093    params: &[Value],
16094) -> Result<(), EngineError> {
16095    if let Some(le) = s.limit {
16096        s.limit = Some(resolve_limit_placeholder(le, params)?);
16097    }
16098    if let Some(le) = s.offset {
16099        s.offset = Some(resolve_limit_placeholder(le, params)?);
16100    }
16101    for cte in &mut s.ctes {
16102        resolve_limit_offset_placeholders(&mut cte.body, params)?;
16103    }
16104    for (_, peer) in &mut s.unions {
16105        resolve_limit_offset_placeholders(peer, params)?;
16106    }
16107    Ok(())
16108}
16109
16110fn resolve_limit_placeholder(
16111    le: spg_sql::ast::LimitExpr,
16112    params: &[Value],
16113) -> Result<spg_sql::ast::LimitExpr, EngineError> {
16114    use spg_sql::ast::LimitExpr;
16115    match le {
16116        LimitExpr::Literal(_) => Ok(le),
16117        LimitExpr::Placeholder(n) => {
16118            let idx = usize::from(n).saturating_sub(1);
16119            let v = params.get(idx).ok_or_else(|| {
16120                EngineError::Eval(EvalError::PlaceholderOutOfRange {
16121                    n,
16122                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
16123                })
16124            })?;
16125            let int = match v {
16126                Value::SmallInt(x) => Some(i64::from(*x)),
16127                Value::Int(x) => Some(i64::from(*x)),
16128                Value::BigInt(x) => Some(*x),
16129                _ => None,
16130            }
16131            .ok_or_else(|| {
16132                EngineError::Unsupported(alloc::format!(
16133                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
16134                ))
16135            })?;
16136            if int < 0 {
16137                return Err(EngineError::Unsupported(alloc::format!(
16138                    "LIMIT/OFFSET ${n} bound to negative value {int}"
16139                )));
16140            }
16141            let bounded = u32::try_from(int).map_err(|_| {
16142                EngineError::Unsupported(alloc::format!(
16143                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
16144                ))
16145            })?;
16146            Ok(LimitExpr::Literal(bounded))
16147        }
16148    }
16149}
16150
16151fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
16152    if let Expr::Placeholder(n) = e {
16153        let idx = usize::from(*n).saturating_sub(1);
16154        let v = params.get(idx).ok_or_else(|| {
16155            EngineError::Eval(EvalError::PlaceholderOutOfRange {
16156                n: *n,
16157                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
16158            })
16159        })?;
16160        *e = Expr::Literal(value_to_literal(v.clone()));
16161        return Ok(());
16162    }
16163    match e {
16164        Expr::AggregateOrdered { call, order_by, .. } => {
16165            substitute_expr(call, params)?;
16166            for o in order_by.iter_mut() {
16167                substitute_expr(&mut o.expr, params)?;
16168            }
16169        }
16170        Expr::Binary { lhs, rhs, .. } => {
16171            substitute_expr(lhs, params)?;
16172            substitute_expr(rhs, params)?;
16173        }
16174        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
16175            substitute_expr(expr, params)?;
16176        }
16177        Expr::FunctionCall { args, .. } => {
16178            for a in args {
16179                substitute_expr(a, params)?;
16180            }
16181        }
16182        Expr::Like { expr, pattern, .. } => {
16183            substitute_expr(expr, params)?;
16184            substitute_expr(pattern, params)?;
16185        }
16186        Expr::Extract { source, .. } => substitute_expr(source, params)?,
16187        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
16188        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
16189        Expr::InSubquery { expr, subquery, .. } => {
16190            substitute_expr(expr, params)?;
16191            substitute_select(subquery, params)?;
16192        }
16193        Expr::WindowFunction {
16194            args,
16195            partition_by,
16196            order_by,
16197            ..
16198        } => {
16199            for a in args {
16200                substitute_expr(a, params)?;
16201            }
16202            for p in partition_by {
16203                substitute_expr(p, params)?;
16204            }
16205            for (e, _, _) in order_by {
16206                substitute_expr(e, params)?;
16207            }
16208        }
16209        Expr::Literal(_) | Expr::Column(_) => {}
16210        // Already handled above.
16211        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
16212        Expr::Array(items) => {
16213            for elem in items {
16214                substitute_expr(elem, params)?;
16215            }
16216        }
16217        Expr::ArraySubscript { target, index } => {
16218            substitute_expr(target, params)?;
16219            substitute_expr(index, params)?;
16220        }
16221        Expr::AnyAll { expr, array, .. } => {
16222            substitute_expr(expr, params)?;
16223            substitute_expr(array, params)?;
16224        }
16225        Expr::InList { expr, list, .. } => {
16226            substitute_expr(expr, params)?;
16227            for item in list {
16228                substitute_expr(item, params)?;
16229            }
16230        }
16231        Expr::Case {
16232            operand,
16233            branches,
16234            else_branch,
16235        } => {
16236            if let Some(o) = operand {
16237                substitute_expr(o, params)?;
16238            }
16239            for (w, t) in branches {
16240                substitute_expr(w, params)?;
16241                substitute_expr(t, params)?;
16242            }
16243            if let Some(e) = else_branch {
16244                substitute_expr(e, params)?;
16245            }
16246        }
16247    }
16248    Ok(())
16249}
16250
// NOTE: detached v6.1.1 doc paragraph. It describes
// `value_to_literal` (defined further down), not the function that
// follows, so it is kept as a plain comment to stay out of that
// function's rustdoc.
//
// v6.1.1 — convert a runtime `Value` into the closest matching
// `Literal` for the substitute walker. Lossless for the simple
// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
// / Json / Interval render as their canonical text form so the
// downstream coerce_value can re-parse against the target column
// type. SQ8 / HalfVector cells are NOT expected as bind params;
// pgwire's Bind decodes vector params to the f32 representation
// before they reach this helper.

16259/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
16260/// column's non-NULL sample before histogram building. Cross-type
16261/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
16262/// same widening the eval-side `compare` operator uses; everything
16263/// else (the genuinely-incompatible pairs) falls back to ordering
16264/// by canonical string form so the sort is still total + stable.
16265/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
16266/// here only via the string-fallback path because vector columns
16267/// are filtered out upstream.
16268fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
16269    use core::cmp::Ordering;
16270    match (a, b) {
16271        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
16272        (Value::Int(a), Value::Int(b)) => a.cmp(b),
16273        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
16274        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
16275        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
16276        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
16277        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
16278        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
16279        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
16280        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
16281        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
16282        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
16283        (Value::Date(a), Value::Date(b)) => a.cmp(b),
16284        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
16285        // Mixed numeric/float — widen to f64 and compare.
16286        (Value::SmallInt(n), Value::Float(x)) => {
16287            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
16288        }
16289        (Value::Float(x), Value::SmallInt(n)) => {
16290            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
16291        }
16292        (Value::Int(n), Value::Float(x)) => {
16293            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
16294        }
16295        (Value::Float(x), Value::Int(n)) => {
16296            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
16297        }
16298        (Value::BigInt(n), Value::Float(x)) => {
16299            #[allow(clippy::cast_precision_loss)]
16300            let nf = *n as f64;
16301            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
16302        }
16303        (Value::Float(x), Value::BigInt(n)) => {
16304            #[allow(clippy::cast_precision_loss)]
16305            let nf = *n as f64;
16306            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
16307        }
16308        // Cross-type fallback: lexicographic on canonical form.
16309        // Total + stable so the sort is well-defined.
16310        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
16311    }
16312}
16313
/// v6.2.0 — renders the histogram bounds as a `[v0, v1, ...]` string
/// for the `spg_statistic.histogram_bounds` column.
///
/// Bounds are separated by `", "`. A bound is wrapped in double
/// quotes when it is empty or contains `,`, `[`, `]` or `"`; inside
/// the quotes, `"` and `\` are preceded by a backslash. All other
/// bounds are emitted verbatim. v6.2.0 only uses the rendered form
/// for human consumption, so the escaping is deliberately
/// conservative.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    let mut first = true;
    for bound in bounds {
        if !first {
            rendered.push_str(", ");
        }
        first = false;

        let must_quote =
            bound.is_empty() || bound.chars().any(|c| matches!(c, ',' | '[' | ']' | '"'));
        if !must_quote {
            rendered.push_str(bound);
            continue;
        }

        rendered.push('"');
        for c in bound.chars() {
            if matches!(c, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(c);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
16344
16345/// v6.2.0 — canonical textual form of a `Value` for histogram
16346/// bound storage. Strings used by ANALYZE for sort + bound output.
16347/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
16348/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
16349/// the same form `format_date` / `format_timestamp` produce for
16350/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
16351/// reach this only via a non-Vector column (vector columns are
16352/// skipped upstream); they fall back to a Debug-derived form so
16353/// stats still serialise without crashing.
16354pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
16355    match v {
16356        Value::Null => "NULL".to_string(),
16357        Value::SmallInt(n) => alloc::format!("{n}"),
16358        Value::Int(n) => alloc::format!("{n}"),
16359        Value::BigInt(n) => alloc::format!("{n}"),
16360        Value::Float(x) => alloc::format!("{x:?}"),
16361        Value::Text(s) | Value::Json(s) => s.clone(),
16362        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
16363        Value::Date(d) => eval::format_date(*d),
16364        Value::Timestamp(t) => eval::format_timestamp(*t),
16365        // v7.17.0 Phase 3.P0-32 — PG TIME canonical text form.
16366        Value::Time(us) => eval::format_time(*us),
16367        // v7.17.0 Phase 3.P0-33 — MySQL YEAR 4-digit zero-padded.
16368        Value::Year(y) => alloc::format!("{y:04}"),
16369        // v7.17.0 Phase 3.P0-34 — PG TIMETZ canonical text form.
16370        Value::TimeTz { us, offset_secs } => eval::format_timetz(*us, *offset_secs),
16371        // v7.17.0 Phase 3.P0-35 — PG MONEY canonical en_US text form.
16372        Value::Money(c) => eval::format_money(*c),
16373        // v7.17.0 Phase 3.P0-38 — PG range canonical text form.
16374        v @ Value::Range { .. } => format_range_str(v),
16375        // v7.17.0 Phase 3.P0-39 — PG hstore canonical text form.
16376        Value::Hstore(pairs) => format_hstore_str(pairs),
16377        // v7.17.0 Phase 3.P0-40 — 2D array canonical text form.
16378        Value::IntArray2D(rows) => format_int_2d_text(rows),
16379        Value::BigIntArray2D(rows) => format_bigint_2d_text(rows),
16380        Value::TextArray2D(rows) => format_text_2d_text(rows),
16381        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
16382        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
16383        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
16384            // Unreachable in practice (vector columns are filtered
16385            // out before this). Defensive fallback so a future
16386            // vector-stats path doesn't crash.
16387            alloc::format!("{v:?}")
16388        }
16389        // v7.5.0 — Value is #[non_exhaustive] for downstream
16390        // forward-compat. Future variants fall through to Debug
16391        // form here (same shape as the vector fallback above).
16392        _ => alloc::format!("{v:?}"),
16393    }
16394}
16395
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently always returns `false`: v6.2.0 has no internal tables
/// yet (publications / subscriptions / users / statistics all live
/// as engine fields, not catalog tables), so every existing user
/// table is analysed. The function is a reserved future-proofing
/// hook; the argument is ignored, hence the leading underscore.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
16405
16406fn value_to_literal(v: Value) -> Literal {
16407    match v {
16408        Value::Null => Literal::Null,
16409        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
16410        Value::Int(n) => Literal::Integer(i64::from(n)),
16411        Value::BigInt(n) => Literal::Integer(n),
16412        Value::Float(x) => Literal::Float(x),
16413        Value::Text(s) | Value::Json(s) => Literal::String(s),
16414        Value::Bool(b) => Literal::Bool(b),
16415        Value::Vector(v) => Literal::Vector(v),
16416        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
16417        Value::Date(d) => Literal::String(eval::format_date(d)),
16418        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
16419        // v7.17.0 Phase 3.P0-69 — UUID round-trips via canonical
16420        // hyphenated text. Without this arm the fallback below
16421        // renders `Debug` form ("Uuid([85, …])") which the
16422        // engine's Text → Uuid coerce can't parse, breaking
16423        // prepared-bind round-trip from the spg-sqlx adapter.
16424        Value::Uuid(b) => Literal::String(spg_storage::format_uuid(&b)),
16425        // v7.16.0 — BYTEA round-trip for the spg-sqlx Bind path.
16426        // PG-canonical text rep is `\x` + lowercase hex; the
16427        // engine's coerce_value already accepts that on the
16428        // text → bytea direction.
16429        Value::Bytes(b) => Literal::String(eval::format_bytea_hex(&b)),
16430        // Arrays ride the AST natively (mailrs embed round-12) —
16431        // the prior `{a,b,c}` text form only worked where a column
16432        // type drove the re-parse; `= ANY($1)` has no column
16433        // context and saw a bare Text value.
16434        Value::TextArray(items) => Literal::TextArray(items),
16435        Value::IntArray(items) => Literal::IntArray(items),
16436        Value::BigIntArray(items) => Literal::BigIntArray(items),
16437        Value::Interval { months, micros } => Literal::Interval {
16438            months,
16439            micros,
16440            text: eval::format_interval(months, micros),
16441        },
16442        // SQ8 / halfvec cells dequantise to f32 before reaching the
16443        // substitute walker; pgwire's Bind path handles that.
16444        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
16445        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
16446        // v7.5.0 — Value is #[non_exhaustive]; future variants
16447        // render as Debug-form String literal until explicit
16448        // mapping is added.
16449        v => Literal::String(alloc::format!("{v:?}")),
16450    }
16451}
16452
16453fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
16454    let Some(now) = now_micros else {
16455        return;
16456    };
16457    match stmt {
16458        Statement::Select(s) => rewrite_select_clock(s, now),
16459        Statement::Insert(ins) => {
16460            for row in &mut ins.rows {
16461                for e in row {
16462                    rewrite_expr_clock(e, now);
16463                }
16464            }
16465            // `ON CONFLICT … DO UPDATE SET created_at = NOW()` —
16466            // the upsert assignments carry clock calls too (mailrs
16467            // embed round-12).
16468            if let Some(clause) = &mut ins.on_conflict
16469                && let spg_sql::ast::OnConflictAction::Update {
16470                    assignments,
16471                    where_,
16472                } = &mut clause.action
16473            {
16474                for (_, e) in assignments.iter_mut() {
16475                    rewrite_expr_clock(e, now);
16476                }
16477                if let Some(w) = where_ {
16478                    rewrite_expr_clock(w, now);
16479                }
16480            }
16481        }
16482        // `UPDATE … SET seen_at = NOW() WHERE …` / `DELETE … WHERE
16483        // ts < NOW()` (mailrs embed round-12 — previously only
16484        // SELECT / INSERT-rows were walked).
16485        Statement::Update(u) => {
16486            for (_, e) in &mut u.assignments {
16487                rewrite_expr_clock(e, now);
16488            }
16489            if let Some(w) = &mut u.where_ {
16490                rewrite_expr_clock(w, now);
16491            }
16492        }
16493        Statement::Delete(d) => {
16494            if let Some(w) = &mut d.where_ {
16495                rewrite_expr_clock(w, now);
16496            }
16497        }
16498        _ => {}
16499    }
16500}
16501
16502fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
16503    // v7.25.1 (round-18) — shared traversal: CTE bodies, LATERAL
16504    // subqueries, JOIN ON, and UNION peers all get the clock
16505    // rewrite (NOW() inside a CTE previously survived to eval as
16506    // "unknown function `now`").
16507    let _ = walk_select_exprs_mut(s, &mut |e| {
16508        rewrite_expr_clock(e, now);
16509        Ok(())
16510    });
16511}
16512
16513/// v3.0.3 hot path: every recursion lands in exactly one `match` arm.
16514/// Literal / Column-with-qualifier (the dominant cases on a typical
16515/// AST) take a single pattern dispatch and exit. The clock-rewrite
16516/// targets (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE`
16517/// functions, and bare `CURRENT_TIMESTAMP` / `CURRENT_DATE` column
16518/// refs) sit on their own arms with match guards so the fall-through
16519/// to the recursive arms is unambiguous.
16520fn rewrite_expr_clock(e: &mut Expr, now: i64) {
16521    // Fast-path test on the no-recursion shapes first. We can't fold
16522    // them into the big match below because they need to *replace* `e`
16523    // outright; the recursive arms below match on its sub-fields.
16524    if let Some(replacement) = clock_replacement_for(e, now) {
16525        *e = replacement;
16526        return;
16527    }
16528    match e {
16529        Expr::AggregateOrdered { call, order_by, .. } => {
16530            rewrite_expr_clock(call, now);
16531            for o in order_by.iter_mut() {
16532                rewrite_expr_clock(&mut o.expr, now);
16533            }
16534        }
16535        Expr::Binary { lhs, rhs, .. } => {
16536            rewrite_expr_clock(lhs, now);
16537            rewrite_expr_clock(rhs, now);
16538        }
16539        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
16540            rewrite_expr_clock(expr, now);
16541        }
16542        Expr::FunctionCall { args, .. } => {
16543            for a in args {
16544                rewrite_expr_clock(a, now);
16545            }
16546        }
16547        Expr::Like { expr, pattern, .. } => {
16548            rewrite_expr_clock(expr, now);
16549            rewrite_expr_clock(pattern, now);
16550        }
16551        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
16552        // v4.10 subquery nodes — recurse into the inner SELECT's
16553        // expression slots so e.g. SELECT NOW() in a scalar
16554        // subquery picks up the same instant as the outer query.
16555        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
16556        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
16557        Expr::InSubquery { expr, subquery, .. } => {
16558            rewrite_expr_clock(expr, now);
16559            rewrite_select_clock(subquery, now);
16560        }
16561        // v4.12 window functions — args + PARTITION BY + ORDER BY
16562        // may all reference clock literals.
16563        Expr::WindowFunction {
16564            args,
16565            partition_by,
16566            order_by,
16567            ..
16568        } => {
16569            for a in args {
16570                rewrite_expr_clock(a, now);
16571            }
16572            for p in partition_by {
16573                rewrite_expr_clock(p, now);
16574            }
16575            for (e, _, _) in order_by {
16576                rewrite_expr_clock(e, now);
16577            }
16578        }
16579        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
16580        Expr::Array(items) => {
16581            for elem in items {
16582                rewrite_expr_clock(elem, now);
16583            }
16584        }
16585        Expr::ArraySubscript { target, index } => {
16586            rewrite_expr_clock(target, now);
16587            rewrite_expr_clock(index, now);
16588        }
16589        Expr::AnyAll { expr, array, .. } => {
16590            rewrite_expr_clock(expr, now);
16591            rewrite_expr_clock(array, now);
16592        }
16593        Expr::InList { expr, list, .. } => {
16594            rewrite_expr_clock(expr, now);
16595            for item in list {
16596                rewrite_expr_clock(item, now);
16597            }
16598        }
16599        Expr::Case {
16600            operand,
16601            branches,
16602            else_branch,
16603        } => {
16604            if let Some(o) = operand {
16605                rewrite_expr_clock(o, now);
16606            }
16607            for (w, t) in branches {
16608                rewrite_expr_clock(w, now);
16609                rewrite_expr_clock(t, now);
16610            }
16611            if let Some(e) = else_branch {
16612                rewrite_expr_clock(e, now);
16613            }
16614        }
16615    }
16616}
16617
16618/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
16619/// must be rewritten; otherwise `None` so the caller falls through to
16620/// the recursive walk. Identifies both function-call forms (`NOW()` /
16621/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
16622/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
16623/// which is how PG accepts them without parens).
16624fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
16625    let (kind, name) = match e {
16626        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
16627        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
16628        _ => return None,
16629    };
16630    // ASCII case-insensitive name match. Each entry decides what
16631    // synthetic literal the call expands to.
16632    //
16633    // v7.17.0 Phase 3.P0-29 — `unix_timestamp` (no args) joins this
16634    // table as MySQL's epoch-seconds equivalent of `now()`. Folded
16635    // to a BigInt literal here so apply_function never needs a
16636    // clock dependency.
16637    enum ClockShape {
16638        Timestamp,
16639        Date,
16640        UnixSeconds,
16641    }
16642    let shape = match name.len() {
16643        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => {
16644            Some(ClockShape::Timestamp)
16645        }
16646        12 if name.eq_ignore_ascii_case("current_date") => Some(ClockShape::Date),
16647        14 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("unix_timestamp") => {
16648            Some(ClockShape::UnixSeconds)
16649        }
16650        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(ClockShape::Timestamp),
16651        _ => None,
16652    };
16653    let shape = shape?;
16654    let payload = match shape {
16655        ClockShape::Timestamp => now,
16656        ClockShape::Date => now.div_euclid(86_400_000_000),
16657        ClockShape::UnixSeconds => now.div_euclid(1_000_000),
16658    };
16659    let target = match shape {
16660        ClockShape::Timestamp => spg_sql::ast::CastTarget::Timestamp,
16661        ClockShape::Date => spg_sql::ast::CastTarget::Date,
16662        ClockShape::UnixSeconds => spg_sql::ast::CastTarget::BigInt,
16663    };
16664    Some(Expr::Cast {
16665        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
16666        target,
16667    })
16668}
16669
/// Syntactic position in which `clock_replacement_for` found a
/// candidate clock name. Some names (`now`, `unix_timestamp`) are
/// only rewritten in the `Fn` position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A function call with an empty argument list, e.g. `NOW()`.
    Fn,
    /// An unqualified column reference, e.g. bare `CURRENT_DATE`.
    BareIdent,
}
16675
// NOTE: this paragraph describes `resolve_order_by_position` (defined
// after `expand_group_by_all`), not the function directly below.
// `ORDER BY <integer>` references the N-th SELECT item (1-based).
// The integer literal is swapped for the matching item's expression so
// the executor doesn't need a special-case branch. The rewrite recurses
// into UNION peers because each peer keeps its own SELECT list.
16680/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
16681/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
16682/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
16683/// and groups by whatever explicit non-aggregates remain — none in
16684/// the wildcard-only case, which still works for non-aggregate
16685/// queries).
16686fn expand_group_by_all(s: &mut SelectStatement) {
16687    if !s.group_by_all {
16688        for (_, peer) in &mut s.unions {
16689            expand_group_by_all(peer);
16690        }
16691        return;
16692    }
16693    let mut groups: Vec<Expr> = Vec::new();
16694    for item in &s.items {
16695        if let SelectItem::Expr { expr, .. } = item
16696            && !aggregate::contains_aggregate(expr)
16697        {
16698            groups.push(expr.clone());
16699        }
16700    }
16701    s.group_by = Some(groups);
16702    s.group_by_all = false;
16703    for (_, peer) in &mut s.unions {
16704        expand_group_by_all(peer);
16705    }
16706}
16707
16708fn resolve_order_by_position(s: &mut SelectStatement) {
16709    // v6.4.0 — iterate every ORDER BY key. Position references
16710    // (`ORDER BY 2`) bind to the 1-based projection index;
16711    // identifier references that match a SELECT-list alias bind to
16712    // the projected expression (Step 4 of L3a).
16713    for order in &mut s.order_by {
16714        match &order.expr {
16715            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
16716                if let Ok(idx_one_based) = usize::try_from(*n) {
16717                    let idx = idx_one_based - 1;
16718                    if idx < s.items.len()
16719                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
16720                    {
16721                        order.expr = expr.clone();
16722                    }
16723                }
16724            }
16725            Expr::Column(c) if c.qualifier.is_none() => {
16726                // Alias-in-ORDER-BY lookup.
16727                for item in &s.items {
16728                    if let SelectItem::Expr {
16729                        expr,
16730                        alias: Some(a),
16731                    } = item
16732                        && a == &c.name
16733                    {
16734                        order.expr = expr.clone();
16735                        break;
16736                    }
16737                }
16738            }
16739            _ => {}
16740        }
16741    }
16742    for (_, peer) in &mut s.unions {
16743        resolve_order_by_position(peer);
16744    }
16745}
16746
16747/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
16748/// Used by the UNION ORDER BY path; per-block paths inline the same
16749/// comparator because they already hold `&OrderBy` directly.
16750/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
16751/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
16752/// partition the prefix in O(n), then sort just that prefix in O(k
16753/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
16754/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
16755/// full-sort behaviour.
16756///
16757/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
16758/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
16759fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
16760    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
16761    match keep {
16762        Some(k) if k < tagged.len() && k > 0 => {
16763            let pivot = k - 1;
16764            tagged.select_nth_unstable_by(pivot, cmp);
16765            tagged[..k].sort_by(cmp);
16766            tagged.truncate(k);
16767        }
16768        _ => {
16769            tagged.sort_by(cmp);
16770        }
16771    }
16772}
16773
16774fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
16775    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
16776}
16777
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Compares `a` and `b` key by key (only the positions both slices
/// share are examined) and returns the first non-equal result. The
/// DESC flag at the same index in `descs` reverses that key's
/// ordering; a missing flag means ascending. If every shared key is
/// equal the result is `Ordering::Equal`.
///
/// NaN keys rank above every non-NaN key (including the `±INFINITY`
/// sentinels used for NULL) and equal to each other. Mapping the
/// incomparable case to `Equal` instead would make NaN "equal" to
/// both `1.0` and `2.0` while `1.0 < 2.0`, which is not a total order
/// — and `sort_by` / `select_nth_unstable_by` may panic or produce an
/// unspecified order when handed one.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when at least one side is NaN;
        // `false < true`, so the NaN side compares greater.
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
16796
16797/// v6.4.0 — eval every ORDER BY expression for a row and pack the
16798/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
16799fn build_order_keys(
16800    order_by: &[OrderBy],
16801    row: &Row,
16802    ctx: &EvalContext,
16803) -> Result<Vec<f64>, EngineError> {
16804    let mut keys = Vec::with_capacity(order_by.len());
16805    for o in order_by {
16806        let v = eval::eval_expr(&o.expr, row, ctx)?;
16807        // v7.24 (round-16 A) — explicit NULLS FIRST/LAST. The f64
16808        // packing sorts ascending THEN applies the per-key DESC
16809        // reverse, so a NULL must land at +INF exactly when the
16810        // effective placement agrees with the reverse direction:
16811        // nf == desc → +INF (ASC default last / DESC default
16812        // first), nf != desc → -INF (the explicit flips).
16813        if matches!(v, Value::Null) {
16814            let nf = o.nulls_first.unwrap_or(o.desc);
16815            keys.push(if nf == o.desc {
16816                f64::INFINITY
16817            } else {
16818                f64::NEG_INFINITY
16819            });
16820        } else {
16821            keys.push(value_to_order_key(&v)?);
16822        }
16823    }
16824    Ok(keys)
16825}
16826
16827/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
16828/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
16829/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
16830fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
16831    if let Some(off) = offset {
16832        let off = off as usize;
16833        if off >= rows.len() {
16834            rows.clear();
16835        } else {
16836            rows.drain(..off);
16837        }
16838    }
16839    if let Some(n) = limit {
16840        rows.truncate(n as usize);
16841    }
16842}
16843
16844/// v7.17.0 Phase 3.P0-49 — offset + limit applied to a tagged
16845/// `(order_keys, row)` sequence, with optional SQL:2008 `WITH
16846/// TIES` extension. When `with_ties` is set, the truncated tail
16847/// is extended through every subsequent row whose order keys
16848/// equal the last-kept row's keys (so a "top 3 by score" with
16849/// WITH TIES emits row 4 too when row 4 ties row 3 on `score`).
16850///
16851/// The order-key vector is the per-row sort key the caller already
16852/// computed via `build_order_keys`; equal-key detection therefore
16853/// matches the sort comparator exactly.
16854fn apply_offset_and_limit_tagged(
16855    tagged: &mut Vec<(Vec<f64>, Row)>,
16856    offset: Option<u32>,
16857    limit: Option<u32>,
16858    with_ties: bool,
16859) {
16860    if let Some(off) = offset {
16861        let off = off as usize;
16862        if off >= tagged.len() {
16863            tagged.clear();
16864        } else {
16865            tagged.drain(..off);
16866        }
16867    }
16868    if let Some(n) = limit {
16869        let n = n as usize;
16870        if with_ties && n > 0 && n < tagged.len() {
16871            let cutoff_key = tagged[n - 1].0.clone();
16872            let mut end = n;
16873            while end < tagged.len() && tagged[end].0 == cutoff_key {
16874                end += 1;
16875            }
16876            tagged.truncate(end);
16877        } else {
16878            tagged.truncate(n);
16879        }
16880    }
16881}
16882
16883/// v7.17.0 Phase 3.P0-49 — PG-canonical: `FETCH FIRST <n> ROWS
16884/// WITH TIES` requires an `ORDER BY`. Without one, there's no
16885/// way to identify "ties" deterministically, so PG errors at
16886/// plan time. SPG mirrors that surface so the same DDL / app
16887/// behaviour holds on cutover.
16888fn check_with_ties_requires_order_by(stmt: &SelectStatement) -> Result<(), EngineError> {
16889    if stmt.limit_with_ties && stmt.order_by.is_empty() {
16890        return Err(EngineError::Unsupported(alloc::string::String::from(
16891            "FETCH FIRST … ROWS WITH TIES requires an ORDER BY clause",
16892        )));
16893    }
16894    Ok(())
16895}
16896
16897/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
16898/// names + parent table name) into the storage-layer shape (column
16899/// indices + same parent table). Validates everything the engine
16900/// needs to know about the FK at CREATE TABLE time:
16901///
16902///   - parent table exists (catalog lookup, unless self-referencing)
16903///   - parent columns exist on the parent table
16904///   - parent column list matches the local arity (defaults to the
16905///     parent's primary index column when omitted)
16906///   - parent columns are covered by a `BTree` UNIQUE-class index
16907///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
16908///     the v7.6.2 INSERT path can do an O(log n) parent lookup
16909///   - local columns exist on the table being created
16910fn resolve_foreign_key(
16911    local_table_name: &str,
16912    local_cols: &[ColumnSchema],
16913    fk: spg_sql::ast::ForeignKeyConstraint,
16914    catalog: &Catalog,
16915) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
16916    // Resolve local columns.
16917    let mut local_columns = Vec::with_capacity(fk.columns.len());
16918    for name in &fk.columns {
16919        let pos = local_cols
16920            .iter()
16921            .position(|c| c.name == *name)
16922            .ok_or_else(|| {
16923                EngineError::Unsupported(alloc::format!(
16924                    "FOREIGN KEY references unknown local column {name:?}"
16925                ))
16926            })?;
16927        local_columns.push(pos);
16928    }
16929    // Self-referencing FK: parent table is the one we're creating.
16930    // The parent column resolution uses the local column list since
16931    // the catalog doesn't have this table yet.
16932    let is_self_ref = fk.parent_table == local_table_name;
16933    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
16934        (local_cols, local_table_name)
16935    } else {
16936        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
16937            EngineError::Storage(StorageError::TableNotFound {
16938                name: fk.parent_table.clone(),
16939            })
16940        })?;
16941        (
16942            parent_table.schema().columns.as_slice(),
16943            fk.parent_table.as_str(),
16944        )
16945    };
16946    // Resolve parent column names → positions. If the FK omitted the
16947    // parent column list, fall back to the parent's primary index
16948    // column (single-column only — composite default is rejected
16949    // because there's no unambiguous "PK" in SPG's index list).
16950    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
16951        if fk.columns.len() != 1 {
16952            return Err(EngineError::Unsupported(
16953                "composite FOREIGN KEY without explicit parent column list is not supported \
16954                 — list the parent columns explicitly"
16955                    .into(),
16956            ));
16957        }
16958        // Find a single BTree index on the parent and use its column.
16959        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
16960            .ok_or_else(|| {
16961                EngineError::Unsupported(alloc::format!(
16962                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
16963                     to default the FOREIGN KEY against"
16964                ))
16965            })?;
16966        alloc::vec![pos]
16967    } else {
16968        let mut out = Vec::with_capacity(fk.parent_columns.len());
16969        for name in &fk.parent_columns {
16970            let pos = parent_cols_for_lookup
16971                .iter()
16972                .position(|c| c.name == *name)
16973                .ok_or_else(|| {
16974                    EngineError::Unsupported(alloc::format!(
16975                        "FOREIGN KEY references unknown parent column \
16976                         {name:?} on table {parent_table_str:?}"
16977                    ))
16978                })?;
16979            out.push(pos);
16980        }
16981        out
16982    };
16983    if parent_columns.len() != local_columns.len() {
16984        return Err(EngineError::Unsupported(alloc::format!(
16985            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
16986            local_columns.len(),
16987            parent_columns.len()
16988        )));
16989    }
16990    // For non-self-referencing FKs, verify the parent column set is
16991    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
16992    // declaration; the convention is "the parent column for FK
16993    // purposes must have a BTree index" — which the user creates via
16994    // `CREATE INDEX ... USING btree (col)` (the default). We accept
16995    // any single-column BTree index that covers a parent column;
16996    // composite parent column lists require an index whose `column_position`
16997    // matches the first parent column (multi-column BTree indices
16998    // are not in the v7.x roadmap).
16999    if !is_self_ref {
17000        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
17001        let primary_parent_col = parent_columns[0];
17002        let has_btree = parent_table
17003            .schema()
17004            .columns
17005            .get(primary_parent_col)
17006            .is_some()
17007            && parent_table.indices().iter().any(|idx| {
17008                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17009                    && idx.column_position == primary_parent_col
17010                    && idx.partial_predicate.is_none()
17011            });
17012        if !has_btree {
17013            return Err(EngineError::Unsupported(alloc::format!(
17014                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
17015                 index — create one with `CREATE INDEX ... ON {} ({})` first",
17016                parent_table_str,
17017                parent_table_str,
17018                parent_table.schema().columns[primary_parent_col].name,
17019            )));
17020        }
17021    }
17022    let on_delete = fk_action_sql_to_storage(fk.on_delete);
17023    let on_update = fk_action_sql_to_storage(fk.on_update);
17024    Ok(spg_storage::ForeignKeyConstraint {
17025        name: fk.name,
17026        local_columns,
17027        parent_table: fk.parent_table,
17028        parent_columns,
17029        on_delete,
17030        on_update,
17031    })
17032}
17033
17034/// v7.6.1 — pick a sentinel "primary key" column from the parent
17035/// table when the FK didn't name parent columns. Picks the first
17036/// single-column unconditional BTree index — that's the closest
17037/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
17038/// `local_cols` as the column source.
17039fn pick_pk_index_column(
17040    catalog: &Catalog,
17041    parent_name: &str,
17042    is_self_ref: bool,
17043    local_cols: &[ColumnSchema],
17044) -> Option<usize> {
17045    if is_self_ref {
17046        // Self-ref FK omitted parent columns: pick column 0 by
17047        // convention (no catalog entry yet). Engine will widen this
17048        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
17049        let _ = local_cols;
17050        return Some(0);
17051    }
17052    let parent = catalog.get(parent_name)?;
17053    parent.indices().iter().find_map(|idx| {
17054        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17055            && idx.partial_predicate.is_none()
17056            && idx.included_columns.is_empty()
17057            && idx.expression.is_none()
17058        {
17059            Some(idx.column_position)
17060        } else {
17061            None
17062        }
17063    })
17064}
17065
17066/// v7.9.8 / v7.9.10 — resolve the column positions that
17067/// identify a conflict for ON CONFLICT. Returns a Vec of
17068/// column positions (1 element for single-column form, N for
17069/// composite). When the user wrote bare `ON CONFLICT DO …`,
17070/// falls back to the table's first unconditional BTree index
17071/// (always single-column today).
17072/// Returns the conflict-key column positions plus whether the
17073/// matched constraint declares NULLS NOT DISTINCT (v7.29 — a NULL
17074/// in the key only rules out a conflict under the default
17075/// NULLS DISTINCT semantics).
17076fn resolve_on_conflict_columns(
17077    catalog: &Catalog,
17078    table_name: &str,
17079    target: &[String],
17080) -> Result<(Vec<usize>, bool), EngineError> {
17081    let table = catalog.get(table_name).ok_or_else(|| {
17082        EngineError::Storage(StorageError::TableNotFound {
17083            name: table_name.into(),
17084        })
17085    })?;
17086    if target.is_empty() {
17087        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
17088        // constraints carry a multi-column tuple; the prior code
17089        // path picked only the leading column of the first BTree
17090        // index, which caused `ON CONFLICT DO NOTHING` to dedup
17091        // by leading column alone (3 rows with same group_id but
17092        // different permission collapsed to 1). PG semantics use
17093        // the full tuple. Prefer a UniquenessConstraint's full
17094        // column list when one exists; fall back to the leading
17095        // BTree column for legacy single-column UNIQUE.
17096        if let Some(uc) = table.schema().uniqueness_constraints.first() {
17097            return Ok((uc.columns.clone(), uc.nulls_not_distinct));
17098        }
17099        let pos = table
17100            .indices()
17101            .iter()
17102            .find_map(|idx| {
17103                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17104                    && idx.partial_predicate.is_none()
17105                    && idx.included_columns.is_empty()
17106                    && idx.expression.is_none()
17107                {
17108                    Some(idx.column_position)
17109                } else {
17110                    None
17111                }
17112            })
17113            .ok_or_else(|| {
17114                EngineError::Unsupported(alloc::format!(
17115                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
17116                ))
17117            })?;
17118        return Ok((alloc::vec![pos], false));
17119    }
17120    let mut out = Vec::with_capacity(target.len());
17121    for name in target {
17122        let pos = table
17123            .schema()
17124            .columns
17125            .iter()
17126            .position(|c| c.name == *name)
17127            .ok_or_else(|| {
17128                EngineError::Unsupported(alloc::format!(
17129                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
17130                ))
17131            })?;
17132        out.push(pos);
17133    }
17134    // An explicit target matching a UNIQUE constraint inherits its
17135    // NULLS [NOT] DISTINCT declaration.
17136    let mut sorted = out.clone();
17137    sorted.sort_unstable();
17138    let nnd = table.schema().uniqueness_constraints.iter().any(|uc| {
17139        let mut u = uc.columns.clone();
17140        u.sort_unstable();
17141        u == sorted && uc.nulls_not_distinct
17142    });
17143    Ok((out, nnd))
17144}
17145
17146/// v7.9.8 — check whether the BTree index on `column_pos` of
17147/// `table_name` already has a row with this key.
17148fn on_conflict_key_exists(
17149    catalog: &Catalog,
17150    table_name: &str,
17151    column_pos: usize,
17152    key: &Value,
17153) -> bool {
17154    let Some(table) = catalog.get(table_name) else {
17155        return false;
17156    };
17157    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
17158        return false;
17159    };
17160    table.indices().iter().any(|idx| {
17161        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17162            && idx.column_position == column_pos
17163            && idx.partial_predicate.is_none()
17164            && !idx.lookup_eq(&idx_key).is_empty()
17165    })
17166}
17167
17168/// v7.9.9 / v7.9.10 — look up an existing row's position by
17169/// matching all `column_positions` against the incoming `key`
17170/// tuple. Single-column shape (one column) reduces to the
17171/// canonical PK lookup; composite shapes scan linearly until
17172/// every position matches.
17173fn lookup_row_position_by_keys(
17174    catalog: &Catalog,
17175    table_name: &str,
17176    column_positions: &[usize],
17177    key: &[&Value],
17178) -> Option<usize> {
17179    let table = catalog.get(table_name)?;
17180    table.rows().iter().position(|r| {
17181        column_positions
17182            .iter()
17183            .enumerate()
17184            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
17185    })
17186}
17187
17188/// v7.9.10 — does the table already contain a row whose
17189/// `column_positions` tuple equals `key`? Single-column shape
17190/// uses the existing BTree fast path; composite shapes fall
17191/// back to a row scan.
17192fn on_conflict_keys_exist(
17193    catalog: &Catalog,
17194    table_name: &str,
17195    column_positions: &[usize],
17196    key: &[&Value],
17197) -> bool {
17198    if column_positions.len() == 1 {
17199        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
17200    }
17201    let Some(table) = catalog.get(table_name) else {
17202        return false;
17203    };
17204    table.rows().iter().any(|r| {
17205        column_positions
17206            .iter()
17207            .enumerate()
17208            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
17209    })
17210}
17211
17212/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
17213/// existing row.
17214///
17215/// `incoming` is the rejected INSERT row (used to resolve
17216/// `EXCLUDED.col` references in the assignment exprs);
17217/// `target_pos` is the position of the existing row in the table.
17218/// Each assignment substitutes `EXCLUDED.col` with the matching
17219/// incoming value, evaluates the resulting expression against
17220/// the existing row, and writes the new value into the
17221/// corresponding column of the returned `Vec<Value>`. If
17222/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
17223/// the conflicting row is silently kept unchanged.
17224fn apply_on_conflict_assignments(
17225    catalog: &Catalog,
17226    table_name: &str,
17227    target_pos: usize,
17228    incoming: &[Value],
17229    assignments: &[(String, Expr)],
17230    where_: Option<&Expr>,
17231) -> Result<Option<Vec<Value>>, EngineError> {
17232    let table = catalog.get(table_name).ok_or_else(|| {
17233        EngineError::Storage(StorageError::TableNotFound {
17234            name: table_name.into(),
17235        })
17236    })?;
17237    let schema_cols = table.schema().columns.clone();
17238    let existing = table
17239        .rows()
17240        .get(target_pos)
17241        .ok_or_else(|| {
17242            EngineError::Unsupported(alloc::format!(
17243                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
17244            ))
17245        })?
17246        .clone();
17247    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
17248    // Optional WHERE filter on the conflict row.
17249    if let Some(w) = where_ {
17250        let pred = w.clone();
17251        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
17252        let v = eval::eval_expr(&pred, &existing, &ctx)?;
17253        if !matches!(v, Value::Bool(true)) {
17254            return Ok(None);
17255        }
17256    }
17257    let mut new_values = existing.values.clone();
17258    for (col_name, expr) in assignments {
17259        let target_idx = schema_cols
17260            .iter()
17261            .position(|c| c.name == *col_name)
17262            .ok_or_else(|| {
17263                EngineError::Eval(EvalError::ColumnNotFound {
17264                    name: col_name.clone(),
17265                })
17266            })?;
17267        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
17268        let v = eval::eval_expr(&sub, &existing, &ctx)?;
17269        let coerced = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
17270        check_unsigned_range(&coerced, &schema_cols[target_idx], target_idx)?;
17271        new_values[target_idx] = coerced;
17272    }
17273    Ok(Some(new_values))
17274}
17275
17276/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
17277/// "EXCLUDED", name }` reference with a `Literal` of the matching
17278/// value from the incoming-row vec. Resolution against the
17279/// child-table column list (by name).
17280fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
17281    use spg_sql::ast::ColumnName;
17282    match expr {
17283        Expr::Column(ColumnName { qualifier, name })
17284            if qualifier
17285                .as_deref()
17286                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
17287        {
17288            let pos = schema_cols.iter().position(|c| c.name == name);
17289            match pos {
17290                Some(p) => {
17291                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
17292                    value_to_literal_expr(v)
17293                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
17294                }
17295                None => Expr::Column(ColumnName { qualifier, name }),
17296            }
17297        }
17298        Expr::Binary { op, lhs, rhs } => Expr::Binary {
17299            op,
17300            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
17301            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
17302        },
17303        Expr::Unary { op, expr } => Expr::Unary {
17304            op,
17305            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
17306        },
17307        Expr::FunctionCall { name, args } => Expr::FunctionCall {
17308            name,
17309            args: args
17310                .into_iter()
17311                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
17312                .collect(),
17313        },
17314        other => other,
17315    }
17316}
17317
// NOTE: the paragraph below documents `enforce_fk_inserts` (defined
// further down in this file), not the function that follows. It is
// kept as a plain comment so it does not become part of the rustdoc
// of `enforce_uniqueness_inserts`.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//  (a) rows already committed to the table, plus
//  (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — a common pattern for bulk-loading
// hierarchies.

17334/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
17335/// constraints at INSERT time. For each constraint declared on
17336/// the target table, check that no existing row + no earlier row
17337/// in the same batch has the same full-column tuple. NULL in
17338/// any column lifts the row out of the check (SQL spec: NULL
17339/// ≠ NULL for uniqueness). mailrs G1 + G6.
17340fn enforce_uniqueness_inserts(
17341    catalog: &Catalog,
17342    child_table: &str,
17343    constraints: &[spg_storage::UniquenessConstraint],
17344    rows: &[Vec<Value>],
17345) -> Result<(), EngineError> {
17346    if constraints.is_empty() {
17347        return Ok(());
17348    }
17349    let table = catalog.get(child_table).ok_or_else(|| {
17350        EngineError::Storage(StorageError::TableNotFound {
17351            name: child_table.into(),
17352        })
17353    })?;
17354    let schema = table.schema();
17355    // v7.29 (mailrs round-23b) — set-based: ONE O(table) pass folds
17356    // existing keys into a hash set, then each batch row is a probe
17357    // + insert. The previous shape scanned the WHOLE table per
17358    // inserted row (and earlier batch rows per row), which made
17359    // bulk import O(n²) — a 104 MB dump extrapolated to ~1 hour
17360    // (PG: 2 min). Collation folding (Phase 3.P0-45) and
17361    // NULLS [NOT] DISTINCT semantics are unchanged: keys fold via
17362    // collated_key_cell before encoding, NULL-bearing keys skip the
17363    // set unless nulls_not_distinct.
17364    for uc in constraints {
17365        let fold_key = |values: &[Value]| -> Vec<Value> {
17366            uc.columns
17367                .iter()
17368                .map(|&i| {
17369                    let v = values.get(i).cloned().unwrap_or(Value::Null);
17370                    collated_key_cell(&v, i, schema)
17371                })
17372                .collect()
17373        };
17374        let mut seen: hashbrown::HashSet<String> =
17375            hashbrown::HashSet::with_capacity(table.rows().len() + rows.len());
17376        for prow in table.rows() {
17377            let key = fold_key(&prow.values);
17378            if key.iter().any(|v| matches!(v, Value::Null)) && !uc.nulls_not_distinct {
17379                continue;
17380            }
17381            seen.insert(aggregate::encode_key(&key));
17382        }
17383        for (batch_idx, row_values) in rows.iter().enumerate() {
17384            let key = fold_key(row_values);
17385            if key.iter().any(|v| matches!(v, Value::Null)) && !uc.nulls_not_distinct {
17386                continue;
17387            }
17388            if !seen.insert(aggregate::encode_key(&key)) {
17389                let kind = if uc.is_primary_key {
17390                    "PRIMARY KEY"
17391                } else {
17392                    "UNIQUE"
17393                };
17394                let col_names: Vec<String> = uc
17395                    .columns
17396                    .iter()
17397                    .map(|&i| table.schema().columns[i].name.clone())
17398                    .collect();
17399                return Err(EngineError::Unsupported(alloc::format!(
17400                    "{kind} violation on {child_table:?} columns {col_names:?}: \
17401                     row #{batch_idx} duplicates an existing key"
17402                )));
17403            }
17404        }
17405    }
17406    Ok(())
17407}
17408
17409/// v7.17.0 Phase 3.P0-45 — return a key cell folded by its column's
17410/// declared `Collation`. For `CaseInsensitive`, fold Text payloads to
17411/// ASCII lowercase (matches Phase 2.5's `*_ci` semantics: ASCII case-
17412/// fold only, non-ASCII bytes stay byte-wise). For `Binary` or non-Text
17413/// values, the cell passes through unchanged. The caller compares the
17414/// folded values with `==`.
17415fn collated_key_cell(
17416    v: &spg_storage::Value,
17417    column_position: usize,
17418    schema: &spg_storage::TableSchema,
17419) -> spg_storage::Value {
17420    match (v, schema.columns.get(column_position).map(|c| c.collation)) {
17421        (spg_storage::Value::Text(s), Some(spg_storage::Collation::CaseInsensitive)) => {
17422            spg_storage::Value::Text(s.to_ascii_lowercase())
17423        }
17424        _ => v.clone(),
17425    }
17426}
17427
17428/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
17429/// WHERE-style predicate. NULL → false (three-valued logic
17430/// collapses to "skip this row" for index inclusion). Numeric
17431/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
17432/// Everything else (strings, vectors, JSON, …) is not a valid
17433/// predicate result and surfaces as `false` so a malformed
17434/// predicate degrades to "row not in index" rather than panicking.
17435fn predicate_truthy(v: &spg_storage::Value) -> bool {
17436    use spg_storage::Value as V;
17437    match v {
17438        V::Bool(b) => *b,
17439        V::Int(n) => *n != 0,
17440        V::BigInt(n) => *n != 0,
17441        V::SmallInt(n) => *n != 0,
17442        _ => false,
17443    }
17444}
17445
17446/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
17447/// committed rows for pre-existing duplicates. If any pair of rows
17448/// matches the predicate AND has the same index key, refuse to
17449/// create the index so the user fixes the data before retrying.
17450fn check_existing_unique_violation(
17451    idx: &spg_storage::Index,
17452    schema: &spg_storage::TableSchema,
17453    rows: &[spg_storage::Row],
17454) -> Result<(), EngineError> {
17455    let predicate_expr = match idx.partial_predicate.as_deref() {
17456        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
17457            EngineError::Unsupported(alloc::format!(
17458                "stored partial predicate {s:?} failed to re-parse: {e:?}"
17459            ))
17460        })?),
17461        None => None,
17462    };
17463    let ctx = eval::EvalContext::new(&schema.columns, None);
17464    let key_positions = unique_key_positions(idx);
17465    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
17466    for row in rows {
17467        if let Some(expr) = &predicate_expr {
17468            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
17469                EngineError::Unsupported(alloc::format!(
17470                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
17471                ))
17472            })?;
17473            if !predicate_truthy(&v) {
17474                continue;
17475            }
17476        }
17477        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
17478            .iter()
17479            .map(|&p| {
17480                let v = row
17481                    .values
17482                    .get(p)
17483                    .cloned()
17484                    .unwrap_or(spg_storage::Value::Null);
17485                collated_key_cell(&v, p, schema)
17486            })
17487            .collect();
17488        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
17489            continue;
17490        }
17491        if seen.iter().any(|other| *other == key) {
17492            return Err(EngineError::Unsupported(alloc::format!(
17493                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
17494                idx.name
17495            )));
17496        }
17497        seen.push(key);
17498    }
17499    Ok(())
17500}
17501
17502/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
17503/// extra positions). For single-column indexes this is just
17504/// `[column_position]`.
17505fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
17506    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
17507    out.push(idx.column_position);
17508    out.extend_from_slice(&idx.extra_column_positions);
17509    out
17510}
17511
17512/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
17513/// target table. For each, eval the index's optional predicate
17514/// against (a) the candidate row and (b) every committed row plus
17515/// earlier batch rows; only rows where the predicate is truthy
17516/// participate. A duplicate key among predicate-matching rows is a
17517/// uniqueness violation. NULL keys lift the row out of the check
17518/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
17519fn enforce_unique_index_inserts(
17520    catalog: &Catalog,
17521    table_name: &str,
17522    rows: &[alloc::vec::Vec<spg_storage::Value>],
17523) -> Result<(), EngineError> {
17524    let table = catalog.get(table_name).ok_or_else(|| {
17525        EngineError::Storage(StorageError::TableNotFound {
17526            name: table_name.into(),
17527        })
17528    })?;
17529    let schema = table.schema();
17530    let ctx = eval::EvalContext::new(&schema.columns, None);
17531    for idx in table.indices() {
17532        if !idx.is_unique {
17533            continue;
17534        }
17535        // Re-parse the predicate once per index per batch.
17536        let predicate_expr = match idx.partial_predicate.as_deref() {
17537            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
17538                EngineError::Unsupported(alloc::format!(
17539                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
17540                    idx.name
17541                ))
17542            })?),
17543            None => None,
17544        };
17545        let key_positions = unique_key_positions(idx);
17546        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
17547            key_positions
17548                .iter()
17549                .map(|&p| {
17550                    let v = values.get(p).cloned().unwrap_or(spg_storage::Value::Null);
17551                    collated_key_cell(&v, p, schema)
17552                })
17553                .collect()
17554        };
17555        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
17556            let Some(expr) = &predicate_expr else {
17557                return Ok(true);
17558            };
17559            let tmp_row = spg_storage::Row {
17560                values: values.to_vec(),
17561            };
17562            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
17563                EngineError::Unsupported(alloc::format!(
17564                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
17565                    idx.name
17566                ))
17567            })?;
17568            Ok(predicate_truthy(&v))
17569        };
17570        // v7.29 (mailrs round-23b) — set-based: one O(table) pass
17571        // (predicate evaluated once per existing row instead of once
17572        // per row PAIR), then probe per batch row. The previous
17573        // nested scans made bulk import O(n²).
17574        let mut seen: hashbrown::HashSet<String> =
17575            hashbrown::HashSet::with_capacity(table.rows().len() + rows.len());
17576        for prow in table.rows() {
17577            if !participates(&prow.values)? {
17578                continue;
17579            }
17580            let key = key_of(&prow.values);
17581            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
17582                continue;
17583            }
17584            seen.insert(aggregate::encode_key(&key));
17585        }
17586        for (batch_idx, row_values) in rows.iter().enumerate() {
17587            if !participates(row_values)? {
17588                continue;
17589            }
17590            let key = key_of(row_values);
17591            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
17592                continue;
17593            }
17594            if !seen.insert(aggregate::encode_key(&key)) {
17595                return Err(EngineError::Unsupported(alloc::format!(
17596                    "UNIQUE INDEX {:?} violation on {table_name:?}: \
17597                     row #{batch_idx} duplicates an existing key",
17598                    idx.name
17599                )));
17600            }
17601        }
17602    }
17603    Ok(())
17604}
17605
17606/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
17607/// Returns `true` when at least one of `filter_cols` has a
17608/// different value in `new_row` vs `old_row`. Column lookup is
17609/// case-insensitive against `schema_cols`; unknown filter columns
17610/// are treated as "not changed" (the trigger therefore won't
17611/// fire on them — surfacing a parse-time error would be too
17612/// strict for catalog reloads where the schema may have drifted).
17613fn any_column_changed(
17614    filter_cols: &[String],
17615    schema_cols: &[ColumnSchema],
17616    old_row: &Row,
17617    new_row: &Row,
17618) -> bool {
17619    for col_name in filter_cols {
17620        let Some(pos) = schema_cols
17621            .iter()
17622            .position(|c| c.name.eq_ignore_ascii_case(col_name))
17623        else {
17624            continue;
17625        };
17626        let old_v = old_row.values.get(pos);
17627        let new_v = new_row.values.get(pos);
17628        if old_v != new_v {
17629            return true;
17630        }
17631    }
17632    false
17633}
17634
17635/// v7.13.0 — evaluate every CHECK predicate on the schema against
17636/// each candidate row. Mirrors PG semantics: a `false` result
17637/// rejects the mutation; a NULL result *passes* (CHECK rejects
17638/// only on definite-false, not on unknown). mailrs round-5 G3.
17639fn enforce_check_constraints(
17640    catalog: &Catalog,
17641    table_name: &str,
17642    rows: &[alloc::vec::Vec<spg_storage::Value>],
17643) -> Result<(), EngineError> {
17644    let table = catalog.get(table_name).ok_or_else(|| {
17645        EngineError::Storage(StorageError::TableNotFound {
17646            name: table_name.into(),
17647        })
17648    })?;
17649    let schema = table.schema();
17650    // v7.17.0 Phase 1.5 — domain-level CHECKs are enforced in
17651    // parallel with table-level CHECKs. Collect both lists up
17652    // front; if neither exists we early-out.
17653    let mut domain_checks_per_col: alloc::vec::Vec<(usize, alloc::vec::Vec<Expr>)> =
17654        alloc::vec::Vec::new();
17655    for (idx, col) in schema.columns.iter().enumerate() {
17656        let Some(dname) = &col.user_domain_type else {
17657            continue;
17658        };
17659        let Some(dom) = catalog.domain_types().get(dname) else {
17660            continue;
17661        };
17662        let mut parsed_for_col: alloc::vec::Vec<Expr> =
17663            alloc::vec::Vec::with_capacity(dom.checks.len());
17664        for src in &dom.checks {
17665            let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
17666                EngineError::Unsupported(alloc::format!(
17667                    "DOMAIN {dname:?} CHECK ({src:?}) on column {:?}: re-parse failed: {e:?}",
17668                    col.name
17669                ))
17670            })?;
17671            parsed_for_col.push(expr);
17672        }
17673        if !parsed_for_col.is_empty() {
17674            domain_checks_per_col.push((idx, parsed_for_col));
17675        }
17676    }
17677    if schema.checks.is_empty() && domain_checks_per_col.is_empty() {
17678        return Ok(());
17679    }
17680    let ctx = eval::EvalContext::new(&schema.columns, None);
17681    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
17682    for (i, src) in schema.checks.iter().enumerate() {
17683        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
17684            EngineError::Unsupported(alloc::format!(
17685                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
17686            ))
17687        })?;
17688        parsed.push((i, expr));
17689    }
17690    for (batch_idx, row_values) in rows.iter().enumerate() {
17691        let tmp_row = spg_storage::Row {
17692            values: row_values.clone(),
17693        };
17694        for (i, expr) in &parsed {
17695            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
17696                EngineError::Unsupported(alloc::format!(
17697                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
17698                ))
17699            })?;
17700            // PG: NULL passes (CHECK rejects on definite-false only).
17701            if matches!(v, spg_storage::Value::Bool(false)) {
17702                return Err(EngineError::Unsupported(alloc::format!(
17703                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
17704                    schema.checks[*i]
17705                )));
17706            }
17707        }
17708        // v7.17.0 Phase 1.5 — domain-level CHECKs. Each CHECK
17709        // expression references VALUE as a column-name; we
17710        // substitute the per-row cell into the eval context by
17711        // synthesising a single-column row of just that value
17712        // under a temporary `value` column schema.
17713        for (col_idx, checks) in &domain_checks_per_col {
17714            let cell = row_values
17715                .get(*col_idx)
17716                .cloned()
17717                .unwrap_or(spg_storage::Value::Null);
17718            let synth_cols = alloc::vec![spg_storage::ColumnSchema::new(
17719                "value",
17720                schema.columns[*col_idx].ty,
17721                schema.columns[*col_idx].nullable,
17722            )];
17723            let synth_ctx = eval::EvalContext::new(&synth_cols, None);
17724            let synth_row = spg_storage::Row {
17725                values: alloc::vec![cell],
17726            };
17727            for (ci, expr) in checks.iter().enumerate() {
17728                let v = eval::eval_expr(expr, &synth_row, &synth_ctx).map_err(|e| {
17729                    EngineError::Unsupported(alloc::format!(
17730                        "DOMAIN CHECK #{ci} on column {:?} eval at row #{batch_idx}: {e:?}",
17731                        schema.columns[*col_idx].name
17732                    ))
17733                })?;
17734                if matches!(v, spg_storage::Value::Bool(false)) {
17735                    return Err(EngineError::Unsupported(alloc::format!(
17736                        "DOMAIN CHECK violation on column {:?} (row #{batch_idx})",
17737                        schema.columns[*col_idx].name
17738                    )));
17739                }
17740            }
17741        }
17742    }
17743    Ok(())
17744}
17745
17746fn enforce_fk_inserts(
17747    catalog: &Catalog,
17748    child_table: &str,
17749    fks: &[spg_storage::ForeignKeyConstraint],
17750    rows: &[Vec<Value>],
17751) -> Result<(), EngineError> {
17752    for fk in fks {
17753        let parent_is_self = fk.parent_table == child_table;
17754        let parent = if parent_is_self {
17755            // Self-ref: read the current state of the same table.
17756            // The mut borrow on child has been dropped by the caller.
17757            catalog.get(child_table).ok_or_else(|| {
17758                EngineError::Storage(StorageError::TableNotFound {
17759                    name: child_table.into(),
17760                })
17761            })?
17762        } else {
17763            catalog.get(&fk.parent_table).ok_or_else(|| {
17764                EngineError::Storage(StorageError::TableNotFound {
17765                    name: fk.parent_table.clone(),
17766                })
17767            })?
17768        };
17769        for (batch_idx, row_values) in rows.iter().enumerate() {
17770            // Single-column FK fast path: try the parent's BTree
17771            // index for an O(log n) lookup. Composite FKs fall back
17772            // to a parent-row scan.
17773            if fk.local_columns.len() == 1 {
17774                let v = &row_values[fk.local_columns[0]];
17775                if matches!(v, Value::Null) {
17776                    continue;
17777                }
17778                let parent_col = fk.parent_columns[0];
17779                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
17780                    EngineError::Unsupported(alloc::format!(
17781                        "FOREIGN KEY column value of type {:?} is not index-eligible",
17782                        v.data_type()
17783                    ))
17784                })?;
17785                let present_committed = parent.indices().iter().any(|idx| {
17786                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17787                        && idx.column_position == parent_col
17788                        && idx.partial_predicate.is_none()
17789                        && !idx.lookup_eq(&key).is_empty()
17790                });
17791                // v7.6.7 self-ref widening: also accept a match
17792                // against earlier rows in this same batch when the
17793                // FK points at the table being inserted into.
17794                let present_in_batch = parent_is_self
17795                    && rows[..batch_idx]
17796                        .iter()
17797                        .any(|earlier| earlier.get(parent_col) == Some(v));
17798                if !(present_committed || present_in_batch) {
17799                    return Err(EngineError::Unsupported(alloc::format!(
17800                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
17801                        fk.parent_table,
17802                        parent
17803                            .schema()
17804                            .columns
17805                            .get(parent_col)
17806                            .map_or("?", |c| c.name.as_str()),
17807                        v,
17808                    )));
17809                }
17810            } else {
17811                // Composite FK: scan parent rows. v7.6.7 also
17812                // accepts a match against earlier rows in the same
17813                // batch (self-ref bulk-loading of hierarchies).
17814                if fk
17815                    .local_columns
17816                    .iter()
17817                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
17818                {
17819                    continue;
17820                }
17821                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
17822                let parent_match_committed = parent.rows().iter().any(|prow| {
17823                    fk.parent_columns
17824                        .iter()
17825                        .enumerate()
17826                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
17827                });
17828                let parent_match_in_batch = parent_is_self
17829                    && rows[..batch_idx].iter().any(|earlier| {
17830                        fk.parent_columns
17831                            .iter()
17832                            .enumerate()
17833                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
17834                    });
17835                if !(parent_match_committed || parent_match_in_batch) {
17836                    return Err(EngineError::Unsupported(alloc::format!(
17837                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
17838                        fk.parent_table,
17839                    )));
17840                }
17841            }
17842        }
17843    }
17844    Ok(())
17845}
17846
/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
/// DELETE on a parent. The plan is a list of these steps, stacked
/// across the FK graph by `plan_fk_parent_deletions`.
///
/// `plan_fk_parent_updates` emits the same step type for UPDATEs
/// that move a parent key, and `apply_fk_child_step` executes a
/// single step against the catalog.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the child table this step mutates (looked up in the
    /// catalog when the step is applied).
    child_table: String,
    /// What to do to that table's rows / cells.
    action: FkChildAction,
}
17855
/// The mutation carried by one `FkChildStep`.
///
/// Row positions are indices into the child table's `rows()` as
/// observed by the planner. In the per-cell variants the vectors are
/// parallel: entry `i` of each vector describes the same cell.
#[derive(Debug, Clone)]
enum FkChildAction {
    /// CASCADE — remove these rows. Sorted, deduplicated positions.
    Delete { positions: Vec<usize> },
    /// SET NULL — for each (row, column) in the flat list, write
    /// NULL into that child cell. Multiple FKs on the same row may
    /// produce overlapping entries (deduped at plan time).
    SetNull {
        /// Row position of the i-th cell.
        positions: Vec<usize>,
        /// Column index of the i-th cell.
        columns: Vec<usize>,
    },
    /// SET DEFAULT — same shape as SetNull but writes the column's
    /// declared DEFAULT value (resolved at plan time). Columns
    /// without a DEFAULT raise an error during planning.
    ///
    /// `plan_fk_parent_updates` also uses this variant for
    /// ON UPDATE CASCADE, with `defaults` holding the new parent key
    /// values instead of declared defaults.
    SetDefault {
        /// Row position of the i-th cell.
        positions: Vec<usize>,
        /// Column index of the i-th cell.
        columns: Vec<usize>,
        /// Value to write into the i-th cell.
        defaults: Vec<Value>,
    },
}
17876
/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
///
/// Walks every table in the catalog looking for FKs whose
/// `parent_table` is `parent_table_name`. For each such FK + each
/// to-be-deleted parent row:
///
///   - RESTRICT / NoAction → error, no plan returned
///   - CASCADE → child rows get scheduled for deletion; recursive
///   - SetNull → child FK column(s) scheduled to be NULL-ed.
///     Verified NULL-able at plan time.
///   - SetDefault → child FK column(s) scheduled to be reset to
///     their declared DEFAULT. Columns without a DEFAULT raise.
///
/// SET NULL / SET DEFAULT do NOT cascade further — the child row
/// stays; only one of its columns mutates.
///
/// Arguments:
///   - `to_delete_positions` — positions of the parent rows being
///     deleted; only used to seed the `visited` set.
///   - `to_delete_rows` — the values of those rows; these seed the
///     work list and supply the parent key values to match against.
///
/// Returns the steps in application order: all SET NULL steps, then
/// all SET DEFAULT steps, then all DELETE steps. Nothing in the
/// catalog is modified here — the function only reads it.
///
/// Every failure is reported as `EngineError::Unsupported` with a
/// descriptive message.
fn plan_fk_parent_deletions(
    catalog: &Catalog,
    parent_table_name: &str,
    to_delete_positions: &[usize],
    to_delete_rows: &[Vec<Value>],
) -> Result<Vec<FkChildStep>, EngineError> {
    use alloc::collections::{BTreeMap, BTreeSet};
    if to_delete_rows.is_empty() {
        return Ok(Vec::new());
    }
    // child_table → row positions scheduled for cascade deletion.
    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
    // (table, row position) pairs already scheduled for deletion —
    // the originally targeted parent rows plus every cascaded row.
    // Guards against re-queuing a row when the FK graph has cycles.
    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
    for &p in to_delete_positions {
        visited.insert((parent_table_name.to_string(), p));
    }
    // Work list of (table name, row values) still to be treated as a
    // deleted parent. Popped from the back, so traversal is LIFO.
    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
        .iter()
        .map(|r| (parent_table_name.to_string(), r.clone()))
        .collect();
    while let Some((cur_parent, parent_row)) = work.pop() {
        for child_name in catalog.table_names() {
            let child = catalog
                .get(&child_name)
                .expect("table_names → catalog.get round-trip is total");
            for fk in &child.schema().foreign_keys {
                if fk.parent_table != cur_parent {
                    continue;
                }
                // Referenced key values of the row being deleted.
                let parent_key: Vec<&Value> = fk
                    .parent_columns
                    .iter()
                    .map(|&pi| &parent_row[pi])
                    .collect();
                // A parent key containing NULL is treated as having
                // no referencing children.
                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
                    continue;
                }
                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
                    // Self-referencing FK: a row already scheduled
                    // for deletion in this same table is skipped, so
                    // it neither restricts nor gets rewritten.
                    if child_name == cur_parent
                        && visited.contains(&(child_name.clone(), child_row_idx))
                    {
                        continue;
                    }
                    let matches_key = fk
                        .local_columns
                        .iter()
                        .enumerate()
                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
                    if !matches_key {
                        continue;
                    }
                    match fk.on_delete {
                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
                            return Err(EngineError::Unsupported(alloc::format!(
                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
                                 restricted by FK from {child_name:?}.{:?}",
                                fk.local_columns,
                            )));
                        }
                        spg_storage::FkAction::Cascade => {
                            // `insert` returns false for a row seen
                            // before, so each child row is scheduled
                            // and queued for recursion at most once.
                            if visited.insert((child_name.clone(), child_row_idx)) {
                                delete_plan
                                    .entry(child_name.clone())
                                    .or_default()
                                    .insert(child_row_idx);
                                work.push((child_name.clone(), child_row.values.clone()));
                            }
                        }
                        spg_storage::FkAction::SetNull => {
                            // Verify every local FK column is NULL-able.
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                if !col.nullable {
                                    return Err(EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET NULL: column \
                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
                                        col.name,
                                    )));
                                }
                            }
                            // Only record the cells once all columns
                            // passed the check above.
                            let entry = setnull_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                entry.insert((child_row_idx, li));
                            }
                        }
                        spg_storage::FkAction::SetDefault => {
                            // Resolve the DEFAULT for every local FK col.
                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                let default = col.default.clone().ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
                                         {child_name:?}.{:?} has no DEFAULT declared",
                                        col.name,
                                    ))
                                })?;
                                entry.insert((child_row_idx, li), default);
                            }
                        }
                    }
                }
            }
        }
    }
    // Flatten the three plans into the ordered `FkChildStep` list.
    // All delete steps are emitted after every null/default step, so
    // a child row that's both re-written and then cascade-deleted
    // only ends up deleted. A single FK chooses exactly one action,
    // so the order is mostly a precaution.
    // NOTE(review): two different FKs on the same child row are each
    // evaluated by the loop above, so they could still schedule both
    // a re-write and a delete for that row — confirm that is intended.
    let mut steps: Vec<FkChildStep> = Vec::new();
    for (child_table, entries) in setnull_plan {
        // Split the (row, column) pairs into two parallel vectors.
        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetNull { positions, columns },
        });
    }
    for (child_table, entries) in setdefault_plan {
        let mut positions = Vec::with_capacity(entries.len());
        let mut columns = Vec::with_capacity(entries.len());
        let mut defaults = Vec::with_capacity(entries.len());
        for ((p, c), v) in entries {
            positions.push(p);
            columns.push(c);
            defaults.push(v);
        }
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetDefault {
                positions,
                columns,
                defaults,
            },
        });
    }
    for (child_table, positions) in delete_plan {
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::Delete {
                // BTreeSet iteration yields ascending, unique positions.
                positions: positions.into_iter().collect(),
            },
        });
    }
    Ok(steps)
}
18050
18051/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
18052/// PK/UNIQUE columns. Walks every other table whose FK references
18053/// `parent_table_name`; for each FK whose parent_columns overlap a
18054/// mutated column, decides the action by `fk.on_update`.
18055///
18056///   - RESTRICT / NoAction → error if any child references the OLD
18057///     value
18058///   - CASCADE → child FK columns get rewritten to the NEW parent
18059///     value (a SetNull-style update step with the new value)
18060///   - SetNull → child FK columns set to NULL
18061///   - SetDefault → child FK columns set to declared default
18062///
18063/// `plan_with_old` is `(row_position, old_values, new_values)` so
18064/// the planner can detect "did this row's parent key actually
18065/// change?" — only rows where at least one referenced parent
18066/// column moved trigger inbound work.
18067fn plan_fk_parent_updates(
18068    catalog: &Catalog,
18069    parent_table_name: &str,
18070    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
18071) -> Result<Vec<FkChildStep>, EngineError> {
18072    use alloc::collections::BTreeMap;
18073    if plan_with_old.is_empty() {
18074        return Ok(Vec::new());
18075    }
18076    // For each child table we may touch, build per-child step
18077    // lists. UPDATE never deletes children — `delete_plan` stays
18078    // empty here but is kept structurally aligned with
18079    // `plan_fk_parent_deletions` for future use.
18080    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
18081    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
18082        BTreeMap::new();
18083    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
18084    // Cascade-update plan: child_table → row_idx → col_idx → new_value
18085    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
18086
18087    for child_name in catalog.table_names() {
18088        let child = catalog
18089            .get(&child_name)
18090            .expect("table_names → catalog.get total");
18091        for fk in &child.schema().foreign_keys {
18092            if fk.parent_table != parent_table_name {
18093                continue;
18094            }
18095            for (_pos, old_row, new_row) in plan_with_old {
18096                // Did any parent FK column change?
18097                let key_changed = fk
18098                    .parent_columns
18099                    .iter()
18100                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
18101                if !key_changed {
18102                    continue;
18103                }
18104                // The OLD parent key — used to find referring children.
18105                let old_key: Vec<&Value> =
18106                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
18107                if old_key.iter().any(|v| matches!(v, Value::Null)) {
18108                    // NULL parent has no children — skip.
18109                    continue;
18110                }
18111                let new_key: Vec<&Value> =
18112                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
18113                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
18114                    // Self-ref same-row updates: a row updating its
18115                    // own PK doesn't restrict itself.
18116                    if child_name == parent_table_name
18117                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
18118                    {
18119                        continue;
18120                    }
18121                    let matches_key = fk
18122                        .local_columns
18123                        .iter()
18124                        .enumerate()
18125                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
18126                    if !matches_key {
18127                        continue;
18128                    }
18129                    match fk.on_update {
18130                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
18131                            return Err(EngineError::Unsupported(alloc::format!(
18132                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
18133                                 restricted by FK from {child_name:?}.{:?}",
18134                                fk.local_columns,
18135                            )));
18136                        }
18137                        spg_storage::FkAction::Cascade => {
18138                            // Rewrite child FK columns to new key.
18139                            let entry = cascade_plan.entry(child_name.clone()).or_default();
18140                            for (i, &li) in fk.local_columns.iter().enumerate() {
18141                                entry.insert((child_row_idx, li), new_key[i].clone());
18142                            }
18143                        }
18144                        spg_storage::FkAction::SetNull => {
18145                            for &li in &fk.local_columns {
18146                                let col = child.schema().columns.get(li).ok_or_else(|| {
18147                                    EngineError::Unsupported(alloc::format!(
18148                                        "FK local column {li} missing in {child_name:?}"
18149                                    ))
18150                                })?;
18151                                if !col.nullable {
18152                                    return Err(EngineError::Unsupported(alloc::format!(
18153                                        "FOREIGN KEY ON UPDATE SET NULL: column \
18154                                         {child_name:?}.{:?} is NOT NULL",
18155                                        col.name,
18156                                    )));
18157                                }
18158                            }
18159                            let entry = setnull_plan.entry(child_name.clone()).or_default();
18160                            for &li in &fk.local_columns {
18161                                entry.insert((child_row_idx, li));
18162                            }
18163                        }
18164                        spg_storage::FkAction::SetDefault => {
18165                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
18166                            for &li in &fk.local_columns {
18167                                let col = child.schema().columns.get(li).ok_or_else(|| {
18168                                    EngineError::Unsupported(alloc::format!(
18169                                        "FK local column {li} missing in {child_name:?}"
18170                                    ))
18171                                })?;
18172                                let default = col.default.clone().ok_or_else(|| {
18173                                    EngineError::Unsupported(alloc::format!(
18174                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
18175                                         {child_name:?}.{:?} has no DEFAULT",
18176                                        col.name,
18177                                    ))
18178                                })?;
18179                                entry.insert((child_row_idx, li), default);
18180                            }
18181                        }
18182                    }
18183                }
18184            }
18185        }
18186    }
18187    // Flatten into FkChildStep list. UPDATE doesn't produce
18188    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
18189    let mut steps: Vec<FkChildStep> = Vec::new();
18190    for (child_table, entries) in cascade_plan {
18191        let mut positions = Vec::with_capacity(entries.len());
18192        let mut columns = Vec::with_capacity(entries.len());
18193        let mut defaults = Vec::with_capacity(entries.len());
18194        for ((p, c), v) in entries {
18195            positions.push(p);
18196            columns.push(c);
18197            defaults.push(v);
18198        }
18199        // We reuse `FkChildAction::SetDefault` for cascade-update:
18200        // both shapes are "write a known value into specific cells"
18201        // — `apply_per_cell_writes` doesn't care whether the value
18202        // came from a DEFAULT declaration or a new parent key.
18203        steps.push(FkChildStep {
18204            child_table,
18205            action: FkChildAction::SetDefault {
18206                positions,
18207                columns,
18208                defaults,
18209            },
18210        });
18211    }
18212    for (child_table, entries) in setnull_plan {
18213        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
18214        steps.push(FkChildStep {
18215            child_table,
18216            action: FkChildAction::SetNull { positions, columns },
18217        });
18218    }
18219    for (child_table, entries) in setdefault_plan {
18220        let mut positions = Vec::with_capacity(entries.len());
18221        let mut columns = Vec::with_capacity(entries.len());
18222        let mut defaults = Vec::with_capacity(entries.len());
18223        for ((p, c), v) in entries {
18224            positions.push(p);
18225            columns.push(c);
18226            defaults.push(v);
18227        }
18228        steps.push(FkChildStep {
18229            child_table,
18230            action: FkChildAction::SetDefault {
18231                positions,
18232                columns,
18233                defaults,
18234            },
18235        });
18236    }
18237    let _ = delete_plan; // UPDATE never deletes children.
18238    Ok(steps)
18239}
18240
18241/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
18242/// the three action variants so the DELETE executor stays a
18243/// simple loop over the planned steps.
18244fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
18245    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
18246        EngineError::Storage(StorageError::TableNotFound {
18247            name: step.child_table.clone(),
18248        })
18249    })?;
18250    match &step.action {
18251        FkChildAction::Delete { positions } => {
18252            let _ = child.delete_rows(positions);
18253        }
18254        FkChildAction::SetNull { positions, columns } => {
18255            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
18256        }
18257        FkChildAction::SetDefault {
18258            positions,
18259            columns,
18260            defaults,
18261        } => {
18262            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
18263        }
18264    }
18265    Ok(())
18266}
18267
18268/// v7.6.5 — write new values into selected child cells via
18269/// `Table::update_row` (the catalog's existing UPDATE entry).
18270/// Groups writes by row position so multi-column updates on the
18271/// same row only call `update_row` once. `value_for(i)` produces
18272/// the new value for the i-th (position, column) entry.
18273fn apply_per_cell_writes(
18274    child: &mut spg_storage::Table,
18275    positions: &[usize],
18276    columns: &[usize],
18277    mut value_for: impl FnMut(usize) -> Value,
18278) -> Result<(), EngineError> {
18279    use alloc::collections::BTreeMap;
18280    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
18281    for i in 0..positions.len() {
18282        by_row
18283            .entry(positions[i])
18284            .or_default()
18285            .push((columns[i], value_for(i)));
18286    }
18287    for (pos, mutations) in by_row {
18288        let mut new_values = child.rows()[pos].values.clone();
18289        for (col, v) in mutations {
18290            if let Some(slot) = new_values.get_mut(col) {
18291                *slot = v;
18292            }
18293        }
18294        child
18295            .update_row(pos, new_values)
18296            .map_err(EngineError::Storage)?;
18297    }
18298    Ok(())
18299}
18300
18301fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
18302    match a {
18303        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
18304        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
18305        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
18306        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
18307        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
18308    }
18309}
18310
18311/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
18312/// default-fill. Free fn (rather than `&self`) so callers
18313/// with an active `&mut Table` borrow can still use it.
18314/// Literal defaults take the cached path (`col.default`);
18315/// runtime defaults hit `clock_fn` at each call. mailrs G4.
18316fn resolve_column_default_free(
18317    col: &ColumnSchema,
18318    clock_fn: Option<ClockFn>,
18319) -> Result<Value, EngineError> {
18320    if let Some(rt) = &col.runtime_default {
18321        return eval_runtime_default_free(rt, col.ty, clock_fn);
18322    }
18323    Ok(col.default.clone().unwrap_or(Value::Null))
18324}
18325
18326fn eval_runtime_default_free(
18327    rt: &str,
18328    ty: DataType,
18329    clock_fn: Option<ClockFn>,
18330) -> Result<Value, EngineError> {
18331    let s = rt.trim().to_ascii_lowercase();
18332    // v7.17.0 Phase 2.1 — also strip `(N)` precision suffix
18333    // so MySQL `CURRENT_TIMESTAMP(6)` resolves the same as
18334    // bare `CURRENT_TIMESTAMP`. SPG stores TIMESTAMP at fixed
18335    // microsecond resolution; the precision modifier is
18336    // parser-only.
18337    let with_no_parens = s.trim_end_matches("()");
18338    let canonical: &str = if let Some(open_idx) = with_no_parens.find('(') {
18339        if with_no_parens.ends_with(')') {
18340            &with_no_parens[..open_idx]
18341        } else {
18342            with_no_parens
18343        }
18344    } else {
18345        with_no_parens
18346    };
18347    let now_us = match clock_fn {
18348        Some(f) => f(),
18349        None => 0,
18350    };
18351    let v = match canonical {
18352        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
18353        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
18354        "current_time" | "localtime" => Value::Timestamp(now_us),
18355        // v7.17.0 — UUID generators in DEFAULT clauses. Required
18356        // for the canonical Django / Rails / Hibernate `id UUID
18357        // PRIMARY KEY DEFAULT gen_random_uuid()` pattern. Each
18358        // INSERT evaluates the function fresh; the per-row UUID
18359        // is the storage value, not a cached literal.
18360        "gen_random_uuid" | "uuid_generate_v4" => Value::Uuid(eval::gen_random_uuid_bytes()),
18361        other => {
18362            return Err(EngineError::Unsupported(alloc::format!(
18363                "runtime DEFAULT expression {other:?} not supported \
18364                 (v7.17.0 whitelist: now() / current_timestamp / \
18365                 current_date / current_time / localtimestamp / \
18366                 localtime / gen_random_uuid() / \
18367                 uuid_generate_v4())"
18368            )));
18369        }
18370    };
18371    coerce_value(v, ty, "DEFAULT", 0)
18372}
18373
18374/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
18375/// evaluation rather than being cacheable as a literal Value.
18376/// FunctionCall is the immediate case (`now()`,
18377/// `current_timestamp`). Literal expressions and simple sign-
18378/// flipped numerics still take the static-cache path.
18379fn is_runtime_default_expr(expr: &Expr) -> bool {
18380    match expr {
18381        Expr::FunctionCall { .. } => true,
18382        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
18383        _ => false,
18384    }
18385}
18386
18387/// v7.17.0 Phase 1.4 — INSERT/UPDATE-time enum label check. When
18388/// `col_idx` has a registered label list, the cell value must be
18389/// NULL or one of the labels (case-sensitive per PG).
18390/// v7.17.0 Phase 3.P0-37 — validate + canonicalise a MySQL inline
18391/// SET cell. For non-SET columns this is a no-op pass-through.
18392///
18393/// Semantics:
18394///   * NULL preserved.
18395///   * Empty string → `''` (zero flags).
18396///   * Otherwise split on ',', trim each token, validate every
18397///     token against the column's variant list (error on miss),
18398///     de-dup, then re-emit in DEFINITION order joined by ','.
18399fn canonicalize_set_value(
18400    lookup: &alloc::collections::BTreeMap<usize, Vec<String>>,
18401    col_idx: usize,
18402    col_name: &str,
18403    value: Value,
18404) -> Result<Value, EngineError> {
18405    let Some(variants) = lookup.get(&col_idx) else {
18406        return Ok(value);
18407    };
18408    match value {
18409        Value::Null => Ok(Value::Null),
18410        Value::Text(s) => {
18411            if s.is_empty() {
18412                return Ok(Value::Text(alloc::string::String::new()));
18413            }
18414            // Collect a presence-set of variant indices to keep
18415            // definition order + handle de-dup in one pass.
18416            let mut present = alloc::vec![false; variants.len()];
18417            for raw in s.split(',') {
18418                let tok = raw.trim();
18419                if tok.is_empty() {
18420                    continue;
18421                }
18422                let idx = variants.iter().position(|v| v == tok).ok_or_else(|| {
18423                    EngineError::Unsupported(alloc::format!(
18424                        "column {col_name:?}: invalid SET token {tok:?}; \
18425                         allowed: {variants:?}"
18426                    ))
18427                })?;
18428                present[idx] = true;
18429            }
18430            // Re-emit in definition order.
18431            let mut out = alloc::string::String::new();
18432            let mut first = true;
18433            for (i, keep) in present.iter().enumerate() {
18434                if !keep {
18435                    continue;
18436                }
18437                if !first {
18438                    out.push(',');
18439                }
18440                first = false;
18441                out.push_str(&variants[i]);
18442            }
18443            Ok(Value::Text(out))
18444        }
18445        other => Err(EngineError::Unsupported(alloc::format!(
18446            "column {col_name:?}: SET-typed column expects TEXT, got {:?}",
18447            other.data_type()
18448        ))),
18449    }
18450}
18451
18452fn enforce_enum_label(
18453    lookup: &alloc::collections::BTreeMap<usize, Vec<String>>,
18454    col_idx: usize,
18455    col_name: &str,
18456    value: &Value,
18457) -> Result<(), EngineError> {
18458    if let Some(labels) = lookup.get(&col_idx) {
18459        match value {
18460            Value::Null => Ok(()),
18461            Value::Text(s) => {
18462                if labels.iter().any(|l| l == s) {
18463                    Ok(())
18464                } else {
18465                    Err(EngineError::Unsupported(alloc::format!(
18466                        "column {col_name:?}: invalid enum label {s:?}; allowed: {labels:?}"
18467                    )))
18468                }
18469            }
18470            other => Err(EngineError::Unsupported(alloc::format!(
18471                "column {col_name:?}: enum-typed column expects TEXT, got {:?}",
18472                other.data_type()
18473            ))),
18474        }
18475    } else {
18476        Ok(())
18477    }
18478}
18479
18480fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
18481    let ty = column_type_to_data_type(c.ty);
18482    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
18483    // user_type_ref is the raw ident the parser couldn't resolve
18484    // to a built-in; classification into enum vs domain happens
18485    // at exec_create_table where we have catalog access. We
18486    // park it temporarily as user_enum_type and the engine
18487    // promotes domain bindings to user_domain_type before the
18488    // table is stored.
18489    if let Some(name) = c.user_type_ref {
18490        schema.user_enum_type = Some(name);
18491    }
18492    // v7.17.0 Phase 2.1 — render the ON UPDATE expression to
18493    // canonical text (the engine re-parses at UPDATE time).
18494    if let Some(expr) = c.on_update_runtime {
18495        schema.on_update_runtime = Some(alloc::format!("{expr}"));
18496    }
18497    // v7.17.0 Phase 2.5 — bridge the AST `Collation` enum to the
18498    // storage one. Same variants, different crates (spg-storage
18499    // owns no dep on spg-sql).
18500    schema.collation = match c.collation {
18501        spg_sql::ast::Collation::Binary => spg_storage::Collation::Binary,
18502        spg_sql::ast::Collation::CaseInsensitive => spg_storage::Collation::CaseInsensitive,
18503    };
18504    // v7.17.0 Phase 4.4 — MySQL `UNSIGNED` flag propagates to
18505    // storage so engine INSERT / UPDATE can range-check.
18506    schema.is_unsigned = c.is_unsigned;
18507    // v7.17.0 Phase 3.P0-36 — MySQL inline ENUM variant list.
18508    // INSERT validation lives in coerce_value (Text → Text path
18509    // with the column's variant list as the accept-set).
18510    schema.inline_enum_variants = c.inline_enum_variants;
18511    // v7.17.0 Phase 3.P0-37 — MySQL inline SET variant list.
18512    // INSERT canonicalisation (de-dup + sort by definition order)
18513    // lives in the exec_insert path next to the ENUM check.
18514    schema.inline_set_variants = c.inline_set_variants;
18515    if let Some(default_expr) = c.default {
18516        // v7.9.21 — distinguish literal defaults (evaluated once
18517        // at CREATE TABLE) from expression defaults (deferred to
18518        // INSERT). Function calls (`now()`, `current_timestamp`
18519        // — see v7.9.20 keyword promotion) take the runtime path.
18520        // Literals continue to cache. mailrs G4.
18521        if is_runtime_default_expr(&default_expr) {
18522            let display = alloc::format!("{default_expr}");
18523            schema = schema.with_runtime_default(display);
18524        } else {
18525            let raw = literal_expr_to_value(default_expr)?;
18526            let coerced = coerce_value(raw, ty, &c.name, 0)?;
18527            schema = schema.with_default(coerced);
18528        }
18529    }
18530    if c.auto_increment {
18531        // AUTO_INCREMENT only makes sense on integer-shaped columns.
18532        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
18533            return Err(EngineError::Unsupported(alloc::format!(
18534                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
18535            )));
18536        }
18537        schema = schema.with_auto_increment();
18538    }
18539    Ok(schema)
18540}
18541
/// v7.10.4 — decode a BYTEA literal. Accepts:
///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
///   * `Hello\000world` (backslash escape form; `\\` for literal backslash)
///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
///
/// The input is trimmed first. In escape form a `\NNN` sequence is decoded
/// only when all three characters are octal digits (`0`–`7`) and the value
/// fits in one byte; any other backslash is copied through as a literal
/// byte.
///
/// # Errors
/// Only the hex form can fail: `"odd-length hex literal"` or
/// `"invalid hex digit"`.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        // Hex form. Each pair of hex digits → one byte.
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        let digits = cleaned.as_bytes();
        if digits.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        let mut out = alloc::vec::Vec::with_capacity(digits.len() / 2);
        for pair in digits.chunks_exact(2) {
            out.push((hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?);
        }
        return Ok(out);
    }

    // Escape form or raw. `\\` and `\NNN` octal sequences decode; anything
    // else is a literal byte.
    let octal = |x: u8| {
        if matches!(x, b'0'..=b'7') {
            Some(u32::from(x - b'0'))
        } else {
            None
        }
    };
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
        if b == b'\\' {
            if bytes.get(i + 1) == Some(&b'\\') {
                out.push(b'\\');
                i += 2;
                continue;
            }
            if let Some(&[d2, d1, d0]) = bytes.get(i + 1..i + 4) {
                // 8 and 9 are decimal digits but not octal ones; requiring
                // 0-7 keeps e.g. `\089` from decoding to a bogus byte.
                if let (Some(hi), Some(mid), Some(lo)) = (octal(d2), octal(d1), octal(d0)) {
                    let v = hi * 64 + mid * 8 + lo;
                    if v <= 0xFF {
                        out.push(v as u8); // lossless: v <= 0xFF checked above
                        i += 4;
                        continue;
                    }
                }
            }
        }
        out.push(b);
        i += 1;
    }
    Ok(out)
}

/// Decode one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its 4-bit value.
///
/// # Errors
/// Returns `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
18605
18606/// v7.10.11 — decode a PG TEXT[] external array form
18607/// (`{a,b,NULL}` with optional double-quoted elements). The
18608/// engine takes a leading/trailing `{`/`}` and splits at commas.
18609/// Quoted elements (`"hello, world"`) preserve embedded commas;
18610/// `\\` and `\"` decode to literal backslash / quote. Plain
18611/// unquoted `NULL` (case-insensitive) maps to `None`.
18612/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
18613/// element values. Single-element-type rules:
18614///   - all NULL / all Text → TextArray
18615///   - all Int (or Int+NULL) → IntArray
18616///   - any BigInt without Text → BigIntArray (widening)
18617///   - any Text → TextArray (fallback; non-string elements
18618///     render as text)
18619fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
18620    let mut has_text = false;
18621    let mut has_bigint = false;
18622    let mut has_int = false;
18623    for v in &items {
18624        match v {
18625            Value::Null => {}
18626            Value::Text(_) | Value::Json(_) => has_text = true,
18627            Value::BigInt(_) => has_bigint = true,
18628            Value::Int(_) | Value::SmallInt(_) => has_int = true,
18629            _ => has_text = true,
18630        }
18631    }
18632    if has_text || (!has_bigint && !has_int) {
18633        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
18634            .into_iter()
18635            .map(|v| match v {
18636                Value::Null => None,
18637                Value::Text(s) | Value::Json(s) => Some(s),
18638                other => Some(alloc::format!("{other:?}")),
18639            })
18640            .collect();
18641        return Value::TextArray(out);
18642    }
18643    if has_bigint {
18644        let out: alloc::vec::Vec<Option<i64>> = items
18645            .into_iter()
18646            .map(|v| match v {
18647                Value::Null => None,
18648                Value::Int(n) => Some(i64::from(n)),
18649                Value::SmallInt(n) => Some(i64::from(n)),
18650                Value::BigInt(n) => Some(n),
18651                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
18652            })
18653            .collect();
18654        return Value::BigIntArray(out);
18655    }
18656    let out: alloc::vec::Vec<Option<i32>> = items
18657        .into_iter()
18658        .map(|v| match v {
18659            Value::Null => None,
18660            Value::Int(n) => Some(n),
18661            Value::SmallInt(n) => Some(i32::from(n)),
18662            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
18663        })
18664        .collect();
18665    Value::IntArray(out)
18666}
18667
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The literal must be wrapped in `{`/`}`; the inside is split at commas.
/// Quoted elements (`"hello, world"`) preserve embedded commas, and a
/// backslash inside quotes makes the next byte literal (`\\`, `\"`).
/// Unquoted elements are trimmed, and a plain unquoted `NULL`
/// (case-insensitive) maps to `None`. Spaces and tabs around elements are
/// ignored. An empty (or all-whitespace) body yields an empty vector.
///
/// Quoted elements are accumulated as raw bytes and validated as UTF-8 at
/// the end, so multi-byte characters survive intact.
///
/// # Errors
/// Returns a static message when the braces are missing, a quoted element
/// is unterminated, or something other than `,` follows a quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let inner = s
        .trim()
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let mut i = 0;
    // `<=` (not `<`) so that a trailing comma still yields a final empty
    // element, matching the behaviour of the unquoted branch below.
    while i <= bytes.len() {
        // Skip leading whitespace.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if bytes.get(i) == Some(&b'"') {
            // Quoted element.
            i += 1; // open quote
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    buf.push(bytes[i + 1]);
                    i += 2;
                } else {
                    buf.push(bytes[i]);
                    i += 1;
                }
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::ToString::to_string(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && is_blank(bytes[i]) {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
18732
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the literal `NULL`; elements that are
/// empty, spell `NULL` (any case), or contain commas, quotes,
/// backslashes, braces or any whitespace are double-quoted with
/// `\\` / `\"` escapes.
///
/// All whitespace (not just space / tab) triggers quoting: the decoder
/// trims unquoted elements, so e.g. a trailing newline would otherwise be
/// lost on a round trip.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        let Some(s) = item else {
            out.push_str("NULL");
            continue;
        };
        let needs_quote = s.is_empty()
            || s.eq_ignore_ascii_case("NULL")
            || s.chars()
                .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
        if !needs_quote {
            out.push_str(s);
            continue;
        }
        out.push('"');
        for c in s.chars() {
            if c == '"' || c == '\\' {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }
    out.push('}');
    out
}
18769
/// v7.10.4 — render BYTEA bytes in PG hex output format: a `\x`
/// prefix followed by two lowercase hex digits per byte. Used by the
/// Text-side round-trip + the wire layer's text-mode encoder.
fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
    let mut text = alloc::string::String::with_capacity(2 + 2 * b.len());
    text.push_str("\\x");
    for &octet in b {
        // High nibble first, then low nibble.
        text.push(hex_digit(octet >> 4));
        text.push(hex_digit(octet & 0x0F));
    }
    text
}

/// Map a nibble (`0..=15`) to its lowercase hex character; any larger
/// value yields `'?'`.
const fn hex_digit(n: u8) -> char {
    const LOWER_HEX: [u8; 16] = *b"0123456789abcdef";
    if n < 16 {
        LOWER_HEX[n as usize] as char
    } else {
        '?'
    }
}
18792
/// v7.17.0 Phase 3.P0-39 — parse a PG `hstore` text literal into
/// a flat key→value list. Empty string → empty list. Duplicate
/// keys take last-write-wins (the later value replaces the earlier
/// one in place).
///
/// Accepted shapes (minimal subset):
///   * `'a=>1, b=>2'`            — bareword keys/values
///   * `'"a"=>"1", "b"=>"2"'`    — quoted keys/values; inside quotes a
///     backslash makes the next byte literal
///   * `'a=>NULL'`               — case-insensitive NULL token
///     surfaces as `None` (no quotes around NULL)
///
/// Quoted tokens are collected as raw bytes and validated as UTF-8, so
/// non-ASCII keys and values are preserved.
///
/// Returns None on parse failure → caller surfaces as hard error.
fn parse_hstore_str(
    s: &str,
) -> Option<Vec<(alloc::string::String, Option<alloc::string::String>)>> {
    /// Whitespace permitted between tokens.
    fn is_ws(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }

    /// Advance `i` past any run of inter-token whitespace.
    fn skip_ws(bytes: &[u8], i: &mut usize) {
        while *i < bytes.len() && is_ws(bytes[*i]) {
            *i += 1;
        }
    }

    /// Read one quoted or bareword token starting at `i`; `None` when the
    /// token is empty, unterminated, or not valid UTF-8.
    fn parse_token(bytes: &[u8], i: &mut usize) -> Option<alloc::string::String> {
        let raw: Vec<u8> = if *bytes.get(*i)? == b'"' {
            *i += 1;
            let mut buf = Vec::new();
            loop {
                // Running off the end means the closing quote is missing.
                match *bytes.get(*i)? {
                    b'"' => {
                        *i += 1;
                        break buf;
                    }
                    b'\\' if *i + 1 < bytes.len() => {
                        buf.push(bytes[*i + 1]);
                        *i += 2;
                    }
                    c => {
                        buf.push(c);
                        *i += 1;
                    }
                }
            }
        } else {
            let start = *i;
            while *i < bytes.len() && !(is_ws(bytes[*i]) || matches!(bytes[*i], b',' | b'=')) {
                *i += 1;
            }
            if *i == start {
                return None;
            }
            bytes[start..*i].to_vec()
        };
        alloc::string::String::from_utf8(raw).ok()
    }

    let bytes = s.as_bytes();
    let mut out: Vec<(alloc::string::String, Option<alloc::string::String>)> = Vec::new();
    let mut i = 0;
    skip_ws(bytes, &mut i);
    while i < bytes.len() {
        let key = parse_token(bytes, &mut i)?;
        skip_ws(bytes, &mut i);
        if bytes.get(i..i + 2) != Some(&b"=>"[..]) {
            return None;
        }
        i += 2;
        skip_ws(bytes, &mut i);
        // Unquoted NULL token (case-insensitive), followed by end of input,
        // whitespace or a comma.
        let is_null = bytes
            .get(i..i + 4)
            .map_or(false, |w| w.eq_ignore_ascii_case(b"NULL"))
            && bytes.get(i + 4).map_or(true, |&b| is_ws(b) || b == b',');
        let val_token = if is_null {
            i += 4;
            None
        } else {
            Some(parse_token(bytes, &mut i)?)
        };
        // Replace any existing entry with the same key (last-wins).
        if let Some(pos) = out.iter().position(|(k, _)| k == &key) {
            out[pos] = (key, val_token);
        } else {
            out.push((key, val_token));
        }
        skip_ws(bytes, &mut i);
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return None;
        }
        i += 1;
        skip_ws(bytes, &mut i);
    }
    Some(out)
}
18890
/// v7.17.0 Phase 3.P0-39 — render a hstore as canonical PG text
/// form `"k"=>"v"` (keys and non-NULL values always quoted;
/// NULL token is bare). Pairs are joined with `", "`.
///
/// Double quotes and backslashes inside a key or value are
/// backslash-escaped so the output stays unambiguous and can be parsed
/// back.
fn format_hstore_str(
    pairs: &[(alloc::string::String, Option<alloc::string::String>)],
) -> alloc::string::String {
    /// Append `text` wrapped in double quotes, escaping `"` and `\`.
    fn push_quoted(out: &mut alloc::string::String, text: &str) {
        out.push('"');
        for c in text.chars() {
            if matches!(c, '"' | '\\') {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }

    let mut out = alloc::string::String::new();
    for (i, (k, v)) in pairs.iter().enumerate() {
        if i > 0 {
            out.push_str(", ");
        }
        push_quoted(&mut out, k);
        out.push_str("=>");
        match v {
            None => out.push_str("NULL"),
            Some(val) => push_quoted(&mut out, val),
        }
    }
    out
}
18916
/// v7.17.0 Phase 3.P0-39 — public wrapper so pgwire + sqllogictest
/// share the single hstore renderer.
///
/// Delegates straight to the private `format_hstore_str`; `pairs` is the
/// flat key → optional-value list, where a `None` value is a NULL.
pub fn format_hstore_text(
    pairs: &[(alloc::string::String, Option<alloc::string::String>)],
) -> alloc::string::String {
    format_hstore_str(pairs)
}
18924
18925// ─── v7.17.0 Phase 3.P0-40 — 2D array parse + display ─────────
18926
/// Split a PG external 2D-array literal `'{{a,b},{c,d}}'` into
/// per-row token lists. Returns Err on shape mismatch.
///
/// Tokens are returned trimmed but otherwise raw: a double-quoted element
/// keeps its quotes and backslash escapes, so callers can still tell a
/// quoted `"NULL"` from the bare NULL token. Commas and braces inside a
/// double-quoted element do not split cells or close rows. An empty outer
/// body yields no rows; an empty row yields an empty cell list.
///
/// # Errors
/// Returns a static message when the outer braces are missing, a row does
/// not start with `{`, a row is never closed (including by an unterminated
/// quote), or rows have different cell counts.
fn split_2d_literal(s: &str) -> Result<Vec<Vec<alloc::string::String>>, &'static str> {
    /// Split one row body at commas that sit outside double quotes.
    fn split_cells(row: &str) -> Vec<alloc::string::String> {
        let bytes = row.as_bytes();
        let mut cells = Vec::new();
        let mut start = 0;
        let mut in_quotes = false;
        let mut i = 0;
        while i < bytes.len() {
            match bytes[i] {
                // Inside quotes a backslash protects the next byte.
                b'\\' if in_quotes => i += 1,
                b'"' => in_quotes = !in_quotes,
                b',' if !in_quotes => {
                    cells.push(row[start..i].trim().to_string());
                    start = i + 1;
                }
                _ => {}
            }
            i += 1;
        }
        cells.push(row[start..].trim().to_string());
        cells
    }

    let outer = s
        .trim()
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("missing outer '{...}' braces")?;
    let trimmed = outer.trim();
    if trimmed.is_empty() {
        return Ok(Vec::new());
    }
    let bytes = trimmed.as_bytes();
    let mut rows: Vec<Vec<alloc::string::String>> = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        // Skip whitespace and row separators.
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b',') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b'{' {
            return Err("expected '{' opening a row");
        }
        i += 1;
        let row_start = i;
        // Find the brace that closes this row, ignoring braces in quotes.
        let mut depth = 1usize;
        let mut in_quotes = false;
        let mut row_end = None;
        while i < bytes.len() {
            match bytes[i] {
                b'\\' if in_quotes => i += 1,
                b'"' => in_quotes = !in_quotes,
                b'{' if !in_quotes => depth += 1,
                b'}' if !in_quotes => {
                    depth -= 1;
                    if depth == 0 {
                        row_end = Some(i);
                        break;
                    }
                }
                _ => {}
            }
            i += 1;
        }
        let Some(end) = row_end else {
            return Err("unbalanced '{...}' in row");
        };
        let row_text = &trimmed[row_start..end];
        i = end + 1;
        rows.push(if row_text.trim().is_empty() {
            Vec::new()
        } else {
            split_cells(row_text)
        });
    }
    if let Some(first) = rows.first() {
        let cols = first.len();
        if rows.iter().any(|r| r.len() != cols) {
            return Err("ragged 2D array (rows have different column counts)");
        }
    }
    Ok(rows)
}
18987
18988fn parse_int_2d_literal(s: &str) -> Result<Vec<Vec<Option<i32>>>, &'static str> {
18989    let raw = split_2d_literal(s)?;
18990    raw.into_iter()
18991        .map(|row| {
18992            row.into_iter()
18993                .map(|cell| {
18994                    if cell.eq_ignore_ascii_case("NULL") {
18995                        Ok(None)
18996                    } else {
18997                        cell.parse::<i32>()
18998                            .map(Some)
18999                            .map_err(|_| "invalid int element")
19000                    }
19001                })
19002                .collect()
19003        })
19004        .collect()
19005}
19006
19007fn parse_bigint_2d_literal(s: &str) -> Result<Vec<Vec<Option<i64>>>, &'static str> {
19008    let raw = split_2d_literal(s)?;
19009    raw.into_iter()
19010        .map(|row| {
19011            row.into_iter()
19012                .map(|cell| {
19013                    if cell.eq_ignore_ascii_case("NULL") {
19014                        Ok(None)
19015                    } else {
19016                        cell.parse::<i64>()
19017                            .map(Some)
19018                            .map_err(|_| "invalid bigint element")
19019                    }
19020                })
19021                .collect()
19022        })
19023        .collect()
19024}
19025
19026fn parse_text_2d_literal(s: &str) -> Result<Vec<Vec<Option<alloc::string::String>>>, &'static str> {
19027    let raw = split_2d_literal(s)?;
19028    Ok(raw
19029        .into_iter()
19030        .map(|row| {
19031            row.into_iter()
19032                .map(|cell| {
19033                    if cell.eq_ignore_ascii_case("NULL") {
19034                        None
19035                    } else {
19036                        Some(cell.trim_matches('"').to_string())
19037                    }
19038                })
19039                .collect()
19040        })
19041        .collect())
19042}
19043
/// Render a 2D `i32` array in PG external form, e.g. `{{1,NULL},{3,4}}`.
///
/// `None` cells print as the bare token `NULL`; rows and cells are joined
/// with `,` and no extra whitespace.
fn format_int_2d_text(rows: &[Vec<Option<i32>>]) -> alloc::string::String {
    let rendered_rows: Vec<alloc::string::String> = rows
        .iter()
        .map(|row| {
            let cells: Vec<alloc::string::String> = row
                .iter()
                .map(|cell| match cell {
                    Some(n) => n.to_string(),
                    None => "NULL".to_string(),
                })
                .collect();
            alloc::format!("{{{}}}", cells.join(","))
        })
        .collect();
    alloc::format!("{{{}}}", rendered_rows.join(","))
}
19065
/// Render a 2D `i64` array in PG external form, e.g. `{{1,NULL},{3,4}}`.
///
/// `None` cells print as the bare token `NULL`; rows and cells are joined
/// with `,` and no extra whitespace.
fn format_bigint_2d_text(rows: &[Vec<Option<i64>>]) -> alloc::string::String {
    let rendered_rows: Vec<alloc::string::String> = rows
        .iter()
        .map(|row| {
            let cells: Vec<alloc::string::String> = row
                .iter()
                .map(|cell| match cell {
                    Some(n) => n.to_string(),
                    None => "NULL".to_string(),
                })
                .collect();
            alloc::format!("{{{}}}", cells.join(","))
        })
        .collect();
    alloc::format!("{{{}}}", rendered_rows.join(","))
}
19087
/// Render a 2D text array in PG external form, e.g. `{{a,NULL},{"b c",d}}`.
///
/// `None` cells print as the bare token `NULL`. A text element is
/// double-quoted (with `"` and `\` backslash-escaped) when it is empty,
/// spells `NULL` in any case, or contains a comma, brace, quote, backslash
/// or whitespace — the same quoting rules the 1D `encode_text_array`
/// renderer applies. Without quoting, an empty string, the text `NULL`, or
/// an element containing a delimiter could not be told apart from array
/// syntax.
fn format_text_2d_text(rows: &[Vec<Option<alloc::string::String>>]) -> alloc::string::String {
    /// Append one non-NULL element, quoting and escaping when required.
    fn push_element(out: &mut alloc::string::String, s: &str) {
        let needs_quote = s.is_empty()
            || s.eq_ignore_ascii_case("NULL")
            || s.chars()
                .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
        if !needs_quote {
            out.push_str(s);
            return;
        }
        out.push('"');
        for c in s.chars() {
            if matches!(c, '"' | '\\') {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }

    let mut out = alloc::string::String::from("{");
    for (i, row) in rows.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        out.push('{');
        for (j, cell) in row.iter().enumerate() {
            if j > 0 {
                out.push(',');
            }
            match cell {
                None => out.push_str("NULL"),
                Some(s) => push_element(&mut out, s),
            }
        }
        out.push('}');
    }
    out.push('}');
    out
}
19109
/// v7.17.0 Phase 3.P0-40 — public wrappers so pgwire + sqllogictest
/// share the single 2D-array renderer.
///
/// This one forwards to the private `format_int_2d_text` (`i32` cells).
pub fn format_int_2d_text_pub(rows: &[Vec<Option<i32>>]) -> alloc::string::String {
    format_int_2d_text(rows)
}
/// Public wrapper around the private `format_bigint_2d_text` renderer
/// (`i64` cells); forwards `rows` unchanged.
pub fn format_bigint_2d_text_pub(rows: &[Vec<Option<i64>>]) -> alloc::string::String {
    format_bigint_2d_text(rows)
}
/// Public wrapper around the private `format_text_2d_text` renderer
/// (text cells); forwards `rows` unchanged.
pub fn format_text_2d_text_pub(
    rows: &[Vec<Option<alloc::string::String>>],
) -> alloc::string::String {
    format_text_2d_text(rows)
}
19123
19124/// v7.17.0 Phase 3.P0-38 — parse a PG range literal of the form
19125/// `'[lo,up)'` / `'(lo,up]'` / `'[lo,up]'` / `'(lo,up)'` /
19126/// `'empty'`. Lower / upper may be empty (unbounded). Returns
19127/// `None` on any parse failure; caller surfaces as hard error.
19128fn parse_range_str(s: &str, kind: spg_storage::RangeKind) -> Option<Value> {
19129    let s = s.trim();
19130    if s.eq_ignore_ascii_case("empty") {
19131        return Some(Value::Range {
19132            kind,
19133            lower: None,
19134            upper: None,
19135            lower_inc: false,
19136            upper_inc: false,
19137            empty: true,
19138        });
19139    }
19140    let bytes = s.as_bytes();
19141    if bytes.len() < 3 {
19142        return None;
19143    }
19144    let lower_inc = match bytes[0] {
19145        b'[' => true,
19146        b'(' => false,
19147        _ => return None,
19148    };
19149    let upper_inc = match bytes[bytes.len() - 1] {
19150        b']' => true,
19151        b')' => false,
19152        _ => return None,
19153    };
19154    let inner = &s[1..s.len() - 1];
19155    let (lo_text, up_text) = inner.split_once(',')?;
19156    let lower = if lo_text.is_empty() {
19157        None
19158    } else {
19159        Some(alloc::boxed::Box::new(parse_range_element(lo_text, kind)?))
19160    };
19161    let upper = if up_text.is_empty() {
19162        None
19163    } else {
19164        Some(alloc::boxed::Box::new(parse_range_element(up_text, kind)?))
19165    };
19166    Some(Value::Range {
19167        kind,
19168        lower,
19169        upper,
19170        lower_inc,
19171        upper_inc,
19172        empty: false,
19173    })
19174}
19175
19176/// v7.17.0 Phase 3.P0-38 — parse a single range bound text into
19177/// the matching element Value for the RangeKind.
19178fn parse_range_element(text: &str, kind: spg_storage::RangeKind) -> Option<Value> {
19179    let text = text.trim().trim_matches('"');
19180    use spg_storage::RangeKind as K;
19181    match kind {
19182        K::Int4 => text.parse::<i32>().ok().map(Value::Int),
19183        K::Int8 => text.parse::<i64>().ok().map(Value::BigInt),
19184        K::Num => {
19185            // Reuse the Numeric parse via the engine's text-coercion
19186            // path; bail to None on failure.
19187            let dot = text.find('.');
19188            let scale: u8 = dot.map_or(0, |p| (text.len() - p - 1) as u8);
19189            let digits: alloc::string::String = text
19190                .chars()
19191                .filter(|c| *c == '-' || c.is_ascii_digit())
19192                .collect();
19193            let scaled: i128 = digits.parse().ok()?;
19194            Some(Value::Numeric { scaled, scale })
19195        }
19196        K::Ts | K::TsTz => {
19197            // Reuse the existing timestamp parse path. v7.17.0
19198            // expects `'YYYY-MM-DD HH:MM:SS[.ffffff]'` in range
19199            // bounds (TZ offset on TsTz is OOS for the initial
19200            // P0-38; ship plain Timestamp shape).
19201            crate::eval::parse_timestamp_literal(text).map(Value::Timestamp)
19202        }
19203        K::Date => crate::eval::parse_date_literal(text).map(Value::Date),
19204    }
19205}
19206
/// v7.17.0 Phase 3.P0-38 — render a Range value as its canonical
/// PG text form. Public wrapper around the private `format_range_str`,
/// exposed for use from spg-server's pgwire layer.
pub fn format_range_text(v: &Value) -> alloc::string::String {
    format_range_str(v)
}
19213
19214fn format_range_str(v: &Value) -> alloc::string::String {
19215    let Value::Range {
19216        lower,
19217        upper,
19218        lower_inc,
19219        upper_inc,
19220        empty,
19221        ..
19222    } = v
19223    else {
19224        return alloc::string::String::new();
19225    };
19226    if *empty {
19227        return "empty".into();
19228    }
19229    let mut out = alloc::string::String::new();
19230    out.push(if *lower_inc { '[' } else { '(' });
19231    if let Some(l) = lower {
19232        out.push_str(&format_range_element(l));
19233    }
19234    out.push(',');
19235    if let Some(u) = upper {
19236        out.push_str(&format_range_element(u));
19237    }
19238    out.push(if *upper_inc { ']' } else { ')' });
19239    out
19240}
19241
19242fn format_range_element(v: &Value) -> alloc::string::String {
19243    match v {
19244        Value::Int(n) => alloc::format!("{n}"),
19245        Value::BigInt(n) => alloc::format!("{n}"),
19246        Value::Date(d) => crate::eval::format_date(*d),
19247        Value::Timestamp(t) => crate::eval::format_timestamp(*t),
19248        Value::Numeric { scaled, scale } => crate::eval::format_numeric(*scaled, *scale),
19249        other => alloc::format!("{other:?}"),
19250    }
19251}
19252
/// v7.17.0 Phase 3.P0-35 — parse a PG `money` literal into i64
/// cents. Accepts, after trimming:
///   * Optional leading `-` (negative)
///   * Optional `$` prefix
///   * Integer portion (required) with optional `,` separators
///   * Optional `.` followed by 1-2 digits (cents); 1 digit
///     auto-pads to 2 (`.5` → 50 cents).
///
/// Returns None on any parse failure or i64 overflow — caller surfaces
/// as hard SQL error.
fn parse_money_str(s: &str) -> Option<i64> {
    let trimmed = s.trim();
    let (negative, unsigned) = trimmed
        .strip_prefix('-')
        .map_or((false, trimmed), |rest| (true, rest.trim_start()));
    let amount = unsigned.strip_prefix('$').unwrap_or(unsigned).trim_start();
    let (whole, fraction) = amount
        .split_once('.')
        .map_or((amount, None), |(w, f)| (w, Some(f)));

    // Integer portion: digits and commas only, with at least one digit.
    if whole.bytes().any(|b| b != b',' && !b.is_ascii_digit()) {
        return None;
    }
    let whole_digits: alloc::string::String = whole.chars().filter(|c| *c != ',').collect();
    if whole_digits.is_empty() {
        return None;
    }
    let dollars: i64 = whole_digits.parse().ok()?;

    let cents: i64 = match fraction {
        None => 0,
        Some(f) => {
            let well_formed = matches!(f.len(), 1 | 2) && f.bytes().all(|b| b.is_ascii_digit());
            if !well_formed {
                return None;
            }
            let value: i64 = f.parse().ok()?;
            // A single fractional digit counts tenths: `.5` is 50 cents.
            if f.len() == 1 {
                value * 10
            } else {
                value
            }
        }
    };

    let magnitude = dollars.checked_mul(100)?.checked_add(cents)?;
    Some(if negative { -magnitude } else { magnitude })
}
19307
19308/// v7.17.0 Phase 3.P0-34 — parse a PG `timetz` literal
19309/// `HH:MM:SS[.fraction]±HH[:MM]` into (us, offset_secs).
19310///
19311/// The offset suffix is MANDATORY: SPG doesn't have a session TZ
19312/// wired into eval, so a bare `HH:MM:SS` literal would be
19313/// ambiguous. Returns None for any parse failure or out-of-range
19314/// component — caller surfaces as a hard SQL error.
19315///
19316/// Offset range: ±14 hours (±50400 seconds), matching PG's
19317/// internal limit.
19318fn parse_timetz_str(s: &str) -> Option<(i64, i32)> {
19319    let s = s.trim();
19320    // Find the offset sign — scan from right since the time part
19321    // never contains '+' / '-' (after the optional fractional dot
19322    // it's all digits and ':').
19323    let bytes = s.as_bytes();
19324    let sign_pos = bytes
19325        .iter()
19326        .enumerate()
19327        .rev()
19328        .find(|&(_, &b)| b == b'+' || b == b'-')
19329        .map(|(i, _)| i)?;
19330    if sign_pos == 0 {
19331        return None; // bare sign — no time component
19332    }
19333    let time_part = &s[..sign_pos];
19334    let offset_part = &s[sign_pos..];
19335    let us = parse_time_str(time_part)?;
19336    let sign: i32 = if offset_part.starts_with('+') { 1 } else { -1 };
19337    let offset_body = &offset_part[1..];
19338    let (hh_str, mm_str) = match offset_body.split_once(':') {
19339        Some((h, m)) => (h, m),
19340        None => (offset_body, "0"),
19341    };
19342    let hh: i32 = hh_str.parse().ok()?;
19343    let mm: i32 = mm_str.parse().ok()?;
19344    if !(0..=14).contains(&hh) || !(0..=59).contains(&mm) {
19345        return None;
19346    }
19347    let total = sign * (hh * 3600 + mm * 60);
19348    if total.abs() > 50_400 {
19349        return None;
19350    }
19351    Some((us, total))
19352}
19353
19354/// v7.17.0 Phase 3.P0-33 — funnel an integer literal through MySQL
19355/// YEAR range validation: 0 sentinel or 1901..=2155. Out-of-range
19356/// surfaces as a hard SQL error (no silent truncation, mirrors PG
19357/// `time_in` / `uuid_in` discipline).
19358fn coerce_int_to_year(n: i64, col_name: &str) -> Result<Value, EngineError> {
19359    if n == 0 || (1901..=2155).contains(&n) {
19360        // u16::try_from cannot fail in this range; the cast also
19361        // covers the 0 sentinel.
19362        return Ok(Value::Year(n as u16));
19363    }
19364    Err(EngineError::Eval(EvalError::TypeMismatch {
19365        detail: alloc::format!(
19366            "year value out of range: {n} (column `{col_name}`; \
19367             MySQL accepts 0 or 1901..=2155)"
19368        ),
19369    }))
19370}
19371
/// v7.17.0 Phase 3.P0-32 — parse a PG `time` literal
/// `HH:MM:SS[.fraction]` into microseconds since 00:00:00.
///
/// Accepts:
///   * `HH:MM:SS`            — exact-second precision
///   * `HH:MM:SS.f` .. `.ffffff` — 1-6 fractional digits, right-padded
///     with zeros to microseconds
///
/// Each of the three clock fields must be a non-empty run of ASCII
/// digits. In particular a sign-prefixed field such as `+12` is
/// rejected, even though Rust's integer parser would accept it.
///
/// Range: hour 0..=23, minute 0..=59, second 0..=59. Anything else
/// returns None — caller surfaces as a hard SQL error (no silent
/// truncation, matches PG's `time_in` behaviour).
fn parse_time_str(s: &str) -> Option<i64> {
    // Multiplier that turns an N-digit fraction into microseconds,
    // indexed by N - 1 (so `.5` -> 5 * 100_000 = 500_000 µs).
    const FRACTION_SCALE: [i64; 6] = [100_000, 10_000, 1_000, 100, 10, 1];

    // Parse one clock field: digits only, value no greater than `max`.
    // The explicit digit check is what keeps `"+12"` out, because
    // `str::parse::<u32>` tolerates a leading plus sign.
    fn clock_field(text: &str, max: u32) -> Option<i64> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        let value: u32 = text.parse().ok()?;
        if value > max {
            None
        } else {
            Some(i64::from(value))
        }
    }

    let s = s.trim();
    let (hms, frac) = match s.split_once('.') {
        Some((h, f)) => (h, Some(f)),
        None => (s, None),
    };

    let mut parts = hms.split(':');
    let hh = clock_field(parts.next()?, 23)?;
    let mm = clock_field(parts.next()?, 59)?;
    let ss = clock_field(parts.next()?, 59)?;
    // Exactly three fields: a fourth `:`-separated piece is an error.
    if parts.next().is_some() {
        return None;
    }

    let frac_us: i64 = match frac {
        None => 0,
        Some(f) => {
            if f.is_empty() || f.len() > 6 || !f.bytes().all(|b| b.is_ascii_digit()) {
                return None;
            }
            // At most six decimal digits, so the fold cannot overflow.
            let digits = f
                .bytes()
                .fold(0_i64, |acc, b| acc * 10 + i64::from(b - b'0'));
            // `f.len()` is in 1..=6 here, so the index is in bounds.
            digits * FRACTION_SCALE[f.len() - 1]
        }
    };

    Some(hh * 3_600_000_000 + mm * 60_000_000 + ss * 1_000_000 + frac_us)
}
19421
19422const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
19423    match t {
19424        ColumnTypeName::SmallInt => DataType::SmallInt,
19425        ColumnTypeName::Int => DataType::Int,
19426        ColumnTypeName::BigInt => DataType::BigInt,
19427        ColumnTypeName::Float => DataType::Float,
19428        ColumnTypeName::Text => DataType::Text,
19429        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
19430        ColumnTypeName::Char(n) => DataType::Char(n),
19431        ColumnTypeName::Bool => DataType::Bool,
19432        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
19433            dim,
19434            encoding: match encoding {
19435                SqlVecEncoding::F32 => VecEncoding::F32,
19436                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
19437                SqlVecEncoding::F16 => VecEncoding::F16,
19438            },
19439        },
19440        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
19441        ColumnTypeName::Date => DataType::Date,
19442        ColumnTypeName::Timestamp => DataType::Timestamp,
19443        ColumnTypeName::Timestamptz => DataType::Timestamptz,
19444        ColumnTypeName::Json => DataType::Json,
19445        ColumnTypeName::Jsonb => DataType::Jsonb,
19446        ColumnTypeName::Bytes => DataType::Bytes,
19447        ColumnTypeName::TextArray => DataType::TextArray,
19448        ColumnTypeName::IntArray => DataType::IntArray,
19449        ColumnTypeName::BigIntArray => DataType::BigIntArray,
19450        ColumnTypeName::TsVector => DataType::TsVector,
19451        ColumnTypeName::TsQuery => DataType::TsQuery,
19452        ColumnTypeName::Uuid => DataType::Uuid,
19453        ColumnTypeName::Time => DataType::Time,
19454        ColumnTypeName::Year => DataType::Year,
19455        ColumnTypeName::TimeTz => DataType::TimeTz,
19456        ColumnTypeName::Money => DataType::Money,
19457        ColumnTypeName::Range(k) => DataType::Range(match k {
19458            spg_sql::ast::RangeKindAst::Int4 => spg_storage::RangeKind::Int4,
19459            spg_sql::ast::RangeKindAst::Int8 => spg_storage::RangeKind::Int8,
19460            spg_sql::ast::RangeKindAst::Num => spg_storage::RangeKind::Num,
19461            spg_sql::ast::RangeKindAst::Ts => spg_storage::RangeKind::Ts,
19462            spg_sql::ast::RangeKindAst::TsTz => spg_storage::RangeKind::TsTz,
19463            spg_sql::ast::RangeKindAst::Date => spg_storage::RangeKind::Date,
19464        }),
19465        ColumnTypeName::Hstore => DataType::Hstore,
19466        ColumnTypeName::IntArray2D => DataType::IntArray2D,
19467        ColumnTypeName::BigIntArray2D => DataType::BigIntArray2D,
19468        ColumnTypeName::TextArray2D => DataType::TextArray2D,
19469    }
19470}
19471
19472/// Convert an INSERT VALUES expression to a storage Value. Supports literal
19473/// expressions, unary-minus over numeric literals, and pgvector-style
19474/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
19475fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
19476    match expr {
19477        Expr::Literal(l) => Ok(literal_to_value(l)),
19478        Expr::Cast { expr, target } => {
19479            let inner_value = literal_expr_to_value(*expr)?;
19480            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
19481        }
19482        Expr::Unary {
19483            op: UnOp::Neg,
19484            expr,
19485        } => match *expr {
19486            Expr::Literal(Literal::Integer(n)) => {
19487                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
19488                // — overflow on negate of i64::MIN is the one edge case.
19489                let neg = n.checked_neg().ok_or_else(|| {
19490                    EngineError::Unsupported("integer literal overflow on negation".into())
19491                })?;
19492                Ok(int_value_for(neg))
19493            }
19494            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
19495            other => Err(EngineError::Unsupported(alloc::format!(
19496                "unary minus over non-literal expression: {other:?}"
19497            ))),
19498        },
19499        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
19500        // INSERT-time. Each element must reduce to a Value through
19501        // `literal_expr_to_value`; NULL elements become `None`.
19502        // v7.11.13 — deduce shape from element values: all Int →
19503        // IntArray; any BigInt → BigIntArray (widening); any Text
19504        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
19505        // the outer Cast arm before reaching here and re-coerce.
19506        Expr::Array(items) => {
19507            let mut materialised: alloc::vec::Vec<Value> =
19508                alloc::vec::Vec::with_capacity(items.len());
19509            for elem in items {
19510                materialised.push(literal_expr_to_value(elem)?);
19511            }
19512            Ok(array_literal_widen(materialised))
19513        }
19514        // Any other Expr shape — fall back to a general evaluation
19515        // against an empty row + empty schema. This unblocks the
19516        // app-common patterns where INSERT VALUES carries a
19517        // non-correlated function call:
19518        //   INSERT INTO t VALUES (concat('U-', 42))
19519        //   INSERT INTO t VALUES (now())
19520        //   INSERT INTO t VALUES (format('%s-%s', 'a', 'b'))
19521        // Any expression that references a column or `$N`
19522        // placeholder fails cleanly inside `eval_expr` with a
19523        // descriptive error; literals + casts + ARRAY[…] continue
19524        // to take the fast paths above so the hot INSERT path is
19525        // unchanged on the common case.
19526        other => {
19527            let empty_schema: alloc::vec::Vec<spg_storage::ColumnSchema> = alloc::vec::Vec::new();
19528            let ctx = EvalContext::new(&empty_schema, None);
19529            let empty_row = spg_storage::Row::new(alloc::vec::Vec::new());
19530            crate::eval::eval_expr(&other, &empty_row, &ctx).map_err(EngineError::Eval)
19531        }
19532    }
19533}
19534
19535fn literal_to_value(l: Literal) -> Value {
19536    match l {
19537        Literal::Integer(n) => int_value_for(n),
19538        Literal::Float(x) => Value::Float(x),
19539        Literal::String(s) => Value::Text(s),
19540        Literal::Bool(b) => Value::Bool(b),
19541        Literal::Null => Value::Null,
19542        Literal::Vector(v) => Value::Vector(v),
19543        Literal::TextArray(items) => Value::TextArray(items),
19544        Literal::IntArray(items) => Value::IntArray(items),
19545        Literal::BigIntArray(items) => Value::BigIntArray(items),
19546        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
19547    }
19548}
19549
19550/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
19551/// columns will still enforce the right tag downstream — this is just the
19552/// default we synthesise from an unannotated integer literal.
19553fn int_value_for(n: i64) -> Value {
19554    if let Ok(small) = i32::try_from(n) {
19555        Value::Int(small)
19556    } else {
19557        Value::BigInt(n)
19558    }
19559}
19560
19561/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
19562/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
19563/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
19564/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
19565/// `NULL` is always permitted; the nullability check happens later in storage.
19566#[allow(clippy::too_many_lines)]
19567/// v7.17.0 Phase 4.4 — reject negative integer values on UNSIGNED
19568/// columns. Called after `coerce_value` at each INSERT / UPDATE
19569/// site that has ColumnSchema context. NULL passes through (a
19570/// nullable UNSIGNED column can legitimately hold NULL).
19571fn check_unsigned_range(
19572    v: &Value,
19573    schema: &ColumnSchema,
19574    position: usize,
19575) -> Result<(), EngineError> {
19576    if !schema.is_unsigned {
19577        return Ok(());
19578    }
19579    let n = match v {
19580        Value::SmallInt(x) => i64::from(*x),
19581        Value::Int(x) => i64::from(*x),
19582        Value::BigInt(x) => *x,
19583        _ => return Ok(()), // non-integer cells (NULL, default) skip
19584    };
19585    if n < 0 {
19586        return Err(EngineError::Unsupported(alloc::format!(
19587            "column {:?} is UNSIGNED but got negative value {n} at position {position}",
19588            schema.name
19589        )));
19590    }
19591    Ok(())
19592}
19593
19594fn coerce_value(
19595    v: Value,
19596    expected: DataType,
19597    col_name: &str,
19598    position: usize,
19599) -> Result<Value, EngineError> {
19600    if v.is_null() {
19601        return Ok(Value::Null);
19602    }
19603    let actual = v.data_type().expect("non-null");
19604    if actual == expected {
19605        return Ok(v);
19606    }
19607    let coerced = match (v, expected) {
19608        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
19609        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
19610        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
19611        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
19612            i128::from(n),
19613            precision,
19614            scale,
19615            col_name,
19616        )?),
19617        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
19618        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
19619        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
19620        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
19621            i128::from(n),
19622            precision,
19623            scale,
19624            col_name,
19625        )?),
19626        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
19627        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
19628        #[allow(clippy::cast_precision_loss)]
19629        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
19630        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
19631            i128::from(n),
19632            precision,
19633            scale,
19634            col_name,
19635        )?),
19636        (Value::Float(x), DataType::Numeric { precision, scale }) => {
19637            Some(numeric_from_float(x, precision, scale, col_name)?)
19638        }
19639        // v7.17.0 Phase 3.P0-67 — Text → NUMERIC. Parse a
19640        // canonical decimal text (`"-1234.56"` / `"42"` /
19641        // `"0.0001"`) into `(mantissa, source_scale)` and rescale
19642        // to the column's declared scale. Required for prepared
19643        // binds: `value_to_literal` flattens a Value::Numeric
19644        // into a TEXT literal because Literal carries no native
19645        // Numeric variant, so the placeholder substitution path
19646        // reaches coerce_value as Text → Numeric. Without this
19647        // arm the round-trip surfaces a TypeMismatch even though
19648        // the cell already left the engine as a valid Numeric.
19649        (Value::Text(s), DataType::Numeric { precision, scale }) => {
19650            let Some((mantissa, src_scale)) = parse_numeric_text(&s) else {
19651                return Err(EngineError::Eval(EvalError::TypeMismatch {
19652                    detail: alloc::format!("cannot parse {s:?} as NUMERIC for column `{col_name}`"),
19653                }));
19654            };
19655            Some(numeric_rescale(
19656                mantissa, src_scale, precision, scale, col_name,
19657            )?)
19658        }
19659        // Text → DATE / TIMESTAMP: parse canonical text forms.
19660        (Value::Text(s), DataType::Date) => {
19661            let d = eval::parse_date_literal(&s).ok_or_else(|| {
19662                EngineError::Eval(EvalError::TypeMismatch {
19663                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
19664                })
19665            })?;
19666            Some(Value::Date(d))
19667        }
19668        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
19669        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
19670        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
19671        // form to the column's numeric / bool type at DEFAULT-
19672        // installation time so the storage check sees a typed
19673        // value. Parse failures fall through to TypeMismatch.
19674        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
19675        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
19676        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
19677        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
19678        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
19679            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
19680            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
19681            _ => None,
19682        },
19683        // v7.17.0 Phase 3.P0-46 — MySQL TINYINT(1) (which Phase 4.3
19684        // classifies as DataType::Bool) is the storage shape every
19685        // mysqldump-restored boolean column lands in. mysqldump emits
19686        // the values as integer `0` / `1` literals, so int → bool
19687        // coerce on INSERT is required for a 0-change cutover. MySQL's
19688        // rule is "any non-zero is truthy"; we follow that for all
19689        // signed int widths so the same coerce path serves an
19690        // explicit `BOOLEAN` column too.
19691        (Value::Int(n), DataType::Bool) => Some(Value::Bool(n != 0)),
19692        (Value::SmallInt(n), DataType::Bool) => Some(Value::Bool(n != 0)),
19693        (Value::BigInt(n), DataType::Bool) => Some(Value::Bool(n != 0)),
19694        // v4.9: Text ↔ JSON coercion. No structural validation —
19695        // any text literal is accepted; the responsibility for
19696        // valid JSON lies with the producer.
19697        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
19698        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
19699        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
19700        // both JSON and JSONB on-disk as `Value::Json(String)` —
19701        // they share the underlying text payload. The cast
19702        // `'<text>'::jsonb` produces a Value::Json that needs to
19703        // satisfy a DataType::Jsonb column. Identity coerce in
19704        // both directions so JSON ↔ JSONB assignments work at all
19705        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
19706        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
19707        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
19708        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
19709        //   - Escape: `Hello\\000world`  (backslash + octal triples)
19710        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
19711        // Errors surface as TypeMismatch so the operator gets a
19712        // clear "this literal isn't a bytea literal" hint.
19713        (Value::Text(s), DataType::Bytes) => {
19714            let bytes = decode_bytea_literal(&s).map_err(|e| {
19715                EngineError::Eval(EvalError::TypeMismatch {
19716                    detail: alloc::format!(
19717                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
19718                    ),
19719                })
19720            })?;
19721            Some(Value::Bytes(bytes))
19722        }
19723        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
19724        // output (lowercase, `\x` prefix). Important when a
19725        // SELECT pulls a bytea cell through a Text column path.
19726        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
19727        // v7.17.0 — Text → UUID. PG accepts canonical hyphenated,
19728        // unhyphenated, uppercase, and `{...}`-braced forms; we
19729        // funnel all four through `spg_storage::parse_uuid_str`.
19730        // A malformed literal surfaces as a SQL TypeMismatch
19731        // rather than silently inserting garbage — `0-change
19732        // cutover` requires that an app inserting bad UUID text
19733        // sees the same hard error PG would raise.
19734        (Value::Text(s), DataType::Uuid) => match spg_storage::parse_uuid_str(&s) {
19735            Some(b) => Some(Value::Uuid(b)),
19736            None => {
19737                return Err(EngineError::Eval(EvalError::TypeMismatch {
19738                    detail: alloc::format!(
19739                        "invalid input syntax for type uuid: {s:?} (column `{col_name}`)"
19740                    ),
19741                }));
19742            }
19743        },
19744        // v7.17.0 — UUID → Text canonical 8-4-4-4-12 lowercase.
19745        // Surfaces when a SELECT plucks a uuid cell through a
19746        // Text column path (e.g. INSERT INTO log SELECT id::text
19747        // FROM other_table).
19748        (Value::Uuid(b), DataType::Text) => Some(Value::Text(spg_storage::format_uuid(&b))),
19749        // v7.17.0 Phase 3.P0-32 — Text → TIME. Accepts
19750        // `HH:MM:SS` and `HH:MM:SS.ffffff` (1-6 fractional digits).
19751        // Out-of-range hour/min/sec is a hard SQL error (no
19752        // silent truncation — same 0-change-cutover discipline
19753        // we apply to UUID).
19754        (Value::Text(s), DataType::Time) => match parse_time_str(&s) {
19755            Some(us) => Some(Value::Time(us)),
19756            None => {
19757                return Err(EngineError::Eval(EvalError::TypeMismatch {
19758                    detail: alloc::format!(
19759                        "invalid input syntax for type time: {s:?} (column `{col_name}`)"
19760                    ),
19761                }));
19762            }
19763        },
19764        // v7.17.0 Phase 3.P0-32 — TIME → Text canonical `HH:MM:SS[.ffffff]`.
19765        (Value::Time(us), DataType::Text) => Some(Value::Text(eval::format_time(us))),
19766        // v7.17.0 Phase 3.P0-33 — int / bigint → YEAR. Range
19767        // check enforces the MySQL canonical 1901..=2155 + 0
19768        // sentinel; out-of-range is a hard SQL error (no silent
19769        // truncation, mirrors P0-32 / P0-25 discipline).
19770        (Value::SmallInt(n), DataType::Year) => Some(coerce_int_to_year(i64::from(n), col_name)?),
19771        (Value::Int(n), DataType::Year) => Some(coerce_int_to_year(i64::from(n), col_name)?),
19772        (Value::BigInt(n), DataType::Year) => Some(coerce_int_to_year(n, col_name)?),
19773        // Text → YEAR. Accepts the 4-digit decimal form only;
19774        // two-digit YEAR (`'99'` → 1999) was deprecated in MySQL
19775        // 5.7 and is out of scope for v7.17.0.
19776        (Value::Text(s), DataType::Year) => match s.trim().parse::<i64>() {
19777            Ok(n) => Some(coerce_int_to_year(n, col_name)?),
19778            Err(_) => {
19779                return Err(EngineError::Eval(EvalError::TypeMismatch {
19780                    detail: alloc::format!(
19781                        "invalid input syntax for type year: {s:?} (column `{col_name}`)"
19782                    ),
19783                }));
19784            }
19785        },
19786        // YEAR → Text 4-digit zero-padded.
19787        (Value::Year(y), DataType::Text) => Some(Value::Text(alloc::format!("{y:04}"))),
19788        // v7.17.0 Phase 3.P0-34 — Text → TIMETZ. Mandatory
19789        // signed offset suffix; missing offset is a hard error
19790        // (SPG has no session TZ wired into eval, unlike PG).
19791        (Value::Text(s), DataType::TimeTz) => match parse_timetz_str(&s) {
19792            Some((us, offset_secs)) => Some(Value::TimeTz { us, offset_secs }),
19793            None => {
19794                return Err(EngineError::Eval(EvalError::TypeMismatch {
19795                    detail: alloc::format!(
19796                        "invalid input syntax for type time with time zone: \
19797                         {s:?} (column `{col_name}`)"
19798                    ),
19799                }));
19800            }
19801        },
19802        // TIMETZ → Text canonical `HH:MM:SS[.ffffff]±HH[:MM]`.
19803        (Value::TimeTz { us, offset_secs }, DataType::Text) => {
19804            Some(Value::Text(eval::format_timetz(us, offset_secs)))
19805        }
19806        // v7.17.0 Phase 3.P0-35 — Text → MONEY. Accepts `$N.NN`,
19807        // `$N,NNN.NN`, optional leading `-`. Bare numeric literals
19808        // arrive via the Int/BigInt/Float/Numeric arms below.
19809        (Value::Text(s), DataType::Money) => match parse_money_str(&s) {
19810            Some(c) => Some(Value::Money(c)),
19811            None => {
19812                return Err(EngineError::Eval(EvalError::TypeMismatch {
19813                    detail: alloc::format!(
19814                        "invalid input syntax for type money: {s:?} (column `{col_name}`)"
19815                    ),
19816                }));
19817            }
19818        },
19819        // Int / BigInt / SmallInt / Float / Numeric → MONEY.
19820        // Bare numeric literal is interpreted as a major-unit
19821        // amount (matches PG: `100`::money → $100.00 = 10000 cents).
19822        (Value::SmallInt(n), DataType::Money) => {
19823            Some(Value::Money(i64::from(n).saturating_mul(100)))
19824        }
19825        (Value::Int(n), DataType::Money) => Some(Value::Money(i64::from(n).saturating_mul(100))),
19826        (Value::BigInt(n), DataType::Money) => Some(Value::Money(n.saturating_mul(100))),
19827        (Value::Float(x), DataType::Money) => {
19828            // Round half-away-from-zero to cents (no_std — no
19829            // `f64::round`, so hand-roll via biased truncation).
19830            let scaled = x * 100.0;
19831            let cents = if scaled >= 0.0 {
19832                (scaled + 0.5) as i64
19833            } else {
19834                (scaled - 0.5) as i64
19835            };
19836            Some(Value::Money(cents))
19837        }
19838        (Value::Numeric { scaled, scale }, DataType::Money) => {
19839            // Convert exact decimal to cents (scale 2). If scale > 2,
19840            // round half-away-from-zero. If scale < 2, multiply up.
19841            let cents = if scale == 2 {
19842                scaled
19843            } else if scale < 2 {
19844                let mult = 10_i128.pow(u32::from(2 - scale));
19845                scaled.saturating_mul(mult)
19846            } else {
19847                let div = 10_i128.pow(u32::from(scale - 2));
19848                let half = div / 2;
19849                let bias = if scaled >= 0 { half } else { -half };
19850                (scaled + bias) / div
19851            };
19852            Some(Value::Money(i64::try_from(cents).unwrap_or(i64::MAX)))
19853        }
19854        // MONEY → Text canonical `$N,NNN.CC`.
19855        (Value::Money(c), DataType::Text) => Some(Value::Text(eval::format_money(c))),
19856        // v7.17.0 Phase 3.P0-38 — Text → Range. Accepts canonical
19857        // PG forms: `'empty'`, `'[a,b)'`, `'(a,b]'`, `'[a,b]'`,
19858        // `'(a,b)'`, with empty lower or upper for unbounded.
19859        (Value::Text(s), DataType::Range(kind)) => match parse_range_str(&s, kind) {
19860            Some(v) => Some(v),
19861            None => {
19862                return Err(EngineError::Eval(EvalError::TypeMismatch {
19863                    detail: alloc::format!(
19864                        "invalid input syntax for range type: {s:?} (column `{col_name}`)"
19865                    ),
19866                }));
19867            }
19868        },
19869        // Range → Text canonical form (`[a,b)`, `'empty'`, etc).
19870        (v @ Value::Range { .. }, DataType::Text) => Some(Value::Text(format_range_str(&v))),
19871        // v7.17.0 Phase 3.P0-39 — Text → Hstore.
19872        (Value::Text(s), DataType::Hstore) => match parse_hstore_str(&s) {
19873            Some(pairs) => Some(Value::Hstore(pairs)),
19874            None => {
19875                return Err(EngineError::Eval(EvalError::TypeMismatch {
19876                    detail: alloc::format!(
19877                        "invalid input syntax for type hstore: {s:?} (column `{col_name}`)"
19878                    ),
19879                }));
19880            }
19881        },
19882        // Hstore → Text canonical `"k"=>"v"` form.
19883        (Value::Hstore(pairs), DataType::Text) => Some(Value::Text(format_hstore_str(&pairs))),
19884        // v7.17.0 Phase 3.P0-40 — Text → 2D arrays via PG
19885        // external `'{{a,b},{c,d}}'` literal.
19886        (Value::Text(s), DataType::IntArray2D) => match parse_int_2d_literal(&s) {
19887            Ok(m) => Some(Value::IntArray2D(m)),
19888            Err(e) => {
19889                return Err(EngineError::Eval(EvalError::TypeMismatch {
19890                    detail: alloc::format!(
19891                        "invalid input syntax for INT[][]: {s:?} (column `{col_name}`): {e}"
19892                    ),
19893                }));
19894            }
19895        },
19896        (Value::Text(s), DataType::BigIntArray2D) => match parse_bigint_2d_literal(&s) {
19897            Ok(m) => Some(Value::BigIntArray2D(m)),
19898            Err(e) => {
19899                return Err(EngineError::Eval(EvalError::TypeMismatch {
19900                    detail: alloc::format!(
19901                        "invalid input syntax for BIGINT[][]: {s:?} (column `{col_name}`): {e}"
19902                    ),
19903                }));
19904            }
19905        },
19906        (Value::Text(s), DataType::TextArray2D) => match parse_text_2d_literal(&s) {
19907            Ok(m) => Some(Value::TextArray2D(m)),
19908            Err(e) => {
19909                return Err(EngineError::Eval(EvalError::TypeMismatch {
19910                    detail: alloc::format!(
19911                        "invalid input syntax for TEXT[][]: {s:?} (column `{col_name}`): {e}"
19912                    ),
19913                }));
19914            }
19915        },
19916        // 2D arrays → Text canonical nested form.
19917        (Value::IntArray2D(rows), DataType::Text) => Some(Value::Text(format_int_2d_text(&rows))),
19918        (Value::BigIntArray2D(rows), DataType::Text) => {
19919            Some(Value::Text(format_bigint_2d_text(&rows)))
19920        }
19921        (Value::TextArray2D(rows), DataType::Text) => Some(Value::Text(format_text_2d_text(&rows))),
19922        // v7.10.11 — Text → TEXT[]. Decode PG's external array
19923        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
19924        // is the literal `NULL`; everything else is a quoted or
19925        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
19926        (Value::Text(s), DataType::TextArray) => {
19927            let arr = decode_text_array_literal(&s).map_err(|e| {
19928                EngineError::Eval(EvalError::TypeMismatch {
19929                    detail: alloc::format!(
19930                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
19931                    ),
19932                })
19933            })?;
19934            Some(Value::TextArray(arr))
19935        }
19936        // v7.16.0 — Text → IntArray / BigIntArray for the
19937        // spg-sqlx Bind path. Decode the PG external form
19938        // `{1,2,3}` as a TEXT array first, then parse each
19939        // element as int. Same shape as the TextArray decode
19940        // above with an element-wise narrow.
19941        (Value::Text(s), DataType::IntArray) => {
19942            let arr = decode_text_array_literal(&s).map_err(|e| {
19943                EngineError::Eval(EvalError::TypeMismatch {
19944                    detail: alloc::format!(
19945                        "cannot parse {s:?} as INT[] for column `{col_name}`: {e}"
19946                    ),
19947                })
19948            })?;
19949            let mut out: Vec<Option<i32>> = Vec::with_capacity(arr.len());
19950            for elem in arr {
19951                match elem {
19952                    None => out.push(None),
19953                    Some(t) => {
19954                        let n: i32 = t.parse().map_err(|_| {
19955                            EngineError::Eval(EvalError::TypeMismatch {
19956                                detail: alloc::format!(
19957                                    "cannot parse {t:?} as INT element for `{col_name}`"
19958                                ),
19959                            })
19960                        })?;
19961                        out.push(Some(n));
19962                    }
19963                }
19964            }
19965            Some(Value::IntArray(out))
19966        }
19967        (Value::Text(s), DataType::BigIntArray) => {
19968            let arr = decode_text_array_literal(&s).map_err(|e| {
19969                EngineError::Eval(EvalError::TypeMismatch {
19970                    detail: alloc::format!(
19971                        "cannot parse {s:?} as BIGINT[] for column `{col_name}`: {e}"
19972                    ),
19973                })
19974            })?;
19975            let mut out: Vec<Option<i64>> = Vec::with_capacity(arr.len());
19976            for elem in arr {
19977                match elem {
19978                    None => out.push(None),
19979                    Some(t) => {
19980                        let n: i64 = t.parse().map_err(|_| {
19981                            EngineError::Eval(EvalError::TypeMismatch {
19982                                detail: alloc::format!(
19983                                    "cannot parse {t:?} as BIGINT element for `{col_name}`"
19984                                ),
19985                            })
19986                        })?;
19987                        out.push(Some(n));
19988                    }
19989                }
19990            }
19991            Some(Value::BigIntArray(out))
19992        }
19993        // v7.10.11 — TEXT[] → Text round-trip uses PG's
19994        // external array form (`{a,b,NULL}`). Lets a SELECT
19995        // pull an array column through any Text-side codepath.
19996        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
19997        // v7.17.0 Phase 3.P0-68 — Text → VECTOR auto-coerce.
19998        // Matches the existing Text → TsVector arm and the
19999        // `::vector` cast: PG-canonical pgvector external form
20000        // (`'[1, 2, -3]'`) becomes a typed Vector value at the
20001        // column boundary. Dim mismatch surfaces as TypeMismatch.
20002        // For SQ8 / HALF encodings we chain through the standard
20003        // quantise helpers so the storage shape matches the
20004        // declared encoding without a second coerce pass.
20005        (Value::Text(s), DataType::Vector { dim, encoding }) => {
20006            let parsed = eval::parse_vector_text(&s).ok_or_else(|| {
20007                EngineError::Eval(EvalError::TypeMismatch {
20008                    detail: alloc::format!("cannot parse {s:?} as VECTOR for column `{col_name}`"),
20009                })
20010            })?;
20011            if parsed.len() != dim as usize {
20012                return Err(EngineError::Eval(EvalError::TypeMismatch {
20013                    detail: alloc::format!(
20014                        "VECTOR({dim}) column `{col_name}` rejects literal of length {}",
20015                        parsed.len()
20016                    ),
20017                }));
20018            }
20019            Some(match encoding {
20020                VecEncoding::F32 => Value::Vector(parsed),
20021                VecEncoding::Sq8 => Value::Sq8Vector(spg_storage::quantize::quantize(&parsed)),
20022                VecEncoding::F16 => {
20023                    Value::HalfVector(spg_storage::halfvec::HalfVector::from_f32_slice(&parsed))
20024                }
20025            })
20026        }
20027        // v7.16.1 — Text → TSVECTOR auto-coerce for the
20028        // INSERT-side wire path (mailrs round-9 A.2.a). PG
20029        // implicitly promotes the TEXT literal at INSERT into a
20030        // TSVECTOR column; SPG previously rejected with a hard
20031        // type mismatch, blocking 23,276 pg_dump rows into
20032        // `messages.search_vector`. We route through the same
20033        // `decode_tsvector_external` the `::tsvector` cast
20034        // already uses, so PG-canonical forms (`'word'`,
20035        // `'word:1A,2B'`, multi-lexeme, empty `''`) all parse.
20036        (Value::Text(s), DataType::TsVector) => {
20037            let lexs = eval::decode_tsvector_external(&s).map_err(|e| {
20038                EngineError::Eval(EvalError::TypeMismatch {
20039                    detail: alloc::format!(
20040                        "cannot parse {s:?} as TSVECTOR for column `{col_name}`: {e}"
20041                    ),
20042                })
20043            })?;
20044            Some(Value::TsVector(lexs))
20045        }
20046        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
20047            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
20048                EngineError::Eval(EvalError::TypeMismatch {
20049                    detail: alloc::format!(
20050                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
20051                    ),
20052                })
20053            })?;
20054            Some(Value::Timestamp(t))
20055        }
20056        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
20057        // TIMESTAMP → day truncation).
20058        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
20059            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
20060        }
20061        // v7.9.21 — Value::Timestamp lands in either Timestamp
20062        // or Timestamptz columns; the on-disk layout is the
20063        // same i64 microseconds UTC.
20064        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
20065        (Value::Timestamp(t), DataType::Date) => {
20066            let days = t.div_euclid(86_400_000_000);
20067            i32::try_from(days).ok().map(Value::Date)
20068        }
20069        (
20070            Value::Numeric {
20071                scaled,
20072                scale: src_scale,
20073            },
20074            DataType::Numeric { precision, scale },
20075        ) => Some(numeric_rescale(
20076            scaled, src_scale, precision, scale, col_name,
20077        )?),
20078        #[allow(clippy::cast_precision_loss)]
20079        (Value::Numeric { scaled, scale }, DataType::Float) => {
20080            let mut div = 1.0_f64;
20081            for _ in 0..scale {
20082                div *= 10.0;
20083            }
20084            Some(Value::Float((scaled as f64) / div))
20085        }
20086        (Value::Numeric { scaled, scale }, DataType::Int) => {
20087            let truncated = numeric_truncate_to_integer(scaled, scale);
20088            i32::try_from(truncated).ok().map(Value::Int)
20089        }
20090        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
20091            let truncated = numeric_truncate_to_integer(scaled, scale);
20092            i64::try_from(truncated).ok().map(Value::BigInt)
20093        }
20094        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
20095            let truncated = numeric_truncate_to_integer(scaled, scale);
20096            i16::try_from(truncated).ok().map(Value::SmallInt)
20097        }
20098        // VARCHAR(n) enforces an upper bound on character count.
20099        (Value::Text(s), DataType::Varchar(max)) => {
20100            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
20101                Some(Value::Text(s))
20102            } else {
20103                return Err(EngineError::Unsupported(alloc::format!(
20104                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
20105                     {} chars",
20106                    s.chars().count()
20107                )));
20108            }
20109        }
20110        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
20111        // when the column declares `VECTOR(N) USING SQ8` and
20112        // the INSERT VALUES expression yields a raw f32 vector
20113        // (the normal pgvector-shape literal). Dim mismatch
20114        // falls through the `_ => None` arm and surfaces as
20115        // `TypeMismatch` with the expected SQ8 column type —
20116        // matching the F32 path's existing error.
20117        (
20118            Value::Vector(v),
20119            DataType::Vector {
20120                dim,
20121                encoding: VecEncoding::Sq8,
20122            },
20123        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
20124        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
20125        // columns. Bit-exact at the storage layer (modulo
20126        // half-precision rounding); no rerank pass needed at
20127        // search time.
20128        (
20129            Value::Vector(v),
20130            DataType::Vector {
20131                dim,
20132                encoding: VecEncoding::F16,
20133            },
20134        ) if v.len() == dim as usize => Some(Value::HalfVector(
20135            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
20136        )),
20137        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
20138        // is already longer we reject (PG truncates trailing-space-only;
20139        // staying strict for v1).
20140        (Value::Text(s), DataType::Char(size)) => {
20141            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
20142            if len > size {
20143                return Err(EngineError::Unsupported(alloc::format!(
20144                    "value for CHAR({size}) column `{col_name}` exceeds length: \
20145                     {len} chars"
20146                )));
20147            }
20148            let need = (size - len) as usize;
20149            let mut padded = s;
20150            padded.reserve(need);
20151            for _ in 0..need {
20152                padded.push(' ');
20153            }
20154            Some(Value::Text(padded))
20155        }
20156        _ => None,
20157    };
20158    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
20159        column: col_name.into(),
20160        expected,
20161        actual,
20162        position,
20163    }))
20164}
20165
20166/// v7.12.4 — render a function arg list into the
20167/// canonical form the storage layer caches as
20168/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
20169/// this string for both display + as a coarse signature key
20170/// for the (deferred) overload resolution v7.12.5+ adds.
20171fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
20172    use core::fmt::Write;
20173    let mut out = alloc::string::String::from("(");
20174    for (i, a) in args.iter().enumerate() {
20175        if i > 0 {
20176            out.push_str(", ");
20177        }
20178        match a.mode {
20179            spg_sql::ast::FunctionArgMode::In => {}
20180            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
20181            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
20182        }
20183        if let Some(n) = &a.name {
20184            out.push_str(n);
20185            out.push(' ');
20186        }
20187        match &a.ty {
20188            spg_sql::ast::FunctionArgType::Typed(t) => {
20189                let _ = write!(out, "{t}");
20190            }
20191            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
20192        }
20193    }
20194    out.push(')');
20195    out
20196}
20197
20198/// v7.19 P5 — true iff `expr` is `unnest(arg)` at the top level
20199/// (case-insensitive). Used by `exec_select_cancel`'s
20200/// projection loop to detect Set-Returning-Function rows that
20201/// need per-row expansion. Only the top-level call counts —
20202/// `coalesce(unnest(arr), 'x')` is NOT a SRF row from the
20203/// projection's perspective; it would surface as an "unknown
20204/// function" mismatch downstream, which is what we want
20205/// (multi-SRF / nested SRF is documented carve-out for v7.19).
20206fn is_top_level_unnest(expr: &spg_sql::ast::Expr) -> bool {
20207    match expr {
20208        spg_sql::ast::Expr::FunctionCall { name, args } => {
20209            name.eq_ignore_ascii_case("unnest") && args.len() == 1
20210        }
20211        _ => false,
20212    }
20213}
20214
20215/// v7.19 P5 — extract the array argument out of a top-level
20216/// `unnest(arg)` call. `None` if `expr` isn't a `unnest` call
20217/// of arity 1 (mirrors `is_top_level_unnest`).
20218fn top_level_unnest_arg(expr: &spg_sql::ast::Expr) -> Option<&spg_sql::ast::Expr> {
20219    match expr {
20220        spg_sql::ast::Expr::FunctionCall { name, args }
20221            if name.eq_ignore_ascii_case("unnest") && args.len() == 1 =>
20222        {
20223            Some(&args[0])
20224        }
20225        _ => None,
20226    }
20227}
20228
20229/// v7.19 P5 — turn an array-typed `Value` into the element list
20230/// `unnest()` projection emits. NULL → empty list (PG: `unnest(NULL)
20231/// = (no rows)`). Non-array values fall through to a type-mismatch
20232/// error.
20233fn array_value_to_elements(v: &Value) -> Result<Vec<Value>, EngineError> {
20234    match v {
20235        Value::Null => Ok(Vec::new()),
20236        Value::TextArray(items) => Ok(items
20237            .iter()
20238            .map(|opt| {
20239                opt.as_ref()
20240                    .map(|s| Value::Text(s.clone()))
20241                    .unwrap_or(Value::Null)
20242            })
20243            .collect()),
20244        Value::IntArray(items) => Ok(items
20245            .iter()
20246            .map(|opt| opt.map(Value::Int).unwrap_or(Value::Null))
20247            .collect()),
20248        Value::BigIntArray(items) => Ok(items
20249            .iter()
20250            .map(|opt| opt.map(Value::BigInt).unwrap_or(Value::Null))
20251            .collect()),
20252        other => Err(EngineError::Eval(EvalError::TypeMismatch {
20253            detail: alloc::format!(
20254                "unnest() expects an array argument, got {:?}",
20255                other.data_type()
20256            ),
20257        })),
20258    }
20259}
20260
20261#[cfg(test)]
20262mod tests {
20263    use super::*;
20264    use alloc::vec;
20265
20266    fn unwrap_command_ok(r: &QueryResult) -> usize {
20267        match r {
20268            QueryResult::CommandOk { affected, .. } => *affected,
20269            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
20270        }
20271    }
20272
#[test]
fn update_seek_positions_engages_on_indexed_eq() {
    // Fixture: table `b` with an index on `id`, holding the rows
    // (0, 0) through (99, 99).
    let mut engine = Engine::new();
    for ddl in [
        "CREATE TABLE b (id INT NOT NULL, v INT NOT NULL)",
        "CREATE INDEX b_id ON b (id)",
    ] {
        engine.execute(ddl).unwrap();
    }
    for i in 0..100 {
        let insert_sql = alloc::format!("INSERT INTO b VALUES ({i}, {i})");
        engine.execute(&insert_sql).unwrap();
    }

    // Parse the UPDATE directly so its WHERE clause can be handed to
    // the seek helpers.
    let stmt = spg_sql::parser::parse_statement("UPDATE b SET v = v + 1 WHERE id = 42")
        .expect("parse");
    let Statement::Update(update) = stmt else {
        panic!("expected Update, got {stmt:?}");
    };
    let w = update.where_.as_ref().expect("where");
    let table = engine.catalog().get("b").unwrap();
    let schema_cols = table.schema().columns.clone();

    // Check every intermediate resolution step on its own, so a
    // failure names the stage that broke.
    let (lhs, op, rhs) = match w {
        Expr::Binary { lhs, op, rhs } => (lhs, op, rhs),
        _ => panic!("WHERE not Binary: {w:?}"),
    };
    assert_eq!(*op, BinOp::Eq, "op not Eq");

    let Some((col_pos, value)) = resolve_col_literal_pair(lhs, rhs, &schema_cols, "b") else {
        panic!("resolve_col_literal_pair None: lhs={lhs:?} rhs={rhs:?}");
    };
    let has_index = table.index_on(col_pos).is_some();
    assert!(has_index, "no index on col {col_pos}");
    let has_key = IndexKey::from_value(&value).is_some();
    assert!(has_key, "IndexKey::from_value None for {value:?}");

    // End to end: the seek must resolve to exactly row position 42.
    let positions = try_index_seek_positions(w, &schema_cols, table, "b");
    assert_eq!(positions, Some(vec![42]), "seek did not engage");
}
20313
#[test]
fn create_table_registers_schema() {
    // CREATE TABLE must register exactly one table whose column
    // list mirrors the DDL: `a` is a non-nullable INT, `b` is TEXT.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
        .unwrap();
    assert_eq!(engine.catalog().table_count(), 1);

    let table = engine.catalog().get("foo").unwrap();
    let columns = &table.schema().columns;
    assert_eq!(columns.len(), 2);

    let (col_a, col_b) = (&columns[0], &columns[1]);
    assert_eq!(col_a.ty, DataType::Int);
    assert!(!col_a.nullable);
    assert_eq!(col_b.ty, DataType::Text);
}
20326
#[test]
fn create_table_vector_default_is_f32_encoded() {
    // A VECTOR column declared without a USING clause is recorded
    // in the schema with the F32 encoding.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (v VECTOR(8))").unwrap();

    let want = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::F32,
    };
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, want);
}
20340
#[test]
fn create_table_vector_using_sq8_succeeds() {
    // v6.0.1 step 3: the step-1 fence in `column_def_to_schema` has
    // been lifted, so CREATE TABLE now stores an SQ8 column type in
    // the catalog. (The INSERT-side quantisation of raw f32 input is
    // covered by the next test.)
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(8) USING SQ8)")
        .unwrap();

    let want = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::Sq8,
    };
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, want);
}
20357
#[test]
fn insert_into_sq8_column_quantises_f32_payload() {
    // v6.0.1 step 3: at INSERT time `coerce_value` turns a raw
    // `Value::Vector(Vec<f32>)` literal into the column's quantised
    // representation. The stored cell must therefore be a
    // `Value::Sq8Vector` rather than the original f32 buffer —
    // that is what delivers the 4× compression target.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (v VECTOR(4) USING SQ8)",
        "INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])",
    ] {
        engine.execute(sql).unwrap();
    }

    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.rows().len(), 1);

    let first_row = &table.rows()[0];
    let quantised = match &first_row.values[0] {
        Value::Sq8Vector(q) => q,
        other => panic!("expected Sq8Vector cell, got {other:?}"),
    };
    assert_eq!(quantised.bytes.len(), 4);
    // min/max come from the payload itself: min=0.0, max=1.0.
    assert!((quantised.min - 0.0).abs() < 1e-6);
    assert!((quantised.max - 1.0).abs() < 1e-6);
}
20382
#[test]
fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
    // v6.0.3: CREATE TABLE accepts USING HALF, and the INSERT path
    // rewrites the incoming `Value::Vector(Vec<f32>)` cell into a
    // `Value::HalfVector(HalfVector)` through the `coerce_value`
    // arm. Round-tripping back to f32 is bit-exact for values that
    // are representable in f16, so 0.0 / 0.25 / 0.5 / 1.0 land on
    // their grid points exactly.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (v VECTOR(4) USING HALF)",
        "INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])",
    ] {
        engine.execute(sql).unwrap();
    }

    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.rows().len(), 1);

    let first_row = &table.rows()[0];
    let half = match &first_row.values[0] {
        Value::HalfVector(h) => h,
        other => panic!("expected HalfVector cell, got {other:?}"),
    };
    assert_eq!(half.dim(), 4);

    let back = half.to_f32_vec();
    let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
    for (g, e) in back.iter().zip(expected.iter()) {
        let delta = (g - e).abs();
        assert!(delta < 1e-6, "{g} vs {e} should be exact on f16 grid");
    }
}
20413
#[test]
fn alter_index_rebuild_in_place_succeeds() {
    // v6.0.4: a bare REBUILD (no encoding switch) walks every row
    // again to rebuild the NSW graph. This exercises the engine
    // dispatch and storage helper plumbing while leaving every
    // cell's encoding untouched.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
        .unwrap();

    for i in 0..8_i32 {
        #[allow(clippy::cast_precision_loss)]
        let base = (i as f32) * 0.1;
        let insert_sql = alloc::format!(
            "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
            b1 = base + 0.01,
            b2 = base + 0.02,
        );
        engine.execute(&insert_sql).unwrap();
    }

    for sql in [
        "CREATE INDEX t_idx ON t USING hnsw (v)",
        "ALTER INDEX t_idx REBUILD",
    ] {
        engine.execute(sql).unwrap();
    }

    // No encoding clause was given, so the schema still reports F32.
    let want = DataType::Vector {
        dim: 3,
        encoding: VecEncoding::F32,
    };
    assert_eq!(
        engine.catalog().get("t").unwrap().schema().columns[1].ty,
        want,
    );
}
20444
#[test]
fn alter_index_rebuild_with_encoding_switches_cell_type() {
    // v6.0.4: REBUILD WITH (encoding = SQ8) recodes each stored cell
    // from F32 to SQ8 and rebuilds the graph on top of the new
    // encoding. Afterwards the cells must be `Sq8Vector` and the
    // schema must report encoding = Sq8.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)",
        "INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])",
        "CREATE INDEX t_idx ON t USING hnsw (v)",
        "ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)",
    ] {
        engine.execute(sql).unwrap();
    }

    let table = engine.catalog().get("t").unwrap();
    let want = DataType::Vector {
        dim: 4,
        encoding: VecEncoding::Sq8,
    };
    assert_eq!(table.schema().columns[1].ty, want);

    let cell_is_sq8 = matches!(table.rows()[0].values[1], Value::Sq8Vector(_));
    assert!(cell_is_sq8);
}
20469
#[test]
fn alter_index_rebuild_unknown_index_errors() {
    // REBUILD against an index name that does not exist must fail
    // with `IndexNotFound` carrying the requested name.
    let mut engine = Engine::new();
    let err = engine.execute("ALTER INDEX nope REBUILD").unwrap_err();

    let is_index_not_found = matches!(
        &err,
        EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
    );
    assert!(is_index_not_found, "got: {err}");
}
20482
#[test]
fn alter_index_rebuild_on_btree_index_errors() {
    // In v6.0.4 REBUILD has no semantic meaning for a B-tree index;
    // the storage layer rejects it with `Unsupported`.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (id INT NOT NULL)",
        "INSERT INTO t VALUES (1)",
        "CREATE INDEX t_idx ON t (id)",
    ] {
        engine.execute(sql).unwrap();
    }

    let err = engine.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
    let is_unsupported = matches!(&err, EngineError::Storage(StorageError::Unsupported(_)));
    assert!(is_unsupported, "got: {err}");
}
20497
#[test]
fn prepared_insert_substitutes_placeholders() {
    // v6.1.1: prepare() parses once; execute_prepared() walks the
    // AST and swaps $1/$2 for the supplied param Values BEFORE
    // dispatch sees them. The logical outcome matches a simple-query
    // INSERT, but parsing happens once per *statement* rather than
    // once per execution.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
        .unwrap();

    let insert = engine.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
    let people = [(1, "alice"), (2, "bob"), (3, "carol")];
    for (id, name) in people {
        let params = [Value::Int(id), Value::Text(name.into())];
        engine.execute_prepared(insert.clone(), &params).unwrap();
    }

    // Verify through an ordinary simple-query SELECT.
    let rows = match engine.execute("SELECT id, name FROM t").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 3);
}
20520
#[test]
fn prepared_select_with_placeholder_filters_rows() {
    // Rows are (i, i * 7) for i in 0..10; binding $1 = 35 must
    // select exactly the row with id 5.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    for i in 0..10_i32 {
        let insert_sql = alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7);
        engine.execute(&insert_sql).unwrap();
    }

    let select = engine.prepare("SELECT id FROM t WHERE v = $1").unwrap();
    let outcome = engine.execute_prepared(select, &[Value::Int(35)]).unwrap();
    let rows = match outcome {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };

    // v = 35 means i*7 = 35, hence i = 5.
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(5));
}
20539
#[test]
fn prepared_too_few_params_errors() {
    // Executing a one-placeholder statement with an empty parameter
    // list must report placeholder 1 as out of range for 0 bound
    // values.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    let insert = engine.prepare("INSERT INTO t VALUES ($1)").unwrap();

    let err = engine.execute_prepared(insert, &[]).unwrap_err();
    let is_out_of_range = matches!(
        &err,
        EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
    );
    assert!(is_out_of_range, "got: {err}");
}
20554
#[test]
fn bytea_cast_round_trips_text_input() {
    // v7.18 — `'hello'::bytea` yields the raw bytes of the text.
    // Closes the mailrs D-pre #3 reverse-acceptance gap.
    let engine = Engine::new();
    let rows = match engine.execute_readonly("SELECT 'hello'::bytea").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 1);

    let want = Value::Bytes(b"hello".to_vec());
    assert_eq!(rows[0].values[0], want);
}
20567
#[test]
fn bytea_cast_pg_escape_hex_form() {
    // E'\\xdeadbeef'::bytea — the E-string first decodes to
    // `\xdeadbeef` (10 literal chars); `::bytea` then reads that as
    // a PG hex-form bytea literal, giving 4 bytes.
    let engine = Engine::new();
    let outcome = engine
        .execute_readonly(r"SELECT E'\\xdeadbeef'::bytea")
        .unwrap();
    let rows = match outcome {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };

    let want = Value::Bytes(vec![0xde, 0xad, 0xbe, 0xef]);
    assert_eq!(rows[0].values[0], want);
}
20583
#[test]
fn bytea_cast_chains_through_octet_length() {
    // octet_length('hello'::bytea) → 5. Shows that the cast composes
    // inside a larger expression and not only at the top level.
    let engine = Engine::new();
    let outcome = engine
        .execute_readonly("SELECT octet_length('hello'::bytea)")
        .unwrap();
    let rows = match outcome {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };

    // Either integer width is accepted for the length; widen to i64
    // so a single comparison covers both.
    let length: i64 = match &rows[0].values[0] {
        Value::Int(n) => i64::from(*n),
        Value::BigInt(n) => *n,
        other => panic!("expected integer length, got {other:?}"),
    };
    assert_eq!(length, 5);
}
20602
#[test]
fn readonly_prepared_on_snapshot_select_with_placeholder() {
    // v7.18 — the sqlx Pool fan-out depends on running prepared
    // SELECTs against a frozen snapshot without re-entering the
    // writer engine. This mirrors the simple-query SELECT path in
    // `execute_readonly_on_snapshot`, but is driven by a Statement
    // plus bound params (the shape sqlx's Execute path produces).
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    for i in 0..10_i32 {
        let insert_sql = alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7);
        engine.execute(&insert_sql).unwrap();
    }

    let snapshot = engine.clone_snapshot();
    let select = engine.prepare("SELECT id FROM t WHERE v = $1").unwrap();
    let outcome =
        Engine::execute_readonly_prepared_on_snapshot(&snapshot, select, &[Value::Int(35)])
            .unwrap();
    let rows = match outcome {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };

    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(5));
}
20628
#[test]
fn readonly_prepared_on_snapshot_rejects_writes() {
    // Prepared DDL / DML sent down the readonly path has to come
    // back as `WriteRequired`, so that the spg-sqlx connection layer
    // can route it to the writer mutex rather than the snapshot.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    let snapshot = engine.clone_snapshot();
    let insert = engine.prepare("INSERT INTO t VALUES ($1)").unwrap();

    let outcome =
        Engine::execute_readonly_prepared_on_snapshot(&snapshot, insert, &[Value::Int(1)]);
    let err = outcome.unwrap_err();
    assert!(matches!(&err, EngineError::WriteRequired), "got: {err}");
}
20642
20643    #[test]
20644    fn readonly_prepared_on_snapshot_frozen_view() {
20645        // The snapshot reflects engine state at clone_snapshot()
20646        // time. Writes after the snapshot are NOT visible — caller
20647        // takes a fresh snapshot (or `AsyncReadHandle::refresh()`)
20648        // to see them. This is the contract the per-statement
20649        // refresh in spg-sqlx relies on.
20650        let mut e = Engine::new();
20651        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
20652        e.execute("INSERT INTO t VALUES (1)").unwrap();
20653        let snapshot = e.clone_snapshot();
20654        e.execute("INSERT INTO t VALUES (2)").unwrap();
20655        let stmt = e.prepare("SELECT id FROM t WHERE id = $1").unwrap();
20656        let QueryResult::Rows { rows, .. } =
20657            Engine::execute_readonly_prepared_on_snapshot(&snapshot, stmt, &[Value::Int(2)])
20658                .unwrap()
20659        else {
20660            panic!("expected Rows")
20661        };
20662        assert!(rows.is_empty(), "id=2 was inserted after snapshot");
20663    }
20664
20665    #[test]
20666    fn describe_prepared_on_snapshot_resolves_columns() {
20667        // v7.18 — sqlx's Executor::describe path on the readonly
20668        // fan-out needs to resolve column names + types against
20669        // the snapshot's catalog (not the live engine's catalog,
20670        // which may have moved on).
20671        let mut e = Engine::new();
20672        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
20673            .unwrap();
20674        let snapshot = e.clone_snapshot();
20675        let stmt = e.prepare("SELECT id, name FROM t WHERE id = $1").unwrap();
20676        let (_params, cols) = Engine::describe_prepared_on_snapshot(&snapshot, &stmt);
20677        assert_eq!(cols.len(), 2);
20678        assert_eq!(cols[0].name, "id");
20679        assert_eq!(cols[0].ty, DataType::Int);
20680        assert_eq!(cols[1].name, "name");
20681        assert_eq!(cols[1].ty, DataType::Text);
20682    }
20683
20684    #[test]
20685    fn insert_into_half_column_dim_mismatch_errors() {
20686        let mut e = Engine::new();
20687        e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
20688            .unwrap();
20689        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
20690        assert!(matches!(
20691            &err,
20692            EngineError::Storage(StorageError::TypeMismatch { .. })
20693        ));
20694    }
20695
20696    #[test]
20697    fn insert_into_sq8_column_dim_mismatch_errors() {
20698        // Dim mismatch falls through the `coerce_value` Vector→Sq8
20699        // arm's guard and surfaces as `TypeMismatch` — the same
20700        // error the F32 path produces today, so client error
20701        // handling stays uniform across encodings.
20702        let mut e = Engine::new();
20703        e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
20704        let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
20705        assert!(
20706            matches!(
20707                &err,
20708                EngineError::Storage(StorageError::TypeMismatch { .. })
20709            ),
20710            "got: {err}",
20711        );
20712    }
20713
20714    #[test]
20715    fn create_table_duplicate_errors() {
20716        let mut e = Engine::new();
20717        e.execute("CREATE TABLE foo (a INT)").unwrap();
20718        let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
20719        assert!(matches!(
20720            err,
20721            EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
20722        ));
20723    }
20724
20725    #[test]
20726    fn insert_into_unknown_table_errors() {
20727        let mut e = Engine::new();
20728        let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
20729        assert!(matches!(
20730            err,
20731            EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
20732        ));
20733    }
20734
20735    #[test]
20736    fn insert_happy_path_reports_one_affected() {
20737        let mut e = Engine::new();
20738        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
20739        let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
20740        assert_eq!(unwrap_command_ok(&r), 1);
20741        assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
20742    }
20743
20744    #[test]
20745    fn insert_arity_mismatch_propagates() {
20746        let mut e = Engine::new();
20747        e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
20748        let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
20749        assert!(matches!(
20750            err,
20751            EngineError::Storage(StorageError::ArityMismatch { .. })
20752        ));
20753    }
20754
20755    #[test]
20756    fn insert_negative_integer_via_unary_minus() {
20757        let mut e = Engine::new();
20758        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
20759        e.execute("INSERT INTO foo VALUES (-7)").unwrap();
20760        let rows = e.catalog().get("foo").unwrap().rows();
20761        assert_eq!(rows[0].values[0], Value::Int(-7));
20762    }
20763
20764    #[test]
20765    fn insert_expression_evaluated_against_empty_context() {
20766        // PG-canonical: INSERT VALUES accepts an arbitrary scalar
20767        // expression. The engine evaluates against an empty row
20768        // context — column references would error, but pure
20769        // arithmetic / function calls are fine.
20770        let mut e = Engine::new();
20771        e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
20772        e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap();
20773        let rows = e.catalog().get("foo").unwrap().rows();
20774        assert_eq!(rows[0].values[0], Value::Int(3));
20775    }
20776
20777    #[test]
20778    fn select_star_returns_all_rows_in_insertion_order() {
20779        let mut e = Engine::new();
20780        e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
20781            .unwrap();
20782        e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
20783        e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
20784        e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
20785
20786        let r = e.execute("SELECT * FROM foo").unwrap();
20787        let QueryResult::Rows { columns, rows } = r else {
20788            panic!("expected Rows")
20789        };
20790        assert_eq!(columns.len(), 2);
20791        assert_eq!(columns[0].name, "a");
20792        assert_eq!(rows.len(), 3);
20793        assert_eq!(
20794            rows[1].values,
20795            vec![Value::Int(2), Value::Text("two".into())]
20796        );
20797    }
20798
20799    #[test]
20800    fn select_star_on_empty_table_returns_zero_rows() {
20801        let mut e = Engine::new();
20802        e.execute("CREATE TABLE foo (a INT)").unwrap();
20803        let r = e.execute("SELECT * FROM foo").unwrap();
20804        match r {
20805            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
20806            QueryResult::CommandOk { .. } => panic!("expected Rows"),
20807        }
20808    }
20809
20810    // --- v0.4: WHERE + projection ------------------------------------------
20811
20812    fn make_three_row_users(e: &mut Engine) {
20813        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
20814            .unwrap();
20815        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
20816            .unwrap();
20817        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
20818            .unwrap();
20819        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
20820            .unwrap();
20821    }
20822
20823    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
20824        match r {
20825            QueryResult::Rows { columns, rows } => (columns, rows),
20826            QueryResult::CommandOk { .. } => panic!("expected Rows"),
20827        }
20828    }
20829
20830    #[test]
20831    fn where_filter_passes_only_true_rows() {
20832        let mut e = Engine::new();
20833        make_three_row_users(&mut e);
20834        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
20835        let (_, rows) = unwrap_rows(r);
20836        assert_eq!(rows.len(), 2);
20837        assert_eq!(rows[0].values[0], Value::Int(2));
20838        assert_eq!(rows[1].values[0], Value::Int(3));
20839    }
20840
20841    #[test]
20842    fn where_with_null_result_filters_out_row() {
20843        let mut e = Engine::new();
20844        make_three_row_users(&mut e);
20845        // score is NULL for bob → score > 80 is NULL → row excluded
20846        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
20847        let (_, rows) = unwrap_rows(r);
20848        assert_eq!(rows.len(), 1);
20849        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
20850    }
20851
20852    #[test]
20853    fn projection_named_columns() {
20854        let mut e = Engine::new();
20855        make_three_row_users(&mut e);
20856        let r = e.execute("SELECT name, score FROM users").unwrap();
20857        let (cols, rows) = unwrap_rows(r);
20858        assert_eq!(cols.len(), 2);
20859        assert_eq!(cols[0].name, "name");
20860        assert_eq!(cols[1].name, "score");
20861        assert_eq!(rows.len(), 3);
20862        assert_eq!(
20863            rows[0].values,
20864            vec![Value::Text("alice".into()), Value::Int(90)]
20865        );
20866    }
20867
20868    #[test]
20869    fn projection_with_column_alias() {
20870        let mut e = Engine::new();
20871        make_three_row_users(&mut e);
20872        let r = e
20873            .execute("SELECT name AS who FROM users WHERE id = 1")
20874            .unwrap();
20875        let (cols, rows) = unwrap_rows(r);
20876        assert_eq!(cols[0].name, "who");
20877        assert_eq!(rows.len(), 1);
20878        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
20879    }
20880
20881    #[test]
20882    fn qualified_column_with_table_alias_resolves() {
20883        let mut e = Engine::new();
20884        make_three_row_users(&mut e);
20885        let r = e
20886            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
20887            .unwrap();
20888        let (cols, rows) = unwrap_rows(r);
20889        assert_eq!(cols.len(), 2);
20890        assert_eq!(rows.len(), 2);
20891    }
20892
20893    #[test]
20894    fn qualified_column_with_wrong_alias_errors() {
20895        let mut e = Engine::new();
20896        make_three_row_users(&mut e);
20897        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
20898        assert!(matches!(
20899            err,
20900            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
20901        ));
20902    }
20903
20904    #[test]
20905    fn select_unknown_column_errors_in_projection() {
20906        let mut e = Engine::new();
20907        make_three_row_users(&mut e);
20908        let err = e.execute("SELECT ghost FROM users").unwrap_err();
20909        assert!(matches!(
20910            err,
20911            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
20912        ));
20913    }
20914
20915    #[test]
20916    fn where_unknown_column_errors() {
20917        let mut e = Engine::new();
20918        make_three_row_users(&mut e);
20919        let err = e
20920            .execute("SELECT * FROM users WHERE ghost = 1")
20921            .unwrap_err();
20922        assert!(matches!(
20923            err,
20924            EngineError::Eval(EvalError::ColumnNotFound { .. })
20925        ));
20926    }
20927
20928    #[test]
20929    fn expression_projection_evaluates_and_renders() {
20930        // Compound expressions in the SELECT list are evaluated per row;
20931        // the output column is typed TEXT, name defaults to the expression.
20932        let mut e = Engine::new();
20933        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
20934        e.execute("INSERT INTO t VALUES (3)").unwrap();
20935        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
20936        assert_eq!(rows.len(), 1);
20937        // The expression evaluates to integer 3; rendered as the cell value
20938        // (storage::Value::Int(3) since arithmetic kept ints).
20939        assert_eq!(rows[0].values[0], Value::Int(3));
20940    }
20941
20942    #[test]
20943    fn select_unknown_table_errors() {
20944        let mut e = Engine::new();
20945        let err = e.execute("SELECT * FROM ghost").unwrap_err();
20946        assert!(matches!(
20947            err,
20948            EngineError::Storage(StorageError::TableNotFound { .. })
20949        ));
20950    }
20951
20952    #[test]
20953    fn invalid_sql_returns_parse_error() {
20954        // v4.4: UPDATE is now real SQL, so use a true syntactic
20955        // garbage payload for the parse-error path.
20956        let mut e = Engine::new();
20957        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
20958        assert!(matches!(err, EngineError::Parse(_)));
20959    }
20960
20961    // --- v0.8 CREATE INDEX + index seek ------------------------------------
20962
20963    #[test]
20964    fn create_index_registers_on_table() {
20965        let mut e = Engine::new();
20966        make_three_row_users(&mut e);
20967        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
20968        let t = e.catalog().get("users").unwrap();
20969        assert_eq!(t.indices().len(), 1);
20970        assert_eq!(t.indices()[0].name, "by_name");
20971    }
20972
20973    #[test]
20974    fn create_index_on_unknown_table_errors() {
20975        let mut e = Engine::new();
20976        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
20977        assert!(matches!(
20978            err,
20979            EngineError::Storage(StorageError::TableNotFound { .. })
20980        ));
20981    }
20982
20983    #[test]
20984    fn create_index_on_unknown_column_errors() {
20985        let mut e = Engine::new();
20986        make_three_row_users(&mut e);
20987        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
20988        assert!(matches!(
20989            err,
20990            EngineError::Storage(StorageError::ColumnNotFound { .. })
20991        ));
20992    }
20993
20994    #[test]
20995    fn select_eq_uses_index_returns_same_rows_as_scan() {
20996        // Build two engines: one with an index, one without. Same query →
20997        // same row set (index is a planner optimisation, not a semantic
20998        // change).
20999        let mut without = Engine::new();
21000        make_three_row_users(&mut without);
21001        let mut with = Engine::new();
21002        make_three_row_users(&mut with);
21003        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
21004
21005        let q = "SELECT * FROM users WHERE id = 2";
21006        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
21007        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
21008        assert_eq!(no_idx_rows, idx_rows);
21009        assert_eq!(idx_rows.len(), 1);
21010    }
21011
21012    #[test]
21013    fn select_eq_with_no_matching_index_value_returns_empty() {
21014        let mut e = Engine::new();
21015        make_three_row_users(&mut e);
21016        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
21017        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
21018        assert_eq!(rows.len(), 0);
21019    }
21020
21021    // --- v0.9 transactions -------------------------------------------------
21022
21023    #[test]
21024    fn begin_sets_in_transaction_flag() {
21025        let mut e = Engine::new();
21026        assert!(!e.in_transaction());
21027        e.execute("BEGIN").unwrap();
21028        assert!(e.in_transaction());
21029    }
21030
21031    #[test]
21032    fn double_begin_errors() {
21033        let mut e = Engine::new();
21034        e.execute("BEGIN").unwrap();
21035        let err = e.execute("BEGIN").unwrap_err();
21036        assert_eq!(err, EngineError::TransactionAlreadyOpen);
21037    }
21038
21039    #[test]
21040    fn commit_without_begin_errors() {
21041        let mut e = Engine::new();
21042        let err = e.execute("COMMIT").unwrap_err();
21043        assert_eq!(err, EngineError::NoActiveTransaction);
21044    }
21045
21046    #[test]
21047    fn rollback_without_begin_errors() {
21048        let mut e = Engine::new();
21049        let err = e.execute("ROLLBACK").unwrap_err();
21050        assert_eq!(err, EngineError::NoActiveTransaction);
21051    }
21052
21053    #[test]
21054    fn commit_applies_shadow_to_committed_catalog() {
21055        let mut e = Engine::new();
21056        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21057        e.execute("BEGIN").unwrap();
21058        e.execute("INSERT INTO t VALUES (1)").unwrap();
21059        e.execute("INSERT INTO t VALUES (2)").unwrap();
21060        e.execute("COMMIT").unwrap();
21061        assert!(!e.in_transaction());
21062        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
21063    }
21064
21065    #[test]
21066    fn rollback_discards_shadow() {
21067        let mut e = Engine::new();
21068        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21069        e.execute("BEGIN").unwrap();
21070        e.execute("INSERT INTO t VALUES (1)").unwrap();
21071        e.execute("INSERT INTO t VALUES (2)").unwrap();
21072        e.execute("ROLLBACK").unwrap();
21073        assert!(!e.in_transaction());
21074        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
21075    }
21076
21077    #[test]
21078    fn select_during_tx_sees_uncommitted_writes_own_session() {
21079        // The shadow catalog is read by SELECTs while a TX is open — the
21080        // session can see its own pending writes.
21081        let mut e = Engine::new();
21082        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21083        e.execute("BEGIN").unwrap();
21084        e.execute("INSERT INTO t VALUES (42)").unwrap();
21085        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
21086        assert_eq!(rows.len(), 1);
21087        assert_eq!(rows[0].values[0], Value::Int(42));
21088    }
21089
21090    #[test]
21091    fn snapshot_with_no_users_is_bare_catalog_format() {
21092        let mut e = Engine::new();
21093        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21094        let bytes = e.snapshot();
21095        assert_eq!(
21096            &bytes[..8],
21097            b"SPGDB001",
21098            "must be the bare v3.x catalog magic"
21099        );
21100        let e2 = Engine::restore_envelope(&bytes).unwrap();
21101        assert!(e2.users().is_empty());
21102        assert_eq!(e2.catalog().table_count(), 1);
21103    }
21104
21105    #[test]
21106    fn snapshot_with_users_round_trips_both_via_envelope() {
21107        let mut e = Engine::new();
21108        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21109        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
21110        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
21111            .unwrap();
21112        let bytes = e.snapshot();
21113        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
21114        let e2 = Engine::restore_envelope(&bytes).unwrap();
21115        assert_eq!(e2.users().len(), 2);
21116        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
21117        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
21118        assert_eq!(e2.verify_user("alice", "wrong"), None);
21119        assert_eq!(e2.catalog().table_count(), 1);
21120    }
21121
21122    #[test]
21123    fn ddl_inside_tx_also_rolled_back() {
21124        let mut e = Engine::new();
21125        e.execute("BEGIN").unwrap();
21126        e.execute("CREATE TABLE t (v INT)").unwrap();
21127        // Visible inside the TX.
21128        e.execute("SELECT * FROM t").unwrap();
21129        e.execute("ROLLBACK").unwrap();
21130        // Gone after rollback.
21131        let err = e.execute("SELECT * FROM t").unwrap_err();
21132        assert!(matches!(
21133            err,
21134            EngineError::Storage(StorageError::TableNotFound { .. })
21135        ));
21136    }
21137
21138    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
21139
21140    #[test]
21141    fn create_publication_lands_in_catalog() {
21142        let mut e = Engine::new();
21143        assert!(e.publications().is_empty());
21144        e.execute("CREATE PUBLICATION pub_a").unwrap();
21145        assert_eq!(e.publications().len(), 1);
21146        assert!(e.publications().contains("pub_a"));
21147    }
21148
21149    #[test]
21150    fn create_publication_duplicate_errors() {
21151        let mut e = Engine::new();
21152        e.execute("CREATE PUBLICATION pub_a").unwrap();
21153        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
21154        assert!(
21155            alloc::format!("{err:?}").contains("DuplicateName"),
21156            "got {err:?}"
21157        );
21158    }
21159
21160    #[test]
21161    fn drop_publication_silent_when_absent() {
21162        let mut e = Engine::new();
21163        // PG-compatible: DROP a publication that doesn't exist
21164        // succeeds (no-op) but reports zero affected.
21165        let r = e.execute("DROP PUBLICATION nope").unwrap();
21166        match r {
21167            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
21168            other => panic!("expected CommandOk, got {other:?}"),
21169        }
21170    }
21171
21172    #[test]
21173    fn drop_publication_present_reports_one_affected() {
21174        let mut e = Engine::new();
21175        e.execute("CREATE PUBLICATION pub_a").unwrap();
21176        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
21177        match r {
21178            QueryResult::CommandOk {
21179                affected,
21180                modified_catalog,
21181            } => {
21182                assert_eq!(affected, 1);
21183                assert!(modified_catalog);
21184            }
21185            other => panic!("expected CommandOk, got {other:?}"),
21186        }
21187        assert!(e.publications().is_empty());
21188    }
21189
21190    #[test]
21191    fn publications_persist_across_snapshot_restore() {
21192        // The persist-across-restart ship-gate at the engine layer —
21193        // snapshot → restore_envelope round trip must preserve the
21194        // publication catalog. The spg-server e2e covers the
21195        // process-restart variant.
21196        let mut e = Engine::new();
21197        e.execute("CREATE PUBLICATION pub_a").unwrap();
21198        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
21199            .unwrap();
21200        let snap = e.snapshot();
21201        let e2 = Engine::restore_envelope(&snap).unwrap();
21202        assert_eq!(e2.publications().len(), 2);
21203        assert!(e2.publications().contains("pub_a"));
21204        assert!(e2.publications().contains("pub_b"));
21205    }
21206
21207    #[test]
21208    fn create_publication_allowed_inside_transaction() {
21209        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
21210        // CREATE PUBLICATION inside a TX and the auto-commit
21211        // wrap path needs the same allowance.
21212        let mut e = Engine::new();
21213        e.execute("BEGIN").unwrap();
21214        e.execute("CREATE PUBLICATION pub_a").unwrap();
21215        e.execute("COMMIT").unwrap();
21216        assert!(e.publications().contains("pub_a"));
21217    }
21218
21219    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
21220
21221    #[test]
21222    fn create_publication_for_table_list_lands_with_scope() {
21223        let mut e = Engine::new();
21224        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
21225        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
21226        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
21227            .unwrap();
21228        let scope = e.publications().get("pub_a").cloned();
21229        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
21230            panic!("expected ForTables scope, got {scope:?}")
21231        };
21232        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
21233    }
21234
21235    #[test]
21236    fn create_publication_all_tables_except_lands_with_scope() {
21237        let mut e = Engine::new();
21238        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
21239            .unwrap();
21240        let scope = e.publications().get("pub_a").cloned();
21241        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
21242            panic!("expected AllTablesExcept scope, got {scope:?}")
21243        };
21244        assert_eq!(ts, alloc::vec!["t3".to_string()]);
21245    }
21246
21247    #[test]
21248    fn show_publications_empty_returns_zero_rows() {
21249        let e = Engine::new();
21250        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
21251        let QueryResult::Rows { rows, columns } = r else {
21252            panic!()
21253        };
21254        assert!(rows.is_empty());
21255        assert_eq!(columns.len(), 3);
21256        assert_eq!(columns[0].name, "name");
21257        assert_eq!(columns[1].name, "scope");
21258        assert_eq!(columns[2].name, "table_count");
21259    }
21260
21261    #[test]
21262    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
21263        let mut e = Engine::new();
21264        e.execute("CREATE PUBLICATION z_pub").unwrap();
21265        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
21266            .unwrap();
21267        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
21268            .unwrap();
21269        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
21270        let QueryResult::Rows { rows, .. } = r else {
21271            panic!()
21272        };
21273        assert_eq!(rows.len(), 3);
21274        // Alphabetical order: a_pub, m_pub, z_pub.
21275        let names: Vec<&str> = rows
21276            .iter()
21277            .map(|r| {
21278                if let Value::Text(s) = &r.values[0] {
21279                    s.as_str()
21280                } else {
21281                    panic!()
21282                }
21283            })
21284            .collect();
21285        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
21286        // Row 0 — a_pub scope summary + table_count = 2.
21287        match &rows[0].values[1] {
21288            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
21289            other => panic!("expected Text, got {other:?}"),
21290        }
21291        assert_eq!(rows[0].values[2], Value::Int(2));
21292        // Row 1 — m_pub.
21293        match &rows[1].values[1] {
21294            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
21295            other => panic!("expected Text, got {other:?}"),
21296        }
21297        assert_eq!(rows[1].values[2], Value::Int(1));
21298        // Row 2 — z_pub (AllTables → NULL count).
21299        match &rows[2].values[1] {
21300            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
21301            other => panic!("expected Text, got {other:?}"),
21302        }
21303        assert_eq!(rows[2].values[2], Value::Null);
21304    }
21305
21306    #[test]
21307    fn for_list_scopes_persist_across_snapshot() {
21308        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
21309        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
21310        let mut e = Engine::new();
21311        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
21312        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
21313            .unwrap();
21314        let snap = e.snapshot();
21315        let e2 = Engine::restore_envelope(&snap).unwrap();
21316        assert_eq!(e2.publications().len(), 2);
21317        let p1 = e2.publications().get("p1").cloned();
21318        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
21319            panic!("p1 scope lost: {p1:?}")
21320        };
21321        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
21322        let p2 = e2.publications().get("p2").cloned();
21323        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
21324            panic!("p2 scope lost: {p2:?}")
21325        };
21326        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
21327    }
21328
21329    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
21330
21331    #[test]
21332    fn create_subscription_lands_in_catalog_with_defaults() {
21333        let mut e = Engine::new();
21334        e.execute(
21335            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
21336        )
21337        .unwrap();
21338        let s = e.subscriptions().get("sub_a").cloned().expect("present");
21339        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
21340        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
21341        assert!(s.enabled);
21342        assert_eq!(s.last_received_pos, 0);
21343    }
21344
21345    #[test]
21346    fn create_subscription_duplicate_name_errors() {
21347        let mut e = Engine::new();
21348        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
21349            .unwrap();
21350        let err = e
21351            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
21352            .unwrap_err();
21353        assert!(
21354            alloc::format!("{err:?}").contains("DuplicateName"),
21355            "got {err:?}"
21356        );
21357    }
21358
21359    #[test]
21360    fn drop_subscription_silent_when_absent() {
21361        let mut e = Engine::new();
21362        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
21363        match r {
21364            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
21365            other => panic!("expected CommandOk, got {other:?}"),
21366        }
21367    }
21368
21369    #[test]
21370    fn subscription_advance_updates_last_pos_monotone() {
21371        let mut e = Engine::new();
21372        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
21373            .unwrap();
21374        assert!(e.subscription_advance("s", 100));
21375        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
21376        assert!(e.subscription_advance("s", 50)); // stale → ignored
21377        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
21378        assert!(e.subscription_advance("s", 200));
21379        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
21380        assert!(!e.subscription_advance("missing", 1));
21381    }
21382
21383    #[test]
21384    fn show_subscriptions_returns_rows_ordered_by_name() {
21385        let mut e = Engine::new();
21386        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
21387            .unwrap();
21388        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
21389            .unwrap();
21390        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
21391        let QueryResult::Rows { rows, columns } = r else {
21392            panic!()
21393        };
21394        assert_eq!(rows.len(), 2);
21395        assert_eq!(columns.len(), 5);
21396        assert_eq!(columns[0].name, "name");
21397        assert_eq!(columns[4].name, "last_received_pos");
21398        // Alphabetical: a_sub, z_sub.
21399        let names: Vec<&str> = rows
21400            .iter()
21401            .map(|r| {
21402                if let Value::Text(s) = &r.values[0] {
21403                    s.as_str()
21404                } else {
21405                    panic!()
21406                }
21407            })
21408            .collect();
21409        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
21410        // Row 0: a_sub
21411        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
21412        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
21413        assert_eq!(rows[0].values[3], Value::Bool(true));
21414        assert_eq!(rows[0].values[4], Value::BigInt(0));
21415        // Row 1: z_sub — publications join with ", "
21416        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
21417    }
21418
21419    #[test]
21420    fn subscriptions_persist_across_snapshot_envelope_v4() {
21421        let mut e = Engine::new();
21422        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
21423            .unwrap();
21424        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
21425            .unwrap();
21426        e.subscription_advance("s2", 42);
21427        let snap = e.snapshot();
21428        let e2 = Engine::restore_envelope(&snap).unwrap();
21429        assert_eq!(e2.subscriptions().len(), 2);
21430        let s1 = e2.subscriptions().get("s1").unwrap();
21431        assert_eq!(s1.conn_str, "h=A");
21432        assert_eq!(
21433            s1.publications,
21434            alloc::vec!["p1".to_string(), "p2".to_string()]
21435        );
21436        assert_eq!(s1.last_received_pos, 0);
21437        let s2 = e2.subscriptions().get("s2").unwrap();
21438        assert_eq!(s2.last_received_pos, 42);
21439    }
21440
21441    #[test]
21442    fn v3_envelope_loads_with_empty_subscriptions() {
21443        // v3 snapshot (publications-only). Forge it by hand so we
21444        // verify v6.1.4 readers don't panic — they must surface
21445        // empty subscriptions and a populated publication table.
21446        let mut e = Engine::new();
21447        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
21448        let catalog = e.catalog.serialize();
21449        let users = crate::users::serialize_users(&e.users);
21450        let pubs = e.publications.serialize();
21451        let mut buf = Vec::new();
21452        buf.extend_from_slice(b"SPGENV01");
21453        buf.push(3u8); // v3
21454        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
21455        buf.extend_from_slice(&catalog);
21456        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
21457        buf.extend_from_slice(&users);
21458        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
21459        buf.extend_from_slice(&pubs);
21460        let crc = spg_crypto::crc32::crc32(&buf);
21461        buf.extend_from_slice(&crc.to_le_bytes());
21462
21463        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
21464        assert!(e2.subscriptions().is_empty());
21465        assert!(e2.publications().contains("pub_legacy"));
21466    }
21467
21468    #[test]
21469    fn create_subscription_allowed_inside_transaction() {
21470        let mut e = Engine::new();
21471        e.execute("BEGIN").unwrap();
21472        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
21473            .unwrap();
21474        e.execute("COMMIT").unwrap();
21475        assert!(e.subscriptions().contains("s"));
21476    }
21477
21478    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
21479    #[test]
21480    fn analyze_populates_histogram_bounds() {
21481        let mut e = Engine::new();
21482        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
21483            .unwrap();
21484        for i in 0..50 {
21485            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
21486                .unwrap();
21487        }
21488        e.execute("ANALYZE t").unwrap();
21489        let stats = e.statistics();
21490        let id_stats = stats.get("t", "id").unwrap();
21491        assert!(id_stats.histogram_bounds.len() >= 2);
21492        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
21493        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
21494        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
21495        assert_eq!(id_stats.n_distinct, 50);
21496    }
21497
21498    #[test]
21499    fn reanalyze_overwrites_prior_stats() {
21500        let mut e = Engine::new();
21501        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21502        for i in 0..10 {
21503            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21504                .unwrap();
21505        }
21506        e.execute("ANALYZE t").unwrap();
21507        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
21508        assert_eq!(n1, 10);
21509        for i in 10..30 {
21510            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21511                .unwrap();
21512        }
21513        e.execute("ANALYZE t").unwrap();
21514        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
21515        assert_eq!(n2, 30);
21516    }
21517
21518    #[test]
21519    fn analyze_unknown_table_errors() {
21520        let mut e = Engine::new();
21521        let err = e.execute("ANALYZE nonexistent").unwrap_err();
21522        assert!(matches!(
21523            err,
21524            EngineError::Storage(StorageError::TableNotFound { .. })
21525        ));
21526    }
21527
21528    #[test]
21529    fn bare_analyze_covers_all_user_tables() {
21530        let mut e = Engine::new();
21531        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
21532        e.execute("CREATE TABLE t2 (name TEXT NOT NULL)").unwrap();
21533        e.execute("INSERT INTO t1 VALUES (1)").unwrap();
21534        e.execute("INSERT INTO t2 VALUES ('alice')").unwrap();
21535        let r = e.execute("ANALYZE").unwrap();
21536        match r {
21537            QueryResult::CommandOk {
21538                affected,
21539                modified_catalog,
21540            } => {
21541                assert_eq!(affected, 2);
21542                assert!(modified_catalog);
21543            }
21544            other => panic!("expected CommandOk, got {other:?}"),
21545        }
21546        assert!(e.statistics().get("t1", "id").is_some());
21547        assert!(e.statistics().get("t2", "name").is_some());
21548    }
21549
21550    #[test]
21551    fn select_from_spg_statistic_returns_rows_per_column() {
21552        let mut e = Engine::new();
21553        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
21554            .unwrap();
21555        e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
21556        e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
21557        e.execute("ANALYZE t").unwrap();
21558        let r = e.execute_readonly("SELECT * FROM spg_statistic").unwrap();
21559        let QueryResult::Rows { rows, columns } = r else {
21560            panic!()
21561        };
21562        // v6.7.0 — spg_statistic gained a `cold_row_count` column.
21563        assert_eq!(columns.len(), 6);
21564        assert_eq!(columns[0].name, "table_name");
21565        assert_eq!(columns[4].name, "histogram_bounds");
21566        assert_eq!(columns[5].name, "cold_row_count");
21567        assert_eq!(rows.len(), 2, "one row per column of t");
21568        // Sorted by (table_name, column_name).
21569        match (&rows[0].values[0], &rows[0].values[1]) {
21570            (Value::Text(t), Value::Text(c)) => {
21571                assert_eq!(t, "t");
21572                // BTreeMap orders (table, column); columns "id" < "label".
21573                assert_eq!(c, "id");
21574            }
21575            _ => panic!(),
21576        }
21577    }
21578
21579    #[test]
21580    fn analyze_skips_vector_columns() {
21581        // Vector columns have their own stats shape (HNSW graph);
21582        // ANALYZE leaves them out of spg_statistic.
21583        let mut e = Engine::new();
21584        e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
21585            .unwrap();
21586        e.execute("INSERT INTO t VALUES (1, [1, 2, 3])").unwrap();
21587        e.execute("ANALYZE t").unwrap();
21588        assert!(e.statistics().get("t", "id").is_some());
21589        assert!(e.statistics().get("t", "v").is_none());
21590    }
21591
21592    #[test]
21593    fn statistics_persist_across_envelope_v5_round_trip() {
21594        let mut e = Engine::new();
21595        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21596        for i in 0..20 {
21597            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21598                .unwrap();
21599        }
21600        e.execute("ANALYZE").unwrap();
21601        let snap = e.snapshot();
21602        let e2 = Engine::restore_envelope(&snap).unwrap();
21603        let s = e2.statistics().get("t", "id").unwrap();
21604        assert_eq!(s.n_distinct, 20);
21605    }
21606
21607    // ── v6.2.1 auto-analyze threshold ───────────────────────────
21608
21609    #[test]
21610    fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
21611        // For a table with 0 rows then 10 inserts → modified=10,
21612        // row_count=10. Threshold = 0.1 × max(10, 100) = 10. So
21613        // after the 10th INSERT the threshold is met.
21614        let mut e = Engine::new();
21615        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21616        for i in 0..9 {
21617            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21618                .unwrap();
21619        }
21620        assert!(e.tables_needing_analyze().is_empty(), "9 < threshold");
21621        e.execute("INSERT INTO t VALUES (9)").unwrap();
21622        let needs = e.tables_needing_analyze();
21623        assert_eq!(needs, alloc::vec!["t".to_string()]);
21624    }
21625
21626    #[test]
21627    fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
21628        // After ANALYZE on 1000 rows, threshold = 0.1 × row_count.
21629        // Each new INSERT bumps both modified and row_count, so to
21630        // trigger from N=1000 we need modifications ≥ 0.1 × (1000+M),
21631        // i.e. M ≥ 112. The test inserts 50 (no fire), then 150
21632        // more (200 total mods, row_count=1200, threshold=120 → fire).
21633        let mut e = Engine::new();
21634        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21635        for i in 0..1000 {
21636            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21637                .unwrap();
21638        }
21639        e.execute("ANALYZE t").unwrap();
21640        assert!(e.tables_needing_analyze().is_empty(), "fresh ANALYZE");
21641        for i in 1000..1050 {
21642            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21643                .unwrap();
21644        }
21645        assert!(
21646            e.tables_needing_analyze().is_empty(),
21647            "50 inserts < threshold of ~105"
21648        );
21649        for i in 1050..1200 {
21650            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21651                .unwrap();
21652        }
21653        assert_eq!(
21654            e.tables_needing_analyze(),
21655            alloc::vec!["t".to_string()],
21656            "200 inserts > 0.1 × 1200 threshold"
21657        );
21658    }
21659
21660    #[test]
21661    fn auto_analyze_threshold_resets_after_analyze() {
21662        let mut e = Engine::new();
21663        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21664        for i in 0..200 {
21665            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
21666                .unwrap();
21667        }
21668        assert!(!e.tables_needing_analyze().is_empty());
21669        e.execute("ANALYZE").unwrap();
21670        assert!(
21671            e.tables_needing_analyze().is_empty(),
21672            "ANALYZE must reset the counter"
21673        );
21674    }
21675
21676    #[test]
21677    fn auto_analyze_threshold_tracks_updates_and_deletes() {
21678        let mut e = Engine::new();
21679        e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
21680            .unwrap();
21681        for i in 0..50 {
21682            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
21683                .unwrap();
21684        }
21685        e.execute("ANALYZE t").unwrap();
21686        // UPDATE 20 rows + DELETE 5 → modified=25. Threshold = 0.1
21687        // × max(50, 100) = 10. So 25 >= 10 → trigger.
21688        e.execute("UPDATE t SET label = 'y' WHERE id < 20").unwrap();
21689        e.execute("DELETE FROM t WHERE id >= 45").unwrap();
21690        assert_eq!(e.tables_needing_analyze(), alloc::vec!["t".to_string()]);
21691    }
21692
21693    #[test]
21694    fn v4_envelope_loads_with_empty_statistics() {
21695        // Forge a v4 envelope by hand: catalog + users + pubs +
21696        // subs trailer, no statistics. A v6.2.0 reader must accept
21697        // it and surface an empty Statistics.
21698        let mut e = Engine::new();
21699        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
21700            .unwrap();
21701        let catalog = e.catalog.serialize();
21702        let users = crate::users::serialize_users(&e.users);
21703        let pubs = e.publications.serialize();
21704        let subs = e.subscriptions.serialize();
21705        let mut buf = Vec::new();
21706        buf.extend_from_slice(b"SPGENV01");
21707        buf.push(4u8);
21708        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
21709        buf.extend_from_slice(&catalog);
21710        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
21711        buf.extend_from_slice(&users);
21712        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
21713        buf.extend_from_slice(&pubs);
21714        buf.extend_from_slice(&u32::try_from(subs.len()).unwrap().to_le_bytes());
21715        buf.extend_from_slice(&subs);
21716        let crc = spg_crypto::crc32::crc32(&buf);
21717        buf.extend_from_slice(&crc.to_le_bytes());
21718        let e2 = Engine::restore_envelope(&buf).expect("v4 envelope restores");
21719        assert!(e2.statistics().is_empty());
21720    }
21721
21722    #[test]
21723    fn v1_v2_envelope_loads_with_empty_publications() {
21724        // A snapshot taken before v6.1.2 (no publication trailer,
21725        // envelope v2) must still deserialise — and the resulting
21726        // engine must report zero publications. Use the engine's own
21727        // round-trip with no publications: that emits v3 but with an
21728        // empty pubs block. Then forge a v2 envelope by hand to lock
21729        // the back-compat path.
21730        let mut e = Engine::new();
21731        // Force users to be non-empty so the snapshot takes the
21732        // envelope path rather than the bare-catalog fallback.
21733        e.create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
21734            .unwrap();
21735
21736        // Forge an envelope v2: same shape as v3 but no pubs trailer.
21737        let catalog = e.catalog.serialize();
21738        let users = crate::users::serialize_users(&e.users);
21739        let mut buf = Vec::new();
21740        buf.extend_from_slice(b"SPGENV01");
21741        buf.push(2u8); // v2
21742        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
21743        buf.extend_from_slice(&catalog);
21744        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
21745        buf.extend_from_slice(&users);
21746        let crc = spg_crypto::crc32::crc32(&buf);
21747        buf.extend_from_slice(&crc.to_le_bytes());
21748
21749        let e2 = Engine::restore_envelope(&buf).expect("v2 envelope restores");
21750        assert!(e2.publications().is_empty());
21751    }
21752}