Skip to main content

spg_engine/
lib.rs

1//! SPG execution engine — v0.3 wires the SQL front-end to the in-memory
2//! storage layer. Implements `CREATE TABLE`, single-row `INSERT VALUES`, and
3//! `SELECT * FROM <table>` (no WHERE yet — that lands in v0.4 alongside
4//! expression evaluation against rows).
5#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod copy;
11pub mod describe;
12pub mod eval;
13pub mod fts;
14pub mod json;
15pub mod memoize;
16pub mod plan_cache;
17pub mod publications;
18pub mod query_stats;
19pub mod reorder;
20pub mod selectivity;
21pub mod statistics;
22pub mod subscriptions;
23pub mod triggers;
24pub mod users;
25
26pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
27
28use alloc::borrow::Cow;
29use alloc::boxed::Box;
30use alloc::collections::BTreeMap;
31use alloc::string::{String, ToString};
32use alloc::vec::Vec;
33use core::fmt;
34
35use spg_sql::ast::{
36    BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement, CreatePublicationStatement,
37    CreateSubscriptionStatement, CreateTableStatement, CreateUserStatement, Expr, FrameBound,
38    FrameKind, FromClause, IndexMethod, InsertStatement, JoinKind, Literal, OrderBy, SelectItem,
39    SelectStatement, Statement, TableRef, UnOp, UnionKind, VecEncoding as SqlVecEncoding,
40    WindowFrame,
41};
42// v7.16.0 — re-export the parsed-statement AST so downstream
43// crates (spg-embedded → spg-sqlx) don't need a direct dep on
44// spg-sql for the prepare/bind handle.
45pub use spg_sql::ast::Statement as ParsedStatement;
46use spg_sql::parser::{self, ParseError};
47use spg_storage::{
48    Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
49    TableSchema, Value, VecEncoding,
50};
51
52use crate::eval::{EvalContext, EvalError};
53
/// Result of executing one statement.
///
/// Marked `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// DDL or DML succeeded.
    ///
    /// `affected` is the row count for `INSERT` and 0 elsewhere.
    /// `modified_catalog` tells the server whether this statement
    /// caused the *committed* catalog to change — it's the signal to
    /// snapshot/audit. False for `BEGIN`/`ROLLBACK`, false for writeful
    /// statements executed inside a transaction (those only touch the
    /// shadow), and true for `COMMIT` and for writes outside a TX.
    CommandOk {
        /// Row count for `INSERT`; 0 for every other statement.
        affected: usize,
        /// `true` when the committed catalog changed (see above).
        modified_catalog: bool,
    },
    /// `SELECT` returned a (possibly empty) row set.
    Rows {
        /// Schema of each result column.
        columns: Vec<ColumnSchema>,
        /// The result rows; may be empty.
        rows: Vec<Row>,
    },
}
76
/// All errors the engine can return.
///
/// Marked `#[non_exhaustive]` from v7.5.0 onward: external `match`
/// must include a `_` arm so new variants in subsequent v7.x releases
/// are not breaking changes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// Error reported by the SQL parser (`spg_sql::parser::ParseError`).
    Parse(ParseError),
    /// Error reported by the storage layer (`spg_storage::StorageError`).
    Storage(StorageError),
    /// Error reported by expression evaluation (`crate::eval::EvalError`).
    Eval(EvalError),
    /// Front-end accepted a construct that the v0.x executor doesn't support.
    Unsupported(String),
    /// `BEGIN` while another transaction is already open.
    TransactionAlreadyOpen,
    /// `COMMIT` / `ROLLBACK` with no active transaction.
    NoActiveTransaction,
    /// v4.0 sentinel: `execute_readonly` got a statement that
    /// mutates engine state (INSERT / CREATE / BEGIN / COMMIT / …).
    /// The caller should retake the write lock and dispatch through
    /// `execute(&mut self)` instead.
    WriteRequired,
    /// v4.2: a SELECT would have returned more rows than the
    /// configured `max_query_rows` cap. Carries the cap.
    RowLimitExceeded(usize),
    /// v7.30.3 (mailrs round-26): a SELECT's join/filter
    /// materialisation would have held more (approximate) heap
    /// bytes than the configured `max_query_bytes` cap. The row
    /// cap above counts rows; this counts bytes, because one row
    /// can be a multi-MB mail body — 1000 fat rows pressure the
    /// host long before any row ceiling trips. Carries the cap.
    QueryBytesExceeded(usize),
    /// v4.5: cooperative cancellation — the host (server's
    /// per-query watchdog) set the cancel flag while a long-running
    /// SELECT / UPDATE / DELETE was scanning rows. The partial work
    /// is discarded; the caller should surface this as a timeout
    /// to the client.
    Cancelled,
}
116
117impl fmt::Display for EngineError {
118    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119        match self {
120            Self::Parse(e) => write!(f, "parse: {e}"),
121            Self::Storage(e) => write!(f, "storage: {e}"),
122            Self::Eval(e) => write!(f, "eval: {e}"),
123            Self::Unsupported(s) => write!(f, "unsupported: {s}"),
124            Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
125            Self::NoActiveTransaction => f.write_str("no active transaction"),
126            Self::WriteRequired => {
127                f.write_str("statement requires a write lock (use execute, not execute_readonly)")
128            }
129            Self::RowLimitExceeded(n) => {
130                write!(f, "query exceeded max_query_rows={n}")
131            }
132            Self::QueryBytesExceeded(n) => {
133                write!(
134                    f,
135                    "query materialisation exceeded max_query_bytes={n} (set SPG_MAX_QUERY_BYTES to raise, 0 to disable)"
136                )
137            }
138            Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
139        }
140    }
141}
142
143impl From<ParseError> for EngineError {
144    fn from(e: ParseError) -> Self {
145        Self::Parse(e)
146    }
147}
148impl From<StorageError> for EngineError {
149    fn from(e: StorageError) -> Self {
150        Self::Storage(e)
151    }
152}
153impl From<EvalError> for EngineError {
154    fn from(e: EvalError) -> Self {
155        Self::Eval(e)
156    }
157}
158
// NOTE(review): the paragraph below describes the `Engine` struct
// (declared further down in this file), not `ClockFn`. It is kept as a
// plain comment so it no longer leaks into the `ClockFn` rustdoc.
//
// The execution engine. Holds the catalog and (later) other server-scope
// state. `Engine::new()` is intentionally cheap so callers can construct one
// per database, per test.

/// Function pointer that returns "now" as microseconds since Unix
/// epoch. The engine is `no_std`, so it can't reach for `std::time`
/// itself — callers (`spg-server`, the sqllogictest runner) inject a
/// concrete implementation. When no clock is installed (`None`),
/// `NOW()` / `CURRENT_*` raise `Unsupported`.
pub type ClockFn = fn() -> i64;
168
/// Function pointer that produces 16 cryptographically random bytes.
///
/// Like `ClockFn`, the engine is `no_std` and can't reach for
/// `/dev/urandom` itself — the host (`spg-server`) injects an OS-backed
/// source. When no salt function is installed (`None`), SQL-driven
/// `CREATE USER` falls back to a deterministic salt derived from the
/// username (acceptable in tests; the server always installs a real
/// RNG so production paths never see this).
pub type SaltFn = fn() -> [u8; 16];
176
/// v7.17.0 Phase 2.3 — monotonic time source for deadline-aware
/// cancellation (PG `statement_timeout`). Returns microseconds
/// since some host-stable monotonic origin (typically the first
/// call into `Instant::now()` on the server). The engine never
/// calls `Instant::now()` directly so the crate stays `#![no_std]`.
pub type MonotonicNowFn = fn() -> u64;

/// Deadline carried by a `CancelToken`: the time source to poll plus
/// the absolute instant at which the token trips.
#[derive(Debug, Clone, Copy)]
struct Deadline {
    /// Monotonic clock the deadline is compared against.
    now_fn: MonotonicNowFn,
    /// Absolute deadline in `now_fn()` units (microseconds).
    deadline_us: u64,
}

/// v4.5 cooperative cancellation token. A long-running SELECT /
/// UPDATE / DELETE checks `is_cancelled` at row-loop checkpoints
/// and bails with `EngineError::Cancelled`. The host
/// (`spg-server`) creates an `AtomicBool` per query, spawns a
/// watchdog thread that sets it after `SPG_QUERY_TIMEOUT_MS`,
/// and passes it via `execute_with_cancel` / `execute_readonly_with_cancel`.
///
/// `CancelToken::none()` is a no-op — used by the legacy `execute`
/// and `execute_readonly` entry points so existing callers don't
/// change.
#[derive(Debug, Clone, Copy)]
pub struct CancelToken<'a> {
    /// Host-owned cancel flag, if any; the token is tripped once it
    /// reads `true`.
    flag: Option<&'a core::sync::atomic::AtomicBool>,
    // v7.17.0 Phase 2.3 — when set, every existing `cancel.check()`
    // checkpoint also fires `EngineError::Cancelled` once
    // `(now_fn)() >= deadline_us`. No new check sites, no thread
    // spawn per query — the monotonic now-fn read is a vDSO
    // `clock_gettime(CLOCK_MONOTONIC)` (~20ns) and only runs when
    // the host actually wired a deadline (statement_timeout > 0).
    deadline: Option<Deadline>,
}
212
213impl<'a> CancelToken<'a> {
214    #[must_use]
215    pub const fn none() -> Self {
216        Self {
217            flag: None,
218            deadline: None,
219        }
220    }
221
222    #[must_use]
223    pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
224        Self {
225            flag: Some(f),
226            deadline: None,
227        }
228    }
229
230    /// v7.17.0 Phase 2.3 — attach a monotonic deadline. `now_fn`
231    /// must return microseconds since a stable origin; the token
232    /// trips when `now_fn() >= deadline_us`. Compose with
233    /// `from_flag(...)` when both a watchdog flag and a per-statement
234    /// timeout are in play (e.g. server-wide `SPG_QUERY_TIMEOUT_MS`
235    /// plus session `statement_timeout`); the tighter of the two
236    /// wins by virtue of either signaling first.
237    #[must_use]
238    pub const fn with_deadline(mut self, now_fn: MonotonicNowFn, deadline_us: u64) -> Self {
239        self.deadline = Some(Deadline {
240            now_fn,
241            deadline_us,
242        });
243        self
244    }
245
246    #[must_use]
247    pub fn is_cancelled(self) -> bool {
248        if self
249            .flag
250            .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
251        {
252            return true;
253        }
254        // Deadline check is the second branch so the "no timeout"
255        // hot path (`deadline: None`) elides the now-fn call —
256        // predicted-not-taken on the SLO INSERT loop.
257        if let Some(d) = self.deadline
258            && (d.now_fn)() >= d.deadline_us
259        {
260            return true;
261        }
262        false
263    }
264
265    /// Returns `Err(Cancelled)` if the token has been tripped.
266    /// Used at row-loop checkpoints to bail cooperatively without
267    /// scattering raw `is_cancelled` checks across the executor.
268    #[inline]
269    pub fn check(self) -> Result<(), EngineError> {
270        if self.is_cancelled() {
271            Err(EngineError::Cancelled)
272        } else {
273            Ok(())
274        }
275    }
276}
277
278// ---- snapshot envelope (v4.1, extended with CRC32 in v4.37,  ----
279// ----   publications in v6.1.2 v3, subscriptions in v6.1.4 v4) ----
280//
281// Wraps a catalog blob + a user blob behind a small header so the
282// server can persist both atomically without inventing a new file.
283// Bare catalog blobs (v3.x) still load via `restore_envelope` since
284// the magic check fails fast and the function falls back to
285// `Catalog::deserialize`.
286//
287// Layout — v1 (v4.1, no CRC):
288//   [8 bytes magic "SPGENV01"]
289//   [u8 version = 1]
290//   [u32 catalog_len][catalog bytes]
291//   [u32 users_len][users bytes]
292//
293// Layout — v2 (v4.37, CRC32 of body):
294//   [8 bytes magic "SPGENV01"]
295//   [u8 version = 2]
296//   [u32 catalog_len][catalog bytes]
297//   [u32 users_len][users bytes]
298//   [u32 crc32]                      ← CRC32 of every byte before it.
299//
300// Layout — v3 (v6.1.2, publications trailer):
301//   [8 bytes magic "SPGENV01"]
302//   [u8 version = 3]
303//   [u32 catalog_len][catalog bytes]
304//   [u32 users_len][users bytes]
305//   [u32 pubs_len][publications bytes]
306//   [u32 crc32]
307//
308// Layout — v4 (v6.1.4, subscriptions trailer):
309//   [8 bytes magic "SPGENV01"]
310//   [u8 version = 4]
311//   [u32 catalog_len][catalog bytes]
312//   [u32 users_len][users bytes]
313//   [u32 pubs_len][publications bytes]
314//   [u32 subs_len][subscriptions bytes]
315//   [u32 crc32]
316//
317// Layout — v5 (v6.2.0, statistics trailer):
318//   [8 bytes magic "SPGENV01"]
319//   [u8 version = 5]
320//   [u32 catalog_len][catalog bytes]
321//   [u32 users_len][users bytes]
322//   [u32 pubs_len][publications bytes]
323//   [u32 subs_len][subscriptions bytes]
324//   [u32 stats_len][statistics bytes]      ← NEW
325//   [u32 crc32]
326//
327// Writers emit v5 from v6.2.0 on. Readers accept all of {v1, v2,
328// v3, v4, v5}: v1/v2 load with empty publications / subscriptions /
329// statistics; v3 loads with empty subscriptions + statistics; v4
330// loads with empty statistics; v5 deserialises all three. Older
331// SPG versions reading a v5 envelope fall through the version
332// match to `EnvelopeParse::Bare` — pre-v6.2.0 binaries cannot
333// open v6.2.0+ snapshots (matches the v6.1.2 / v6.1.4 breaks).
334
335const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
336const ENVELOPE_VERSION_V1: u8 = 1;
337const ENVELOPE_VERSION_V2: u8 = 2;
338const ENVELOPE_VERSION_V3: u8 = 3;
339const ENVELOPE_VERSION_V4: u8 = 4;
340const ENVELOPE_VERSION_V5: u8 = 5;
341
342fn build_envelope(catalog: &[u8], users: &[u8], pubs: &[u8], subs: &[u8], stats: &[u8]) -> Vec<u8> {
343    let mut out = Vec::with_capacity(
344        8 + 1
345            + 4
346            + catalog.len()
347            + 4
348            + users.len()
349            + 4
350            + pubs.len()
351            + 4
352            + subs.len()
353            + 4
354            + stats.len()
355            + 4,
356    );
357    out.extend_from_slice(ENVELOPE_MAGIC);
358    out.push(ENVELOPE_VERSION_V5);
359    out.extend_from_slice(
360        &u32::try_from(catalog.len())
361            .expect("≤ 4G catalog")
362            .to_le_bytes(),
363    );
364    out.extend_from_slice(catalog);
365    out.extend_from_slice(
366        &u32::try_from(users.len())
367            .expect("≤ 4G users")
368            .to_le_bytes(),
369    );
370    out.extend_from_slice(users);
371    out.extend_from_slice(
372        &u32::try_from(pubs.len())
373            .expect("≤ 4G publications")
374            .to_le_bytes(),
375    );
376    out.extend_from_slice(pubs);
377    out.extend_from_slice(
378        &u32::try_from(subs.len())
379            .expect("≤ 4G subscriptions")
380            .to_le_bytes(),
381    );
382    out.extend_from_slice(subs);
383    out.extend_from_slice(
384        &u32::try_from(stats.len())
385            .expect("≤ 4G statistics")
386            .to_le_bytes(),
387    );
388    out.extend_from_slice(stats);
389    let crc = spg_crypto::crc32::crc32(&out);
390    out.extend_from_slice(&crc.to_le_bytes());
391    out
392}
393
/// Outcome of envelope parsing, as produced by `split_envelope`.
///
/// * `Bare` — the buffer is not a well-formed envelope; the caller
///   treats it as a bare catalog blob (preserves v3.x readability).
/// * `Pair` — a v1–v5 envelope split into its sections. Trailers the
///   envelope version does not carry are `None`: `publications` is
///   `Some` for v3/v4/v5, `subscriptions` for v4/v5, and `statistics`
///   for v5 only.
/// * `CrcMismatch` — a v2+ envelope whose trailing CRC32 does not
///   match the body.
enum EnvelopeParse<'a> {
    Bare,
    Pair {
        /// Serialised catalog section.
        catalog: &'a [u8],
        /// Serialised users section.
        users: &'a [u8],
        /// Publications trailer (v3 and later).
        publications: Option<&'a [u8]>,
        /// Subscriptions trailer (v4 and later).
        subscriptions: Option<&'a [u8]>,
        /// Statistics trailer (v5).
        statistics: Option<&'a [u8]>,
    },
    CrcMismatch {
        /// CRC32 stored in the envelope trailer.
        expected: u32,
        /// CRC32 computed over the envelope body.
        computed: u32,
    },
}
414
415/// Returns `EnvelopeParse::Pair` for a valid v1 / v2 / v3 envelope,
416/// `Bare` for a buffer that doesn't look like an envelope (v3.x
417/// bare catalog fallback), and `CrcMismatch` for a v2/v3 envelope
418/// whose trailing CRC32 doesn't match the body.
419fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
420    if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
421        return EnvelopeParse::Bare;
422    }
423    let version = buf[8];
424    if !matches!(
425        version,
426        ENVELOPE_VERSION_V1
427            | ENVELOPE_VERSION_V2
428            | ENVELOPE_VERSION_V3
429            | ENVELOPE_VERSION_V4
430            | ENVELOPE_VERSION_V5
431    ) {
432        return EnvelopeParse::Bare;
433    }
434    let mut p = 9usize;
435    let Some(cat_len_bytes) = buf.get(p..p + 4) else {
436        return EnvelopeParse::Bare;
437    };
438    let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
439        return EnvelopeParse::Bare;
440    };
441    let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
442    p += 4;
443    if p + cat_len + 4 > buf.len() {
444        return EnvelopeParse::Bare;
445    }
446    let catalog = &buf[p..p + cat_len];
447    p += cat_len;
448    let Some(user_len_bytes) = buf.get(p..p + 4) else {
449        return EnvelopeParse::Bare;
450    };
451    let Ok(user_len_arr) = user_len_bytes.try_into() else {
452        return EnvelopeParse::Bare;
453    };
454    let user_len = u32::from_le_bytes(user_len_arr) as usize;
455    p += 4;
456    if p + user_len > buf.len() {
457        return EnvelopeParse::Bare;
458    }
459    let users = &buf[p..p + user_len];
460    p += user_len;
461    let publications = if matches!(
462        version,
463        ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
464    ) {
465        // [u32 pubs_len][publications bytes]
466        let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
467            return EnvelopeParse::Bare;
468        };
469        let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
470            return EnvelopeParse::Bare;
471        };
472        let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
473        p += 4;
474        if p + pubs_len > buf.len() {
475            return EnvelopeParse::Bare;
476        }
477        let pubs_slice = &buf[p..p + pubs_len];
478        p += pubs_len;
479        Some(pubs_slice)
480    } else {
481        None
482    };
483    let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
484        // [u32 subs_len][subscriptions bytes]
485        let Some(subs_len_bytes) = buf.get(p..p + 4) else {
486            return EnvelopeParse::Bare;
487        };
488        let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
489            return EnvelopeParse::Bare;
490        };
491        let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
492        p += 4;
493        if p + subs_len > buf.len() {
494            return EnvelopeParse::Bare;
495        }
496        let subs_slice = &buf[p..p + subs_len];
497        p += subs_len;
498        Some(subs_slice)
499    } else {
500        None
501    };
502    let statistics = if version == ENVELOPE_VERSION_V5 {
503        // [u32 stats_len][statistics bytes]
504        let Some(stats_len_bytes) = buf.get(p..p + 4) else {
505            return EnvelopeParse::Bare;
506        };
507        let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
508            return EnvelopeParse::Bare;
509        };
510        let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
511        p += 4;
512        if p + stats_len > buf.len() {
513            return EnvelopeParse::Bare;
514        }
515        let stats_slice = &buf[p..p + stats_len];
516        p += stats_len;
517        Some(stats_slice)
518    } else {
519        None
520    };
521    if matches!(
522        version,
523        ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
524    ) {
525        if p + 4 != buf.len() {
526            return EnvelopeParse::Bare;
527        }
528        let Ok(crc_arr) = buf[p..p + 4].try_into() else {
529            return EnvelopeParse::Bare;
530        };
531        let expected = u32::from_le_bytes(crc_arr);
532        let computed = spg_crypto::crc32::crc32(&buf[..p]);
533        if expected != computed {
534            return EnvelopeParse::CrcMismatch { expected, computed };
535        }
536    } else if p != buf.len() {
537        // v1: must end exactly at the users section.
538        return EnvelopeParse::Bare;
539    }
540    EnvelopeParse::Pair {
541        catalog,
542        users,
543        publications,
544        subscriptions,
545        statistics,
546    }
547}
548
/// v4.41.1 opaque transaction handle (a newtype over `u64`).
///
/// Returned by `Engine::alloc_tx_id`,
/// threaded through `Engine::execute_in` so dispatch can identify which
/// in-flight TX a statement belongs to. `IMPLICIT_TX` is the reserved
/// slot every legacy caller — engine self-tests, spg-cli, spg-embedded,
/// startup replay — implicitly uses through the unchanged
/// `Engine::execute(sql)` API. v4.41.1 keeps at most one active slot at
/// runtime (dispatch holds `engine.write()` across the wrap, same as
/// v4.34); the map shape is here to let v4.42 turn on N in-flight
/// implicit TXs without reshuffling the engine internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);
560
561/// Reserved slot used by `Engine::execute(sql)` — the legacy single-
562/// global-shadow path. New `alloc_tx_id` handles start at 1.
563pub const IMPLICIT_TX: TxId = TxId(0);
564
565/// v6.7.3 — default segment-size threshold used by `COMPACT COLD
566/// SEGMENTS` when no explicit target is supplied. Segments whose
567/// `OwnedSegment::bytes().len()` is **strictly** less than this
568/// value are eligible to merge. spg-server reads
569/// `SPG_COMPACTION_TARGET_SEGMENT_BYTES` to override.
570pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
571
/// Per-slot transaction state: the transaction's shadow catalog plus
/// its savepoint stack.
///
/// Held inside `tx_catalogs[tx_id]` for the
/// lifetime of a BEGIN..COMMIT (or BEGIN..ROLLBACK) window. Drops when
/// the TX commits (its `catalog` is moved over `Engine.catalog`) or
/// rolls back (slot removed, catalog discarded).
#[derive(Debug, Default, Clone)]
struct TxState {
    /// The TX's shadow copy of the catalog. Started as a clone of
    /// `Engine.catalog` at BEGIN time; writes flow into it; COMMIT
    /// installs it over `Engine.catalog`. `Catalog::clone()` is O(1)
    /// since v4.40 (`PersistentVec` rows + `PersistentBTreeMap` indices).
    catalog: Catalog,
    /// Per-TX savepoint stack. Each entry pairs the savepoint name with
    /// a clone of `catalog` at the moment `SAVEPOINT <name>` fired.
    /// `ROLLBACK TO <name>` restores from the entry and pops everything
    /// after it; `RELEASE <name>` discards the entry and everything
    /// after; COMMIT/ROLLBACK clears the whole stack.
    savepoints: Vec<(String, Catalog)>,
}
590
/// v7.11.0 — frozen read-only view of the engine's committed state.
/// Constructed via [`Engine::clone_snapshot`]. Holds clones of the
/// catalog, statistics, clock function, and row-cap config — the
/// four fields the `execute_readonly` path actually reads. Cheap to
/// `Clone` (each clone shares the underlying `PersistentVec` row
/// storage; only the trie root pointers copy). Send + Sync so a
/// snapshot can be moved across `tokio::task::spawn_blocking`
/// boundaries without coordination.
///
/// The contract: a snapshot reflects the engine's state at the
/// moment `clone_snapshot()` returned. Subsequent writes to the
/// engine are NOT visible. Callers who need fresher data take a
/// new snapshot.
#[derive(Debug, Clone)]
pub struct CatalogSnapshot {
    /// Clone of the engine's catalog taken when the snapshot was made.
    catalog: Catalog,
    /// Clone of the engine's per-column statistics.
    statistics: statistics::Statistics,
    /// Copy of the engine's clock function, if one was installed.
    clock: Option<ClockFn>,
    /// Copy of the engine's per-query row cap; `None` = unlimited.
    max_query_rows: Option<usize>,
}
611
/// The execution engine: owns the committed catalog, in-flight
/// transaction state, and the server-scope state listed field by
/// field below (users, publications, subscriptions, statistics,
/// caches, host-injected callbacks, session settings).
///
/// NOTE(review): the struct derives `Default`, so a defaulted engine
/// has `next_tx_id == 0` and `foreign_key_checks == false`; confirm
/// that the constructor (not visible here) establishes the initial
/// values the field docs describe.
#[derive(Debug, Default)]
pub struct Engine {
    /// Committed catalog — what survives `Engine::snapshot()` and what
    /// outside-TX `SELECT`s read.
    catalog: Catalog,
    /// Active TX slots, keyed by `TxId`. Empty when no TX is in flight.
    /// v4.41.1 runtime invariant: at most one entry (single-writer
    /// model unchanged). v4.42 will let dispatch hold multiple entries
    /// concurrently for group commit + engine MVCC.
    tx_catalogs: BTreeMap<TxId, TxState>,
    /// Which slot the next exec_* call should mutate. Set by
    /// `execute_in(sql, tx_id)` at the entry point; legacy `execute(sql)`
    /// sets it to `IMPLICIT_TX`. None when no TX is in flight (read /
    /// write goes straight against `catalog`).
    current_tx: Option<TxId>,
    /// Monotonic counter for `alloc_tx_id`. Starts at 1 — slot 0 is
    /// reserved for `IMPLICIT_TX`.
    /// NOTE(review): `#[derive(Default)]` yields 0 here — confirm the
    /// constructor or `alloc_tx_id` accounts for that.
    next_tx_id: u64,
    /// v7.22 (round-13 T3) — session string-literal dialect. `false`
    /// (default) = PG semantics (backslash literal, `''` escape);
    /// `true` = MySQL semantics (`\'` etc.). Flipped by the
    /// deterministic session signals each dump emits: `SET sql_mode`
    /// (only MySQL clients/dumps send it) turns it on,
    /// `SET standard_conforming_strings = on` (every pg_dump
    /// preamble) turns it off. The plan cache is cleared on every
    /// flip — the same SQL text lexes differently per dialect.
    backslash_escapes: bool,
    /// Optional wall clock used to satisfy `NOW()` / `CURRENT_TIMESTAMP`
    /// / `CURRENT_DATE`. Set by the host environment.
    clock: Option<ClockFn>,
    /// v4.1 cryptographic RNG for per-user password salt. Set by the
    /// host. `None` means SQL-driven `CREATE USER` uses a
    /// deterministic fallback — see `SaltFn`.
    salt_fn: Option<SaltFn>,
    /// v4.2 per-query row cap. `None` = unlimited. When set, a
    /// SELECT that materialises more than `n` rows returns
    /// `EngineError::RowLimitExceeded`. Enforced before the result
    /// is shaped into wire frames so a runaway scan can't blow the
    /// server's heap.
    max_query_rows: Option<usize>,
    /// v7.30.3 (mailrs round-26) per-query byte cap on join/filter
    /// materialisation. `None` = unlimited. Approximate net
    /// accounting (Value heap payloads + per-cell enum overhead)
    /// charged at every point the join pipeline clones rows;
    /// crossing the cap raises `EngineError::QueryBytesExceeded`
    /// instead of pressuring the host into reclaim livelock. The
    /// host wires this to `SPG_MAX_QUERY_BYTES` (embed defaults it
    /// ON; the server keeps its allocator-precise budget as the
    /// outer layer).
    max_query_bytes: Option<usize>,
    /// v4.1 RBAC user table. Empty means "no RBAC configured yet" —
    /// the server decides what that means at the auth boundary
    /// (open mode vs legacy single-password mode). User CRUD goes
    /// through `create_user`/`drop_user`/`verify_user`; persistence
    /// rides the snapshot envelope alongside the catalog.
    users: UserStore,
    /// v6.1.2 logical-replication publication catalog. Empty until
    /// `CREATE PUBLICATION` runs. Persistence rides the v3 envelope
    /// trailer (see `build_envelope`).
    publications: publications::Publications,
    /// v6.1.4 logical-replication subscription catalog. Empty until
    /// `CREATE SUBSCRIPTION` runs. Persistence rides the v4 envelope
    /// trailer.
    subscriptions: subscriptions::Subscriptions,
    /// v6.2.0 — per-column statistics for the cost-based optimizer.
    /// Populated by `ANALYZE`; queried via `spg_statistic` virtual
    /// table. Persistence rides the v5 envelope trailer.
    statistics: statistics::Statistics,
    /// v6.3.0 — engine-level plan cache. Caches the post-`prepare()`
    /// `Statement` keyed on SQL text. In-memory only — does NOT ride
    /// the snapshot envelope (rebuilt on demand after restart).
    plan_cache: plan_cache::PlanCache,
    /// v6.5.1 — per-distinct-SQL execution stats. In-memory only,
    /// surfaced via `spg_stat_query` virtual table. Updated by the
    /// `execute_*` paths after a successful execute.
    query_stats: query_stats::QueryStats,
    /// v6.5.2 — connection-state provider callback. spg-server
    /// registers a function at startup that snapshots its
    /// per-pgwire-connection registry into `ActivityRow`s; engine
    /// reads through it on every `SELECT * FROM spg_stat_activity`.
    /// `None` ⇒ no-data (returns empty rows; matches the no_std
    /// embedded callers that don't run pgwire).
    activity_provider: Option<ActivityProvider>,
    /// v6.5.3 — audit-chain provider + verifier. Same pattern as
    /// activity_provider: spg-server registers both at startup;
    /// engine reads through on `SELECT * FROM spg_audit_chain` and
    /// `SELECT * FROM spg_audit_verify`. `None` ⇒ no-data.
    audit_chain_provider: Option<AuditChainProvider>,
    /// v6.5.3 — verifier half of the audit-chain pair documented on
    /// `audit_chain_provider`.
    audit_verifier: Option<AuditVerifier>,
    /// v6.5.6 — slow-query log threshold in microseconds. When set,
    /// every successful execute whose elapsed exceeds the threshold
    /// gets fed to the registered slow-query log callback (so
    /// spg-server can emit a structured log line). Default `None`
    /// = no slow-query logging.
    slow_query_threshold_us: Option<u64>,
    /// v6.5.6 — callback that receives the queries crossing
    /// `slow_query_threshold_us`.
    slow_query_logger: Option<SlowQueryLogger>,
    /// v7.12.1 — session parameters set via `SET <name> = <value>`.
    /// Only `default_text_search_config` is consumed by the engine
    /// today (the FTS function dispatcher reads it when
    /// `to_tsvector(text)` is called without an explicit config).
    /// All other names are accepted + recorded so PG-dump output
    /// loads, but have no behavioural effect.
    session_params: BTreeMap<String, String>,
    /// v7.12.7 — depth counter for trigger-emitted embedded SQL.
    /// Each time the engine executes a `DeferredEmbeddedStmt` it
    /// increments this; the recursive `execute_stmt_with_cancel`
    /// inside that path checks against [`MAX_TRIGGER_RECURSION`]
    /// to bound runaway cascades (trigger A's UPDATE on table B
    /// fires trigger B which UPDATEs table A which fires trigger
    /// A again…). Reset to 0 once the original DML returns.
    trigger_recursion_depth: u32,
    /// v7.14.0 — session FK-check switch. When
    /// `SET FOREIGN_KEY_CHECKS=0` is in effect (mysqldump preamble),
    /// the FK existence + arity check at CREATE TABLE time is
    /// deferred: FKs referencing a not-yet-existing parent land in
    /// `pending_foreign_keys`; `SET FOREIGN_KEY_CHECKS=1` drains
    /// the queue and resolves each FK against the now-complete
    /// catalog. The queue is drained on every `RESET ALL` too.
    /// NOTE(review): which value means "checks enabled" is not
    /// visible here, and `#[derive(Default)]` yields `false` —
    /// confirm against the constructor.
    foreign_key_checks: bool,
    /// v7.16.2 — true on the temp Engine an outer
    /// `exec_select_with_meta_views` builds, telling that
    /// temp engine "stop short-circuiting into the meta-view
    /// path — your catalog already has the materialised
    /// tables; just run the regular SELECT." Without this we'd
    /// infinite-loop since the meta-view name (e.g.
    /// `__spg_info_columns`) still triggers
    /// `select_references_meta_view`.
    meta_views_materialised: bool,
    /// v7.14.0 — queue of foreign keys deferred while
    /// `SET FOREIGN_KEY_CHECKS=0` is in effect (see
    /// `foreign_key_checks`). Entries are keyed by child table —
    /// presumably the `String` in each pair; verify against the
    /// CREATE TABLE path. Empty by default.
    pending_foreign_keys: Vec<(alloc::string::String, spg_sql::ast::ForeignKeyConstraint)>,
}
743
744/// v7.12.7 — hard cap on nested trigger-emitted embedded SQL
745/// fires. 16 deep is well past anything a normal trigger graph
746/// uses while still preventing infinite-loop wedging.
747const MAX_TRIGGER_RECURSION: u32 = 16;
748
/// v6.5.6 — callback signature for slow-query log emission. Called
/// with `(sql, elapsed_us)` once per successful execute that crosses
/// the threshold.
///
/// A plain `fn` pointer (not a closure type), so the callback cannot
/// capture state. `elapsed_us` is presumably microseconds, matching
/// the `_us` suffix used by `slow_query_threshold_us` — confirm at
/// the call site.
pub type SlowQueryLogger = fn(&str, u64);
753
754/// v6.5.4 — synthesise a `CREATE TABLE` statement from catalog
755/// state. Round-trips through `Engine::execute` to recreate the
756/// same schema (sans data + indexes — indexes are emitted as a
757/// separate `CREATE INDEX` chain in `spg_database_ddl`).
758fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
759    let mut out = alloc::format!("CREATE TABLE {name} (");
760    for (i, col) in columns.iter().enumerate() {
761        if i > 0 {
762            out.push_str(", ");
763        }
764        out.push_str(&col.name);
765        out.push(' ');
766        out.push_str(&render_data_type(col.ty));
767        if !col.nullable {
768            out.push_str(" NOT NULL");
769        }
770        if col.auto_increment {
771            out.push_str(" AUTO_INCREMENT");
772        }
773    }
774    out.push(')');
775    out
776}
777
778fn render_data_type(ty: DataType) -> String {
779    match ty {
780        DataType::SmallInt => "SMALLINT".into(),
781        DataType::Int => "INT".into(),
782        DataType::BigInt => "BIGINT".into(),
783        DataType::Float => "FLOAT".into(),
784        DataType::Text => "TEXT".into(),
785        DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
786        DataType::Char(n) => alloc::format!("CHAR({n})"),
787        DataType::Bool => "BOOL".into(),
788        DataType::Vector { dim, encoding } => match encoding {
789            spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
790            spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
791            spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
792        },
793        DataType::Numeric { precision, scale } => {
794            alloc::format!("NUMERIC({precision},{scale})")
795        }
796        DataType::Date => "DATE".into(),
797        DataType::Timestamp => "TIMESTAMP".into(),
798        DataType::Interval => "INTERVAL".into(),
799        DataType::Json => "JSON".into(),
800        DataType::Jsonb => "JSONB".into(),
801        DataType::Timestamptz => "TIMESTAMPTZ".into(),
802        DataType::Bytes => "BYTEA".into(),
803        DataType::TextArray => "TEXT[]".into(),
804        DataType::IntArray => "INT[]".into(),
805        DataType::BigIntArray => "BIGINT[]".into(),
806        DataType::TsVector => "TSVECTOR".into(),
807        DataType::TsQuery => "TSQUERY".into(),
808        DataType::Uuid => "UUID".into(),
809        DataType::Time => "TIME".into(),
810        DataType::Year => "YEAR".into(),
811        DataType::TimeTz => "TIMETZ".into(),
812        DataType::Money => "MONEY".into(),
813        DataType::Range(k) => k.keyword().into(),
814        DataType::Hstore => "HSTORE".into(),
815        DataType::IntArray2D => "INT[][]".into(),
816        DataType::BigIntArray2D => "BIGINT[][]".into(),
817        DataType::TextArray2D => "TEXT[][]".into(),
818    }
819}
820
/// v6.5.2 — one row of `spg_stat_activity`. Engine-public so
/// spg-server can construct rows without re-exporting internal
/// dispatch types.
///
/// The engine only carries these values; they are produced by the
/// registered [`ActivityProvider`]. Field meanings below that are
/// not stated by existing docs are inferred from the field names
/// and should be confirmed against the spg-server producer.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    // NOTE(review): presumably the backend/connection identifier — confirm.
    pub pid: u32,
    // NOTE(review): presumably the authenticated user name — confirm.
    pub user: String,
    // NOTE(review): `_us` suffix suggests microseconds; epoch/clock source not visible here.
    pub started_at_us: i64,
    // NOTE(review): presumably the SQL text currently executing — confirm.
    pub current_sql: String,
    // NOTE(review): set of possible values is not visible in this file.
    pub wait_event: String,
    // NOTE(review): `_us` suffix suggests microseconds — confirm.
    pub elapsed_us: i64,
    // NOTE(review): presumably whether the session has an open transaction — confirm.
    pub in_transaction: bool,
    /// v7.17 Phase 2.4 — startup-param `application_name` (or the
    /// last value the client sent via `SET application_name = '...'`).
    /// Empty when the client never declared one.
    pub application_name: String,
}
838
/// v6.5.2 — provider callback type. Fresh snapshot returned each
/// call; engine doesn't cache the slice.
///
/// A plain `fn` pointer taking no arguments and returning an owned
/// `Vec<ActivityRow>`; stored on the engine as
/// `activity_provider: Option<ActivityProvider>` (`None` until the
/// host registers one).
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
842
/// v6.5.3 — one row of `spg_audit_chain`. Engine-public so
/// spg-server can construct rows directly from `AuditEntry`.
///
/// Field meanings below are inferred from the field names; the
/// hashing scheme and timestamp source live in spg-server and are
/// not visible in this file.
#[derive(Debug, Clone)]
pub struct AuditRow {
    // NOTE(review): presumably the entry's position in the chain — confirm.
    pub seq: i64,
    // NOTE(review): `_ms` suffix suggests milliseconds; epoch not visible here.
    pub ts_ms: i64,
    // NOTE(review): presumably hex text of the previous entry's hash — confirm.
    pub prev_hash_hex: String,
    // NOTE(review): presumably hex text of this entry's hash — confirm.
    pub entry_hash_hex: String,
    // NOTE(review): presumably the audited SQL text — confirm.
    pub sql: String,
}
853
/// v6.5.3 — chain-table provider. spg-server registers a fn pointer
/// that snapshots the audit log as a vector of [`AuditRow`]s.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// v6.5.3 — chain verifier. spg-server registers a fn pointer that
/// verifies the audit log. Returns `(verified_count, broken_at_seq)`
/// — `broken_at_seq` is `-1` on a clean chain.
pub type AuditVerifier = fn() -> (i64, i64);
860
861impl Engine {
862    pub fn new() -> Self {
863        Self {
864            catalog: Catalog::new(),
865            tx_catalogs: BTreeMap::new(),
866            current_tx: None,
867            backslash_escapes: false,
868            next_tx_id: 1,
869            clock: None,
870            salt_fn: None,
871            max_query_rows: None,
872            max_query_bytes: None,
873            users: UserStore::new(),
874            publications: publications::Publications::new(),
875            subscriptions: subscriptions::Subscriptions::new(),
876            statistics: statistics::Statistics::new(),
877            plan_cache: plan_cache::PlanCache::new(),
878            query_stats: query_stats::QueryStats::new(),
879            activity_provider: None,
880            audit_chain_provider: None,
881            audit_verifier: None,
882            slow_query_threshold_us: None,
883            slow_query_logger: None,
884            session_params: BTreeMap::new(),
885            trigger_recursion_depth: 0,
886            foreign_key_checks: true,
887            meta_views_materialised: false,
888            pending_foreign_keys: Vec::new(),
889        }
890    }
891
892    /// v7.11.0 — clone the engine's committed catalog + read-time
893    /// state into a frozen `CatalogSnapshot`. Cheap (`Catalog` is
894    /// backed by `PersistentVec`; cloning is O(log n) per table).
895    /// Subsequent writes to this engine are invisible to the
896    /// snapshot; the snapshot is self-contained and can be moved
897    /// to another thread for concurrent `execute_readonly_on_snapshot`
898    /// calls. The basis for [`AsyncReadHandle`] in spg-embedded-tokio
899    /// and any other read-fanout pattern.
900    #[must_use]
901    pub fn clone_snapshot(&self) -> CatalogSnapshot {
902        CatalogSnapshot {
903            catalog: self.active_catalog().clone(),
904            statistics: self.statistics.clone(),
905            clock: self.clock,
906            max_query_rows: self.max_query_rows,
907        }
908    }
909
910    /// v7.11.1 — execute a read-only SQL statement against a
911    /// `CatalogSnapshot` without touching this engine. Same
912    /// semantics as `execute_readonly` but parameterised on the
913    /// snapshot's catalog. Reject DDL/DML the same way
914    /// `execute_readonly` does. Static-on-Self so the caller can
915    /// dispatch without holding an `Engine` borrow alongside the
916    /// snapshot.
917    pub fn execute_readonly_on_snapshot(
918        snapshot: &CatalogSnapshot,
919        sql: &str,
920    ) -> Result<QueryResult, EngineError> {
921        Self::execute_readonly_on_snapshot_with_cancel(snapshot, sql, CancelToken::none())
922    }
923
924    /// v7.11.1 — `execute_readonly_on_snapshot` with cooperative
925    /// cancellation. Builds a transient `Engine` over the snapshot
926    /// state, runs `execute_readonly_with_cancel`, drops. The
927    /// transient engine is cheap to construct (no I/O; everything
928    /// is just struct moves) and lets the existing read path stay
929    /// untouched.
930    pub fn execute_readonly_on_snapshot_with_cancel(
931        snapshot: &CatalogSnapshot,
932        sql: &str,
933        cancel: CancelToken<'_>,
934    ) -> Result<QueryResult, EngineError> {
935        let transient = Engine {
936            catalog: snapshot.catalog.clone(),
937            statistics: snapshot.statistics.clone(),
938            clock: snapshot.clock,
939            max_query_rows: snapshot.max_query_rows,
940            ..Engine::default()
941        };
942        transient.execute_readonly_with_cancel(sql, cancel)
943    }
944
945    /// v7.18 — execute a previously-prepared `Statement` against a
946    /// `CatalogSnapshot` in read-only mode. Mirror of
947    /// [`Engine::execute_prepared`] for the fan-out read path:
948    /// substitutes `Expr::Placeholder(n)` nodes from `params`, then
949    /// dispatches through [`Engine::execute_readonly_stmt_with_cancel`]
950    /// (writes / DDL hit `EngineError::WriteRequired`). Static-on-Self
951    /// so multiple readonly threads can dispatch against the same
952    /// snapshot concurrently without an `Engine` borrow.
953    ///
954    /// **Schema drift contract**. The `Statement` was prepared against
955    /// some prior catalog. If the snapshot's catalog has since
956    /// diverged (DDL renamed / dropped a referenced column / table),
957    /// execution surfaces the normal `EngineError` — same shape as
958    /// PG's "cached plan must not change result type". Caller decides
959    /// whether to re-prepare; engine does NOT auto-retry.
960    pub fn execute_readonly_prepared_on_snapshot(
961        snapshot: &CatalogSnapshot,
962        stmt: Statement,
963        params: &[Value],
964    ) -> Result<QueryResult, EngineError> {
965        Self::execute_readonly_prepared_on_snapshot_with_cancel(
966            snapshot,
967            stmt,
968            params,
969            CancelToken::none(),
970        )
971    }
972
973    /// v7.18 — cancellable variant of
974    /// [`Engine::execute_readonly_prepared_on_snapshot`].
975    pub fn execute_readonly_prepared_on_snapshot_with_cancel(
976        snapshot: &CatalogSnapshot,
977        mut stmt: Statement,
978        params: &[Value],
979        cancel: CancelToken<'_>,
980    ) -> Result<QueryResult, EngineError> {
981        cancel.check()?;
982        substitute_placeholders(&mut stmt, params)?;
983        let transient = Engine {
984            catalog: snapshot.catalog.clone(),
985            statistics: snapshot.statistics.clone(),
986            clock: snapshot.clock,
987            max_query_rows: snapshot.max_query_rows,
988            ..Engine::default()
989        };
990        transient.execute_readonly_stmt_with_cancel(stmt, cancel)
991    }
992
993    /// v7.18 — describe a prepared `Statement` against a
994    /// `CatalogSnapshot`. Same `(parameter_oids, output_columns)`
995    /// shape as [`Engine::describe_prepared`]; resolves names
996    /// against the snapshot's catalog instead of `self`. Pure
997    /// function — no engine state read.
998    pub fn describe_prepared_on_snapshot(
999        snapshot: &CatalogSnapshot,
1000        stmt: &Statement,
1001    ) -> (Vec<u32>, Vec<ColumnSchema>) {
1002        describe::describe_prepared(stmt, &snapshot.catalog)
1003    }
1004
1005    /// v7.18 — does this SQL string classify as read-only? Parses
1006    /// `sql` with the engine parser and consults
1007    /// `Statement::is_readonly()`. A parse error returns `false`
1008    /// (route to the writer path so the user sees the canonical
1009    /// parse error from the writer's simple-query dispatch).
1010    /// Static-on-Self so the spg-sqlx connection layer can ask
1011    /// without an `Engine` borrow.
1012    #[must_use]
1013    pub fn is_readonly_sql(sql: &str) -> bool {
1014        parser::parse_statement(sql)
1015            .as_ref()
1016            .map(spg_sql::ast::Statement::is_readonly)
1017            .unwrap_or(false)
1018    }
1019
1020    /// v7.18 — parse + plan a SQL string against a
1021    /// `CatalogSnapshot`. Mirror of [`Engine::prepare`] for the
1022    /// readonly fan-out path: applies the same prepare-time
1023    /// transforms (clock rewrite, `GROUP BY ALL` expansion, ORDER
1024    /// BY position resolve, cost-based JOIN reorder) but resolves
1025    /// catalog + statistics against the snapshot, not a live
1026    /// engine. Static-on-Self — `AsyncReadHandle::prepare` calls
1027    /// this without taking the writer lock so multiple read
1028    /// handles can prepare concurrently against frozen views.
1029    ///
1030    /// # Errors
1031    /// Propagates [`ParseError`] from the parser. Schema
1032    /// validation deferred to execute time, same as
1033    /// [`Engine::prepare`].
1034    pub fn prepare_on_snapshot(
1035        snapshot: &CatalogSnapshot,
1036        sql: &str,
1037    ) -> Result<Statement, ParseError> {
1038        let mut stmt = parser::parse_statement(sql)?;
1039        let now_micros = snapshot.clock.map(|f| f());
1040        rewrite_clock_calls(&mut stmt, now_micros);
1041        if let Statement::Select(s) = &mut stmt {
1042            expand_group_by_all(s);
1043            resolve_order_by_position(s);
1044            reorder::reorder_joins(s, &snapshot.catalog, &snapshot.statistics);
1045        }
1046        Ok(stmt)
1047    }
1048
    /// Construct an engine restored from a previously-snapshotted catalog
    /// (see `snapshot()`).
    ///
    /// Only `catalog` comes from the caller. Every other field is set
    /// to its initial value: no open transaction, no clock / salt
    /// source, no query caps, empty user / publication / subscription
    /// / statistics stores, empty plan cache and query stats, no
    /// registered providers or loggers, and FK checks enabled.
    pub fn restore(catalog: Catalog) -> Self {
        Self {
            catalog,
            // Transaction state: nothing in flight; ids start at 1.
            tx_catalogs: BTreeMap::new(),
            current_tx: None,
            backslash_escapes: false,
            next_tx_id: 1,
            // Host-supplied hooks and limits: all unset until a `with_*` builder runs.
            clock: None,
            salt_fn: None,
            max_query_rows: None,
            max_query_bytes: None,
            // Auxiliary stores start empty.
            users: UserStore::new(),
            publications: publications::Publications::new(),
            subscriptions: subscriptions::Subscriptions::new(),
            statistics: statistics::Statistics::new(),
            plan_cache: plan_cache::PlanCache::new(),
            query_stats: query_stats::QueryStats::new(),
            // Observability callbacks: none registered.
            activity_provider: None,
            audit_chain_provider: None,
            audit_verifier: None,
            slow_query_threshold_us: None,
            slow_query_logger: None,
            session_params: BTreeMap::new(),
            trigger_recursion_depth: 0,
            // FK checks are on by default; no deferred FKs are queued.
            foreign_key_checks: true,
            meta_views_materialised: false,
            pending_foreign_keys: Vec::new(),
        }
    }
1080
1081    /// Restore an engine + user table from a v4.1 envelope produced
1082    /// by `snapshot_with_users()`. Falls back to plain catalog-only
1083    /// restore if the envelope magic isn't present (so v3.x snapshot
1084    /// files still load). v6.1.2 adds the optional publications
1085    /// trailer (envelope v3); a v1/v2 envelope deserialises to an
1086    /// empty publication table.
1087    pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
1088        match split_envelope(buf) {
1089            EnvelopeParse::Pair {
1090                catalog: catalog_bytes,
1091                users: user_bytes,
1092                publications: pub_bytes,
1093                subscriptions: sub_bytes,
1094                statistics: stats_bytes,
1095            } => {
1096                let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
1097                let users = users::deserialize_users(user_bytes)
1098                    .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
1099                let publications = match pub_bytes {
1100                    Some(b) => publications::Publications::deserialize(b).map_err(|e| {
1101                        EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
1102                    })?,
1103                    None => publications::Publications::new(),
1104                };
1105                let subscriptions = match sub_bytes {
1106                    Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
1107                        EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
1108                    })?,
1109                    None => subscriptions::Subscriptions::new(),
1110                };
1111                let statistics = match stats_bytes {
1112                    Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
1113                        EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
1114                    })?,
1115                    None => statistics::Statistics::new(),
1116                };
1117                Ok(Self {
1118                    catalog,
1119                    tx_catalogs: BTreeMap::new(),
1120                    current_tx: None,
1121                    backslash_escapes: false,
1122                    next_tx_id: 1,
1123                    clock: None,
1124                    salt_fn: None,
1125                    max_query_rows: None,
1126                    max_query_bytes: None,
1127                    users,
1128                    publications,
1129                    subscriptions,
1130                    statistics,
1131                    plan_cache: plan_cache::PlanCache::new(),
1132                    query_stats: query_stats::QueryStats::new(),
1133                    activity_provider: None,
1134                    audit_chain_provider: None,
1135                    audit_verifier: None,
1136                    slow_query_threshold_us: None,
1137                    slow_query_logger: None,
1138                    session_params: BTreeMap::new(),
1139                    trigger_recursion_depth: 0,
1140                    foreign_key_checks: true,
1141                    meta_views_materialised: false,
1142                    pending_foreign_keys: Vec::new(),
1143                })
1144            }
1145            EnvelopeParse::CrcMismatch { expected, computed } => {
1146                Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1147                    "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
1148                ))))
1149            }
1150            EnvelopeParse::Bare => {
1151                let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
1152                Ok(Self::restore(catalog))
1153            }
1154        }
1155    }
1156
    /// Borrow the engine's user store.
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
1160
1161    /// `salt` is supplied by the caller (the host has a random
1162    /// source; the engine is `no_std`). Caller should pass a fresh
1163    /// 16-byte random value per user.
1164    pub fn create_user(
1165        &mut self,
1166        name: &str,
1167        password: &str,
1168        role: Role,
1169        salt: [u8; 16],
1170    ) -> Result<(), UserError> {
1171        self.users.create(name, password, role, salt)?;
1172        // v4.8: also derive SCRAM-SHA-256 secrets so PG-wire SASL
1173        // auth can verify without re-running PBKDF2 per attempt.
1174        // Uses a fresh salt from the host RNG (falls back to a
1175        // deterministic per-username salt when no RNG is wired, same
1176        // as the legacy hash path).
1177        let scram_salt = self.salt_fn.map_or_else(
1178            || {
1179                let mut s = [0u8; users::SCRAM_SALT_LEN];
1180                let digest = spg_crypto::hash(name.as_bytes());
1181                // Use bytes 16..32 of BLAKE3 so we don't reuse the
1182                // exact same fallback salt as the BLAKE3 hash path.
1183                s.copy_from_slice(&digest[16..32]);
1184                s
1185            },
1186            |f| f(),
1187        );
1188        self.users
1189            .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
1190        Ok(())
1191    }
1192
    /// Remove the user called `name` from the engine's user store.
    ///
    /// # Errors
    /// Returns whatever `UserStore::drop` reports.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
1196
    /// Check `password` for user `name` against the user store.
    ///
    /// Forwards to `UserStore::verify` and returns its result
    /// unchanged: `Some(role)` or `None`.
    // NOTE(review): presumably `None` covers both "unknown user" and
    // "wrong password" — confirm in `UserStore::verify`.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
1200
    /// Builder: attach a wall clock so `NOW()` / `CURRENT_TIMESTAMP` /
    /// `CURRENT_DATE` evaluate to a real value instead of erroring out.
    ///
    /// Consumes and returns `self` so builder calls can be chained.
    /// A later call replaces any previously attached clock.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
1208
    /// Builder: attach an OS-backed RNG for per-user password salts.
    /// The host (`spg-server`) typically wires this to `/dev/urandom`.
    ///
    /// `create_user` consults this function when deriving the SCRAM
    /// salt and falls back to a deterministic per-username salt when
    /// it is unset. Consumes and returns `self` so builder calls can
    /// be chained.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
1216
    /// Builder: cap the number of rows a single SELECT may return.
    /// Exceeding the cap raises `EngineError::RowLimitExceeded` —
    /// the bound is checked inside the executor so a runaway
    /// catalog scan can't allocate millions of rows before the
    /// server gets a chance to reject the result.
    ///
    /// Consumes and returns `self` so builder calls can be chained.
    /// The cap is unset (`None`) until this is called.
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
1227
    /// Builder: cap the approximate heap bytes a single SELECT's
    /// join/filter materialisation may hold. Exceeding the cap
    /// raises `EngineError::QueryBytesExceeded`. Rows are the wrong
    /// unit when one row carries a multi-MB body (mailrs round-26:
    /// 1000-row batches of full mail text walked a 15 GiB host into
    /// reclaim livelock without ever tripping a row ceiling).
    ///
    /// Consumes and returns `self` so builder calls can be chained.
    /// The cap is unset (`None`) until this is called. Note that
    /// `clone_snapshot` copies `max_query_rows` but not this cap.
    #[must_use]
    pub const fn with_max_query_bytes(mut self, n: usize) -> Self {
        self.max_query_bytes = Some(n);
        self
    }
1239
    /// The *committed* catalog. Note: during a transaction this returns the
    /// pre-TX state — `SELECT` inside a TX goes through `execute()` and reads
    /// the shadow. Tests that inspect outside-TX state should use this.
    ///
    /// Always borrows `self.catalog` directly; unlike the private
    /// `active_catalog`, it never consults `current_tx`.
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
1246
1247    /// Serialize the *committed* catalog to bytes. v0.6 was full-snapshot; v0.9
1248    /// adds the rule that an open TX's shadow is never snapshotted — only the
1249    /// post-COMMIT state is persisted. v4.1 wraps the catalog in an envelope
1250    /// when there are users to persist; an empty user table snapshots as the
1251    /// bare catalog format (backwards-compat with v3.x readers). v6.1.2
1252    /// adds publications to the envelope condition: either non-empty
1253    /// users OR non-empty publications now triggers the envelope path.
1254    pub fn snapshot(&self) -> Vec<u8> {
1255        if self.users.is_empty()
1256            && self.publications.is_empty()
1257            && self.subscriptions.is_empty()
1258            && self.statistics.is_empty()
1259        {
1260            self.catalog.serialize()
1261        } else {
1262            build_envelope(
1263                &self.catalog.serialize(),
1264                &users::serialize_users(&self.users),
1265                &self.publications.serialize(),
1266                &self.subscriptions.serialize(),
1267                &self.statistics.serialize(),
1268            )
1269        }
1270    }
1271
    /// True when at least one TX slot is in flight. v4.41.1 runtime
    /// invariant: at most one slot active at a time (dispatch holds
    /// `engine.write()` across the entire wrap). v4.42 will let this
    /// return true with multiple slots concurrently.
    ///
    /// Implemented as "`tx_catalogs` is non-empty"; it does not look
    /// at `current_tx`.
    pub fn in_transaction(&self) -> bool {
        !self.tx_catalogs.is_empty()
    }
1279
1280    /// v4.41.1 allocate a fresh TX handle. Used by spg-server dispatch
1281    /// to scope each implicit-wrap BEGIN..stmt..COMMIT to its own slot
1282    /// in `tx_catalogs`. v4.42 — the commit-barrier leader allocates
1283    /// one of these per task in its group, runs `BEGIN`+sql+`COMMIT`
1284    /// sequentially under a single `engine.write()` so each task's
1285    /// mutations accumulate into shared state, then either keeps the
1286    /// accumulated state (fsync OK) or restores the pre-image via
1287    /// `replace_catalog` (fsync err).
1288    pub fn alloc_tx_id(&mut self) -> TxId {
1289        let id = TxId(self.next_tx_id);
1290        self.next_tx_id = self.next_tx_id.saturating_add(1);
1291        id
1292    }
1293
    /// v4.42 — atomically replace the live catalog. Used by the
    /// commit-barrier leader to roll back a group whose batched
    /// fsync failed: the leader snapshots `engine.catalog().clone()`
    /// (O(1) Arc bump after the v4.39/v4.40 persistent migration)
    /// at group start, sequentially applies each task's BEGIN+sql+
    /// COMMIT under the same write lock to accumulate mutations
    /// into shared state, batches the WAL bytes, fsyncs once, and
    /// on failure calls this with the pre-image to undo every
    /// task in the group at once.
    ///
    /// **Does NOT touch `tx_catalogs` / `current_tx`.** Any
    /// explicit-TX slot from a concurrent client (created via the
    /// legacy `IMPLICIT_TX`-less dispatch path or via the future
    /// MVCC-readers v5+ work) has its own snapshot baked into the
    /// slot — restoring `self.catalog` to the pre-image leaves
    /// those slots untouched, exactly as they were when the leader
    /// took the lock. The leader's own implicit-TX slots are all
    /// already discarded (`exec_commit` removed them as each
    /// task's COMMIT ran) by the time this is reached.
    ///
    /// The previous committed catalog is dropped by the assignment.
    /// No other field (statistics, plan cache, ...) is reset here.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        self.catalog = catalog;
    }
1316
1317    /// v6.7.0 — public shim around `Catalog::freeze_oldest_to_cold`
1318    /// so tests + the spg-server freezer can drive a freeze without
1319    /// reaching into the private `active_catalog_mut`. v6.7.4
1320    /// parallel freezer will build on this surface.
1321    ///
1322    /// Marks the table's cached `cold_row_count` stale because the
1323    /// freeze added cold locators that ANALYZE hasn't yet refreshed.
1324    pub fn freeze_oldest_to_cold(
1325        &mut self,
1326        table_name: &str,
1327        index_name: &str,
1328        max_rows: usize,
1329    ) -> Result<spg_storage::FreezeReport, EngineError> {
1330        let report = self
1331            .active_catalog_mut()
1332            .freeze_oldest_to_cold(table_name, index_name, max_rows)
1333            .map_err(EngineError::Storage)?;
1334        if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
1335            t.mark_cold_row_count_stale();
1336        }
1337        Ok(report)
1338    }
1339
1340    /// v6.7.5 — public shim used by the spg-server follower's
1341    /// segment-forwarding receiver. Registers a cold-tier segment
1342    /// at a specific id (the master's id, as transmitted on the
1343    /// wire) so the follower's BTree-Cold locators stay byte-
1344    /// identical with the master's. Wraps
1345    /// `Catalog::load_segment_bytes_at` under the standard
1346    /// clone-mutate-replace pattern.
1347    ///
1348    /// Returns `Ok(())` on success **and** on the "slot already
1349    /// occupied" case — a follower mid-reconnect may receive a
1350    /// segment chunk for a segment_id it already has on disk
1351    /// (forwarded last session); the caller should treat that
1352    /// path as a no-op rather than a fatal error.
1353    pub fn receive_cold_segment(
1354        &mut self,
1355        segment_id: u32,
1356        bytes: Vec<u8>,
1357    ) -> Result<(), EngineError> {
1358        let mut new_cat = self.catalog.clone();
1359        match new_cat.load_segment_bytes_at(segment_id, bytes) {
1360            Ok(()) => {
1361                self.replace_catalog(new_cat);
1362                Ok(())
1363            }
1364            Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1365            Err(e) => Err(EngineError::Storage(e)),
1366        }
1367    }
1368
1369    /// v6.7.3 — public shim around `Catalog::compact_cold_segments`
1370    /// driving every BTree index on every user table. Returns one
1371    /// `(table, index, report)` triple for each merge that
1372    /// actually happened (no-op (table, index) pairs are filtered
1373    /// out so callers can size persist-side work to the live
1374    /// merges). Caller is responsible for persisting each
1375    /// `report.merged_segment_bytes` and updating the on-disk
1376    /// segment registry; engine layer is no_std and never
1377    /// touches disk.
1378    ///
1379    /// Marks every touched table's cached `cold_row_count` stale
1380    /// — compaction GC'd some shadowed rows, so the count must be
1381    /// re-derived on the next ANALYZE.
1382    pub fn compact_cold_segments_with_target(
1383        &mut self,
1384        target_segment_bytes: u64,
1385    ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1386        let table_names = self.active_catalog().table_names();
1387        let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1388        for tname in table_names {
1389            if is_internal_table_name(&tname) {
1390                continue;
1391            }
1392            let idx_names: Vec<String> = {
1393                let Some(t) = self.active_catalog().get(&tname) else {
1394                    continue;
1395                };
1396                t.indices()
1397                    .iter()
1398                    .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1399                    .map(|i| i.name.clone())
1400                    .collect()
1401            };
1402            for iname in idx_names {
1403                let report = self
1404                    .active_catalog_mut()
1405                    .compact_cold_segments(&tname, &iname, target_segment_bytes)
1406                    .map_err(EngineError::Storage)?;
1407                if report.merged_segment_id.is_some() {
1408                    if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1409                        t.mark_cold_row_count_stale();
1410                    }
1411                    reports.push((tname.clone(), iname, report));
1412                }
1413            }
1414        }
1415        Ok(reports)
1416    }
1417
1418    fn active_catalog(&self) -> &Catalog {
1419        match self.current_tx {
1420            Some(t) => self
1421                .tx_catalogs
1422                .get(&t)
1423                .map_or(&self.catalog, |s| &s.catalog),
1424            None => &self.catalog,
1425        }
1426    }
1427
1428    /// v7.12.4 — snapshot every row-level trigger on `table` that
1429    /// fires for `event` (`"INSERT"` / `"UPDATE"` / `"DELETE"`) at
1430    /// the given `timing` (`"BEFORE"` / `"AFTER"`), and clone its
1431    /// referenced function definition. Returned as a vec of owned
1432    /// `FunctionDef` so the row-write loop can fire them without
1433    /// holding a borrow on the catalog (which would conflict with
1434    /// the table.insert / update_row / delete mutable borrows).
1435    /// v7.16.2 — top-level DO block executor. Walks the
1436    /// PlPgSqlBlock via [`triggers::execute_do_block_top_level`],
1437    /// then runs each collected EmbeddedSql statement through
1438    /// the engine's regular execute path (NOT deferred — DO is
1439    /// outside any row-write borrow). Errors from any step
1440    /// abort the block and propagate verbatim.
1441    /// v7.16.2 — resolve every subquery inside a PlPgSqlBlock's
1442    /// expression slots so the downstream trigger-flavoured
1443    /// evaluator (which expects pre-resolved Expr::Literal /
1444    /// Binary chains) doesn't trip on raw Exists/ScalarSubquery
1445    /// nodes. Walks IF conditions, Assign values, RAISE args.
1446    /// EmbeddedSql statements re-enter the engine for execution
1447    /// later so their subqueries get the normal SELECT-side
1448    /// resolution.
1449    fn resolve_plpgsql_block_subqueries(
1450        &self,
1451        block: &mut spg_sql::ast::PlPgSqlBlock,
1452        cancel: CancelToken<'_>,
1453    ) -> Result<(), EngineError> {
1454        for d in &mut block.declarations {
1455            if let Some(e) = &mut d.default {
1456                self.resolve_expr_subqueries(e, cancel)?;
1457            }
1458        }
1459        self.resolve_plpgsql_stmts_subqueries(&mut block.statements, cancel)
1460    }
1461
1462    fn resolve_plpgsql_stmts_subqueries(
1463        &self,
1464        stmts: &mut [spg_sql::ast::PlPgSqlStmt],
1465        cancel: CancelToken<'_>,
1466    ) -> Result<(), EngineError> {
1467        use spg_sql::ast::PlPgSqlStmt;
1468        for stmt in stmts {
1469            match stmt {
1470                PlPgSqlStmt::Assign { value, .. } => {
1471                    self.resolve_expr_subqueries(value, cancel)?;
1472                }
1473                PlPgSqlStmt::Return(spg_sql::ast::ReturnTarget::Expr(e)) => {
1474                    self.resolve_expr_subqueries(e, cancel)?;
1475                }
1476                PlPgSqlStmt::Return(_) => {}
1477                PlPgSqlStmt::If {
1478                    branches,
1479                    else_branch,
1480                } => {
1481                    for (cond, body) in branches.iter_mut() {
1482                        self.resolve_expr_subqueries(cond, cancel)?;
1483                        self.resolve_plpgsql_stmts_subqueries(body, cancel)?;
1484                    }
1485                    self.resolve_plpgsql_stmts_subqueries(else_branch, cancel)?;
1486                }
1487                PlPgSqlStmt::Raise { args, .. } => {
1488                    for a in args {
1489                        self.resolve_expr_subqueries(a, cancel)?;
1490                    }
1491                }
1492                PlPgSqlStmt::EmbeddedSql(_) => {
1493                    // Embedded SQL goes back through execute_stmt
1494                    // _with_cancel which runs the SELECT-side
1495                    // resolver itself; nothing to do here.
1496                }
1497                PlPgSqlStmt::SelectInto { body, .. } => {
1498                    // SELECT INTO runs through Engine::execute
1499                    // when reached, so subquery resolution
1500                    // happens via the normal SELECT-side path.
1501                    // Still walk for nested subqueries inside
1502                    // the SELECT body so eval doesn't trip.
1503                    self.resolve_select_subqueries(body, cancel)?;
1504                }
1505            }
1506        }
1507        Ok(())
1508    }
1509
1510    fn exec_do_block(
1511        &mut self,
1512        body: spg_sql::ast::PlPgSqlBlock,
1513    ) -> Result<QueryResult, EngineError> {
1514        // v7.16.2 — pre-resolve every subquery the body's
1515        // expressions reach. `eval::eval_expr` errors on
1516        // unresolved Exists/ScalarSubquery/InSubquery; the
1517        // top-level SELECT path runs `resolve_select_subqueries`
1518        // for the caller — for plpgsql we have to do the
1519        // equivalent before the body walker runs. Catches the
1520        // mailrs idiom `IF EXISTS (SELECT 1 FROM
1521        // information_schema.columns WHERE …) THEN …`.
1522        let mut body = body;
1523        self.resolve_plpgsql_block_subqueries(&mut body, CancelToken::none())?;
1524        let dts = self
1525            .session_param("default_text_search_config")
1526            .map(String::from);
1527        // v7.16.2 — SELECT … INTO resolver. The walker calls
1528        // this synchronously when it hits a SelectInto stmt
1529        // so the IF / locals scope sees the result before the
1530        // next statement. Body walks for trigger paths (no
1531        // resolver) error loudly on SelectInto.
1532        // SAFETY: the closure shares this engine borrow with
1533        // the walker, but the walker only borrows for the
1534        // duration of `execute_do_block_top_level` and doesn't
1535        // reach back into the engine through any other path —
1536        // so the recursive `&mut` is sound. We use a `RefCell`
1537        // for interior mutability since the closure is
1538        // Fn-shaped.
1539        let engine_cell = core::cell::RefCell::new(&mut *self);
1540        let resolver_fn =
1541            |stmt: &spg_sql::ast::Statement| -> Result<Value, triggers::TriggerError> {
1542                let mut eng = engine_cell.borrow_mut();
1543                let r = eng
1544                    .execute_stmt_with_cancel(stmt.clone(), CancelToken::none())
1545                    .map_err(|e| triggers::TriggerError::EvalFailed {
1546                        function: "DO".into(),
1547                        cause: eval::EvalError::TypeMismatch {
1548                            detail: alloc::format!("SELECT … INTO failed: {e}"),
1549                        },
1550                    })?;
1551                match r {
1552                    QueryResult::Rows { rows, .. } => match rows.into_iter().next() {
1553                        Some(row) => Ok(row.values.into_iter().next().unwrap_or(Value::Null)),
1554                        None => Ok(Value::Null),
1555                    },
1556                    _ => Err(triggers::TriggerError::EvalFailed {
1557                        function: "DO".into(),
1558                        cause: eval::EvalError::TypeMismatch {
1559                            detail: "SELECT … INTO body must be a SELECT".into(),
1560                        },
1561                    }),
1562                }
1563            };
1564        let collected =
1565            triggers::execute_do_block_top_level(&body, dts.as_deref(), Some(&resolver_fn))
1566                .map_err(|e| {
1567                    EngineError::Storage(StorageError::Corrupt(alloc::format!("DO: {e}")))
1568                })?;
1569        // engine_cell goes out of scope here, releasing the &mut self borrow
1570        // Run each embedded statement against the engine. The
1571        // statements were already substitute-walked for NEW/OLD/
1572        // locals (those evaluate to engine literals before they
1573        // land here) so dispatch is plain execute_stmt_with_cancel.
1574        for stmt in collected {
1575            // v7.16.2 — preserve current_tx wrap so an outer
1576            // BEGIN/COMMIT around a DO block keeps the
1577            // EmbeddedSql writes inside that same tx slot.
1578            self.execute_stmt_with_cancel(stmt, CancelToken::none())?;
1579        }
1580        Ok(QueryResult::CommandOk {
1581            affected: 0,
1582            modified_catalog: !self.in_transaction(),
1583        })
1584    }
1585
1586    fn snapshot_row_triggers(
1587        &self,
1588        table: &str,
1589        event: &str,
1590        timing: &str,
1591    ) -> Vec<spg_storage::FunctionDef> {
1592        let cat = self.active_catalog();
1593        cat.triggers()
1594            .iter()
1595            .filter(|t| {
1596                // v7.16.1 — skip disabled triggers (mailrs
1597                // round-9 A.2.b — pg_dump --disable-triggers).
1598                t.enabled
1599                    && t.table == table
1600                    && t.timing.eq_ignore_ascii_case(timing)
1601                    && t.for_each.eq_ignore_ascii_case("row")
1602                    && t.events.iter().any(|e| e.eq_ignore_ascii_case(event))
1603            })
1604            .filter_map(|t| cat.functions().get(&t.function).cloned())
1605            .collect()
1606    }
1607
1608    /// v7.13.0 — UPDATE-side snapshot that pairs each trigger's
1609    /// function with its `UPDATE OF cols` filter (mailrs round-5
1610    /// G7). Empty filter Vec means "fire unconditionally", matching
1611    /// the v7.12 behaviour.
1612    fn snapshot_update_row_triggers(
1613        &self,
1614        table: &str,
1615        timing: &str,
1616    ) -> Vec<(spg_storage::FunctionDef, Vec<String>)> {
1617        let cat = self.active_catalog();
1618        cat.triggers()
1619            .iter()
1620            .filter(|t| {
1621                // v7.16.1 — skip disabled triggers.
1622                t.enabled
1623                    && t.table == table
1624                    && t.timing.eq_ignore_ascii_case(timing)
1625                    && t.for_each.eq_ignore_ascii_case("row")
1626                    && t.events.iter().any(|e| e.eq_ignore_ascii_case("UPDATE"))
1627            })
1628            .filter_map(|t| {
1629                cat.functions()
1630                    .get(&t.function)
1631                    .cloned()
1632                    .map(|fd| (fd, t.update_columns.clone()))
1633            })
1634            .collect()
1635    }
1636
1637    /// v7.12.7 — drain the trigger-emitted embedded SQL queue.
1638    /// Called by the INSERT / UPDATE / DELETE executors after
1639    /// their main row-write loop returns. Each statement runs
1640    /// inside the same cancel scope as the firing DML and bumps
1641    /// the recursion counter; nested embedded SQL beyond
1642    /// [`MAX_TRIGGER_RECURSION`] errors with a clear message so
1643    /// a trigger-graph cycle surfaces as a query failure instead
1644    /// of stack-blowing the engine.
1645    fn execute_deferred_trigger_stmts(
1646        &mut self,
1647        deferred: Vec<triggers::DeferredEmbeddedStmt>,
1648        cancel: CancelToken<'_>,
1649    ) -> Result<(), EngineError> {
1650        for d in deferred {
1651            if self.trigger_recursion_depth >= MAX_TRIGGER_RECURSION {
1652                return Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
1653                    "trigger embedded SQL recursion depth {} exceeded (trigger function \
1654                     {:?} would push past the {} cap — check for trigger cycles)",
1655                    self.trigger_recursion_depth,
1656                    d.function,
1657                    MAX_TRIGGER_RECURSION,
1658                ))));
1659            }
1660            self.trigger_recursion_depth += 1;
1661            let res = self.execute_stmt_with_cancel(d.stmt, cancel);
1662            self.trigger_recursion_depth -= 1;
1663            res?;
1664        }
1665        Ok(())
1666    }
1667
1668    fn active_catalog_mut(&mut self) -> &mut Catalog {
1669        let tx = self.current_tx;
1670        match tx {
1671            Some(t) => match self.tx_catalogs.get_mut(&t) {
1672                Some(s) => &mut s.catalog,
1673                None => &mut self.catalog,
1674            },
1675            None => &mut self.catalog,
1676        }
1677    }
1678
1679    /// Read-only execute path. Succeeds for `SELECT` / `SHOW TABLES`
1680    /// / `SHOW COLUMNS`; returns `EngineError::WriteRequired` for
1681    /// every other statement, so the caller can fall through to the
1682    /// `&mut self` `execute` path under a write lock. Engine state is
1683    /// not mutated even on the success path (`rewrite_clock_calls`
1684    /// and `resolve_order_by_position` both mutate the locally-owned
1685    /// AST, not `self`).
1686    ///
1687    /// **v4.0 concurrency**: this is the entry point the server takes
1688    /// under an `RwLock::read()` so multiple `SELECT` clients run in
1689    /// parallel without serialising on a single mutex.
1690    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
1691        self.execute_readonly_with_cancel(sql, CancelToken::none())
1692    }
1693
1694    /// v4.5 — read path with cooperative cancellation. Token's
1695    /// `is_cancelled` is checked at the start (so a watchdog that
1696    /// already fired returns Cancelled immediately) and at row-loop
1697    /// checkpoints inside `exec_select`. SHOW paths are O(small) and
1698    /// don't bother checking.
1699    pub fn execute_readonly_with_cancel(
1700        &self,
1701        sql: &str,
1702        cancel: CancelToken<'_>,
1703    ) -> Result<QueryResult, EngineError> {
1704        cancel.check()?;
1705        let mut stmt = parser::parse_statement_with(sql, self.backslash_escapes)?;
1706        let now_micros = self.clock.map(|f| f());
1707        rewrite_clock_calls(&mut stmt, now_micros);
1708        if let Statement::Select(s) = &mut stmt {
1709            resolve_order_by_position(s);
1710            // v6.2.3 — cost-based JOIN reorder (read path).
1711            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1712        }
1713        self.execute_readonly_stmt_with_cancel(stmt, cancel)
1714    }
1715
1716    /// v7.18 — readonly dispatch on a pre-parsed `Statement`.
1717    /// Internal helper shared by the SQL-string path
1718    /// ([`Engine::execute_readonly_with_cancel`]) and the prepared-
1719    /// statement path ([`Engine::execute_readonly_prepared_on_snapshot_with_cancel`]).
1720    /// Statement-level transforms (clock rewrite, ORDER BY position,
1721    /// JOIN reorder, placeholder substitution) are the caller's
1722    /// responsibility — this helper assumes the AST is already
1723    /// execution-ready. Writes / DDL hit
1724    /// [`EngineError::WriteRequired`] the same way the SQL path does.
1725    fn execute_readonly_stmt_with_cancel(
1726        &self,
1727        stmt: Statement,
1728        cancel: CancelToken<'_>,
1729    ) -> Result<QueryResult, EngineError> {
1730        let result = match stmt {
1731            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1732            Statement::ShowTables => Ok(self.exec_show_tables()),
1733            Statement::ShowDatabases => Ok(self.exec_show_databases()),
1734            Statement::ShowCreateTable(name) => self.exec_show_create_table(&name),
1735            Statement::ShowIndexes(name) => self.exec_show_indexes(&name),
1736            Statement::ShowStatus => Ok(self.exec_show_status()),
1737            Statement::ShowVariables => Ok(self.exec_show_variables()),
1738            Statement::ShowProcesslist => Ok(self.exec_show_processlist()),
1739            Statement::ShowColumns(table) => self.exec_show_columns(&table),
1740            Statement::ShowUsers => Ok(self.exec_show_users()),
1741            Statement::ShowPublications => Ok(self.exec_show_publications()),
1742            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1743            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1744                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1745            )),
1746            Statement::Explain(e) => self.exec_explain(&e, cancel),
1747            _ => Err(EngineError::WriteRequired),
1748        };
1749        self.enforce_row_limit(result)
1750    }
1751
1752    /// v4.2: cap result-set size. Applied after the executor
1753    /// materialises rows but before they leave the engine — wrapping
1754    /// every Rows-returning exec_* function would scatter the check.
1755    ///
1756    /// v7.31 (memory campaign, bucket A) — the same choke point now
1757    /// also enforces the BYTE budget on the final result set, so
1758    /// single-table and aggregate paths (which don't route through
1759    /// the join materialiser's incremental accounting) still cannot
1760    /// hand the host an unbounded result. Intermediate single-table
1761    /// clones are the 7.31.x follow-up (design doc, bucket A).
1762    fn enforce_row_limit(
1763        &self,
1764        result: Result<QueryResult, EngineError>,
1765    ) -> Result<QueryResult, EngineError> {
1766        if let Ok(QueryResult::Rows { rows, .. }) = &result {
1767            if let Some(cap) = self.max_query_rows
1768                && rows.len() > cap
1769            {
1770                return Err(EngineError::RowLimitExceeded(cap));
1771            }
1772            if let Some(byte_cap) = self.max_query_bytes
1773                && approx_rows_bytes(rows) > byte_cap
1774            {
1775                return Err(EngineError::QueryBytesExceeded(byte_cap));
1776            }
1777        }
1778        result
1779    }
1780
1781    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
1782        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
1783    }
1784
1785    /// v4.5 — write path with cooperative cancellation. Same dispatch
1786    /// as `execute_in_with_cancel(sql, IMPLICIT_TX, cancel)`. Kept as
1787    /// a separate entry point for backward-compat with the v4.5
1788    /// public API.
1789    pub fn execute_with_cancel(
1790        &mut self,
1791        sql: &str,
1792        cancel: CancelToken<'_>,
1793    ) -> Result<QueryResult, EngineError> {
1794        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
1795    }
1796
1797    /// v4.41.1 multi-slot write entry. Routes `sql` through the TX
1798    /// slot identified by `tx_id` so spg-server dispatch can scope
1799    /// each implicit-wrap BEGIN..stmt..COMMIT to its own slot in
1800    /// `tx_catalogs`. `IMPLICIT_TX` is the legacy single-slot path
1801    /// every other caller (engine self-tests, replay, spg-embedded)
1802    /// implicitly takes via `execute()` / `execute_with_cancel()`.
1803    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
1804        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
1805    }
1806
1807    /// v4.41.1 write path with cooperative cancellation + explicit TX
1808    /// scope. Sets `self.current_tx` for the duration of the call so
1809    /// every `exec_*` helper transparently sees its TX's shadow
1810    /// catalog and savepoint stack; restores on exit so the field is
1811    /// only valid mid-call (no leakage across calls).
1812    pub fn execute_in_with_cancel(
1813        &mut self,
1814        sql: &str,
1815        tx_id: TxId,
1816        cancel: CancelToken<'_>,
1817    ) -> Result<QueryResult, EngineError> {
1818        let saved = self.current_tx;
1819        self.current_tx = Some(tx_id);
1820        let result = self.execute_inner_with_cancel(sql, cancel);
1821        self.current_tx = saved;
1822        result
1823    }
1824
1825    /// v6.1.1 — parse and pre-process a SQL string ONCE so the
1826    /// resulting [`Statement`] can be cached and re-executed via
1827    /// [`Engine::execute_prepared`]. Returns the same `Statement`
1828    /// the simple-query path would synthesise internally (clock
1829    /// rewrites + ORDER BY position-ref resolution applied at
1830    /// prepare time, since both are session-independent). The
1831    /// `$N` placeholders in the SQL stay as `Expr::Placeholder(n)`
1832    /// nodes; they're resolved to concrete values per-call by
1833    /// `execute_prepared`'s substitution walk.
1834    ///
1835    /// Pgwire's `Parse` (P) message lands here.
1836    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
1837        let mut stmt = parser::parse_statement_with(sql, self.backslash_escapes)?;
1838        let now_micros = self.clock.map(|f| f());
1839        rewrite_clock_calls(&mut stmt, now_micros);
1840        if let Statement::Select(s) = &mut stmt {
1841            // v6.4.1 — expand `GROUP BY ALL` to every non-aggregate
1842            // SELECT-list item BEFORE position / alias resolution so
1843            // downstream passes see the explicit list.
1844            expand_group_by_all(s);
1845            resolve_order_by_position(s);
1846            // v6.2.3 — cost-based JOIN reorder. No-op for
1847            // single-table FROMs or any non-INNER join shape.
1848            reorder::reorder_joins(s, &self.catalog, &self.statistics);
1849        }
1850        Ok(stmt)
1851    }
1852
1853    /// v6.3.0 — cached prepare. Returns a cloned `Statement` from
1854    /// the plan cache on hit, runs the full `prepare()` path on miss
1855    /// and inserts the resulting plan before returning. Skipping the
1856    /// parse + JOIN-reorder pipeline on hit is the dominant win for
1857    /// JDBC / sqlx / pgx clients that reuse the same SQL string.
1858    ///
1859    /// Returns a cloned `Statement` (not a borrow) because the
1860    /// pgwire layer owns its `PreparedStmt` map per-session and the
1861    /// engine-level cache must stay available for other sessions.
1862    /// Clone cost on a 5-table JOIN AST is well under the parse cost
1863    /// it replaces.
1864    pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1865        // v6.3.1 — version-aware lookup. If the cached plan was
1866        // prepared before the most recent ANALYZE, evict and replan.
1867        let current_version = self.statistics.version();
1868        if let Some(plan) = self.plan_cache.get(sql) {
1869            if plan.statistics_version == current_version {
1870                return Ok(plan.stmt.clone());
1871            }
1872            // Stale entry — fall through to evict + re-prepare.
1873        }
1874        self.plan_cache.evict(sql);
1875        let stmt = self.prepare(sql)?;
1876        let source_tables = plan_cache::collect_source_tables(&stmt);
1877        let plan = plan_cache::PreparedPlan {
1878            stmt: stmt.clone(),
1879            statistics_version: current_version,
1880            source_tables,
1881            describe_columns: alloc::vec::Vec::new(),
1882        };
1883        self.plan_cache.insert(String::from(sql), plan);
1884        Ok(stmt)
1885    }
1886
1887    /// v6.3.0 — read-only accessor for tests and v6.3.1 invalidation.
1888    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
1889        &self.plan_cache
1890    }
1891
1892    /// v6.3.0 — mutable accessor for v6.3.1 invalidation hooks.
1893    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
1894        &mut self.plan_cache
1895    }
1896
1897    /// v6.3.3 — Describe a prepared `Statement` without executing.
1898    /// Returns `(parameter_oids, output_columns)`. Empty
1899    /// `output_columns` means the statement has no row-producing
1900    /// shape we could resolve here (JOIN, subquery, non-SELECT, …)
1901    /// — pgwire layer maps that to a `NoData` reply.
1902    pub fn describe_prepared(&self, stmt: &Statement) -> (Vec<u32>, Vec<ColumnSchema>) {
1903        describe::describe_prepared(stmt, self.active_catalog())
1904    }
1905
1906    /// v6.1.1 — execute a [`Statement`] previously returned by
1907    /// [`Engine::prepare`], substituting `Expr::Placeholder(n)`
1908    /// nodes for the corresponding [`Value`] in `params` (1-based
1909    /// per PG: `$1` → `params[0]`). Bind-time string parameters
1910    /// are decoded into typed `Value`s by the pgwire layer before
1911    /// this call so the resulting AST hits the same execution
1912    /// path as a simple query — no SQL re-parse.
1913    ///
1914    /// Pgwire's `Execute` (E) message after a `Bind` (B) lands here.
1915    pub fn execute_prepared(
1916        &mut self,
1917        stmt: Statement,
1918        params: &[Value],
1919    ) -> Result<QueryResult, EngineError> {
1920        self.execute_prepared_with_cancel(stmt, params, CancelToken::none())
1921    }
1922
1923    /// v7.17.0 Phase 2.3 — prepared-statement entry that honors a
1924    /// caller-supplied `CancelToken`. Mirrors `execute_prepared`'s
1925    /// `current_tx` save/restore so the extended-query path stays
1926    /// transactionally consistent with the simple-query path.
1927    pub fn execute_prepared_with_cancel(
1928        &mut self,
1929        mut stmt: Statement,
1930        params: &[Value],
1931        cancel: CancelToken<'_>,
1932    ) -> Result<QueryResult, EngineError> {
1933        substitute_placeholders(&mut stmt, params)?;
1934        // v7.16.0 — set `current_tx` for the duration of the
1935        // dispatch so the `exec_*` helpers see the right TX
1936        // slot (matches what `execute_in_with_cancel` does for
1937        // simple-query). Pre-v7.16 the simple-query path
1938        // worked because every public entry point routed
1939        // through `execute_in_with_cancel`; the prepared path
1940        // skipped the wrap and so its INSERTs/UPDATEs landed
1941        // in the no-tx default slot, silently invisible to a
1942        // BEGIN/COMMIT-bracketed flow. Caught by spg-sqlx's
1943        // first transaction-visibility test.
1944        let saved = self.current_tx;
1945        self.current_tx = Some(IMPLICIT_TX);
1946        let result = self.execute_stmt_with_cancel(stmt, cancel);
1947        self.current_tx = saved;
1948        result
1949    }
1950
1951    fn execute_inner_with_cancel(
1952        &mut self,
1953        sql: &str,
1954        cancel: CancelToken<'_>,
1955    ) -> Result<QueryResult, EngineError> {
1956        cancel.check()?;
1957        let stmt = self.prepare(sql)?;
1958        // v6.5.1 — wrap the executor with a wall-clock window so we
1959        // can record into spg_stat_query. Skip when the engine has
1960        // no clock attached (no_std embedded callers).
1961        let start_us = self.clock.map(|f| f());
1962        let result = self.execute_stmt_with_cancel(stmt, cancel);
1963        if let (Some(t0), Ok(_)) = (start_us, &result) {
1964            let now = self.clock.map_or(t0, |f| f());
1965            let elapsed = now.saturating_sub(t0).max(0) as u64;
1966            self.query_stats.record(sql, elapsed, now as u64);
1967            // v6.5.6 — slow-query log: fire callback when elapsed
1968            // exceeds the configured floor.
1969            if let (Some(threshold), Some(logger)) =
1970                (self.slow_query_threshold_us, self.slow_query_logger)
1971                && elapsed >= threshold
1972            {
1973                logger(sql, elapsed);
1974            }
1975        }
1976        result
1977    }
1978
1979    fn execute_stmt_with_cancel(
1980        &mut self,
1981        stmt: Statement,
1982        cancel: CancelToken<'_>,
1983    ) -> Result<QueryResult, EngineError> {
1984        cancel.check()?;
1985        // v7.17.0 Phase 1.1 — pre-resolve nextval / currval /
1986        // setval calls in the statement tree. Walks SELECT
1987        // projection, INSERT VALUES, UPDATE SET, DELETE WHERE,
1988        // and DEFAULT exprs; replaces sequence FunctionCall
1989        // nodes with concrete Literal values minted against the
1990        // catalog. This is the only place that mutates sequence
1991        // state from a SELECT-shaped path (exec_select_cancel is
1992        // `&self` and can't reach the catalog mutably).
1993        //
1994        // Fast-path: when no sequences exist anywhere in the
1995        // catalog (the typical hot-path INSERT load), skip the
1996        // walker entirely. Single map-emptiness check on the
1997        // catalog beats walking every expression on every call.
1998        let mut stmt = stmt;
1999        // v7.17 dump-compat — the fast-path check
2000        // `sequences().is_empty()` skips pre-resolve when no
2001        // sequence exists in the *currently active* catalog
2002        // snapshot. The committed catalog or the implicit-TX
2003        // catalog may legitimately disagree on this between
2004        // CREATE SEQUENCE and a later setval(): always run the
2005        // resolver — the walk is O(expr-count) and dwarfed by
2006        // the parse cost we just paid.
2007        self.pre_resolve_sequence_calls_in_statement(&mut stmt)?;
2008        let result = match stmt {
2009            Statement::CreateTable(s) => self.exec_create_table(s),
2010            // v7.9.15 — CREATE EXTENSION is a no-op on SPG. Returns
2011            // CommandOk with affected=0; modified_catalog=false so
2012            // the WAL doesn't grow a useless entry. mailrs F3.
2013            Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
2014                affected: 0,
2015                modified_catalog: false,
2016            }),
2017            // v7.16.2 — DO $$ ... $$ block. mailrs round-10 A.2
2018            // — the pre-v7.9.27 no-op SILENTLY swallowed every
2019            // mailrs migrate-038/-040/-042 idempotent rename
2020            // (the IF EXISTS … THEN ALTER … END block never
2021            // ran). v7.16.2 dispatches to exec_do_block which
2022            // runs the PlPgSqlBlock at top level via the same
2023            // execute_stmts machinery the trigger executor
2024            // uses (NEW=None, OLD=None — DO blocks have no
2025            // row context).
2026            Statement::DoBlock(body) => self.exec_do_block(body),
2027            // v7.14.0 — empty-statement no-op for pg_dump /
2028            // mysqldump preamble lines that collapse to nothing
2029            // after comment-stripping.
2030            Statement::Empty => Ok(QueryResult::CommandOk {
2031                affected: 0,
2032                modified_catalog: false,
2033            }),
2034            Statement::DropTable { names, if_exists } => self.exec_drop_table(names, if_exists),
2035            Statement::DropIndex { name, if_exists } => self.exec_drop_index(name, if_exists),
2036            Statement::CreateIndex(s) => self.exec_create_index(s),
2037            Statement::Insert(s) => self.exec_insert(s),
2038            Statement::Update(mut s) => {
2039                // Materialise uncorrelated subqueries in SET / WHERE
2040                // before the row walk — the SELECT path has done this
2041                // since v4.10; UPDATE gained it for mailrs's
2042                // `UPDATE … WHERE id IN (SELECT … FOR UPDATE SKIP
2043                // LOCKED)` claim pattern (embed round-12).
2044                for (_, e) in &mut s.assignments {
2045                    self.resolve_expr_subqueries(e, cancel)?;
2046                }
2047                if let Some(w) = &mut s.where_ {
2048                    self.resolve_expr_subqueries(w, cancel)?;
2049                }
2050                self.exec_update_cancel(&s, cancel)
2051            }
2052            Statement::Delete(mut s) => {
2053                if let Some(w) = &mut s.where_ {
2054                    self.resolve_expr_subqueries(w, cancel)?;
2055                }
2056                self.exec_delete_cancel(&s, cancel)
2057            }
2058            Statement::Merge(s) => self.exec_merge_cancel(&s, cancel),
2059            Statement::Select(s) => self.exec_select_cancel(&s, cancel),
2060            Statement::Begin => self.exec_begin(),
2061            Statement::Commit => self.exec_commit(),
2062            Statement::Rollback => self.exec_rollback(),
2063            Statement::Savepoint(name) => self.exec_savepoint(name),
2064            Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
2065            Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
2066            Statement::ShowTables => Ok(self.exec_show_tables()),
2067            Statement::ShowDatabases => Ok(self.exec_show_databases()),
2068            Statement::ShowCreateTable(name) => self.exec_show_create_table(&name),
2069            Statement::ShowIndexes(name) => self.exec_show_indexes(&name),
2070            Statement::ShowStatus => Ok(self.exec_show_status()),
2071            Statement::ShowVariables => Ok(self.exec_show_variables()),
2072            Statement::ShowProcesslist => Ok(self.exec_show_processlist()),
2073            Statement::ShowColumns(table) => self.exec_show_columns(&table),
2074            Statement::ShowUsers => Ok(self.exec_show_users()),
2075            Statement::ShowPublications => Ok(self.exec_show_publications()),
2076            Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
2077            Statement::CreateUser(s) => self.exec_create_user(&s),
2078            Statement::DropUser(name) => self.exec_drop_user(&name),
2079            Statement::Explain(e) => self.exec_explain(&e, cancel),
2080            Statement::AlterIndex(s) => self.exec_alter_index(s),
2081            Statement::AlterTable(s) => self.exec_alter_table(s),
2082            Statement::CreatePublication(s) => self.exec_create_publication(s),
2083            Statement::DropPublication(name) => self.exec_drop_publication(&name),
2084            Statement::CreateSubscription(s) => self.exec_create_subscription(s),
2085            Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
2086            // v6.1.7 — WAIT FOR WAL POSITION needs `lag_state`,
2087            // which lives in spg-server's ServerState. The engine
2088            // surfaces a clear error; the server-layer dispatch
2089            // intercepts the SQL before it reaches the engine on
2090            // a server build, so this arm only fires for
2091            // engine-only callers (spg-embedded, lib tests).
2092            Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
2093                "WAIT FOR WAL POSITION must be handled by the server layer".into(),
2094            )),
2095            // v6.2.0 — ANALYZE recomputes per-column histograms.
2096            Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
2097            // v6.7.3 — COMPACT COLD SEGMENTS.
2098            Statement::CompactColdSegments => self.exec_compact_cold_segments(),
2099            // v7.12.1 — SET / RESET session parameter. Engine
2100            // tracks the value in `session_params`; FTS dispatcher
2101            // reads `default_text_search_config`. Everything else
2102            // is a recorded no-op (PG dump compat).
2103            Statement::SetParameter { name, value } => {
2104                self.set_session_param(name, value);
2105                Ok(QueryResult::CommandOk {
2106                    affected: 0,
2107                    modified_catalog: false,
2108                })
2109            }
2110            // v7.14.0 — MySQL multi-assignment SET. Each pair runs
2111            // through `set_session_param` so engine-known params
2112            // (FOREIGN_KEY_CHECKS, session_replication_role, …) take
2113            // effect; unknown pairs (including `@VAR` LHS from the
2114            // mysqldump preamble) are recorded then ignored.
2115            Statement::SetParameterList(pairs) => {
2116                for (name, value) in pairs {
2117                    self.set_session_param(name, value);
2118                }
2119                Ok(QueryResult::CommandOk {
2120                    affected: 0,
2121                    modified_catalog: false,
2122                })
2123            }
2124            // v7.12.4 — CREATE FUNCTION / CREATE TRIGGER / DROP …
2125            // for the PL/pgSQL trigger surface. exec_* methods are
2126            // defined alongside the existing CREATE handlers below.
2127            Statement::CreateFunction(s) => self.exec_create_function(s),
2128            Statement::CreateTrigger(s) => self.exec_create_trigger(s),
2129            Statement::DropTrigger {
2130                name,
2131                table,
2132                if_exists,
2133            } => self.exec_drop_trigger(&name, &table, if_exists),
2134            Statement::DropFunction { name, if_exists } => {
2135                self.exec_drop_function(&name, if_exists)
2136            }
2137            Statement::CreateSequence(s) => self.exec_create_sequence(s),
2138            Statement::AlterSequence(s) => self.exec_alter_sequence(s),
2139            Statement::DropSequence { names, if_exists } => {
2140                self.exec_drop_sequence(&names, if_exists)
2141            }
2142            Statement::CreateView(s) => self.exec_create_view(s),
2143            Statement::DropView { names, if_exists } => self.exec_drop_view(&names, if_exists),
2144            Statement::CreateMaterializedView(s) => self.exec_create_materialized_view(s),
2145            Statement::RefreshMaterializedView { name, with_data } => {
2146                self.exec_refresh_materialized_view(&name, with_data)
2147            }
2148            Statement::DropMaterializedView { names, if_exists } => {
2149                self.exec_drop_materialized_view(&names, if_exists)
2150            }
2151            Statement::CreateType(s) => self.exec_create_type(s),
2152            Statement::DropType { names, if_exists } => self.exec_drop_type(&names, if_exists),
2153            Statement::CreateDomain(s) => self.exec_create_domain(s),
2154            Statement::DropDomain { names, if_exists } => self.exec_drop_domain(&names, if_exists),
2155            Statement::CreateSchema {
2156                name,
2157                if_not_exists,
2158            } => self.exec_create_schema(name, if_not_exists),
2159            Statement::DropSchema { names, if_exists } => self.exec_drop_schema(&names, if_exists),
2160            Statement::ResetParameter(target) => {
2161                match target {
2162                    None => self.session_params.clear(),
2163                    Some(name) => {
2164                        self.session_params.remove(&name.to_ascii_lowercase());
2165                    }
2166                }
2167                Ok(QueryResult::CommandOk {
2168                    affected: 0,
2169                    modified_catalog: false,
2170                })
2171            }
2172        };
2173        self.enforce_row_limit(result)
2174    }
2175
2176    /// v6.1.2 — `CREATE PUBLICATION` runtime path. Duplicate names
2177    /// surface as `EngineError::Unsupported` so the existing PG-wire
2178    /// error mapping stays uniform; the message carries the name so
2179    /// operators can grep replication-log noise. Inside-transaction
2180    /// invocation is rejected (matches `CREATE USER` / `DROP USER`
2181    /// stance) — replication-catalog mutation is a connection-level
2182    /// administrative op, not a transactional one.
2183    fn exec_create_publication(
2184        &mut self,
2185        s: CreatePublicationStatement,
2186    ) -> Result<QueryResult, EngineError> {
2187        // v6.1.4 — the v6.1.2 "no DDL inside a transaction" guard
2188        // was over-cautious: it also blocked the auto-commit wrap
2189        // path (which begins an internal TX around every WAL-
2190        // logged statement). PG itself allows CREATE PUBLICATION
2191        // inside a transaction (it rolls back with the TX).
2192        self.publications
2193            .create(s.name, s.scope)
2194            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
2195        Ok(QueryResult::CommandOk {
2196            affected: 1,
2197            modified_catalog: true,
2198        })
2199    }
2200
2201    /// v6.1.2 — `DROP PUBLICATION` runtime path. PG-compatible silent
2202    /// no-op when the publication doesn't exist (returns `affected=0`
2203    /// in that case so the wire-level command tag distinguishes
2204    /// "dropped" from "no-op", though both succeed).
2205    fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2206        let removed = self.publications.drop(name);
2207        Ok(QueryResult::CommandOk {
2208            affected: usize::from(removed),
2209            modified_catalog: removed,
2210        })
2211    }
2212
    /// v6.1.2 — read access to the publication catalog.
    ///
    /// Returns a shared borrow of the engine's `Publications` value; no
    /// copy is made. Used by the v6.1.5 publisher-side WAL filter, by
    /// `SHOW PUBLICATIONS` (v6.1.3+), and by e2e tests that need to
    /// assert state without going through the wire.
    pub const fn publications(&self) -> &publications::Publications {
        &self.publications
    }
2220
2221    /// v6.1.4 — `CREATE SUBSCRIPTION` runtime path. Defaults
2222    /// `enabled = true` and `last_received_pos = 0` for a freshly-
2223    /// created subscription. The actual worker thread is spawned
2224    /// by spg-server once the engine returns success.
2225    fn exec_create_subscription(
2226        &mut self,
2227        s: CreateSubscriptionStatement,
2228    ) -> Result<QueryResult, EngineError> {
2229        // See exec_create_publication — the in_transaction gate
2230        // was over-cautious; the auto-commit wrap path holds an
2231        // internal TX that this check was incorrectly blocking.
2232        let sub = subscriptions::Subscription {
2233            conn_str: s.conn_str,
2234            publications: s.publications,
2235            enabled: true,
2236            last_received_pos: 0,
2237        };
2238        self.subscriptions
2239            .create(s.name, sub)
2240            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
2241        Ok(QueryResult::CommandOk {
2242            affected: 1,
2243            modified_catalog: true,
2244        })
2245    }
2246
2247    /// v6.1.4 — `DROP SUBSCRIPTION`. Silent no-op when the name
2248    /// doesn't exist (PG-compatible). The associated worker is
2249    /// torn down by spg-server when it observes the catalog
2250    /// change at the next snapshot or via the engine's
2251    /// subscriptions accessor (the worker polls the catalog on
2252    /// reconnect; v6.1.5's filter-side will tighten this to an
2253    /// explicit signal).
2254    fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2255        let removed = self.subscriptions.drop(name);
2256        Ok(QueryResult::CommandOk {
2257            affected: usize::from(removed),
2258            modified_catalog: removed,
2259        })
2260    }
2261
    /// v6.1.4 — read access to the subscription catalog.
    ///
    /// Returns a shared borrow of the engine's `Subscriptions` value; no
    /// copy is made. Used by the subscription worker (to read its own
    /// row and find its publications + last applied position), by
    /// `SHOW SUBSCRIPTIONS`, and by e2e tests asserting state directly.
    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
        &self.subscriptions
    }
2269
    /// v6.1.4 — write access to a subscription's `last_received_pos`.
    ///
    /// Forwards `name` and `pos` to the catalog's
    /// `update_last_received_pos` and returns its result unchanged. The
    /// worker calls this after each apply batch (under the engine's
    /// write-lock).
    ///
    /// Returns `false` when the subscription was dropped between when
    /// the worker received the record and when this call landed.
    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
        self.subscriptions.update_last_received_pos(name, pos)
    }
2278
2279    /// v6.1.4 — `SHOW SUBSCRIPTIONS` row materialisation. Returns
2280    /// `(name, conn_str, publications, enabled, last_received_pos)`
2281    /// ordered by subscription name. The `publications` column is
2282    /// the comma-joined list ("p1, p2") for ergonomic SHOW output;
2283    /// callers wanting structured access read `Engine::subscriptions`.
2284    fn exec_show_subscriptions(&self) -> QueryResult {
2285        let columns = alloc::vec![
2286            ColumnSchema::new("name", DataType::Text, false),
2287            ColumnSchema::new("conn_str", DataType::Text, false),
2288            ColumnSchema::new("publications", DataType::Text, false),
2289            ColumnSchema::new("enabled", DataType::Bool, false),
2290            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
2291        ];
2292        let rows: Vec<Row> = self
2293            .subscriptions
2294            .iter()
2295            .map(|(name, sub)| {
2296                Row::new(alloc::vec![
2297                    Value::Text(name.clone()),
2298                    Value::Text(sub.conn_str.clone()),
2299                    Value::Text(sub.publications.join(", ")),
2300                    Value::Bool(sub.enabled),
2301                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
2302                ])
2303            })
2304            .collect();
2305        QueryResult::Rows { columns, rows }
2306    }
2307
2308    /// v6.2.0 — materialise `spg_statistic` rows. One row per
2309    /// `(table, column)` pair tracked in `Statistics`, with
2310    /// `histogram_bounds` rendered as a `[v0, v1, ...]` string —
2311    /// the same canonical form vector literals use for round-trip.
2312    fn exec_spg_statistic(&self) -> QueryResult {
2313        let columns = alloc::vec![
2314            ColumnSchema::new("table_name", DataType::Text, false),
2315            ColumnSchema::new("column_name", DataType::Text, false),
2316            ColumnSchema::new("null_frac", DataType::Float, false),
2317            ColumnSchema::new("n_distinct", DataType::BigInt, false),
2318            ColumnSchema::new("histogram_bounds", DataType::Text, false),
2319            // v6.7.0 — appended column (v6.2.0 stability contract
2320            // allows APPEND to spg_statistic, not reorder/rename).
2321            // Reports the cached per-table cold-row count; same
2322            // value across every column row of the same table.
2323            ColumnSchema::new("cold_row_count", DataType::BigInt, false),
2324        ];
2325        let rows: Vec<Row> = self
2326            .statistics
2327            .iter()
2328            .map(|((t, c), s)| {
2329                let cold = self
2330                    .catalog
2331                    .get(t)
2332                    .map_or(0, |table| table.cold_row_count());
2333                Row::new(alloc::vec![
2334                    Value::Text(t.clone()),
2335                    Value::Text(c.clone()),
2336                    Value::Float(f64::from(s.null_frac)),
2337                    Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
2338                    Value::Text(render_histogram_bounds(&s.histogram_bounds)),
2339                    Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
2340                ])
2341            })
2342            .collect();
2343        QueryResult::Rows { columns, rows }
2344    }
2345
2346    /// v6.5.0 — materialise `spg_stat_replication` rows. One row
2347    /// per subscription with `(name, conn_str, publications,
2348    /// last_received_pos, enabled)`. Surface mirrors
2349    /// `SHOW SUBSCRIPTIONS` but follows the virtual-table dispatch
2350    /// shape so it composes with SELECT clauses (WHERE, projection
2351    /// onto specific columns, etc).
2352    fn exec_spg_stat_replication(&self) -> QueryResult {
2353        let columns = alloc::vec![
2354            ColumnSchema::new("name", DataType::Text, false),
2355            ColumnSchema::new("conn_str", DataType::Text, false),
2356            ColumnSchema::new("publications", DataType::Text, false),
2357            ColumnSchema::new("last_received_pos", DataType::BigInt, false),
2358            ColumnSchema::new("enabled", DataType::Bool, false),
2359        ];
2360        let rows: Vec<Row> = self
2361            .subscriptions
2362            .iter()
2363            .map(|(name, sub)| {
2364                Row::new(alloc::vec![
2365                    Value::Text(name.clone()),
2366                    Value::Text(sub.conn_str.clone()),
2367                    Value::Text(sub.publications.join(",")),
2368                    Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
2369                    Value::Bool(sub.enabled),
2370                ])
2371            })
2372            .collect();
2373        QueryResult::Rows { columns, rows }
2374    }
2375
2376    /// v6.5.0 — materialise `spg_stat_segment` rows. One row per
2377    /// cold-tier segment with `(segment_id, num_rows, num_pages,
2378    /// total_bytes)`.
2379    ///
2380    /// v6.7.0 — appended `table_name` column resolves the v6.5.0
2381    /// carve-out. Walks every user table's BTree indices to find
2382    /// which table's Cold locators point at each segment. Empty
2383    /// string for orphan segments (loaded via SPG_PRELOAD_COLD_SEGMENT
2384    /// before any index registered a locator). The walk is
2385    /// O(tables × indices × keys); cached per call, not across
2386    /// calls — re-walked on every `SELECT * FROM spg_stat_segment`.
2387    /// v7.31 (memory campaign) — walk the committed catalog and
2388    /// build the per-bucket memory snapshot. O(rows + index
2389    /// entries): operator/monitoring surface, not a query path.
2390    pub fn memory_stats(&self) -> MemoryStats {
2391        let mut tables: Vec<TableMemoryStats> = Vec::new();
2392        let (mut total_enc, mut total_res, mut total_idx) = (0u64, 0u64, 0u64);
2393        for tname in self.catalog.table_names() {
2394            if is_internal_table_name(&tname) {
2395                continue;
2396            }
2397            let Some(t) = self.catalog.get(&tname) else {
2398                continue;
2399            };
2400            let resident: u64 = t.rows().iter().map(|r| approx_row_bytes(r) as u64).sum();
2401            let mut idx_bytes: u64 = 0;
2402            for idx in t.indices() {
2403                idx_bytes += match &idx.kind {
2404                    spg_storage::IndexKind::BTree(map) => {
2405                        let mut b: u64 = 0;
2406                        for (_, locs) in map.iter() {
2407                            b += (core::mem::size_of::<spg_storage::IndexKey>()
2408                                + 24
2409                                + locs.len() * core::mem::size_of::<spg_storage::RowLocator>())
2410                                as u64;
2411                        }
2412                        b
2413                    }
2414                    // Parametric estimate: per node, the dense
2415                    // layer-0 neighbour list dominates.
2416                    spg_storage::IndexKind::Nsw(g) => {
2417                        (g.levels.len() * (g.m_max_0 * 8 + 16)) as u64
2418                    }
2419                    // BRIN is block-range metadata — flat token.
2420                    _ => 1024,
2421                };
2422            }
2423            total_enc += t.hot_bytes();
2424            total_res += resident;
2425            total_idx += idx_bytes;
2426            tables.push(TableMemoryStats {
2427                name: tname.clone(),
2428                hot_rows: t.rows().len() as u64,
2429                cold_rows: t.cold_row_count(),
2430                hot_encoded_bytes: t.hot_bytes(),
2431                approx_resident_bytes: resident,
2432                index_count: t.indices().len() as u64,
2433                approx_index_bytes: idx_bytes,
2434            });
2435        }
2436        MemoryStats {
2437            tables,
2438            total_hot_encoded_bytes: total_enc,
2439            total_approx_resident_bytes: total_res,
2440            total_approx_index_bytes: total_idx,
2441            max_query_bytes: self.max_query_bytes,
2442        }
2443    }
2444
2445    /// v7.31 — `SELECT * FROM spg_memory_stats`: one row per user
2446    /// table (same numbers as `Engine::memory_stats()`), so the
2447    /// server path gets the meter through plain SQL.
2448    fn exec_spg_memory_stats(&self) -> QueryResult {
2449        let columns = alloc::vec![
2450            ColumnSchema::new("table_name", DataType::Text, false),
2451            ColumnSchema::new("hot_rows", DataType::BigInt, false),
2452            ColumnSchema::new("cold_rows", DataType::BigInt, false),
2453            ColumnSchema::new("hot_encoded_bytes", DataType::BigInt, false),
2454            ColumnSchema::new("approx_resident_bytes", DataType::BigInt, false),
2455            ColumnSchema::new("index_count", DataType::BigInt, false),
2456            ColumnSchema::new("approx_index_bytes", DataType::BigInt, false),
2457        ];
2458        #[allow(clippy::cast_possible_wrap)]
2459        let rows: Vec<Row> = self
2460            .memory_stats()
2461            .tables
2462            .into_iter()
2463            .map(|t| {
2464                Row::new(alloc::vec![
2465                    Value::Text(t.name),
2466                    Value::BigInt(t.hot_rows as i64),
2467                    Value::BigInt(t.cold_rows as i64),
2468                    Value::BigInt(t.hot_encoded_bytes as i64),
2469                    Value::BigInt(t.approx_resident_bytes as i64),
2470                    Value::BigInt(t.index_count as i64),
2471                    Value::BigInt(t.approx_index_bytes as i64),
2472                ])
2473            })
2474            .collect();
2475        QueryResult::Rows { columns, rows }
2476    }
2477
2478    fn exec_spg_stat_segment(&self) -> QueryResult {
2479        let columns = alloc::vec![
2480            ColumnSchema::new("segment_id", DataType::BigInt, false),
2481            ColumnSchema::new("table_name", DataType::Text, false),
2482            ColumnSchema::new("num_rows", DataType::BigInt, false),
2483            ColumnSchema::new("num_pages", DataType::BigInt, false),
2484            ColumnSchema::new("total_bytes", DataType::BigInt, false),
2485        ];
2486        // v6.7.0 — build a segment_id → table_name map by walking
2487        // every user table's BTree indices once. O(tables × indices
2488        // × keys) for the v6.5.0 carve-out resolution; acceptable
2489        // because spg_stat_segment is operator-facing (not on a
2490        // hot-loop path).
2491        let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
2492        for tname in self.catalog.table_names() {
2493            if is_internal_table_name(&tname) {
2494                continue;
2495            }
2496            let Some(t) = self.catalog.get(&tname) else {
2497                continue;
2498            };
2499            for idx in t.indices() {
2500                if let spg_storage::IndexKind::BTree(map) = &idx.kind {
2501                    for (_, locs) in map.iter() {
2502                        for loc in locs {
2503                            if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
2504                                segment_owners
2505                                    .entry(*segment_id)
2506                                    .or_insert_with(|| tname.clone());
2507                            }
2508                        }
2509                    }
2510                }
2511            }
2512        }
2513        let rows: Vec<Row> = self
2514            .catalog
2515            .cold_segment_ids_global()
2516            .iter()
2517            .filter_map(|&id| {
2518                let seg = self.catalog.cold_segment(id)?;
2519                let meta = seg.meta();
2520                let owner = segment_owners.get(&id).cloned().unwrap_or_default();
2521                Some(Row::new(alloc::vec![
2522                    Value::BigInt(i64::from(id)),
2523                    Value::Text(owner),
2524                    Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
2525                    Value::BigInt(i64::from(meta.num_pages)),
2526                    Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
2527                ]))
2528            })
2529            .collect();
2530        QueryResult::Rows { columns, rows }
2531    }
2532
2533    /// v6.5.1 — materialise `spg_stat_query` rows. One row per
2534    /// distinct SQL text recorded since the engine booted, capped
2535    /// at `QUERY_STATS_MAX` (1024). Columns:
2536    ///   sql, exec_count, total_us, mean_us, max_us, last_seen_us
2537    /// mean_us = total_us / exec_count (saturating).
2538    fn exec_spg_stat_query(&self) -> QueryResult {
2539        let columns = alloc::vec![
2540            ColumnSchema::new("sql", DataType::Text, false),
2541            ColumnSchema::new("exec_count", DataType::BigInt, false),
2542            ColumnSchema::new("total_us", DataType::BigInt, false),
2543            ColumnSchema::new("mean_us", DataType::BigInt, false),
2544            ColumnSchema::new("max_us", DataType::BigInt, false),
2545            ColumnSchema::new("last_seen_us", DataType::BigInt, false),
2546        ];
2547        let rows: Vec<Row> = self
2548            .query_stats
2549            .snapshot()
2550            .into_iter()
2551            .map(|(sql, s)| {
2552                let mean = if s.exec_count == 0 {
2553                    0
2554                } else {
2555                    s.total_us / s.exec_count
2556                };
2557                Row::new(alloc::vec![
2558                    Value::Text(sql),
2559                    Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
2560                    Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
2561                    Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
2562                    Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
2563                    Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
2564                ])
2565            })
2566            .collect();
2567        QueryResult::Rows { columns, rows }
2568    }
2569
    /// v6.5.2 — register a connection-state provider (builder style).
    ///
    /// Consumes the engine, stores `f` as its activity provider
    /// (replacing any previously registered one) and returns the engine.
    /// spg-server calls this at startup with a function that snapshots
    /// its per-pgwire-connection registry. The engine reads through the
    /// callback on `SELECT * FROM spg_stat_activity`.
    #[must_use]
    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
        self.activity_provider = Some(f);
        self
    }
2579
    /// v6.5.3 — register audit chain provider + verifier (builder
    /// style).
    ///
    /// Consumes the engine, stores `chain` as the audit-chain provider
    /// and `verify` as the audit verifier (both replace any earlier
    /// registration) and returns the engine. The two callbacks are read
    /// by the `spg_audit_chain` and `spg_audit_verify` materialisers
    /// respectively.
    #[must_use]
    pub const fn with_audit_providers(
        mut self,
        chain: AuditChainProvider,
        verify: AuditVerifier,
    ) -> Self {
        self.audit_chain_provider = Some(chain);
        self.audit_verifier = Some(verify);
        self
    }
2591
    /// v6.5.6 — register a slow-query log callback (builder style).
    ///
    /// Consumes the engine, records both `threshold_us` and `logger`, and
    /// returns the engine. `threshold_us` is the floor (in
    /// microseconds); only executes above the floor fire the callback.
    /// spg-server wires this from `SPG_SLOW_QUERY_THRESHOLD_MS`
    /// (default 100 ms).
    #[must_use]
    pub const fn with_slow_query_log(mut self, threshold_us: u64, logger: SlowQueryLogger) -> Self {
        self.slow_query_threshold_us = Some(threshold_us);
        self.slow_query_logger = Some(logger);
        self
    }
2602
    /// v6.5.6 — operator knob for plan cache cap.
    ///
    /// Forwards `n` to the plan cache's `set_max_entries`. spg-server
    /// reads the `SPG_PLAN_CACHE_MAX` env at startup and uses this to
    /// override the compile-time default of 256.
    pub fn set_plan_cache_max(&mut self, n: usize) {
        self.plan_cache.set_max_entries(n);
    }
2609
2610    /// v6.5.2 — materialise `spg_stat_activity` rows. Pulls a fresh
2611    /// snapshot from the registered `ActivityProvider`. Returns an
2612    /// empty result set when no provider is registered (the no_std
2613    /// embedded path with no pgwire layer).
2614    fn exec_spg_stat_activity(&self) -> QueryResult {
2615        let columns = alloc::vec![
2616            ColumnSchema::new("pid", DataType::Int, false),
2617            ColumnSchema::new("user", DataType::Text, false),
2618            ColumnSchema::new("started_at_us", DataType::BigInt, false),
2619            ColumnSchema::new("current_sql", DataType::Text, false),
2620            ColumnSchema::new("wait_event", DataType::Text, false),
2621            ColumnSchema::new("elapsed_us", DataType::BigInt, false),
2622            ColumnSchema::new("in_transaction", DataType::Bool, false),
2623            ColumnSchema::new("application_name", DataType::Text, false),
2624        ];
2625        let rows: Vec<Row> = self
2626            .activity_provider
2627            .map(|f| f())
2628            .unwrap_or_default()
2629            .into_iter()
2630            .map(|r| {
2631                Row::new(alloc::vec![
2632                    Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
2633                    Value::Text(r.user),
2634                    Value::BigInt(r.started_at_us),
2635                    Value::Text(r.current_sql),
2636                    Value::Text(r.wait_event),
2637                    Value::BigInt(r.elapsed_us),
2638                    Value::Bool(r.in_transaction),
2639                    Value::Text(r.application_name),
2640                ])
2641            })
2642            .collect();
2643        QueryResult::Rows { columns, rows }
2644    }
2645
2646    /// v6.5.4 — materialise `spg_table_ddl` rows. One row per user
2647    /// table with `(table_name, ddl)`. Reconstructed from catalog
2648    /// state on demand.
2649    fn exec_spg_table_ddl(&self) -> QueryResult {
2650        let columns = alloc::vec![
2651            ColumnSchema::new("table_name", DataType::Text, false),
2652            ColumnSchema::new("ddl", DataType::Text, false),
2653        ];
2654        let rows: Vec<Row> = self
2655            .catalog
2656            .table_names()
2657            .into_iter()
2658            .filter(|n| !is_internal_table_name(n))
2659            .filter_map(|name| {
2660                let table = self.catalog.get(&name)?;
2661                let ddl = render_create_table(&name, &table.schema().columns);
2662                Some(Row::new(alloc::vec![Value::Text(name), Value::Text(ddl),]))
2663            })
2664            .collect();
2665        QueryResult::Rows { columns, rows }
2666    }
2667
2668    /// v6.5.4 — materialise `spg_role_ddl` rows. One row per user
2669    /// with `(role_name, ddl)`. Password is redacted (matches the
2670    /// `Statement::CreateUser` Display which prints `'<redacted>'`).
2671    fn exec_spg_role_ddl(&self) -> QueryResult {
2672        let columns = alloc::vec![
2673            ColumnSchema::new("role_name", DataType::Text, false),
2674            ColumnSchema::new("ddl", DataType::Text, false),
2675        ];
2676        let rows: Vec<Row> = self
2677            .users
2678            .iter()
2679            .map(|(name, rec)| {
2680                let ddl = alloc::format!(
2681                    "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
2682                    rec.role.as_str(),
2683                );
2684                Row::new(alloc::vec![
2685                    Value::Text(String::from(name)),
2686                    Value::Text(ddl)
2687                ])
2688            })
2689            .collect();
2690        QueryResult::Rows { columns, rows }
2691    }
2692
2693    /// v6.5.4 — materialise `spg_database_ddl`: single row whose
2694    /// `ddl` column concatenates every user table's CREATE +
2695    /// every role's CREATE in deterministic catalog order. Suitable
2696    /// for piping back through `Engine::execute` to recreate a
2697    /// schema-equivalent database.
2698    fn exec_spg_database_ddl(&self) -> QueryResult {
2699        let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
2700        let mut out = String::new();
2701        for (name, rec) in self.users.iter() {
2702            out.push_str(&alloc::format!(
2703                "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
2704                rec.role.as_str(),
2705            ));
2706        }
2707        for name in self.catalog.table_names() {
2708            if is_internal_table_name(&name) {
2709                continue;
2710            }
2711            if let Some(table) = self.catalog.get(&name) {
2712                out.push_str(&render_create_table(&name, &table.schema().columns));
2713                out.push_str(";\n");
2714            }
2715        }
2716        QueryResult::Rows {
2717            columns,
2718            rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
2719        }
2720    }
2721
2722    /// v6.5.3 — materialise `spg_audit_chain` rows. Pulls a fresh
2723    /// snapshot from the registered provider; empty when no
2724    /// provider is set.
2725    fn exec_spg_audit_chain(&self) -> QueryResult {
2726        let columns = alloc::vec![
2727            ColumnSchema::new("seq", DataType::BigInt, false),
2728            ColumnSchema::new("ts_ms", DataType::BigInt, false),
2729            ColumnSchema::new("prev_hash", DataType::Text, false),
2730            ColumnSchema::new("entry_hash", DataType::Text, false),
2731            ColumnSchema::new("sql", DataType::Text, false),
2732        ];
2733        let rows: Vec<Row> = self
2734            .audit_chain_provider
2735            .map(|f| f())
2736            .unwrap_or_default()
2737            .into_iter()
2738            .map(|r| {
2739                Row::new(alloc::vec![
2740                    Value::BigInt(r.seq),
2741                    Value::BigInt(r.ts_ms),
2742                    Value::Text(r.prev_hash_hex),
2743                    Value::Text(r.entry_hash_hex),
2744                    Value::Text(r.sql),
2745                ])
2746            })
2747            .collect();
2748        QueryResult::Rows { columns, rows }
2749    }
2750
2751    /// v6.5.3 — materialise `spg_audit_verify` single-row result.
2752    /// `(verified_count, broken_at_seq)` — broken_at_seq is `-1`
2753    /// on a clean chain. Returns one row with both values 0 when
2754    /// no verifier is registered (no-data fallback for embedded
2755    /// callers).
2756    fn exec_spg_audit_verify(&self) -> QueryResult {
2757        let columns = alloc::vec![
2758            ColumnSchema::new("verified_count", DataType::BigInt, false),
2759            ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
2760        ];
2761        let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
2762        let row = Row::new(alloc::vec![Value::BigInt(verified), Value::BigInt(broken),]);
2763        QueryResult::Rows {
2764            columns,
2765            rows: alloc::vec![row],
2766        }
2767    }
2768
2769    /// v6.5.1 — read-only accessor for tests + v6.5.6 ops resets.
2770    pub fn query_stats(&self) -> &query_stats::QueryStats {
2771        &self.query_stats
2772    }
2773
2774    /// v6.5.1 — mutable accessor (clear, etc).
2775    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
2776        &mut self.query_stats
2777    }
2778
2779    /// v6.2.0 — read access to the per-column statistics table.
2780    /// Used by the planner (v6.2.2 selectivity functions read this),
2781    /// by `SELECT * FROM spg_statistic`, and by e2e tests.
2782    pub const fn statistics(&self) -> &statistics::Statistics {
2783        &self.statistics
2784    }
2785
2786    /// v6.2.1 — return tables whose modified-row count crossed the
2787    /// auto-analyze threshold since the last ANALYZE on that table.
2788    /// The threshold is `0.1 × max(row_count, MIN_ROWS_FOR_AUTO_
2789    /// ANALYZE)` — combines PG-style fractional + absolute lower
2790    /// bound so a fresh / tiny table doesn't get hammered on every
2791    /// INSERT.
2792    ///
2793    /// Designed to be cheap: walks every user table's
2794    /// `Catalog::table_names()` + reads `statistics::modified_
2795    /// since_last_analyze()` (BTreeMap lookup). The background
2796    /// worker calls this under `engine.read()` then drops the lock
2797    /// before re-acquiring `engine.write()` for the actual ANALYZE.
2798    pub fn tables_needing_analyze(&self) -> Vec<String> {
2799        const MIN_ROWS: u64 = 100;
2800        let mut out = Vec::new();
2801        for name in self.catalog.table_names() {
2802            if is_internal_table_name(&name) {
2803                continue;
2804            }
2805            let Some(table) = self.catalog.get(&name) else {
2806                continue;
2807            };
2808            let row_count = table.rows().len() as u64;
2809            let modified = self.statistics.modified_since_last_analyze(&name);
2810            // Threshold: ceil(0.1 × max(row_count, MIN_ROWS)),
2811            // computed in integer arithmetic so spg-engine stays
2812            // no_std without pulling in libm. `(n + 9) / 10` is
2813            // `ceil(n / 10)` for non-negative `n`.
2814            let base = row_count.max(MIN_ROWS);
2815            let threshold = base.saturating_add(9) / 10;
2816            if modified >= threshold {
2817                out.push(name);
2818            }
2819        }
2820        out
2821    }
2822
2823    /// v6.2.0 — `ANALYZE [<table>]` runtime. Bare `ANALYZE` walks
2824    /// every user table; `ANALYZE <name>` re-stats one. For each
2825    /// target table, single-pass scan + per-column histogram +
2826    /// `null_frac` + `n_distinct`. Replaces the table's prior
2827    /// stats; resets the modified-row counter.
2828    ///
2829    /// v6.2.0 doesn't sample — it scans the full table. v6.2.x
2830    /// can add reservoir sampling at the > 100 K-row mark; not a
2831    /// scope blocker for the current commit since rows ≤ 100 K
2832    /// analyse in milliseconds.
2833    fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
2834        let names: Vec<String> = if let Some(name) = target {
2835            // Verify the table exists; surface a clear error if not.
2836            if self.catalog.get(name).is_none() {
2837                return Err(EngineError::Storage(StorageError::TableNotFound {
2838                    name: name.to_string(),
2839                }));
2840            }
2841            alloc::vec![name.to_string()]
2842        } else {
2843            self.catalog
2844                .table_names()
2845                .into_iter()
2846                .filter(|n| !is_internal_table_name(n))
2847                .collect()
2848        };
2849        let mut analysed = 0usize;
2850        for table_name in &names {
2851            self.analyze_one_table(table_name)?;
2852            analysed += 1;
2853        }
2854        // v6.3.1 — plan cache invalidation. Bump stats version so
2855        // future lookups see the new generation, and selectively
2856        // evict every plan whose `source_tables` overlap with the
2857        // ANALYZE target set. Bare ANALYZE (all tables) clears the
2858        // whole cache.
2859        if analysed > 0 {
2860            self.statistics.bump_version();
2861            if target.is_some() {
2862                for t in &names {
2863                    self.plan_cache.evict_referencing(t);
2864                }
2865            } else {
2866                self.plan_cache.clear();
2867            }
2868        }
2869        Ok(QueryResult::CommandOk {
2870            affected: analysed,
2871            modified_catalog: true,
2872        })
2873    }
2874
2875    /// v6.7.3 — `COMPACT COLD SEGMENTS` runtime path. Drives the
2876    /// engine-layer compaction shim with the default
2877    /// 4 MiB segment-size threshold. spg-server intercepts the
2878    /// SQL before it reaches the engine on a server build —
2879    /// it reads `SPG_COMPACTION_TARGET_SEGMENT_BYTES`, calls
2880    /// `Engine::compact_cold_segments_with_target` directly with
2881    /// the env value, and persists every merged segment to
2882    /// v7.12.1 — record a `SET <name> = <value>` parameter. Names
2883    /// are case-folded to lowercase to match PG; values keep their
2884    /// caller-supplied form so observability paths see what was
2885    /// requested. Only `default_text_search_config` is consulted by
2886    /// the engine today.
2887    fn set_session_param(&mut self, name: String, value: spg_sql::ast::SetValue) {
2888        let normalised = match value {
2889            spg_sql::ast::SetValue::String(s) => s,
2890            spg_sql::ast::SetValue::Ident(s) => s,
2891            spg_sql::ast::SetValue::Number(s) => s,
2892            spg_sql::ast::SetValue::Default => String::new(),
2893        };
2894        let key = name.to_ascii_lowercase();
2895        // v7.14.0 — mysqldump preamble emits
2896        // `SET FOREIGN_KEY_CHECKS=0` so it can CREATE TABLE in any
2897        // order despite cross-table FK references; the closing
2898        // section emits `SET FOREIGN_KEY_CHECKS=1` (or
2899        // `=@OLD_FOREIGN_KEY_CHECKS` which resolves to "ON" in our
2900        // session-variable-aware path). Match both shapes.
2901        // Also accept PG's `session_replication_role = 'replica'`
2902        // which suppresses trigger + FK enforcement during a
2903        // logical replication apply (pg_dump preserves this for
2904        // schema-only mode but it shows up in some restores).
2905        let value_off = matches!(
2906            normalised.to_ascii_lowercase().as_str(),
2907            "0" | "off" | "false"
2908        );
2909        let value_on = matches!(
2910            normalised.to_ascii_lowercase().as_str(),
2911            "1" | "on" | "true"
2912        );
2913        if key == "foreign_key_checks"
2914            || key == "session_replication_role" && normalised.eq_ignore_ascii_case("replica")
2915        {
2916            if value_off || key == "session_replication_role" {
2917                self.foreign_key_checks = false;
2918            } else if value_on
2919                || (key == "session_replication_role" && normalised.eq_ignore_ascii_case("origin"))
2920            {
2921                self.foreign_key_checks = true;
2922                // Drain pending FK queue against the now-complete
2923                // catalog. Errors here surface as the SET reply —
2924                // caller knows enabling checks revealed orphans.
2925                let _ = self.drain_pending_foreign_keys();
2926            }
2927        }
2928        // v7.22 (round-13 T3) — string-literal dialect signals.
2929        // `SET sql_mode = …` is something only MySQL clients and
2930        // mysqldump preambles emit → MySQL escape semantics.
2931        // `SET standard_conforming_strings = on|off` is PG's own
2932        // switch for exactly this behaviour (every pg_dump preamble
2933        // sets it to on). The same SQL text lexes differently per
2934        // dialect, so a flip invalidates the plan cache.
2935        let new_escapes = if key == "sql_mode" {
2936            Some(true)
2937        } else if key == "standard_conforming_strings" {
2938            Some(value_off)
2939        } else {
2940            None
2941        };
2942        if let Some(flag) = new_escapes
2943            && flag != self.backslash_escapes
2944        {
2945            self.backslash_escapes = flag;
2946            self.plan_cache.clear();
2947        }
2948        self.session_params.insert(key, normalised);
2949    }
2950
2951    /// v7.14.0 — resolve every queued FK whose installation was
2952    /// deferred (`SET FOREIGN_KEY_CHECKS=0` window). Called by
2953    /// `set_session_param` when checks flip back on and by the
2954    /// drop-import release gate. Each FK is resolved against the
2955    /// current catalog; remaining missing-parent errors propagate
2956    /// up so the caller knows the import was incomplete.
2957    fn drain_pending_foreign_keys(&mut self) -> Result<(), EngineError> {
2958        let pending = core::mem::take(&mut self.pending_foreign_keys);
2959        for (child, fk) in pending {
2960            // Resolve against the current catalog. Skip silently
2961            // when the child table itself was dropped between
2962            // queue + drain.
2963            let cols_snapshot = match self.active_catalog().get(&child) {
2964                Some(t) => t.schema().columns.clone(),
2965                None => continue,
2966            };
2967            let storage_fk =
2968                resolve_foreign_key(&child, &cols_snapshot, fk, self.active_catalog())?;
2969            let table = self
2970                .active_catalog_mut()
2971                .get_mut(&child)
2972                .expect("checked above");
2973            table.schema_mut().foreign_keys.push(storage_fk);
2974        }
2975        Ok(())
2976    }
2977
2978    /// v7.12.1 — read a session parameter set via `SET`. Used by
2979    /// the FTS function dispatcher to resolve the default config
2980    /// for `to_tsvector(text)` / `plainto_tsquery(text)` etc.
2981    #[must_use]
2982    pub fn session_param(&self, name: &str) -> Option<&str> {
2983        self.session_params
2984            .get(&name.to_ascii_lowercase())
2985            .map(String::as_str)
2986    }
2987
2988    /// v7.12.1 — build an `EvalContext` chained with the session's
2989    /// `default_text_search_config`. Engine-internal callers use
2990    /// this instead of `EvalContext::new` so the FTS function
2991    /// dispatcher sees the SET configuration.
2992    fn ev_ctx<'a>(
2993        &'a self,
2994        columns: &'a [ColumnSchema],
2995        alias: Option<&'a str>,
2996    ) -> EvalContext<'a> {
2997        EvalContext::new(columns, alias)
2998            .with_default_text_search_config(self.session_param("default_text_search_config"))
2999    }
3000
3001    /// `<db>.spg/segments/`. This arm only fires for engine-only
3002    /// callers (spg-embedded, lib tests); in that mode merged
3003    /// segments live in memory and are dropped at process exit.
3004    fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
3005        let target = COMPACTION_TARGET_DEFAULT_BYTES;
3006        let reports = self.compact_cold_segments_with_target(target)?;
3007        let columns = alloc::vec![
3008            ColumnSchema::new("table_name", DataType::Text, false),
3009            ColumnSchema::new("index_name", DataType::Text, false),
3010            ColumnSchema::new("sources_merged", DataType::BigInt, false),
3011            ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
3012            ColumnSchema::new("merged_rows", DataType::BigInt, false),
3013            ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
3014            ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
3015        ];
3016        let rows: Vec<Row> = reports
3017            .into_iter()
3018            .map(|(tname, iname, report)| {
3019                Row::new(alloc::vec![
3020                    Value::Text(tname),
3021                    Value::Text(iname),
3022                    Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
3023                    Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
3024                    Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
3025                    Value::BigInt(i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),),
3026                    Value::BigInt(
3027                        i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
3028                    ),
3029                ])
3030            })
3031            .collect();
3032        Ok(QueryResult::Rows { columns, rows })
3033    }
3034
3035    /// Walk a single table's rows once and (re-)populate per-column
3036    /// stats. Drops the existing stats for `table` first so columns
3037    /// that have been DROP-ed between ANALYZEs don't leave stale
3038    /// rows.
3039    fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
3040        let table = self.catalog.get(table_name).ok_or_else(|| {
3041            EngineError::Storage(StorageError::TableNotFound {
3042                name: table_name.to_string(),
3043            })
3044        })?;
3045        let schema = table.schema().clone();
3046        let row_count = table.rows().len();
3047        // For each column, collect (sorted) non-NULL textual values
3048        // + count NULLs; then ask `statistics::build_histogram` to
3049        // produce the 101 bounds and `estimate_n_distinct` the
3050        // distinct count.
3051        self.statistics.clear_table(table_name);
3052        for (col_pos, col_schema) in schema.columns.iter().enumerate() {
3053            // v6.2.0 skip: vector columns have their own stats
3054            // shape (HNSW graph topology). v6.2 deliberation #1.
3055            if matches!(col_schema.ty, DataType::Vector { .. }) {
3056                continue;
3057            }
3058            let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
3059            let mut nulls: u64 = 0;
3060            for row in table.rows() {
3061                match row.values.get(col_pos) {
3062                    Some(Value::Null) | None => nulls += 1,
3063                    Some(v) => non_null_values.push(v.clone()),
3064                }
3065            }
3066            // Sort by type-aware ordering (Int as int, Text as
3067            // lex, etc.) so histogram bounds reflect the column's
3068            // natural order — not lexicographic on the string
3069            // representation, which would put "9" after "49".
3070            non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
3071            let non_null: Vec<String> = non_null_values.iter().map(canonical_value_repr).collect();
3072            let null_frac = if row_count == 0 {
3073                0.0
3074            } else {
3075                #[allow(clippy::cast_precision_loss)]
3076                let f = nulls as f32 / row_count as f32;
3077                f
3078            };
3079            let n_distinct = statistics::estimate_n_distinct(&non_null);
3080            let histogram_bounds = statistics::build_histogram(&non_null);
3081            self.statistics.set(
3082                table_name.to_string(),
3083                col_schema.name.clone(),
3084                statistics::ColumnStats {
3085                    null_frac,
3086                    n_distinct,
3087                    histogram_bounds,
3088                },
3089            );
3090        }
3091        self.statistics.reset_modified(table_name);
3092        // v6.7.0 — refresh the per-table cold_rows cache. Walk the
3093        // BTree indices and count Cold locators (MAX across
3094        // indices); store the result on the table. Surfaced via
3095        // `spg_statistic.cold_row_count` (new column) and
3096        // `spg_stat_segment.table_name` (new column).
3097        let cold_count = {
3098            let table = self
3099                .active_catalog()
3100                .get(table_name)
3101                .expect("table still present");
3102            table.count_cold_locators()
3103        };
3104        let table_mut = self
3105            .active_catalog_mut()
3106            .get_mut(table_name)
3107            .expect("table still present");
3108        table_mut.set_cold_row_count(cold_count);
3109        Ok(())
3110    }
3111
3112    /// v6.1.3 — `SHOW PUBLICATIONS` row materialisation. Returns
3113    /// `(name, scope, table_count)` ordered by publication name.
3114    ///   - `scope` is the human-readable string:
3115    ///       `"FOR ALL TABLES"` /
3116    ///       `"FOR TABLE t1, t2"` /
3117    ///       `"FOR ALL TABLES EXCEPT t1, t2"`.
3118    ///   - `table_count` is NULL for `AllTables`, the list length
3119    ///     otherwise. NULLability lets clients distinguish "publish
3120    ///     everything" from "publish exactly 0 tables" (the v6.1.3
3121    ///     parser forbids the empty list, but the column shape is
3122    ///     ready for the v6.1.5 publisher-side semantics).
3123    fn exec_show_publications(&self) -> QueryResult {
3124        let columns = alloc::vec![
3125            ColumnSchema::new("name", DataType::Text, false),
3126            ColumnSchema::new("scope", DataType::Text, false),
3127            ColumnSchema::new("table_count", DataType::Int, true),
3128        ];
3129        let rows: Vec<Row> = self
3130            .publications
3131            .iter()
3132            .map(|(name, scope)| {
3133                let (scope_str, count_val) = match scope {
3134                    spg_sql::ast::PublicationScope::AllTables => {
3135                        ("FOR ALL TABLES".to_string(), Value::Null)
3136                    }
3137                    spg_sql::ast::PublicationScope::ForTables(ts) => (
3138                        alloc::format!("FOR TABLE {}", ts.join(", ")),
3139                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
3140                    ),
3141                    spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
3142                        alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
3143                        Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
3144                    ),
3145                };
3146                Row::new(alloc::vec![
3147                    Value::Text(name.clone()),
3148                    Value::Text(scope_str),
3149                    count_val,
3150                ])
3151            })
3152            .collect();
3153        QueryResult::Rows { columns, rows }
3154    }
3155
3156    /// v4.1 `SHOW USERS` — `(name, role)` per row, ordered by name.
3157    fn exec_show_users(&self) -> QueryResult {
3158        let columns = alloc::vec![
3159            ColumnSchema::new("name", DataType::Text, false),
3160            ColumnSchema::new("role", DataType::Text, false),
3161        ];
3162        let rows: Vec<Row> = self
3163            .users
3164            .iter()
3165            .map(|(name, rec)| {
3166                Row::new(alloc::vec![
3167                    Value::Text(name.to_string()),
3168                    Value::Text(rec.role.as_str().to_string()),
3169                ])
3170            })
3171            .collect();
3172        QueryResult::Rows { columns, rows }
3173    }
3174
3175    fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
3176        if self.in_transaction() {
3177            return Err(EngineError::Unsupported(
3178                "CREATE USER is not allowed inside a transaction".into(),
3179            ));
3180        }
3181        let role = users::Role::parse(&s.role).ok_or_else(|| {
3182            EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
3183        })?;
3184        // Prefer the host-injected RNG. Falls back to a deterministic
3185        // salt derived from the username only when no RNG is wired —
3186        // acceptable for tests; the server always installs one.
3187        let salt = self.salt_fn.map_or_else(
3188            || {
3189                let mut s_bytes = [0u8; 16];
3190                let digest = spg_crypto::hash(s.name.as_bytes());
3191                s_bytes.copy_from_slice(&digest[..16]);
3192                s_bytes
3193            },
3194            |f| f(),
3195        );
3196        self.users
3197            .create(&s.name, &s.password, role, salt)
3198            .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
3199        Ok(QueryResult::CommandOk {
3200            affected: 1,
3201            modified_catalog: true,
3202        })
3203    }
3204
3205    fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
3206        if self.in_transaction() {
3207            return Err(EngineError::Unsupported(
3208                "DROP USER is not allowed inside a transaction".into(),
3209            ));
3210        }
3211        self.users
3212            .drop(name)
3213            .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
3214        Ok(QueryResult::CommandOk {
3215            affected: 1,
3216            modified_catalog: true,
3217        })
3218    }
3219
3220    /// v7.12.4 — `CREATE [OR REPLACE] FUNCTION`. Stores the
3221    /// function metadata in the catalog. PL/pgSQL bodies are
3222    /// already parsed by the SQL parser; we re-canonicalise the
3223    /// body to source text for storage (the executor re-parses
3224    /// it at trigger fire time — see the trigger fire path).
3225    fn exec_create_function(
3226        &mut self,
3227        s: spg_sql::ast::CreateFunctionStatement,
3228    ) -> Result<QueryResult, EngineError> {
3229        let args_repr = render_function_args(&s.args);
3230        let returns = match &s.returns {
3231            spg_sql::ast::FunctionReturn::Trigger => alloc::string::String::from("TRIGGER"),
3232            spg_sql::ast::FunctionReturn::Void => alloc::string::String::from("VOID"),
3233            spg_sql::ast::FunctionReturn::Type(t) => alloc::format!("{t}"),
3234            spg_sql::ast::FunctionReturn::Other(s) => s.clone(),
3235        };
3236        let body_text = match &s.body {
3237            spg_sql::ast::FunctionBody::PlPgSql(b) => alloc::format!("{b}"),
3238            spg_sql::ast::FunctionBody::Raw(s) => s.clone(),
3239        };
3240        let def = spg_storage::FunctionDef {
3241            name: s.name.clone(),
3242            args_repr,
3243            returns,
3244            language: s.language.clone(),
3245            body: body_text,
3246        };
3247        self.active_catalog_mut()
3248            .create_function(def, s.or_replace)
3249            .map_err(EngineError::Storage)?;
3250        Ok(QueryResult::CommandOk {
3251            affected: 0,
3252            modified_catalog: true,
3253        })
3254    }
3255
3256    /// v7.12.4 — `CREATE [OR REPLACE] TRIGGER`. The referenced
3257    /// function must already exist in the catalog (forward
3258    /// references defer to a later release). Persists the
3259    /// trigger metadata for the row-write hooks below to consult.
3260    fn exec_create_trigger(
3261        &mut self,
3262        s: spg_sql::ast::CreateTriggerStatement,
3263    ) -> Result<QueryResult, EngineError> {
3264        let timing = match s.timing {
3265            spg_sql::ast::TriggerTiming::Before => "BEFORE",
3266            spg_sql::ast::TriggerTiming::After => "AFTER",
3267            spg_sql::ast::TriggerTiming::InsteadOf => "INSTEAD OF",
3268        };
3269        let events: Vec<alloc::string::String> = s
3270            .events
3271            .iter()
3272            .map(|e| match e {
3273                spg_sql::ast::TriggerEvent::Insert => alloc::string::String::from("INSERT"),
3274                spg_sql::ast::TriggerEvent::Update => alloc::string::String::from("UPDATE"),
3275                spg_sql::ast::TriggerEvent::Delete => alloc::string::String::from("DELETE"),
3276                spg_sql::ast::TriggerEvent::Truncate => alloc::string::String::from("TRUNCATE"),
3277            })
3278            .collect();
3279        let for_each = match s.for_each {
3280            spg_sql::ast::TriggerForEach::Row => "ROW",
3281            spg_sql::ast::TriggerForEach::Statement => "STATEMENT",
3282        };
3283        let def = spg_storage::TriggerDef {
3284            name: s.name.clone(),
3285            table: s.table.clone(),
3286            timing: alloc::string::String::from(timing),
3287            events,
3288            for_each: alloc::string::String::from(for_each),
3289            function: s.function.clone(),
3290            update_columns: s.update_columns.clone(),
3291            // v7.16.1 — every trigger is born enabled. Toggled
3292            // by ALTER TABLE … { ENABLE | DISABLE } TRIGGER.
3293            enabled: true,
3294        };
3295        self.active_catalog_mut()
3296            .create_trigger(def, s.or_replace)
3297            .map_err(EngineError::Storage)?;
3298        Ok(QueryResult::CommandOk {
3299            affected: 0,
3300            modified_catalog: true,
3301        })
3302    }
3303
3304    fn exec_drop_trigger(
3305        &mut self,
3306        name: &str,
3307        table: &str,
3308        if_exists: bool,
3309    ) -> Result<QueryResult, EngineError> {
3310        let removed = self.active_catalog_mut().drop_trigger(name, table);
3311        if !removed && !if_exists {
3312            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3313                alloc::format!("trigger {name:?} on {table:?} does not exist"),
3314            )));
3315        }
3316        Ok(QueryResult::CommandOk {
3317            affected: usize::from(removed),
3318            modified_catalog: removed,
3319        })
3320    }
3321
3322    fn exec_drop_function(
3323        &mut self,
3324        name: &str,
3325        if_exists: bool,
3326    ) -> Result<QueryResult, EngineError> {
3327        let removed = self.active_catalog_mut().drop_function(name);
3328        if !removed && !if_exists {
3329            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3330                alloc::format!("function {name:?} does not exist"),
3331            )));
3332        }
3333        Ok(QueryResult::CommandOk {
3334            affected: usize::from(removed),
3335            modified_catalog: removed,
3336        })
3337    }
3338
3339    /// v7.17.0 — `CREATE SEQUENCE` engine path. Resolves
3340    /// `min_value` / `max_value` / `start` against PG defaults
3341    /// when omitted, then installs the SequenceDef in the catalog.
3342    fn exec_create_sequence(
3343        &mut self,
3344        s: spg_sql::ast::CreateSequenceStatement,
3345    ) -> Result<QueryResult, EngineError> {
3346        use spg_sql::ast::{SeqBound, SequenceDataType as AstDt};
3347        use spg_storage::{SequenceDataType, SequenceDef};
3348        let dt = match s.data_type {
3349            None => SequenceDataType::BigInt,
3350            Some(AstDt::SmallInt) => SequenceDataType::SmallInt,
3351            Some(AstDt::Int) => SequenceDataType::Int,
3352            Some(AstDt::BigInt) => SequenceDataType::BigInt,
3353        };
3354        let increment = s.options.increment.unwrap_or(1);
3355        if increment == 0 {
3356            return Err(EngineError::Unsupported(
3357                "INCREMENT must not be zero".into(),
3358            ));
3359        }
3360        let (def_min, def_max) = dt.default_bounds(increment > 0);
3361        let min_value = match s.options.min_value {
3362            None | Some(SeqBound::NoBound) => def_min,
3363            Some(SeqBound::Value(n)) => n,
3364        };
3365        let max_value = match s.options.max_value {
3366            None | Some(SeqBound::NoBound) => def_max,
3367            Some(SeqBound::Value(n)) => n,
3368        };
3369        if min_value > max_value {
3370            return Err(EngineError::Unsupported(alloc::format!(
3371                "MINVALUE ({min_value}) must be <= MAXVALUE ({max_value})"
3372            )));
3373        }
3374        let start = s
3375            .options
3376            .start
3377            .unwrap_or(if increment > 0 { min_value } else { max_value });
3378        if start < min_value || start > max_value {
3379            return Err(EngineError::Unsupported(alloc::format!(
3380                "START WITH ({start}) is outside MINVALUE..MAXVALUE ({min_value}..{max_value})"
3381            )));
3382        }
3383        let cache = s.options.cache.unwrap_or(1);
3384        if cache < 1 {
3385            return Err(EngineError::Unsupported("CACHE must be >= 1".into()));
3386        }
3387        let cycle = s.options.cycle.unwrap_or(false);
3388        let owned_by = match s.options.owned_by {
3389            None | Some(spg_sql::ast::SequenceOwnedBy::None) => None,
3390            Some(spg_sql::ast::SequenceOwnedBy::Column { table, column }) => Some((table, column)),
3391        };
3392        let def = SequenceDef {
3393            name: s.name.clone(),
3394            data_type: dt,
3395            start,
3396            increment,
3397            min_value,
3398            max_value,
3399            cache,
3400            cycle,
3401            owned_by,
3402            last_value: start,
3403            is_called: false,
3404        };
3405        self.active_catalog_mut()
3406            .create_sequence(def, s.if_not_exists)
3407            .map_err(EngineError::Storage)?;
3408        Ok(QueryResult::CommandOk {
3409            affected: 0,
3410            modified_catalog: !self.in_transaction(),
3411        })
3412    }
3413
3414    /// v7.17.0 — `ALTER SEQUENCE` engine path. Re-uses the catalog
3415    /// `alter_sequence` merge helper.
3416    fn exec_alter_sequence(
3417        &mut self,
3418        s: spg_sql::ast::AlterSequenceStatement,
3419    ) -> Result<QueryResult, EngineError> {
3420        use spg_sql::ast::SeqBound;
3421        // v7.29 (round-23a) - implicit serial sequences materialise
3422        // on first address, ALTER SEQUENCE included.
3423        self.ensure_implicit_sequence(&s.name);
3424        let cat = self.active_catalog_mut();
3425        if !cat.sequences().contains_key(&s.name) {
3426            if s.if_exists {
3427                return Ok(QueryResult::CommandOk {
3428                    affected: 0,
3429                    modified_catalog: false,
3430                });
3431            }
3432            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3433                alloc::format!("sequence {:?} does not exist", s.name),
3434            )));
3435        }
3436        let min_value = match s.options.min_value {
3437            None => None,
3438            Some(SeqBound::NoBound) => None, // NO MINVALUE → keep current
3439            Some(SeqBound::Value(n)) => Some(n),
3440        };
3441        let max_value = match s.options.max_value {
3442            None => None,
3443            Some(SeqBound::NoBound) => None,
3444            Some(SeqBound::Value(n)) => Some(n),
3445        };
3446        let owned_by = s.options.owned_by.map(|ob| match ob {
3447            spg_sql::ast::SequenceOwnedBy::None => None,
3448            spg_sql::ast::SequenceOwnedBy::Column { table, column } => Some((table, column)),
3449        });
3450        cat.alter_sequence(
3451            &s.name,
3452            s.options.increment,
3453            min_value,
3454            max_value,
3455            s.options.start,
3456            s.options.restart,
3457            s.options.cache,
3458            s.options.cycle,
3459            owned_by,
3460        )
3461        .map_err(EngineError::Storage)?;
3462        Ok(QueryResult::CommandOk {
3463            affected: 0,
3464            modified_catalog: !self.in_transaction(),
3465        })
3466    }
3467
3468    /// v7.17.0 Phase 1.1 — walk a Statement tree and pre-resolve
3469    /// any sequence FunctionCall nodes inside its Expr slots.
3470    /// Delegates per-statement-kind: SELECT projection +
3471    /// WHERE, INSERT VALUES, UPDATE SET, DELETE WHERE.
3472    fn pre_resolve_sequence_calls_in_statement(
3473        &mut self,
3474        stmt: &mut Statement,
3475    ) -> Result<(), EngineError> {
3476        match stmt {
3477            Statement::Select(s) => self.pre_resolve_sequence_calls_in_select(s),
3478            Statement::Insert(s) => {
3479                for tuple in &mut s.rows {
3480                    for cell in tuple.iter_mut() {
3481                        self.resolve_sequence_calls_in_expr(cell)?;
3482                    }
3483                }
3484                Ok(())
3485            }
3486            Statement::Update(s) => {
3487                for (_col, expr) in &mut s.assignments {
3488                    self.resolve_sequence_calls_in_expr(expr)?;
3489                }
3490                if let Some(w) = &mut s.where_ {
3491                    self.resolve_sequence_calls_in_expr(w)?;
3492                }
3493                Ok(())
3494            }
3495            Statement::Delete(s) => {
3496                if let Some(w) = &mut s.where_ {
3497                    self.resolve_sequence_calls_in_expr(w)?;
3498                }
3499                Ok(())
3500            }
3501            _ => Ok(()),
3502        }
3503    }
3504
3505    fn pre_resolve_sequence_calls_in_select(
3506        &mut self,
3507        s: &mut spg_sql::ast::SelectStatement,
3508    ) -> Result<(), EngineError> {
3509        for item in &mut s.items {
3510            match item {
3511                spg_sql::ast::SelectItem::Expr { expr, .. } => {
3512                    self.resolve_sequence_calls_in_expr(expr)?;
3513                }
3514                spg_sql::ast::SelectItem::Wildcard => {}
3515            }
3516        }
3517        if let Some(w) = &mut s.where_ {
3518            self.resolve_sequence_calls_in_expr(w)?;
3519        }
3520        Ok(())
3521    }
3522
3523    /// v7.17.0 Phase 1.1 — walk an Expr tree and pre-resolve any
3524    /// `nextval(name)` / `currval(name)` / `setval(name, value[,
3525    /// is_called])` FunctionCall nodes by calling the catalog and
3526    /// replacing the node with the resulting `Expr::Literal`.
3527    /// Used by INSERT VALUES / UPDATE SET / DEFAULT eval so the
3528    /// row-eval path sees pre-computed sequence values instead of
3529    /// needing mutable catalog access mid-eval.
3530    #[allow(clippy::too_many_lines)]
3531    fn resolve_sequence_calls_in_expr(&mut self, expr: &mut Expr) -> Result<(), EngineError> {
3532        match expr {
3533            Expr::Literal(_) | Expr::Column(_) | Expr::Placeholder(_) => Ok(()),
3534            Expr::FunctionCall { name, args } => {
3535                // Descend first so nested calls — e.g.
3536                // setval('seq', currval('other')) — resolve
3537                // innermost-first.
3538                for a in args.iter_mut() {
3539                    self.resolve_sequence_calls_in_expr(a)?;
3540                }
3541                let lc = name.to_ascii_lowercase();
3542                if lc == "nextval" || lc == "currval" || lc == "setval" {
3543                    let v = self.eval_sequence_call(&lc, args)?;
3544                    *expr = Expr::Literal(value_to_literal(v));
3545                } else if lc == "pg_get_serial_sequence" && args.len() == 2 {
3546                    // v7.29 (round-23a) — resolves to the implicit
3547                    // sequence name so the pg_dump idiom
3548                    // `setval(pg_get_serial_sequence('t','c'), n)`
3549                    // works (the setval arm receives a literal).
3550                    let lit = |e: &Expr| -> Option<String> {
3551                        match e {
3552                            Expr::Literal(spg_sql::ast::Literal::String(v)) => {
3553                                let t = v.strip_prefix("public.").unwrap_or(v).trim_matches('"');
3554                                Some(t.to_string())
3555                            }
3556                            _ => None,
3557                        }
3558                    };
3559                    if let (Some(t), Some(c)) = (lit(&args[0]), lit(&args[1])) {
3560                        let is_serial = self.active_catalog().get(&t).is_some_and(|tb| {
3561                            tb.schema()
3562                                .columns
3563                                .iter()
3564                                .any(|col| col.name == c && col.auto_increment)
3565                        });
3566                        *expr = if is_serial {
3567                            Expr::Literal(spg_sql::ast::Literal::String(alloc::format!(
3568                                "public.{t}_{c}_seq"
3569                            )))
3570                        } else {
3571                            Expr::Literal(spg_sql::ast::Literal::Null)
3572                        };
3573                    }
3574                }
3575                Ok(())
3576            }
3577            Expr::Binary { lhs, rhs, .. } => {
3578                self.resolve_sequence_calls_in_expr(lhs)?;
3579                self.resolve_sequence_calls_in_expr(rhs)
3580            }
3581            Expr::Unary { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3582            Expr::Cast { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3583            Expr::IsNull { expr, .. } => self.resolve_sequence_calls_in_expr(expr),
3584            Expr::Like { expr, pattern, .. } => {
3585                self.resolve_sequence_calls_in_expr(expr)?;
3586                self.resolve_sequence_calls_in_expr(pattern)
3587            }
3588            Expr::Extract { source, .. } => self.resolve_sequence_calls_in_expr(source),
3589            Expr::Array(items) => {
3590                for it in items.iter_mut() {
3591                    self.resolve_sequence_calls_in_expr(it)?;
3592                }
3593                Ok(())
3594            }
3595            // Window / subquery / etc — sequence calls inside these
3596            // are uncommon and require separate row-eval; leave
3597            // untouched for now and rely on the eval-time error
3598            // (no sequence_resolver attached).
3599            _ => Ok(()),
3600        }
3601    }
3602
3603    /// v7.29 (mailrs round-23a) — SERIAL/BIGSERIAL columns get their
3604    /// PG-style implicit sequence `<table>_<column>_seq` ON FIRST
3605    /// ADDRESS rather than at CREATE TABLE time, so pre-7.29 data
3606    /// directories gain addressability without a storage migration.
3607    /// The sequence is born synced to the column's current MAX so
3608    /// `nextval` immediately after creation continues the series.
3609    fn ensure_implicit_sequence(&mut self, seq_name: &str) {
3610        if self.active_catalog().sequences().contains_key(seq_name) {
3611            return;
3612        }
3613        let Some(rest) = seq_name.strip_suffix("_seq") else {
3614            return;
3615        };
3616        let mut found: Option<(String, String, i64)> = None;
3617        for tname in self.active_catalog().table_names() {
3618            let Some(table) = self.active_catalog().get(&tname) else {
3619                continue;
3620            };
3621            for (i, col) in table.schema().columns.iter().enumerate() {
3622                if col.auto_increment && alloc::format!("{tname}_{}", col.name) == rest {
3623                    let next = table.next_auto_value(i).unwrap_or(1);
3624                    found = Some((tname.clone(), col.name.clone(), next - 1));
3625                    break;
3626                }
3627            }
3628            if found.is_some() {
3629                break;
3630            }
3631        }
3632        let Some((tname, cname, last)) = found else {
3633            return;
3634        };
3635        let def = spg_storage::SequenceDef {
3636            name: seq_name.to_string(),
3637            data_type: spg_storage::SequenceDataType::BigInt,
3638            start: 1,
3639            increment: 1,
3640            min_value: 1,
3641            max_value: i64::MAX,
3642            cache: 1,
3643            cycle: false,
3644            owned_by: Some((tname, cname)),
3645            last_value: last.max(0),
3646            is_called: last > 0,
3647        };
3648        let _ = self.active_catalog_mut().create_sequence(def, true);
3649    }
3650
3651    /// v7.17.0 Phase 1.1 — evaluate a single nextval/currval/
3652    /// setval call. `args` are already pre-resolved Expr nodes
3653    /// (literals) — we extract their constant values.
3654    fn eval_sequence_call(&mut self, op: &str, args: &[Expr]) -> Result<Value, EngineError> {
3655        if args.is_empty() {
3656            return Err(EngineError::Unsupported(alloc::format!(
3657                "{op}() takes at least one argument"
3658            )));
3659        }
3660        let seq_name = match &args[0] {
3661            Expr::Literal(spg_sql::ast::Literal::String(s)) => {
3662                // v7.17 dump-compat — pg_dump emits sequence
3663                // names schema-qualified (`'public.posts_id_seq'`).
3664                // SPG is single-schema; strip a leading
3665                // `public.` / `pg_catalog.` so the catalog lookup
3666                // matches the bare-name CREATE SEQUENCE used.
3667                let trimmed = s
3668                    .strip_prefix("public.")
3669                    .or_else(|| s.strip_prefix("pg_catalog."))
3670                    .unwrap_or(s);
3671                trimmed.to_string()
3672            }
3673            // v7.17 dump-compat — pg_dump also emits
3674            // `nextval('public.posts_id_seq'::regclass)`
3675            // where the cast wraps the literal. Peel the cast
3676            // and continue.
3677            Expr::Cast { expr, .. } => {
3678                if let Expr::Literal(spg_sql::ast::Literal::String(s)) = expr.as_ref() {
3679                    let trimmed = s
3680                        .strip_prefix("public.")
3681                        .or_else(|| s.strip_prefix("pg_catalog."))
3682                        .unwrap_or(s);
3683                    trimmed.to_string()
3684                } else {
3685                    return Err(EngineError::Unsupported(alloc::format!(
3686                        "{op}() first argument must be a literal sequence name"
3687                    )));
3688                }
3689            }
3690            other => {
3691                return Err(EngineError::Unsupported(alloc::format!(
3692                    "{op}() first argument must be a literal sequence name, got {other:?}"
3693                )));
3694            }
3695        };
3696        self.ensure_implicit_sequence(&seq_name);
3697        match op {
3698            "nextval" => {
3699                let v = self
3700                    .active_catalog_mut()
3701                    .sequence_next_value(&seq_name)
3702                    .map_err(EngineError::Storage)?;
3703                Ok(Value::BigInt(v))
3704            }
3705            "currval" => {
3706                let v = self
3707                    .active_catalog()
3708                    .sequence_current_value(&seq_name)
3709                    .map_err(EngineError::Storage)?;
3710                Ok(Value::BigInt(v))
3711            }
3712            "setval" => {
3713                if args.len() < 2 || args.len() > 3 {
3714                    return Err(EngineError::Unsupported(alloc::format!(
3715                        "setval() takes 2 or 3 arguments, got {}",
3716                        args.len()
3717                    )));
3718                }
3719                let value = match &args[1] {
3720                    Expr::Literal(spg_sql::ast::Literal::Integer(n)) => *n,
3721                    other => {
3722                        return Err(EngineError::Unsupported(alloc::format!(
3723                            "setval() value argument must be a literal integer, got {other:?}"
3724                        )));
3725                    }
3726                };
3727                let is_called = if args.len() == 3 {
3728                    match &args[2] {
3729                        Expr::Literal(spg_sql::ast::Literal::Bool(b)) => *b,
3730                        other => {
3731                            return Err(EngineError::Unsupported(alloc::format!(
3732                                "setval() is_called argument must be a literal BOOL, got {other:?}"
3733                            )));
3734                        }
3735                    }
3736                } else {
3737                    true
3738                };
3739                let v = self
3740                    .active_catalog_mut()
3741                    .sequence_set_value(&seq_name, value, is_called)
3742                    .map_err(EngineError::Storage)?;
3743                Ok(Value::BigInt(v))
3744            }
3745            other => Err(EngineError::Unsupported(alloc::format!(
3746                "unknown sequence op {other:?}"
3747            ))),
3748        }
3749    }
3750
3751    /// v7.17.0 Phase 1.2 — find every catalog VIEW referenced in
3752    /// the SELECT's FROM / JOIN graph, re-parse each view's body
3753    /// source, and prepend it as a synthetic CTE on the
3754    /// returned SelectStatement. Returns `None` when no view
3755    /// references are found (caller proceeds with the original
3756    /// statement); returns `Some(rewritten)` otherwise (caller
3757    /// re-runs exec_select_cancel on the rewritten form so the
3758    /// regular CTE materialiser handles it).
3759    fn expand_views_in_select(
3760        &self,
3761        stmt: &SelectStatement,
3762    ) -> Result<Option<SelectStatement>, EngineError> {
3763        let cat = self.active_catalog();
3764        let mut referenced: Vec<String> = Vec::new();
3765        if let Some(from) = &stmt.from {
3766            collect_view_refs(&from.primary, cat, &mut referenced);
3767            for j in &from.joins {
3768                collect_view_refs(&j.table, cat, &mut referenced);
3769            }
3770        }
3771        // Don't expand a view name that's already shadowed by a
3772        // CTE on the same SELECT — the CTE wins per PG.
3773        referenced.retain(|n| !stmt.ctes.iter().any(|c| c.name == *n));
3774        if referenced.is_empty() {
3775            return Ok(None);
3776        }
3777        let mut new_ctes: Vec<spg_sql::ast::Cte> = Vec::with_capacity(referenced.len());
3778        for name in &referenced {
3779            let view = cat.views().get(name).ok_or_else(|| {
3780                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
3781                    "view {name:?} disappeared mid-expansion"
3782                )))
3783            })?;
3784            let parsed = spg_sql::parser::parse_statement(&view.body).map_err(|e| {
3785                EngineError::Unsupported(alloc::format!("view {name:?} body re-parse failed: {e}"))
3786            })?;
3787            let Statement::Select(body) = parsed else {
3788                return Err(EngineError::Unsupported(alloc::format!(
3789                    "view {name:?} body is not a SELECT (catalog corruption)"
3790                )));
3791            };
3792            new_ctes.push(spg_sql::ast::Cte {
3793                name: name.clone(),
3794                body,
3795                recursive: false,
3796                column_overrides: view.columns.clone(),
3797            });
3798        }
3799        let mut out = stmt.clone();
3800        // Prepend so view CTEs are visible to caller-supplied CTEs.
3801        new_ctes.extend(out.ctes);
3802        out.ctes = new_ctes;
3803        Ok(Some(out))
3804    }
3805
3806    /// v7.17.0 Phase 1.2 — `CREATE VIEW` engine path. Stores the
3807    /// Display-rendered body verbatim in the catalog; SELECT-from-
3808    /// view at exec time re-parses + prepends as a synthetic CTE.
3809    fn exec_create_view(
3810        &mut self,
3811        s: spg_sql::ast::CreateViewStatement,
3812    ) -> Result<QueryResult, EngineError> {
3813        // Render the SELECT body to canonical form so the catalog
3814        // round-trips a deterministic source (no whitespace /
3815        // comment surprises in the on-disk snapshot).
3816        let body_repr = alloc::format!("{}", spg_sql::ast::Statement::Select(s.body));
3817        let def = spg_storage::ViewDef {
3818            name: s.name.clone(),
3819            columns: s.columns,
3820            body: body_repr,
3821        };
3822        self.active_catalog_mut()
3823            .create_view(def, s.or_replace, s.if_not_exists)
3824            .map_err(EngineError::Storage)?;
3825        Ok(QueryResult::CommandOk {
3826            affected: 0,
3827            modified_catalog: !self.in_transaction(),
3828        })
3829    }
3830
3831    /// v7.17.0 Phase 1.4 — `CREATE TYPE name AS ENUM (…)` engine
3832    /// path. Registers the enum in the catalog with order-
3833    /// preserving labels. PG semantics: CREATE TYPE errors if the
3834    /// name is taken (no IF NOT EXISTS).
3835    fn exec_create_type(
3836        &mut self,
3837        s: spg_sql::ast::CreateTypeStatement,
3838    ) -> Result<QueryResult, EngineError> {
3839        // Name-collision check against tables / sequences / views /
3840        // materialized views.
3841        let cat = self.active_catalog();
3842        if cat.get(&s.name).is_some() {
3843            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3844                alloc::format!("type {:?} would shadow an existing table", s.name),
3845            )));
3846        }
3847        if cat.sequences().contains_key(&s.name) {
3848            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3849                alloc::format!("type {:?} would shadow an existing sequence", s.name),
3850            )));
3851        }
3852        if cat.views().contains_key(&s.name) {
3853            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3854                alloc::format!("type {:?} would shadow an existing view", s.name),
3855            )));
3856        }
3857        let def = match s.kind {
3858            spg_sql::ast::TypeKind::Enum { labels } => {
3859                if labels.is_empty() {
3860                    return Err(EngineError::Unsupported(
3861                        "CREATE TYPE … AS ENUM requires at least one label".into(),
3862                    ));
3863                }
3864                // Reject duplicate labels per PG.
3865                for i in 0..labels.len() {
3866                    for j in (i + 1)..labels.len() {
3867                        if labels[i] == labels[j] {
3868                            return Err(EngineError::Unsupported(alloc::format!(
3869                                "CREATE TYPE {:?}: duplicate ENUM label {:?}",
3870                                s.name,
3871                                labels[i]
3872                            )));
3873                        }
3874                    }
3875                }
3876                spg_storage::EnumDef {
3877                    name: s.name.clone(),
3878                    labels,
3879                }
3880            }
3881        };
3882        self.active_catalog_mut()
3883            .create_enum_type(def)
3884            .map_err(EngineError::Storage)?;
3885        Ok(QueryResult::CommandOk {
3886            affected: 0,
3887            modified_catalog: !self.in_transaction(),
3888        })
3889    }
3890
3891    /// v7.17.0 Phase 1.5 — `CREATE DOMAIN name AS base [DEFAULT
3892    /// expr] [NOT NULL] [CHECK (expr)]*` engine path. Stores the
3893    /// base type + Display-rendered CHECK / DEFAULT sources so
3894    /// INSERT/UPDATE on bound columns can re-eval the checks.
3895    fn exec_create_domain(
3896        &mut self,
3897        s: spg_sql::ast::CreateDomainStatement,
3898    ) -> Result<QueryResult, EngineError> {
3899        let cat = self.active_catalog();
3900        if cat.domain_types().contains_key(&s.name) {
3901            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3902                alloc::format!("domain {:?} already exists", s.name),
3903            )));
3904        }
3905        if cat.get(&s.name).is_some()
3906            || cat.sequences().contains_key(&s.name)
3907            || cat.views().contains_key(&s.name)
3908            || cat.enum_types().contains_key(&s.name)
3909        {
3910            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3911                alloc::format!("domain {:?} would shadow an existing object", s.name),
3912            )));
3913        }
3914        let base_type = column_type_to_data_type(s.base_type);
3915        let default = s.default.as_ref().map(|e| alloc::format!("{e}"));
3916        let checks = s
3917            .checks
3918            .iter()
3919            .map(|e| alloc::format!("{e}"))
3920            .collect::<Vec<_>>();
3921        let def = spg_storage::DomainDef {
3922            name: s.name.clone(),
3923            base_type,
3924            nullable: !s.not_null,
3925            default,
3926            checks,
3927        };
3928        self.active_catalog_mut()
3929            .create_domain_type(def)
3930            .map_err(EngineError::Storage)?;
3931        Ok(QueryResult::CommandOk {
3932            affected: 0,
3933            modified_catalog: !self.in_transaction(),
3934        })
3935    }
3936
3937    /// v7.17.0 Phase 1.5 — `DROP DOMAIN [IF EXISTS] names`.
3938    fn exec_drop_domain(
3939        &mut self,
3940        names: &[String],
3941        if_exists: bool,
3942    ) -> Result<QueryResult, EngineError> {
3943        let mut removed = 0usize;
3944        for name in names {
3945            let was_present = self.active_catalog_mut().drop_domain_type(name);
3946            if was_present {
3947                removed += 1;
3948            } else if !if_exists {
3949                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3950                    alloc::format!("domain {name:?} does not exist"),
3951                )));
3952            }
3953        }
3954        Ok(QueryResult::CommandOk {
3955            affected: removed,
3956            modified_catalog: removed > 0 && !self.in_transaction(),
3957        })
3958    }
3959
3960    /// v7.17.0 Phase 1.6 — `CREATE SCHEMA [IF NOT EXISTS] name`.
3961    /// Registers the schema in the catalog. Schema-qualified
3962    /// table references continue to strip the prefix at lookup
3963    /// time (prefix routing, not isolation — see project-next-
3964    /// docket for the v7.18+ real-isolation tracking).
3965    fn exec_create_schema(
3966        &mut self,
3967        name: String,
3968        if_not_exists: bool,
3969    ) -> Result<QueryResult, EngineError> {
3970        self.active_catalog_mut()
3971            .create_schema(name, if_not_exists)
3972            .map_err(EngineError::Storage)?;
3973        Ok(QueryResult::CommandOk {
3974            affected: 0,
3975            modified_catalog: !self.in_transaction(),
3976        })
3977    }
3978
3979    /// v7.17.0 Phase 1.6 — `DROP SCHEMA [IF EXISTS] names`.
3980    /// Built-in schemas always reject the drop with a clear
3981    /// error.
3982    fn exec_drop_schema(
3983        &mut self,
3984        names: &[String],
3985        if_exists: bool,
3986    ) -> Result<QueryResult, EngineError> {
3987        let mut removed = 0usize;
3988        for name in names {
3989            let was_present = self
3990                .active_catalog_mut()
3991                .drop_schema(name)
3992                .map_err(EngineError::Storage)?;
3993            if was_present {
3994                removed += 1;
3995            } else if !if_exists {
3996                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
3997                    alloc::format!("schema {name:?} does not exist"),
3998                )));
3999            }
4000        }
4001        Ok(QueryResult::CommandOk {
4002            affected: removed,
4003            modified_catalog: removed > 0 && !self.in_transaction(),
4004        })
4005    }
4006
4007    /// v7.17.0 Phase 1.4 — `DROP TYPE [IF EXISTS] names`. Only
4008    /// ENUM types are catalogued today; other types silently
4009    /// no-op even outside IF EXISTS to mirror the prior
4010    /// "everything's text" lax stance.
4011    fn exec_drop_type(
4012        &mut self,
4013        names: &[String],
4014        if_exists: bool,
4015    ) -> Result<QueryResult, EngineError> {
4016        let mut removed = 0usize;
4017        for name in names {
4018            let was_present = self.active_catalog_mut().drop_enum_type(name);
4019            if was_present {
4020                removed += 1;
4021            } else if !if_exists {
4022                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4023                    alloc::format!("type {name:?} does not exist"),
4024                )));
4025            }
4026        }
4027        Ok(QueryResult::CommandOk {
4028            affected: removed,
4029            modified_catalog: removed > 0 && !self.in_transaction(),
4030        })
4031    }
4032
4033    /// v7.17.0 Phase 1.3 — `CREATE MATERIALIZED VIEW` engine path.
4034    /// Materialises the body at CREATE time (unless WITH NO DATA),
4035    /// stores the result as a regular `Table`, and registers the
4036    /// body source in the catalog so REFRESH can re-run it.
4037    fn exec_create_materialized_view(
4038        &mut self,
4039        s: spg_sql::ast::CreateMaterializedViewStatement,
4040    ) -> Result<QueryResult, EngineError> {
4041        // Name-collision check (table / view / sequence / mat-view).
4042        let cat = self.active_catalog();
4043        if cat.materialized_views().contains_key(&s.name) || cat.get(&s.name).is_some() {
4044            if s.if_not_exists {
4045                return Ok(QueryResult::CommandOk {
4046                    affected: 0,
4047                    modified_catalog: false,
4048                });
4049            }
4050            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4051                alloc::format!("materialized view {:?} already exists", s.name),
4052            )));
4053        }
4054        if cat.views().contains_key(&s.name) {
4055            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4056                alloc::format!(
4057                    "materialized view {:?} would shadow an existing view",
4058                    s.name
4059                ),
4060            )));
4061        }
4062        if cat.sequences().contains_key(&s.name) {
4063            return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4064                alloc::format!(
4065                    "materialized view {:?} would shadow an existing sequence",
4066                    s.name
4067                ),
4068            )));
4069        }
4070        // Render the body to canonical form for the registry.
4071        let body_repr = alloc::format!("{}", spg_sql::ast::Statement::Select(s.body.clone()));
4072        // Execute the body to learn the columns. With WITH DATA we
4073        // also materialise the rows; with WITH NO DATA we only need
4074        // the schema, so re-use a LIMIT 0 wrap to keep the column
4075        // inference path uniform without paying for the rows.
4076        let result = self.exec_select_cancel(&s.body, CancelToken::none())?;
4077        let (mut cols, rows) = match result {
4078            QueryResult::Rows { columns, rows } => (columns, rows),
4079            other => {
4080                return Err(EngineError::Unsupported(alloc::format!(
4081                    "CREATE MATERIALIZED VIEW body did not return rows: {other:?}"
4082                )));
4083            }
4084        };
4085        // Apply the column-rename list per PG semantics.
4086        if !s.columns.is_empty() {
4087            if s.columns.len() != cols.len() {
4088                return Err(EngineError::Unsupported(alloc::format!(
4089                    "CREATE MATERIALIZED VIEW {:?}: column list has {} names but body returns {}",
4090                    s.name,
4091                    s.columns.len(),
4092                    cols.len()
4093                )));
4094            }
4095            for (c, name) in cols.iter_mut().zip(s.columns.iter()) {
4096                c.name.clone_from(name);
4097            }
4098        }
4099        // Promote any synthetic-Text projections to their actual
4100        // observed types so the backing table accepts the rows.
4101        cols = infer_column_types(&cols, &rows);
4102        let schema = spg_storage::TableSchema::new(s.name.clone(), cols);
4103        let cat = self.active_catalog_mut();
4104        cat.create_table(schema).map_err(EngineError::Storage)?;
4105        if s.with_data {
4106            let table = cat
4107                .get_mut(&s.name)
4108                .expect("just-created materialized-view backing table must exist");
4109            for row in rows {
4110                table.insert(row).map_err(EngineError::Storage)?;
4111            }
4112        }
4113        cat.register_materialized_view(s.name.clone(), body_repr);
4114        Ok(QueryResult::CommandOk {
4115            affected: 0,
4116            modified_catalog: !self.in_transaction(),
4117        })
4118    }
4119
4120    /// v7.17.0 Phase 1.3 — `REFRESH MATERIALIZED VIEW name [WITH
4121    /// [NO] DATA]`. Looks up the source, re-runs it, replaces the
4122    /// backing table's rows.
4123    fn exec_refresh_materialized_view(
4124        &mut self,
4125        name: &str,
4126        with_data: bool,
4127    ) -> Result<QueryResult, EngineError> {
4128        let source = self
4129            .active_catalog()
4130            .materialized_views()
4131            .get(name)
4132            .cloned()
4133            .ok_or_else(|| {
4134                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
4135                    "materialized view {name:?} does not exist"
4136                )))
4137            })?;
4138        // Wipe the existing rows first (PG truncates the matview
4139        // and rebuilds; we approximate with an empty INSERT loop).
4140        {
4141            let cat = self.active_catalog_mut();
4142            let table = cat.get_mut(name).ok_or_else(|| {
4143                EngineError::Storage(spg_storage::StorageError::Corrupt(alloc::format!(
4144                    "materialized view {name:?} backing table missing"
4145                )))
4146            })?;
4147            table.truncate();
4148        }
4149        if !with_data {
4150            return Ok(QueryResult::CommandOk {
4151                affected: 0,
4152                modified_catalog: !self.in_transaction(),
4153            });
4154        }
4155        let parsed = spg_sql::parser::parse_statement(&source).map_err(|e| {
4156            EngineError::Unsupported(alloc::format!(
4157                "materialized view {name:?} body re-parse failed: {e}"
4158            ))
4159        })?;
4160        let Statement::Select(body) = parsed else {
4161            return Err(EngineError::Unsupported(alloc::format!(
4162                "materialized view {name:?} body is not a SELECT (catalog corruption)"
4163            )));
4164        };
4165        let rows = match self.exec_select_cancel(&body, CancelToken::none())? {
4166            QueryResult::Rows { rows, .. } => rows,
4167            other => {
4168                return Err(EngineError::Unsupported(alloc::format!(
4169                    "REFRESH MATERIALIZED VIEW {name:?} body did not return rows: {other:?}"
4170                )));
4171            }
4172        };
4173        let cat = self.active_catalog_mut();
4174        let table = cat.get_mut(name).expect("backing table verified above");
4175        let affected = rows.len();
4176        for row in rows {
4177            table.insert(row).map_err(EngineError::Storage)?;
4178        }
4179        Ok(QueryResult::CommandOk {
4180            affected,
4181            modified_catalog: !self.in_transaction(),
4182        })
4183    }
4184
4185    /// v7.17.0 Phase 1.3 — `DROP MATERIALIZED VIEW [IF EXISTS]
4186    /// names`. Drops the backing table + unregisters the source.
4187    fn exec_drop_materialized_view(
4188        &mut self,
4189        names: &[String],
4190        if_exists: bool,
4191    ) -> Result<QueryResult, EngineError> {
4192        let mut removed = 0usize;
4193        for name in names {
4194            let was_present = self
4195                .active_catalog_mut()
4196                .drop_materialized_view_source(name);
4197            if was_present {
4198                // Drop the backing table too.
4199                self.active_catalog_mut().drop_table(name);
4200                removed += 1;
4201            } else if !if_exists {
4202                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4203                    alloc::format!("materialized view {name:?} does not exist"),
4204                )));
4205            }
4206        }
4207        Ok(QueryResult::CommandOk {
4208            affected: removed,
4209            modified_catalog: removed > 0 && !self.in_transaction(),
4210        })
4211    }
4212
4213    /// v7.17.0 Phase 1.2 — `DROP VIEW [IF EXISTS] name [, name…]`.
4214    fn exec_drop_view(
4215        &mut self,
4216        names: &[String],
4217        if_exists: bool,
4218    ) -> Result<QueryResult, EngineError> {
4219        let mut removed = 0usize;
4220        for name in names {
4221            let was_present = self.active_catalog_mut().drop_view(name);
4222            if !was_present && !if_exists {
4223                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4224                    alloc::format!("view {name:?} does not exist"),
4225                )));
4226            }
4227            if was_present {
4228                removed += 1;
4229            }
4230        }
4231        Ok(QueryResult::CommandOk {
4232            affected: removed,
4233            modified_catalog: removed > 0 && !self.in_transaction(),
4234        })
4235    }
4236
4237    /// v7.17.0 — `DROP SEQUENCE [IF EXISTS] name [, name…]`.
4238    fn exec_drop_sequence(
4239        &mut self,
4240        names: &[String],
4241        if_exists: bool,
4242    ) -> Result<QueryResult, EngineError> {
4243        let mut removed = 0usize;
4244        for name in names {
4245            let was_present = self.active_catalog_mut().drop_sequence(name);
4246            if !was_present && !if_exists {
4247                return Err(EngineError::Storage(spg_storage::StorageError::Corrupt(
4248                    alloc::format!("sequence {name:?} does not exist"),
4249                )));
4250            }
4251            if was_present {
4252                removed += 1;
4253            }
4254        }
4255        Ok(QueryResult::CommandOk {
4256            affected: removed,
4257            modified_catalog: removed > 0 && !self.in_transaction(),
4258        })
4259    }
4260
4261    /// v4.4 `UPDATE <table> SET col = expr [, ...] [WHERE cond]`.
4262    /// Filter pass uses the same WHERE eval as `exec_select`. Per
4263    /// matched row, evaluate each RHS expression against the *old*
4264    /// row, then call `Table::update_row` which rebuilds indices.
4265    /// Indexed columns are correctly reflected because rebuild
4266    /// happens after the cell rewrite.
4267    fn exec_update_cancel(
4268        &mut self,
4269        stmt: &spg_sql::ast::UpdateStatement,
4270        cancel: CancelToken<'_>,
4271    ) -> Result<QueryResult, EngineError> {
4272        // v7.12.5 — snapshot BEFORE/AFTER UPDATE row triggers + the
4273        // session FTS config before the table mut-borrow opens (the
4274        // INSERT path uses the same pattern). Empty vecs are the
4275        // common "no triggers on this table" fast path.
4276        // v7.13.0 — UPDATE triggers carry an optional `UPDATE OF
4277        // cols` filter. The filter is paired with each function so
4278        // the per-row fire loop can skip when no listed column
4279        // actually differs between OLD and NEW.
4280        let before_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "BEFORE");
4281        let after_update_triggers = self.snapshot_update_row_triggers(&stmt.table, "AFTER");
4282        let trigger_session_cfg: Option<String> = self
4283            .session_params
4284            .get("default_text_search_config")
4285            .cloned();
4286        // v5.2.3: if the WHERE is a PK equality and matches a cold-
4287        // tier row, promote it back to the hot tier *before* the
4288        // hot-row walk. The promote pushes the row to the end of
4289        // `table.rows`, where the upcoming SET-evaluation loop will
4290        // pick it up and apply the assignments. Lookups for the key
4291        // never observe a gap because `promote_cold_row` inserts the
4292        // hot row before retiring the cold locator.
4293        if let Some(w) = &stmt.where_ {
4294            let schema_cols = self
4295                .active_catalog()
4296                .get(&stmt.table)
4297                .ok_or_else(|| {
4298                    EngineError::Storage(StorageError::TableNotFound {
4299                        name: stmt.table.clone(),
4300                    })
4301                })?
4302                .schema()
4303                .columns
4304                .clone();
4305            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
4306                && let Some(idx_name) = self
4307                    .active_catalog()
4308                    .get(&stmt.table)
4309                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
4310            {
4311                // Promote may be a no-op (key is hot-only or absent);
4312                // we don't care about the return value here — the
4313                // subsequent hot walk will either match or not.
4314                let _ = self
4315                    .active_catalog_mut()
4316                    .promote_cold_row(&stmt.table, &idx_name, &key);
4317            }
4318        }
4319
4320        // v7.12.1 — cache session FTS config before the table
4321        // mut-borrow (same reason as exec_delete).
4322        let ts_cfg: Option<String> = self
4323            .session_param("default_text_search_config")
4324            .map(String::from);
4325        // v7.17.0 Phase 2.1 — snapshot the clock pointer before
4326        // we hold the catalog mutably so ON UPDATE runtime
4327        // overrides see the engine wall clock.
4328        let clock_for_on_update = self.clock;
4329        let table = self
4330            .active_catalog_mut()
4331            .get_mut(&stmt.table)
4332            .ok_or_else(|| {
4333                EngineError::Storage(StorageError::TableNotFound {
4334                    name: stmt.table.clone(),
4335                })
4336            })?;
4337        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
4338        // Resolve each SET target to a column position once, validate
4339        // up front so a typo'd column doesn't leave a partial mutation
4340        // behind.
4341        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
4342        for (col, expr) in &stmt.assignments {
4343            let pos = schema_cols
4344                .iter()
4345                .position(|c| c.name == *col)
4346                .ok_or_else(|| {
4347                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
4348                })?;
4349            targets.push((pos, expr));
4350        }
4351        // v7.17.0 Phase 2.1 — for every column with an
4352        // `ON UPDATE CURRENT_TIMESTAMP` binding that the caller
4353        // did NOT explicitly set, schedule an automatic override.
4354        // Reuses `eval_runtime_default_free` so the same
4355        // canonical runtime-expression whitelist (now /
4356        // current_timestamp / current_date / …) governs both
4357        // DEFAULT and ON UPDATE.
4358        let mut on_update_overrides: Vec<(usize, String)> = Vec::new();
4359        for (i, col) in schema_cols.iter().enumerate() {
4360            if targets.iter().any(|(p, _)| *p == i) {
4361                continue;
4362            }
4363            if let Some(src) = &col.on_update_runtime {
4364                on_update_overrides.push((i, src.clone()));
4365            }
4366        }
4367        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
4368            .with_default_text_search_config(ts_cfg.as_deref());
4369        // Walk candidate rows, evaluate WHERE then SET
4370        // expressions. We gather (position, new_values) tuples
4371        // first and apply them afterwards so the WHERE/RHS
4372        // evaluation reads the original row state — matches PG
4373        // semantics (UPDATE doesn't see its own writes).
4374        //
4375        // v7.20 P4 — index seek: a single-column equality WHERE
4376        // on an indexed column narrows the walk from
4377        // O(table.rows()) to O(matches). The full WHERE still
4378        // re-evaluates per candidate (the seek may be an
4379        // over-approximation under AND-composites), so semantics
4380        // are unchanged. profile: the bench's `UPDATE … WHERE
4381        // id = $1` on a 5 000-row table was a ~1.3 ms full scan
4382        // per statement; with the seek it's ~2 µs.
4383        let seek_positions: Option<Vec<usize>> = stmt
4384            .where_
4385            .as_ref()
4386            .and_then(|w| try_index_seek_positions(w, &schema_cols, table, stmt.table.as_str()));
4387        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
4388        let candidate_positions: Vec<usize> = match &seek_positions {
4389            Some(list) => list.clone(),
4390            None => (0..table.row_count()).collect(),
4391        };
4392        for (loop_n, &i) in candidate_positions.iter().enumerate() {
4393            // v4.5: cooperative cancel checkpoint every 256 rows so
4394            // a runaway UPDATE without WHERE doesn't drag past the
4395            // server's query-timeout watchdog.
4396            if loop_n.is_multiple_of(256) {
4397                cancel.check()?;
4398            }
4399            let Some(row) = table.rows().get(i) else {
4400                continue;
4401            };
4402            if let Some(w) = &stmt.where_ {
4403                let cond = eval::eval_expr(w, row, &ctx)?;
4404                if !matches!(cond, Value::Bool(true)) {
4405                    continue;
4406                }
4407            }
4408            let mut new_vals = row.values.clone();
4409            for (pos, expr) in &targets {
4410                let v = eval::eval_expr(expr, row, &ctx)?;
4411                let coerced = coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
4412                check_unsigned_range(&coerced, &schema_cols[*pos], *pos)?;
4413                new_vals[*pos] = coerced;
4414            }
4415            // v7.17.0 Phase 2.1 — apply ON UPDATE overrides for
4416            // any column the SET clause didn't touch.
4417            for (pos, src) in &on_update_overrides {
4418                let v = eval_runtime_default_free(src, schema_cols[*pos].ty, clock_for_on_update)?;
4419                new_vals[*pos] = v;
4420            }
4421            planned.push((i, new_vals));
4422        }
4423        // planned must stay position-sorted: downstream passes
4424        // (FK pairing, trigger walks, the apply loop) iterate it
4425        // assuming ascending row order, which the full-scan path
4426        // guaranteed implicitly.
4427        planned.sort_by_key(|(i, _)| *i);
4428        // v7.6.6 — capture pre-update row values for the FK
4429        // enforcement passes below. `planned` carries new values
4430        // only; pair them with the old row.
4431        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
4432            .iter()
4433            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
4434            .collect();
4435        let self_fks = table.schema().foreign_keys.clone();
4436        // v7.12.5 — `affected` is computed post-BEFORE-trigger
4437        // below (triggers may RETURN NULL to skip individual
4438        // rows). The pre-trigger len shape is no longer accurate.
4439        // Release mutable borrow on `table` for the FK passes.
4440        let _ = table;
4441        // v7.6.6 — Stage 2a: outbound FK check. For every row whose
4442        // local FK columns changed, the new value must exist in the
4443        // parent.
4444        if !self_fks.is_empty() {
4445            let new_rows: Vec<Vec<Value>> = planned
4446                .iter()
4447                .map(|(_pos, new_vals)| new_vals.clone())
4448                .collect();
4449            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
4450        }
4451        // v7.13.0 — CHECK constraint enforcement on UPDATE
4452        // (mailrs round-5 G3). Predicates evaluated against the
4453        // candidate post-UPDATE row; false rejects the UPDATE.
4454        {
4455            let new_rows: Vec<Vec<Value>> = planned
4456                .iter()
4457                .map(|(_pos, new_vals)| new_vals.clone())
4458                .collect();
4459            enforce_check_constraints(self.active_catalog(), &stmt.table, &new_rows)?;
4460        }
4461        // v7.6.6 — Stage 2b: inbound FK check. For every row that
4462        // changed value in a column that *some other table* uses as
4463        // a FK parent column, react per `on_update` action.
4464        let child_plan =
4465            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
4466        // Stage 3a — apply each child-side action.
4467        for step in &child_plan {
4468            apply_fk_child_step(self.active_catalog_mut(), step)?;
4469        }
4470        // Stage 3b — apply the original UPDATE.
4471        let table = self
4472            .active_catalog_mut()
4473            .get_mut(&stmt.table)
4474            .ok_or_else(|| {
4475                EngineError::Storage(StorageError::TableNotFound {
4476                    name: stmt.table.clone(),
4477                })
4478            })?;
4479        // v7.12.5 — fire BEFORE/AFTER UPDATE row-level triggers
4480        // around the apply loop. BEFORE sees NEW=candidate +
4481        // OLD=current; may rewrite NEW or RETURN NULL to skip.
4482        // AFTER sees NEW=post-write + OLD=pre-write (both read-
4483        // only).
4484        //
4485        // Filter `planned` through the BEFORE pass first so the
4486        // RETURNING snapshot reflects what actually got written
4487        // (triggers may rewrite cells, including a cancellation).
4488        let mut applied_after_before: Vec<(usize, Row, Row)> = Vec::with_capacity(planned.len());
4489        // v7.12.7 — embedded SQL queue.
4490        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4491        for (pos, new_vals) in &planned {
4492            let old_row = table.rows()[*pos].clone();
4493            let mut new_row = Row::new(new_vals.clone());
4494            let mut skip = false;
4495            for (fd, filter) in &before_update_triggers {
4496                // v7.13.0 — `UPDATE OF cols` filter (mailrs round-5
4497                // G7). Skip this trigger when the filter is set and
4498                // no listed column actually differs between OLD and
4499                // NEW for this row.
4500                if !filter.is_empty()
4501                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
4502                {
4503                    continue;
4504                }
4505                let (outcome, deferred) = triggers::fire_row_trigger(
4506                    fd,
4507                    Some(new_row.clone()),
4508                    Some(&old_row),
4509                    &stmt.table,
4510                    &schema_cols,
4511                    &[],
4512                    trigger_session_cfg.as_deref(),
4513                    false,
4514                )
4515                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4516                deferred_embedded.extend(deferred);
4517                match outcome {
4518                    triggers::TriggerOutcome::Row(r) => new_row = r,
4519                    triggers::TriggerOutcome::Skip => {
4520                        skip = true;
4521                        break;
4522                    }
4523                }
4524            }
4525            if !skip {
4526                applied_after_before.push((*pos, new_row, old_row));
4527            }
4528        }
4529        // v7.9.4 — snapshot post-update values for RETURNING (post-
4530        // BEFORE-trigger because triggers can rewrite cells).
4531        let updated_for_returning: Vec<Vec<Value>> = if stmt.returning.is_some() {
4532            applied_after_before
4533                .iter()
4534                .map(|(_pos, new_row, _old)| new_row.values.clone())
4535                .collect()
4536        } else {
4537            Vec::new()
4538        };
4539        let affected = applied_after_before.len();
4540        // Apply, then fire AFTER triggers per row. AFTER runs read-
4541        // only against the freshly-written row; v7.12.4-shape
4542        // assignment errors with a clear message.
4543        for (pos, new_row, old_row) in applied_after_before {
4544            table.update_row(pos, new_row.values.clone())?;
4545            for (fd, filter) in &after_update_triggers {
4546                if !filter.is_empty()
4547                    && !any_column_changed(filter, &schema_cols, &old_row, &new_row)
4548                {
4549                    continue;
4550                }
4551                let (_outcome, deferred) = triggers::fire_row_trigger(
4552                    fd,
4553                    Some(new_row.clone()),
4554                    Some(&old_row),
4555                    &stmt.table,
4556                    &schema_cols,
4557                    &[],
4558                    trigger_session_cfg.as_deref(),
4559                    true,
4560                )
4561                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
4562                deferred_embedded.extend(deferred);
4563            }
4564        }
4565        let _ = table;
4566        // v7.12.7 — drain trigger-emitted embedded SQL for this UPDATE.
4567        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
4568        // v6.2.1 — auto-analyze modified-row tracking for UPDATE.
4569        if !self.in_transaction() && affected > 0 {
4570            self.statistics
4571                .record_modifications(&stmt.table, affected as u64);
4572        }
4573        // v7.9.4 — RETURNING projection.
4574        if let Some(items) = &stmt.returning {
4575            return self.build_returning_rows(&stmt.table, items, updated_for_returning);
4576        }
4577        Ok(QueryResult::CommandOk {
4578            affected,
4579            modified_catalog: !self.in_transaction(),
4580        })
4581    }
4582
4583    /// v4.4 `DELETE FROM <table> [WHERE cond]`. Collects matching
4584    /// positions then delegates to `Table::delete_rows` (single index
4585    /// rebuild for the batch).
4586    /// v7.17.0 Phase 3.P0-42 — SQL:2003 / PG 15+ `MERGE` execution.
4587    ///
4588    /// Semantics:
4589    ///   * Resolve `target` and `source` tables (catalog reads).
4590    ///   * Build a combined `(target_alias.col, source_alias.col)`
4591    ///     schema so the ON / WHEN AND / SET / VALUES expressions
4592    ///     resolve through the standard qualifier-aware resolver.
4593    ///   * Pass 1: walk every source row × every target hot row,
4594    ///     evaluate ON, then pick the first WHEN clause that fits
4595    ///     (`Matched` if any target row matched, `NotMatched`
4596    ///     otherwise; AND-condition must hold). Collect the action
4597    ///     plan as `(deletes, updates, inserts)` so the apply pass
4598    ///     reads the original target row state.
4599    ///   * Pass 2: apply the plan against the target's mutable row
4600    ///     vector. Deletes execute by index in descending order so
4601    ///     earlier indices remain stable; updates next; inserts
4602    ///     last (matching PG's "INSERT branch sees the post-delete
4603    ///     state" behaviour for the common upsert shape).
4604    ///
4605    /// v7.17 simplifications (documented limitations):
4606    ///   * No triggers / WAL plumbing (MVP); MERGE rows don't fire
4607    ///     INSERT / UPDATE / DELETE row triggers in v7.17.
4608    ///   * No cardinality check (PG-canonical: "MERGE command
4609    ///     cannot affect row a second time" — SPG silently applies
4610    ///     the last action for a target row covered twice).
4611    ///   * Source must be a catalog-resolvable table (no subquery
4612    ///     source); RETURNING / BY SOURCE / BY TARGET unsupported.
4613    fn exec_merge_cancel(
4614        &mut self,
4615        stmt: &spg_sql::ast::MergeStatement,
4616        cancel: CancelToken<'_>,
4617    ) -> Result<QueryResult, EngineError> {
4618        let target_alias = stmt
4619            .target_alias
4620            .clone()
4621            .unwrap_or_else(|| stmt.target.clone());
4622        let source_alias = stmt
4623            .source_alias
4624            .clone()
4625            .unwrap_or_else(|| stmt.source.clone());
4626        let (target_cols, target_rows_snapshot) = {
4627            let t = self.active_catalog().get(&stmt.target).ok_or_else(|| {
4628                EngineError::Storage(StorageError::TableNotFound {
4629                    name: stmt.target.clone(),
4630                })
4631            })?;
4632            (
4633                t.schema().columns.clone(),
4634                t.rows().iter().cloned().collect::<Vec<Row>>(),
4635            )
4636        };
4637        let (source_cols, source_rows) = {
4638            let s = self.active_catalog().get(&stmt.source).ok_or_else(|| {
4639                EngineError::Storage(StorageError::TableNotFound {
4640                    name: stmt.source.clone(),
4641                })
4642            })?;
4643            (
4644                s.schema().columns.clone(),
4645                s.rows().iter().cloned().collect::<Vec<Row>>(),
4646            )
4647        };
4648        // Composite schema: target_alias.col ... source_alias.col ...
4649        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
4650        for col in &target_cols {
4651            combined_schema.push(ColumnSchema::new(
4652                alloc::format!("{target_alias}.{}", col.name),
4653                col.ty,
4654                col.nullable,
4655            ));
4656        }
4657        for col in &source_cols {
4658            combined_schema.push(ColumnSchema::new(
4659                alloc::format!("{source_alias}.{}", col.name),
4660                col.ty,
4661                col.nullable,
4662            ));
4663        }
4664        let combined_ctx = EvalContext::new(&combined_schema, None);
4665        // Source-only context for WHEN NOT MATCHED actions (no
4666        // matched target row exists — the source-side qualified
4667        // columns must still resolve).
4668        let mut source_only_schema: Vec<ColumnSchema> = Vec::new();
4669        for col in &target_cols {
4670            source_only_schema.push(ColumnSchema::new(
4671                alloc::format!("{target_alias}.{}", col.name),
4672                col.ty,
4673                col.nullable,
4674            ));
4675        }
4676        for col in &source_cols {
4677            source_only_schema.push(ColumnSchema::new(
4678                alloc::format!("{source_alias}.{}", col.name),
4679                col.ty,
4680                col.nullable,
4681            ));
4682        }
4683        let source_only_ctx = EvalContext::new(&source_only_schema, None);
4684        let target_arity = target_cols.len();
4685        let source_arity = source_cols.len();
4686
4687        // Resolve INSERT column positions once (validate names).
4688        // For each clause that's an INSERT, map column names → target positions.
4689        let mut delete_indices: Vec<usize> = Vec::new();
4690        let mut updates: Vec<(usize, Vec<Value>)> = Vec::new();
4691        let mut inserts: Vec<Vec<Value>> = Vec::new();
4692        let mut affected: usize = 0;
4693
4694        for (src_idx, src_row) in source_rows.iter().enumerate() {
4695            if src_idx.is_multiple_of(256) {
4696                cancel.check()?;
4697            }
4698            // Find every matched target index (per the ON predicate).
4699            let mut matched_targets: Vec<usize> = Vec::new();
4700            for (t_idx, t_row) in target_rows_snapshot.iter().enumerate() {
4701                let mut combined_vals = t_row.values.clone();
4702                combined_vals.extend(src_row.values.iter().cloned());
4703                let combined_row = Row::new(combined_vals);
4704                let cond = eval::eval_expr(&stmt.on, &combined_row, &combined_ctx)?;
4705                if matches!(cond, Value::Bool(true)) {
4706                    matched_targets.push(t_idx);
4707                }
4708            }
4709            let is_matched = !matched_targets.is_empty();
4710            // Pick the first WHEN clause whose kind agrees with
4711            // `is_matched` and whose AND condition (if any) holds.
4712            // AND condition for MATCHED: evaluated against the
4713            // first matched target row × source. For NOT MATCHED:
4714            // evaluated with target side NULL-padded.
4715            let fired_clause = stmt.clauses.iter().find(|c| {
4716                let kind_ok = match c.matched {
4717                    spg_sql::ast::MergeMatched::Matched => is_matched,
4718                    spg_sql::ast::MergeMatched::NotMatched => !is_matched,
4719                };
4720                if !kind_ok {
4721                    return false;
4722                }
4723                let Some(cond_expr) = &c.condition else {
4724                    return true;
4725                };
4726                let row = if is_matched {
4727                    let t = &target_rows_snapshot[matched_targets[0]];
4728                    let mut vals = t.values.clone();
4729                    vals.extend(src_row.values.iter().cloned());
4730                    Row::new(vals)
4731                } else {
4732                    let mut vals: Vec<Value> = (0..target_arity).map(|_| Value::Null).collect();
4733                    vals.extend(src_row.values.iter().cloned());
4734                    Row::new(vals)
4735                };
4736                let ctx_ref = if is_matched {
4737                    &combined_ctx
4738                } else {
4739                    &source_only_ctx
4740                };
4741                matches!(
4742                    eval::eval_expr(cond_expr, &row, ctx_ref),
4743                    Ok(Value::Bool(true))
4744                )
4745            });
4746            let Some(clause) = fired_clause else { continue };
4747            match &clause.action {
4748                spg_sql::ast::MergeAction::DoNothing => {}
4749                spg_sql::ast::MergeAction::Delete => {
4750                    for &t_idx in &matched_targets {
4751                        if !delete_indices.contains(&t_idx) {
4752                            delete_indices.push(t_idx);
4753                            affected += 1;
4754                        }
4755                    }
4756                }
4757                spg_sql::ast::MergeAction::Update { assignments } => {
4758                    // Pre-resolve SET targets to target column positions.
4759                    let mut planned_sets: Vec<(usize, &Expr)> =
4760                        Vec::with_capacity(assignments.len());
4761                    for (col, expr) in assignments {
4762                        let pos =
4763                            target_cols
4764                                .iter()
4765                                .position(|c| c.name == *col)
4766                                .ok_or_else(|| {
4767                                    EngineError::Eval(EvalError::ColumnNotFound {
4768                                        name: col.clone(),
4769                                    })
4770                                })?;
4771                        planned_sets.push((pos, expr));
4772                    }
4773                    for &t_idx in &matched_targets {
4774                        let t_row = &target_rows_snapshot[t_idx];
4775                        let mut new_values = t_row.values.clone();
4776                        let mut combined_vals = t_row.values.clone();
4777                        combined_vals.extend(src_row.values.iter().cloned());
4778                        let combined_row = Row::new(combined_vals);
4779                        for (pos, expr) in &planned_sets {
4780                            let raw = eval::eval_expr(expr, &combined_row, &combined_ctx)?;
4781                            let coerced = coerce_value(
4782                                raw,
4783                                target_cols[*pos].ty,
4784                                &target_cols[*pos].name,
4785                                *pos,
4786                            )?;
4787                            new_values[*pos] = coerced;
4788                        }
4789                        updates.push((t_idx, new_values));
4790                        affected += 1;
4791                    }
4792                }
4793                spg_sql::ast::MergeAction::Insert { columns, values } => {
4794                    // For INSERT NOT MATCHED, target side is NULL-padded.
4795                    let mut vals: Vec<Value> = (0..target_arity).map(|_| Value::Null).collect();
4796                    vals.extend(src_row.values.iter().cloned());
4797                    let synth_row = Row::new(vals);
4798                    let mut new_row_values: Vec<Value> =
4799                        (0..target_arity).map(|_| Value::Null).collect();
4800                    for (col, expr) in columns.iter().zip(values.iter()) {
4801                        let pos =
4802                            target_cols
4803                                .iter()
4804                                .position(|c| c.name == *col)
4805                                .ok_or_else(|| {
4806                                    EngineError::Eval(EvalError::ColumnNotFound {
4807                                        name: col.clone(),
4808                                    })
4809                                })?;
4810                        let raw = eval::eval_expr(expr, &synth_row, &source_only_ctx)?;
4811                        let coerced =
4812                            coerce_value(raw, target_cols[pos].ty, &target_cols[pos].name, pos)?;
4813                        new_row_values[pos] = coerced;
4814                    }
4815                    inserts.push(new_row_values);
4816                    affected += 1;
4817                }
4818            }
4819        }
4820        let _ = source_arity; // captured for symmetry; cancellation cost negligible.
4821
4822        // Apply the plan to the target table.
4823        let table = self
4824            .active_catalog_mut()
4825            .get_mut(&stmt.target)
4826            .ok_or_else(|| {
4827                EngineError::Storage(StorageError::TableNotFound {
4828                    name: stmt.target.clone(),
4829                })
4830            })?;
4831        // Apply updates first (in-place), then deletes (one batch),
4832        // then inserts. The storage API uses `update_row(pos,
4833        // new_values)`, `delete_rows(&[positions])`, and `insert(row)`.
4834        for (idx, new_vals) in &updates {
4835            table
4836                .update_row(*idx, new_vals.clone())
4837                .map_err(EngineError::Storage)?;
4838        }
4839        if !delete_indices.is_empty() {
4840            table.delete_rows(&delete_indices);
4841        }
4842        for vals in inserts {
4843            table.insert(Row::new(vals)).map_err(EngineError::Storage)?;
4844        }
4845        Ok(QueryResult::CommandOk {
4846            affected,
4847            modified_catalog: affected > 0,
4848        })
4849    }
4850
4851    fn exec_delete_cancel(
4852        &mut self,
4853        stmt: &spg_sql::ast::DeleteStatement,
4854        cancel: CancelToken<'_>,
4855    ) -> Result<QueryResult, EngineError> {
4856        // v7.12.5 — snapshot BEFORE/AFTER DELETE row triggers + the
4857        // session FTS config before the mut borrow (same shape as
4858        // INSERT / UPDATE).
4859        let before_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "BEFORE");
4860        let after_delete_triggers = self.snapshot_row_triggers(&stmt.table, "DELETE", "AFTER");
4861        let trigger_session_cfg: Option<String> = self
4862            .session_params
4863            .get("default_text_search_config")
4864            .cloned();
4865        // v5.2.3: PK-targeted DELETE → first retire any cold-tier
4866        // locator for the key. The cold row body stays in the
4867        // segment (becoming shadowed garbage that a future
4868        // compaction pass reclaims) but the index no longer
4869        // resolves it. The shadow count contributes to the
4870        // affected total; the subsequent hot walk handles any hot
4871        // rows for the same key.
4872        let mut cold_shadow_count: usize = 0;
4873        if let Some(w) = &stmt.where_ {
4874            let schema_cols = self
4875                .active_catalog()
4876                .get(&stmt.table)
4877                .ok_or_else(|| {
4878                    EngineError::Storage(StorageError::TableNotFound {
4879                        name: stmt.table.clone(),
4880                    })
4881                })?
4882                .schema()
4883                .columns
4884                .clone();
4885            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
4886                && let Some(idx_name) = self
4887                    .active_catalog()
4888                    .get(&stmt.table)
4889                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
4890            {
4891                cold_shadow_count = self
4892                    .active_catalog_mut()
4893                    .shadow_cold_row(&stmt.table, &idx_name, &key)
4894                    .unwrap_or(0);
4895            }
4896        }
4897
4898        // v7.12.1 — cache the session FTS config as an owned
4899        // String before the mutable table borrow below; the
4900        // ctx-builder then references it via `as_deref` so the
4901        // immutable read of `session_params` doesn't conflict
4902        // with the mut borrow chain.
4903        let ts_cfg: Option<String> = self
4904            .session_param("default_text_search_config")
4905            .map(String::from);
4906        let table = self
4907            .active_catalog_mut()
4908            .get_mut(&stmt.table)
4909            .ok_or_else(|| {
4910                EngineError::Storage(StorageError::TableNotFound {
4911                    name: stmt.table.clone(),
4912                })
4913            })?;
4914        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
4915        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()))
4916            .with_default_text_search_config(ts_cfg.as_deref());
4917        let mut positions: Vec<usize> = Vec::new();
4918        // v7.6.3 — collect every to-delete row's full Value tuple
4919        // alongside its position, so the FK enforcement pass can
4920        // run after the mut borrow drops.
4921        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
4922        // v7.20 P4 — index seek (same shape as exec_update_cancel):
4923        // an equality WHERE on an indexed column narrows the walk
4924        // to the matching hot positions; the full WHERE still
4925        // re-evaluates per candidate. Downstream passes assume
4926        // ascending position order, so the seek result is sorted.
4927        let seek_positions: Option<Vec<usize>> = stmt
4928            .where_
4929            .as_ref()
4930            .and_then(|w| try_index_seek_positions(w, &schema_cols, table, stmt.table.as_str()));
4931        let candidate_positions: Vec<usize> = match seek_positions {
4932            Some(mut list) => {
4933                list.sort_unstable();
4934                list
4935            }
4936            None => (0..table.row_count()).collect(),
4937        };
4938        for (loop_n, &i) in candidate_positions.iter().enumerate() {
4939            if loop_n.is_multiple_of(256) {
4940                cancel.check()?;
4941            }
4942            let Some(row) = table.rows().get(i) else {
4943                continue;
4944            };
4945            let keep = if let Some(w) = &stmt.where_ {
4946                let cond = eval::eval_expr(w, row, &ctx)?;
4947                !matches!(cond, Value::Bool(true))
4948            } else {
4949                false
4950            };
4951            if !keep {
4952                positions.push(i);
4953                to_delete_rows.push(row.values.clone());
4954            }
4955        }
4956        // v7.6.3 / v7.6.4 — Stage 2: FK enforcement on the immutable
4957        // catalog. Release the mut borrow and run reverse-scan
4958        // against every child table whose FK targets this table.
4959        // RESTRICT / NoAction raise an error; CASCADE returns a
4960        // cascade plan that stage 3 applies after the primary delete.
4961        // SET NULL / SET DEFAULT remain Unsupported until v7.6.5.
4962        let _ = table;
4963        // v7.12.5 — BEFORE DELETE row-level triggers. Each fires
4964        // with NEW=None / OLD=pre-delete row; RETURN OLD (or NEW)
4965        // = proceed, RETURN NULL = skip the row entirely. The
4966        // filter must run BEFORE the FK cascade plan so cascaded
4967        // child rows track the trigger's skip-decision on the
4968        // parent.
4969        // v7.12.7 — embedded SQL queue.
4970        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
4971        if !before_delete_triggers.is_empty() {
4972            let mut filtered_positions: Vec<usize> = Vec::with_capacity(positions.len());
4973            let mut filtered_old_rows: Vec<Vec<Value>> = Vec::with_capacity(to_delete_rows.len());
4974            for (pos, old_vals) in positions.iter().zip(to_delete_rows.iter()) {
4975                let old_row = Row::new(old_vals.clone());
4976                let mut cancel_this = false;
4977                for fd in &before_delete_triggers {
4978                    let (outcome, deferred) = triggers::fire_row_trigger(
4979                        fd,
4980                        None,
4981                        Some(&old_row),
4982                        &stmt.table,
4983                        &schema_cols,
4984                        &[],
4985                        trigger_session_cfg.as_deref(),
4986                        false,
4987                    )
4988                    .map_err(|e| {
4989                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
4990                    })?;
4991                    deferred_embedded.extend(deferred);
4992                    if matches!(outcome, triggers::TriggerOutcome::Skip) {
4993                        cancel_this = true;
4994                        break;
4995                    }
4996                }
4997                if !cancel_this {
4998                    filtered_positions.push(*pos);
4999                    filtered_old_rows.push(old_vals.clone());
5000                }
5001            }
5002            positions = filtered_positions;
5003            to_delete_rows = filtered_old_rows;
5004        }
5005        let cascade_plan = plan_fk_parent_deletions(
5006            self.active_catalog(),
5007            &stmt.table,
5008            &positions,
5009            &to_delete_rows,
5010        )?;
5011        // Stage 3a — apply each FK child step (SET NULL / SET
5012        // DEFAULT / CASCADE delete) before deleting the parent.
5013        // The plan is already ordered: nulls/defaults first, then
5014        // cascade deletes (so a row mutated and later deleted
5015        // surfaces as deleted — though v7.6.5 doesn't produce
5016        // that overlap today).
5017        for step in &cascade_plan {
5018            apply_fk_child_step(self.active_catalog_mut(), step)?;
5019        }
5020        // Stage 3b — actually delete the original target rows.
5021        let table = self
5022            .active_catalog_mut()
5023            .get_mut(&stmt.table)
5024            .ok_or_else(|| {
5025                EngineError::Storage(StorageError::TableNotFound {
5026                    name: stmt.table.clone(),
5027                })
5028            })?;
5029        let affected = table.delete_rows(&positions) + cold_shadow_count;
5030        let _ = table;
5031        // v7.12.5 — AFTER DELETE row-level triggers fire post-write
5032        // with NEW=None / OLD=pre-delete row (each from the
5033        // already-snapshotted to_delete_rows). Return value is
5034        // ignored (matches PG AFTER semantics).
5035        if !after_delete_triggers.is_empty() {
5036            for old_vals in &to_delete_rows {
5037                let old_row = Row::new(old_vals.clone());
5038                for fd in &after_delete_triggers {
5039                    let (_outcome, deferred) = triggers::fire_row_trigger(
5040                        fd,
5041                        None,
5042                        Some(&old_row),
5043                        &stmt.table,
5044                        &schema_cols,
5045                        &[],
5046                        trigger_session_cfg.as_deref(),
5047                        true,
5048                    )
5049                    .map_err(|e| {
5050                        EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}")))
5051                    })?;
5052                    deferred_embedded.extend(deferred);
5053                }
5054            }
5055        }
5056        // v7.12.7 — drain trigger-emitted embedded SQL for this DELETE.
5057        self.execute_deferred_trigger_stmts(deferred_embedded, cancel)?;
5058        // v6.2.1 — auto-analyze modified-row tracking for DELETE.
5059        if !self.in_transaction() && affected > 0 {
5060            self.statistics
5061                .record_modifications(&stmt.table, affected as u64);
5062        }
5063        // v7.9.4 — RETURNING projection over the soon-to-be-gone
5064        // rows. `to_delete_rows` was snapshotted in stage 1 before
5065        // mutation, so the projection sees the pre-delete state
5066        // (matches PG semantics: DELETE RETURNING returns the row
5067        // as it was just before removal).
5068        if let Some(items) = &stmt.returning {
5069            return self.build_returning_rows(&stmt.table, items, to_delete_rows);
5070        }
5071        Ok(QueryResult::CommandOk {
5072            affected,
5073            modified_catalog: !self.in_transaction(),
5074        })
5075    }
5076
5077    /// `SHOW TABLES` — one row per table in the active catalog.
5078    /// Column name is `name` so result-set consumers can downstream
5079    /// `SELECT name FROM ...` style logic if needed.
5080    /// v4.26: `EXPLAIN [ANALYZE] <select>`. Returns a single-column
5081    /// `QUERY PLAN` text table — first line names the top operator
5082    /// (Scan / Aggregate / Window / etc.), indented children list
5083    /// FROM joins, WHERE filters, ORDER BY / LIMIT, projection
5084    /// shape, and any active index hits. `ANALYZE` execs the inner
5085    /// SELECT and appends actual-row + elapsed-micros annotations.
5086    #[allow(clippy::format_push_string)]
5087    fn exec_explain(
5088        &self,
5089        e: &spg_sql::ast::ExplainStatement,
5090        cancel: CancelToken<'_>,
5091    ) -> Result<QueryResult, EngineError> {
5092        let mut lines = Vec::<String>::new();
5093        explain_select(&e.inner, self, 0, &mut lines);
5094        if e.suggest {
5095            // v6.8.3 — index advisor. Walks the SELECT's FROM
5096            // tables + WHERE column refs; for each (table, column)
5097            // pair that lacks an index, append a SUGGEST line with
5098            // a copy-pastable `CREATE INDEX` statement. This is a
5099            // pure-syntax heuristic — no cardinality estimation —
5100            // matching the v6.8.3 design intent of "tell the
5101            // operator where indexes are missing", not "give the
5102            // mathematically optimal index set".
5103            let suggestions = build_index_suggestions(&e.inner, self);
5104            for s in suggestions {
5105                lines.push(s);
5106            }
5107        } else if e.analyze {
5108            // v6.2.4 — EXPLAIN ANALYZE annotates each operator line
5109            // with `(rows=N)` where the row count is computable
5110            // without re-executing the full query:
5111            //   - Top-level operator (first non-indented line):
5112            //     rows = final result.len()
5113            //   - "From: <table> [full scan]" lines: rows =
5114            //     table.rows().len() (catalog read; no execution)
5115            //   - "From: <table> [index seek]": indeterminate —
5116            //     the index step would need re-execution; v6.2.5
5117            //     adds per-operator wall-clock + hot/cold rows
5118            //     instrumentation that makes this concrete.
5119            //   - Everything else: marked `(—)` so the surface
5120            //     stays well-defined without silently dropping
5121            //     stats. v6.2.5 fills in via inline executor
5122            //     instrumentation.
5123            // Total elapsed lands on a trailing `Total: …` line.
5124            let started = self.clock.map(|f| f());
5125            let exec = self.exec_select_cancel(&e.inner, cancel)?;
5126            let elapsed_micros = match (self.clock, started) {
5127                (Some(f), Some(s)) => Some(f().saturating_sub(s)),
5128                _ => None,
5129            };
5130            let row_count = if let QueryResult::Rows { rows, .. } = &exec {
5131                rows.len()
5132            } else {
5133                0
5134            };
5135            annotate_explain_lines(&mut lines, row_count, self);
5136            let mut total = alloc::format!("Total: rows={row_count}");
5137            if let Some(us) = elapsed_micros {
5138                total.push_str(&alloc::format!(" elapsed={us}us"));
5139            }
5140            lines.push(total);
5141        }
5142        let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
5143        let rows: Vec<Row> = lines
5144            .into_iter()
5145            .map(|l| Row::new(alloc::vec![Value::Text(l)]))
5146            .collect();
5147        Ok(QueryResult::Rows { columns, rows })
5148    }
5149
5150    fn exec_show_tables(&self) -> QueryResult {
5151        let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
5152        let rows: Vec<Row> = self
5153            .active_catalog()
5154            .table_names()
5155            .into_iter()
5156            .map(|n| Row::new(alloc::vec![Value::Text(n)]))
5157            .collect();
5158        QueryResult::Rows { columns, rows }
5159    }
5160
5161    /// v7.17.0 Phase 3.P0-59 — `SHOW CREATE TABLE <t>`. Synthesise
5162    /// a minimal MySQL-flavoured CREATE TABLE DDL from the
5163    /// catalog's TableSchema so mysqldump round-trips load against
5164    /// SPG without splitting init scripts.
5165    fn exec_show_create_table(&self, name: &str) -> Result<QueryResult, EngineError> {
5166        let t = self.active_catalog().get(name).ok_or_else(|| {
5167            EngineError::Storage(StorageError::TableNotFound { name: name.into() })
5168        })?;
5169        let cols: Vec<String> = t
5170            .schema()
5171            .columns
5172            .iter()
5173            .map(|c| {
5174                let ty = render_data_type(c.ty);
5175                let nullable = if c.nullable { "" } else { " NOT NULL" };
5176                alloc::format!("  `{}` {}{}", c.name, ty, nullable)
5177            })
5178            .collect();
5179        let mut body = cols.join(",\n");
5180        // Append UNIQUE / PRIMARY KEY clauses.
5181        for uc in &t.schema().uniqueness_constraints {
5182            let col_names: Vec<String> = uc
5183                .columns
5184                .iter()
5185                .map(|&p| {
5186                    t.schema().columns.get(p).map_or_else(
5187                        || alloc::format!("col{p}"),
5188                        |c| alloc::format!("`{}`", c.name),
5189                    )
5190                })
5191                .collect();
5192            let kw = if uc.is_primary_key {
5193                "PRIMARY KEY"
5194            } else {
5195                "UNIQUE KEY"
5196            };
5197            body.push_str(",\n  ");
5198            body.push_str(&alloc::format!("{kw} ({})", col_names.join(", ")));
5199        }
5200        // Foreign keys.
5201        for fk in &t.schema().foreign_keys {
5202            let local: Vec<String> = fk
5203                .local_columns
5204                .iter()
5205                .map(|&p| {
5206                    t.schema().columns.get(p).map_or_else(
5207                        || alloc::format!("col{p}"),
5208                        |c| alloc::format!("`{}`", c.name),
5209                    )
5210                })
5211                .collect();
5212            let parent_cols: Vec<String> =
5213                if let Some(parent) = self.active_catalog().get(&fk.parent_table) {
5214                    fk.parent_columns
5215                        .iter()
5216                        .map(|&p| {
5217                            parent.schema().columns.get(p).map_or_else(
5218                                || alloc::format!("col{p}"),
5219                                |c| alloc::format!("`{}`", c.name),
5220                            )
5221                        })
5222                        .collect()
5223                } else {
5224                    fk.parent_columns
5225                        .iter()
5226                        .map(|p| alloc::format!("col{p}"))
5227                        .collect()
5228                };
5229            body.push_str(",\n  ");
5230            body.push_str(&alloc::format!(
5231                "FOREIGN KEY ({}) REFERENCES `{}` ({})",
5232                local.join(", "),
5233                fk.parent_table,
5234                parent_cols.join(", ")
5235            ));
5236        }
5237        let ddl = alloc::format!(
5238            "CREATE TABLE `{}` (\n{}\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4",
5239            name,
5240            body
5241        );
5242        let columns = alloc::vec![
5243            ColumnSchema::new("Table", DataType::Text, false),
5244            ColumnSchema::new("Create Table", DataType::Text, false),
5245        ];
5246        let rows = alloc::vec![Row::new(alloc::vec![
5247            Value::Text(name.into()),
5248            Value::Text(ddl),
5249        ])];
5250        Ok(QueryResult::Rows { columns, rows })
5251    }
5252
5253    /// v7.17.0 Phase 3.P0-60 — `SHOW INDEXES FROM <t>`. MySQL
5254    /// surface returns one row per (index × column) with 14
5255    /// columns; v7.17 ships the columns admin probes actually
5256    /// filter on: Table, Non_unique, Key_name, Seq_in_index,
5257    /// Column_name, Null, Index_type.
5258    fn exec_show_indexes(&self, name: &str) -> Result<QueryResult, EngineError> {
5259        let t = self.active_catalog().get(name).ok_or_else(|| {
5260            EngineError::Storage(StorageError::TableNotFound { name: name.into() })
5261        })?;
5262        let columns = alloc::vec![
5263            ColumnSchema::new("Table", DataType::Text, false),
5264            ColumnSchema::new("Non_unique", DataType::Int, false),
5265            ColumnSchema::new("Key_name", DataType::Text, false),
5266            ColumnSchema::new("Seq_in_index", DataType::Int, false),
5267            ColumnSchema::new("Column_name", DataType::Text, false),
5268            ColumnSchema::new("Null", DataType::Text, false),
5269            ColumnSchema::new("Index_type", DataType::Text, false),
5270        ];
5271        let mut rows: Vec<Row> = Vec::new();
5272        for idx in t.indices() {
5273            let col = t
5274                .schema()
5275                .columns
5276                .get(idx.column_position)
5277                .map_or("?".into(), |c| c.name.clone());
5278            let nullable = t
5279                .schema()
5280                .columns
5281                .get(idx.column_position)
5282                .map_or(true, |c| c.nullable);
5283            rows.push(Row::new(alloc::vec![
5284                Value::Text(name.into()),
5285                Value::Int(i32::from(!idx.is_unique)),
5286                Value::Text(idx.name.clone()),
5287                Value::Int(1),
5288                Value::Text(col),
5289                Value::Text(if nullable {
5290                    "YES".into()
5291                } else {
5292                    String::new()
5293                }),
5294                Value::Text("BTREE".into()),
5295            ]));
5296        }
5297        Ok(QueryResult::Rows { columns, rows })
5298    }
5299
5300    /// v7.17.0 Phase 3.P0-61 — `SHOW STATUS`. Returns canonical
5301    /// MySQL server-status counters (2-column `(Variable_name,
5302    /// Value)`).
5303    fn exec_show_status(&self) -> QueryResult {
5304        let columns = alloc::vec![
5305            ColumnSchema::new("Variable_name", DataType::Text, false),
5306            ColumnSchema::new("Value", DataType::Text, false),
5307        ];
5308        let pairs: &[(&str, &str)] = &[
5309            ("Uptime", "0"),
5310            ("Threads_connected", "1"),
5311            ("Threads_running", "1"),
5312            ("Questions", "0"),
5313            ("Slow_queries", "0"),
5314            ("Opened_tables", "0"),
5315            ("Innodb_buffer_pool_pages_total", "0"),
5316        ];
5317        let rows: Vec<Row> = pairs
5318            .iter()
5319            .map(|(k, v)| {
5320                Row::new(alloc::vec![
5321                    Value::Text((*k).into()),
5322                    Value::Text((*v).into())
5323                ])
5324            })
5325            .collect();
5326        QueryResult::Rows { columns, rows }
5327    }
5328
5329    /// v7.17.0 Phase 3.P0-61 — `SHOW VARIABLES`. Returns server-side
5330    /// variables MySQL/MariaDB clients probe at connect time.
5331    fn exec_show_variables(&self) -> QueryResult {
5332        let columns = alloc::vec![
5333            ColumnSchema::new("Variable_name", DataType::Text, false),
5334            ColumnSchema::new("Value", DataType::Text, false),
5335        ];
5336        let mut rows: Vec<Row> = Vec::new();
5337        let canonical: &[(&str, &str)] = &[
5338            ("version", "8.0.35-spg"),
5339            ("version_comment", "SPG dual-stack engine"),
5340            ("character_set_server", "utf8mb4"),
5341            ("collation_server", "utf8mb4_0900_ai_ci"),
5342            ("max_allowed_packet", "67108864"),
5343            ("autocommit", "ON"),
5344            ("sql_mode", "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"),
5345            ("time_zone", "SYSTEM"),
5346            ("transaction_isolation", "REPEATABLE-READ"),
5347        ];
5348        for &(k, v) in canonical {
5349            rows.push(Row::new(alloc::vec![
5350                Value::Text(k.into()),
5351                Value::Text(v.into()),
5352            ]));
5353        }
5354        // Session-set parameters surface here too.
5355        for (k, v) in &self.session_params {
5356            if !canonical.iter().any(|(n, _)| (*n).eq_ignore_ascii_case(k)) {
5357                rows.push(Row::new(alloc::vec![
5358                    Value::Text(k.clone()),
5359                    Value::Text(v.clone()),
5360                ]));
5361            }
5362        }
5363        QueryResult::Rows { columns, rows }
5364    }
5365
5366    /// v7.17.0 Phase 3.P0-62 — `SHOW PROCESSLIST`. SPG is
5367    /// single-process so the surface returns one synthetic row
5368    /// describing the current connection (Id, User, Host, db,
5369    /// Command, Time, State, Info).
5370    fn exec_show_processlist(&self) -> QueryResult {
5371        let columns = alloc::vec![
5372            ColumnSchema::new("Id", DataType::Int, false),
5373            ColumnSchema::new("User", DataType::Text, false),
5374            ColumnSchema::new("Host", DataType::Text, false),
5375            ColumnSchema::new("db", DataType::Text, true),
5376            ColumnSchema::new("Command", DataType::Text, false),
5377            ColumnSchema::new("Time", DataType::Int, false),
5378            ColumnSchema::new("State", DataType::Text, true),
5379            ColumnSchema::new("Info", DataType::Text, true),
5380        ];
5381        let rows = alloc::vec![Row::new(alloc::vec![
5382            Value::Int(1),
5383            Value::Text("postgres".into()),
5384            Value::Text("localhost".into()),
5385            Value::Text("postgres".into()),
5386            Value::Text("Query".into()),
5387            Value::Int(0),
5388            Value::Text("executing".into()),
5389            Value::Text("SHOW PROCESSLIST".into()),
5390        ])];
5391        QueryResult::Rows { columns, rows }
5392    }
5393
5394    /// v7.17.0 Phase 3.P0-58 — `SHOW DATABASES` / `SHOW SCHEMAS`.
5395    /// SPG is single-database so the result is the canonical MySQL
5396    /// set every mysql/MariaDB client expects at connect time:
5397    /// `information_schema`, `mysql`, `performance_schema`, `sys`,
5398    /// plus a `postgres` slot so dual-stack callers find their
5399    /// PG-compatible database too.
5400    fn exec_show_databases(&self) -> QueryResult {
5401        let columns = alloc::vec![ColumnSchema::new("Database", DataType::Text, false)];
5402        let names = [
5403            "information_schema",
5404            "mysql",
5405            "performance_schema",
5406            "sys",
5407            "postgres",
5408        ];
5409        let rows: Vec<Row> = names
5410            .iter()
5411            .map(|n| Row::new(alloc::vec![Value::Text((*n).into())]))
5412            .collect();
5413        QueryResult::Rows { columns, rows }
5414    }
5415
5416    /// `SHOW COLUMNS FROM <table>` — one row per column with the
5417    /// declared name, SQL type rendering, and nullability flag.
5418    fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
5419        let table =
5420            self.active_catalog()
5421                .get(table_name)
5422                .ok_or_else(|| StorageError::TableNotFound {
5423                    name: table_name.into(),
5424                })?;
5425        let columns = alloc::vec![
5426            ColumnSchema::new("name", DataType::Text, false),
5427            ColumnSchema::new("type", DataType::Text, false),
5428            ColumnSchema::new("nullable", DataType::Bool, false),
5429        ];
5430        let rows: Vec<Row> = table
5431            .schema()
5432            .columns
5433            .iter()
5434            .map(|c| {
5435                Row::new(alloc::vec![
5436                    Value::Text(c.name.clone()),
5437                    Value::Text(alloc::format!("{}", c.ty)),
5438                    Value::Bool(c.nullable),
5439                ])
5440            })
5441            .collect();
5442        Ok(QueryResult::Rows { columns, rows })
5443    }
5444
5445    fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
5446        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5447        if self.tx_catalogs.contains_key(&tx_id) {
5448            return Err(EngineError::TransactionAlreadyOpen);
5449        }
5450        self.tx_catalogs.insert(
5451            tx_id,
5452            TxState {
5453                catalog: self.catalog.clone(),
5454                savepoints: Vec::new(),
5455            },
5456        );
5457        Ok(QueryResult::CommandOk {
5458            affected: 0,
5459            modified_catalog: false,
5460        })
5461    }
5462
5463    fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
5464        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5465        let state = self
5466            .tx_catalogs
5467            .remove(&tx_id)
5468            .ok_or(EngineError::NoActiveTransaction)?;
5469        self.catalog = state.catalog;
5470        // All savepoints become permanent at COMMIT and the stack
5471        // resets for the next TX (`state.savepoints` is discarded with
5472        // `state`).
5473        Ok(QueryResult::CommandOk {
5474            affected: 0,
5475            modified_catalog: true,
5476        })
5477    }
5478
5479    fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
5480        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5481        if self.tx_catalogs.remove(&tx_id).is_none() {
5482            return Err(EngineError::NoActiveTransaction);
5483        }
5484        // savepoints discarded with the TxState
5485        Ok(QueryResult::CommandOk {
5486            affected: 0,
5487            modified_catalog: false,
5488        })
5489    }
5490
5491    fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
5492        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5493        let state = self
5494            .tx_catalogs
5495            .get_mut(&tx_id)
5496            .ok_or(EngineError::NoActiveTransaction)?;
5497        // PG re-uses an existing savepoint name by dropping the older
5498        // entry and pushing a fresh one — match that behaviour so
5499        // application code can `SAVEPOINT sp; ...; SAVEPOINT sp` freely.
5500        state.savepoints.retain(|(n, _)| n != &name);
5501        let snapshot = state.catalog.clone();
5502        state.savepoints.push((name, snapshot));
5503        Ok(QueryResult::CommandOk {
5504            affected: 0,
5505            modified_catalog: false,
5506        })
5507    }
5508
5509    fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
5510        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5511        let state = self
5512            .tx_catalogs
5513            .get_mut(&tx_id)
5514            .ok_or(EngineError::NoActiveTransaction)?;
5515        let pos = state
5516            .savepoints
5517            .iter()
5518            .rposition(|(n, _)| n == name)
5519            .ok_or_else(|| {
5520                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
5521            })?;
5522        // The savepoint stays on the stack (PG semantics): a later
5523        // `RELEASE` or further `ROLLBACK TO` is still allowed. Everything
5524        // after it is discarded.
5525        let snapshot = state.savepoints[pos].1.clone();
5526        state.savepoints.truncate(pos + 1);
5527        state.catalog = snapshot;
5528        Ok(QueryResult::CommandOk {
5529            affected: 0,
5530            modified_catalog: false,
5531        })
5532    }
5533
5534    fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
5535        let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
5536        let state = self
5537            .tx_catalogs
5538            .get_mut(&tx_id)
5539            .ok_or(EngineError::NoActiveTransaction)?;
5540        let pos = state
5541            .savepoints
5542            .iter()
5543            .rposition(|(n, _)| n == name)
5544            .ok_or_else(|| {
5545                EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
5546            })?;
5547        // RELEASE keeps the work since the savepoint, just discards the
5548        // bookmark plus everything nested under it.
5549        state.savepoints.truncate(pos);
5550        Ok(QueryResult::CommandOk {
5551            affected: 0,
5552            modified_catalog: false,
5553        })
5554    }
5555
5556    /// v6.0.4 — synchronous `ALTER INDEX <name> REBUILD [WITH
5557    /// (encoding = …)]`. Walks every table in the active catalog
5558    /// looking for an index matching `stmt.name`, then delegates the
5559    /// rebuild (including any encoding switch) to
5560    /// `Table::rebuild_nsw_index`. The "live" non-blocking
5561    /// optimisation is v6.0.4.1 / v6.1.x territory.
5562    /// v6.7.2 — `ALTER TABLE t SET hot_tier_bytes = X`. Dispatch
5563    /// arm. Currently the only setting is `hot_tier_bytes`; later
5564    /// v6.7.x can extend `AlterTableTarget` without touching this
5565    /// arm structure.
5566    fn exec_alter_table(
5567        &mut self,
5568        s: spg_sql::ast::AlterTableStatement,
5569    ) -> Result<QueryResult, EngineError> {
5570        // v7.13.2 — mailrs round-6 S1: apply each subaction in order.
5571        // On first error the statement aborts; subactions already
5572        // applied stay (no transactional rollback in v7.13 — wrap in
5573        // BEGIN/COMMIT if atomicity matters).
5574        let table_name = s.name.clone();
5575        for target in s.targets {
5576            self.exec_alter_table_subaction(&table_name, target)?;
5577        }
5578        Ok(QueryResult::CommandOk {
5579            affected: 0,
5580            modified_catalog: !self.in_transaction(),
5581        })
5582    }
5583
5584    fn exec_alter_table_subaction(
5585        &mut self,
5586        table_name_outer: &str,
5587        target: spg_sql::ast::AlterTableTarget,
5588    ) -> Result<(), EngineError> {
5589        // Inner helper retains the s.name closure shape; alias to `s`
5590        // for minimal diff against the v7.13.0 body.
5591        struct S<'a> {
5592            name: &'a str,
5593        }
5594        let s = S {
5595            name: table_name_outer,
5596        };
5597        match target {
5598            spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
5599                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5600                    EngineError::Storage(StorageError::TableNotFound {
5601                        name: s.name.into(),
5602                    })
5603                })?;
5604                table.schema_mut().hot_tier_bytes = Some(n);
5605            }
5606            spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
5607                // v7.6.8 — resolve FK against the live catalog first
5608                // (validates parent table, columns, indices). Then
5609                // verify every existing row in the child table
5610                // satisfies the new constraint. Then install it.
5611                let cols_snapshot = self
5612                    .active_catalog()
5613                    .get(s.name)
5614                    .ok_or_else(|| {
5615                        EngineError::Storage(StorageError::TableNotFound {
5616                            name: s.name.into(),
5617                        })
5618                    })?
5619                    .schema()
5620                    .columns
5621                    .clone();
5622                let storage_fk =
5623                    resolve_foreign_key(s.name, &cols_snapshot, fk, self.active_catalog())?;
5624                // Verify existing rows. Treat them as a virtual
5625                // INSERT batch — reusing the v7.6.2 enforce helper.
5626                let existing_rows: Vec<Vec<Value>> = self
5627                    .active_catalog()
5628                    .get(s.name)
5629                    .expect("checked above")
5630                    .rows()
5631                    .iter()
5632                    .map(|r| r.values.clone())
5633                    .collect();
5634                enforce_fk_inserts(
5635                    self.active_catalog(),
5636                    s.name,
5637                    core::slice::from_ref(&storage_fk),
5638                    &existing_rows,
5639                )?;
5640                // Reject duplicate constraint name.
5641                let table = self
5642                    .active_catalog_mut()
5643                    .get_mut(s.name)
5644                    .expect("checked above");
5645                if let Some(name) = &storage_fk.name
5646                    && table
5647                        .schema()
5648                        .foreign_keys
5649                        .iter()
5650                        .any(|f| f.name.as_ref() == Some(name))
5651                {
5652                    return Err(EngineError::Unsupported(alloc::format!(
5653                        "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
5654                    )));
5655                }
5656                table.schema_mut().foreign_keys.push(storage_fk);
5657            }
5658            spg_sql::ast::AlterTableTarget::DropForeignKey { name, if_exists } => {
5659                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5660                    EngineError::Storage(StorageError::TableNotFound {
5661                        name: s.name.into(),
5662                    })
5663                })?;
5664                let fks = &mut table.schema_mut().foreign_keys;
5665                let before = fks.len();
5666                fks.retain(|f| f.name.as_ref() != Some(&name));
5667                if fks.len() == before && !if_exists {
5668                    return Err(EngineError::Unsupported(alloc::format!(
5669                        "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
5670                        s.name
5671                    )));
5672                }
5673                // v7.13.2 mailrs round-6 S7: IF EXISTS silences the miss.
5674            }
5675            spg_sql::ast::AlterTableTarget::AddColumn {
5676                column,
5677                if_not_exists,
5678            } => {
5679                // v7.13.0 — mailrs round-5 G1. Append-only column add
5680                // with back-fill of the DEFAULT (or NULL) into every
5681                // existing row. Column positions don't shift, so we
5682                // skip index rebuild.
5683                let clock = self.clock;
5684                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5685                    EngineError::Storage(StorageError::TableNotFound {
5686                        name: s.name.into(),
5687                    })
5688                })?;
5689                if table
5690                    .schema()
5691                    .columns
5692                    .iter()
5693                    .any(|c| c.name.eq_ignore_ascii_case(&column.name))
5694                {
5695                    if if_not_exists {
5696                        return Ok(());
5697                    }
5698                    return Err(EngineError::Unsupported(alloc::format!(
5699                        "ALTER TABLE ADD COLUMN: column {:?} already exists on {:?}",
5700                        column.name,
5701                        s.name
5702                    )));
5703                }
5704                let col_name = column.name.clone();
5705                let nullable = column.nullable;
5706                let has_default = column.default.is_some() || column.auto_increment;
5707                let col_schema = column_def_to_schema(column)?;
5708                let row_count = table.row_count();
5709                // Compute the back-fill value. Literal / runtime DEFAULT
5710                // funnels through the same resolver that INSERT uses
5711                // (v7.9.21 `resolve_column_default_free`). NULL when
5712                // the column is nullable and has no DEFAULT. NOT NULL
5713                // without DEFAULT errors when the table has existing
5714                // rows — same as PG.
5715                let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
5716                    resolve_column_default_free(&col_schema, clock)?
5717                } else if nullable || row_count == 0 {
5718                    Value::Null
5719                } else {
5720                    return Err(EngineError::Unsupported(alloc::format!(
5721                        "ALTER TABLE ADD COLUMN {col_name:?}: NOT NULL column requires DEFAULT \
5722                         when the table has existing rows"
5723                    )));
5724                };
5725                table.add_column(col_schema, fill_value);
5726            }
5727            spg_sql::ast::AlterTableTarget::AlterColumnType {
5728                column,
5729                new_type,
5730                using,
5731            } => {
5732                // v7.13.0 — mailrs round-5 G8. Re-evaluate each
5733                // row's column value (either through the USING
5734                // expression if supplied, or as a direct CAST of
5735                // the existing value) and re-coerce to the new
5736                // type. Indices on the column get rebuilt.
5737                let new_data_type = column_type_to_data_type(new_type);
5738                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5739                    EngineError::Storage(StorageError::TableNotFound {
5740                        name: s.name.into(),
5741                    })
5742                })?;
5743                let col_pos = table
5744                    .schema()
5745                    .columns
5746                    .iter()
5747                    .position(|c| c.name.eq_ignore_ascii_case(&column))
5748                    .ok_or_else(|| {
5749                        EngineError::Unsupported(alloc::format!(
5750                            "ALTER COLUMN TYPE: column {column:?} not found on {:?}",
5751                            s.name
5752                        ))
5753                    })?;
5754                let schema_cols = table.schema().columns.clone();
5755                let ctx = eval::EvalContext::new(&schema_cols, None);
5756                let mut new_values: alloc::vec::Vec<Value> =
5757                    alloc::vec::Vec::with_capacity(table.row_count());
5758                for row in table.rows().iter() {
5759                    let raw = match &using {
5760                        Some(expr) => eval::eval_expr(expr, row, &ctx).map_err(|e| {
5761                            EngineError::Unsupported(alloc::format!(
5762                                "ALTER COLUMN TYPE: USING expression failed: {e:?}"
5763                            ))
5764                        })?,
5765                        None => row.values.get(col_pos).cloned().unwrap_or(Value::Null),
5766                    };
5767                    let coerced = coerce_value(raw, new_data_type, &column, col_pos)?;
5768                    new_values.push(coerced);
5769                }
5770                table.schema_mut().columns[col_pos].ty = new_data_type;
5771                for (i, v) in new_values.into_iter().enumerate() {
5772                    let mut row_values = table
5773                        .rows()
5774                        .get(i)
5775                        .expect("bounds-checked above")
5776                        .values
5777                        .clone();
5778                    row_values[col_pos] = v;
5779                    table.update_row(i, row_values)?;
5780                }
5781            }
5782            spg_sql::ast::AlterTableTarget::AddTableConstraint(tc) => {
5783                // v7.14.0 — pg_dump emits PKs as a separate
5784                // ALTER TABLE ADD CONSTRAINT post-CREATE-TABLE.
5785                // For PRIMARY KEY / UNIQUE, install a UC entry
5786                // and the implicit BTree index on the leading
5787                // column. CHECK: append predicate to schema.
5788                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5789                    EngineError::Storage(StorageError::TableNotFound {
5790                        name: s.name.into(),
5791                    })
5792                })?;
5793                let is_pk = matches!(tc, spg_sql::ast::TableConstraint::PrimaryKey { .. });
5794                // v7.22 (mailrs round-13 gap 6) — carry the parsed
5795                // NULLS NOT DISTINCT flag through the ALTER path;
5796                // it was hardcoded false here while the CREATE
5797                // TABLE path honoured it since v7.13.
5798                let nnd = matches!(
5799                    tc,
5800                    spg_sql::ast::TableConstraint::Unique {
5801                        nulls_not_distinct: true,
5802                        ..
5803                    }
5804                );
5805                match tc {
5806                    spg_sql::ast::TableConstraint::PrimaryKey { columns, .. }
5807                    | spg_sql::ast::TableConstraint::Unique { columns, .. } => {
5808                        let positions: Vec<usize> = columns
5809                            .iter()
5810                            .map(|c| {
5811                                table
5812                                    .schema()
5813                                    .columns
5814                                    .iter()
5815                                    .position(|sc| sc.name.eq_ignore_ascii_case(c))
5816                                    .ok_or_else(|| {
5817                                        EngineError::Unsupported(alloc::format!(
5818                                            "ALTER TABLE ADD CONSTRAINT: column {c:?} not found on {:?}",
5819                                            s.name
5820                                        ))
5821                                    })
5822                            })
5823                            .collect::<Result<Vec<_>, _>>()?;
5824                        // Skip if an equivalent UC is already there
5825                        // (idempotent — pg_dump's PK + a prior inline
5826                        // PK shouldn't double-install).
5827                        let already = table
5828                            .schema()
5829                            .uniqueness_constraints
5830                            .iter()
5831                            .any(|u| u.columns == positions);
5832                        if !already {
5833                            table.schema_mut().uniqueness_constraints.push(
5834                                spg_storage::UniquenessConstraint {
5835                                    is_primary_key: is_pk,
5836                                    columns: positions.clone(),
5837                                    nulls_not_distinct: nnd,
5838                                },
5839                            );
5840                            // PK implies NOT NULL on referenced cols.
5841                            if is_pk {
5842                                for p in &positions {
5843                                    if let Some(c) = table.schema_mut().columns.get_mut(*p) {
5844                                        c.nullable = false;
5845                                    }
5846                                }
5847                            }
5848                            // Add a BTree index on the leading
5849                            // column for INSERT-side enforcement.
5850                            let leading = &columns[0];
5851                            let already_idx = table.indices().iter().any(|idx| {
5852                                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
5853                                    && table.schema().columns[idx.column_position].name == *leading
5854                            });
5855                            if !already_idx {
5856                                let suffix = if is_pk { "pkey" } else { "key" };
5857                                let idx_name = alloc::format!("{}_{leading}_{suffix}", s.name);
5858                                let _ = table.add_index(idx_name, leading);
5859                            }
5860                        }
5861                    }
5862                    spg_sql::ast::TableConstraint::Check { expr, .. } => {
5863                        table.schema_mut().checks.push(alloc::format!("{expr}"));
5864                    }
5865                    spg_sql::ast::TableConstraint::Index { name, columns } => {
5866                        // v7.15.0 — ALTER TABLE ADD KEY (cols).
5867                        // mysqldump occasionally emits this
5868                        // post-CREATE-TABLE shape; build a BTree
5869                        // on the leading column using the
5870                        // user-supplied or synthesised name.
5871                        let leading = &columns[0];
5872                        let already_idx = table.indices().iter().any(|idx| {
5873                            matches!(idx.kind, spg_storage::IndexKind::BTree(_))
5874                                && table.schema().columns[idx.column_position].name == *leading
5875                        });
5876                        if !already_idx {
5877                            let idx_name = name
5878                                .clone()
5879                                .unwrap_or_else(|| alloc::format!("{}_{leading}_idx", s.name));
5880                            let _ = table.add_index(idx_name, leading);
5881                        }
5882                    }
5883                    spg_sql::ast::TableConstraint::FulltextIndex { name, columns } => {
5884                        // v7.17.0 Phase 2.2 — ALTER TABLE ADD
5885                        // FULLTEXT KEY (cols). Builds one
5886                        // fulltext-GIN per named column so MATCH
5887                        // AGAINST gets a real inverted index.
5888                        // Multi-column declarations expand to
5889                        // per-column GINs (the leading column
5890                        // drives MATCH AGAINST planning).
5891                        for (k, col) in columns.iter().enumerate() {
5892                            let already_idx = table.indices().iter().any(|idx| {
5893                                matches!(idx.kind, spg_storage::IndexKind::GinFulltext(_))
5894                                    && table.schema().columns[idx.column_position].name == *col
5895                            });
5896                            if already_idx {
5897                                continue;
5898                            }
5899                            let idx_name = match (&name, columns.len(), k) {
5900                                (Some(n), 1, _) => n.clone(),
5901                                (Some(n), _, k) => alloc::format!("{n}_{k}"),
5902                                (None, _, _) => {
5903                                    alloc::format!("{}_{col}_ftidx", s.name)
5904                                }
5905                            };
5906                            let _ = table.add_gin_fulltext_index(idx_name, col);
5907                        }
5908                    }
5909                }
5910            }
5911            spg_sql::ast::AlterTableTarget::DropColumn {
5912                column,
5913                if_exists,
5914                cascade,
5915            } => {
5916                // v7.13.3 — mailrs round-7 S8. Remove the column +
5917                // every row's value at that position; drop any index
5918                // on the column. RESTRICT (default) rejects when an
5919                // FK on this table or partial-index predicate
5920                // references the column; CASCADE removes those
5921                // dependents first.
5922                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
5923                    EngineError::Storage(StorageError::TableNotFound {
5924                        name: s.name.into(),
5925                    })
5926                })?;
5927                let col_pos = match table
5928                    .schema()
5929                    .columns
5930                    .iter()
5931                    .position(|c| c.name.eq_ignore_ascii_case(&column))
5932                {
5933                    Some(p) => p,
5934                    None => {
5935                        if if_exists {
5936                            return Ok(());
5937                        }
5938                        return Err(EngineError::Unsupported(alloc::format!(
5939                            "ALTER TABLE DROP COLUMN: column {column:?} not found on {:?}",
5940                            s.name
5941                        )));
5942                    }
5943                };
5944                // Dependent check: FKs whose local columns include
5945                // col_pos. CASCADE drops them; otherwise reject.
5946                let dependent_fks: Vec<usize> = table
5947                    .schema()
5948                    .foreign_keys
5949                    .iter()
5950                    .enumerate()
5951                    .filter_map(|(i, fk)| {
5952                        if fk.local_columns.contains(&col_pos) {
5953                            Some(i)
5954                        } else {
5955                            None
5956                        }
5957                    })
5958                    .collect();
5959                if !dependent_fks.is_empty() && !cascade {
5960                    return Err(EngineError::Unsupported(alloc::format!(
5961                        "ALTER TABLE DROP COLUMN {column:?}: column has FK dependents; \
5962                         use DROP COLUMN ... CASCADE to remove them"
5963                    )));
5964                }
5965                // CASCADE the FK removals first.
5966                if cascade {
5967                    // Drop in reverse so indices stay valid.
5968                    let mut sorted = dependent_fks.clone();
5969                    sorted.sort();
5970                    sorted.reverse();
5971                    let fks = &mut table.schema_mut().foreign_keys;
5972                    for i in sorted {
5973                        fks.remove(i);
5974                    }
5975                }
5976                // Drop the column. New helper on Table does the
5977                // row + schema + index shift atomically.
5978                table.drop_column(col_pos);
5979            }
5980            spg_sql::ast::AlterTableTarget::SetTriggerEnabled { which, enabled } => {
5981                // v7.16.1 — mailrs round-9 A.2.b. pg_dump
5982                // --disable-triggers wraps each table's data
5983                // block with `ALTER TABLE … DISABLE TRIGGER ALL`
5984                // / `… ENABLE TRIGGER ALL`. Toggle the enabled
5985                // flag on every matching trigger so the row-
5986                // write paths skip them; the catalog snapshot
5987                // persists the new state across restarts.
5988                let table_name = s.name.to_string();
5989                let trigs = self.active_catalog_mut().triggers_mut();
5990                let mut touched = false;
5991                for t in trigs.iter_mut() {
5992                    if !t.table.eq_ignore_ascii_case(&table_name) {
5993                        continue;
5994                    }
5995                    match &which {
5996                        spg_sql::ast::TriggerSelector::All => {
5997                            t.enabled = enabled;
5998                            touched = true;
5999                        }
6000                        spg_sql::ast::TriggerSelector::Named(name) => {
6001                            if t.name.eq_ignore_ascii_case(name) {
6002                                t.enabled = enabled;
6003                                touched = true;
6004                            }
6005                        }
6006                    }
6007                }
6008                // PG semantics: `ALL` on a table with no
6009                // triggers is a no-op (no error). A `Named`
6010                // form pointing at a non-existent trigger
6011                // raises in PG; v7.16.1 also raises so we
6012                // don't silently lose state.
6013                if !touched {
6014                    if let spg_sql::ast::TriggerSelector::Named(name) = &which {
6015                        return Err(EngineError::Unsupported(alloc::format!(
6016                            "ALTER TABLE {table_name:?} {} TRIGGER {name:?}: no such trigger on table",
6017                            if enabled { "ENABLE" } else { "DISABLE" },
6018                        )));
6019                    }
6020                }
6021            }
6022            spg_sql::ast::AlterTableTarget::SetColumnAutoIncrement { column, seq_name } => {
6023                // pg_dump's identity form names an IMPLICIT sequence
6024                // (`… AS IDENTITY ( SEQUENCE NAME s … )`) that never
6025                // gets its own CREATE SEQUENCE statement, while the
6026                // data section still calls `setval(s, …)`. Make the
6027                // sequence exist (idempotent) so those calls land.
6028                if let Some(seq) = seq_name {
6029                    let _ = self.exec_create_sequence(spg_sql::ast::CreateSequenceStatement {
6030                        name: seq,
6031                        if_not_exists: true,
6032                        temporary: false,
6033                        data_type: None,
6034                        options: spg_sql::ast::SequenceOptions::default(),
6035                    })?;
6036                }
6037                // v7.22 (round-13 T2) — pg_dump's serial/identity
6038                // spellings (`SET DEFAULT nextval(…)` / `ADD
6039                // GENERATED … AS IDENTITY`) lower here: flip the
6040                // column's auto-increment flag so post-import
6041                // INSERTs without an explicit value keep numbering
6042                // (max+1 semantics; the dump's setval() calls are
6043                // no-ops by construction).
6044                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
6045                    EngineError::Storage(StorageError::TableNotFound {
6046                        name: s.name.into(),
6047                    })
6048                })?;
6049                let pos = table
6050                    .schema()
6051                    .columns
6052                    .iter()
6053                    .position(|c| c.name.eq_ignore_ascii_case(&column))
6054                    .ok_or_else(|| {
6055                        EngineError::Unsupported(alloc::format!(
6056                            "ALTER COLUMN {column:?}: no such column on {:?}",
6057                            s.name
6058                        ))
6059                    })?;
6060                let col = &table.schema().columns[pos];
6061                if !matches!(
6062                    col.ty,
6063                    spg_storage::DataType::SmallInt
6064                        | spg_storage::DataType::Int
6065                        | spg_storage::DataType::BigInt
6066                ) {
6067                    return Err(EngineError::Unsupported(alloc::format!(
6068                        "auto-increment applies to integer columns only ({column:?} is {:?})",
6069                        col.ty
6070                    )));
6071                }
6072                table.schema_mut().columns[pos].auto_increment = true;
6073            }
6074            spg_sql::ast::AlterTableTarget::RenameTable { new } => {
6075                // v7.16.2 — table-level rename (mailrs round-10
6076                // A.5 — used by migrate-042's `ALTER TABLE
6077                // contacts RENAME TO email_contacts`). Storage
6078                // helper updates the schema + by_name index +
6079                // dangling FK / trigger references in one
6080                // atomic step.
6081                let old = s.name.to_string();
6082                self.active_catalog_mut()
6083                    .rename_table(&old, &new)
6084                    .map_err(EngineError::Storage)?;
6085            }
6086            spg_sql::ast::AlterTableTarget::RenameColumn { old, new } => {
6087                // v7.15.0 — `ALTER TABLE t RENAME [COLUMN] old TO
6088                // new`. Rename the column in the schema; rewrite
6089                // every stored source string on this table that
6090                // references it as a (potentially-qualified)
6091                // column identifier: CHECK predicates, partial-
6092                // index predicates, runtime DEFAULT expressions.
6093                // Then walk catalog triggers on this table and
6094                // patch any `UPDATE OF` column list. Function and
6095                // trigger bodies are NOT auto-rewritten — that
6096                // surface is dynamic SQL territory; users update
6097                // those separately (matches PG plpgsql behavior:
6098                // a column rename invalidates name-referencing
6099                // plpgsql at call time, not rename time).
6100                let table = self.active_catalog_mut().get_mut(s.name).ok_or_else(|| {
6101                    EngineError::Storage(StorageError::TableNotFound {
6102                        name: s.name.into(),
6103                    })
6104                })?;
6105                let col_pos = table
6106                    .schema()
6107                    .columns
6108                    .iter()
6109                    .position(|c| c.name.eq_ignore_ascii_case(&old))
6110                    .ok_or_else(|| {
6111                        EngineError::Unsupported(alloc::format!(
6112                            "ALTER TABLE RENAME COLUMN: column {old:?} not found on {:?}",
6113                            s.name
6114                        ))
6115                    })?;
6116                // Reject same-name (case-insensitive) collision.
6117                if table
6118                    .schema()
6119                    .columns
6120                    .iter()
6121                    .enumerate()
6122                    .any(|(i, c)| i != col_pos && c.name.eq_ignore_ascii_case(&new))
6123                {
6124                    return Err(EngineError::Unsupported(alloc::format!(
6125                        "ALTER TABLE RENAME COLUMN: column {new:?} already exists on {:?}",
6126                        s.name
6127                    )));
6128                }
6129                // Schema rename first — even idempotent same-name
6130                // rename (`ALTER TABLE t RENAME a TO a`) needs to
6131                // be a no-op, not an error.
6132                if old.eq_ignore_ascii_case(&new) {
6133                    return Ok(());
6134                }
6135                table.rename_column(col_pos, &new);
6136                // Rewrite per-column runtime_default sources on
6137                // every column of this table — a DEFAULT expression
6138                // on column X may reference column Y by name (rare,
6139                // but legal in PG when the value is supplied via a
6140                // function that takes the row).
6141                let n_cols = table.schema().columns.len();
6142                for i in 0..n_cols {
6143                    let rt = table.schema().columns[i].runtime_default.clone();
6144                    if let Some(src) = rt {
6145                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
6146                        table.schema_mut().columns[i].runtime_default = Some(rewritten);
6147                    }
6148                }
6149                // Rewrite table-level CHECK predicates.
6150                let checks = table.schema().checks.clone();
6151                let mut new_checks = Vec::with_capacity(checks.len());
6152                for chk in checks {
6153                    new_checks.push(rewrite_column_in_source(&chk, &old, &new)?);
6154                }
6155                table.schema_mut().checks = new_checks;
6156                // Rewrite per-index partial_predicate sources.
6157                let n_idx = table.indices().len();
6158                for i in 0..n_idx {
6159                    let pred = table.indices()[i].partial_predicate.clone();
6160                    if let Some(src) = pred {
6161                        let rewritten = rewrite_column_in_source(&src, &old, &new)?;
6162                        // SAFETY: indices_mut would be cleanest, but
6163                        // partial_predicate is the only mutable field
6164                        // here; reach in via the public mut accessor.
6165                        table.set_partial_predicate(i, Some(rewritten));
6166                    }
6167                }
6168                // Walk catalog triggers; patch `update_columns` on
6169                // triggers attached to this table.
6170                let table_name = s.name.to_string();
6171                for trig in self.active_catalog_mut().triggers_mut() {
6172                    if !trig.table.eq_ignore_ascii_case(&table_name) {
6173                        continue;
6174                    }
6175                    for c in &mut trig.update_columns {
6176                        if c.eq_ignore_ascii_case(&old) {
6177                            *c = new.clone();
6178                        }
6179                    }
6180                }
6181            }
6182        }
6183        Ok(())
6184    }
6185
6186    fn exec_alter_index(
6187        &mut self,
6188        stmt: spg_sql::ast::AlterIndexStatement,
6189    ) -> Result<QueryResult, EngineError> {
6190        // Translate the optional SQL-side encoding choice into the
6191        // storage-side enum; the same SqlVecEncoding -> VecEncoding
6192        // bridge `column_type_to_data_type` uses.
6193        let spg_sql::ast::AlterIndexStatement {
6194            name: idx_name,
6195            target,
6196        } = stmt;
6197        // v7.16.2 — RENAME TO branch (mailrs round-10 migrate-042).
6198        // IF EXISTS makes a missing index a no-op rather than an
6199        // error, mirroring PG semantics.
6200        if let spg_sql::ast::AlterIndexTarget::Rename { new, if_exists } = target {
6201            let renamed = self.active_catalog_mut().rename_index(&idx_name, &new);
6202            return match renamed {
6203                Ok(()) => Ok(QueryResult::CommandOk {
6204                    affected: 0,
6205                    modified_catalog: !self.in_transaction(),
6206                }),
6207                Err(StorageError::IndexNotFound { .. }) if if_exists => {
6208                    Ok(QueryResult::CommandOk {
6209                        affected: 0,
6210                        modified_catalog: false,
6211                    })
6212                }
6213                Err(e) => Err(EngineError::Storage(e)),
6214            };
6215        }
6216        let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target else {
6217            unreachable!("Rename branch returned above");
6218        };
6219        let target = encoding.map(|e| match e {
6220            SqlVecEncoding::F32 => VecEncoding::F32,
6221            SqlVecEncoding::Sq8 => VecEncoding::Sq8,
6222            SqlVecEncoding::F16 => VecEncoding::F16,
6223        });
6224        // Linear scan: index names are globally unique within a
6225        // catalog (enforced by add_nsw_index_inner) so the first
6226        // match is the only one. Save the table name to avoid
6227        // borrowing while we then take a mut borrow.
6228        let table_name = {
6229            let cat = self.active_catalog();
6230            let mut found: Option<String> = None;
6231            for tname in cat.table_names() {
6232                if let Some(t) = cat.get(&tname)
6233                    && t.indices().iter().any(|i| i.name == idx_name)
6234                {
6235                    found = Some(tname);
6236                    break;
6237                }
6238            }
6239            found.ok_or_else(|| {
6240                EngineError::Storage(StorageError::IndexNotFound {
6241                    name: idx_name.clone(),
6242                })
6243            })?
6244        };
6245        let table = self
6246            .active_catalog_mut()
6247            .get_mut(&table_name)
6248            .expect("table found above");
6249        table.rebuild_nsw_index(&idx_name, target)?;
6250        // v6.3.1 — ALTER INDEX REBUILD potentially with new encoding
6251        // changes cost characteristics; evict any cached plans.
6252        self.plan_cache.evict_referencing(&table_name);
6253        Ok(QueryResult::CommandOk {
6254            affected: 0,
6255            modified_catalog: !self.in_transaction(),
6256        })
6257    }
6258
6259    fn exec_create_index(
6260        &mut self,
6261        stmt: CreateIndexStatement,
6262    ) -> Result<QueryResult, EngineError> {
6263        let table = self
6264            .active_catalog_mut()
6265            .get_mut(&stmt.table)
6266            .ok_or_else(|| {
6267                EngineError::Storage(StorageError::TableNotFound {
6268                    name: stmt.table.clone(),
6269                })
6270            })?;
6271        // `IF NOT EXISTS` reduces DuplicateIndex to a no-op CommandOk.
6272        if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
6273            return Ok(QueryResult::CommandOk {
6274                affected: 0,
6275                modified_catalog: false,
6276            });
6277        }
6278        // v7.9.14 — multi-column index parses through; engine
6279        // builds a single-column BTree on the leading column only.
6280        // The extras live on the AST so spg-server's dispatcher
6281        // can emit a PG-wire NoticeResponse / log line. Composite
6282        // BTree keys land in v7.10.
6283        let _ = &stmt.extra_columns; // intentional drop on engine side
6284        let table_name = stmt.table.clone();
6285        // v6.8.0 — resolve INCLUDE column names to positions. Done
6286        // before `add_index` so a typo error surfaces before any
6287        // catalog mutation lands.
6288        let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
6289            Vec::new()
6290        } else {
6291            let schema = table.schema();
6292            stmt.included_columns
6293                .iter()
6294                .map(|c| {
6295                    schema.column_position(c).ok_or_else(|| {
6296                        EngineError::Storage(StorageError::ColumnNotFound { column: c.clone() })
6297                    })
6298                })
6299                .collect::<Result<Vec<_>, _>>()?
6300        };
6301        match stmt.method {
6302            IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
6303            IndexMethod::Hnsw => {
6304                if !included_positions.is_empty() {
6305                    return Err(EngineError::Unsupported(
6306                        "INCLUDE columns are not supported on HNSW indexes".into(),
6307                    ));
6308                }
6309                table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
6310            }
6311            // v6.7.1 — BRIN. Pure metadata; no in-memory data.
6312            IndexMethod::Brin => {
6313                if !included_positions.is_empty() {
6314                    return Err(EngineError::Unsupported(
6315                        "INCLUDE columns are not supported on BRIN indexes".into(),
6316                    ));
6317                }
6318                table.add_brin_index(stmt.name.clone(), &stmt.column)?;
6319            }
6320            // v7.12.3 — GIN inverted index. Real posting-list-backed
6321            // GIN when the indexed column is `tsvector`; falls back
6322            // to a BTree on the leading column for any other column
6323            // type so v7.9.26b's `pg_dump` compatibility (GIN on
6324            // JSONB etc. silently loading as BTree) is preserved.
6325            // Operators see the real GIN only where it matters; old
6326            // schemas keep loading.
6327            IndexMethod::Gin => {
6328                if !included_positions.is_empty() {
6329                    return Err(EngineError::Unsupported(
6330                        "INCLUDE columns are not supported on GIN indexes".into(),
6331                    ));
6332                }
6333                let col_pos = table
6334                    .schema()
6335                    .column_position(&stmt.column)
6336                    .ok_or_else(|| {
6337                        EngineError::Storage(StorageError::ColumnNotFound {
6338                            column: stmt.column.clone(),
6339                        })
6340                    })?;
6341                let col_ty = table.schema().columns[col_pos].ty;
6342                // v7.15.0 — `gin_trgm_ops` on a TEXT/VARCHAR
6343                // column dispatches to the real trigram-shingle
6344                // GIN build (LIKE / similarity acceleration).
6345                // Other GIN opclasses fall through to the regular
6346                // tsvector-vs-BTree split below.
6347                let is_trgm = stmt
6348                    .opclass
6349                    .as_deref()
6350                    .is_some_and(|op| op.eq_ignore_ascii_case("gin_trgm_ops"));
6351                if is_trgm
6352                    && matches!(
6353                        col_ty,
6354                        spg_storage::DataType::Text | spg_storage::DataType::Varchar(_)
6355                    )
6356                {
6357                    table
6358                        .add_gin_trgm_index(stmt.name.clone(), &stmt.column)
6359                        .map_err(EngineError::Storage)?;
6360                } else if col_ty == spg_storage::DataType::TsVector {
6361                    table
6362                        .add_gin_index(stmt.name.clone(), &stmt.column)
6363                        .map_err(EngineError::Storage)?;
6364                } else {
6365                    // v7.9.26b BTree fallback — the catalog still
6366                    // gets an index entry on the leading column so
6367                    // pg_dump scripts that name GIN on JSONB / etc.
6368                    // load clean; query-time gain stays opt-in for
6369                    // tsvector callers.
6370                    table.add_index(stmt.name.clone(), &stmt.column)?;
6371                }
6372            }
6373        }
6374        if !included_positions.is_empty()
6375            && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
6376        {
6377            idx.included_columns = included_positions;
6378        }
6379        // v6.8.1 — persist partial-index predicate. Stored as the
6380        // expression's Display form so the catalog snapshot stays
6381        // pure (storage has no spg-sql dependency). The runtime
6382        // maintenance path treats partial indexes identically to
6383        // full indexes for v6.8.1 (over-maintenance is safe; the
6384        // planner-side "use partial when query WHERE implies the
6385        // predicate" pass is STABILITY carve-out).
6386        if let Some(pred_expr) = &stmt.partial_predicate {
6387            let canonical = pred_expr.to_string();
6388            // v7.13.2 — mailrs round-6 S2. PG's `pg_trgm` uses
6389            // `CREATE INDEX … USING gin(col gin_trgm_ops) WHERE …`
6390            // routinely to slim trigram indexes. SPG now persists
6391            // the predicate for GIN / BRIN / HNSW the same way it
6392            // already does for BTree — same v6.8.1 "over-maintain
6393            // is safe; planner-side partial routing is STABILITY
6394            // carve-out" semantics. HNSW carries an additional
6395            // caveat: the predicate isn't applied at index build
6396            // time (would require per-row eval inside the NSW
6397            // construction loop), so the index oversamples; query
6398            // time the WHERE clause still filters correctly.
6399            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6400                idx.partial_predicate = Some(canonical);
6401            }
6402        }
6403        // v6.8.2 — persist expression index key. Same Display-form
6404        // storage; the runtime maintenance pass evaluates each
6405        // row's expression to derive the index key, but for v6.8.2
6406        // the engine falls through to the bare-column-reference
6407        // path and the expression is preserved for format-layer
6408        // round-trip + future planner work. Carved-out in
6409        // STABILITY § "Out of v6.8".
6410        if let Some(key_expr) = &stmt.expression {
6411            if matches!(
6412                stmt.method,
6413                IndexMethod::Hnsw | IndexMethod::Brin | IndexMethod::Gin
6414            ) {
6415                return Err(EngineError::Unsupported(
6416                    "Expression keys are not supported on HNSW or BRIN indexes".into(),
6417                ));
6418            }
6419            let canonical = key_expr.to_string();
6420            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6421                idx.expression = Some(canonical);
6422            }
6423        }
6424        // v7.9.29 — persist `is_unique` flag on the storage Index.
6425        // Combined with `partial_predicate`, INSERT enforcement
6426        // checks that no other row whose predicate evaluates true
6427        // shares the same indexed key. Parser already rejected
6428        // `UNIQUE` on HNSW / BRIN, so plain BTree here.
6429        // For multi-column UNIQUE INDEX the extras matter (the
6430        // full tuple is the uniqueness key), so resolve them to
6431        // column positions and persist on the index too.
6432        if stmt.is_unique {
6433            let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
6434            for col_name in &stmt.extra_columns {
6435                let pos = table
6436                    .schema()
6437                    .columns
6438                    .iter()
6439                    .position(|c| c.name.eq_ignore_ascii_case(col_name))
6440                    .ok_or_else(|| {
6441                        EngineError::Unsupported(alloc::format!(
6442                            "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
6443                            stmt.name,
6444                            stmt.table
6445                        ))
6446                    })?;
6447                extra_positions.push(pos);
6448            }
6449            if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
6450                idx.is_unique = true;
6451                idx.extra_column_positions = extra_positions;
6452            }
6453            // At index-creation time, check the existing rows for
6454            // pre-existing duplicates that would have violated the
6455            // new constraint — otherwise CREATE UNIQUE INDEX would
6456            // silently leave duplicates in place.
6457            let snapshot_indices = table.indices().to_vec();
6458            let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
6459                table.rows().iter().cloned().collect();
6460            let snapshot_schema = table.schema().clone();
6461            let idx_ref = snapshot_indices
6462                .iter()
6463                .find(|i| i.name == stmt.name)
6464                .expect("just-added index");
6465            check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
6466        }
6467        // v6.3.1 — adding an index can change the optimal plan for
6468        // any cached query that references this table.
6469        self.plan_cache.evict_referencing(&table_name);
6470        Ok(QueryResult::CommandOk {
6471            affected: 0,
6472            modified_catalog: !self.in_transaction(),
6473        })
6474    }
6475
6476    /// v7.13.3 — mailrs round-7 S9. SPG-specific reconciliation
6477    /// for `CREATE TABLE IF NOT EXISTS` when the table already
6478    /// exists. Adds missing columns + inline FKs from the new
6479    /// definition; existing columns / constraints stay untouched.
6480    /// New columns with a `NOT NULL` declaration without a
6481    /// `DEFAULT` are reported as a clear error rather than
6482    /// silently dropped — this is the "fail loud on real
6483    /// incompatibility, fail silent on schema-superset" tradeoff.
6484    fn reconcile_table_if_not_exists(
6485        &mut self,
6486        stmt: CreateTableStatement,
6487    ) -> Result<QueryResult, EngineError> {
6488        let table_name = stmt.name.clone();
6489        let clock = self.clock;
6490        let existing_col_names: alloc::collections::BTreeSet<String> = self
6491            .active_catalog()
6492            .get(&table_name)
6493            .expect("checked above")
6494            .schema()
6495            .columns
6496            .iter()
6497            .map(|c| c.name.to_ascii_lowercase())
6498            .collect();
6499        let row_count = self
6500            .active_catalog()
6501            .get(&table_name)
6502            .expect("checked above")
6503            .row_count();
6504        // Collect missing column defs in source order.
6505        let new_columns: alloc::vec::Vec<spg_sql::ast::ColumnDef> = stmt
6506            .columns
6507            .iter()
6508            .filter(|c| !existing_col_names.contains(&c.name.to_ascii_lowercase()))
6509            .cloned()
6510            .collect();
6511        for col_def in new_columns {
6512            let col_name = col_def.name.clone();
6513            let nullable = col_def.nullable;
6514            let has_default = col_def.default.is_some() || col_def.auto_increment;
6515            let col_schema = column_def_to_schema(col_def)?;
6516            let fill_value: Value = if has_default || col_schema.runtime_default.is_some() {
6517                resolve_column_default_free(&col_schema, clock)?
6518            } else if nullable || row_count == 0 {
6519                Value::Null
6520            } else {
6521                return Err(EngineError::Unsupported(alloc::format!(
6522                    "CREATE TABLE IF NOT EXISTS {table_name:?}: reconciling \
6523                     column {col_name:?} requires DEFAULT (existing rows would violate NOT NULL)"
6524                )));
6525            };
6526            let table = self
6527                .active_catalog_mut()
6528                .get_mut(&table_name)
6529                .expect("checked above");
6530            table.add_column(col_schema, fill_value);
6531        }
6532        // Resolve any newly-added inline FKs (column-level
6533        // REFERENCES forms) and install. Skip FKs whose local
6534        // columns we didn't have in the existing table.
6535        let table_cols_now = self
6536            .active_catalog()
6537            .get(&table_name)
6538            .expect("checked above")
6539            .schema()
6540            .columns
6541            .clone();
6542        for fk in stmt.foreign_keys {
6543            // Only install FKs whose every local column resolves
6544            // — older catalogs may have a column the new FK
6545            // references but not the column the new FK declares.
6546            let all_resolved = fk.columns.iter().all(|c| {
6547                table_cols_now
6548                    .iter()
6549                    .any(|sc| sc.name.eq_ignore_ascii_case(c))
6550            });
6551            if !all_resolved {
6552                continue;
6553            }
6554            let already_present = {
6555                let table = self
6556                    .active_catalog()
6557                    .get(&table_name)
6558                    .expect("checked above");
6559                table.schema().foreign_keys.iter().any(|f| {
6560                    f.parent_table.eq_ignore_ascii_case(&fk.parent_table)
6561                        && f.local_columns.len() == fk.columns.len()
6562                })
6563            };
6564            if already_present {
6565                continue;
6566            }
6567            let storage_fk =
6568                resolve_foreign_key(&table_name, &table_cols_now, fk, self.active_catalog())?;
6569            let table = self
6570                .active_catalog_mut()
6571                .get_mut(&table_name)
6572                .expect("checked above");
6573            table.schema_mut().foreign_keys.push(storage_fk);
6574        }
6575        Ok(QueryResult::CommandOk {
6576            affected: 0,
6577            modified_catalog: !self.in_transaction(),
6578        })
6579    }
6580
6581    /// v7.14.0 — DROP TABLE handler (pg_dump / mysqldump preamble).
6582    fn exec_drop_table(
6583        &mut self,
6584        names: Vec<String>,
6585        if_exists: bool,
6586    ) -> Result<QueryResult, EngineError> {
6587        for name in names {
6588            let dropped = self.active_catalog_mut().drop_table(&name);
6589            if !dropped && !if_exists {
6590                return Err(EngineError::Storage(StorageError::TableNotFound { name }));
6591            }
6592        }
6593        Ok(QueryResult::CommandOk {
6594            affected: 0,
6595            modified_catalog: !self.in_transaction(),
6596        })
6597    }
6598
6599    /// v7.14.0 — DROP INDEX handler.
6600    fn exec_drop_index(
6601        &mut self,
6602        name: String,
6603        if_exists: bool,
6604    ) -> Result<QueryResult, EngineError> {
6605        let dropped = self.active_catalog_mut().drop_named_index(&name);
6606        if !dropped && !if_exists {
6607            return Err(EngineError::Storage(StorageError::IndexNotFound { name }));
6608        }
6609        Ok(QueryResult::CommandOk {
6610            affected: 0,
6611            modified_catalog: !self.in_transaction(),
6612        })
6613    }
6614
6615    fn exec_create_table(
6616        &mut self,
6617        stmt: CreateTableStatement,
6618    ) -> Result<QueryResult, EngineError> {
6619        if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
6620            // v7.16.2 — PG-strict silent no-op (mailrs round-10
6621            // surfaced this). v7.13.3's "reconcile by adding
6622            // missing columns" was friendly for mailrs round-7
6623            // where init-schema's `contacts` and migrate-023's
6624            // CardDAV `contacts` collided; but it ALSO silently
6625            // added columns to existing tables when later
6626            // migrations had a duplicate `CREATE TABLE IF NOT
6627            // EXISTS <t> (different-shape-cols)` shape. mailrs's
6628            // migrate-030 has exactly that — re-declares
6629            // system_config with `key` even though init-schema
6630            // already created it with `config_key`. PG's silent
6631            // no-op leaves system_config at `config_key`;
6632            // v7.13.3 added a phantom `key` column that then
6633            // tripped migrate-040's idempotent rename guard.
6634            // mailrs v1.7.106 ships the proper PG-style
6635            // contacts rename via DO + IF EXISTS, so SPG can
6636            // revert to PG-strict here without re-breaking the
6637            // round-7 case.
6638            return Ok(QueryResult::CommandOk {
6639                affected: 0,
6640                modified_catalog: false,
6641            });
6642        }
6643        let table_name = stmt.name.clone();
6644        // v7.9.13 — pluck the names of any columns marked
6645        // `PRIMARY KEY` inline so the post-create-table pass can
6646        // build an implicit BTree index. mailrs F1.
6647        let inline_pk_columns: Vec<String> = stmt
6648            .columns
6649            .iter()
6650            .filter(|c| c.is_primary_key)
6651            .map(|c| c.name.clone())
6652            .collect();
6653        // v7.9.19 — table-level constraints: PRIMARY KEY (a, b, ...)
6654        // and UNIQUE (a, b, ...). Each builds a BTree index on the
6655        // leading column (the existing single-column storage tier)
6656        // and registers a UniquenessConstraint on the schema for
6657        // INSERT-time enforcement of the full tuple. mailrs G1/G6.
6658        let cols = stmt
6659            .columns
6660            .into_iter()
6661            .map(column_def_to_schema)
6662            .collect::<Result<Vec<_>, _>>()?;
6663        // v7.17.0 Phase 1.4 + 1.5 — classify every raw
6664        // user_type_ref (parked as user_enum_type by
6665        // column_def_to_schema) into either an enum binding or a
6666        // domain binding. For domains, also rewrite the column's
6667        // base DataType from the placeholder Text to the domain's
6668        // declared base. Unknown idents are still a hard error
6669        // here (same as Phase 1.4) so silent acceptance never
6670        // happens.
6671        let mut cols = cols;
6672        for col in cols.iter_mut() {
6673            let Some(name) = col.user_enum_type.take() else {
6674                continue;
6675            };
6676            let cat = self.active_catalog();
6677            if cat.enum_types().contains_key(&name) {
6678                col.user_enum_type = Some(name);
6679                continue;
6680            }
6681            if let Some(dom) = cat.domain_types().get(&name) {
6682                col.ty = dom.base_type;
6683                col.user_domain_type = Some(name);
6684                if !dom.nullable {
6685                    col.nullable = false;
6686                }
6687                continue;
6688            }
6689            return Err(EngineError::Unsupported(alloc::format!(
6690                "column {:?}: unknown column type {:?} (not a built-in, ENUM, or DOMAIN)",
6691                col.name,
6692                name
6693            )));
6694        }
6695        for tc in &stmt.table_constraints {
6696            if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
6697                for col_name in columns {
6698                    if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
6699                        col.nullable = false;
6700                    }
6701                }
6702            }
6703        }
6704        // v7.6.1 — resolve every FK in the statement against the
6705        // already-known catalog. Validates: parent table exists,
6706        // parent column names exist, arity matches, parent columns
6707        // have a PK / UNIQUE index. Self-referencing FKs (parent
6708        // table == this table) resolve against the column list we
6709        // just built — they don't need the catalog yet.
6710        let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
6711            Vec::with_capacity(stmt.foreign_keys.len());
6712        for fk in stmt.foreign_keys {
6713            // v7.14.0 — when SET FOREIGN_KEY_CHECKS=0 is in effect
6714            // (mysqldump preamble + bulk imports), defer FK
6715            // resolution if the parent table isn't in the catalog
6716            // yet. The FK is queued and resolved when checks flip
6717            // back on. Self-references stay in-band (the parent is
6718            // the same as the child we're building).
6719            let needs_parent = !fk.parent_table.eq_ignore_ascii_case(&table_name);
6720            if !self.foreign_key_checks
6721                && needs_parent
6722                && self.active_catalog().get(&fk.parent_table).is_none()
6723            {
6724                self.pending_foreign_keys.push((table_name.clone(), fk));
6725                continue;
6726            }
6727            fks.push(resolve_foreign_key(
6728                &table_name,
6729                &cols,
6730                fk,
6731                self.active_catalog(),
6732            )?);
6733        }
6734        let mut schema = TableSchema::new(table_name.clone(), cols);
6735        schema.foreign_keys = fks;
6736        // v7.9.19 — translate AST table_constraints to storage
6737        // UniquenessConstraints (column name → position) so the
6738        // INSERT enforcement helper sees positions directly.
6739        let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
6740        let mut check_exprs: Vec<String> = Vec::new();
6741        for tc in &stmt.table_constraints {
6742            let (is_pk, names, nnd) = match tc {
6743                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
6744                    (true, columns.clone(), false)
6745                }
6746                spg_sql::ast::TableConstraint::Unique {
6747                    columns,
6748                    nulls_not_distinct,
6749                    ..
6750                } => (false, columns.clone(), *nulls_not_distinct),
6751                spg_sql::ast::TableConstraint::Check { expr, .. } => {
6752                    // v7.13.0 — collect CHECK predicate sources;
6753                    // they get attached to the schema below.
6754                    check_exprs.push(alloc::format!("{expr}"));
6755                    continue;
6756                }
6757                // v7.15.0 — plain `KEY (cols)` from MySQL inline
6758                // is NOT a uniqueness constraint; skip the UC
6759                // build path entirely. The BTree index lands in
6760                // the post-create loop below alongside the PK/UQ
6761                // implicit indexes.
6762                spg_sql::ast::TableConstraint::Index { .. } => continue,
6763                // v7.17.0 Phase 2.2 — MySQL FULLTEXT KEY is not
6764                // a uniqueness constraint either; its GIN gets
6765                // built in the post-create loop below.
6766                spg_sql::ast::TableConstraint::FulltextIndex { .. } => continue,
6767            };
6768            let mut positions = Vec::with_capacity(names.len());
6769            for n in &names {
6770                let pos = schema
6771                    .columns
6772                    .iter()
6773                    .position(|c| c.name == *n)
6774                    .ok_or_else(|| {
6775                        EngineError::Unsupported(alloc::format!(
6776                            "table constraint references unknown column {n:?}"
6777                        ))
6778                    })?;
6779                positions.push(pos);
6780            }
6781            uc_storage.push(spg_storage::UniquenessConstraint {
6782                is_primary_key: is_pk,
6783                columns: positions,
6784                nulls_not_distinct: nnd,
6785            });
6786        }
6787        // v7.24 (round-16 collateral) — inline `PRIMARY KEY` column
6788        // constraints used to build only the implicit BTree index;
6789        // uniqueness was NEVER registered, so duplicate keys were
6790        // silently accepted (table-level PRIMARY KEY did enforce).
6791        // Register the same UniquenessConstraint the table-level
6792        // form gets, unless one already covers the column set.
6793        if !inline_pk_columns.is_empty() {
6794            let mut positions = Vec::with_capacity(inline_pk_columns.len());
6795            for n in &inline_pk_columns {
6796                if let Some(pos) = schema.columns.iter().position(|c| c.name == *n) {
6797                    positions.push(pos);
6798                }
6799            }
6800            if !uc_storage
6801                .iter()
6802                .any(|uc| uc.is_primary_key || uc.columns == positions)
6803            {
6804                uc_storage.push(spg_storage::UniquenessConstraint {
6805                    is_primary_key: true,
6806                    columns: positions,
6807                    nulls_not_distinct: false,
6808                });
6809            }
6810        }
6811        schema.uniqueness_constraints = uc_storage.clone();
6812        schema.checks = check_exprs;
6813        self.active_catalog_mut().create_table(schema)?;
6814        // v7.9.13 — implicit BTree per inline PK column +
6815        // v7.9.19 — implicit BTree on the leading column of every
6816        // table-level PRIMARY KEY / UNIQUE constraint.
6817        let table = self
6818            .active_catalog_mut()
6819            .get_mut(&table_name)
6820            .expect("just created");
6821        for (i, col_name) in inline_pk_columns.iter().enumerate() {
6822            let idx_name = if inline_pk_columns.len() == 1 {
6823                alloc::format!("{table_name}_pkey")
6824            } else {
6825                alloc::format!("{table_name}_pkey_{i}")
6826            };
6827            if let Err(e) = table.add_index(idx_name, col_name) {
6828                return Err(EngineError::Storage(e));
6829            }
6830        }
6831        for (i, tc) in stmt.table_constraints.iter().enumerate() {
6832            // v7.17.0 Phase 2.2 — FULLTEXT KEY lands a real
6833            // tsvector-GIN per declared column instead of the
6834            // BTree the PK / UQ / KEY paths build. Branch early
6835            // so the BTree loop never sees the FULLTEXT shape.
6836            if let spg_sql::ast::TableConstraint::FulltextIndex { name, columns } = tc {
6837                for (k, col) in columns.iter().enumerate() {
6838                    let already = table.indices().iter().any(|idx| {
6839                        matches!(idx.kind, spg_storage::IndexKind::GinFulltext(_))
6840                            && table.schema().columns[idx.column_position].name == *col
6841                    });
6842                    if already {
6843                        continue;
6844                    }
6845                    let idx_name = match (name.as_ref(), columns.len(), k) {
6846                        (Some(n), 1, _) => n.clone(),
6847                        (Some(n), _, k) => alloc::format!("{n}_{k}"),
6848                        (None, _, _) => {
6849                            alloc::format!("{table_name}_{col}_ftidx")
6850                        }
6851                    };
6852                    if let Err(e) = table.add_gin_fulltext_index(idx_name, col) {
6853                        return Err(EngineError::Storage(e));
6854                    }
6855                }
6856                continue;
6857            }
6858            // v7.15.0 — plain KEY/INDEX rides this same loop so
6859            // the implicit BTree gets built. It carries its own
6860            // user-supplied name; PK/UQ still synthesise.
6861            let (suffix, names, explicit_name): (&str, &Vec<String>, Option<&String>) = match tc {
6862                spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
6863                    ("pkey", columns, None)
6864                }
6865                spg_sql::ast::TableConstraint::Unique { columns, .. } => ("key", columns, None),
6866                spg_sql::ast::TableConstraint::Index { name, columns } => {
6867                    ("idx", columns, name.as_ref())
6868                }
6869                spg_sql::ast::TableConstraint::Check { .. } => continue,
6870                // Handled by the early-branch above.
6871                spg_sql::ast::TableConstraint::FulltextIndex { .. } => continue,
6872            };
6873            let leading = &names[0];
6874            // Skip if a same-column BTree already exists (e.g.
6875            // inline PK on the leading column).
6876            let already = table.indices().iter().any(|idx| {
6877                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
6878                    && table.schema().columns[idx.column_position].name == *leading
6879            });
6880            if already {
6881                continue;
6882            }
6883            let idx_name = if let Some(n) = explicit_name {
6884                n.clone()
6885            } else if names.len() == 1 {
6886                alloc::format!("{table_name}_{leading}_{suffix}")
6887            } else {
6888                alloc::format!("{table_name}_{leading}_{suffix}_{i}")
6889            };
6890            if let Err(e) = table.add_index(idx_name, leading) {
6891                return Err(EngineError::Storage(e));
6892            }
6893        }
6894        Ok(QueryResult::CommandOk {
6895            affected: 0,
6896            modified_catalog: !self.in_transaction(),
6897        })
6898    }
6899
6900    fn exec_insert(&mut self, mut stmt: InsertStatement) -> Result<QueryResult, EngineError> {
6901        // v7.17.0 Phase 1.1 — pre-resolve any nextval / currval /
6902        // setval calls against the catalog before the row loop. We
6903        // walk each tuple expression and replace matching
6904        // FunctionCall nodes with their concrete Literal. This
6905        // keeps `literal_expr_to_value` free of `&mut self` and
6906        // lets multi-row INSERT VALUES (… nextval('seq') …)
6907        // mint a separate sequence value per row.
6908        for tuple in &mut stmt.rows {
6909            for cell in tuple.iter_mut() {
6910                self.resolve_sequence_calls_in_expr(cell)?;
6911            }
6912        }
6913        // v7.13.0 — `INSERT INTO t [(cols)] SELECT …` (mailrs
6914        // round-5 G4). Execute the inner SELECT first, then route
6915        // back through the regular VALUES code path with the
6916        // materialised rows.
6917        if let Some(select) = stmt.select_source.clone() {
6918            let select_result = self.exec_select_cancel(&select, CancelToken::none())?;
6919            let rows = match select_result {
6920                QueryResult::Rows { rows, .. } => rows,
6921                other => {
6922                    return Err(EngineError::Unsupported(alloc::format!(
6923                        "INSERT … SELECT: inner statement produced {other:?} instead of a row set"
6924                    )));
6925                }
6926            };
6927            let mut materialised: Vec<Vec<Expr>> = Vec::with_capacity(rows.len());
6928            for row in rows {
6929                let mut tuple: Vec<Expr> = Vec::with_capacity(row.values.len());
6930                for v in row.values {
6931                    tuple.push(value_to_literal_expr_permissive(v)?);
6932                }
6933                materialised.push(tuple);
6934            }
6935            let recurse = InsertStatement {
6936                table: stmt.table,
6937                columns: stmt.columns,
6938                rows: materialised,
6939                select_source: None,
6940                on_conflict: stmt.on_conflict,
6941                returning: stmt.returning,
6942            };
6943            return self.exec_insert(recurse);
6944        }
6945        // v7.9.21 — snapshot the clock fn pointer before the mut
6946        // borrow on the catalog opens; runtime DEFAULT eval needs
6947        // it inside the row hot loop.
6948        let clock = self.clock;
6949        // v7.12.4 — snapshot row-level triggers + their referenced
6950        // functions before the mut borrow on the catalog opens.
6951        // Cloned out so the row hot loop can fire them without
6952        // re-borrowing the catalog (which would conflict with
6953        // table.insert's mutable borrow).
6954        let before_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "BEFORE");
6955        let after_insert_triggers = self.snapshot_row_triggers(&stmt.table, "INSERT", "AFTER");
6956        let trigger_session_cfg: Option<alloc::string::String> = self
6957            .session_params
6958            .get("default_text_search_config")
6959            .cloned();
6960        // v7.17.0 Phase 1.4 — snapshot the enum label lookup BEFORE
6961        // opening the mutable borrow on the table below. We need
6962        // catalog-level read access (enum_types lives at the
6963        // catalog level, not the table) and the upcoming mutable
6964        // borrow shadows it.
6965        let pre_borrow_column_meta: Vec<ColumnSchema> = {
6966            let preview_table = self.active_catalog().get(&stmt.table).ok_or_else(|| {
6967                EngineError::Storage(StorageError::TableNotFound {
6968                    name: stmt.table.clone(),
6969                })
6970            })?;
6971            preview_table.schema().columns.clone()
6972        };
6973        let enum_label_lookup: alloc::collections::BTreeMap<usize, Vec<String>> =
6974            pre_borrow_column_meta
6975                .iter()
6976                .enumerate()
6977                .filter_map(|(i, col)| {
6978                    // v7.17.0 Phase 3.P0-36 — MySQL inline ENUM
6979                    // variant lists take priority over the PG
6980                    // catalog enum_types lookup (they're
6981                    // column-local and authoritative when set).
6982                    if let Some(inline) = &col.inline_enum_variants {
6983                        return Some((i, inline.clone()));
6984                    }
6985                    col.user_enum_type.as_ref().and_then(|ename| {
6986                        self.active_catalog()
6987                            .enum_types()
6988                            .get(ename)
6989                            .map(|e| (i, e.labels.clone()))
6990                    })
6991                })
6992                .collect();
6993        // v7.17.0 Phase 3.P0-37 — MySQL inline SET variant lists.
6994        // Distinct from enum_label_lookup: SET validates that
6995        // every comma-separated token is in the variant list, and
6996        // canonicalises the cell to definition-order de-duped text.
6997        let set_variant_lookup: alloc::collections::BTreeMap<usize, Vec<String>> =
6998            pre_borrow_column_meta
6999                .iter()
7000                .enumerate()
7001                .filter_map(|(i, col)| col.inline_set_variants.as_ref().map(|vs| (i, vs.clone())))
7002                .collect();
7003        // v7.29 (round-23a) - when the column's implicit sequence
7004        // exists (born on first nextval/setval address), a setval
7005        // above the table MAX moves the next auto-assigned id:
7006        // assign from max(table_max + 1, last_value + 1). Tables
7007        // whose sequence was never addressed keep the bare max+1
7008        // path (identical pre-7.29 behaviour, no lookup cost
7009        // beyond one map probe per auto column per statement).
7010        let mut seq_floors: alloc::collections::BTreeMap<usize, i64> =
7011            alloc::collections::BTreeMap::new();
7012        for (i, col) in pre_borrow_column_meta.iter().enumerate() {
7013            if col.auto_increment
7014                && let Some(sd) = self.active_catalog().sequences().get(&alloc::format!(
7015                    "{}_{}_seq",
7016                    stmt.table,
7017                    col.name
7018                ))
7019            {
7020                // is_called=false (fresh RESTART / setval(_, false))
7021                // means the NEXT value is last_value itself.
7022                let floor = if sd.is_called {
7023                    sd.last_value + 1
7024                } else {
7025                    sd.last_value
7026                };
7027                seq_floors.insert(i, floor);
7028            }
7029        }
7030        let table = self
7031            .active_catalog_mut()
7032            .get_mut(&stmt.table)
7033            .ok_or_else(|| {
7034                EngineError::Storage(StorageError::TableNotFound {
7035                    name: stmt.table.clone(),
7036                })
7037            })?;
7038        // v3.1.5: clone the columns vector only (not the whole
7039        // TableSchema — saves one String alloc for the table name).
7040        // We need an owned snapshot because we'll call `table.insert`
7041        // (mutable borrow on `table`) inside the row loop while
7042        // reading schema fields.
7043        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
7044        let schema_cols_len = column_meta.len();
7045        // Build a permutation `tuple_pos[c] = Some(j)` meaning schema
7046        // column `c` is filled from the `j`-th tuple slot; `None` means "omitted:
7047        // use the column default / AUTO_INCREMENT". Validated once, reused for every row.
7048        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
7049            None => None, // 1-1 mapping, fast path
7050            Some(cols) => {
7051                let mut map = alloc::vec![None; schema_cols_len];
7052                for (j, name) in cols.iter().enumerate() {
7053                    let idx = column_meta
7054                        .iter()
7055                        .position(|c| c.name == *name)
7056                        .ok_or_else(|| {
7057                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
7058                        })?;
7059                    if map[idx].is_some() {
7060                        return Err(EngineError::Storage(StorageError::ArityMismatch {
7061                            expected: schema_cols_len,
7062                            actual: cols.len(),
7063                        }));
7064                    }
7065                    map[idx] = Some(j);
7066                }
7067                // Omitted columns must either be nullable, carry a
7068                // DEFAULT, or be AUTO_INCREMENT. Catch NOT NULL
7069                // omissions up front so the WAL stays clean.
7070                for (i, col) in column_meta.iter().enumerate() {
7071                    if map[i].is_none()
7072                        && !col.nullable
7073                        && col.default.is_none()
7074                        && col.runtime_default.is_none()
7075                        && !col.auto_increment
7076                    {
7077                        return Err(EngineError::Storage(StorageError::NullInNotNull {
7078                            column: col.name.clone(),
7079                        }));
7080                    }
7081                }
7082                Some(map)
7083            }
7084        };
7085        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
7086        // v7.6.2 — snapshot this table's FK list before the
7087        // mutable-borrow window so we can run parent lookups
7088        // against the immutable catalog after parsing. Empty vec is
7089        // the no-FK fast path; clone cost is O(fks * arity) which
7090        // is < 100 ns for typical schemas.
7091        let fks = table.schema().foreign_keys.clone();
7092        let mut affected = 0usize;
7093        // Stage 1 — parse + AUTO_INC + coerce all rows under the
7094        // single mutable borrow.
7095        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
7096        // v7.24 (round-16 collateral) — statement-scoped serial
7097        // cursors. next_auto_value() is a max+1 scan over COMMITTED
7098        // rows; multi-row `INSERT … VALUES (…),(…)` computed it per
7099        // tuple BEFORE any insertion, so every row drew the SAME id
7100        // (then sailed through, compounding with the inline-PK
7101        // enforcement gap). First use per column seeds from the
7102        // table; subsequent rows increment.
7103        let mut auto_cursors: alloc::collections::BTreeMap<usize, i64> =
7104            alloc::collections::BTreeMap::new();
7105        for tuple in stmt.rows {
7106            if tuple.len() != expected_tuple_len {
7107                return Err(EngineError::Storage(StorageError::ArityMismatch {
7108                    expected: expected_tuple_len,
7109                    actual: tuple.len(),
7110                }));
7111            }
7112            // Fast path: no column-list permutation → tuple slot j
7113            // maps to schema column j. We can zip schema with tuple
7114            // and skip the `raw_tuple` staging allocation entirely.
7115            let values: Vec<Value> = if let Some(map) = &tuple_pos {
7116                // Permuted path: still need raw_tuple to index by `map[i]`.
7117                let raw_tuple: Vec<Value> = tuple
7118                    .into_iter()
7119                    .map(literal_expr_to_value)
7120                    .collect::<Result<_, _>>()?;
7121                let mut out = Vec::with_capacity(schema_cols_len);
7122                for (i, col) in column_meta.iter().enumerate() {
7123                    let mut raw = match map[i] {
7124                        Some(j) => raw_tuple[j].clone(),
7125                        None => resolve_column_default_free(col, clock)?,
7126                    };
7127                    if col.auto_increment && raw.is_null() {
7128                        let next = match auto_cursors.get(&i) {
7129                            Some(n) => *n,
7130                            None => {
7131                                let base = table.next_auto_value(i).ok_or_else(|| {
7132                                    EngineError::Unsupported(alloc::format!(
7133                                        "AUTO_INCREMENT applies to integer columns only (column `{}`)",
7134                                        col.name
7135                                    ))
7136                                })?;
7137                                base.max(seq_floors.get(&i).copied().unwrap_or(i64::MIN))
7138                            }
7139                        };
7140                        auto_cursors.insert(i, next + 1);
7141                        raw = Value::BigInt(next);
7142                    }
7143                    let coerced = coerce_value(raw, col.ty, &col.name, i)?;
7144                    enforce_enum_label(&enum_label_lookup, i, &col.name, &coerced)?;
7145                    let coerced =
7146                        canonicalize_set_value(&set_variant_lookup, i, &col.name, coerced)?;
7147                    check_unsigned_range(&coerced, col, i)?;
7148                    out.push(coerced);
7149                }
7150                out
7151            } else {
7152                // 1-1 mapping fast path: single Vec alloc, no raw_tuple.
7153                let mut out = Vec::with_capacity(schema_cols_len);
7154                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
7155                    let mut raw = literal_expr_to_value(expr)?;
7156                    if col.auto_increment && raw.is_null() {
7157                        let next = match auto_cursors.get(&i) {
7158                            Some(n) => *n,
7159                            None => {
7160                                let base = table.next_auto_value(i).ok_or_else(|| {
7161                                    EngineError::Unsupported(alloc::format!(
7162                                        "AUTO_INCREMENT applies to integer columns only (column `{}`)",
7163                                        col.name
7164                                    ))
7165                                })?;
7166                                base.max(seq_floors.get(&i).copied().unwrap_or(i64::MIN))
7167                            }
7168                        };
7169                        auto_cursors.insert(i, next + 1);
7170                        raw = Value::BigInt(next);
7171                    }
7172                    let coerced = coerce_value(raw, col.ty, &col.name, i)?;
7173                    enforce_enum_label(&enum_label_lookup, i, &col.name, &coerced)?;
7174                    let coerced =
7175                        canonicalize_set_value(&set_variant_lookup, i, &col.name, coerced)?;
7176                    check_unsigned_range(&coerced, col, i)?;
7177                    out.push(coerced);
7178                }
7179                out
7180            };
7181            all_values.push(values);
7182        }
7183        // Stage 2 — FK enforcement on the immutable catalog.
7184        // Non-lexical lifetimes release the mutable borrow on
7185        // `table` here since stage 1 was the last use. The
7186        // parent-table lookup runs before any row is committed.
7187        let uniqueness = table.schema().uniqueness_constraints.clone();
7188        let _ = table;
7189        if !fks.is_empty() {
7190            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
7191        }
7192        // v7.13.0 — CHECK constraint enforcement (mailrs round-5 G3).
7193        enforce_check_constraints(self.active_catalog(), &stmt.table, &all_values)?;
7194        // NOTE (mailrs embed round-12): UNIQUE / PRIMARY KEY and
7195        // UNIQUE INDEX enforcement moved BELOW the ON CONFLICT
7196        // resolution pass. Running them first made every
7197        // `ON CONFLICT … DO UPDATE` upsert fail with a uniqueness
7198        // violation before the conflict handler could route the row
7199        // to an UPDATE — PG resolves the conflict action first and
7200        // only errors on rows no arbiter matched.
7201        // v7.9.8 / v7.9.9 — ON CONFLICT handling.
7202        //   - `DO NOTHING` filters `all_values` to non-conflicting
7203        //     rows + drops within-batch duplicates.
7204        //   - `DO UPDATE SET …` ALSO filters, but for each
7205        //     conflicting row it queues an UPDATE on the existing
7206        //     row using the incoming row's values as `EXCLUDED.*`.
7207        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
7208        let mut skipped_count = 0usize;
7209        if let Some(clause) = &stmt.on_conflict {
7210            let (conflict_cols, conflict_nnd) = resolve_on_conflict_columns(
7211                self.active_catalog(),
7212                &stmt.table,
7213                clause.target_columns.as_slice(),
7214            )?;
7215            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
7216            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
7217            for values in all_values {
7218                let key_tuple: Vec<&Value> = conflict_cols.iter().map(|&c| &values[c]).collect();
7219                // SQL spec: NULL in any conflict column means "no
7220                // conflict possible" (NULL ≠ NULL for uniqueness) —
7221                // UNLESS the constraint says NULLS NOT DISTINCT
7222                // (v7.29; mailrs migrate-013 replays its seed row
7223                // ('super', NULL) under exactly that declaration).
7224                let has_null_key =
7225                    !conflict_nnd && key_tuple.iter().any(|v| matches!(v, Value::Null));
7226                let collides_with_table = !has_null_key
7227                    && on_conflict_keys_exist(
7228                        self.active_catalog(),
7229                        &stmt.table,
7230                        &conflict_cols,
7231                        &key_tuple,
7232                    );
7233                let key_tuple_owned: Vec<Value> = key_tuple.iter().map(|v| (*v).clone()).collect();
7234                let collides_with_batch =
7235                    !has_null_key && seen_keys.iter().any(|k| k == &key_tuple_owned);
7236                let collides = collides_with_table || collides_with_batch;
7237                match (&clause.action, collides) {
7238                    (_, false) => {
7239                        seen_keys.push(key_tuple_owned);
7240                        kept.push(values);
7241                    }
7242                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
7243                        skipped_count += 1;
7244                    }
7245                    (
7246                        spg_sql::ast::OnConflictAction::Update {
7247                            assignments,
7248                            where_,
7249                        },
7250                        true,
7251                    ) => {
7252                        if !collides_with_table {
7253                            skipped_count += 1;
7254                            continue;
7255                        }
7256                        let target_pos = lookup_row_position_by_keys(
7257                            self.active_catalog(),
7258                            &stmt.table,
7259                            &conflict_cols,
7260                            &key_tuple,
7261                        )
7262                        .ok_or_else(|| {
7263                            EngineError::Unsupported(
7264                                "ON CONFLICT DO UPDATE: conflict detected but row \
7265                                 position could not be resolved (cold-tier row?)"
7266                                    .into(),
7267                            )
7268                        })?;
7269                        let updated = apply_on_conflict_assignments(
7270                            self.active_catalog(),
7271                            &stmt.table,
7272                            target_pos,
7273                            &values,
7274                            assignments,
7275                            where_.as_ref(),
7276                        )?;
7277                        if let Some(new_row) = updated {
7278                            pending_updates.push((target_pos, new_row));
7279                        } else {
7280                            skipped_count += 1;
7281                        }
7282                    }
7283                }
7284            }
7285            all_values = kept;
7286        }
7287        // v7.9.19 — composite UNIQUE / PRIMARY KEY enforcement.
7288        // v7.9.29 — CREATE UNIQUE INDEX [WHERE pred] enforcement.
7289        // Both run on the post-ON-CONFLICT row set: conflicting rows
7290        // already left `all_values` (DO NOTHING drop / DO UPDATE
7291        // reroute), so what remains must be genuinely unique.
7292        enforce_uniqueness_inserts(self.active_catalog(), &stmt.table, &uniqueness, &all_values)?;
7293        enforce_unique_index_inserts(self.active_catalog(), &stmt.table, &all_values)?;
7294        // Stage 3 — insert all rows under a fresh mutable borrow.
7295        let table = self
7296            .active_catalog_mut()
7297            .get_mut(&stmt.table)
7298            .ok_or_else(|| {
7299                EngineError::Storage(StorageError::TableNotFound {
7300                    name: stmt.table.clone(),
7301                })
7302            })?;
7303        // v7.9.4 — keep RETURNING projection rows separate per
7304        // INSERT and per UPDATE branch so DO UPDATE pushes the new
7305        // post-update state, not the incoming-only values.
7306        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
7307        // v7.12.7 — collect embedded SQL emitted by any trigger
7308        // fire across the row loop; engine drains the queue after
7309        // the table mut borrow drops.
7310        let mut deferred_embedded: Vec<triggers::DeferredEmbeddedStmt> = Vec::new();
7311        'rowloop: for values in all_values {
7312            let mut row = Row::new(values);
7313            // v7.12.4 — BEFORE INSERT row-level triggers. Each
7314            // trigger may rewrite NEW cells (e.g. populate
7315            // `search_vector := to_tsvector(...)`) and may return
7316            // NULL to skip the row entirely.
7317            for fd in &before_insert_triggers {
7318                let (outcome, deferred) = triggers::fire_row_trigger(
7319                    fd,
7320                    Some(row.clone()),
7321                    None,
7322                    &stmt.table,
7323                    &column_meta,
7324                    &[],
7325                    trigger_session_cfg.as_deref(),
7326                    false,
7327                )
7328                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
7329                deferred_embedded.extend(deferred);
7330                match outcome {
7331                    triggers::TriggerOutcome::Row(r) => row = r,
7332                    triggers::TriggerOutcome::Skip => continue 'rowloop,
7333                }
7334            }
7335            if stmt.returning.is_some() {
7336                returning_rows.push(row.values.clone());
7337            }
7338            // v7.12.4 — clone for the AFTER trigger view; insert
7339            // moves the row into the table.
7340            let inserted = row.clone();
7341            table.insert(row)?;
7342            affected += 1;
7343            // v7.12.4 — AFTER INSERT row-level triggers fire post-
7344            // write. Return value is ignored (PG semantics); we
7345            // surface any error from the body up to the caller.
7346            for fd in &after_insert_triggers {
7347                let (_outcome, deferred) = triggers::fire_row_trigger(
7348                    fd,
7349                    Some(inserted.clone()),
7350                    None,
7351                    &stmt.table,
7352                    &column_meta,
7353                    &[],
7354                    trigger_session_cfg.as_deref(),
7355                    true,
7356                )
7357                .map_err(|e| EngineError::Storage(StorageError::Corrupt(alloc::format!("{e}"))))?;
7358                deferred_embedded.extend(deferred);
7359            }
7360        }
7361        // v7.9.9 — apply ON CONFLICT DO UPDATE rewrites collected
7362        // in the conflict-resolution pass. update_row handles
7363        // index maintenance + body re-encoding.
7364        for (pos, new_row) in pending_updates {
7365            if stmt.returning.is_some() {
7366                returning_rows.push(new_row.clone());
7367            }
7368            table.update_row(pos, new_row)?;
7369            affected += 1;
7370        }
7371        let _ = skipped_count;
7372        // v7.12.7 — drop the table mut borrow and drain any
7373        // trigger-emitted embedded SQL queued during this INSERT.
7374        // The borrow has to release first because each deferred
7375        // stmt may UPDATE / INSERT / DELETE the same (or another)
7376        // table — including, in principle, this one.
7377        let _ = table;
7378        self.execute_deferred_trigger_stmts(deferred_embedded, CancelToken::none())?;
7379        // v7.9.4/v7.9.9 — RETURNING streams the rows that ended
7380        // up in the table after this statement (insert or
7381        // post-update on conflict).
7382        if let Some(items) = &stmt.returning {
7383            return self.build_returning_rows(&stmt.table, items, returning_rows);
7384        }
7385        // v6.2.1 — auto-analyze: track per-table modified-row
7386        // counter so the background sweep can decide when to
7387        // re-ANALYZE. Cheap path on the autocommit-wrap hot loop
7388        // — one BTreeMap entry update per INSERT batch.
7389        if !self.in_transaction() && affected > 0 {
7390            self.statistics
7391                .record_modifications(&stmt.table, affected as u64);
7392        }
7393        Ok(QueryResult::CommandOk {
7394            affected,
7395            modified_catalog: !self.in_transaction(),
7396        })
7397    }
7398
7399    /// v4.5: SELECT with cooperative cancellation. The token is
7400    /// honoured between UNION peers and inside the bare-SELECT row
7401    /// loop; HNSW kNN graph walks and the aggregate executor don't
7402    /// honour it yet (deferred — those paths bound their work
7403    /// internally by `LIMIT k` and `GROUP BY` cardinality).
7404    /// v6.10.2 — cold-tier time-travel scan. Resolves the segment
7405    /// by id, decodes each row body against the table's current
7406    /// schema, applies the SELECT's projection + optional WHERE +
7407    /// optional LIMIT, returns a `Rows` result. JOINs / aggregates
7408    /// / ORDER BY are unsupported on this path (STABILITY carve-
7409    /// out); operators wanting them should restore the segment
7410    /// into a regular table first.
7411    fn exec_select_as_of_segment(
7412        &self,
7413        stmt: &SelectStatement,
7414        from: &spg_sql::ast::FromClause,
7415        segment_id: u32,
7416    ) -> Result<QueryResult, EngineError> {
7417        // v6.10.2 scope: no joins, no aggregates, no ORDER BY,
7418        // no GROUP BY / HAVING / UNION / OFFSET / DISTINCT.
7419        if !from.joins.is_empty()
7420            || stmt.group_by.is_some()
7421            || stmt.having.is_some()
7422            || !stmt.unions.is_empty()
7423            || !stmt.order_by.is_empty()
7424            || stmt.offset.is_some()
7425            || stmt.distinct
7426            || aggregate::uses_aggregate(stmt)
7427        {
7428            return Err(EngineError::Unsupported(
7429                "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
7430                 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
7431                    .into(),
7432            ));
7433        }
7434        let table = self
7435            .active_catalog()
7436            .get(&from.primary.name)
7437            .ok_or_else(|| StorageError::TableNotFound {
7438                name: from.primary.name.clone(),
7439            })?;
7440        let schema = table.schema().clone();
7441        let schema_cols = &schema.columns;
7442        let alias = from
7443            .primary
7444            .alias
7445            .as_deref()
7446            .unwrap_or(from.primary.name.as_str());
7447        let ctx = EvalContext::new(schema_cols, Some(alias));
7448        let seg = self
7449            .active_catalog()
7450            .cold_segment(segment_id)
7451            .ok_or_else(|| {
7452                EngineError::Unsupported(alloc::format!(
7453                    "AS OF SEGMENT: cold segment {segment_id} not registered"
7454                ))
7455            })?;
7456        let mut out_rows: Vec<Row> = Vec::new();
7457        let mut limit_remaining: Option<usize> =
7458            stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
7459        for (_key, body) in seg.scan() {
7460            let (row, _consumed) =
7461                spg_storage::decode_row_body_dense(&body, &schema, seg.codec_version())
7462                    .map_err(EngineError::Storage)?;
7463            if let Some(where_expr) = &stmt.where_ {
7464                let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
7465                if !matches!(cond, Value::Bool(true)) {
7466                    continue;
7467                }
7468            }
7469            // Projection.
7470            let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
7471            out_rows.push(projected);
7472            if let Some(rem) = limit_remaining.as_mut() {
7473                if *rem == 0 {
7474                    out_rows.pop();
7475                    break;
7476                }
7477                *rem -= 1;
7478            }
7479        }
7480        // Output column schema: derive from SELECT items.
7481        let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
7482        Ok(QueryResult::Rows {
7483            columns,
7484            rows: out_rows,
7485        })
7486    }
7487
7488    /// v6.10.2 — simple-path WHERE eval that doesn't go through
7489    /// the correlated-subquery / Memoize machinery. AS OF SEGMENT
7490    /// scan paths predicate against a snapshot frozen segment, no
7491    /// cross-row state.
7492    fn eval_expr_simple(
7493        &self,
7494        expr: &Expr,
7495        row: &Row,
7496        ctx: &EvalContext,
7497    ) -> Result<Value, EngineError> {
7498        let cancel = CancelToken::none();
7499        self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
7500    }
7501
7502    /// v7.9.4 — INSERT / UPDATE / DELETE RETURNING projector.
7503    /// Given the table name, the user-supplied projection items,
7504    /// and the mutated rows (post-insert / post-update values, or
7505    /// pre-delete snapshot), build a `QueryResult::Rows` whose
7506    /// schema describes the projected columns. Mailrs migration
7507    /// blocker #1.
7508    fn build_returning_rows(
7509        &self,
7510        table_name: &str,
7511        items: &[SelectItem],
7512        mutated_rows: Vec<Vec<Value>>,
7513    ) -> Result<QueryResult, EngineError> {
7514        let table = self.active_catalog().get(table_name).ok_or_else(|| {
7515            EngineError::Storage(StorageError::TableNotFound {
7516                name: table_name.into(),
7517            })
7518        })?;
7519        let schema_cols = table.schema().columns.clone();
7520        let columns = self.derive_output_columns(items, &schema_cols, table_name);
7521        let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
7522        for values in mutated_rows {
7523            let row = Row::new(values);
7524            let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
7525            out_rows.push(projected);
7526        }
7527        Ok(QueryResult::Rows {
7528            columns,
7529            rows: out_rows,
7530        })
7531    }
7532
7533    /// v6.10.2 — projection for AS OF SEGMENT. Resolves
7534    /// `SelectItem::Wildcard` to all schema columns and
7535    /// `SelectItem::Expr` via the regular eval path.
7536    fn project_row_simple(
7537        &self,
7538        row: &Row,
7539        items: &[SelectItem],
7540        schema_cols: &[ColumnSchema],
7541        alias: &str,
7542    ) -> Result<Row, EngineError> {
7543        let ctx = EvalContext::new(schema_cols, Some(alias));
7544        let cancel = CancelToken::none();
7545        let mut out_vals = Vec::new();
7546        for item in items {
7547            match item {
7548                SelectItem::Wildcard => {
7549                    out_vals.extend(row.values.iter().cloned());
7550                }
7551                SelectItem::Expr { expr, .. } => {
7552                    let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
7553                    out_vals.push(v);
7554                }
7555            }
7556        }
7557        Ok(Row::new(out_vals))
7558    }
7559
7560    /// v6.10.2 — derive the output `ColumnSchema` list for an
7561    /// AS OF SEGMENT projection. Wildcards take the full schema;
7562    /// expressions take the alias if present or a synthetic
7563    /// `?column?` (PG convention) otherwise.
7564    fn derive_output_columns(
7565        &self,
7566        items: &[SelectItem],
7567        schema_cols: &[ColumnSchema],
7568        table_alias: &str,
7569    ) -> Vec<ColumnSchema> {
7570        let mut out = Vec::new();
7571        for item in items {
7572            match item {
7573                SelectItem::Wildcard => {
7574                    out.extend(schema_cols.iter().cloned());
7575                }
7576                SelectItem::Expr { expr, alias } => {
7577                    // Bare column references inherit the schema
7578                    // column's name + type — PG names `RETURNING id`
7579                    // "id" and types it BIGINT, and the sqlx embed
7580                    // path type-checks RowDescription against the
7581                    // Rust target (mailrs embed round-12).
7582                    if let Expr::Column(col) = expr
7583                        && let Some(sc) = schema_cols.iter().find(|c| c.name == col.name)
7584                    {
7585                        let name = alias.clone().unwrap_or_else(|| sc.name.clone());
7586                        out.push(ColumnSchema::new(name, sc.ty, sc.nullable));
7587                        continue;
7588                    }
7589                    let name = alias.clone().unwrap_or_else(|| "?column?".to_string());
7590                    // v7.30.4 (mailrs round-27, P0) — type the
7591                    // expression with the same inference the SELECT
7592                    // list uses (INT−INT=INT, BIGINT+INT=BIGINT…).
7593                    // The old Text default broke every typed decode
7594                    // of `RETURNING uidnext - 1 AS uid`: four days
7595                    // of inbound mail indexed nowhere. Inference
7596                    // failure keeps the old Text fallback rather
7597                    // than inventing new error paths here.
7598                    let (ty, nullable) =
7599                        build_projection(core::slice::from_ref(item), schema_cols, table_alias)
7600                            .ok()
7601                            .and_then(|p| p.into_iter().next())
7602                            .map_or((DataType::Text, true), |p| (p.ty, p.nullable));
7603                    out.push(ColumnSchema::new(name, ty, nullable));
7604                }
7605            }
7606        }
7607        out
7608    }
7609
7610    fn exec_select_cancel(
7611        &self,
7612        stmt: &SelectStatement,
7613        cancel: CancelToken<'_>,
7614    ) -> Result<QueryResult, EngineError> {
7615        cancel.check()?;
7616        // v7.17.0 Phase 1.2 — user-defined VIEW expansion. If the
7617        // FROM / JOIN graph references any catalogued view name,
7618        // re-parse the view body and prepend it as a synthetic
7619        // CTE. Recurses on views-in-views via the regular CTE
7620        // dispatch below. Fast-path: skip the walker entirely when
7621        // the catalog has no views (the typical OLTP load).
7622        if !self.active_catalog().views().is_empty() {
7623            if let Some(rewritten) = self.expand_views_in_select(stmt)? {
7624                return self.exec_select_cancel(&rewritten, cancel);
7625            }
7626        }
7627        // v7.16.2 — information_schema / pg_catalog virtual
7628        // views (mailrs round-10 A.3). If the SELECT touches a
7629        // synthetic meta-table name (`__spg_info_*` /
7630        // `__spg_pg_*` — produced by the parser for
7631        // `information_schema.X` / `pg_catalog.X`), clone the
7632        // catalog, materialise the requested view as a real
7633        // temporary table, and re-execute against an enriched
7634        // engine. Same pattern as `exec_with_ctes` for CTEs.
7635        if !self.meta_views_materialised && select_references_meta_view(stmt) {
7636            return self.exec_select_with_meta_views(stmt, cancel);
7637        }
7638        // v6.10.2 — cold-tier time-travel short-circuit. When the
7639        // primary TableRef carries `AS OF SEGMENT '<id>'`, run a
7640        // dedicated cold-segment scan instead of the regular
7641        // hot+index path. The scope is intentionally narrow for
7642        // v6.10.2 — bare `SELECT * FROM <t> AS OF SEGMENT 'id'`,
7643        // optionally with a single-column-equality WHERE. JOINs /
7644        // aggregates / ORDER BY / subqueries on top of a time-
7645        // travelled scan are STABILITY § "Out of v6.10".
7646        if let Some(from) = &stmt.from
7647            && let Some(seg_id) = from.primary.as_of_segment
7648        {
7649            return self.exec_select_as_of_segment(stmt, from, seg_id);
7650        }
7651        // v6.2.0 / v6.5.0 — virtual-table short-circuits. Detected
7652        // pre-CTE because they don't read from the catalog and
7653        // shouldn't participate in regular FROM resolution.
7654        if let Some(from) = &stmt.from
7655            && from.joins.is_empty()
7656            && stmt.where_.is_none()
7657            && stmt.group_by.is_none()
7658            && stmt.having.is_none()
7659            && stmt.unions.is_empty()
7660            && stmt.order_by.is_empty()
7661            && stmt.limit.is_none()
7662            && stmt.offset.is_none()
7663            && !stmt.distinct
7664            && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
7665        {
7666            let lower = from.primary.name.to_ascii_lowercase();
7667            match lower.as_str() {
7668                "spg_statistic" => return Ok(self.exec_spg_statistic()),
7669                // v6.5.0 — observability v2 virtual tables.
7670                "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
7671                "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
7672                // v7.31 — memory-campaign bucket meters.
7673                "spg_memory_stats" => return Ok(self.exec_spg_memory_stats()),
7674                "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
7675                "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
7676                "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
7677                "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
7678                "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
7679                "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
7680                "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
7681                _ => {}
7682            }
7683        }
7684        // v4.11: CTEs materialise into a temporary enriched catalog
7685        // *before* anything else — the body SELECT can then refer
7686        // to CTE names via the regular FROM-clause resolution.
7687        // Uncorrelated only: each CTE body runs once against the
7688        // current catalog, not against later CTEs' results (left-
7689        // to-right materialisation would relax this, but we keep
7690        // it simple for v4.11 MVP).
7691        if !stmt.ctes.is_empty() {
7692            return self.exec_with_ctes(stmt, cancel);
7693        }
7694        // v4.10: subqueries (uncorrelated) are resolved here, before
7695        // the executor sees the row loop. We clone the statement so
7696        // we can mutate without disturbing the caller's AST — most
7697        // queries pass through with no subquery nodes and the clone
7698        // is cheap; with subqueries the materialisation cost
7699        // dominates anyway.
7700        let mut stmt_owned;
7701        let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
7702            stmt_owned = stmt.clone();
7703            self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
7704            &stmt_owned
7705        } else {
7706            stmt
7707        };
7708        if stmt_ref.unions.is_empty() {
7709            return self.exec_bare_select_cancel(stmt_ref, cancel);
7710        }
7711        // UNION path: clone-strip the head into a bare block (its own
7712        // DISTINCT and any inner ORDER BY are dropped by parser rule —
7713        // the wrapper SelectStatement carries them), execute, then chain
7714        // peers with left-associative dedup semantics.
7715        let mut head = stmt_ref.clone();
7716        head.unions = Vec::new();
7717        head.order_by = Vec::new();
7718        head.limit = None;
7719        let QueryResult::Rows { columns, mut rows } =
7720            self.exec_bare_select_cancel(&head, cancel)?
7721        else {
7722            unreachable!("bare SELECT cannot return CommandOk")
7723        };
7724        for (kind, peer) in &stmt_ref.unions {
7725            let QueryResult::Rows {
7726                columns: peer_cols,
7727                rows: peer_rows,
7728            } = self.exec_bare_select_cancel(peer, cancel)?
7729            else {
7730                unreachable!("bare SELECT cannot return CommandOk")
7731            };
7732            if peer_cols.len() != columns.len() {
7733                return Err(EngineError::Unsupported(alloc::format!(
7734                    "UNION arity mismatch: head has {} columns, peer has {}",
7735                    columns.len(),
7736                    peer_cols.len()
7737                )));
7738            }
7739            rows.extend(peer_rows);
7740            if matches!(kind, UnionKind::Distinct) {
7741                rows = dedup_rows(rows);
7742            }
7743        }
7744        // ORDER BY at the top of a UNION applies to the combined result.
7745        // Eval against the projected schema (NOT the source table).
7746        if !stmt.order_by.is_empty() {
7747            let synth_ctx = EvalContext::new(&columns, None);
7748            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
7749            let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
7750            for r in rows {
7751                let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
7752                tagged.push((keys, r));
7753            }
7754            sort_by_keys(&mut tagged, &descs);
7755            rows = tagged.into_iter().map(|(_, r)| r).collect();
7756        }
7757        apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
7758        Ok(QueryResult::Rows { columns, rows })
7759    }
7760
7761    #[allow(clippy::too_many_lines)]
7762    #[allow(clippy::too_many_lines)] // huge match — splitting fragments the planner
7763    /// v7.11.7 — execute `SELECT … FROM unnest(expr) [AS] alias …`.
7764    /// Synthesises a single-column virtual table whose column type
7765    /// is TEXT and whose rows are the array elements. Routes
7766    /// through the regular projection / WHERE / ORDER BY / LIMIT
7767    /// machinery so set-returning UNNEST composes naturally with
7768    /// the rest of the SELECT surface.
7769    fn exec_select_unnest(
7770        &self,
7771        stmt: &SelectStatement,
7772        primary: &TableRef,
7773        cancel: CancelToken<'_>,
7774    ) -> Result<QueryResult, EngineError> {
7775        let expr = primary
7776            .unnest_expr
7777            .as_deref()
7778            .expect("caller guards unnest_expr.is_some()");
7779        // Evaluate the array expression once. Empty schema / empty
7780        // row — uncorrelated UNNEST cannot reference outer columns.
7781        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
7782        let ctx = EvalContext::new(&empty_schema, None);
7783        let dummy_row = Row::new(alloc::vec::Vec::new());
7784        // v7.11.13 — unnest dispatches per array element type so
7785        // INT[] / BIGINT[] surface their PG types in projection.
7786        let (elem_dtype, rows): (DataType, alloc::vec::Vec<Row>) =
7787            match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
7788                Value::Null => (DataType::Text, alloc::vec::Vec::new()),
7789                Value::TextArray(items) => {
7790                    let rows = items
7791                        .into_iter()
7792                        .map(|item| {
7793                            Row::new(alloc::vec![match item {
7794                                Some(s) => Value::Text(s),
7795                                None => Value::Null,
7796                            }])
7797                        })
7798                        .collect();
7799                    (DataType::Text, rows)
7800                }
7801                Value::IntArray(items) => {
7802                    let rows = items
7803                        .into_iter()
7804                        .map(|item| {
7805                            Row::new(alloc::vec![match item {
7806                                Some(n) => Value::Int(n),
7807                                None => Value::Null,
7808                            }])
7809                        })
7810                        .collect();
7811                    (DataType::Int, rows)
7812                }
7813                Value::BigIntArray(items) => {
7814                    let rows = items
7815                        .into_iter()
7816                        .map(|item| {
7817                            Row::new(alloc::vec![match item {
7818                                Some(n) => Value::BigInt(n),
7819                                None => Value::Null,
7820                            }])
7821                        })
7822                        .collect();
7823                    (DataType::BigInt, rows)
7824                }
7825                other => {
7826                    return Err(EngineError::Unsupported(alloc::format!(
7827                        "unnest() expects an array argument, got {:?}",
7828                        other.data_type()
7829                    )));
7830                }
7831            };
7832        let alias = primary
7833            .alias
7834            .clone()
7835            .unwrap_or_else(|| "unnest".to_string());
7836        // v7.13.2 — mailrs round-6 S5. Honour PG-standard
7837        // `UNNEST(arr) AS p(col_name)` column-list aliasing: the
7838        // first entry overrides the projected column's name.
7839        // Without the column list, fall back to the table alias
7840        // (pre-v7.13.2 behaviour).
7841        let col_name = primary
7842            .unnest_column_aliases
7843            .first()
7844            .cloned()
7845            .unwrap_or_else(|| alias.clone());
7846        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
7847        let schema_cols = alloc::vec![col_schema.clone()];
7848        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
7849        // Apply WHERE.
7850        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
7851            let mut out = alloc::vec::Vec::with_capacity(rows.len());
7852            for row in rows {
7853                cancel.check()?;
7854                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
7855                if matches!(v, Value::Bool(true)) {
7856                    out.push(row);
7857                }
7858            }
7859            out
7860        } else {
7861            rows
7862        };
7863        // v7.17.0 Phase 3.P0-48 — aggregate dispatch over the
7864        // unnest source. Same routing the relational scan path
7865        // already takes — without it `SELECT COUNT(*) FROM
7866        // unnest(ARRAY[…])` either errored at projection time or
7867        // returned the wrong shape.
7868        if aggregate::uses_aggregate(stmt) {
7869            // v7.29 — a per-query memo so correlated scalar
7870            // subqueries batch-evaluate once (group map) instead of
7871            // executing per group.
7872            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
7873            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
7874                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
7875                    .map_err(|err| match err {
7876                        EngineError::Eval(ev) => ev,
7877                        other => eval::EvalError::TypeMismatch {
7878                            detail: alloc::format!("{other}"),
7879                        },
7880                    })
7881            };
7882            let filtered_refs: alloc::vec::Vec<&Row> = filtered.iter().collect();
7883            let agg = aggregate::run(
7884                stmt,
7885                &filtered_refs,
7886                &schema_cols,
7887                Some(&alias),
7888                Some(&agg_correlated),
7889            )?;
7890            return self.finish_agg_result(agg, stmt, cancel);
7891        }
7892        // Projection.
7893        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
7894        let mut projected_rows: alloc::vec::Vec<Row> =
7895            alloc::vec::Vec::with_capacity(filtered.len());
7896        // v7.19 P5 — Set-Returning-Function in projection
7897        // position (PG `SELECT unnest(arr) FROM t` shape). When a
7898        // SELECT item evaluates to a top-level unnest(arr) call,
7899        // expand it: for each input row, evaluate the array, emit
7900        // one output row per element, broadcasting non-SRF
7901        // projections from the same input row. Multi-SRF + LCM
7902        // padding stays a documented carve-out; mailrs uses
7903        // single-SRF for redirect_uris.
7904        let srf_position = projection.iter().position(|p| is_top_level_unnest(&p.expr));
7905        if let Some(srf_idx) = srf_position {
7906            let srf_arg = top_level_unnest_arg(&projection[srf_idx].expr)
7907                .expect("checked by is_top_level_unnest above");
7908            for row in &filtered {
7909                let arr_val =
7910                    eval::eval_expr(srf_arg, row, &scan_ctx).map_err(EngineError::Eval)?;
7911                let elements = array_value_to_elements(&arr_val)?;
7912                // Empty array → zero rows for this input row (PG
7913                // semantics: `SELECT unnest('{}'::int[])` returns
7914                // 0 rows, not a single NULL row).
7915                for elem in elements {
7916                    let mut vals = alloc::vec::Vec::with_capacity(projection.len());
7917                    for (i, p) in projection.iter().enumerate() {
7918                        if i == srf_idx {
7919                            vals.push(elem.clone());
7920                        } else {
7921                            vals.push(
7922                                eval::eval_expr(&p.expr, row, &scan_ctx)
7923                                    .map_err(EngineError::Eval)?,
7924                            );
7925                        }
7926                    }
7927                    projected_rows.push(Row::new(vals));
7928                }
7929            }
7930        } else {
7931            // v7.24 (round-16 B) — select-list subqueries resolve
7932            // per row (correlated-aware; plain exprs take the fast
7933            // path inside).
7934            let mut proj_memo = memoize::MemoizeCache::default();
7935            for row in &filtered {
7936                let mut vals = alloc::vec::Vec::with_capacity(projection.len());
7937                for p in &projection {
7938                    vals.push(self.eval_expr_with_correlated(
7939                        &p.expr,
7940                        row,
7941                        &scan_ctx,
7942                        cancel,
7943                        Some(&mut proj_memo),
7944                    )?);
7945                }
7946                projected_rows.push(Row::new(vals));
7947            }
7948        }
7949        // ORDER BY / LIMIT — apply on the projected rows (cheap;
7950        // unnest result sets are small by design).
7951        let columns: alloc::vec::Vec<ColumnSchema> = projection
7952            .iter()
7953            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
7954            .collect();
7955        // Re-evaluate ORDER BY against the source schema (pre-projection
7956        // so col refs by name still resolve through `scan_ctx`).
7957        if !stmt.order_by.is_empty() {
7958            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
7959                .iter()
7960                .enumerate()
7961                .map(|(i, r)| -> Result<_, EngineError> {
7962                    let keys: Result<Vec<Value>, EngineError> = stmt
7963                        .order_by
7964                        .iter()
7965                        .map(|ob| {
7966                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
7967                        })
7968                        .collect();
7969                    Ok((i, keys?))
7970                })
7971                .collect::<Result<_, _>>()?;
7972            indexed.sort_by(|a, b| {
7973                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
7974                    let o = &stmt.order_by[idx];
7975                    let cmp = order_by_value_cmp(o.desc, o.nulls_first, ka, kb);
7976                    if cmp != core::cmp::Ordering::Equal {
7977                        return cmp;
7978                    }
7979                }
7980                core::cmp::Ordering::Equal
7981            });
7982            projected_rows = indexed
7983                .into_iter()
7984                .map(|(i, _)| projected_rows[i].clone())
7985                .collect();
7986        }
7987        // LIMIT / OFFSET — apply at the tail.
7988        if let Some(offset) = stmt.offset_literal() {
7989            let off = (offset as usize).min(projected_rows.len());
7990            projected_rows.drain(..off);
7991        }
7992        if let Some(limit) = stmt.limit_literal() {
7993            projected_rows.truncate(limit as usize);
7994        }
7995        Ok(QueryResult::Rows {
7996            columns,
7997            rows: projected_rows,
7998        })
7999    }
8000
8001    /// v7.17.0 Phase 3.10 — `FROM generate_series(start, stop [,
8002    /// step])` set-returning source. Mirrors `exec_select_unnest`'s
8003    /// shape: evaluate the arg list once against an empty row,
8004    /// materialise the row stream by stepping start → stop, then
8005    /// route through the standard WHERE / projection / ORDER BY /
8006    /// LIMIT pipeline. Two arg-type combos in v7.17:
8007    ///   * integer / integer [/ integer] — SmallInt, Int, BigInt
8008    ///     (widened to BigInt internally; step defaults to 1)
8009    ///   * timestamp / timestamp / interval — date-range
8010    ///     iteration (mailrs's daily-report pattern)
8011    fn exec_select_generate_series(
8012        &self,
8013        stmt: &SelectStatement,
8014        primary: &TableRef,
8015        cancel: CancelToken<'_>,
8016    ) -> Result<QueryResult, EngineError> {
8017        let args = primary
8018            .generate_series_args
8019            .as_ref()
8020            .expect("caller guards generate_series_args.is_some()");
8021        let empty_schema: alloc::vec::Vec<ColumnSchema> = alloc::vec::Vec::new();
8022        let ctx = EvalContext::new(&empty_schema, None);
8023        let dummy_row = Row::new(alloc::vec::Vec::new());
8024        let mut arg_values: alloc::vec::Vec<Value> = alloc::vec::Vec::with_capacity(args.len());
8025        for a in args {
8026            arg_values.push(eval::eval_expr(a, &dummy_row, &ctx).map_err(EngineError::Eval)?);
8027        }
8028        // Dispatch on the start value's shape. Reject mixed-shape
8029        // calls early (e.g. start = timestamp, stop = integer) so
8030        // the caller gets a clean error rather than a panic.
8031        let (elem_dtype, rows) = match arg_values.as_slice() {
8032            [Value::Timestamp(start), Value::Timestamp(stop), step] => {
8033                let interval_step = match step {
8034                    Value::Interval { .. } => step.clone(),
8035                    other => {
8036                        return Err(EngineError::Unsupported(alloc::format!(
8037                            "generate_series(timestamp, timestamp, …): \
8038                             step must be INTERVAL, got {:?}",
8039                            other.data_type()
8040                        )));
8041                    }
8042                };
8043                let rows = generate_series_timestamps(*start, *stop, interval_step, &cancel)?;
8044                (DataType::Timestamp, rows)
8045            }
8046            [start, stop, step]
8047                if value_is_integer(start) && value_is_integer(stop) && value_is_integer(step) =>
8048            {
8049                let s = value_to_i64(start);
8050                let e = value_to_i64(stop);
8051                let st = value_to_i64(step);
8052                let rows = generate_series_integers(s, e, st, &cancel)?;
8053                (DataType::BigInt, rows)
8054            }
8055            [start, stop] if value_is_integer(start) && value_is_integer(stop) => {
8056                let s = value_to_i64(start);
8057                let e = value_to_i64(stop);
8058                let rows = generate_series_integers(s, e, 1, &cancel)?;
8059                (DataType::BigInt, rows)
8060            }
8061            _ => {
8062                return Err(EngineError::Unsupported(alloc::format!(
8063                    "generate_series(): v7.17 supports integer or (timestamp, timestamp, interval) \
8064                     argument shapes; got {:?}",
8065                    arg_values
8066                        .iter()
8067                        .map(|v| v.data_type())
8068                        .collect::<alloc::vec::Vec<_>>()
8069                )));
8070            }
8071        };
8072        let alias = primary
8073            .alias
8074            .clone()
8075            .unwrap_or_else(|| "generate_series".to_string());
8076        let col_name = alias.clone();
8077        let col_schema = ColumnSchema::new(col_name, elem_dtype, true);
8078        let schema_cols = alloc::vec![col_schema.clone()];
8079        let scan_ctx = EvalContext::new(&schema_cols, Some(&alias));
8080        // WHERE.
8081        let filtered: alloc::vec::Vec<Row> = if let Some(w) = &stmt.where_ {
8082            let mut out = alloc::vec::Vec::with_capacity(rows.len());
8083            for row in rows {
8084                cancel.check()?;
8085                let v = eval::eval_expr(w, &row, &scan_ctx).map_err(EngineError::Eval)?;
8086                if matches!(v, Value::Bool(true)) {
8087                    out.push(row);
8088                }
8089            }
8090            out
8091        } else {
8092            rows
8093        };
8094        // v7.17.0 Phase 3.P0-48 — aggregate dispatch for set-
8095        // returning sources. When the SELECT projection contains
8096        // aggregate functions (COUNT/SUM/MIN/MAX/AVG/string_agg/
8097        // …) we route the filtered row stream through the same
8098        // aggregate executor the relational scan path uses, so
8099        // `SELECT COUNT(*) FROM generate_series(1, 100)` returns
8100        // a single 100 row instead of erroring at projection
8101        // time. GROUP BY / HAVING / ORDER BY over the aggregate
8102        // output all ride through `aggregate::run`.
8103        if aggregate::uses_aggregate(stmt) {
8104            // v7.29 — a per-query memo so correlated scalar
8105            // subqueries batch-evaluate once (group map) instead of
8106            // executing per group.
8107            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
8108            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
8109                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
8110                    .map_err(|err| match err {
8111                        EngineError::Eval(ev) => ev,
8112                        other => eval::EvalError::TypeMismatch {
8113                            detail: alloc::format!("{other}"),
8114                        },
8115                    })
8116            };
8117            let filtered_refs: alloc::vec::Vec<&Row> = filtered.iter().collect();
8118            let agg = aggregate::run(
8119                stmt,
8120                &filtered_refs,
8121                &schema_cols,
8122                Some(&alias),
8123                Some(&agg_correlated),
8124            )?;
8125            return self.finish_agg_result(agg, stmt, cancel);
8126        }
8127        // Projection.
8128        let projection = build_projection(&stmt.items, &schema_cols, &alias)?;
8129        let mut projected_rows: alloc::vec::Vec<Row> =
8130            alloc::vec::Vec::with_capacity(filtered.len());
8131        let mut proj_memo = memoize::MemoizeCache::default();
8132        for row in &filtered {
8133            let mut vals = alloc::vec::Vec::with_capacity(projection.len());
8134            for p in &projection {
8135                // v7.24 (round-16 B) — correlated-aware.
8136                vals.push(self.eval_expr_with_correlated(
8137                    &p.expr,
8138                    row,
8139                    &scan_ctx,
8140                    cancel,
8141                    Some(&mut proj_memo),
8142                )?);
8143            }
8144            projected_rows.push(Row::new(vals));
8145        }
8146        let columns: alloc::vec::Vec<ColumnSchema> = projection
8147            .iter()
8148            .map(|p| ColumnSchema::new(p.output_name.clone(), p.ty, p.nullable))
8149            .collect();
8150        // ORDER BY against the source schema.
8151        if !stmt.order_by.is_empty() {
8152            let mut indexed: alloc::vec::Vec<(usize, Vec<Value>)> = filtered
8153                .iter()
8154                .enumerate()
8155                .map(|(i, r)| -> Result<_, EngineError> {
8156                    let keys: Result<Vec<Value>, EngineError> = stmt
8157                        .order_by
8158                        .iter()
8159                        .map(|ob| {
8160                            eval::eval_expr(&ob.expr, r, &scan_ctx).map_err(EngineError::Eval)
8161                        })
8162                        .collect();
8163                    Ok((i, keys?))
8164                })
8165                .collect::<Result<_, _>>()?;
8166            indexed.sort_by(|a, b| {
8167                for (idx, (ka, kb)) in a.1.iter().zip(b.1.iter()).enumerate() {
8168                    let o = &stmt.order_by[idx];
8169                    let cmp = order_by_value_cmp(o.desc, o.nulls_first, ka, kb);
8170                    if cmp != core::cmp::Ordering::Equal {
8171                        return cmp;
8172                    }
8173                }
8174                core::cmp::Ordering::Equal
8175            });
8176            projected_rows = indexed
8177                .into_iter()
8178                .map(|(i, _)| projected_rows[i].clone())
8179                .collect();
8180        }
8181        if let Some(offset) = stmt.offset_literal() {
8182            let off = (offset as usize).min(projected_rows.len());
8183            projected_rows.drain(..off);
8184        }
8185        if let Some(limit) = stmt.limit_literal() {
8186            projected_rows.truncate(limit as usize);
8187        }
8188        Ok(QueryResult::Rows {
8189            columns,
8190            rows: projected_rows,
8191        })
8192    }
8193
8194    fn exec_bare_select_cancel(
8195        &self,
8196        stmt: &SelectStatement,
8197        cancel: CancelToken<'_>,
8198    ) -> Result<QueryResult, EngineError> {
8199        // v7.17.0 Phase 3.P0-49 — `FETCH FIRST N ROWS WITH TIES`
8200        // is meaningless without an ORDER BY; PG raises a hard
8201        // error and SPG mirrors the surface so the same DDL/app
8202        // path behaves identically on cutover.
8203        check_with_ties_requires_order_by(stmt)?;
8204        // v7.16.2 — same meta-view dispatch as
8205        // `exec_select_cancel`, applied here too because
8206        // `subquery_replacement` enters this function directly
8207        // for Exists / ScalarSubquery / InSubquery resolution
8208        // (bypassing the top-level entry to avoid double
8209        // subquery walking). Without this dispatch the subquery
8210        // hits `__spg_info_columns` and reports TableNotFound.
8211        if !self.meta_views_materialised && select_references_meta_view(stmt) {
8212            return self.exec_select_with_meta_views(stmt, cancel);
8213        }
8214        // v4.12: window-function path. When the projection contains
8215        // any `name(args) OVER (...)` we route to the dedicated
8216        // executor — partition + sort + per-row window value before
8217        // the regular projection.
8218        if select_has_window(stmt) {
8219            return self.exec_select_with_window(stmt, cancel);
8220        }
8221        // Constant SELECT (no FROM) — evaluate each item once against an
8222        // empty dummy row. Useful for `SELECT 1`, `SELECT coalesce(...)`,
8223        // `SELECT '7'::INT`. Column references will surface as
8224        // ColumnNotFound on eval since the schema is empty.
8225        let Some(from) = &stmt.from else {
8226            let empty_schema: Vec<ColumnSchema> = Vec::new();
8227            let ctx = self.ev_ctx(&empty_schema, None);
8228            let projection = build_projection(&stmt.items, &empty_schema, "")?;
8229            let dummy_row = Row::new(Vec::new());
8230            let mut values = Vec::with_capacity(projection.len());
8231            for p in &projection {
8232                values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
8233            }
8234            let columns: Vec<ColumnSchema> = projection
8235                .into_iter()
8236                .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
8237                .collect();
8238            return Ok(QueryResult::Rows {
8239                columns,
8240                rows: alloc::vec![Row::new(values)],
8241            });
8242        };
8243        // Multi-table FROM (one or more joined peers) goes through the
8244        // nested-loop join executor. Single-table FROM stays on the
8245        // existing scan + index-seek path.
8246        if !from.joins.is_empty() {
8247            return self.exec_joined_select(stmt, from, cancel);
8248        }
8249        // v7.11.7 — `FROM unnest(<expr>) [AS] <alias>`. Synthesise a
8250        // single-column table at SELECT entry by evaluating the
8251        // expression once against the empty row (UNNEST is
8252        // uncorrelated in v7.11; correlated / LATERAL unnest is a
8253        // v7.12 carve-out). Build a virtual `Table` in a heap-only
8254        // catalog, then route to the regular scan path.
8255        if from.primary.unnest_expr.is_some() {
8256            return self.exec_select_unnest(stmt, &from.primary, cancel);
8257        }
8258        // v7.17.0 Phase 3.10 — `FROM generate_series(start, stop
8259        // [, step])` set-returning source. Dispatch mirrors UNNEST:
8260        // materialise the row stream from a single eval pass, then
8261        // run the regular projection / WHERE / ORDER BY / LIMIT
8262        // pipeline over the synthetic single-column table.
8263        if from.primary.generate_series_args.is_some() {
8264            return self.exec_select_generate_series(stmt, &from.primary, cancel);
8265        }
8266        let primary = &from.primary;
8267        let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
8268            StorageError::TableNotFound {
8269                name: primary.name.clone(),
8270            }
8271        })?;
8272        let schema_cols = &table.schema().columns;
8273        // The qualifier accepted on column refs is the alias (if any) else the
8274        // bare table name.
8275        let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
8276        let ctx = self.ev_ctx(schema_cols, Some(alias));
8277
8278        // NSW kNN planner: `ORDER BY col <-> literal LIMIT k` with no
8279        // WHERE and an NSW index on `col` skips the full scan. The
8280        // walk returns rows already in ascending-distance order, so
8281        // ORDER BY / LIMIT are honoured implicitly.
8282        if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
8283            return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
8284        }
8285
8286        // Index seek: if WHERE is `col = literal` (or commuted) and the
8287        // referenced column has an index, dispatch each locator through
8288        // the catalog (hot tier → borrow, cold tier → page-read +
8289        // decode) and iterate just those rows. Otherwise fall back to a
8290        // full scan over the hot tier (cold-tier rows are only reached
8291        // via index seek in v5.1 — full table scans against cold-tier
8292        // data ship in v5.2 with the freezer's per-segment scan API).
8293        let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt.where_.as_ref().and_then(|w| {
8294            // BTree / col=literal seek first — covers the v7.11.3 multi-
8295            // column AND case and the leading-column equality lookup.
8296            try_index_seek(w, schema_cols, self.active_catalog(), table, alias)
8297                .or_else(|| {
8298                    // v7.12.3 — GIN-accelerated `WHERE col @@
8299                    // tsquery` when the column has a `USING gin`
8300                    // index. Returns an over-approximate candidate
8301                    // set; the WHERE re-eval loop below verifies
8302                    // the full `@@` predicate per row.
8303                    try_gin_seek(w, schema_cols, self.active_catalog(), table, alias, &ctx)
8304                })
8305                .or_else(|| {
8306                    // v7.15.0 — trigram-GIN-accelerated
8307                    // `WHERE col LIKE / ILIKE '<pat>'` when the
8308                    // column has a `gin_trgm_ops` GIN index.
8309                    // Over-approximate candidate set; the WHERE
8310                    // re-eval verifies the LIKE per row.
8311                    try_trgm_seek(w, schema_cols, table, alias)
8312                })
8313        });
8314
8315        // Aggregate path: filter rows first, then hand off to the
8316        // aggregate executor which does its own projection + ORDER BY.
8317        if aggregate::uses_aggregate(stmt) {
8318            let mut filtered: Vec<&Row> = Vec::new();
8319            // v6.2.6 — Memoize: per-query LRU cache for correlated
8320            // scalar subqueries. Fresh per row-loop entry so each
8321            // SELECT execution gets an isolated cache.
8322            let mut memo = memoize::MemoizeCache::new();
8323            if let Some(rows) = &indexed_rows {
8324                for cow in rows {
8325                    let row = cow.as_ref();
8326                    if let Some(where_expr) = &stmt.where_ {
8327                        let cond = self.eval_expr_with_correlated(
8328                            where_expr,
8329                            row,
8330                            &ctx,
8331                            cancel,
8332                            Some(&mut memo),
8333                        )?;
8334                        if !matches!(cond, Value::Bool(true)) {
8335                            continue;
8336                        }
8337                    }
8338                    filtered.push(row);
8339                }
8340            } else {
8341                for i in 0..table.row_count() {
8342                    let row = &table.rows()[i];
8343                    if let Some(where_expr) = &stmt.where_ {
8344                        let cond = self.eval_expr_with_correlated(
8345                            where_expr,
8346                            row,
8347                            &ctx,
8348                            cancel,
8349                            Some(&mut memo),
8350                        )?;
8351                        if !matches!(cond, Value::Bool(true)) {
8352                            continue;
8353                        }
8354                    }
8355                    filtered.push(row);
8356                }
8357            }
8358            // v7.29 — a per-query memo so correlated scalar
8359            // subqueries batch-evaluate once (group map) instead of
8360            // executing per group.
8361            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
8362            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
8363                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
8364                    .map_err(|err| match err {
8365                        EngineError::Eval(ev) => ev,
8366                        other => eval::EvalError::TypeMismatch {
8367                            detail: alloc::format!("{other}"),
8368                        },
8369                    })
8370            };
8371            let agg = aggregate::run(
8372                stmt,
8373                &filtered,
8374                schema_cols,
8375                Some(alias),
8376                Some(&agg_correlated),
8377            )?;
8378            return self.finish_agg_result(agg, stmt, cancel);
8379        }
8380
8381        let projection = build_projection(&stmt.items, schema_cols, alias)?;
8382        // v7.19 P5 — single-table SELECT path for SRF
8383        // `SELECT unnest(arr) FROM t` shape. Detect a top-level
8384        // unnest in the projection list. When present, the
8385        // per-row processor emits one output row per array
8386        // element (broadcasting non-SRF projections from the
8387        // same input row). Empty / NULL arrays emit zero rows
8388        // for that input — PG semantics.
8389        let srf_position = projection.iter().position(|p| is_top_level_unnest(&p.expr));
8390
8391        // Materialise the filter pass into `(order_key, projected_row)`
8392        // tuples. The order key is `None` when there's no ORDER BY clause.
8393        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
8394        // v6.2.6 — Memoize per-row WHERE eval shares one cache.
8395        let mut memo = memoize::MemoizeCache::new();
8396        // Inline the per-row work in a closure so the indexed and full-
8397        // scan branches share the body.
8398        let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
8399            if loop_idx.is_multiple_of(256) {
8400                cancel.check()?;
8401            }
8402            if let Some(where_expr) = &stmt.where_ {
8403                let cond =
8404                    self.eval_expr_with_correlated(where_expr, row, &ctx, cancel, Some(&mut memo))?;
8405                if !matches!(cond, Value::Bool(true)) {
8406                    return Ok(());
8407                }
8408            }
8409            let order_keys = if stmt.order_by.is_empty() {
8410                Vec::new()
8411            } else {
8412                build_order_keys(&stmt.order_by, row, &ctx)?
8413            };
8414            if let Some(srf_idx) = srf_position {
8415                let srf_arg = top_level_unnest_arg(&projection[srf_idx].expr)
8416                    .expect("checked by is_top_level_unnest above");
8417                let arr_val = eval::eval_expr(srf_arg, row, &ctx)?;
8418                let elements = array_value_to_elements(&arr_val)?;
8419                for elem in elements {
8420                    let mut values = Vec::with_capacity(projection.len());
8421                    for (i, p) in projection.iter().enumerate() {
8422                        if i == srf_idx {
8423                            values.push(elem.clone());
8424                        } else {
8425                            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
8426                        }
8427                    }
8428                    tagged.push((order_keys.clone(), Row::new(values)));
8429                }
8430            } else {
8431                let mut values = Vec::with_capacity(projection.len());
8432                for p in &projection {
8433                    // v7.24 (round-16 B) — correlated-aware.
8434                    values.push(self.eval_expr_with_correlated(&p.expr, row, &ctx, cancel, None)?);
8435                }
8436                tagged.push((order_keys, Row::new(values)));
8437            }
8438            Ok(())
8439        };
8440        if let Some(rows) = &indexed_rows {
8441            for (loop_idx, cow) in rows.iter().enumerate() {
8442                process_row(cow.as_ref(), loop_idx)?;
8443            }
8444        } else {
8445            for i in 0..table.row_count() {
8446                process_row(&table.rows()[i], i)?;
8447            }
8448        }
8449
8450        if !stmt.order_by.is_empty() {
8451            // Partial-sort fast path: when LIMIT is small relative to
8452            // the row count, select_nth_unstable + sort just the
8453            // prefix is O(n + k log k) instead of O(n log n). DISTINCT
8454            // requires the full sort because de-dup happens after.
8455            // WITH TIES likewise needs the full sort so the tie
8456            // extension can scan past `limit` to find rows that
8457            // share the last-kept row's key.
8458            let keep = if stmt.distinct || stmt.limit_with_ties {
8459                None
8460            } else {
8461                stmt.limit_literal()
8462                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
8463            };
8464            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
8465            partial_sort_tagged(&mut tagged, keep, &descs);
8466        }
8467
8468        // v7.17.0 Phase 3.P0-49 — `FETCH FIRST … WITH TIES` extends
8469        // past the truncated tail through every row that shares the
8470        // last-kept row's ORDER BY key. The tie check uses the
8471        // already-computed `(order_keys, row)` pairs so it matches
8472        // the sort comparator exactly. DISTINCT + WITH TIES falls
8473        // through to the no-ties path (PG also disallows their
8474        // combination; SPG silently drops the tie extension here so
8475        // the customer doesn't see a hard error mid-query — the
8476        // user-visible result is still correct, just narrower).
8477        let output_rows: Vec<Row> = if stmt.limit_with_ties && !stmt.distinct {
8478            apply_offset_and_limit_tagged(
8479                &mut tagged,
8480                stmt.offset_literal(),
8481                stmt.limit_literal(),
8482                true,
8483            );
8484            tagged.into_iter().map(|(_, r)| r).collect()
8485        } else {
8486            let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
8487            if stmt.distinct {
8488                output_rows = dedup_rows(output_rows);
8489            }
8490            apply_offset_and_limit(
8491                &mut output_rows,
8492                stmt.offset_literal(),
8493                stmt.limit_literal(),
8494            );
8495            output_rows
8496        };
8497
8498        let columns: Vec<ColumnSchema> = projection
8499            .into_iter()
8500            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
8501            .collect();
8502
8503        Ok(QueryResult::Rows {
8504            columns,
8505            rows: output_rows,
8506        })
8507    }
8508
8509    /// Multi-table SELECT executor (one or more JOIN peers).
8510    ///
8511    /// v1.10 builds the joined row set up-front via nested-loop joins,
8512    /// then runs WHERE + projection + ORDER BY against the combined
8513    /// rows. No index seek. Aggregates and DISTINCT still work because
8514    /// the executor delegates projection through the same shared paths.
8515    #[allow(clippy::too_many_lines)]
8516    /// v7.13.2 — mailrs round-6 S5. Resolve a TableRef into an
8517    /// owned (rows, schema) pair. Catalog tables clone their hot
8518    /// rows + schema; UNNEST table refs evaluate their array
8519    /// expression once and synthesise a single-column row set
8520    /// using the same dispatch as `exec_select_unnest`. Used by
8521    /// the joined-select path so UNNEST can appear in any FROM
8522    /// position, not just as the primary.
8523    fn materialise_table_ref(
8524        &self,
8525        tref: &TableRef,
8526    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
8527        if let Some(expr) = tref.unnest_expr.as_deref() {
8528            let empty_schema: Vec<ColumnSchema> = Vec::new();
8529            let ctx = EvalContext::new(&empty_schema, None);
8530            let dummy_row = Row::new(Vec::new());
8531            let (elem_dtype, rows) =
8532                match eval::eval_expr(expr, &dummy_row, &ctx).map_err(EngineError::Eval)? {
8533                    Value::Null => (DataType::Text, Vec::new()),
8534                    Value::TextArray(items) => (
8535                        DataType::Text,
8536                        items
8537                            .into_iter()
8538                            .map(|item| {
8539                                Row::new(alloc::vec![match item {
8540                                    Some(s) => Value::Text(s),
8541                                    None => Value::Null,
8542                                }])
8543                            })
8544                            .collect(),
8545                    ),
8546                    Value::IntArray(items) => (
8547                        DataType::Int,
8548                        items
8549                            .into_iter()
8550                            .map(|item| {
8551                                Row::new(alloc::vec![match item {
8552                                    Some(n) => Value::Int(n),
8553                                    None => Value::Null,
8554                                }])
8555                            })
8556                            .collect(),
8557                    ),
8558                    Value::BigIntArray(items) => (
8559                        DataType::BigInt,
8560                        items
8561                            .into_iter()
8562                            .map(|item| {
8563                                Row::new(alloc::vec![match item {
8564                                    Some(n) => Value::BigInt(n),
8565                                    None => Value::Null,
8566                                }])
8567                            })
8568                            .collect(),
8569                    ),
8570                    other => {
8571                        return Err(EngineError::Unsupported(alloc::format!(
8572                            "unnest() expects an array argument, got {:?}",
8573                            other.data_type()
8574                        )));
8575                    }
8576                };
8577            let alias = tref.alias.clone().unwrap_or_else(|| "unnest".to_string());
8578            let col_name = tref.unnest_column_aliases.first().cloned().unwrap_or(alias);
8579            return Ok((
8580                rows,
8581                alloc::vec![ColumnSchema::new(col_name, elem_dtype, true)],
8582            ));
8583        }
8584        let table =
8585            self.active_catalog()
8586                .get(&tref.name)
8587                .ok_or_else(|| StorageError::TableNotFound {
8588                    name: tref.name.clone(),
8589                })?;
8590        let rows: Vec<Row> = table.rows().iter().cloned().collect();
8591        let cols = table.schema().columns.clone();
8592        Ok((rows, cols))
8593    }
8594
8595    /// v7.28 (round-22) — materialise a plain table ref with
8596    /// single-table predicates pushed BELOW the clone: an indexed
8597    /// `col = literal` narrows to the matching row ids before any
8598    /// row is cloned, the rest filter linearly. A correlated
8599    /// subquery body like `… JOIN messages m2 ON …
8600    /// WHERE m2.thread_id = '<outer>'` runs per GROUP — without
8601    /// this it cloned + scanned the full 24k-row table 23.5k times.
8602    /// Falls back to the plain path for non-table refs.
8603    fn materialise_table_ref_filtered(
8604        &self,
8605        tref: &TableRef,
8606        preds: &[&Expr],
8607    ) -> Result<(Vec<Row>, Vec<ColumnSchema>), EngineError> {
8608        if preds.is_empty()
8609            || tref.unnest_expr.is_some()
8610            || tref.lateral_subquery.is_some()
8611            || tref.as_of_segment.is_some()
8612        {
8613            return self.materialise_table_ref(tref);
8614        }
8615        let Some(table) = self.active_catalog().get(&tref.name) else {
8616            return self.materialise_table_ref(tref);
8617        };
8618        let cols = table.schema().columns.clone();
8619        let alias = tref.alias.as_deref().unwrap_or(tref.name.as_str());
8620        // Index seek on the first `col = literal` predicate with a
8621        // BTree on that column.
8622        let mut seeded: Option<Vec<usize>> = None;
8623        for p in preds {
8624            if let Expr::Binary {
8625                lhs,
8626                op: spg_sql::ast::BinOp::Eq,
8627                rhs,
8628            } = p
8629            {
8630                let pair = match (lhs.as_ref(), rhs.as_ref()) {
8631                    (Expr::Column(c), Expr::Literal(l)) | (Expr::Literal(l), Expr::Column(c)) => {
8632                        Some((c, l))
8633                    }
8634                    _ => None,
8635                };
8636                if let Some((c, l)) = pair
8637                    && c.qualifier
8638                        .as_deref()
8639                        .is_none_or(|q| q.eq_ignore_ascii_case(alias))
8640                    && let Some(pos) = cols.iter().position(|s| s.name == c.name)
8641                    && let Some(idx) = table.index_on(pos)
8642                    && let Some(key) = spg_storage::IndexKey::from_value(&eval::literal_to_value(l))
8643                {
8644                    let mut ids = Vec::new();
8645                    let mut all_hot = true;
8646                    for loc in idx.lookup_eq(&key) {
8647                        match *loc {
8648                            spg_storage::RowLocator::Hot(i) => ids.push(i),
8649                            spg_storage::RowLocator::Cold { .. } => {
8650                                all_hot = false;
8651                                break;
8652                            }
8653                        }
8654                    }
8655                    if all_hot {
8656                        seeded = Some(ids);
8657                        break;
8658                    }
8659                }
8660            }
8661        }
8662        let ctx = EvalContext::new(&cols, Some(alias));
8663        let mut out: Vec<Row> = Vec::new();
8664        let push_if = |row: &Row, out: &mut Vec<Row>| -> Result<(), EngineError> {
8665            for p in preds {
8666                let v = eval::eval_expr(p, row, &ctx).map_err(EngineError::Eval)?;
8667                if !matches!(v, Value::Bool(true)) {
8668                    return Ok(());
8669                }
8670            }
8671            out.push(row.clone());
8672            Ok(())
8673        };
8674        match seeded {
8675            Some(ids) => {
8676                for i in ids {
8677                    if let Some(row) = table.rows().get(i) {
8678                        push_if(row, &mut out)?;
8679                    }
8680                }
8681            }
8682            None => {
8683                for row in table.rows().iter() {
8684                    push_if(row, &mut out)?;
8685                }
8686            }
8687        }
8688        Ok((out, cols))
8689    }
8690
8691    /// v7.31 (perf campaign) — `materialise_table_ref_filtered` for
8692    /// the deferred-join pipeline: same index-seek + linear-filter
8693    /// logic, but returns surviving row INDICES into the stored
8694    /// table instead of cloned rows. The join working set reads the
8695    /// table in place; survivors clone once at output time.
8696    fn filter_table_indices(
8697        &self,
8698        table: &Table,
8699        alias: &str,
8700        preds: &[&Expr],
8701    ) -> Result<Vec<usize>, EngineError> {
8702        if preds.is_empty() {
8703            return Ok((0..table.rows().len()).collect());
8704        }
8705        let cols = &table.schema().columns;
8706        let mut seeded: Option<Vec<usize>> = None;
8707        for p in preds {
8708            if let Expr::Binary {
8709                lhs,
8710                op: spg_sql::ast::BinOp::Eq,
8711                rhs,
8712            } = p
8713            {
8714                let pair = match (lhs.as_ref(), rhs.as_ref()) {
8715                    (Expr::Column(c), Expr::Literal(l)) | (Expr::Literal(l), Expr::Column(c)) => {
8716                        Some((c, l))
8717                    }
8718                    _ => None,
8719                };
8720                if let Some((c, l)) = pair
8721                    && c.qualifier
8722                        .as_deref()
8723                        .is_none_or(|q| q.eq_ignore_ascii_case(alias))
8724                    && let Some(pos) = cols.iter().position(|s| s.name == c.name)
8725                    && let Some(idx) = table.index_on(pos)
8726                    && let Some(key) = spg_storage::IndexKey::from_value(&eval::literal_to_value(l))
8727                {
8728                    let mut ids = Vec::new();
8729                    let mut all_hot = true;
8730                    for loc in idx.lookup_eq(&key) {
8731                        match *loc {
8732                            spg_storage::RowLocator::Hot(i) => ids.push(i),
8733                            spg_storage::RowLocator::Cold { .. } => {
8734                                all_hot = false;
8735                                break;
8736                            }
8737                        }
8738                    }
8739                    if all_hot {
8740                        seeded = Some(ids);
8741                        break;
8742                    }
8743                }
8744            }
8745        }
8746        let ctx = EvalContext::new(cols, Some(alias));
8747        let keep = |row: &Row| -> Result<bool, EngineError> {
8748            for p in preds {
8749                let v = eval::eval_expr(p, row, &ctx).map_err(EngineError::Eval)?;
8750                if !matches!(v, Value::Bool(true)) {
8751                    return Ok(false);
8752                }
8753            }
8754            Ok(true)
8755        };
8756        let mut out: Vec<usize> = Vec::new();
8757        match seeded {
8758            Some(ids) => {
8759                for i in ids {
8760                    if let Some(row) = table.rows().get(i)
8761                        && keep(row)?
8762                    {
8763                        out.push(i);
8764                    }
8765                }
8766            }
8767            None => {
8768                for (i, row) in table.rows().iter().enumerate() {
8769                    if keep(row)? {
8770                        out.push(i);
8771                    }
8772                }
8773            }
8774        }
8775        Ok(out)
8776    }
8777
    // NOTE: the v7.17.0 paragraph below documents
    // `build_joined_filtered_rows` (defined after the small helpers
    // that follow), not the helper immediately beneath it. It is kept
    // as a plain comment so rustdoc does not attach it to the wrong
    // item.
    //
    // v7.17.0 Phase 3.P0-43 — materialise a `FROM` with one or more
    // JOINs into `(combined_schema, filtered_rows)`. The combined
    // schema uses composite `alias.col` column names so the
    // qualifier-aware column resolver finds every join peer by
    // exact match; the filtered rows are the joined rows that
    // survive the optional WHERE clause.
    //
    // Shared by `exec_joined_select` and the JOIN branch of
    // `exec_select_with_window`; both paths used to inline the
    // same nested-loop logic, and the window path rejected JOIN
    // outright.
8789    /// v7.28 (round-22) — resolve a Column reference against a
8790    /// composite ("alias.col") schema slice. Bare names match a
8791    /// unique ".col" suffix.
8792    fn composite_col_pos(schema: &[ColumnSchema], c: &spg_sql::ast::ColumnName) -> Option<usize> {
8793        if let Some(q) = &c.qualifier {
8794            let composite = alloc::format!("{q}.{}", c.name);
8795            return schema.iter().position(|s| s.name == composite);
8796        }
8797        let suffix = alloc::format!(".{}", c.name);
8798        let mut hits = schema
8799            .iter()
8800            .enumerate()
8801            .filter(|(_, s)| s.name.ends_with(&suffix) || s.name == c.name);
8802        let first = hits.next();
8803        if hits.next().is_some() {
8804            return None; // ambiguous — leave to the residual evaluator
8805        }
8806        first.map(|(i, _)| i)
8807    }
8808
8809    /// v7.28 (round-22) — resolve a Column against ONE peer's own
8810    /// columns (right side of a join): `alias.col` or a bare name.
8811    fn peer_col_pos(
8812        peer_alias: &str,
8813        peer_cols: &[ColumnSchema],
8814        c: &spg_sql::ast::ColumnName,
8815    ) -> Option<usize> {
8816        if let Some(q) = &c.qualifier
8817            && !q.eq_ignore_ascii_case(peer_alias)
8818        {
8819            return None;
8820        }
8821        peer_cols.iter().position(|s| s.name == c.name)
8822    }
8823
8824    /// v7.28 (round-22) — drop the VALUES of columns the statement
8825    /// never references (schema and positions stay; the value
8826    /// becomes NULL, so a 30 KB body column costs nothing through
8827    /// the join pipeline instead of being cloned per row).
8828    fn null_out_unreferenced(
8829        rows: &mut [Row],
8830        cols: &[ColumnSchema],
8831        alias: &str,
8832        needed: &alloc::collections::BTreeSet<(String, String)>,
8833    ) {
8834        let keep: Vec<bool> = cols
8835            .iter()
8836            .map(|c| needed.contains(&(alias.to_string(), c.name.clone())))
8837            .collect();
8838        if keep.iter().all(|k| *k) {
8839            return;
8840        }
8841        for row in rows.iter_mut() {
8842            for (i, k) in keep.iter().enumerate() {
8843                if !*k && i < row.values.len() {
8844                    row.values[i] = Value::Null;
8845                }
8846            }
8847        }
8848    }
8849
8850    fn build_joined_filtered_rows(
8851        &self,
8852        from: &FromClause,
8853        where_: Option<&Expr>,
8854        cancel: CancelToken<'_>,
8855        needed: Option<&alloc::collections::BTreeSet<(String, String)>>,
8856        budget: &mut ByteBudget,
8857    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
8858        let primary_alias = from
8859            .primary
8860            .alias
8861            .as_deref()
8862            .unwrap_or(from.primary.name.as_str())
8863            .to_string();
8864        // v7.28 (round-22) — single-table predicate pushdown. WHERE
8865        // conjuncts whose every column is QUALIFIED with one table's
8866        // alias filter that table BEFORE the join (with an index
8867        // seek when one matches `col = literal`). Only the primary
8868        // and INNER peers are eligible — pre-filtering a LEFT peer
8869        // would change which rows NULL-extend. Pushed conjuncts stay
8870        // in WHERE too (idempotent), so correctness never depends on
8871        // the pushdown.
8872        let mut primary_preds: Vec<&Expr> = Vec::new();
8873        let mut peer_preds: Vec<Vec<&Expr>> = alloc::vec![Vec::new(); from.joins.len()];
8874        if let Some(w) = where_ {
8875            for sub in reorder::split_and_conjunctions(w) {
8876                if expr_has_subquery(sub) || aggregate::contains_aggregate(sub) {
8877                    continue;
8878                }
8879                let mut quals: Vec<&str> = Vec::new();
8880                let mut all_qualified = true;
8881                collect_column_qualifiers(sub, &mut quals, &mut all_qualified);
8882                if !all_qualified || quals.is_empty() {
8883                    continue;
8884                }
8885                let q0 = quals[0];
8886                if !quals.iter().all(|q| q.eq_ignore_ascii_case(q0)) {
8887                    continue;
8888                }
8889                if q0.eq_ignore_ascii_case(&primary_alias) {
8890                    primary_preds.push(sub);
8891                    continue;
8892                }
8893                for (i, j) in from.joins.iter().enumerate() {
8894                    if matches!(j.kind, JoinKind::Inner)
8895                        && j.table.lateral_subquery.is_none()
8896                        && q0.eq_ignore_ascii_case(
8897                            j.table.alias.as_deref().unwrap_or(j.table.name.as_str()),
8898                        )
8899                    {
8900                        peer_preds[i].push(sub);
8901                        break;
8902                    }
8903                }
8904            }
8905        }
8906        // v7.28 (round-22) — table-order swap: when the primary has
8907        // no pushed predicate but an INNER peer does, start from the
8908        // filtered peer instead. Equi-joins commute; output columns
8909        // resolve by composite name, so downstream projection is
8910        // order-independent. (A correlated subquery body like
8911        // `FROM email_analysis e2 JOIN messages m2 … WHERE
8912        // m2.thread_id = '<outer>'` otherwise clones the whole
8913        // unfiltered primary once per outer group.)
8914        let mut from_owned;
8915        let mut from = from;
8916        // Safety: swapping reorders which table joins FIRST, so it is
8917        // only legal when the FIRST join's ON references no table
8918        // beyond {primary, first peer} (a later peer's ON may name
8919        // the original primary, which must already be in the
8920        // combined row when that peer joins). Restrict to i == 0 AND
8921        // an ON whose qualifiers all live in those two tables.
8922        if primary_preds.is_empty()
8923            && let Some(j0) = from.joins.first()
8924            && matches!(j0.kind, JoinKind::Inner)
8925            && j0.table.lateral_subquery.is_none()
8926            && !peer_preds[0].is_empty()
8927        {
8928            let peer_alias = j0.table.alias.as_deref().unwrap_or(j0.table.name.as_str());
8929            let on_safe = j0.on.as_ref().is_some_and(|on| {
8930                let mut quals: Vec<&str> = Vec::new();
8931                let mut all_q = true;
8932                collect_column_qualifiers(on, &mut quals, &mut all_q);
8933                all_q
8934                    && quals.iter().all(|q| {
8935                        q.eq_ignore_ascii_case(&primary_alias) || q.eq_ignore_ascii_case(peer_alias)
8936                    })
8937            });
8938            if on_safe {
8939                from_owned = from.clone();
8940                core::mem::swap(&mut from_owned.primary, &mut from_owned.joins[0].table);
8941                primary_preds = peer_preds[0].drain(..).collect();
8942                from = &from_owned;
8943            }
8944        }
8945        let primary_alias = from
8946            .primary
8947            .alias
8948            .as_deref()
8949            .unwrap_or(from.primary.name.as_str())
8950            .to_string();
8951        // v7.31 (perf campaign) — when the primary is a plain stored
8952        // table and there are joins to run, keep it in place: filter
8953        // to row indices (same index seek / linear filter) and let
8954        // the deferred-join pipeline clone only the surviving,
8955        // referenced columns once at output time. Joinless FROMs and
8956        // non-table refs take the materialising path.
8957        //
8958        // v7.30.3 byte-budget interplay: the index path materialises
8959        // nothing (row numbers are 8 B each), so the budget charges
8960        // land where the clones happen — the materialising fallback
8961        // here, eager peers below, and the output assembly.
8962        let primary_table: Option<&Table> = if !from.joins.is_empty()
8963            && from.primary.unnest_expr.is_none()
8964            && from.primary.lateral_subquery.is_none()
8965            && from.primary.as_of_segment.is_none()
8966        {
8967            self.active_catalog().get(&from.primary.name)
8968        } else {
8969            None
8970        };
8971        let (primary_rows, primary_cols, primary_indices) = match primary_table {
8972            Some(t) => {
8973                let idxs = self.filter_table_indices(t, &primary_alias, &primary_preds)?;
8974                (Vec::new(), t.schema().columns.clone(), Some(idxs))
8975            }
8976            None => {
8977                let (mut rows, cols) =
8978                    self.materialise_table_ref_filtered(&from.primary, &primary_preds)?;
8979                if let Some(needed) = needed {
8980                    Self::null_out_unreferenced(&mut rows, &cols, &primary_alias, needed);
8981                }
8982                budget.charge(approx_rows_bytes(&rows))?;
8983                (rows, cols, None)
8984            }
8985        };
8986        // v7.17.0 Phase 3.P0-41 — LATERAL peers can't be
8987        // pre-materialised because their rows depend on outer
8988        // columns. For each peer, build either an eager
8989        // (rows, schema) pair or a "lateral" sentinel carrying
8990        // just the schema and the inner SELECT to re-run per
8991        // outer row.
8992        #[allow(clippy::type_complexity)]
8993        let mut joined: Vec<JoinedPeer<'_>> = Vec::new();
8994        for j in &from.joins {
8995            let a = j
8996                .table
8997                .alias
8998                .as_deref()
8999                .unwrap_or(j.table.name.as_str())
9000                .to_string();
9001            if let Some(inner_box) = &j.table.lateral_subquery {
9002                // Probe schema by running the inner SELECT against a
9003                // NULL-padded outer context. The probe gives us the
9004                // projection's column shape; rows materialise per
9005                // left-row below.
9006                let schema = self.lateral_probe_schema(inner_box)?;
9007                joined.push(JoinedPeer {
9008                    eager_rows: None,
9009                    cols: schema,
9010                    alias: a,
9011                    kind: j.kind,
9012                    on: j.on.as_ref(),
9013                    lateral: Some(inner_box.as_ref()),
9014                    join_table: None,
9015                });
9016            } else {
9017                let pidx = from
9018                    .joins
9019                    .iter()
9020                    .position(|jj| core::ptr::eq(jj, j))
9021                    .unwrap_or(0);
9022                // v7.28 - defer materialisation for plain tables with
9023                // no pushed predicate: the index-nested-loop path may
9024                // avoid cloning the table entirely.
9025                let plain = j.table.unnest_expr.is_none() && j.table.as_of_segment.is_none();
9026                if plain
9027                    && peer_preds[pidx].is_empty()
9028                    && let Some(t) = self.active_catalog().get(&j.table.name)
9029                {
9030                    joined.push(JoinedPeer {
9031                        eager_rows: None,
9032                        cols: t.schema().columns.clone(),
9033                        alias: a,
9034                        kind: j.kind,
9035                        on: j.on.as_ref(),
9036                        lateral: None,
9037                        join_table: Some(j.table.name.clone()),
9038                    });
9039                    continue;
9040                }
9041                let (mut rows, cols) =
9042                    self.materialise_table_ref_filtered(&j.table, &peer_preds[pidx])?;
9043                if let Some(needed) = needed {
9044                    Self::null_out_unreferenced(&mut rows, &cols, &a, needed);
9045                }
9046                budget.charge(approx_rows_bytes(&rows))?;
9047                joined.push(JoinedPeer {
9048                    eager_rows: Some(rows),
9049                    cols,
9050                    alias: a,
9051                    kind: j.kind,
9052                    on: j.on.as_ref(),
9053                    lateral: None,
9054                    join_table: Some(j.table.name.clone()),
9055                });
9056            }
9057        }
9058        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
9059        for col in &primary_cols {
9060            combined_schema.push(ColumnSchema::new(
9061                alloc::format!("{primary_alias}.{}", col.name),
9062                col.ty,
9063                col.nullable,
9064            ));
9065        }
9066        for peer in &joined {
9067            for col in &peer.cols {
9068                combined_schema.push(ColumnSchema::new(
9069                    alloc::format!("{}.{}", peer.alias, col.name),
9070                    col.ty,
9071                    col.nullable,
9072                ));
9073            }
9074        }
9075        let ctx = EvalContext::new(&combined_schema, None);
9076        // v7.28 (round-22) - intermediate-row ceiling: a join whose
9077        // working set explodes errors instead of eating the host
9078        // (mailrs watched RSS climb to 7 GiB of 15 before a manual
9079        // restart). The ceiling is per join STAGE, not per query.
9080        const MAX_JOIN_INTERMEDIATE_ROWS: usize = 4_000_000;
9081        if joined.is_empty() {
9082            // Joinless FROM: the primary rows ARE the combined rows —
9083            // filter and hand them back without any re-clone.
9084            let mut filtered: Vec<Row> = Vec::new();
9085            let mut memo = memoize::MemoizeCache::default();
9086            for row in primary_rows {
9087                if let Some(where_expr) = where_ {
9088                    let cond = self.eval_expr_with_correlated(
9089                        where_expr,
9090                        &row,
9091                        &ctx,
9092                        cancel,
9093                        Some(&mut memo),
9094                    )?;
9095                    if !matches!(cond, Value::Bool(true)) {
9096                        continue;
9097                    }
9098                }
9099                filtered.push(row);
9100            }
9101            return Ok((combined_schema, filtered));
9102        }
9103        // v7.31 (perf campaign) — deferred join materialisation. The
9104        // old pipeline cloned the full combined row at EVERY join
9105        // stage (stage k re-clones all columns of stages 0..k), so a
9106        // two-join 24k-row query paid the wide-Text clones twice
9107        // before aggregation saw a single row. The working set is now
9108        // a flat row-index tuple vec (stride = sources joined so far,
9109        // usize::MAX = LEFT-join NULL slot). Real rows materialise
9110        // exactly once: for a residual-ON / lateral / WHERE evaluation
9111        // that needs a Row, and for the survivors returned to the
9112        // caller.
9113        // Per-source column mask: which columns the statement
9114        // references (None = keep all). In-place sources apply it at
9115        // materialisation time instead of `null_out_unreferenced`.
9116        let keep_mask = |cols: &[ColumnSchema], alias: &str| -> Option<Vec<bool>> {
9117            let needed = needed?;
9118            let keep: Vec<bool> = cols
9119                .iter()
9120                .map(|c| needed.contains(&(alias.to_string(), c.name.clone())))
9121                .collect();
9122            if keep.iter().all(|k| *k) {
9123                None
9124            } else {
9125                Some(keep)
9126            }
9127        };
9128        let mut sources: Vec<JoinSrc<'_>> = Vec::new();
9129        let mut masks: Vec<Option<Vec<bool>>> = Vec::new();
9130        let mut widths: Vec<usize> = Vec::new();
9131        let mut offsets: Vec<usize> = alloc::vec![0];
9132        widths.push(primary_cols.len());
9133        offsets.push(primary_cols.len());
9134        let mut working: Vec<usize> = match primary_indices {
9135            Some(idxs) => {
9136                let t = primary_table.expect("stored primary");
9137                sources.push(JoinSrc::Stored(t.rows()));
9138                masks.push(keep_mask(&primary_cols, &primary_alias));
9139                idxs
9140            }
9141            None => {
9142                let n = primary_rows.len();
9143                sources.push(JoinSrc::Owned(primary_rows));
9144                masks.push(None);
9145                (0..n).collect()
9146            }
9147        };
9148        let mut stride = 1usize;
9149        // Track the per-row width consumed by the outer left side so
9150        // each lateral evaluation sees the correct schema slice.
9151        let mut consumed_cols = primary_cols.len();
9152        for peer in &joined {
9153            if working.len() / stride > MAX_JOIN_INTERMEDIATE_ROWS {
9154                return Err(EngineError::Unsupported(alloc::format!(
9155                    "join intermediate result exceeds {MAX_JOIN_INTERMEDIATE_ROWS} rows ({} so far) - add join predicates",
9156                    working.len() / stride
9157                )));
9158            }
9159            let right_arity = peer.cols.len();
9160            let peer_mask = keep_mask(&peer.cols, &peer.alias);
9161            // Stage outputs are row-number tuples (8 B per slot) —
9162            // the v7.30.3 byte budget has nothing to meter here; it
9163            // charges at the real materialisation points instead.
9164            let mut next: Vec<usize> = Vec::new();
9165            // v7.28 (round-22) — hash equi-join. The old path CLONED
9166            // the full combined row for EVERY (left, right) pair and
9167            // then evaluated ON — O(L×R) row materialisations (a
9168            // 24k × 6k LEFT JOIN = 1.5e8 multi-KB clones; the inbox
9169            // query never returned). Extract `left_col = right_col`
9170            // conjuncts from ON, build a hash on the (smaller,
9171            // already-materialised) right side, and only materialise
9172            // matching pairs. Residual ON conjuncts evaluate on the
9173            // candidates. NULL keys never match (SQL equality).
9174            let mut eq_pairs: Vec<(usize, usize)> = Vec::new(); // (left combined pos, right peer pos)
9175            let mut residual: Vec<&Expr> = Vec::new();
9176            if let (Some(on_expr), None) = (peer.on, peer.lateral) {
9177                for sub in reorder::split_and_conjunctions(on_expr) {
9178                    let mut matched = None;
9179                    if let Expr::Binary {
9180                        lhs,
9181                        op: spg_sql::ast::BinOp::Eq,
9182                        rhs,
9183                    } = sub
9184                        && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
9185                    {
9186                        let left_slice = &combined_schema[..consumed_cols];
9187                        if let (Some(l), Some(r)) = (
9188                            Self::composite_col_pos(left_slice, a),
9189                            Self::peer_col_pos(&peer.alias, &peer.cols, b),
9190                        ) {
9191                            matched = Some((l, r));
9192                        } else if let (Some(l), Some(r)) = (
9193                            Self::composite_col_pos(left_slice, b),
9194                            Self::peer_col_pos(&peer.alias, &peer.cols, a),
9195                        ) {
9196                            matched = Some((l, r));
9197                        }
9198                    }
9199                    match matched {
9200                        Some(pair) => eq_pairs.push(pair),
9201                        None => residual.push(sub),
9202                    }
9203                }
9204            }
9205            // v7.28 (round-22) - index-nested-loop: when the working
9206            // set is small and the peer's join column has a BTree,
9207            // seek per left row instead of materialising the whole
9208            // peer table (a correlated subquery body otherwise
9209            // clones the full table once per outer group).
9210            const INL_MAX_LEFT: usize = 1024;
9211            if let Some(tname) = &peer.join_table
9212                && peer.eager_rows.is_none()
9213                && !eq_pairs.is_empty()
9214                && working.len() / stride <= INL_MAX_LEFT
9215                && let Some(table) = self.active_catalog().get(tname)
9216                && let Some(idx) = peer
9217                    .cols
9218                    .iter()
9219                    .position(|c| c.name == peer.cols[eq_pairs[0].1].name)
9220                    .and_then(|pos| table.index_on(pos))
9221            {
9222                let stored = table.rows();
9223                let (lpos0, _) = eq_pairs[0];
9224                for tuple in working.chunks(stride) {
9225                    cancel.check()?;
9226                    let mut left_matched = false;
9227                    if let Some(kv) = tuple_value(&sources, &offsets, tuple, lpos0)
9228                        && !matches!(kv, Value::Null)
9229                        && let Some(key) = spg_storage::IndexKey::from_value(kv)
9230                    {
9231                        for loc in idx.lookup_eq(&key) {
9232                            let ri = match *loc {
9233                                spg_storage::RowLocator::Hot(i) => i,
9234                                spg_storage::RowLocator::Cold { .. } => continue,
9235                            };
9236                            let right = match stored.get(ri) {
9237                                Some(r) => r,
9238                                None => continue,
9239                            };
9240                            // Remaining eq pairs + residual ON check on
9241                            // the candidate only.
9242                            let mut ok = true;
9243                            for (lp, rp) in eq_pairs.iter().skip(1) {
9244                                let lv = tuple_value(&sources, &offsets, tuple, *lp);
9245                                let rv = right.values.get(*rp);
9246                                let eq = match (lv, rv) {
9247                                    (Some(a), Some(b)) => {
9248                                        !matches!(a, Value::Null)
9249                                            && !matches!(b, Value::Null)
9250                                            && value_cmp(a, b) == core::cmp::Ordering::Equal
9251                                    }
9252                                    _ => false,
9253                                };
9254                                if !eq {
9255                                    ok = false;
9256                                    break;
9257                                }
9258                            }
9259                            if !ok {
9260                                continue;
9261                            }
9262                            let keep = if residual.is_empty() {
9263                                true
9264                            } else {
9265                                let mut combined_vals = materialise_tuple_vals(
9266                                    &sources,
9267                                    &widths,
9268                                    &masks,
9269                                    tuple,
9270                                    consumed_cols + right_arity,
9271                                );
9272                                extend_masked(&mut combined_vals, right, peer_mask.as_deref());
9273                                let combined = Row::new(combined_vals);
9274                                let mut k = true;
9275                                for r in &residual {
9276                                    let cond = self.eval_expr_with_correlated(
9277                                        r, &combined, &ctx, cancel, None,
9278                                    )?;
9279                                    if !matches!(cond, Value::Bool(true)) {
9280                                        k = false;
9281                                        break;
9282                                    }
9283                                }
9284                                k
9285                            };
9286                            if keep {
9287                                next.extend_from_slice(tuple);
9288                                next.push(ri);
9289                                left_matched = true;
9290                            }
9291                        }
9292                    }
9293                    if !left_matched && matches!(peer.kind, JoinKind::Left) {
9294                        next.extend_from_slice(tuple);
9295                        next.push(usize::MAX);
9296                    }
9297                }
9298                working = next;
9299                stride += 1;
9300                sources.push(JoinSrc::Stored(stored));
9301                masks.push(peer_mask);
9302                consumed_cols += right_arity;
9303                offsets.push(consumed_cols);
9304                widths.push(right_arity);
9305                continue;
9306            }
9307            if !eq_pairs.is_empty() && peer.lateral.is_none() {
9308                // Build side: eager rows if the peer was materialised
9309                // (pushed predicate / non-table ref), otherwise the
9310                // stored table read in place (v7.31 — no full-table
9311                // clone + null-out just to hash it).
9312                let rights_src: JoinSrc<'_> = match peer.eager_rows.as_deref() {
9313                    Some(er) => JoinSrc::Eager(er),
9314                    None => match peer
9315                        .join_table
9316                        .as_deref()
9317                        .and_then(|n| self.active_catalog().get(n))
9318                    {
9319                        Some(t) => JoinSrc::Stored(t.rows()),
9320                        None => JoinSrc::Owned(Vec::new()),
9321                    },
9322                };
9323                let n_rights = rights_src.len();
9324                // v7.29 - hashbrown over BTreeMap: the ordered map
9325                // paid O(log n) string comparisons per insert/probe
9326                // (24k-row build sides spent ~100 ms in it).
9327                let mut table: hashbrown::HashMap<String, Vec<usize>> =
9328                    hashbrown::HashMap::with_capacity(n_rights);
9329                let mut keybuf: Vec<&Value> = Vec::with_capacity(eq_pairs.len());
9330                // v7.31 (perf 3e) — scratch key buffer: build inserts
9331                // allocate only on vacant, probes never allocate (the
9332                // old code built a fresh String for all 24k probes).
9333                let mut keystr = String::new();
9334                'build: for ri in 0..n_rights {
9335                    let Some(right) = rights_src.get(ri) else {
9336                        continue;
9337                    };
9338                    keybuf.clear();
9339                    for (_, rpos) in &eq_pairs {
9340                        match right.values.get(*rpos) {
9341                            Some(v) if !matches!(v, Value::Null) => keybuf.push(v),
9342                            _ => continue 'build,
9343                        }
9344                    }
9345                    aggregate::encode_key_refs_into(&keybuf, &mut keystr);
9346                    table.entry_ref(keystr.as_str()).or_default().push(ri);
9347                }
9348                let mut probebuf: Vec<&Value> = Vec::with_capacity(eq_pairs.len());
9349                for tuple in working.chunks(stride) {
9350                    cancel.check()?;
9351                    let mut left_matched = false;
9352                    probebuf.clear();
9353                    let mut left_has_null = false;
9354                    for (lpos, _) in &eq_pairs {
9355                        match tuple_value(&sources, &offsets, tuple, *lpos) {
9356                            Some(v) if !matches!(v, Value::Null) => probebuf.push(v),
9357                            _ => {
9358                                left_has_null = true;
9359                                break;
9360                            }
9361                        }
9362                    }
9363                    if !left_has_null {
9364                        aggregate::encode_key_refs_into(&probebuf, &mut keystr);
9365                    }
9366                    if !left_has_null && let Some(cands) = table.get(keystr.as_str()) {
9367                        for &ri in cands {
9368                            let keep = if residual.is_empty() {
9369                                true
9370                            } else {
9371                                let right = rights_src.get(ri).expect("hash candidate row");
9372                                let mut combined_vals = materialise_tuple_vals(
9373                                    &sources,
9374                                    &widths,
9375                                    &masks,
9376                                    tuple,
9377                                    consumed_cols + right_arity,
9378                                );
9379                                extend_masked(&mut combined_vals, right, peer_mask.as_deref());
9380                                let combined = Row::new(combined_vals);
9381                                let mut ok = true;
9382                                for r in &residual {
9383                                    let cond = self.eval_expr_with_correlated(
9384                                        r, &combined, &ctx, cancel, None,
9385                                    )?;
9386                                    if !matches!(cond, Value::Bool(true)) {
9387                                        ok = false;
9388                                        break;
9389                                    }
9390                                }
9391                                ok
9392                            };
9393                            if keep {
9394                                next.extend_from_slice(tuple);
9395                                next.push(ri);
9396                                left_matched = true;
9397                            }
9398                        }
9399                    }
9400                    if !left_matched && matches!(peer.kind, JoinKind::Left) {
9401                        next.extend_from_slice(tuple);
9402                        next.push(usize::MAX);
9403                    }
9404                }
9405                working = next;
9406                stride += 1;
9407                sources.push(rights_src);
9408                masks.push(peer_mask);
9409                consumed_cols += right_arity;
9410                offsets.push(consumed_cols);
9411                widths.push(right_arity);
9412                debug_assert!(consumed_cols <= combined_schema.len());
9413                continue;
9414            }
9415            // Fallback: nested loop (lateral peers, non-equi ON).
9416            // A deferred plain-table peer materialises here (pruned),
9417            // since every (left, right) pair gets evaluated anyway.
9418            let lazy_rows: Option<Vec<Row>> = if peer.eager_rows.is_none() && peer.lateral.is_none()
9419            {
9420                let tname = peer.join_table.as_deref().unwrap_or("");
9421                let mut rows: Vec<Row> = self
9422                    .active_catalog()
9423                    .get(tname)
9424                    .map(|t| t.rows().iter().cloned().collect())
9425                    .unwrap_or_default();
9426                if let Some(needed) = needed {
9427                    Self::null_out_unreferenced(&mut rows, &peer.cols, &peer.alias, needed);
9428                }
9429                budget.charge(approx_rows_bytes(&rows))?;
9430                Some(rows)
9431            } else {
9432                None
9433            };
9434            // Lateral results are per-outer-row, so matched right rows
9435            // persist in a stage arena the tuples can index.
9436            let mut arena: Vec<Row> = Vec::new();
9437            let rights_eager: Option<&[Row]> = peer.eager_rows.as_deref().or(lazy_rows.as_deref());
9438            for tuple in working.chunks(stride) {
9439                cancel.check()?;
9440                let mut left_matched = false;
9441                let left_vals =
9442                    materialise_tuple_vals(&sources, &widths, &masks, tuple, consumed_cols);
9443                let per_left_rrows: alloc::borrow::Cow<'_, [Row]> = match peer.lateral {
9444                    Some(inner) => {
9445                        // Substitute outer columns and run the inner
9446                        // SELECT against the current left row's slice
9447                        // of the combined schema.
9448                        let outer_schema = &combined_schema[..consumed_cols];
9449                        let left_row = Row::new(left_vals.clone());
9450                        let rows =
9451                            self.materialise_lateral_for_outer(inner, outer_schema, &left_row)?;
9452                        alloc::borrow::Cow::Owned(rows)
9453                    }
9454                    None => {
9455                        alloc::borrow::Cow::Borrowed(rights_eager.expect("non-lateral peer eager"))
9456                    }
9457                };
9458                for (ri, right) in per_left_rrows.as_ref().iter().enumerate() {
9459                    let mut combined_vals = left_vals.clone();
9460                    combined_vals.extend(right.values.iter().cloned());
9461                    let combined = Row::new(combined_vals);
9462                    let keep = if let Some(on_expr) = peer.on {
9463                        // v7.24.1 — correlated-aware (subqueries in
9464                        // ON referencing earlier join columns).
9465                        let cond =
9466                            self.eval_expr_with_correlated(on_expr, &combined, &ctx, cancel, None)?;
9467                        matches!(cond, Value::Bool(true))
9468                    } else {
9469                        true
9470                    };
9471                    if keep {
9472                        next.extend_from_slice(tuple);
9473                        if peer.lateral.is_some() {
9474                            let mut cv = combined.values;
9475                            let rv = cv.split_off(left_vals.len());
9476                            arena.push(Row::new(rv));
9477                            next.push(arena.len() - 1);
9478                        } else {
9479                            next.push(ri);
9480                        }
9481                        left_matched = true;
9482                    }
9483                }
9484                if !left_matched && matches!(peer.kind, JoinKind::Left) {
9485                    next.extend_from_slice(tuple);
9486                    next.push(usize::MAX);
9487                }
9488            }
9489            working = next;
9490            stride += 1;
9491            if working.len() / stride > MAX_JOIN_INTERMEDIATE_ROWS {
9492                return Err(EngineError::Unsupported(alloc::format!(
9493                    "join intermediate result exceeds {MAX_JOIN_INTERMEDIATE_ROWS} rows ({} so far) - add join predicates",
9494                    working.len() / stride
9495                )));
9496            }
9497            if peer.lateral.is_some() {
9498                sources.push(JoinSrc::Owned(arena));
9499            } else if let Some(lz) = lazy_rows {
9500                sources.push(JoinSrc::Owned(lz));
9501            } else {
9502                sources.push(JoinSrc::Eager(
9503                    peer.eager_rows.as_deref().expect("non-lateral peer eager"),
9504                ));
9505            }
9506            // Fallback sources are pre-pruned (eager / lazy null-out)
9507            // or lateral projections; nothing left for a mask to drop.
9508            masks.push(None);
9509            consumed_cols += right_arity;
9510            offsets.push(consumed_cols);
9511            widths.push(right_arity);
9512            debug_assert!(consumed_cols <= combined_schema.len());
9513        }
9514        let mut filtered: Vec<Row> = Vec::new();
9515        // v7.24 (round-16 B) — the joined WHERE filter ran the plain
9516        // row evaluator, so a correlated EXISTS/IN/scalar subquery
9517        // under a JOIN hit "subquery reached row eval". Route through
9518        // the correlated-aware evaluator (memoized, same as the
9519        // single-table path).
9520        let mut memo = memoize::MemoizeCache::default();
9521        for tuple in working.chunks(stride) {
9522            let row = Row::new(materialise_tuple_vals(
9523                &sources,
9524                &widths,
9525                &masks,
9526                tuple,
9527                consumed_cols,
9528            ));
9529            if let Some(where_expr) = where_ {
9530                let cond = self.eval_expr_with_correlated(
9531                    where_expr,
9532                    &row,
9533                    &ctx,
9534                    cancel,
9535                    Some(&mut memo),
9536                )?;
9537                if !matches!(cond, Value::Bool(true)) {
9538                    continue;
9539                }
9540            }
9541            // v7.30.3 byte budget — in the deferred-join pipeline the
9542            // tuples are 8 B row numbers; THIS is the point where fat
9543            // values actually get cloned and retained, so this is
9544            // where the meter charges (rows failing WHERE are dropped
9545            // immediately and never accumulate).
9546            budget.charge(approx_row_bytes(&row))?;
9547            filtered.push(row);
9548        }
9549        Ok((combined_schema, filtered))
9550    }
9551
9552    /// v7.17.0 Phase 3.P0-41 — probe a LATERAL subquery's projection
9553    /// schema by running it once with a NULL-padded outer context.
9554    /// The probe never materialises real outer rows; it just executes
9555    /// the inner SELECT with `outer_alias.col` references substituted
9556    /// to NULL so the projection's type inference is exercised.
9557    fn lateral_probe_schema(
9558        &self,
9559        inner: &SelectStatement,
9560    ) -> Result<Vec<ColumnSchema>, EngineError> {
9561        // Substitute every qualified column reference whose qualifier
9562        // does NOT match an in-subquery FROM alias with NULL. The
9563        // safest probe is to walk the inner SELECT and replace any
9564        // `<qual>.<col>` whose qual isn't bound inside the subquery
9565        // with a Null literal. For the v7.17 probe we just run the
9566        // unmodified subquery and surface the columns; if it fails
9567        // (e.g. references an outer column the probe can't resolve),
9568        // we synthesise a best-effort schema from the SELECT items
9569        // by inferring a single Text-typed column per projection.
9570        match self.execute_readonly_select_for_lateral_probe(inner) {
9571            Ok(QueryResult::Rows { columns, .. }) => Ok(columns),
9572            // Best-effort fallback: each SELECT item becomes a TEXT
9573            // column. Real schemas only differ when the inner SELECT
9574            // references outer columns at projection-time; those
9575            // queries surface via the substitution path during
9576            // per-row execution and still return the right values.
9577            _ => {
9578                let mut out: Vec<ColumnSchema> = Vec::new();
9579                for (i, item) in inner.items.iter().enumerate() {
9580                    let name = match item {
9581                        SelectItem::Expr { alias: Some(a), .. } => a.clone(),
9582                        SelectItem::Expr { expr, .. } => synth_lateral_col_name(expr, i),
9583                        SelectItem::Wildcard => alloc::format!("col{i}"),
9584                    };
9585                    out.push(ColumnSchema::new(name, DataType::Text, true));
9586                }
9587                Ok(out)
9588            }
9589        }
9590    }
9591
9592    /// v7.17.0 Phase 3.P0-41 — try the inner LATERAL subquery against
9593    /// the engine in read-only mode for schema-probe purposes. Failure
9594    /// is expected when the subquery references an outer column the
9595    /// probe can't resolve; the caller falls back to a best-effort
9596    /// schema based on the SELECT items.
9597    fn execute_readonly_select_for_lateral_probe(
9598        &self,
9599        inner: &SelectStatement,
9600    ) -> Result<QueryResult, EngineError> {
9601        self.exec_bare_select_cancel(inner, CancelToken::none())
9602    }
9603
9604    /// v7.17.0 Phase 3.P0-41 — materialise a LATERAL subquery's rows
9605    /// for one outer-row context. Walks the inner SELECT, replaces
9606    /// every `<outer_alias>.<col>` reference whose alias appears in
9607    /// the outer schema with the literal value from the outer row,
9608    /// then runs the rewritten SELECT against the engine.
9609    fn materialise_lateral_for_outer(
9610        &self,
9611        inner: &SelectStatement,
9612        outer_schema: &[ColumnSchema],
9613        outer_row: &Row,
9614    ) -> Result<Vec<Row>, EngineError> {
9615        let mut substituted = inner.clone();
9616        substitute_outer_columns_multi(&mut substituted, outer_row, outer_schema);
9617        let result = self.exec_bare_select_cancel(&substituted, CancelToken::none())?;
9618        match result {
9619            QueryResult::Rows { rows, .. } => Ok(rows),
9620            _ => Err(EngineError::Unsupported(
9621                "LATERAL subquery must be a SELECT (cannot be a write statement)".into(),
9622            )),
9623        }
9624    }
9625
9626    /// v7.31 (perf — PG lesson #1): shared aggregate finisher. Apply
9627    /// OFFSET/LIMIT first, then evaluate the deferred subquery-bearing
9628    /// select items for the surviving rows only — PG's Result-above-
9629    /// Limit shape, where SubPlan loops equal the OUTPUT row count
9630    /// (50) instead of the group count (24k).
9631    fn finish_agg_result(
9632        &self,
9633        mut agg: aggregate::AggResult,
9634        stmt: &SelectStatement,
9635        cancel: CancelToken<'_>,
9636    ) -> Result<QueryResult, EngineError> {
9637        apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
9638        if !agg.deferred.is_empty() {
9639            apply_offset_and_limit(
9640                &mut agg.synth_rows,
9641                stmt.offset_literal(),
9642                stmt.limit_literal(),
9643            );
9644            let ctx = EvalContext::new(&agg.synth_schema, None);
9645            // Memoized path on purpose. Bypassing the batch (memo
9646            // None) was measured at 715 ms — 12.8 ms PER direct eval:
9647            // the join-shaped inner subquery takes no index seek
9648            // (seeded lookups demand all-hot locators, and the e2
9649            // JOIN m2 inner shape re-runs the full join pipeline per
9650            // row). One all-keys batch ≈ 15 ms total stays the best
9651            // available until inner subplans can index-probe like
9652            // PG's SubPlan (knife: keyed single-probe execution).
9653            let mut memo = memoize::MemoizeCache::default();
9654            for (ri, srow) in agg.synth_rows.iter().enumerate() {
9655                cancel.check()?;
9656                for (col, expr) in &agg.deferred {
9657                    let v =
9658                        self.eval_expr_with_correlated(expr, srow, &ctx, cancel, Some(&mut memo))?;
9659                    if let Some(cell) = agg.rows[ri].values.get_mut(*col) {
9660                        *cell = v;
9661                    }
9662                }
9663            }
9664        }
9665        Ok(QueryResult::Rows {
9666            columns: agg.columns,
9667            rows: agg.rows,
9668        })
9669    }
9670
9671    /// v7.30.3 (mailrs round-26) — bounded execution for the backfill
9672    /// shape that walked prod into reclaim livelock:
9673    ///
9674    ///   SELECT … FROM big b JOIN small s ON b.k = s.k
9675    ///   WHERE … ORDER BY … LIMIT n
9676    ///
9677    /// The general join path materialises the FULL join+filter result
9678    /// (≈2× the table's fat columns on a fresh backfill scan) before
9679    /// LIMIT truncates to n rows. Here the primary streams row-by-row
9680    /// against a hash of the materialised peer, and accepted rows feed
9681    /// a keep = LIMIT+OFFSET bounded top-N heap — peak memory scales
9682    /// with the answer, not the table. Returns Ok(None) when the shape
9683    /// doesn't qualify; the caller falls through to the general path,
9684    /// which the byte budget guards.
9685    fn try_streamed_inner_join_topn(
9686        &self,
9687        stmt: &SelectStatement,
9688        from: &FromClause,
9689        cancel: CancelToken<'_>,
9690    ) -> Result<Option<QueryResult>, EngineError> {
9691        // Shape gate — any bail lands on the general path.
9692        let Some(limit) = stmt.limit_literal() else {
9693            return Ok(None);
9694        };
9695        if stmt.offset.is_some() && stmt.offset_literal().is_none() {
9696            return Ok(None);
9697        }
9698        if stmt.distinct
9699            || stmt.group_by.is_some()
9700            || stmt.having.is_some()
9701            || aggregate::uses_aggregate(stmt)
9702        {
9703            return Ok(None);
9704        }
9705        if from.joins.len() != 1 {
9706            return Ok(None);
9707        }
9708        let j = &from.joins[0];
9709        if !matches!(j.kind, JoinKind::Inner) {
9710            return Ok(None);
9711        }
9712        let plain = |t: &TableRef| {
9713            t.unnest_expr.is_none() && t.lateral_subquery.is_none() && t.as_of_segment.is_none()
9714        };
9715        if !plain(&from.primary) || !plain(&j.table) {
9716            return Ok(None);
9717        }
9718        let Some(on_expr) = j.on.as_ref() else {
9719            return Ok(None);
9720        };
9721        // Plain catalog tables only — views / virtual tables keep the
9722        // general path's materialise_table_ref fallback.
9723        let Some(primary_table) = self.active_catalog().get(&from.primary.name) else {
9724            return Ok(None);
9725        };
9726        if self.active_catalog().get(&j.table.name).is_none() {
9727            return Ok(None);
9728        }
9729        let primary_alias = from
9730            .primary
9731            .alias
9732            .as_deref()
9733            .unwrap_or(from.primary.name.as_str())
9734            .to_string();
9735        let peer_alias = j
9736            .table
9737            .alias
9738            .as_deref()
9739            .unwrap_or(j.table.name.as_str())
9740            .to_string();
9741        let mut needed = alloc::collections::BTreeSet::new();
9742        let prunable = collect_qualified_refs(stmt, &mut needed).is_some();
9743        // Peer side: materialise + prune exactly like the general
9744        // path; the budget still guards a degenerately fat peer.
9745        let mut budget = ByteBudget::new(self.max_query_bytes);
9746        let (mut peer_rows, peer_cols) = self.materialise_table_ref_filtered(&j.table, &[])?;
9747        if prunable {
9748            Self::null_out_unreferenced(&mut peer_rows, &peer_cols, &peer_alias, &needed);
9749        }
9750        budget.charge(approx_rows_bytes(&peer_rows))?;
9751        let primary_cols = primary_table.schema().columns.clone();
9752        let mut combined_schema: Vec<ColumnSchema> = Vec::new();
9753        for col in &primary_cols {
9754            combined_schema.push(ColumnSchema::new(
9755                alloc::format!("{primary_alias}.{}", col.name),
9756                col.ty,
9757                col.nullable,
9758            ));
9759        }
9760        for col in &peer_cols {
9761            combined_schema.push(ColumnSchema::new(
9762                alloc::format!("{peer_alias}.{}", col.name),
9763                col.ty,
9764                col.nullable,
9765            ));
9766        }
9767        let ctx = EvalContext::new(&combined_schema, None);
9768        // Hash-joinable left = right equality pairs from ON; anything
9769        // else stays as a residual conjunct on the candidate row.
9770        let left_arity = primary_cols.len();
9771        let mut eq_pairs: Vec<(usize, usize)> = Vec::new();
9772        let mut residual: Vec<&Expr> = Vec::new();
9773        for sub in reorder::split_and_conjunctions(on_expr) {
9774            let mut matched = None;
9775            if let Expr::Binary {
9776                lhs,
9777                op: spg_sql::ast::BinOp::Eq,
9778                rhs,
9779            } = sub
9780                && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
9781            {
9782                let left_slice = &combined_schema[..left_arity];
9783                if let (Some(l), Some(r)) = (
9784                    Self::composite_col_pos(left_slice, a),
9785                    Self::peer_col_pos(&peer_alias, &peer_cols, b),
9786                ) {
9787                    matched = Some((l, r));
9788                } else if let (Some(l), Some(r)) = (
9789                    Self::composite_col_pos(left_slice, b),
9790                    Self::peer_col_pos(&peer_alias, &peer_cols, a),
9791                ) {
9792                    matched = Some((l, r));
9793                }
9794            }
9795            match matched {
9796                Some(pair) => eq_pairs.push(pair),
9797                None => residual.push(sub),
9798            }
9799        }
9800        if eq_pairs.is_empty() {
9801            return Ok(None); // nested-loop shapes stay on the general path
9802        }
9803        // Hash the peer on the equality key (NULL keys never match).
9804        let mut htable: hashbrown::HashMap<String, Vec<usize>> =
9805            hashbrown::HashMap::with_capacity(peer_rows.len());
9806        let mut keybuf: Vec<Value> = Vec::with_capacity(eq_pairs.len());
9807        'build: for (ri, right) in peer_rows.iter().enumerate() {
9808            keybuf.clear();
9809            for (_, rpos) in &eq_pairs {
9810                let v = right.values.get(*rpos).cloned().unwrap_or(Value::Null);
9811                if matches!(v, Value::Null) {
9812                    continue 'build;
9813                }
9814                keybuf.push(v);
9815            }
9816            htable
9817                .entry(aggregate::encode_key(&keybuf))
9818                .or_default()
9819                .push(ri);
9820        }
9821        // Streamed twin of null_out_unreferenced: clone only the
9822        // referenced primary columns into each candidate row.
9823        let keep_mask: Vec<bool> = primary_cols
9824            .iter()
9825            .map(|c| !prunable || needed.contains(&(primary_alias.clone(), c.name.clone())))
9826            .collect();
9827        let keep = (limit as usize).saturating_add(stmt.offset_literal().map_or(0, |o| o as usize));
9828        let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
9829        let mut where_memo = memoize::MemoizeCache::default();
9830        let mut heap: alloc::collections::BinaryHeap<TopNEntry> =
9831            alloc::collections::BinaryHeap::new();
9832        let mut plain_sink: Vec<Row> = Vec::new();
9833        let mut seq: u64 = 0;
9834        'scan: for left in primary_table.rows().iter() {
9835            cancel.check()?;
9836            if keep == 0 {
9837                break 'scan;
9838            }
9839            keybuf.clear();
9840            let mut left_has_null = false;
9841            for (lpos, _) in &eq_pairs {
9842                let v = left.values.get(*lpos).cloned().unwrap_or(Value::Null);
9843                if matches!(v, Value::Null) {
9844                    left_has_null = true;
9845                    break;
9846                }
9847                keybuf.push(v);
9848            }
9849            if left_has_null {
9850                continue;
9851            }
9852            let Some(cands) = htable.get(&aggregate::encode_key(&keybuf)) else {
9853                continue;
9854            };
9855            for &ri in cands {
9856                let right = &peer_rows[ri];
9857                let mut combined_vals: Vec<Value> =
9858                    Vec::with_capacity(left_arity + peer_cols.len());
9859                for (i, v) in left.values.iter().enumerate() {
9860                    combined_vals.push(if keep_mask.get(i).copied().unwrap_or(true) {
9861                        v.clone()
9862                    } else {
9863                        Value::Null
9864                    });
9865                }
9866                combined_vals.extend(right.values.iter().cloned());
9867                let combined = Row::new(combined_vals);
9868                let mut ok = true;
9869                for r in &residual {
9870                    let cond = self.eval_expr_with_correlated(r, &combined, &ctx, cancel, None)?;
9871                    if !matches!(cond, Value::Bool(true)) {
9872                        ok = false;
9873                        break;
9874                    }
9875                }
9876                if !ok {
9877                    continue;
9878                }
9879                if let Some(w) = stmt.where_.as_ref() {
9880                    let cond = self.eval_expr_with_correlated(
9881                        w,
9882                        &combined,
9883                        &ctx,
9884                        cancel,
9885                        Some(&mut where_memo),
9886                    )?;
9887                    if !matches!(cond, Value::Bool(true)) {
9888                        continue;
9889                    }
9890                }
9891                if stmt.order_by.is_empty() {
9892                    budget.charge(approx_row_bytes(&combined))?;
9893                    plain_sink.push(combined);
9894                    if plain_sink.len() >= keep {
9895                        break 'scan;
9896                    }
9897                } else {
9898                    let raw = build_order_keys(&stmt.order_by, &combined, &ctx)?;
9899                    let keys: Vec<f64> = raw
9900                        .into_iter()
9901                        .enumerate()
9902                        .map(|(i, k)| {
9903                            if descs.get(i).copied().unwrap_or(false) {
9904                                -k
9905                            } else {
9906                                k
9907                            }
9908                        })
9909                        .collect();
9910                    let entry = TopNEntry {
9911                        keys,
9912                        seq,
9913                        row: combined,
9914                    };
9915                    seq += 1;
9916                    if heap.len() < keep {
9917                        budget.charge(approx_row_bytes(&entry.row))?;
9918                        heap.push(entry);
9919                    } else if let Some(top) = heap.peek()
9920                        && entry < *top
9921                    {
9922                        if let Some(evicted) = heap.pop() {
9923                            budget.release(approx_row_bytes(&evicted.row));
9924                        }
9925                        budget.charge(approx_row_bytes(&entry.row))?;
9926                        heap.push(entry);
9927                    }
9928                }
9929            }
9930        }
9931        let mut output: Vec<Row> = if stmt.order_by.is_empty() {
9932            plain_sink
9933        } else {
9934            heap.into_sorted_vec().into_iter().map(|e| e.row).collect()
9935        };
9936        apply_offset_and_limit(&mut output, stmt.offset_literal(), stmt.limit_literal());
9937        let projection = build_projection(&stmt.items, &combined_schema, "")?;
9938        let mut proj_memo = memoize::MemoizeCache::default();
9939        let mut rows: Vec<Row> = Vec::with_capacity(output.len());
9940        for row in &output {
9941            let mut values = Vec::with_capacity(projection.len());
9942            for p in &projection {
9943                values.push(self.eval_expr_with_correlated(
9944                    &p.expr,
9945                    row,
9946                    &ctx,
9947                    cancel,
9948                    Some(&mut proj_memo),
9949                )?);
9950            }
9951            rows.push(Row::new(values));
9952        }
9953        let columns: Vec<ColumnSchema> = projection
9954            .into_iter()
9955            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
9956            .collect();
9957        Ok(Some(QueryResult::Rows { columns, rows }))
9958    }
9959
9960    fn exec_joined_select(
9961        &self,
9962        stmt: &SelectStatement,
9963        from: &FromClause,
9964        cancel: CancelToken<'_>,
9965    ) -> Result<QueryResult, EngineError> {
9966        // v7.30.3 (mailrs round-26) — the bounded single-join path
9967        // first; peak memory scales with LIMIT instead of the table.
9968        if let Some(out) = self.try_streamed_inner_join_topn(stmt, from, cancel)? {
9969            return Ok(out);
9970        }
9971        // v7.17.0 Phase 3.P0-43 + P0-41 — delegate the join +
9972        // WHERE materialisation to the shared helper so the LATERAL
9973        // / UNNEST / regular-catalog paths route through one place.
9974        // (`build_joined_filtered_rows` carries LATERAL support as
9975        // of Phase 3.P0-41.) Downstream we still handle aggregate /
9976        // projection / ORDER BY / DISTINCT / LIMIT inline because
9977        // those depend on the SelectStatement's items list.
9978        let mut budget = ByteBudget::new(self.max_query_bytes);
9979        let (combined_schema, filtered) = {
9980            let mut needed = alloc::collections::BTreeSet::new();
9981            let prunable = collect_qualified_refs(stmt, &mut needed).is_some();
9982            self.build_joined_filtered_rows(
9983                from,
9984                stmt.where_.as_ref(),
9985                cancel,
9986                if prunable { Some(&needed) } else { None },
9987                &mut budget,
9988            )?
9989        };
9990        let ctx = EvalContext::new(&combined_schema, None);
9991        // Aggregate path: handle GROUP BY / aggregate calls over the
9992        // joined+filtered rows.
9993        if aggregate::uses_aggregate(stmt) {
9994            let refs: Vec<&Row> = filtered.iter().collect();
9995            // v7.29 — a per-query memo so correlated scalar
9996            // subqueries batch-evaluate once (group map) instead of
9997            // executing per group.
9998            let agg_memo = core::cell::RefCell::new(memoize::MemoizeCache::default());
9999            let agg_correlated = |e: &Expr, r: &Row, c: &EvalContext<'_>| {
10000                self.eval_expr_with_correlated(e, r, c, cancel, Some(&mut agg_memo.borrow_mut()))
10001                    .map_err(|err| match err {
10002                        EngineError::Eval(ev) => ev,
10003                        other => eval::EvalError::TypeMismatch {
10004                            detail: alloc::format!("{other}"),
10005                        },
10006                    })
10007            };
10008            let agg = aggregate::run(stmt, &refs, &combined_schema, None, Some(&agg_correlated))?;
10009            return self.finish_agg_result(agg, stmt, cancel);
10010        }
10011
10012        let projection = build_projection(&stmt.items, &combined_schema, "")?;
10013        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
10014        let mut proj_memo = memoize::MemoizeCache::default();
10015        for row in &filtered {
10016            let mut values = Vec::with_capacity(projection.len());
10017            for p in &projection {
10018                // v7.24 (round-16 B) — select-list subqueries under a
10019                // JOIN go through the correlated-aware evaluator too.
10020                values.push(self.eval_expr_with_correlated(
10021                    &p.expr,
10022                    row,
10023                    &ctx,
10024                    cancel,
10025                    Some(&mut proj_memo),
10026                )?);
10027            }
10028            let order_keys = if stmt.order_by.is_empty() {
10029                Vec::new()
10030            } else {
10031                build_order_keys(&stmt.order_by, row, &ctx)?
10032            };
10033            let out_row = Row::new(values);
10034            budget.charge(approx_row_bytes(&out_row))?;
10035            tagged.push((order_keys, out_row));
10036        }
10037        if !stmt.order_by.is_empty() {
10038            let keep = if stmt.distinct {
10039                None
10040            } else {
10041                stmt.limit_literal()
10042                    .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
10043            };
10044            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
10045            partial_sort_tagged(&mut tagged, keep, &descs);
10046        }
10047        let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
10048        if stmt.distinct {
10049            output_rows = dedup_rows(output_rows);
10050        }
10051        apply_offset_and_limit(
10052            &mut output_rows,
10053            stmt.offset_literal(),
10054            stmt.limit_literal(),
10055        );
10056        let columns: Vec<ColumnSchema> = projection
10057            .into_iter()
10058            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
10059            .collect();
10060        Ok(QueryResult::Rows {
10061            columns,
10062            rows: output_rows,
10063        })
10064    }
10065}
10066
/// One row-producing projection: an expression to evaluate, the resulting
/// column's user-visible name, its inferred type, and nullability.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each input row to produce the cell.
    expr: Expr,
    /// Name passed to `ColumnSchema::new` for the output column.
    output_name: String,
    /// Data type passed to `ColumnSchema::new` for the output column.
    ty: DataType,
    /// Nullability flag passed to `ColumnSchema::new` for the output column.
    nullable: bool,
}
10076
10077/// v7.30.3 (mailrs round-26) — approximate heap bytes held by one
10078/// `Value`. Fat payloads (text / json / bytea / vectors / arrays)
10079/// dominate; fixed-size variants count 0 here because the per-cell
10080/// enum overhead is charged separately in `approx_row_bytes`. An
10081/// under-estimate is acceptable — the budget is a host-pressure
10082/// guard, not an exact meter.
10083fn approx_value_bytes(v: &Value) -> usize {
10084    match v {
10085        Value::Text(s) | Value::Json(s) => s.len(),
10086        Value::Bytes(b) => b.len(),
10087        Value::Vector(v) => v.len() * 4,
10088        Value::TextArray(a) => a
10089            .iter()
10090            .map(|o| o.as_ref().map_or(0, String::len) + 8)
10091            .sum(),
10092        Value::IntArray(a) => a.len() * 8,
10093        _ => 0,
10094    }
10095}
10096
10097/// Approximate heap bytes held by one materialised `Row`: per-cell
10098/// enum slots plus fat payloads.
10099fn approx_row_bytes(row: &Row) -> usize {
10100    row.values.len() * core::mem::size_of::<Value>()
10101        + row.values.iter().map(approx_value_bytes).sum::<usize>()
10102}
10103
10104/// v7.30.3 (mailrs round-26) — per-query byte budget for join/filter
10105/// materialisation. Net accounting: stages charge what they clone and
10106/// release what they free (`working` is released when the next stage
10107/// replaces it), so the meter tracks live bytes, not cumulative
10108/// churn. `limit = usize::MAX` when the budget is disabled keeps the
10109/// hot path branch-free apart from one saturating add + compare.
10110struct ByteBudget {
10111    limit: usize,
10112    used: usize,
10113}
10114
10115impl ByteBudget {
10116    const fn new(limit: Option<usize>) -> Self {
10117        Self {
10118            limit: match limit {
10119                Some(n) => n,
10120                None => usize::MAX,
10121            },
10122            used: 0,
10123        }
10124    }
10125
10126    fn charge(&mut self, n: usize) -> Result<(), EngineError> {
10127        self.used = self.used.saturating_add(n);
10128        if self.used > self.limit {
10129            return Err(EngineError::QueryBytesExceeded(self.limit));
10130        }
10131        Ok(())
10132    }
10133
10134    fn release(&mut self, n: usize) {
10135        self.used = self.used.saturating_sub(n);
10136    }
10137}
10138
10139/// Sum `approx_row_bytes` over a freshly materialised row set.
10140fn approx_rows_bytes(rows: &[Row]) -> usize {
10141    rows.iter().map(approx_row_bytes).sum()
10142}
10143
10144/// v7.30.3 (mailrs round-26) — bounded top-N sink entry for the
10145/// streamed single-join path. `keys` carry per-key DESC pre-encoded
10146/// by negation, so ordering is plain ascending lexicographic (the
10147/// negation commutes with `cmp_multi_key`'s per-key reverse,
10148/// including the ±INF NULL placements `build_order_keys` emits).
10149/// `seq` is production order: ties keep the earliest-produced rows,
10150/// matching what the general path's stable in-budget sort yields.
10151/// The `BinaryHeap` is a max-heap, so `peek()` is the worst kept row.
10152struct TopNEntry {
10153    keys: Vec<f64>,
10154    seq: u64,
10155    row: Row,
10156}
10157
10158impl TopNEntry {
10159    fn cmp_keys(a: &[f64], b: &[f64]) -> core::cmp::Ordering {
10160        for (ka, kb) in a.iter().zip(b.iter()) {
10161            let ord = ka.partial_cmp(kb).unwrap_or(core::cmp::Ordering::Equal);
10162            if ord != core::cmp::Ordering::Equal {
10163                return ord;
10164            }
10165        }
10166        core::cmp::Ordering::Equal
10167    }
10168}
10169
10170impl PartialEq for TopNEntry {
10171    fn eq(&self, other: &Self) -> bool {
10172        self.cmp(other) == core::cmp::Ordering::Equal
10173    }
10174}
10175impl Eq for TopNEntry {}
10176impl PartialOrd for TopNEntry {
10177    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
10178        Some(self.cmp(other))
10179    }
10180}
10181impl Ord for TopNEntry {
10182    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
10183        Self::cmp_keys(&self.keys, &other.keys).then(self.seq.cmp(&other.seq))
10184    }
10185}
10186
/// v7.31 (memory campaign — ceiling-first / never-die, design v1) —
/// per-table slice of the engine's resident-memory accounting.
/// `hot_encoded_bytes` is the storage layer's maintained meter (what
/// the rows encode to); `approx_resident_bytes` is what they COST in
/// RAM (per-cell enum slots + heap payloads via `approx_row_bytes`)
/// — the gap between the two is the representation multiplier the
/// round-26 report measured at ~11× end-to-end.
#[derive(Debug, Clone)]
pub struct TableMemoryStats {
    /// Name of the table this slice describes.
    pub name: String,
    /// Hot-tier row count (presumably the rows currently held in
    /// memory — confirm against the code that fills this struct).
    pub hot_rows: u64,
    /// Cached cold-row count (refreshed by ANALYZE — see
    /// `Table::cold_row_count`'s staleness contract).
    pub cold_rows: u64,
    /// Encoded size of the hot rows, as metered by the storage layer.
    pub hot_encoded_bytes: u64,
    /// Estimated in-RAM cost of the hot rows (enum slots + payloads).
    pub approx_resident_bytes: u64,
    /// Number of indices accounted for on this table.
    pub index_count: u64,
    /// BTree indices are walked entry-by-entry (operator surface,
    /// not a hot path); NSW graphs and BRIN are parametric
    /// ESTIMATES until spg-storage carries its own byte meters
    /// (7.31.x follow-up in the design doc).
    pub approx_index_bytes: u64,
}
10210
/// v7.31 — whole-engine memory snapshot: the polling form of the
/// round-26 ask-4 watermark signal. Hosts compare
/// `total_approx_resident_bytes` (+ their own WAL/file accounting)
/// against their deployment ceiling and shed/shrink before the
/// kernel does it for them.
#[derive(Debug, Clone)]
pub struct MemoryStats {
    /// Per-table breakdown.
    pub tables: Vec<TableMemoryStats>,
    /// Engine-wide encoded hot bytes (presumably the sum of
    /// `hot_encoded_bytes` over `tables` — confirm at the producer).
    pub total_hot_encoded_bytes: u64,
    /// Engine-wide estimated resident bytes (presumably the sum of
    /// `approx_resident_bytes` over `tables` — confirm at the producer).
    pub total_approx_resident_bytes: u64,
    /// Engine-wide estimated index bytes (presumably the sum of
    /// `approx_index_bytes` over `tables` — confirm at the producer).
    pub total_approx_index_bytes: u64,
    /// The active per-query materialisation budget (bucket A), so a
    /// monitoring host sees ceiling and usage through one call.
    pub max_query_bytes: Option<usize>,
}
10226
10227/// Dedupe a row set, preserving first-seen order. `Row`'s `PartialEq` is
10228/// structural (`Vec<Value>` ⇒ pairwise `Value` equality), which gives SQL
10229/// `NULL = NULL → TRUE` and `NaN = NaN → FALSE`. The first agrees with
10230/// the spec's "two NULLs are not distinct"; the second is a tolerated
10231/// quirk for v1 (no NaN literals are reachable from the SQL surface).
10232fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
10233    let mut out: Vec<Row> = Vec::with_capacity(rows.len());
10234    for r in rows {
10235        if !out.iter().any(|seen| seen == &r) {
10236            out.push(r);
10237        }
10238    }
10239    out
10240}
10241
10242/// Coerce a `Value` to an `f64` sort key for ORDER BY. Numbers map directly;
10243/// NULL sorts last (treated as `+∞`); booleans are 0.0 / 1.0; text uses lex
10244/// order via the byte values; vectors are not sortable.
10245fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
10246    match v {
10247        Value::Null => Ok(f64::INFINITY),
10248        Value::SmallInt(n) => Ok(f64::from(*n)),
10249        Value::Int(n) => Ok(f64::from(*n)),
10250        Value::Date(d) => Ok(f64::from(*d)),
10251        #[allow(clippy::cast_precision_loss)]
10252        Value::Timestamp(t) => Ok(*t as f64),
10253        // v7.17.0 Phase 3.P0-32 — PG TIME ordered by underlying
10254        // i64 microseconds (matches wall-clock ordering).
10255        #[allow(clippy::cast_precision_loss)]
10256        Value::Time(us) => Ok(*us as f64),
10257        // v7.17.0 Phase 3.P0-33 — MySQL YEAR ordered by underlying
10258        // u16 (matches calendar ordering; zero-year sentinel
10259        // sorts before 1901).
10260        Value::Year(y) => Ok(f64::from(*y)),
10261        // v7.17.0 Phase 3.P0-34 — PG TIMETZ ordered by the
10262        // UTC-equivalent microseconds (local wall - offset). Two
10263        // values for the same physical instant in different zones
10264        // sort equal — matches PG TIMETZ index behaviour.
10265        #[allow(clippy::cast_precision_loss)]
10266        Value::TimeTz { us, offset_secs } => Ok((us - i64::from(*offset_secs) * 1_000_000) as f64),
10267        // v7.17.0 Phase 3.P0-35 — PG MONEY ordered by i64 cents.
10268        #[allow(clippy::cast_precision_loss)]
10269        Value::Money(c) => Ok(*c as f64),
10270        // v7.17.0 Phase 3.P0-38 — range ordering is not supported
10271        // in v7.17.0 (needs lex-then-inclusivity tiebreak).
10272        Value::Range { .. } => Err(EngineError::Unsupported(
10273            "ORDER BY of a range value is not supported in v7.17.0".into(),
10274        )),
10275        // v7.17.0 Phase 3.P0-39 — hstore is not orderable.
10276        Value::Hstore(_) => Err(EngineError::Unsupported(
10277            "ORDER BY of a hstore value is not supported".into(),
10278        )),
10279        // v7.17.0 Phase 3.P0-40 — 2D arrays not orderable.
10280        Value::IntArray2D(_) | Value::BigIntArray2D(_) | Value::TextArray2D(_) => Err(
10281            EngineError::Unsupported("ORDER BY of a 2D array is not supported in v7.17.0".into()),
10282        ),
10283        #[allow(clippy::cast_precision_loss)]
10284        Value::Numeric { scaled, scale } => {
10285            // Scaled integer / 10^scale, computed via f64 for sort
10286            // ordering only. Precision losses here only matter for
10287            // ORDER BY tie-breaks well past 15 significant digits.
10288            // `f64::powi` lives in std; we hand-roll the loop so the
10289            // no_std engine crate doesn't need it.
10290            let mut divisor = 1.0_f64;
10291            for _ in 0..*scale {
10292                divisor *= 10.0;
10293            }
10294            Ok((*scaled as f64) / divisor)
10295        }
10296        #[allow(clippy::cast_precision_loss)]
10297        Value::BigInt(n) => Ok(*n as f64),
10298        Value::Float(x) => Ok(*x),
10299        Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
10300        Value::Text(s) => {
10301            // Lex order by codepoints — good enough for ORDER BY name.
10302            // Map first 8 bytes packed into u64 as a coarse key; ties fall to
10303            // partial_cmp Equal. v1.x can swap in a real string comparator.
10304            let mut key: u64 = 0;
10305            for &b in s.as_bytes().iter().take(8) {
10306                key = (key << 8) | u64::from(b);
10307            }
10308            #[allow(clippy::cast_precision_loss)]
10309            Ok(key as f64)
10310        }
10311        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
10312            Err(EngineError::Unsupported(
10313                "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
10314            ))
10315        }
10316        Value::Interval { .. } => Err(EngineError::Unsupported(
10317            "ORDER BY of an INTERVAL is not supported in v2.11 \
10318             (months vs micros has no single canonical ordering)"
10319                .into(),
10320        )),
10321        Value::Json(_) => Err(EngineError::Unsupported(
10322            "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
10323        )),
10324        // v7.5.0 — Value is #[non_exhaustive]; future variants need
10325        // an explicit ORDER BY mapping. Surface as Unsupported until
10326        // engine support is added.
10327        _ => Err(EngineError::Unsupported(
10328            "ORDER BY of this value type is not supported".into(),
10329        )),
10330    }
10331}
10332
10333/// Try to plan a WHERE clause as an equality lookup against an existing
10334/// index. Returns the candidate row indices on success; `None` means the
10335/// caller should fall back to a full scan.
10336///
10337/// v0.8 recognises a single top-level `col = literal` (in either operand
10338/// order). AND chains and range scans land in later milestones.
10339/// Look for `ORDER BY col <dist-op> literal LIMIT k` against an
10340/// NSW-indexed vector column. Recognised distance ops: `<->` (L2),
10341/// `<#>` (inner product), `<=>` (cosine). When a WHERE clause is
10342/// present, the planner does an "over-fetch and filter" pass — it
10343/// asks the graph for `k * over_fetch` candidates, evaluates WHERE
10344/// against each, and trims back to `k`. Returns the row indices in
10345/// ascending-distance order when the plan applies.
10346fn try_nsw_knn(
10347    stmt: &SelectStatement,
10348    table: &Table,
10349    schema_cols: &[ColumnSchema],
10350    table_alias: &str,
10351) -> Option<Vec<usize>> {
10352    if stmt.distinct {
10353        return None;
10354    }
10355    let limit = usize::try_from(stmt.limit_literal()?).ok()?;
10356    if limit == 0 {
10357        return None;
10358    }
10359    // v6.4.0 — NSW kNN dispatch needs a single ORDER BY key on the
10360    // distance metric. Multi-key ORDER BY falls through to the
10361    // generic sort path.
10362    if stmt.order_by.len() != 1 {
10363        return None;
10364    }
10365    let order = &stmt.order_by[0];
10366    // NSW kNN returns rows ascending by distance — DESC inverts the
10367    // natural order, so the planner can't handle it without a sort
10368    // pass. Fall back to the generic ORDER BY path.
10369    if order.desc {
10370        return None;
10371    }
10372    let Expr::Binary { lhs, op, rhs } = &order.expr else {
10373        return None;
10374    };
10375    let metric = match op {
10376        BinOp::L2Distance => spg_storage::NswMetric::L2,
10377        BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
10378        BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
10379        _ => return None,
10380    };
10381    // Accept both `col <op> literal` and `literal <op> col`.
10382    let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
10383        (lhs.as_ref(), rhs.as_ref())
10384    else {
10385        return None;
10386    };
10387    if let Some(q) = &col.qualifier
10388        && q != table_alias
10389    {
10390        return None;
10391    }
10392    let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
10393    let query = literal_to_vector(literal)?;
10394    let idx = spg_storage::nsw_index_on(table, col_pos)?;
10395    if let Some(where_expr) = &stmt.where_ {
10396        // Over-fetch and filter. The factor (10×) is a heuristic that
10397        // covers typical selectivity for the corpus tests; v2.x will
10398        // make it configurable.
10399        let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
10400        let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
10401        let ctx = EvalContext::new(schema_cols, Some(table_alias));
10402        let mut kept: Vec<usize> = Vec::with_capacity(limit);
10403        for i in candidates {
10404            let row = &table.rows()[i];
10405            let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
10406            if matches!(cond, Value::Bool(true)) {
10407                kept.push(i);
10408                if kept.len() >= limit {
10409                    break;
10410                }
10411            }
10412        }
10413        Some(kept)
10414    } else {
10415        Some(spg_storage::nsw_query(
10416            table, &idx.name, &query, limit, metric,
10417        ))
10418    }
10419}
10420
/// Lower bound on the over-fetch pool when WHERE is present — even
/// for tiny `LIMIT 1` queries we keep enough candidates to absorb a
/// few WHERE rejections. `try_nsw_knn` requests the larger of this
/// floor and ten times the requested row count.
const NSW_OVER_FETCH_FLOOR: usize = 32;
10425
10426/// Pull a `Vec<f32>` out of a literal-or-cast expression. Returns
10427/// `None` for anything we can't fold at plan time.
10428fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
10429    match e {
10430        Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
10431        Expr::Cast { expr, .. } => literal_to_vector(expr),
10432        _ => None,
10433    }
10434}
10435
10436/// Materialise rows in a planner-supplied order (used by the NSW path)
10437/// without re-running ORDER BY. The projection + LIMIT slot mirror the
10438/// equivalent block in `exec_bare_select`.
10439fn materialise_in_order(
10440    stmt: &SelectStatement,
10441    table: &Table,
10442    schema_cols: &[ColumnSchema],
10443    table_alias: &str,
10444    ordered_rows: &[usize],
10445) -> Result<QueryResult, EngineError> {
10446    let ctx = EvalContext::new(schema_cols, Some(table_alias));
10447    let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
10448    let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
10449    for &i in ordered_rows {
10450        let row = &table.rows()[i];
10451        let mut values = Vec::with_capacity(projection.len());
10452        for p in &projection {
10453            values.push(eval::eval_expr(&p.expr, row, &ctx)?);
10454        }
10455        output_rows.push(Row::new(values));
10456    }
10457    apply_offset_and_limit(
10458        &mut output_rows,
10459        stmt.offset_literal(),
10460        stmt.limit_literal(),
10461    );
10462    let columns: Vec<ColumnSchema> = projection
10463        .into_iter()
10464        .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
10465        .collect();
10466    Ok(QueryResult::Rows {
10467        columns,
10468        rows: output_rows,
10469    })
10470}
10471
10472/// v7.20 P4 — hot-row POSITION seek for the mutation paths
10473/// (UPDATE / DELETE index their planned writes by position in
10474/// `table.rows()`, so the Cow-row shape `try_index_seek`
10475/// returns doesn't fit). Same top-level-AND recursion and
10476/// col=literal resolution; the caller re-applies the full WHERE
10477/// to every returned row so the index only narrows candidates.
10478///
10479/// Returns `None` (→ caller full-scans) when no equality leaf
10480/// hits an index OR any matching locator lives in the cold tier
10481/// — the mutation paths operate on hot rows, and the PK
10482/// promote-then-walk upstream already handles the
10483/// cold-single-row case.
10484fn try_index_seek_positions(
10485    where_expr: &Expr,
10486    schema_cols: &[ColumnSchema],
10487    table: &Table,
10488    table_alias: &str,
10489) -> Option<Vec<usize>> {
10490    if let Expr::Binary {
10491        lhs,
10492        op: BinOp::And,
10493        rhs,
10494    } = where_expr
10495    {
10496        if let Some(p) = try_index_seek_positions(lhs, schema_cols, table, table_alias) {
10497            return Some(p);
10498        }
10499        return try_index_seek_positions(rhs, schema_cols, table, table_alias);
10500    }
10501    let Expr::Binary {
10502        lhs,
10503        op: BinOp::Eq,
10504        rhs,
10505    } = where_expr
10506    else {
10507        return None;
10508    };
10509    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10510        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10511    let idx = table.index_on(col_pos)?;
10512    let key = IndexKey::from_value(&value)?;
10513    let locators = idx.lookup_eq(&key);
10514    let mut out = Vec::with_capacity(locators.len());
10515    for loc in locators {
10516        match *loc {
10517            spg_storage::RowLocator::Hot(i) => out.push(i),
10518            spg_storage::RowLocator::Cold { .. } => return None,
10519        }
10520    }
10521    Some(out)
10522}
10523
10524fn try_index_seek<'a>(
10525    where_expr: &Expr,
10526    schema_cols: &[ColumnSchema],
10527    catalog: &'a Catalog,
10528    table: &'a Table,
10529    table_alias: &str,
10530) -> Option<Vec<Cow<'a, Row>>> {
10531    // v7.11.3 — recurse through top-level `AND` so a PG-style
10532    // composite predicate like `WHERE id = 1 AND created_at > $1`
10533    // still hits the index on `id`. The caller re-applies the
10534    // full WHERE expression to each returned row, so dropping the
10535    // residual conjuncts here is correct — the index just narrows
10536    // the candidate set.
10537    if let Expr::Binary {
10538        lhs,
10539        op: BinOp::And,
10540        rhs,
10541    } = where_expr
10542    {
10543        // Try LHS first (typical convention: leading equality on
10544        // the indexed column comes first in user-written SQL).
10545        if let Some(rows) = try_index_seek(lhs, schema_cols, catalog, table, table_alias) {
10546            return Some(rows);
10547        }
10548        return try_index_seek(rhs, schema_cols, catalog, table, table_alias);
10549    }
10550    let Expr::Binary {
10551        lhs,
10552        op: BinOp::Eq,
10553        rhs,
10554    } = where_expr
10555    else {
10556        return None;
10557    };
10558    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10559        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10560    let idx = table.index_on(col_pos)?;
10561    let key = IndexKey::from_value(&value)?;
10562    let locators = idx.lookup_eq(&key);
10563    let table_name = table.schema().name.as_str();
10564    // v5.1: each locator dispatches to either the hot tier (zero-
10565    // copy borrow of `table.rows()[i]`) or a cold-tier segment
10566    // (one page read + dense row decode, ~µs scale). Cold rows are
10567    // returned as `Cow::Owned` so the caller's `&Row` iteration
10568    // doesn't see a tier distinction; pre-freezer (no cold
10569    // segments loaded) every locator is `Hot` and every entry is
10570    // `Cow::Borrowed` — identical cost to the pre-v5.1 path.
10571    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
10572    for loc in locators {
10573        match *loc {
10574            spg_storage::RowLocator::Hot(i) => {
10575                if let Some(row) = table.rows().get(i) {
10576                    out.push(Cow::Borrowed(row));
10577                }
10578            }
10579            spg_storage::RowLocator::Cold { segment_id, .. } => {
10580                if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
10581                    out.push(Cow::Owned(row));
10582                }
10583            }
10584        }
10585    }
10586    Some(out)
10587}
10588
10589/// v7.12.3 — GIN-accelerated candidate seek for `WHERE col @@ <ts_query>`.
10590///
10591/// Recurses through top-level `AND` like [`try_index_seek`] so a
10592/// composite predicate `WHERE search_vector @@ q AND id > $1` still
10593/// hits the GIN index on `search_vector` — the caller re-applies the
10594/// full WHERE expression to each returned candidate, so dropping the
10595/// `id > $1` residual here stays semantically correct.
10596///
10597/// Returns `None` when:
10598///   - no leaf is a `col @@ <rhs>` shape on a GIN-indexed column;
10599///   - the RHS can't be const-evaluated to a `Value::TsQuery`
10600///     (typically because it references row columns);
10601///   - the resolved `TsQuery` uses query shapes the MVP doesn't
10602///     accelerate (`Not`, `Phrase` — those fall through to full scan).
10603///
10604/// On `Some(rows)` the caller iterates only `rows` and re-evaluates
10605/// the full `@@` predicate per row, so an over-approximate candidate
10606/// set is safe.
10607fn try_gin_seek<'a>(
10608    where_expr: &Expr,
10609    schema_cols: &[ColumnSchema],
10610    catalog: &'a Catalog,
10611    table: &'a Table,
10612    table_alias: &str,
10613    ctx: &eval::EvalContext<'_>,
10614) -> Option<Vec<Cow<'a, Row>>> {
10615    if let Expr::Binary {
10616        lhs,
10617        op: BinOp::And,
10618        rhs,
10619    } = where_expr
10620    {
10621        if let Some(rows) = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx) {
10622            return Some(rows);
10623        }
10624        return try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx);
10625    }
10626    // v7.17.0 Phase 3.P0-44 — MySQL `MATCH(col1, col2) AGAINST (...)`
10627    // desugars into `(to_tsvector(col1) @@ q) OR (to_tsvector(col2) @@ q)`
10628    // in the parser. To accelerate the multi-column case, walk OR the same
10629    // way we walk AND: only emit a candidate set if BOTH sides can seek
10630    // (otherwise the OR result is unbounded and we must fall through to
10631    // the full scan). Candidates are union'd; the caller's WHERE re-eval
10632    // verifies the full predicate per row, so duplicates / supersets stay
10633    // semantically safe.
10634    if let Expr::Binary {
10635        lhs,
10636        op: BinOp::Or,
10637        rhs,
10638    } = where_expr
10639    {
10640        let left = try_gin_seek(lhs, schema_cols, catalog, table, table_alias, ctx)?;
10641        let right = try_gin_seek(rhs, schema_cols, catalog, table, table_alias, ctx)?;
10642        let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(left.len() + right.len());
10643        out.extend(left);
10644        out.extend(right);
10645        return Some(out);
10646    }
10647    let Expr::Binary {
10648        lhs,
10649        op: BinOp::TsMatch,
10650        rhs,
10651    } = where_expr
10652    else {
10653        return None;
10654    };
10655    // Either side can be the column; pgvector idiom (`vec @@ q`)
10656    // hits the first arm, FROM-clause-derived (`plainto_tsquery($1)
10657    // q ... WHERE search_vector @@ q`) the same. CROSS JOIN derived
10658    // tables resolve `q` to a Column too.
10659    let (col_pos, query) = resolve_gin_col_query(lhs, rhs, schema_cols, table_alias, ctx)
10660        .or_else(|| resolve_gin_col_query(rhs, lhs, schema_cols, table_alias, ctx))?;
10661    // v7.17.0 Phase 3.P0-44 — MySQL `FULLTEXT KEY` builds a
10662    // `IndexKind::GinFulltext` posting list (Phase 2.2). It shares
10663    // the same `gin_lookup_word` shape as the tsvector-typed GIN,
10664    // so the MATCH-AGAINST `@@` predicate (desugared by the parser
10665    // into `to_tsvector(col) @@ plainto_tsquery('term')`) routes
10666    // through the same candidate-set seek.
10667    let idx = table
10668        .indices()
10669        .iter()
10670        .find(|i| i.column_position == col_pos && (i.is_gin() || i.is_gin_fulltext()))?;
10671    let candidates = gin_query_candidates(idx, &query)?;
10672    let _ = catalog; // cold-tier row resolution unused in MVP; see below.
10673    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(candidates.len());
10674    for loc in candidates {
10675        match loc {
10676            spg_storage::RowLocator::Hot(i) => {
10677                if let Some(row) = table.rows().get(i) {
10678                    out.push(Cow::Borrowed(row));
10679                }
10680            }
10681            // GIN cold-tier rows in the MVP: skipped, matching the
10682            // full-scan `@@` path which itself only iterates
10683            // `table.rows()` (hot tier). When v7.13+ adds cold-tier
10684            // scan-time materialisation for `@@`, the parallel
10685            // resolution lands here; until then both paths see the
10686            // same hot-only candidate set so correctness is preserved.
10687            spg_storage::RowLocator::Cold { .. } => {}
10688        }
10689    }
10690    Some(out)
10691}
10692
10693/// v7.15.0 — trigram-GIN-accelerated candidate seek for
10694/// `WHERE col LIKE '<pat>'` and `WHERE col ILIKE '<pat>'` when
10695/// the column has a `gin_trgm_ops` GIN index.
10696///
10697/// Walks top-level `AND` so multi-predicate WHEREs (`col LIKE
10698/// 'foo%' AND id > 1`) still hit the trigram index; the caller
10699/// re-evaluates the full WHERE per candidate row, so dropping
10700/// non-LIKE conjuncts here stays semantically correct.
10701///
10702/// Returns `None` when:
10703///   - no leaf is `col LIKE/ILIKE <literal>` on a trigram-GIN-
10704///     indexed column;
10705///   - the pattern's literal runs are too short to constrain
10706///     (pattern decomposes into `< 3`-char runs, e.g. `%ab%`);
10707///   - the pattern doesn't const-evaluate to a TEXT.
10708fn try_trgm_seek<'a>(
10709    where_expr: &Expr,
10710    schema_cols: &[ColumnSchema],
10711    table: &'a Table,
10712    table_alias: &str,
10713) -> Option<Vec<Cow<'a, Row>>> {
10714    if let Expr::Binary {
10715        lhs,
10716        op: BinOp::And,
10717        rhs,
10718    } = where_expr
10719    {
10720        if let Some(rows) = try_trgm_seek(lhs, schema_cols, table, table_alias) {
10721            return Some(rows);
10722        }
10723        return try_trgm_seek(rhs, schema_cols, table, table_alias);
10724    }
10725    // LIKE node is what carries the column reference + pattern.
10726    // ILIKE is the same AST node — PG's LIKE/ILIKE both lower
10727    // through `Expr::Like { expr, pattern, negated }`. The trigram
10728    // index posting-list keys are already lower-cased and
10729    // case-folded, so we only need the pattern's literal runs.
10730    let Expr::Like { expr, pattern, .. } = where_expr else {
10731        return None;
10732    };
10733    // Column side.
10734    let Expr::Column(c) = expr.as_ref() else {
10735        return None;
10736    };
10737    if let Some(q) = &c.qualifier
10738        && q != table_alias
10739    {
10740        return None;
10741    }
10742    let col_pos = schema_cols
10743        .iter()
10744        .position(|s| s.name.eq_ignore_ascii_case(&c.name))?;
10745    // Index must exist on that column AND be a trigram-GIN.
10746    let idx = table
10747        .indices()
10748        .iter()
10749        .find(|i| i.column_position == col_pos && i.is_gin_trgm())?;
10750    // Pattern side must be a literal TEXT — anything else (column
10751    // ref, function call, parameter that hasn't been bound yet)
10752    // falls through to full scan.
10753    let Expr::Literal(spg_sql::ast::Literal::String(pat)) = pattern.as_ref() else {
10754        return None;
10755    };
10756    let trigrams = spg_storage::trgm::trigrams_from_like_pattern(pat)?;
10757    // Intersect every trigram's posting list. Empty intersection
10758    // → empty candidate set (caller short-circuits its row loop).
10759    let mut iter = trigrams.iter();
10760    let first = iter.next()?;
10761    let mut acc: Vec<spg_storage::RowLocator> = {
10762        let mut v = idx.gin_trgm_lookup(first).to_vec();
10763        v.sort_by_key(locator_sort_key);
10764        v.dedup_by_key(|l| locator_sort_key(l));
10765        v
10766    };
10767    for tri in iter {
10768        let mut next: Vec<spg_storage::RowLocator> = idx.gin_trgm_lookup(tri).to_vec();
10769        next.sort_by_key(locator_sort_key);
10770        next.dedup_by_key(|l| locator_sort_key(l));
10771        // Sorted-merge intersection.
10772        let mut merged: Vec<spg_storage::RowLocator> =
10773            Vec::with_capacity(acc.len().min(next.len()));
10774        let (mut i, mut j) = (0usize, 0usize);
10775        while i < acc.len() && j < next.len() {
10776            let lk = locator_sort_key(&acc[i]);
10777            let rk = locator_sort_key(&next[j]);
10778            match lk.cmp(&rk) {
10779                core::cmp::Ordering::Less => i += 1,
10780                core::cmp::Ordering::Greater => j += 1,
10781                core::cmp::Ordering::Equal => {
10782                    merged.push(acc[i]);
10783                    i += 1;
10784                    j += 1;
10785                }
10786            }
10787        }
10788        acc = merged;
10789        if acc.is_empty() {
10790            break;
10791        }
10792    }
10793    let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(acc.len());
10794    for loc in acc {
10795        if let spg_storage::RowLocator::Hot(i) = loc
10796            && let Some(row) = table.rows().get(i)
10797        {
10798            out.push(Cow::Borrowed(row));
10799        }
10800        // Cold-tier rows: skipped in MVP (same as try_gin_seek).
10801    }
10802    Some(out)
10803}
10804
10805/// v7.12.3 — extract `(column_position, TsQueryAst)` when one side of
10806/// the binary is a column reference to a GIN-indexed tsvector column
10807/// and the other side const-evaluates to a `Value::TsQuery`. Returns
10808/// `None` if the column reference is for the wrong table alias, or if
10809/// the RHS expression depends on row data.
10810fn resolve_gin_col_query(
10811    col_side: &Expr,
10812    query_side: &Expr,
10813    schema_cols: &[ColumnSchema],
10814    table_alias: &str,
10815    ctx: &eval::EvalContext<'_>,
10816) -> Option<(usize, spg_storage::TsQueryAst)> {
10817    // v7.17.0 Phase 3.P0-44 — the MATCH AGAINST desugar wraps the
10818    // column in `to_tsvector('simple', col)`, so we peel that wrapper
10819    // before the column lookup. Direct `col @@ tsquery` paths (the
10820    // tsvector-typed v7.12 surface) skip the wrapper entirely.
10821    let column = match col_side {
10822        Expr::Column(c) => c,
10823        Expr::FunctionCall { name, args }
10824            if name.eq_ignore_ascii_case("to_tsvector") && !args.is_empty() =>
10825        {
10826            // PG `to_tsvector` accepts either `to_tsvector(col)` or
10827            // `to_tsvector(config, col)`. In both shapes the column
10828            // we care about is the final argument.
10829            if let Expr::Column(c) = args.last().unwrap() {
10830                c
10831            } else {
10832                return None;
10833            }
10834        }
10835        _ => return None,
10836    };
10837    let c = column;
10838    if let Some(q) = &c.qualifier
10839        && q != table_alias
10840    {
10841        return None;
10842    }
10843    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
10844    // Const-evaluate the query side with an empty row — fails fast
10845    // (with a `ColumnNotFound` / similar) if the expression actually
10846    // depends on row data, which is exactly the bail signal we want.
10847    let empty_row = Row::new(Vec::new());
10848    let v = eval::eval_expr(query_side, &empty_row, ctx).ok()?;
10849    let Value::TsQuery(q) = v else { return None };
10850    Some((pos, q))
10851}
10852
10853/// v7.12.3 — walk a `TsQueryAst` against an [`IndexKind::Gin`] index
10854/// to produce a candidate row-locator set. Returns `None` for query
10855/// shapes the MVP doesn't accelerate (`Not` / `Phrase` — both bail to
10856/// full scan since their semantics need either complementation across
10857/// the whole row set or positional verification beyond what the
10858/// posting list carries).
10859///
10860/// Candidate sets are over-approximate — the caller re-applies the
10861/// full `@@` predicate per row, so reporting "row was in some
10862/// posting list" without verifying positions / weights stays correct.
10863fn gin_query_candidates(
10864    idx: &spg_storage::Index,
10865    query: &spg_storage::TsQueryAst,
10866) -> Option<Vec<spg_storage::RowLocator>> {
10867    use spg_storage::TsQueryAst;
10868    match query {
10869        TsQueryAst::Term { word, .. } => {
10870            let mut v: Vec<spg_storage::RowLocator> = idx.gin_lookup_word(word).to_vec();
10871            v.sort_by_key(locator_sort_key);
10872            v.dedup_by_key(|l| locator_sort_key(l));
10873            Some(v)
10874        }
10875        TsQueryAst::And(l, r) => {
10876            let mut left = gin_query_candidates(idx, l)?;
10877            let mut right = gin_query_candidates(idx, r)?;
10878            left.sort_by_key(locator_sort_key);
10879            right.sort_by_key(locator_sort_key);
10880            // Sorted-merge intersection.
10881            let mut out: Vec<spg_storage::RowLocator> = Vec::new();
10882            let (mut i, mut j) = (0usize, 0usize);
10883            while i < left.len() && j < right.len() {
10884                let lk = locator_sort_key(&left[i]);
10885                let rk = locator_sort_key(&right[j]);
10886                match lk.cmp(&rk) {
10887                    core::cmp::Ordering::Less => i += 1,
10888                    core::cmp::Ordering::Greater => j += 1,
10889                    core::cmp::Ordering::Equal => {
10890                        out.push(left[i]);
10891                        i += 1;
10892                        j += 1;
10893                    }
10894                }
10895            }
10896            Some(out)
10897        }
10898        TsQueryAst::Or(l, r) => {
10899            let mut out = gin_query_candidates(idx, l)?;
10900            out.extend(gin_query_candidates(idx, r)?);
10901            out.sort_by_key(locator_sort_key);
10902            out.dedup_by_key(|l| locator_sort_key(l));
10903            Some(out)
10904        }
10905        // Not / Phrase bail to full scan in the MVP. Not needs
10906        // complementation against the whole row set (not represented
10907        // in the posting-list view); Phrase needs positional
10908        // verification beyond what `word → rows` carries.
10909        TsQueryAst::Not(_) | TsQueryAst::Phrase { .. } => None,
10910    }
10911}
10912
10913/// v7.12.3 — total ordering on `RowLocator` for sort/dedup purposes
10914/// inside the GIN intersection / union loops. Hot rows order by their
10915/// row index; Cold rows order after all Hot rows, then by
10916/// `(segment_id, the cold sub-key)`.
10917fn locator_sort_key(l: &spg_storage::RowLocator) -> (u8, u64, u64) {
10918    match *l {
10919        spg_storage::RowLocator::Hot(i) => (0, i as u64, 0),
10920        spg_storage::RowLocator::Cold {
10921            segment_id,
10922            page_offset,
10923        } => (1, u64::from(segment_id), u64::from(page_offset)),
10924    }
10925}
10926
10927/// v5.2.3: extract `(column_position, IndexKey)` when `where_expr`
10928/// is a simple `col = literal` predicate suitable for a `BTree` index
10929/// seek. Used by `exec_update_cancel` / `exec_delete_cancel` to
10930/// decide whether a write touches a cold-tier row (which requires
10931/// promote-on-write / shadow-on-delete) before falling through to
10932/// the hot-tier row walk.
10933///
10934/// Returns `None` for any predicate shape the planner can't push
10935/// down to an index seek — complex WHERE clauses always take the
10936/// hot-only path (cold rows are immutable to non-indexed writes
10937/// until a future scan-fanout sub-version).
10938fn try_pk_predicate(
10939    where_expr: &Expr,
10940    schema_cols: &[ColumnSchema],
10941    table_alias: &str,
10942) -> Option<(usize, IndexKey)> {
10943    let Expr::Binary {
10944        lhs,
10945        op: BinOp::Eq,
10946        rhs,
10947    } = where_expr
10948    else {
10949        return None;
10950    };
10951    let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
10952        .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
10953    let key = IndexKey::from_value(&value)?;
10954    Some((col_pos, key))
10955}
10956
10957fn resolve_col_literal_pair(
10958    col_side: &Expr,
10959    lit_side: &Expr,
10960    schema_cols: &[ColumnSchema],
10961    table_alias: &str,
10962) -> Option<(usize, Value)> {
10963    let Expr::Column(c) = col_side else {
10964        return None;
10965    };
10966    if let Some(q) = &c.qualifier
10967        && q != table_alias
10968    {
10969        return None;
10970    }
10971    let pos = schema_cols.iter().position(|s| s.name == c.name)?;
10972    let Expr::Literal(l) = lit_side else {
10973        return None;
10974    };
10975    let v = match l {
10976        Literal::Integer(n) => {
10977            if let Ok(small) = i32::try_from(*n) {
10978                Value::Int(small)
10979            } else {
10980                Value::BigInt(*n)
10981            }
10982        }
10983        Literal::Float(x) => Value::Float(*x),
10984        Literal::String(s) => Value::Text(s.clone()),
10985        Literal::Bool(b) => Value::Bool(*b),
10986        Literal::Null => Value::Null,
10987        // Vector, array and Interval literals can't be used as B-tree
10988        // index keys. Tell the planner to fall back to full-scan.
10989        Literal::Vector(_)
10990        | Literal::Interval { .. }
10991        | Literal::TextArray(_)
10992        | Literal::IntArray(_)
10993        | Literal::BigIntArray(_) => return None,
10994    };
10995    Some((pos, v))
10996}
10997
10998/// Find the schema entry that a SELECT-list `Expr::Column` refers to.
10999/// Mirrors `resolve_column` in `eval.rs`, but returns a proper
11000/// `EngineError` so the projection-build path keeps `UnknownQualifier`
11001/// vs `ColumnNotFound` distinct.
11002fn resolve_projection_column<'a>(
11003    c: &ColumnName,
11004    schema_cols: &'a [ColumnSchema],
11005    table_alias: &str,
11006) -> Result<&'a ColumnSchema, EngineError> {
11007    if let Some(q) = &c.qualifier {
11008        let composite = alloc::format!("{q}.{name}", name = c.name);
11009        if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
11010            return Ok(s);
11011        }
11012        // Single-table case: the qualifier may equal the active alias —
11013        // then look for the bare column name.
11014        if q == table_alias
11015            && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
11016        {
11017            return Ok(s);
11018        }
11019        // For multi-table schemas the qualifier is unknown only if no
11020        // column bears the "<q>." prefix. For single-table, the alias
11021        // mismatch alone is enough.
11022        let prefix = alloc::format!("{q}.");
11023        let qualifier_known =
11024            q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
11025        if !qualifier_known {
11026            return Err(EngineError::Eval(EvalError::UnknownQualifier {
11027                qualifier: q.clone(),
11028            }));
11029        }
11030        return Err(EngineError::Eval(EvalError::ColumnNotFound {
11031            name: c.name.clone(),
11032        }));
11033    }
11034    if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
11035        return Ok(s);
11036    }
11037    let suffix = alloc::format!(".{name}", name = c.name);
11038    let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
11039    let first = matches.next();
11040    let extra = matches.next();
11041    match (first, extra) {
11042        (Some(s), None) => Ok(s),
11043        (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
11044            detail: alloc::format!("ambiguous column reference: {}", c.name),
11045        })),
11046        _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
11047            name: c.name.clone(),
11048        })),
11049    }
11050}
11051
11052fn build_projection(
11053    items: &[SelectItem],
11054    schema_cols: &[ColumnSchema],
11055    table_alias: &str,
11056) -> Result<Vec<ProjectedItem>, EngineError> {
11057    let mut out = Vec::new();
11058    for item in items {
11059        match item {
11060            SelectItem::Wildcard => {
11061                for col in schema_cols {
11062                    out.push(ProjectedItem {
11063                        expr: Expr::Column(ColumnName {
11064                            qualifier: None,
11065                            name: col.name.clone(),
11066                        }),
11067                        output_name: col.name.clone(),
11068                        ty: col.ty,
11069                        nullable: col.nullable,
11070                    });
11071                }
11072            }
11073            SelectItem::Expr { expr, alias } => {
11074                // Plain column ref keeps full schema info (real type +
11075                // nullability). For compound expressions try the
11076                // describe-side function-return-type table first
11077                // (e.g. `SELECT now()` → Timestamptz, `SELECT
11078                // concat(…)` → Text). Falls back to nullable Text
11079                // for shapes the describe path can't resolve.
11080                if let Expr::Column(c) = expr {
11081                    let sch = resolve_projection_column(c, schema_cols, table_alias)?;
11082                    let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
11083                    out.push(ProjectedItem {
11084                        expr: expr.clone(),
11085                        output_name,
11086                        ty: sch.ty,
11087                        nullable: sch.nullable,
11088                    });
11089                } else if let Some(shape) = describe::describe_expr(expr, schema_cols) {
11090                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
11091                    out.push(ProjectedItem {
11092                        expr: expr.clone(),
11093                        output_name,
11094                        ty: shape.ty,
11095                        nullable: shape.nullable,
11096                    });
11097                } else {
11098                    let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
11099                    out.push(ProjectedItem {
11100                        expr: expr.clone(),
11101                        output_name,
11102                        ty: DataType::Text,
11103                        nullable: true,
11104                    });
11105                }
11106            }
11107        }
11108    }
11109    Ok(out)
11110}
11111
11112/// Promote an integer to a NUMERIC value at the requested scale.
11113/// Rejects values that, after scaling, would overflow the column's
11114/// precision budget.
11115fn numeric_from_integer(
11116    n: i128,
11117    precision: u8,
11118    scale: u8,
11119    col_name: &str,
11120) -> Result<Value, EngineError> {
11121    let factor = pow10_i128(scale);
11122    let scaled = n.checked_mul(factor).ok_or_else(|| {
11123        EngineError::Unsupported(alloc::format!(
11124            "integer overflow scaling value for column `{col_name}` to scale {scale}"
11125        ))
11126    })?;
11127    check_precision(scaled, precision, col_name)?;
11128    Ok(Value::Numeric { scaled, scale })
11129}
11130
11131/// Float → NUMERIC. Uses round-half-away-from-zero on `x * 10^scale`,
11132/// then verifies the result fits the column's precision.
11133#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
11134fn numeric_from_float(
11135    x: f64,
11136    precision: u8,
11137    scale: u8,
11138    col_name: &str,
11139) -> Result<Value, EngineError> {
11140    if !x.is_finite() {
11141        return Err(EngineError::Unsupported(alloc::format!(
11142            "cannot store non-finite float in NUMERIC column `{col_name}`"
11143        )));
11144    }
11145    let mut factor = 1.0_f64;
11146    for _ in 0..scale {
11147        factor *= 10.0;
11148    }
11149    // Round half-away-from-zero by biasing then casting (`as i128`
11150    // truncates toward zero, so the bias + truncation gives the
11151    // desired rounding). `f64::floor` / `ceil` live in std; we don't
11152    // need them — the cast handles the truncation step.
11153    let shifted = x * factor;
11154    let biased = if shifted >= 0.0 {
11155        shifted + 0.5
11156    } else {
11157        shifted - 0.5
11158    };
11159    // Range-check before casting back to i128 — the cast itself is
11160    // saturating in Rust, which would silently truncate huge inputs.
11161    if !(-1e38..=1e38).contains(&biased) {
11162        return Err(EngineError::Unsupported(alloc::format!(
11163            "value {x} overflows NUMERIC range for column `{col_name}`"
11164        )));
11165    }
11166    let scaled = biased as i128;
11167    check_precision(scaled, precision, col_name)?;
11168    Ok(Value::Numeric { scaled, scale })
11169}
11170
/// v7.17.0 Phase 3.P0-67 — parse plain decimal text into
/// `(mantissa, scale)`, where the value equals
/// `mantissa / 10^scale`.
///
/// Accepted shape, after trimming surrounding whitespace: an optional
/// single `+` / `-`, then ASCII digits with at most one `.`; either
/// the integer or the fractional part may be empty, but not both.
/// The scale is the number of fractional digits.
///
/// Returns `None` for empty input, a lone sign or dot, any other
/// character (scientific notation, separators, inner spaces), more
/// than 255 fractional digits, or a mantissa that does not fit in
/// `i128`.
fn parse_numeric_text(s: &str) -> Option<(i128, u8)> {
    let trimmed = s.trim();
    let (negative, unsigned) = if let Some(tail) = trimmed.strip_prefix('-') {
        (true, tail)
    } else if let Some(tail) = trimmed.strip_prefix('+') {
        (false, tail)
    } else {
        (false, trimmed)
    };
    let (int_part, frac_part) = unsigned.split_once('.').unwrap_or((unsigned, ""));
    if int_part.is_empty() && frac_part.is_empty() {
        return None;
    }
    // Digits only: this also rules out exponents, a second sign or
    // dot, and any separator.
    let digits_only = |part: &str| part.bytes().all(|b| b.is_ascii_digit());
    if !(digits_only(int_part) && digits_only(frac_part)) {
        return None;
    }
    let scale = u8::try_from(frac_part.len()).ok()?;
    // Accumulate toward the sign of the result so `i128::MIN` parses.
    let mut mantissa: i128 = 0;
    for byte in int_part.bytes().chain(frac_part.bytes()) {
        let digit = i128::from(byte - b'0');
        let shifted = mantissa.checked_mul(10)?;
        mantissa = if negative {
            shifted.checked_sub(digit)?
        } else {
            shifted.checked_add(digit)?
        };
    }
    Some((mantissa, scale))
}
11231
11232/// Move a Numeric value from `src_scale` to `dst_scale`. Going up
11233/// multiplies by 10; going down rounds half-away-from-zero.
11234fn numeric_rescale(
11235    scaled: i128,
11236    src_scale: u8,
11237    precision: u8,
11238    dst_scale: u8,
11239    col_name: &str,
11240) -> Result<Value, EngineError> {
11241    let new_scaled = if dst_scale >= src_scale {
11242        let bump = pow10_i128(dst_scale - src_scale);
11243        scaled.checked_mul(bump).ok_or_else(|| {
11244            EngineError::Unsupported(alloc::format!(
11245                "overflow rescaling NUMERIC for column `{col_name}`"
11246            ))
11247        })?
11248    } else {
11249        let drop = pow10_i128(src_scale - dst_scale);
11250        let half = drop / 2;
11251        if scaled >= 0 {
11252            (scaled + half) / drop
11253        } else {
11254            (scaled - half) / drop
11255        }
11256    };
11257    check_precision(new_scaled, precision, col_name)?;
11258    Ok(Value::Numeric {
11259        scaled: new_scaled,
11260        scale: dst_scale,
11261    })
11262}
11263
/// Drop the fractional part of a scaled integer, returning the integer
/// portion (toward zero). Used for NUMERIC → INT casts.
///
/// For scales above 38 the divisor `10^scale` exceeds `i128`, so every
/// representable `scaled` has an integer portion of zero; that case
/// returns 0 instead of overflowing while computing the divisor.
const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
    // `as` rather than `u32::from`: trait calls are not available in a
    // `const fn`, and u8 → u32 is a lossless widening.
    match 10_i128.checked_pow(scale as u32) {
        Some(factor) => scaled / factor,
        None => 0,
    }
}
11273
11274/// Verify a scaled NUMERIC value fits the column's declared precision.
11275/// `precision == 0` is the "unconstrained" form (bare `NUMERIC`); we
11276/// skip the check there.
11277fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
11278    if precision == 0 {
11279        return Ok(());
11280    }
11281    let limit = pow10_i128(precision);
11282    if scaled.unsigned_abs() >= limit.unsigned_abs() {
11283        return Err(EngineError::Unsupported(alloc::format!(
11284            "NUMERIC value exceeds precision {precision} for column `{col_name}`"
11285        )));
11286    }
11287    Ok(())
11288}
11289
/// `10^p` as an `i128`, usable in `const` contexts.
///
/// Exact for `p <= 38`. Larger exponents do not fit in `i128`; the
/// result saturates at `i128::MAX` instead of overflowing (which would
/// panic in debug builds and wrap to an unrelated value in release
/// builds).
const fn pow10_i128_const(p: u8) -> i128 {
    // `as` rather than `u32::from`: trait calls are not available in a
    // `const fn`, and u8 → u32 is a lossless widening.
    match 10_i128.checked_pow(p as u32) {
        Some(value) => value,
        None => i128::MAX,
    }
}
11299
/// `10^p` as an `i128` for runtime callers.
///
/// Exact for `p <= 38`; for larger exponents, which do not fit in
/// `i128`, the result saturates at `i128::MAX` instead of overflowing.
fn pow10_i128(p: u8) -> i128 {
    10_i128.checked_pow(u32::from(p)).unwrap_or(i128::MAX)
}
11303
11304/// Walk a parsed `Statement`, swapping any `NOW()` /
11305/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()` function calls for a
11306/// literal cast that wraps the engine's per-statement clock reading.
11307/// When `now_micros` is `None`, calls stay as-is and surface as
11308/// `unknown function` at eval time — keeps the error path explicit.
11309/// v4.10: pre-walk the WHERE / projection / etc. of a SELECT and
11310/// replace every subquery node with a materialised literal. SPG
11311/// only supports uncorrelated subqueries — the inner SELECT does
11312/// not see outer-row columns, so the result is the same for every
11313/// outer row and can be evaluated once.
11314///
11315/// Returns the rewritten statement; the caller passes this to the
11316/// regular row-loop executor which no longer sees Subquery nodes
11317/// in its tree.
11318impl Engine {
11319    /// v4.12 window executor. Implements `ROW_NUMBER` / `RANK` /
11320    /// `DENSE_RANK` and the partition-aware aggregates `SUM` /
11321    /// `AVG` / `COUNT` / `MIN` / `MAX`. The plan is:
11322    /// 1. Apply the WHERE filter.
11323    /// 2. For each unique `WindowFunction` node in the projection,
11324    ///    partition + sort, compute the per-row value.
11325    /// 3. Append the window values as synthetic columns (`__win_N`)
11326    ///    to the row schema.
11327    /// 4. Rewrite the projection to read those columns.
11328    /// 5. Hand off to the regular project / ORDER BY / LIMIT pipe.
11329    #[allow(
11330        clippy::too_many_lines,
11331        clippy::type_complexity,
11332        clippy::needless_range_loop
11333    )] // window-eval is one cohesive pipe; splitting fragments
11334    fn exec_select_with_window(
11335        &self,
11336        stmt: &SelectStatement,
11337        cancel: CancelToken<'_>,
11338    ) -> Result<QueryResult, EngineError> {
11339        let from = stmt.from.as_ref().ok_or_else(|| {
11340            EngineError::Unsupported("window functions require a FROM clause".into())
11341        })?;
11342        // v7.17.0 Phase 3.P0-43 — JOIN + window functions. Phase
11343        // 3.6 rejected this combination outright ("queued for
11344        // v5.x"); P0-43 materialises the join + WHERE through the
11345        // existing nested-loop helper and runs the window pipeline
11346        // on the joined row set with the combined `alias.col`
11347        // schema. The window expressions resolve through the
11348        // qualifier-aware column resolver same as the aggregate /
11349        // projection paths on JOIN.
11350        let (schema_cols_owned, alias_opt): (Vec<ColumnSchema>, Option<&str>);
11351        let filtered: Vec<Row>;
11352        if from.joins.is_empty() {
11353            let primary = &from.primary;
11354            let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
11355                StorageError::TableNotFound {
11356                    name: primary.name.clone(),
11357                }
11358            })?;
11359            let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
11360            schema_cols_owned = table.schema().columns.clone();
11361            alias_opt = Some(alias);
11362            // Materialise WHERE-filtered rows owned so the JOIN
11363            // and single-table paths share a single downstream
11364            // shape. The clone is cheap relative to the window
11365            // computation that follows.
11366            let ctx = self.ev_ctx(&schema_cols_owned, alias_opt);
11367            let mut owned: Vec<Row> = Vec::new();
11368            for (i, row) in table.rows().iter().enumerate() {
11369                if i.is_multiple_of(256) {
11370                    cancel.check()?;
11371                }
11372                if let Some(w) = &stmt.where_ {
11373                    let cond = eval::eval_expr(w, row, &ctx)?;
11374                    if !matches!(cond, Value::Bool(true)) {
11375                        continue;
11376                    }
11377                }
11378                owned.push(row.clone());
11379            }
11380            filtered = owned;
11381        } else {
11382            let (combined_schema, rows) = self.build_joined_filtered_rows(
11383                from,
11384                stmt.where_.as_ref(),
11385                cancel,
11386                None,
11387                &mut ByteBudget::new(self.max_query_bytes),
11388            )?;
11389            schema_cols_owned = combined_schema;
11390            alias_opt = None;
11391            filtered = rows;
11392        }
11393        let schema_cols = &schema_cols_owned;
11394        let ctx = self.ev_ctx(schema_cols, alias_opt);
11395        let alias = alias_opt.unwrap_or("");
11396        let n_rows = filtered.len();
11397        // Borrow refs into the owned row vec once so the downstream
11398        // `compute_window_partition` call (which takes `&[&Row]`) and
11399        // the per-row eval loops share a single backing buffer.
11400        let filtered_refs: Vec<&Row> = filtered.iter().collect();
11401
11402        // 2) Collect unique window function nodes from projection.
11403        let mut window_nodes: Vec<Expr> = Vec::new();
11404        for item in &stmt.items {
11405            if let SelectItem::Expr { expr, .. } = item {
11406                collect_window_nodes(expr, &mut window_nodes);
11407            }
11408        }
11409
11410        // 3) For each window, compute per-row value.
11411        // Index: same order as window_nodes; for row i, win_vals[w][i].
11412        let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
11413        for wnode in &window_nodes {
11414            let Expr::WindowFunction {
11415                name,
11416                args,
11417                partition_by,
11418                order_by,
11419                frame,
11420                null_treatment,
11421            } = wnode
11422            else {
11423                unreachable!("collect_window_nodes pushes only WindowFunction");
11424            };
11425            // Compute (partition_key, order_key, original_index) for each row.
11426            let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)> =
11427                Vec::with_capacity(n_rows);
11428            for (i, row) in filtered.iter().enumerate() {
11429                let pkey: Vec<Value> = partition_by
11430                    .iter()
11431                    .map(|p| eval::eval_expr(p, row, &ctx))
11432                    .collect::<Result<_, _>>()?;
11433                let okey: Vec<(Value, bool, Option<bool>)> = order_by
11434                    .iter()
11435                    .map(|(e, desc, nf)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc, *nf)))
11436                    .collect::<Result<_, _>>()?;
11437                indexed.push((pkey, okey, i));
11438            }
11439            // Sort by (partition_key, order_key). Partition key uses
11440            // a stable encoded form; order key respects ASC/DESC.
11441            indexed.sort_by(|a, b| {
11442                let p_cmp = partition_key_cmp(&a.0, &b.0);
11443                if p_cmp != core::cmp::Ordering::Equal {
11444                    return p_cmp;
11445                }
11446                order_key_cmp(&a.1, &b.1)
11447            });
11448            // Per-partition compute.
11449            let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
11450            let mut p_start = 0;
11451            while p_start < indexed.len() {
11452                let mut p_end = p_start + 1;
11453                while p_end < indexed.len()
11454                    && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
11455                        == core::cmp::Ordering::Equal
11456                {
11457                    p_end += 1;
11458                }
11459                // Compute the function within this partition slice.
11460                compute_window_partition(
11461                    name,
11462                    args,
11463                    !order_by.is_empty(),
11464                    frame.as_ref(),
11465                    *null_treatment,
11466                    &indexed[p_start..p_end],
11467                    &filtered_refs,
11468                    &ctx,
11469                    &mut out_vals,
11470                )?;
11471                p_start = p_end;
11472            }
11473            win_vals.push(out_vals);
11474        }
11475
11476        // 4) Build extended schema: original columns + synthetic.
11477        let mut ext_cols = schema_cols.clone();
11478        for i in 0..window_nodes.len() {
11479            ext_cols.push(ColumnSchema::new(
11480                alloc::format!("__win_{i}"),
11481                DataType::Text, // type doesn't matter for projection eval
11482                true,
11483            ));
11484        }
11485        // 5) Build extended rows: each row gets its window values appended.
11486        let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
11487        for i in 0..n_rows {
11488            let mut values = filtered[i].values.clone();
11489            for w in 0..window_nodes.len() {
11490                values.push(win_vals[w][i].clone());
11491            }
11492            ext_rows.push(Row::new(values));
11493        }
11494        // 6) Rewrite the projection: WindowFunction nodes → Column(__win_N).
11495        let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
11496        for item in &stmt.items {
11497            let new_item = match item {
11498                SelectItem::Wildcard => SelectItem::Wildcard,
11499                SelectItem::Expr { expr, alias } => {
11500                    let mut e = expr.clone();
11501                    rewrite_window_to_columns(&mut e, &window_nodes);
11502                    SelectItem::Expr {
11503                        expr: e,
11504                        alias: alias.clone(),
11505                    }
11506                }
11507            };
11508            rewritten_items.push(new_item);
11509        }
11510
11511        // 7) Project into final rows. JOIN case uses None so the
11512        // qualifier check in `resolve_column` falls through to the
11513        // composite `alias.col` schema lookup; single-table case
11514        // keeps the bare alias so `bare_col` resolution still
11515        // works for the projection's per-row column references.
11516        let ext_ctx = EvalContext::new(&ext_cols, alias_opt);
11517        let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
11518        let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
11519        for (i, row) in ext_rows.iter().enumerate() {
11520            if i.is_multiple_of(256) {
11521                cancel.check()?;
11522            }
11523            let mut values = Vec::with_capacity(projection.len());
11524            for p in &projection {
11525                values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
11526            }
11527            let order_keys = if stmt.order_by.is_empty() {
11528                Vec::new()
11529            } else {
11530                let mut keys = Vec::with_capacity(stmt.order_by.len());
11531                for o in &stmt.order_by {
11532                    let mut e = o.expr.clone();
11533                    rewrite_window_to_columns(&mut e, &window_nodes);
11534                    let key = eval::eval_expr(&e, row, &ext_ctx)?;
11535                    keys.push(value_to_order_key(&key)?);
11536                }
11537                keys
11538            };
11539            tagged.push((order_keys, Row::new(values)));
11540        }
11541        // ORDER BY + LIMIT/OFFSET on the projected rows.
11542        if !stmt.order_by.is_empty() {
11543            let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
11544            sort_by_keys(&mut tagged, &descs);
11545        }
11546        let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
11547        apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
11548        let final_cols: Vec<ColumnSchema> = projection
11549            .into_iter()
11550            .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
11551            .collect();
11552        Ok(QueryResult::Rows {
11553            columns: final_cols,
11554            rows: out_rows,
11555        })
11556    }
11557
11558    /// v4.11: materialise each CTE into a temp table inside a
11559    /// cloned catalog, then run the body SELECT against a fresh
11560    /// engine instance that owns the enriched catalog. The clone
11561    /// is moderately expensive — only paid by CTE-bearing queries.
11562    /// Subqueries inside CTE bodies / the main body resolve as
11563    /// usual; `clock_fn` is propagated so `NOW()` lines up.
11564    /// v7.16.2 — mailrs round-10 A.3. Materialise the
11565    /// `information_schema.*` / `pg_catalog.*` virtual views
11566    /// the SELECT references, then re-execute the SELECT
11567    /// against an enriched catalog where those views are real
11568    /// tables. Same pattern as `exec_with_ctes`. The temp
11569    /// engine carries `meta_views_materialised = true` so its
11570    /// own meta-dispatch short-circuits — without that we'd
11571    /// infinite-recurse since the temp catalog's view name
11572    /// still starts with `__spg_info_` and re-triggers the
11573    /// check.
11574    fn exec_select_with_meta_views(
11575        &self,
11576        stmt: &SelectStatement,
11577        cancel: CancelToken<'_>,
11578    ) -> Result<QueryResult, EngineError> {
11579        let mut needed: alloc::collections::BTreeSet<String> = alloc::collections::BTreeSet::new();
11580        collect_meta_view_names(stmt, &mut needed);
11581        let mut catalog = self.active_catalog().clone();
11582        for view in &needed {
11583            if catalog.get(view).is_some() {
11584                continue;
11585            }
11586            match view.as_str() {
11587                "__spg_info_columns" => {
11588                    let (schema, rows) = synth_information_schema_columns(self.active_catalog());
11589                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11590                }
11591                "__spg_info_tables" => {
11592                    let (schema, rows) = synth_information_schema_tables(self.active_catalog());
11593                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11594                }
11595                "__spg_pg_class" => {
11596                    let (schema, rows) = synth_pg_class(self.active_catalog());
11597                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11598                }
11599                "__spg_pg_attribute" => {
11600                    let (schema, rows) = synth_pg_attribute(self.active_catalog());
11601                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11602                }
11603                // v7.17.0 Phase 3.P0-50 — pg_catalog.pg_type for
11604                // sqlx / SQLAlchemy / Diesel / pgAdmin lookups.
11605                "__spg_pg_type" => {
11606                    let (schema, rows) = synth_pg_type(self.active_catalog());
11607                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11608                }
11609                // v7.17.0 Phase 3.P0-51 — pg_catalog.pg_proc for
11610                // function-name introspection (ORM / pgAdmin).
11611                "__spg_pg_proc" => {
11612                    let (schema, rows) = synth_pg_proc(self.active_catalog());
11613                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11614                }
11615                // v7.24 (round-16 D) — pg_catalog.pg_trigger. The
11616                // round-16 "why doesn't prod fire the trigger"
11617                // question was unanswerable because triggers had NO
11618                // introspection surface; tgname/tgenabled plus the
11619                // pragmatic relname/timing/events/function columns
11620                // make "is it registered and enabled" a one-liner.
11621                "__spg_pg_trigger" => {
11622                    let (schema, rows) = synth_pg_trigger(self.active_catalog());
11623                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11624                }
11625                // v7.17.0 Phase 3.P0-52 — pg_catalog.pg_namespace
11626                // (schema list for admin tools' tree views).
11627                "__spg_pg_namespace" => {
11628                    let (schema, rows) = synth_pg_namespace(self.active_catalog());
11629                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11630                }
11631                // v7.17.0 Phase 3.P0-53 — pg_catalog.pg_indexes view
11632                // for pgAdmin / DataGrip "indexes per table" listings.
11633                "__spg_pg_indexes" => {
11634                    let (schema, rows) = synth_pg_indexes(self.active_catalog());
11635                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11636                }
11637                // v7.17.0 Phase 3.P0-53 — pg_catalog.pg_index (raw)
11638                // for index introspection by ORM compilers.
11639                "__spg_pg_index" => {
11640                    let (schema, rows) = synth_pg_index_raw(self.active_catalog());
11641                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11642                }
11643                // v7.17.0 Phase 3.P0-54 — pg_catalog.pg_constraint
11644                // for FK / UNIQUE / PK / CHECK introspection.
11645                "__spg_pg_constraint" => {
11646                    let (schema, rows) = synth_pg_constraint(self.active_catalog());
11647                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11648                }
11649                // v7.17.0 Phase 3.P0-55 — pg_catalog.pg_database /
11650                // pg_roles / pg_user. SPG is single-database so
11651                // pg_database surfaces just `postgres`; pg_roles
11652                // / pg_user walk the engine's UserStore.
11653                "__spg_pg_database" => {
11654                    let (schema, rows) = synth_pg_database(self.active_catalog());
11655                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11656                }
11657                "__spg_pg_roles" | "__spg_pg_user" => {
11658                    let (schema, rows) = synth_pg_roles(self);
11659                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11660                }
11661                // v7.17.0 Phase 3.P0-56 — pg_catalog.pg_views. PG's
11662                // pg_views surfaces every CREATE VIEW result; SPG
11663                // ships one row per declared view from the catalog.
11664                "__spg_pg_views" => {
11665                    let (schema, rows) = synth_pg_views(self.active_catalog());
11666                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11667                }
11668                // v7.17.0 Phase 3.P0-56 — pg_catalog.pg_matviews.
11669                // SPG has no materialised view surface yet so the
11670                // table shares pg_views's schema but stays empty.
11671                "__spg_pg_matviews" => {
11672                    let (schema, _) = synth_pg_views(self.active_catalog());
11673                    materialise_meta_view(&mut catalog, view, schema, Vec::new())?;
11674                }
11675                // pg_catalog.pg_extension — native capability list
11676                // (mailrs embed round-12).
11677                "__spg_pg_extension" => {
11678                    let (schema, rows) = synth_pg_extension();
11679                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11680                }
11681                // v7.17.0 Phase 3.P0-57 — pg_catalog.pg_settings.
11682                "__spg_pg_settings" => {
11683                    let (schema, rows) = synth_pg_settings(self);
11684                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11685                }
11686                // v7.17.0 Phase 3.P0-63 — information_schema.KEY_COLUMN_USAGE.
11687                "__spg_info_key_column_usage" => {
11688                    let (schema, rows) = synth_info_key_column_usage(self.active_catalog());
11689                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11690                }
11691                // v7.17.0 Phase 3.P0-64 — information_schema.REFERENTIAL_CONSTRAINTS.
11692                "__spg_info_referential_constraints" => {
11693                    let (schema, rows) = synth_info_referential_constraints(self.active_catalog());
11694                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11695                }
11696                // v7.17.0 Phase 3.P0-64 — information_schema.STATISTICS.
11697                "__spg_info_statistics" => {
11698                    let (schema, rows) = synth_info_statistics(self.active_catalog());
11699                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11700                }
11701                // v7.17.0 Phase 3.P0-64 — information_schema.ROUTINES.
11702                "__spg_info_routines" => {
11703                    let (schema, rows) = synth_info_routines();
11704                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11705                }
11706                // v7.17.0 Phase 3.P0-65 — mysql.user / mysql.db.
11707                "__spg_mysql_user" => {
11708                    let (schema, rows) = synth_mysql_user(self);
11709                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11710                }
11711                "__spg_mysql_db" => {
11712                    let (schema, rows) = synth_mysql_db();
11713                    materialise_meta_view(&mut catalog, view, schema, rows)?;
11714                }
11715                _ => {
11716                    return Err(EngineError::Unsupported(alloc::format!(
11717                        "meta view {view:?} is not yet materialisable; \
11718                         v7.16.2 covers information_schema.columns / .tables \
11719                         and pg_catalog.pg_class / pg_attribute; \
11720                         v7.17.0 P0-50..P0-57 add pg_type / pg_proc / pg_namespace / \
11721                         pg_indexes / pg_index / pg_constraint / pg_database / pg_roles / \
11722                         pg_user / pg_views / pg_matviews / pg_settings"
11723                    )));
11724                }
11725            }
11726        }
11727        let mut temp = Engine::restore(catalog);
11728        if let Some(c) = self.clock {
11729            temp = temp.with_clock(c);
11730        }
11731        if let Some(f) = self.salt_fn {
11732            temp = temp.with_salt_fn(f);
11733        }
11734        temp.meta_views_materialised = true;
11735        temp.exec_select_cancel(stmt, cancel)
11736    }
11737
11738    fn exec_with_ctes(
11739        &self,
11740        stmt: &SelectStatement,
11741        cancel: CancelToken<'_>,
11742    ) -> Result<QueryResult, EngineError> {
11743        cancel.check()?;
11744        let mut catalog = self.active_catalog().clone();
11745        for cte in &stmt.ctes {
11746            if catalog.get(&cte.name).is_some() {
11747                return Err(EngineError::Unsupported(alloc::format!(
11748                    "CTE name {:?} shadows an existing table; rename the CTE",
11749                    cte.name
11750                )));
11751            }
11752            let (columns, rows) = if cte.recursive {
11753                self.materialise_recursive_cte(cte, &catalog, cancel)?
11754            } else {
11755                // v7.25 (round-17) — run the body against the
11756                // ACCUMULATED catalog so a CTE can reference every
11757                // CTE declared before it (`WITH a AS (…), b AS
11758                // (SELECT … FROM a)`). Executing on `self` lost the
11759                // already-materialised CTE tables.
11760                let mut cte_engine = Engine::restore(catalog.clone());
11761                if let Some(c) = self.clock {
11762                    cte_engine = cte_engine.with_clock(c);
11763                }
11764                if let Some(f) = self.salt_fn {
11765                    cte_engine = cte_engine.with_salt_fn(f);
11766                }
11767                let body_result = cte_engine.exec_select_cancel(&cte.body, cancel)?;
11768                let QueryResult::Rows { columns, rows } = body_result else {
11769                    return Err(EngineError::Unsupported(alloc::format!(
11770                        "CTE {:?} body did not return rows",
11771                        cte.name
11772                    )));
11773                };
11774                (columns, rows)
11775            };
11776            // v4.22: the projection builder labels any non-column
11777            // expression as Text — including literal SELECT 1.
11778            // Promote each column's type to whatever the rows
11779            // actually carry so the CTE storage table accepts them.
11780            let inferred = infer_column_types(&columns, &rows);
11781            let mut columns = inferred;
11782            // v4.22: apply optional `WITH name(a, b, c)` overrides.
11783            if !cte.column_overrides.is_empty() {
11784                if cte.column_overrides.len() != columns.len() {
11785                    return Err(EngineError::Unsupported(alloc::format!(
11786                        "CTE {:?} column list has {} names but body returns {} columns",
11787                        cte.name,
11788                        cte.column_overrides.len(),
11789                        columns.len()
11790                    )));
11791                }
11792                for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
11793                    col.name.clone_from(name);
11794                }
11795            }
11796            let schema = TableSchema::new(cte.name.clone(), columns);
11797            catalog.create_table(schema).map_err(EngineError::Storage)?;
11798            let table = catalog
11799                .get_mut(&cte.name)
11800                .expect("just-created CTE table must exist");
11801            for row in rows {
11802                table.insert(row).map_err(EngineError::Storage)?;
11803            }
11804        }
11805        // Strip CTEs from the body before running on the temp engine
11806        // so we don't recurse forever.
11807        let mut body = stmt.clone();
11808        body.ctes = Vec::new();
11809        let mut temp = Engine::restore(catalog);
11810        if let Some(c) = self.clock {
11811            temp = temp.with_clock(c);
11812        }
11813        if let Some(f) = self.salt_fn {
11814            temp = temp.with_salt_fn(f);
11815        }
11816        temp.exec_select_cancel(&body, cancel)
11817    }
11818
11819    /// v4.22: materialise a WITH RECURSIVE CTE. The body must be a
11820    /// UNION (or UNION ALL) of an anchor that does not reference
11821    /// the CTE name, and one or more recursive terms that do. The
11822    /// anchor runs first; each subsequent iteration runs the
11823    /// recursive term against a temp catalog where the CTE name is
11824    /// bound to the *previous* iteration's output. Iteration stops
11825    /// when the recursive term yields no rows; UNION (DISTINCT)
11826    /// deduplicates against the accumulated result, UNION ALL does
11827    /// not. A hard cap on total rows prevents runaway queries.
11828    #[allow(clippy::too_many_lines)]
11829    fn materialise_recursive_cte(
11830        &self,
11831        cte: &spg_sql::ast::Cte,
11832        base_catalog: &Catalog,
11833        cancel: CancelToken<'_>,
11834    ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
11835        const MAX_TOTAL_ROWS: usize = 1_000_000;
11836        const MAX_ITERATIONS: usize = 100_000;
11837        cancel.check()?;
11838        if cte.body.unions.is_empty() {
11839            return Err(EngineError::Unsupported(alloc::format!(
11840                "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
11841                cte.name
11842            )));
11843        }
11844        // Anchor: the body's leading SELECT, with unions stripped.
11845        let mut anchor = cte.body.clone();
11846        let union_terms = core::mem::take(&mut anchor.unions);
11847        anchor.ctes = Vec::new();
11848        // Anchor must not reference the CTE name.
11849        if select_refers_to(&anchor, &cte.name) {
11850            return Err(EngineError::Unsupported(alloc::format!(
11851                "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
11852                cte.name
11853            )));
11854        }
11855        let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
11856        let QueryResult::Rows {
11857            columns: anchor_cols,
11858            rows: anchor_rows,
11859        } = anchor_result
11860        else {
11861            return Err(EngineError::Unsupported(alloc::format!(
11862                "WITH RECURSIVE {:?}: anchor did not return rows",
11863                cte.name
11864            )));
11865        };
11866        // The projection builder labels non-column expressions Text;
11867        // refine column types from the anchor's actual values so the
11868        // intermediate iter-catalog tables accept them.
11869        let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
11870        if !cte.column_overrides.is_empty() {
11871            if cte.column_overrides.len() != columns.len() {
11872                return Err(EngineError::Unsupported(alloc::format!(
11873                    "CTE {:?} column list has {} names but anchor returns {} columns",
11874                    cte.name,
11875                    cte.column_overrides.len(),
11876                    columns.len()
11877                )));
11878            }
11879            for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
11880                col.name.clone_from(name);
11881            }
11882        }
11883        let mut all_rows: Vec<Row> = anchor_rows.clone();
11884        let mut working_set: Vec<Row> = anchor_rows;
11885        let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
11886        // Track at least one "all UNION ALL" flag — if every union
11887        // kind is ALL we skip the dedup step (faster + matches PG).
11888        let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
11889        if !all_union_all {
11890            for r in &all_rows {
11891                seen.insert(encode_row_key(r));
11892            }
11893        }
11894        for iter in 0..MAX_ITERATIONS {
11895            cancel.check()?;
11896            if working_set.is_empty() {
11897                break;
11898            }
11899            // Build a fresh catalog: base + CTE bound to working_set.
11900            let mut iter_catalog = base_catalog.clone();
11901            let schema = TableSchema::new(cte.name.clone(), columns.clone());
11902            iter_catalog
11903                .create_table(schema)
11904                .map_err(EngineError::Storage)?;
11905            {
11906                let table = iter_catalog.get_mut(&cte.name).expect("just-created");
11907                for row in &working_set {
11908                    table.insert(row.clone()).map_err(EngineError::Storage)?;
11909                }
11910            }
11911            let mut iter_engine = Engine::restore(iter_catalog);
11912            if let Some(c) = self.clock {
11913                iter_engine = iter_engine.with_clock(c);
11914            }
11915            if let Some(f) = self.salt_fn {
11916                iter_engine = iter_engine.with_salt_fn(f);
11917            }
11918            // Run each recursive term in sequence and collect new rows.
11919            let mut next_set: Vec<Row> = Vec::new();
11920            for (_, term) in &union_terms {
11921                let mut term = term.clone();
11922                term.ctes = Vec::new();
11923                let r = iter_engine.exec_select_cancel(&term, cancel)?;
11924                let QueryResult::Rows {
11925                    columns: rc,
11926                    rows: rs,
11927                } = r
11928                else {
11929                    return Err(EngineError::Unsupported(alloc::format!(
11930                        "WITH RECURSIVE {:?}: recursive term did not return rows",
11931                        cte.name
11932                    )));
11933                };
11934                if rc.len() != columns.len() {
11935                    return Err(EngineError::Unsupported(alloc::format!(
11936                        "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
11937                        cte.name,
11938                        rc.len(),
11939                        columns.len()
11940                    )));
11941                }
11942                for row in rs {
11943                    if !all_union_all {
11944                        let key = encode_row_key(&row);
11945                        if !seen.insert(key) {
11946                            continue;
11947                        }
11948                    }
11949                    next_set.push(row);
11950                }
11951            }
11952            if next_set.is_empty() {
11953                break;
11954            }
11955            all_rows.extend(next_set.iter().cloned());
11956            working_set = next_set;
11957            if all_rows.len() > MAX_TOTAL_ROWS {
11958                return Err(EngineError::Unsupported(alloc::format!(
11959                    "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
11960                    cte.name
11961                )));
11962            }
11963            if iter + 1 == MAX_ITERATIONS {
11964                return Err(EngineError::Unsupported(alloc::format!(
11965                    "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
11966                    cte.name
11967                )));
11968            }
11969        }
11970        Ok((columns, all_rows))
11971    }
11972
11973    fn resolve_select_subqueries(
11974        &self,
11975        stmt: &mut SelectStatement,
11976        cancel: CancelToken<'_>,
11977    ) -> Result<(), EngineError> {
11978        for item in &mut stmt.items {
11979            if let SelectItem::Expr { expr, .. } = item {
11980                self.resolve_expr_subqueries(expr, cancel)?;
11981            }
11982        }
11983        if let Some(w) = &mut stmt.where_ {
11984            self.resolve_expr_subqueries(w, cancel)?;
11985        }
11986        // v7.24.1 — JOIN ON conditions can carry subqueries too;
11987        // they were never walked, so even an UNCORRELATED subquery
11988        // in ON hit "subquery reached row eval".
11989        if let Some(from) = &mut stmt.from {
11990            for j in &mut from.joins {
11991                if let Some(on) = &mut j.on {
11992                    self.resolve_expr_subqueries(on, cancel)?;
11993                }
11994            }
11995        }
11996        if let Some(gs) = &mut stmt.group_by {
11997            for g in gs {
11998                self.resolve_expr_subqueries(g, cancel)?;
11999            }
12000        }
12001        if let Some(h) = &mut stmt.having {
12002            self.resolve_expr_subqueries(h, cancel)?;
12003        }
12004        for o in &mut stmt.order_by {
12005            self.resolve_expr_subqueries(&mut o.expr, cancel)?;
12006        }
12007        for (_, peer) in &mut stmt.unions {
12008            self.resolve_select_subqueries(peer, cancel)?;
12009        }
12010        Ok(())
12011    }
12012
12013    #[allow(clippy::only_used_in_recursion)] // engine handle reads aren't really pure
12014    fn resolve_expr_subqueries(
12015        &self,
12016        e: &mut Expr,
12017        cancel: CancelToken<'_>,
12018    ) -> Result<(), EngineError> {
12019        // Replace-on-this-node cases first.
12020        if let Some(replacement) = self.subquery_replacement(e, cancel)? {
12021            *e = replacement;
12022            return Ok(());
12023        }
12024        match e {
12025            Expr::AggregateOrdered { call, order_by, .. } => {
12026                self.resolve_expr_subqueries(call, cancel)?;
12027                for o in order_by.iter_mut() {
12028                    self.resolve_expr_subqueries(&mut o.expr, cancel)?;
12029                }
12030            }
12031            Expr::Binary { lhs, rhs, .. } => {
12032                self.resolve_expr_subqueries(lhs, cancel)?;
12033                self.resolve_expr_subqueries(rhs, cancel)?;
12034            }
12035            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12036                self.resolve_expr_subqueries(expr, cancel)?;
12037            }
12038            Expr::FunctionCall { args, .. } => {
12039                for a in args {
12040                    self.resolve_expr_subqueries(a, cancel)?;
12041                }
12042            }
12043            Expr::Like { expr, pattern, .. } => {
12044                self.resolve_expr_subqueries(expr, cancel)?;
12045                self.resolve_expr_subqueries(pattern, cancel)?;
12046            }
12047            Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
12048            // v4.12 window functions — recurse into args + ORDER BY
12049            // + PARTITION BY in case they carry inner subqueries.
12050            Expr::WindowFunction {
12051                args,
12052                partition_by,
12053                order_by,
12054                ..
12055            } => {
12056                for a in args {
12057                    self.resolve_expr_subqueries(a, cancel)?;
12058                }
12059                for p in partition_by {
12060                    self.resolve_expr_subqueries(p, cancel)?;
12061                }
12062                for (e, _, _) in order_by {
12063                    self.resolve_expr_subqueries(e, cancel)?;
12064                }
12065            }
12066            // Subquery nodes are handled in subquery_replacement
12067            // (which returned None — defensive no-op); Literal /
12068            // Column are leaves.
12069            Expr::ScalarSubquery(_)
12070            | Expr::Exists { .. }
12071            | Expr::InSubquery { .. }
12072            | Expr::Literal(_)
12073            | Expr::Placeholder(_)
12074            | Expr::Column(_) => {}
12075            // v7.30.2 — list elements can carry scalar subqueries
12076            // (`x IN (1, (SELECT …))`).
12077            Expr::InList { expr, list, .. } => {
12078                self.resolve_expr_subqueries(expr, cancel)?;
12079                for item in list {
12080                    self.resolve_expr_subqueries(item, cancel)?;
12081                }
12082            }
12083            // v7.10.10 — recurse children.
12084            Expr::Array(items) => {
12085                for elem in items {
12086                    self.resolve_expr_subqueries(elem, cancel)?;
12087                }
12088            }
12089            Expr::ArraySubscript { target, index } => {
12090                self.resolve_expr_subqueries(target, cancel)?;
12091                self.resolve_expr_subqueries(index, cancel)?;
12092            }
12093            Expr::AnyAll { expr, array, .. } => {
12094                self.resolve_expr_subqueries(expr, cancel)?;
12095                self.resolve_expr_subqueries(array, cancel)?;
12096            }
12097            Expr::Case {
12098                operand,
12099                branches,
12100                else_branch,
12101            } => {
12102                if let Some(o) = operand {
12103                    self.resolve_expr_subqueries(o, cancel)?;
12104                }
12105                for (w, t) in branches {
12106                    self.resolve_expr_subqueries(w, cancel)?;
12107                    self.resolve_expr_subqueries(t, cancel)?;
12108                }
12109                if let Some(e) = else_branch {
12110                    self.resolve_expr_subqueries(e, cancel)?;
12111                }
12112            }
12113        }
12114        Ok(())
12115    }
12116
    /// v4.23: per-row eval that handles correlated subqueries.
    /// Equivalent to `eval::eval_expr` when the expression has no
    /// subqueries; otherwise clones the expression, substitutes
    /// outer-row columns into each surviving subquery node, runs
    /// the inner SELECT, and replaces the node with the literal
    /// result. Only the WHERE-filter call sites use this path so
    /// the uncorrelated fast path is preserved everywhere else.
    ///
    /// Parameters:
    /// * `expr` — the host expression. Its ADDRESS is used as a cache
    ///   key inside `memo`, so the same reference must be passed for
    ///   every row for the caches to hit.
    /// * `row` / `ctx` — the outer row and its evaluation context.
    /// * `cancel` — forwarded to every inner SELECT execution.
    /// * `memo` — optional per-query cache. When `None`, none of the
    ///   cached or batched paths below are taken.
    ///
    /// Errors: failures of `eval::eval_expr` are wrapped in
    /// `EngineError::Eval`; errors from the batch builder, the splice
    /// step and the correlated resolver propagate unchanged.
    fn eval_expr_with_correlated(
        &self,
        expr: &Expr,
        row: &Row,
        ctx: &EvalContext<'_>,
        cancel: CancelToken<'_>,
        mut memo: Option<&mut memoize::MemoizeCache>,
    ) -> Result<Value, EngineError> {
        // v7.30.2 (mailrs round-25) — the has-subquery walk is
        // O(tree) and a materialised `IN (…)` list makes the tree
        // huge; cache the answer per expression address so the
        // per-row dispatch stops re-walking 24k list elements.
        let has_subq = if let Some(m) = memo.as_deref_mut() {
            let key = core::ptr::from_ref::<Expr>(expr) as usize;
            match m.has_subquery.get(&key) {
                Some(b) => *b,
                None => {
                    let b = expr_has_subquery(expr);
                    m.has_subquery.insert(key, b);
                    b
                }
            }
        } else {
            // No memo: walk the tree on every call.
            expr_has_subquery(expr)
        };
        if !has_subq {
            // A large materialised `IN (…)` list inside the WHERE
            // makes the plain eval O(rows × list); route through the
            // per-query membership set (built once, keyed by node
            // address) when one is reachable on the AND spine.
            if let Some(m) = memo.as_deref_mut()
                && expr_may_use_in_set(expr)
            {
                return eval_with_in_sets(expr, row, ctx, m);
            }
            return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
        }
        // v7.29 (3c) - per-expression plan: the batch maps for this
        // host expression's scalar subqueries are looked up by the
        // expression's ADDRESS (stable across the row loop), so the
        // hot path does zero AST formatting. Building the plan (and
        // its Display-keyed group maps) happens once per expression.
        if let Some(m) = memo.as_deref_mut() {
            let key = core::ptr::from_ref::<Expr>(expr) as usize;
            // Plan hit: skip the collection walk entirely (it ran
            // once per group otherwise - 70k walks per inbox query).
            // The memo is per-query and host expressions outlive it,
            // so an address that hit once stays valid.
            let plan_hit = m.expr_plans.contains_key(&key);
            let mut subs: Vec<&SelectStatement> = Vec::new();
            if !plan_hit {
                collect_scalar_subqueries(expr, &mut subs);
            }
            // A plan is only stored when at least one scalar subquery
            // was collected; an expression with none is re-walked on
            // every call because no entry marks it as already seen.
            if !plan_hit && !subs.is_empty() {
                let mut plan: Vec<Option<alloc::rc::Rc<memoize::GroupMap>>> =
                    Vec::with_capacity(subs.len());
                for sub in &subs {
                    // The Display form of the subquery keys the shared
                    // group-map table; a `None` entry records that the
                    // batch builder declined this subquery.
                    let repr = alloc::format!("{sub}");
                    if !m.group_maps.contains_key(&repr) {
                        let built = self
                            .try_batch_correlated_scalar(sub, cancel)?
                            .map(alloc::rc::Rc::new);
                        m.group_maps.insert(repr.clone(), built);
                    }
                    plan.push(m.group_maps.get(&repr).cloned().flatten());
                }
                // The template is a copy of the host expression passed
                // through `hollow_scalar_subqueries`; it is what the
                // fast path clones per row instead of `expr`.
                let mut template = expr.clone();
                hollow_scalar_subqueries(&mut template);
                m.expr_plans.insert(key, (subs.len(), plan, template));
            }
            // The fast path requires a non-empty plan in which EVERY
            // slot has a group map; otherwise fall through to the
            // generic per-row resolver below.
            if let Some((_, plan, template)) = m.expr_plans.get(&key)
                && !plan.is_empty()
                && plan.iter().all(|p| p.is_some())
            {
                // Fast path: every scalar subquery resolves via its
                // map; clone the HOLLOW template (subquery bodies
                // emptied at plan time - cloning full subquery ASTs
                // per row was the dominant malloc load), splice map
                // values, eval. Exists/IN subqueries (if any) still
                // drop to the resolver.
                let plan = plan.clone();
                let mut e = template.clone();
                let mut idx = 0usize;
                // A `false` result from the splice abandons the fast
                // path; the generic path below then starts over from
                // the untouched `expr`.
                let ok = splice_planned_subqueries(&mut e, &plan, &mut idx, row, ctx)?;
                if ok {
                    if expr_has_subquery(&e) {
                        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
                    }
                    return eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval);
                }
            }
        }
        // Generic path: resolve every surviving subquery node against
        // this row on a private copy, then evaluate the result.
        let mut e = expr.clone();
        self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
        eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
    }
12220
12221    fn resolve_correlated_in_expr(
12222        &self,
12223        e: &mut Expr,
12224        row: &Row,
12225        ctx: &EvalContext<'_>,
12226        cancel: CancelToken<'_>,
12227        mut memo: Option<&mut memoize::MemoizeCache>,
12228    ) -> Result<(), EngineError> {
12229        match e {
12230            Expr::AggregateOrdered { call, order_by, .. } => {
12231                self.resolve_correlated_in_expr(call, row, ctx, cancel, memo.as_deref_mut())?;
12232                for o in order_by.iter_mut() {
12233                    self.resolve_correlated_in_expr(
12234                        &mut o.expr,
12235                        row,
12236                        ctx,
12237                        cancel,
12238                        memo.as_deref_mut(),
12239                    )?;
12240                }
12241            }
12242            Expr::ScalarSubquery(inner) => {
12243                // v7.29 (round-22 phase 3) — batch path first: a
12244                // correlated scalar of the `inner_col = outer_col
12245                // [ORDER BY … LIMIT 1]` shape evaluates ONCE as a
12246                // grouped scan; per-row resolution becomes a map
12247                // lookup. 23.5k per-group executions (~900 ms) became
12248                // one scan + lookups.
12249                if memo.is_some() {
12250                    let repr = alloc::format!("{}", **inner);
12251                    let entry_known = memo
12252                        .as_ref()
12253                        .is_some_and(|m| m.group_maps.contains_key(&repr));
12254                    if !entry_known {
12255                        let built = self
12256                            .try_batch_correlated_scalar(inner, cancel)?
12257                            .map(alloc::rc::Rc::new);
12258                        if let Some(m) = memo.as_deref_mut() {
12259                            m.group_maps.insert(repr.clone(), built);
12260                        }
12261                    }
12262                    if let Some(m) = memo.as_deref_mut()
12263                        && let Some(Some(gm)) = m.group_maps.get(&repr)
12264                    {
12265                        let (outer_col, map) = gm.as_ref();
12266                        let key_v = eval::eval_expr(&Expr::Column(outer_col.clone()), row, ctx)
12267                            .map_err(EngineError::Eval)?;
12268                        let v = if matches!(key_v, Value::Null) {
12269                            Value::Null
12270                        } else {
12271                            map.get(&aggregate::encode_key(core::slice::from_ref(&key_v)))
12272                                .cloned()
12273                                .unwrap_or(Value::Null)
12274                        };
12275                        *e = value_to_literal_expr(v)?;
12276                        return Ok(());
12277                    }
12278                }
12279                // v6.2.6 — Memoize: build the cache key from the
12280                // pre-substitution subquery repr + the outer row's
12281                // values. Two outer rows with identical correlated
12282                // values hit the same entry.
12283                let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
12284                    subquery_repr: alloc::format!("{}", **inner),
12285                    outer_values: row.values.clone(),
12286                });
12287                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
12288                    && let Some(cached) = cache.get(k)
12289                {
12290                    *e = value_to_literal_expr(cached)?;
12291                    return Ok(());
12292                }
12293                let mut s = (**inner).clone();
12294                substitute_outer_columns(&mut s, row, ctx);
12295                let r = self.exec_select_cancel(&s, cancel)?;
12296                let QueryResult::Rows { rows, .. } = r else {
12297                    return Err(EngineError::Unsupported(
12298                        "scalar subquery: inner did not return rows".into(),
12299                    ));
12300                };
12301                let value = match rows.as_slice() {
12302                    [] => Value::Null,
12303                    [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
12304                    _ => {
12305                        return Err(EngineError::Unsupported(alloc::format!(
12306                            "scalar subquery returned {} rows; expected 0 or 1",
12307                            rows.len()
12308                        )));
12309                    }
12310                };
12311                if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
12312                    cache.insert(k, value.clone());
12313                }
12314                *e = value_to_literal_expr(value)?;
12315            }
12316            Expr::Exists { subquery, negated } => {
12317                let mut s = (**subquery).clone();
12318                substitute_outer_columns(&mut s, row, ctx);
12319                let r = self.exec_select_cancel(&s, cancel)?;
12320                let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
12321                let bit = if *negated { !exists } else { exists };
12322                *e = Expr::Literal(Literal::Bool(bit));
12323            }
12324            Expr::InSubquery {
12325                expr: lhs,
12326                subquery,
12327                negated,
12328            } => {
12329                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
12330                let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
12331                let mut s = (**subquery).clone();
12332                substitute_outer_columns(&mut s, row, ctx);
12333                let r = self.exec_select_cancel(&s, cancel)?;
12334                let QueryResult::Rows { columns, rows, .. } = r else {
12335                    return Err(EngineError::Unsupported(
12336                        "IN-subquery: inner did not return rows".into(),
12337                    ));
12338                };
12339                if columns.len() != 1 {
12340                    return Err(EngineError::Unsupported(alloc::format!(
12341                        "IN-subquery must project exactly one column; got {}",
12342                        columns.len()
12343                    )));
12344                }
12345                let mut found = false;
12346                let mut any_null = false;
12347                for r0 in rows {
12348                    let v = r0.values.into_iter().next().unwrap_or(Value::Null);
12349                    if v.is_null() {
12350                        any_null = true;
12351                        continue;
12352                    }
12353                    if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
12354                        found = true;
12355                        break;
12356                    }
12357                }
12358                let bit = if found {
12359                    !*negated
12360                } else if any_null {
12361                    return Err(EngineError::Unsupported(
12362                        "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
12363                    ));
12364                } else {
12365                    *negated
12366                };
12367                *e = Expr::Literal(Literal::Bool(bit));
12368            }
12369            Expr::Binary { lhs, rhs, .. } => {
12370                self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
12371                self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
12372            }
12373            Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12374                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
12375            }
12376            Expr::Like { expr, pattern, .. } => {
12377                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
12378                self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
12379            }
12380            Expr::FunctionCall { args, .. } => {
12381                for a in args {
12382                    self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
12383                }
12384            }
12385            Expr::Extract { source, .. } => {
12386                self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
12387            }
12388            Expr::WindowFunction { .. }
12389            | Expr::Literal(_)
12390            | Expr::Placeholder(_)
12391            | Expr::Column(_) => {}
12392            // v7.10.10 — recurse children.
12393            Expr::Array(items) => {
12394                for elem in items {
12395                    self.resolve_correlated_in_expr(elem, row, ctx, cancel, memo.as_deref_mut())?;
12396                }
12397            }
12398            Expr::ArraySubscript { target, index } => {
12399                self.resolve_correlated_in_expr(target, row, ctx, cancel, memo.as_deref_mut())?;
12400                self.resolve_correlated_in_expr(index, row, ctx, cancel, memo.as_deref_mut())?;
12401            }
12402            Expr::AnyAll { expr, array, .. } => {
12403                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
12404                self.resolve_correlated_in_expr(array, row, ctx, cancel, memo.as_deref_mut())?;
12405            }
12406            Expr::InList { expr, list, .. } => {
12407                self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
12408                for item in list {
12409                    self.resolve_correlated_in_expr(item, row, ctx, cancel, memo.as_deref_mut())?;
12410                }
12411            }
12412            Expr::Case {
12413                operand,
12414                branches,
12415                else_branch,
12416            } => {
12417                if let Some(o) = operand {
12418                    self.resolve_correlated_in_expr(o, row, ctx, cancel, memo.as_deref_mut())?;
12419                }
12420                for (w, t) in branches {
12421                    self.resolve_correlated_in_expr(w, row, ctx, cancel, memo.as_deref_mut())?;
12422                    self.resolve_correlated_in_expr(t, row, ctx, cancel, memo.as_deref_mut())?;
12423                }
12424                if let Some(e) = else_branch {
12425                    self.resolve_correlated_in_expr(e, row, ctx, cancel, memo.as_deref_mut())?;
12426                }
12427            }
12428        }
12429        Ok(())
12430    }
12431
12432    fn subquery_replacement(
12433        &self,
12434        e: &Expr,
12435        cancel: CancelToken<'_>,
12436    ) -> Result<Option<Expr>, EngineError> {
12437        match e {
12438            Expr::ScalarSubquery(inner) => {
12439                let mut s = (**inner).clone();
12440                // Recurse into the inner SELECT first so nested
12441                // subqueries materialise bottom-up.
12442                self.resolve_select_subqueries(&mut s, cancel)?;
12443                let r = match self.exec_bare_select_cancel(&s, cancel) {
12444                    Ok(r) => r,
12445                    Err(e) if is_correlation_error(&e) => return Ok(None),
12446                    Err(e) => return Err(e),
12447                };
12448                let QueryResult::Rows { rows, .. } = r else {
12449                    return Err(EngineError::Unsupported(
12450                        "scalar subquery: inner statement did not return rows".into(),
12451                    ));
12452                };
12453                let value = match rows.as_slice() {
12454                    [] => Value::Null,
12455                    [row] => row.values.first().cloned().unwrap_or(Value::Null),
12456                    _ => {
12457                        return Err(EngineError::Unsupported(alloc::format!(
12458                            "scalar subquery returned {} rows; expected 0 or 1",
12459                            rows.len()
12460                        )));
12461                    }
12462                };
12463                Ok(Some(value_to_literal_expr(value)?))
12464            }
12465            Expr::Exists { subquery, negated } => {
12466                let mut s = (**subquery).clone();
12467                self.resolve_select_subqueries(&mut s, cancel)?;
12468                let r = match self.exec_bare_select_cancel(&s, cancel) {
12469                    Ok(r) => r,
12470                    Err(e) if is_correlation_error(&e) => return Ok(None),
12471                    Err(e) => return Err(e),
12472                };
12473                let exists = match r {
12474                    QueryResult::Rows { rows, .. } => !rows.is_empty(),
12475                    QueryResult::CommandOk { .. } => false,
12476                };
12477                let bit = if *negated { !exists } else { exists };
12478                Ok(Some(Expr::Literal(Literal::Bool(bit))))
12479            }
12480            Expr::InSubquery {
12481                expr,
12482                subquery,
12483                negated,
12484            } => {
12485                let mut s = (**subquery).clone();
12486                self.resolve_select_subqueries(&mut s, cancel)?;
12487                let r = match self.exec_bare_select_cancel(&s, cancel) {
12488                    Ok(r) => r,
12489                    Err(e) if is_correlation_error(&e) => return Ok(None),
12490                    Err(e) => return Err(e),
12491                };
12492                let QueryResult::Rows { columns, rows, .. } = r else {
12493                    return Err(EngineError::Unsupported(
12494                        "IN-subquery: inner statement did not return rows".into(),
12495                    ));
12496                };
12497                if columns.len() != 1 {
12498                    return Err(EngineError::Unsupported(alloc::format!(
12499                        "IN-subquery must project exactly one column; got {}",
12500                        columns.len()
12501                    )));
12502                }
12503                // v7.30.2 (mailrs round-25) — flat InList, NOT an OR-Eq
12504                // chain: chain depth scaled with the inner result's ROW
12505                // COUNT, so one 24k-match search overflowed the worker
12506                // stack (recursive eval + recursive Box drop) and
12507                // aborted the embedding host process.
12508                let mut list: Vec<Expr> = Vec::with_capacity(rows.len());
12509                for row in rows {
12510                    let v = row.values.into_iter().next().unwrap_or(Value::Null);
12511                    list.push(value_to_literal_expr(v)?);
12512                }
12513                Ok(Some(Expr::InList {
12514                    expr: expr.clone(),
12515                    list,
12516                    negated: *negated,
12517                }))
12518            }
12519            _ => Ok(None),
12520        }
12521    }
12522}
12523
12524// ---- v4.12 window-function helpers ----
12525// The (partition-key, order-key, original-index) tuple shape used
12526// across these helpers is intrinsic to the planner. Factoring it
12527// into a typedef adds indirection without making the code clearer,
12528// so several lints are allowed inline on the affected functions
12529// rather than module-wide.
12530
12531/// v4.22: cheap structural scan for `FROM <name>` (qualified or
12532/// not) inside a SELECT — used to verify the anchor of a WITH
12533/// RECURSIVE CTE doesn't recurse into itself. Conservative: walks
12534/// FROM joins, subqueries, and unions.
12535fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
12536    if let Some(from) = &stmt.from
12537        && from_refers_to(from, target)
12538    {
12539        return true;
12540    }
12541    for (_, peer) in &stmt.unions {
12542        if select_refers_to(peer, target) {
12543            return true;
12544        }
12545    }
12546    for item in &stmt.items {
12547        if let SelectItem::Expr { expr, .. } = item
12548            && expr_refers_to(expr, target)
12549        {
12550            return true;
12551        }
12552    }
12553    if let Some(w) = &stmt.where_
12554        && expr_refers_to(w, target)
12555    {
12556        return true;
12557    }
12558    false
12559}
12560
12561fn from_refers_to(from: &FromClause, target: &str) -> bool {
12562    if from.primary.name.eq_ignore_ascii_case(target) {
12563        return true;
12564    }
12565    from.joins
12566        .iter()
12567        .any(|j| j.table.name.eq_ignore_ascii_case(target))
12568}
12569
12570/// v7.28 (round-22) — collect every QUALIFIED column referenced
12571/// anywhere in a SELECT (subquery bodies included). Returns None
12572/// when a wildcard or a bare column name makes static attribution
12573/// unsafe — callers then keep every column.
12574fn collect_qualified_refs(
12575    stmt: &SelectStatement,
12576    out: &mut alloc::collections::BTreeSet<(String, String)>,
12577) -> Option<()> {
12578    for item in &stmt.items {
12579        match item {
12580            SelectItem::Wildcard => return None,
12581            SelectItem::Expr { expr, .. } => collect_qualified_refs_expr(expr, out)?,
12582        }
12583    }
12584    if let Some(w) = &stmt.where_ {
12585        collect_qualified_refs_expr(w, out)?;
12586    }
12587    if let Some(from) = &stmt.from {
12588        for j in &from.joins {
12589            if let Some(on) = &j.on {
12590                collect_qualified_refs_expr(on, out)?;
12591            }
12592            if j.table.lateral_subquery.is_some() {
12593                return None;
12594            }
12595        }
12596    }
12597    if let Some(gs) = &stmt.group_by {
12598        for g in gs {
12599            collect_qualified_refs_expr(g, out)?;
12600        }
12601    }
12602    if let Some(h) = &stmt.having {
12603        collect_qualified_refs_expr(h, out)?;
12604    }
12605    for o in &stmt.order_by {
12606        collect_qualified_refs_expr(&o.expr, out)?;
12607    }
12608    for (_, peer) in &stmt.unions {
12609        collect_qualified_refs(peer, out)?;
12610    }
12611    for cte in &stmt.ctes {
12612        collect_qualified_refs(&cte.body, out)?;
12613    }
12614    Some(())
12615}
12616
12617fn collect_qualified_refs_expr(
12618    e: &Expr,
12619    out: &mut alloc::collections::BTreeSet<(String, String)>,
12620) -> Option<()> {
12621    // Two passes so the column and subquery visitors don't both
12622    // capture `out` mutably.
12623    let mut cols: Vec<spg_sql::ast::ColumnName> = Vec::new();
12624    let mut subs: Vec<&SelectStatement> = Vec::new();
12625    visit_expr_columns_and_subqueries(
12626        e,
12627        &mut |c: &spg_sql::ast::ColumnName| cols.push(c.clone()),
12628        &mut |sub| subs.push(sub),
12629    );
12630    for c in cols {
12631        match c.qualifier {
12632            Some(q) => {
12633                out.insert((q, c.name));
12634            }
12635            None => return None,
12636        }
12637    }
12638    for sub in subs {
12639        collect_qualified_refs(sub, out)?;
12640    }
12641    Some(())
12642}
12643
12644/// Immutable walk over an Expr visiting every Column and every
12645/// nested SelectStatement (v7.28).
12646fn visit_expr_columns_and_subqueries<'a>(
12647    e: &'a Expr,
12648    on_col: &mut impl FnMut(&'a spg_sql::ast::ColumnName),
12649    on_sub: &mut impl FnMut(&'a SelectStatement),
12650) {
12651    match e {
12652        Expr::Column(c) => on_col(c),
12653        Expr::ScalarSubquery(s) => on_sub(s),
12654        Expr::Exists { subquery, .. } => on_sub(subquery),
12655        Expr::InSubquery { expr, subquery, .. } => {
12656            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12657            on_sub(subquery);
12658        }
12659        Expr::Binary { lhs, rhs, .. } => {
12660            visit_expr_columns_and_subqueries(lhs, on_col, on_sub);
12661            visit_expr_columns_and_subqueries(rhs, on_col, on_sub);
12662        }
12663        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12664            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12665        }
12666        Expr::Like { expr, pattern, .. } => {
12667            visit_expr_columns_and_subqueries(expr, on_col, on_sub);
12668            visit_expr_columns_and_subqueries(pattern, on_col, on_sub);
12669        }
12670        Expr::FunctionCall { args, .. } => {
12671            for a in args {
12672                visit_expr_columns_and_subqueries(a, on_col, on_sub);
12673            }
12674        }
12675        Expr::AggregateOrdered { call, order_by, .. } => {
12676            visit_expr_columns_and_subqueries(call, on_col, on_sub);
12677            for o in order_by {
12678                visit_expr_columns_and_subqueries(&o.expr, on_col, on_sub);
12679            }
12680        }
12681        Expr::Case {
12682            operand,
12683            branches,
12684            else_branch,
12685        } => {
12686            if let Some(op) = operand {
12687                visit_expr_columns_and_subqueries(op, on_col, on_sub);
12688            }
12689            for (w, t) in branches {
12690                visit_expr_columns_and_subqueries(w, on_col, on_sub);
12691                visit_expr_columns_and_subqueries(t, on_col, on_sub);
12692            }
12693            if let Some(eb) = else_branch {
12694                visit_expr_columns_and_subqueries(eb, on_col, on_sub);
12695            }
12696        }
12697        Expr::ArraySubscript { target, index } => {
12698            visit_expr_columns_and_subqueries(target, on_col, on_sub);
12699            visit_expr_columns_and_subqueries(index, on_col, on_sub);
12700        }
12701        Expr::Literal(_) | Expr::Placeholder(_) => {}
12702        // Exotic nodes (window etc.) — visit nothing extra; their
12703        // columns are caught when the caller bails on bare names
12704        // elsewhere, and window queries skip pruning entirely at
12705        // the call sites.
12706        _ => {
12707            // Exotic node (window function etc.): report an
12708            // unattributable marker so callers disable pruning.
12709            static BAIL: spg_sql::ast::ColumnName = spg_sql::ast::ColumnName {
12710                qualifier: None,
12711                name: String::new(),
12712            };
12713            on_col(&BAIL);
12714        }
12715    }
12716}
12717
12718/// v7.28 (round-22) — collect every Column qualifier in an expr;
12719/// `all_qualified` flips false on any bare column (those can't be
12720/// attributed to one table safely, so the pushdown skips them).
12721fn collect_column_qualifiers<'e>(e: &'e Expr, out: &mut Vec<&'e str>, all_qualified: &mut bool) {
12722    if let Expr::Column(c) = e {
12723        match &c.qualifier {
12724            Some(q) => out.push(q.as_str()),
12725            None => *all_qualified = false,
12726        }
12727        return;
12728    }
12729    // Reuse the canonical immutable walk via describe's walker shape:
12730    // recurse the common containers.
12731    match e {
12732        Expr::Binary { lhs, rhs, .. } => {
12733            collect_column_qualifiers(lhs, out, all_qualified);
12734            collect_column_qualifiers(rhs, out, all_qualified);
12735        }
12736        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12737            collect_column_qualifiers(expr, out, all_qualified);
12738        }
12739        Expr::Like { expr, pattern, .. } => {
12740            collect_column_qualifiers(expr, out, all_qualified);
12741            collect_column_qualifiers(pattern, out, all_qualified);
12742        }
12743        Expr::FunctionCall { args, .. } => {
12744            for a in args {
12745                collect_column_qualifiers(a, out, all_qualified);
12746            }
12747        }
12748        Expr::Literal(_) | Expr::Placeholder(_) => {}
12749        // Anything exotic (CASE, subquery, window, arrays…):
12750        // conservatively mark unattributable.
12751        _ => *all_qualified = false,
12752    }
12753}
12754
12755fn expr_refers_to(e: &Expr, target: &str) -> bool {
12756    match e {
12757        Expr::AggregateOrdered { call, order_by, .. } => {
12758            expr_refers_to(call, target) || order_by.iter().any(|o| expr_refers_to(&o.expr, target))
12759        }
12760        Expr::ScalarSubquery(s) => select_refers_to(s, target),
12761        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
12762            select_refers_to(subquery, target)
12763        }
12764        Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
12765        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
12766            expr_refers_to(expr, target)
12767        }
12768        Expr::Like { expr, pattern, .. } => {
12769            expr_refers_to(expr, target) || expr_refers_to(pattern, target)
12770        }
12771        Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
12772        Expr::Extract { source, .. } => expr_refers_to(source, target),
12773        Expr::WindowFunction {
12774            args,
12775            partition_by,
12776            order_by,
12777            ..
12778        } => {
12779            args.iter().any(|a| expr_refers_to(a, target))
12780                || partition_by.iter().any(|p| expr_refers_to(p, target))
12781                || order_by.iter().any(|(o, _, _)| expr_refers_to(o, target))
12782        }
12783        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
12784        Expr::Array(items) => items.iter().any(|e| expr_refers_to(e, target)),
12785        Expr::InList { expr, list, .. } => {
12786            expr_refers_to(expr, target) || list.iter().any(|e| expr_refers_to(e, target))
12787        }
12788        Expr::ArraySubscript { target: t, index } => {
12789            expr_refers_to(t, target) || expr_refers_to(index, target)
12790        }
12791        Expr::AnyAll { expr, array, .. } => {
12792            expr_refers_to(expr, target) || expr_refers_to(array, target)
12793        }
12794        Expr::Case {
12795            operand,
12796            branches,
12797            else_branch,
12798        } => {
12799            operand
12800                .as_deref()
12801                .is_some_and(|o| expr_refers_to(o, target))
12802                || branches
12803                    .iter()
12804                    .any(|(w, t)| expr_refers_to(w, target) || expr_refers_to(t, target))
12805                || else_branch
12806                    .as_deref()
12807                    .is_some_and(|e| expr_refers_to(e, target))
12808        }
12809    }
12810}
12811
12812/// v4.22: pick more specific column types from observed rows when
12813/// the projection builder defaulted to Text (the v1.x behavior for
12814/// non-column expressions). Lets `WITH t(n) AS (SELECT 1 ...)`
12815/// land an Int column in the CTE storage table rather than failing
12816/// the insert with "expected TEXT, got INT".
12817/// v7.16.2 — map an SPG [`DataType`] to the PG-canonical
12818/// `information_schema.columns.data_type` text. Covers the
12819/// values mailrs's migrations probe (`'ARRAY'`, `'integer'`,
12820/// `'text'`, …). Unknown variants fall back to the SPG name
12821/// downcased — better than panicking on a future DataType.
12822fn pg_data_type_text(ty: DataType) -> alloc::string::String {
12823    let s = match ty {
12824        DataType::Int => "integer",
12825        DataType::BigInt => "bigint",
12826        DataType::SmallInt => "smallint",
12827        DataType::Float => "double precision",
12828        DataType::Bool => "boolean",
12829        DataType::Text => "text",
12830        DataType::Varchar(_) => "character varying",
12831        DataType::Date => "date",
12832        DataType::Timestamp => "timestamp without time zone",
12833        DataType::Timestamptz => "timestamp with time zone",
12834        DataType::Json => "jsonb",
12835        DataType::Bytes => "bytea",
12836        DataType::TextArray | DataType::IntArray | DataType::BigIntArray => "ARRAY",
12837        DataType::TsVector => "tsvector",
12838        DataType::TsQuery => "tsquery",
12839        DataType::Vector { .. } => "USER-DEFINED",
12840        // Non-exhaustive — fall back to "USER-DEFINED" the way
12841        // PG labels any pg_type it doesn't recognise.
12842        _ => "USER-DEFINED",
12843    };
12844    alloc::string::String::from(s)
12845}
12846
12847/// v7.16.2 — synthesise `information_schema.columns`. mailrs
12848/// queries are of shape `SELECT 1 FROM information_schema.columns
12849/// WHERE table_name = … AND column_name = … AND data_type = …` —
12850/// the v7.16.2 view returns the columns mailrs probes; broader
12851/// PG-spec parity (ordinal_position, is_nullable, character_
12852/// maximum_length, udt_name, …) lands as needed.
12853fn synth_information_schema_columns(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12854    let schema = alloc::vec![
12855        ColumnSchema::new("table_catalog", DataType::Text, false),
12856        ColumnSchema::new("table_schema", DataType::Text, false),
12857        ColumnSchema::new("table_name", DataType::Text, false),
12858        ColumnSchema::new("column_name", DataType::Text, false),
12859        ColumnSchema::new("ordinal_position", DataType::Int, false),
12860        ColumnSchema::new("is_nullable", DataType::Text, false),
12861        ColumnSchema::new("data_type", DataType::Text, false),
12862    ];
12863    let mut rows: Vec<Row> = Vec::new();
12864    for tname in cat.table_names() {
12865        let Some(t) = cat.get(&tname) else { continue };
12866        for (i, col) in t.schema().columns.iter().enumerate() {
12867            #[allow(clippy::cast_possible_wrap)]
12868            let ordinal = (i + 1) as i32;
12869            rows.push(Row::new(alloc::vec![
12870                Value::Text("spg".into()),
12871                Value::Text("public".into()),
12872                Value::Text(tname.clone()),
12873                Value::Text(col.name.clone()),
12874                Value::Int(ordinal),
12875                Value::Text(if col.nullable {
12876                    "YES".into()
12877                } else {
12878                    "NO".into()
12879                }),
12880                Value::Text(pg_data_type_text(col.ty)),
12881            ]));
12882        }
12883    }
12884    (schema, rows)
12885}
12886
12887/// v7.16.2 — synthesise `information_schema.tables`.
12888fn synth_information_schema_tables(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12889    let schema = alloc::vec![
12890        ColumnSchema::new("table_catalog", DataType::Text, false),
12891        ColumnSchema::new("table_schema", DataType::Text, false),
12892        ColumnSchema::new("table_name", DataType::Text, false),
12893        ColumnSchema::new("table_type", DataType::Text, false),
12894    ];
12895    let mut rows: Vec<Row> = Vec::new();
12896    for tname in cat.table_names() {
12897        rows.push(Row::new(alloc::vec![
12898            Value::Text("spg".into()),
12899            Value::Text("public".into()),
12900            Value::Text(tname.clone()),
12901            Value::Text("BASE TABLE".into()),
12902        ]));
12903    }
12904    (schema, rows)
12905}
12906
12907/// v7.16.2 — synthesise `pg_catalog.pg_class`. Minimum shape
12908/// for psql `\d` / ORM probes: `relname` + `relkind`. Each
12909/// user table emits one row.
12910fn synth_pg_class(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12911    let schema = alloc::vec![
12912        ColumnSchema::new("relname", DataType::Text, false),
12913        ColumnSchema::new("relkind", DataType::Text, false),
12914        ColumnSchema::new("relnamespace", DataType::BigInt, false),
12915    ];
12916    let mut rows: Vec<Row> = Vec::new();
12917    for tname in cat.table_names() {
12918        rows.push(Row::new(alloc::vec![
12919            Value::Text(tname.clone()),
12920            Value::Text("r".into()),
12921            Value::BigInt(2200), // PG's `public` namespace OID
12922        ]));
12923    }
12924    (schema, rows)
12925}
12926
12927/// v7.16.2 — synthesise `pg_catalog.pg_attribute`. Minimum
12928/// shape: `attrelid` (text — SPG has no OID), `attname`,
12929/// `attnum`, `atttypid` (text), `attnotnull`.
12930fn synth_pg_attribute(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12931    let schema = alloc::vec![
12932        ColumnSchema::new("attrelid", DataType::Text, false),
12933        ColumnSchema::new("attname", DataType::Text, false),
12934        ColumnSchema::new("attnum", DataType::Int, false),
12935        ColumnSchema::new("atttypid", DataType::Text, false),
12936        ColumnSchema::new("attnotnull", DataType::Bool, false),
12937    ];
12938    let mut rows: Vec<Row> = Vec::new();
12939    for tname in cat.table_names() {
12940        let Some(t) = cat.get(&tname) else { continue };
12941        for (i, col) in t.schema().columns.iter().enumerate() {
12942            #[allow(clippy::cast_possible_wrap)]
12943            let ordinal = (i + 1) as i32;
12944            rows.push(Row::new(alloc::vec![
12945                Value::Text(tname.clone()),
12946                Value::Text(col.name.clone()),
12947                Value::Int(ordinal),
12948                Value::Text(pg_data_type_text(col.ty)),
12949                Value::Bool(!col.nullable),
12950            ]));
12951        }
12952    }
12953    (schema, rows)
12954}
12955
12956/// v7.17.0 Phase 3.P0-50 — synthesise `pg_catalog.pg_type`. The
12957/// returned rows cover every built-in scalar / array type sqlx,
12958/// SQLAlchemy, Diesel and pgAdmin look up at compile / connect
12959/// time. PG-canonical schema columns we expose:
12960///   * oid           — type OID (the lookup key sqlx uses)
12961///   * typname       — canonical type name (`int4`, `text`, …)
12962///   * typlen        — width in bytes (-1 for var-length)
12963///   * typtype       — `b`ase / `c`omposite / `e`num / etc.
12964///   * typcategory   — PG type category single-char
12965///   * typelem       — element OID for arrays (0 otherwise)
12966///   * typarray      — array-type OID (0 if no array type)
12967///   * typnamespace  — schema OID (always `public` = 2200)
12968///
12969/// Other pg_type columns (typowner, typinput/typoutput, etc.)
12970/// land in follow-up work — sqlx encoders don't query them at
12971/// connect time.
12972fn synth_pg_type(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
12973    let schema = alloc::vec![
12974        ColumnSchema::new("oid", DataType::BigInt, false),
12975        ColumnSchema::new("typname", DataType::Text, false),
12976        ColumnSchema::new("typlen", DataType::SmallInt, false),
12977        ColumnSchema::new("typtype", DataType::Text, false),
12978        ColumnSchema::new("typcategory", DataType::Text, false),
12979        ColumnSchema::new("typelem", DataType::BigInt, false),
12980        ColumnSchema::new("typarray", DataType::BigInt, false),
12981        ColumnSchema::new("typnamespace", DataType::BigInt, false),
12982    ];
12983    // (oid, name, len, type, cat, elem, array_oid). PG OID
12984    // numbers come straight from `pg_type.dat`.
12985    let scalars: &[(i64, &str, i16, &str, &str, i64, i64)] = &[
12986        // bool
12987        (16, "bool", 1, "b", "B", 0, 1000),
12988        (17, "bytea", -1, "b", "U", 0, 1001),
12989        (18, "char", 1, "b", "S", 0, 1002),
12990        (19, "name", 64, "b", "S", 0, 1003),
12991        (20, "int8", 8, "b", "N", 0, 1016),
12992        (21, "int2", 2, "b", "N", 0, 1005),
12993        (23, "int4", 4, "b", "N", 0, 1007),
12994        (24, "regproc", 4, "b", "N", 0, 1008),
12995        (25, "text", -1, "b", "S", 0, 1009),
12996        (26, "oid", 4, "b", "N", 0, 1028),
12997        (114, "json", -1, "b", "U", 0, 199),
12998        (142, "xml", -1, "b", "U", 0, 143),
12999        (700, "float4", 4, "b", "N", 0, 1021),
13000        (701, "float8", 8, "b", "N", 0, 1022),
13001        (650, "cidr", -1, "b", "I", 0, 651),
13002        (869, "inet", -1, "b", "I", 0, 1041),
13003        (829, "macaddr", 6, "b", "U", 0, 1040),
13004        (1042, "bpchar", -1, "b", "S", 0, 1014),
13005        (1043, "varchar", -1, "b", "S", 0, 1015),
13006        (1082, "date", 4, "b", "D", 0, 1182),
13007        (1083, "time", 8, "b", "D", 0, 1183),
13008        (1114, "timestamp", 8, "b", "D", 0, 1115),
13009        (1184, "timestamptz", 8, "b", "D", 0, 1185),
13010        (1186, "interval", 16, "b", "T", 0, 1187),
13011        (1266, "timetz", 12, "b", "D", 0, 1270),
13012        (1700, "numeric", -1, "b", "N", 0, 1231),
13013        (790, "money", 8, "b", "N", 0, 791),
13014        (2950, "uuid", 16, "b", "U", 0, 2951),
13015        (3802, "jsonb", -1, "b", "U", 0, 3807),
13016        (3614, "tsvector", -1, "b", "U", 0, 3643),
13017        (3615, "tsquery", -1, "b", "U", 0, 3645),
13018        // hstore + range types — typcategory 'U' (user) / 'R' (range).
13019        (3908, "tstzrange", -1, "r", "R", 0, 3909),
13020        (3910, "tsrange", -1, "r", "R", 0, 3911),
13021        (3904, "int4range", -1, "r", "R", 0, 3905),
13022        (3926, "int8range", -1, "r", "R", 0, 3927),
13023        (3906, "numrange", -1, "r", "R", 0, 3907),
13024        (3912, "daterange", -1, "r", "R", 0, 3913),
13025    ];
13026    // Array companion types share the typelem / typcategory='A'.
13027    // We emit just the array OIDs the scalars reference.
13028    let arrays: &[(i64, &str, i64)] = &[
13029        (1000, "_bool", 16),
13030        (1001, "_bytea", 17),
13031        (1002, "_char", 18),
13032        (1003, "_name", 19),
13033        (1016, "_int8", 20),
13034        (1005, "_int2", 21),
13035        (1007, "_int4", 23),
13036        (1008, "_regproc", 24),
13037        (1009, "_text", 25),
13038        (1028, "_oid", 26),
13039        (199, "_json", 114),
13040        (143, "_xml", 142),
13041        (1021, "_float4", 700),
13042        (1022, "_float8", 701),
13043        (651, "_cidr", 650),
13044        (1041, "_inet", 869),
13045        (1040, "_macaddr", 829),
13046        (1014, "_bpchar", 1042),
13047        (1015, "_varchar", 1043),
13048        (1182, "_date", 1082),
13049        (1183, "_time", 1083),
13050        (1115, "_timestamp", 1114),
13051        (1185, "_timestamptz", 1184),
13052        (1187, "_interval", 1186),
13053        (1270, "_timetz", 1266),
13054        (1231, "_numeric", 1700),
13055        (791, "_money", 790),
13056        (2951, "_uuid", 2950),
13057        (3807, "_jsonb", 3802),
13058        (3643, "_tsvector", 3614),
13059        (3645, "_tsquery", 3615),
13060    ];
13061    let mut rows: Vec<Row> = Vec::with_capacity(scalars.len() + arrays.len());
13062    for &(oid, name, len, ty, cat, elem, arr) in scalars {
13063        rows.push(Row::new(alloc::vec![
13064            Value::BigInt(oid),
13065            Value::Text(name.into()),
13066            Value::SmallInt(len),
13067            Value::Text(ty.into()),
13068            Value::Text(cat.into()),
13069            Value::BigInt(elem),
13070            Value::BigInt(arr),
13071            Value::BigInt(2200),
13072        ]));
13073    }
13074    for &(oid, name, elem) in arrays {
13075        rows.push(Row::new(alloc::vec![
13076            Value::BigInt(oid),
13077            Value::Text(name.into()),
13078            Value::SmallInt(-1),
13079            Value::Text("b".into()),
13080            Value::Text("A".into()),
13081            Value::BigInt(elem),
13082            Value::BigInt(0),
13083            Value::BigInt(2200),
13084        ]));
13085    }
13086    (schema, rows)
13087}
13088
13089/// v7.17.0 Phase 3.P0-51 — synthesise `pg_catalog.pg_proc`. ORM /
13090/// pgAdmin probes look up functions by name; SPG synthesises rows
13091/// for the built-in scalar functions / aggregates / window funcs
13092/// the engine actually dispatches. SPG has no user-defined
13093/// functions yet so the table is a stable static list.
13094///
13095/// Schema columns exposed:
13096///   * oid (BigInt) — function OID from PG's pg_proc.dat
13097///   * proname (Text) — function name (lowercase)
13098///   * pronamespace (BigInt) — 11 (`pg_catalog`)
13099///   * prokind (Text) — 'f' function, 'a' aggregate, 'w' window
13100///   * pronargs (SmallInt) — declared arg count (-1 for variadic)
13101///   * prorettype (BigInt) — return type OID (matches synth_pg_type)
13102/// v7.24 (round-16 D) — synthesise `pg_catalog.pg_trigger` from the
13103/// live catalog. PG-shaped core columns (tgname, tgenabled with
13104/// 'O'/'D') plus pragmatic text columns PG keeps relational
13105/// (relname, timing, events, function) so health checks don't need
13106/// oid joins.
13107fn synth_pg_trigger(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13108    let schema = alloc::vec![
13109        ColumnSchema::new("tgname", DataType::Text, false),
13110        ColumnSchema::new("relname", DataType::Text, false),
13111        ColumnSchema::new("tgenabled", DataType::Text, false),
13112        ColumnSchema::new("timing", DataType::Text, false),
13113        ColumnSchema::new("events", DataType::Text, false),
13114        ColumnSchema::new("function", DataType::Text, false),
13115    ];
13116    let rows: Vec<Row> = cat
13117        .triggers()
13118        .iter()
13119        .map(|t| {
13120            Row::new(alloc::vec![
13121                Value::Text(t.name.clone()),
13122                Value::Text(t.table.clone()),
13123                Value::Text(if t.enabled { "O".into() } else { "D".into() }),
13124                Value::Text(t.timing.clone()),
13125                Value::Text(t.events.join(" OR ")),
13126                Value::Text(t.function.clone()),
13127            ])
13128        })
13129        .collect();
13130    (schema, rows)
13131}
13132
13133fn synth_pg_proc(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13134    let schema = alloc::vec![
13135        ColumnSchema::new("oid", DataType::BigInt, false),
13136        ColumnSchema::new("proname", DataType::Text, false),
13137        ColumnSchema::new("pronamespace", DataType::BigInt, false),
13138        ColumnSchema::new("prokind", DataType::Text, false),
13139        ColumnSchema::new("pronargs", DataType::Int, false),
13140        ColumnSchema::new("prorettype", DataType::BigInt, false),
13141    ];
13142    // (oid, name, kind, nargs, rettype). OIDs taken from PG's
13143    // pg_proc.dat for the common subset.
13144    let funcs: &[(i64, &str, &str, i32, i64)] = &[
13145        // Scalar functions.
13146        (1318, "length", "f", 1, 23),
13147        (871, "upper", "f", 1, 25),
13148        (870, "lower", "f", 1, 25),
13149        (936, "substring", "f", 3, 25),
13150        (937, "substring", "f", 2, 25),
13151        (3055, "btrim", "f", 1, 25),
13152        (885, "btrim", "f", 2, 25),
13153        (3056, "ltrim", "f", 1, 25),
13154        (875, "ltrim", "f", 2, 25),
13155        (3057, "rtrim", "f", 1, 25),
13156        (876, "rtrim", "f", 2, 25),
13157        (1397, "abs", "f", 1, 23),
13158        (1396, "abs", "f", 1, 20),
13159        (1606, "round", "f", 1, 1700),
13160        (1707, "round", "f", 2, 1700),
13161        (2308, "ceil", "f", 1, 701),
13162        (2309, "ceiling", "f", 1, 701),
13163        (2310, "floor", "f", 1, 701),
13164        (1376, "sqrt", "f", 1, 701),
13165        (1369, "ln", "f", 1, 701),
13166        (1373, "exp", "f", 1, 701),
13167        (1368, "power", "f", 2, 701),
13168        (2228, "random", "f", 0, 701),
13169        // Date / time.
13170        (1299, "now", "f", 0, 1184),
13171        (1274, "current_timestamp", "f", 0, 1184),
13172        (1140, "current_date", "f", 0, 1082),
13173        (2050, "current_time", "f", 0, 1083),
13174        (1158, "date_trunc", "f", 2, 1184),
13175        (1171, "date_part", "f", 2, 701),
13176        (1172, "age", "f", 1, 1186),
13177        (936, "to_char", "f", 2, 25),
13178        // Session / introspection.
13179        (861, "current_database", "f", 0, 19),
13180        (745, "current_user", "f", 0, 19),
13181        (745, "session_user", "f", 0, 19),
13182        (1402, "current_schema", "f", 0, 19),
13183        // String concat / format.
13184        (3058, "concat", "f", -1, 25),
13185        (3059, "concat_ws", "f", -1, 25),
13186        (3539, "format", "f", -1, 25),
13187        // Type introspection.
13188        (2877, "pg_typeof", "f", 1, 2206),
13189        // JSON.
13190        (3198, "json_build_object", "f", -1, 114),
13191        (3199, "jsonb_build_object", "f", -1, 3802),
13192        (3271, "json_build_array", "f", -1, 114),
13193        (3272, "jsonb_build_array", "f", -1, 3802),
13194        // UUID.
13195        (3253, "gen_random_uuid", "f", 0, 2950),
13196        (3252, "uuid_generate_v4", "f", 0, 2950),
13197        // Aggregates.
13198        (2147, "count", "a", 0, 20),
13199        (2803, "count", "a", -1, 20),
13200        (2116, "max", "a", 1, 23),
13201        (2132, "min", "a", 1, 23),
13202        (2108, "sum", "a", 1, 20),
13203        (2100, "avg", "a", 1, 1700),
13204        (2517, "string_agg", "a", 2, 25),
13205        (2747, "array_agg", "a", 1, 1009),
13206        (2517, "bool_and", "a", 1, 16),
13207        (2518, "bool_or", "a", 1, 16),
13208        (2519, "every", "a", 1, 16),
13209        // Window functions.
13210        (3100, "row_number", "w", 0, 20),
13211        (3101, "rank", "w", 0, 20),
13212        (3102, "dense_rank", "w", 0, 20),
13213        (3103, "percent_rank", "w", 0, 701),
13214        (3104, "cume_dist", "w", 0, 701),
13215        (3105, "lag", "w", -1, 2283),
13216        (3106, "lead", "w", -1, 2283),
13217        (3107, "first_value", "w", 1, 2283),
13218        (3108, "last_value", "w", 1, 2283),
13219        (3109, "nth_value", "w", 2, 2283),
13220    ];
13221    let mut rows: Vec<Row> = Vec::with_capacity(funcs.len());
13222    for &(oid, name, kind, nargs, rettype) in funcs {
13223        rows.push(Row::new(alloc::vec![
13224            Value::BigInt(oid),
13225            Value::Text(name.into()),
13226            Value::BigInt(11),
13227            Value::Text(kind.into()),
13228            Value::Int(nargs),
13229            Value::BigInt(rettype),
13230        ]));
13231    }
13232    (schema, rows)
13233}
13234
13235/// v7.17.0 Phase 3.P0-65 — synthesise `mysql.user`. MySQL admin
13236/// queries (`SELECT user, host FROM mysql.user`) probe this at
13237/// connect time to list accounts. SPG ships one row per
13238/// UserStore entry plus a synthetic `root` superuser row for
13239/// MySQL bootstrap compat.
13240fn synth_mysql_user(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
13241    let schema = alloc::vec![
13242        ColumnSchema::new("user", DataType::Text, false),
13243        ColumnSchema::new("host", DataType::Text, false),
13244        ColumnSchema::new("select_priv", DataType::Text, false),
13245    ];
13246    let mut rows: Vec<Row> = Vec::new();
13247    rows.push(Row::new(alloc::vec![
13248        Value::Text("root".into()),
13249        Value::Text("localhost".into()),
13250        Value::Text("Y".into()),
13251    ]));
13252    for (name, _) in engine.users.iter() {
13253        if name != "root" {
13254            rows.push(Row::new(alloc::vec![
13255                Value::Text(name.to_string()),
13256                Value::Text("%".into()),
13257                Value::Text("Y".into()),
13258            ]));
13259        }
13260    }
13261    (schema, rows)
13262}
13263
13264/// v7.17.0 Phase 3.P0-65 — synthesise `mysql.db`. The
13265/// per-database privileges table. SPG is single-database so the
13266/// table surfaces one row per declared user with full privileges
13267/// on the canonical `postgres` database.
13268fn synth_mysql_db() -> (Vec<ColumnSchema>, Vec<Row>) {
13269    let schema = alloc::vec![
13270        ColumnSchema::new("host", DataType::Text, false),
13271        ColumnSchema::new("db", DataType::Text, false),
13272        ColumnSchema::new("user", DataType::Text, false),
13273        ColumnSchema::new("select_priv", DataType::Text, false),
13274    ];
13275    let rows = alloc::vec![Row::new(alloc::vec![
13276        Value::Text("localhost".into()),
13277        Value::Text("postgres".into()),
13278        Value::Text("root".into()),
13279        Value::Text("Y".into()),
13280    ])];
13281    (schema, rows)
13282}
13283
13284/// v7.17.0 Phase 3.P0-63 — synthesise
13285/// `information_schema.KEY_COLUMN_USAGE`. ORM migration tools
13286/// (Alembic, Sequelize, TypeORM) walk this view to discover FK
13287/// relationships in MySQL-flavoured introspection queries.
13288///
13289/// Schema columns exposed:
13290///   * CONSTRAINT_NAME (Text)
13291///   * TABLE_NAME (Text)
13292///   * COLUMN_NAME (Text)
13293///   * ORDINAL_POSITION (Int)
13294///   * REFERENCED_TABLE_NAME (Text) — empty for non-FK rows
13295///   * REFERENCED_COLUMN_NAME (Text) — empty for non-FK rows
13296fn synth_info_key_column_usage(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13297    let schema = alloc::vec![
13298        ColumnSchema::new("constraint_name", DataType::Text, false),
13299        ColumnSchema::new("table_name", DataType::Text, false),
13300        ColumnSchema::new("column_name", DataType::Text, false),
13301        ColumnSchema::new("ordinal_position", DataType::Int, false),
13302        ColumnSchema::new("referenced_table_name", DataType::Text, false),
13303        ColumnSchema::new("referenced_column_name", DataType::Text, false),
13304    ];
13305    let mut rows: Vec<Row> = Vec::new();
13306    for tname in cat.table_names() {
13307        let Some(t) = cat.get(&tname) else { continue };
13308        let cols = &t.schema().columns;
13309        let col_name_at = |pos: usize| -> String {
13310            cols.get(pos)
13311                .map_or_else(|| alloc::format!("col{pos}"), |c| c.name.clone())
13312        };
13313        // FKs.
13314        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
13315            let conname = fk
13316                .name
13317                .clone()
13318                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
13319            for (i, (&local, &parent)) in fk
13320                .local_columns
13321                .iter()
13322                .zip(fk.parent_columns.iter())
13323                .enumerate()
13324            {
13325                let parent_name = cat
13326                    .get(&fk.parent_table)
13327                    .and_then(|pt| pt.schema().columns.get(parent).map(|c| c.name.clone()))
13328                    .unwrap_or_else(|| alloc::format!("col{parent}"));
13329                #[allow(clippy::cast_possible_wrap)]
13330                let ordinal = (i + 1) as i32;
13331                rows.push(Row::new(alloc::vec![
13332                    Value::Text(conname.clone()),
13333                    Value::Text(tname.clone()),
13334                    Value::Text(col_name_at(local)),
13335                    Value::Int(ordinal),
13336                    Value::Text(fk.parent_table.clone()),
13337                    Value::Text(parent_name),
13338                ]));
13339            }
13340        }
13341        // PK / composite UC entries.
13342        for (ci, uc) in t.schema().uniqueness_constraints.iter().enumerate() {
13343            let conname = if uc.is_primary_key {
13344                alloc::format!("{}_pkey", tname)
13345            } else {
13346                alloc::format!("{}_uniq{ci}", tname)
13347            };
13348            for (i, &local) in uc.columns.iter().enumerate() {
13349                #[allow(clippy::cast_possible_wrap)]
13350                let ordinal = (i + 1) as i32;
13351                rows.push(Row::new(alloc::vec![
13352                    Value::Text(conname.clone()),
13353                    Value::Text(tname.clone()),
13354                    Value::Text(col_name_at(local)),
13355                    Value::Int(ordinal),
13356                    Value::Text(String::new()),
13357                    Value::Text(String::new()),
13358                ]));
13359            }
13360        }
13361    }
13362    (schema, rows)
13363}
13364
13365/// v7.17.0 Phase 3.P0-64 — synthesise
13366/// `information_schema.REFERENTIAL_CONSTRAINTS`. One row per FK.
13367fn synth_info_referential_constraints(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13368    let schema = alloc::vec![
13369        ColumnSchema::new("constraint_name", DataType::Text, false),
13370        ColumnSchema::new("table_name", DataType::Text, false),
13371        ColumnSchema::new("referenced_table_name", DataType::Text, false),
13372        ColumnSchema::new("update_rule", DataType::Text, false),
13373        ColumnSchema::new("delete_rule", DataType::Text, false),
13374    ];
13375    fn rule_name(a: spg_storage::FkAction) -> &'static str {
13376        match a {
13377            spg_storage::FkAction::Cascade => "CASCADE",
13378            spg_storage::FkAction::SetNull => "SET NULL",
13379            spg_storage::FkAction::SetDefault => "SET DEFAULT",
13380            spg_storage::FkAction::Restrict => "RESTRICT",
13381            spg_storage::FkAction::NoAction => "NO ACTION",
13382        }
13383    }
13384    let mut rows: Vec<Row> = Vec::new();
13385    for tname in cat.table_names() {
13386        let Some(t) = cat.get(&tname) else { continue };
13387        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
13388            let conname = fk
13389                .name
13390                .clone()
13391                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
13392            rows.push(Row::new(alloc::vec![
13393                Value::Text(conname),
13394                Value::Text(tname.clone()),
13395                Value::Text(fk.parent_table.clone()),
13396                Value::Text(rule_name(fk.on_update).into()),
13397                Value::Text(rule_name(fk.on_delete).into()),
13398            ]));
13399        }
13400    }
13401    (schema, rows)
13402}
13403
13404/// v7.17.0 Phase 3.P0-64 — synthesise `information_schema.STATISTICS`.
13405/// One row per (index × column) — admin tools walk this to
13406/// surface index-cardinality estimates.
13407fn synth_info_statistics(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13408    let schema = alloc::vec![
13409        ColumnSchema::new("table_name", DataType::Text, false),
13410        ColumnSchema::new("index_name", DataType::Text, false),
13411        ColumnSchema::new("column_name", DataType::Text, false),
13412        ColumnSchema::new("seq_in_index", DataType::Int, false),
13413        ColumnSchema::new("non_unique", DataType::Int, false),
13414        ColumnSchema::new("index_type", DataType::Text, false),
13415    ];
13416    let mut rows: Vec<Row> = Vec::new();
13417    for tname in cat.table_names() {
13418        let Some(t) = cat.get(&tname) else { continue };
13419        for idx in t.indices() {
13420            let col = t
13421                .schema()
13422                .columns
13423                .get(idx.column_position)
13424                .map_or("?".into(), |c| c.name.clone());
13425            rows.push(Row::new(alloc::vec![
13426                Value::Text(tname.clone()),
13427                Value::Text(idx.name.clone()),
13428                Value::Text(col),
13429                Value::Int(1),
13430                Value::Int(i32::from(!idx.is_unique)),
13431                Value::Text("BTREE".into()),
13432            ]));
13433        }
13434    }
13435    (schema, rows)
13436}
13437
13438/// v7.17.0 Phase 3.P0-64 — synthesise `information_schema.ROUTINES`.
13439/// SPG has no user-defined functions in v7.17 so the surface is
13440/// always empty; admin tools just need the table to exist.
13441fn synth_info_routines() -> (Vec<ColumnSchema>, Vec<Row>) {
13442    let schema = alloc::vec![
13443        ColumnSchema::new("routine_name", DataType::Text, false),
13444        ColumnSchema::new("routine_type", DataType::Text, false),
13445        ColumnSchema::new("data_type", DataType::Text, false),
13446    ];
13447    (schema, Vec::new())
13448}
13449
13450/// v7.17.0 Phase 3.P0-54 — synthesise `pg_catalog.pg_constraint`.
13451/// ORM compilers (Diesel, sea-orm) and admin tools probe this for
13452/// FK / UNIQUE / PK / CHECK definitions to surface relationship
13453/// graphs and validation rules. SPG ships one row per
13454/// uniqueness constraint + foreign key declared in the catalog.
13455///
13456/// Schema columns exposed:
13457///   * conname (Text) — constraint name (synthetic when anonymous)
13458///   * contype (Text) — `p` PK, `u` UNIQUE, `f` FK, `c` CHECK
13459///   * conrelid (Text) — owner table name
13460///   * confrelid (Text) — referenced parent table (FK only;
13461///     empty string otherwise)
13462///   * conkey (Text) — comma-separated column names
13463///   * confkey (Text) — comma-separated parent column names (FK only)
13464fn synth_pg_constraint(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13465    let schema = alloc::vec![
13466        ColumnSchema::new("conname", DataType::Text, false),
13467        ColumnSchema::new("contype", DataType::Text, false),
13468        ColumnSchema::new("conrelid", DataType::Text, false),
13469        ColumnSchema::new("confrelid", DataType::Text, false),
13470        ColumnSchema::new("conkey", DataType::Text, false),
13471        ColumnSchema::new("confkey", DataType::Text, false),
13472    ];
13473    let mut rows: Vec<Row> = Vec::new();
13474    for tname in cat.table_names() {
13475        let Some(t) = cat.get(&tname) else { continue };
13476        let cols = &t.schema().columns;
13477        let col_name_at = |pos: usize| -> String {
13478            cols.get(pos)
13479                .map_or_else(|| alloc::format!("col{pos}"), |c| c.name.clone())
13480        };
13481        // Uniqueness constraints (composite UNIQUE / PRIMARY KEY).
13482        for (ci, uc) in t.schema().uniqueness_constraints.iter().enumerate() {
13483            let kind = if uc.is_primary_key { "p" } else { "u" };
13484            let conname = if uc.is_primary_key {
13485                alloc::format!("{}_pkey", tname)
13486            } else {
13487                alloc::format!("{}_uniq{ci}", tname)
13488            };
13489            let conkey: Vec<String> = uc.columns.iter().map(|&p| col_name_at(p)).collect();
13490            rows.push(Row::new(alloc::vec![
13491                Value::Text(conname),
13492                Value::Text(kind.into()),
13493                Value::Text(tname.clone()),
13494                Value::Text(String::new()),
13495                Value::Text(conkey.join(",")),
13496                Value::Text(String::new()),
13497            ]));
13498        }
13499        // Single-column PK / UNIQUE indexes that have no
13500        // matching entry in `uniqueness_constraints` (the engine
13501        // creates only the BTree index for the bare-column case;
13502        // composite forms ride the UC path above).
13503        for idx in t.indices() {
13504            if !idx.is_unique {
13505                continue;
13506            }
13507            let is_primary = idx.name.ends_with("_pkey");
13508            let conname = idx.name.clone();
13509            let kind = if is_primary { "p" } else { "u" };
13510            let col_name = col_name_at(idx.column_position);
13511            // Skip if already emitted via the UC loop above (same
13512            // tuple shape — single-column).
13513            let already = t
13514                .schema()
13515                .uniqueness_constraints
13516                .iter()
13517                .any(|uc| uc.columns.len() == 1 && uc.columns[0] == idx.column_position);
13518            if already {
13519                continue;
13520            }
13521            rows.push(Row::new(alloc::vec![
13522                Value::Text(conname),
13523                Value::Text(kind.into()),
13524                Value::Text(tname.clone()),
13525                Value::Text(String::new()),
13526                Value::Text(col_name),
13527                Value::Text(String::new()),
13528            ]));
13529        }
13530        // Foreign keys.
13531        for (fi, fk) in t.schema().foreign_keys.iter().enumerate() {
13532            let conname = fk
13533                .name
13534                .clone()
13535                .unwrap_or_else(|| alloc::format!("{}_fk{fi}", tname));
13536            let conkey: Vec<String> = fk.local_columns.iter().map(|&p| col_name_at(p)).collect();
13537            // Parent column names: look up the parent table's
13538            // schema if it exists; otherwise emit positions.
13539            let confkey: Vec<String> = if let Some(parent) = cat.get(&fk.parent_table) {
13540                fk.parent_columns
13541                    .iter()
13542                    .map(|&p| {
13543                        parent
13544                            .schema()
13545                            .columns
13546                            .get(p)
13547                            .map_or_else(|| alloc::format!("col{p}"), |c| c.name.clone())
13548                    })
13549                    .collect()
13550            } else {
13551                fk.parent_columns
13552                    .iter()
13553                    .map(|p| alloc::format!("col{p}"))
13554                    .collect()
13555            };
13556            rows.push(Row::new(alloc::vec![
13557                Value::Text(conname),
13558                Value::Text("f".into()),
13559                Value::Text(tname.clone()),
13560                Value::Text(fk.parent_table.clone()),
13561                Value::Text(conkey.join(",")),
13562                Value::Text(confkey.join(",")),
13563            ]));
13564        }
13565    }
13566    (schema, rows)
13567}
13568
13569/// v7.17.0 Phase 3.P0-55 — synthesise `pg_catalog.pg_database`.
13570/// SPG is single-database so we surface a single row keyed on the
13571/// canonical `postgres` database name (matching what every PG
13572/// admin tool's startup screen expects to find).
13573fn synth_pg_database(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13574    let schema = alloc::vec![
13575        ColumnSchema::new("oid", DataType::BigInt, false),
13576        ColumnSchema::new("datname", DataType::Text, false),
13577        ColumnSchema::new("datdba", DataType::BigInt, false),
13578        ColumnSchema::new("encoding", DataType::Int, false),
13579        ColumnSchema::new("datcollate", DataType::Text, false),
13580    ];
13581    let rows = alloc::vec![Row::new(alloc::vec![
13582        Value::BigInt(16384),
13583        Value::Text("postgres".into()),
13584        Value::BigInt(10),
13585        Value::Int(6), // UTF8
13586        Value::Text("en_US.UTF-8".into()),
13587    ])];
13588    (schema, rows)
13589}
13590
13591/// v7.17.0 Phase 3.P0-55 — synthesise `pg_catalog.pg_roles`. PG's
13592/// pg_roles is a view over pg_authid showing all roles. SPG ships
13593/// one row per declared user from the engine's UserStore so admin
13594/// tool startup screens can populate.
13595fn synth_pg_roles(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
13596    let schema = alloc::vec![
13597        ColumnSchema::new("oid", DataType::BigInt, false),
13598        ColumnSchema::new("rolname", DataType::Text, false),
13599        ColumnSchema::new("rolsuper", DataType::Bool, false),
13600        ColumnSchema::new("rolinherit", DataType::Bool, false),
13601        ColumnSchema::new("rolcanlogin", DataType::Bool, false),
13602    ];
13603    let mut rows: Vec<Row> = Vec::new();
13604    let oid: i64 = 10;
13605    for (i, (name, _)) in engine.users.iter().enumerate() {
13606        rows.push(Row::new(alloc::vec![
13607            Value::BigInt(oid + (i as i64) + 1),
13608            Value::Text(name.to_string()),
13609            Value::Bool(false),
13610            Value::Bool(true),
13611            Value::Bool(true),
13612        ]));
13613    }
13614    // Always include `postgres` as the bootstrap superuser if not
13615    // already present — admin tools probe for it.
13616    if !rows
13617        .iter()
13618        .any(|r| matches!(&r.values[1], Value::Text(s) if s == "postgres"))
13619    {
13620        rows.insert(
13621            0,
13622            Row::new(alloc::vec![
13623                Value::BigInt(10),
13624                Value::Text("postgres".into()),
13625                Value::Bool(true),
13626                Value::Bool(true),
13627                Value::Bool(true),
13628            ]),
13629        );
13630    }
13631    (schema, rows)
13632}
13633
13634/// v7.17.0 Phase 3.P0-56 — synthesise `pg_catalog.pg_views`. PG's
13635/// pg_views is a view listing every catalog view; SPG ships one
13636/// row per declared view + its definition text.
13637/// Synthesise `pg_catalog.pg_extension`. SPG ships its "extension"
13638/// surfaces natively (vector, pg_trgm, plpgsql-shaped DO blocks), so
13639/// the table lists those as installed — `SELECT … FROM pg_extension
13640/// WHERE extname = 'vector'` probes from PG clients (mailrs embed
13641/// round-12) answer truthfully about capability presence.
13642fn synth_pg_extension() -> (Vec<ColumnSchema>, Vec<Row>) {
13643    let schema = alloc::vec![
13644        ColumnSchema::new("oid", DataType::BigInt, false),
13645        ColumnSchema::new("extname", DataType::Text, false),
13646        ColumnSchema::new("extversion", DataType::Text, false),
13647        ColumnSchema::new("extnamespace", DataType::Text, false),
13648    ];
13649    let exts: &[(&str, &str)] = &[("plpgsql", "1.0"), ("vector", "0.8.0"), ("pg_trgm", "1.6")];
13650    let rows = exts
13651        .iter()
13652        .enumerate()
13653        .map(|(i, (name, ver))| {
13654            Row::new(alloc::vec![
13655                Value::BigInt(16384 + i as i64),
13656                Value::Text((*name).into()),
13657                Value::Text((*ver).into()),
13658                Value::Text("pg_catalog".into()),
13659            ])
13660        })
13661        .collect();
13662    (schema, rows)
13663}
13664
13665fn synth_pg_views(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13666    let schema = alloc::vec![
13667        ColumnSchema::new("schemaname", DataType::Text, false),
13668        ColumnSchema::new("viewname", DataType::Text, false),
13669        ColumnSchema::new("definition", DataType::Text, false),
13670    ];
13671    let mut rows: Vec<Row> = Vec::new();
13672    for (name, def) in cat.views() {
13673        rows.push(Row::new(alloc::vec![
13674            Value::Text("public".into()),
13675            Value::Text(name.clone()),
13676            Value::Text(def.body.clone()),
13677        ]));
13678    }
13679    (schema, rows)
13680}
13681
13682/// v7.17.0 Phase 3.P0-57 — synthesise `pg_catalog.pg_settings`. ORM
13683/// connection-checkers (sqlx pre-flight, Diesel migrator) and admin
13684/// tools read `pg_settings` to discover server-side configuration.
13685/// SPG surfaces every session_param + a small set of canonical PG
13686/// defaults so the pre-flight queries match.
13687fn synth_pg_settings(engine: &Engine) -> (Vec<ColumnSchema>, Vec<Row>) {
13688    let schema = alloc::vec![
13689        ColumnSchema::new("name", DataType::Text, false),
13690        ColumnSchema::new("setting", DataType::Text, false),
13691        ColumnSchema::new("category", DataType::Text, false),
13692    ];
13693    let mut rows: Vec<Row> = Vec::new();
13694    // Canonical defaults every admin tool expects to find.
13695    let defaults: &[(&str, &str, &str)] = &[
13696        ("server_version", "16.0 (spg)", "Preset Options"),
13697        ("server_encoding", "UTF8", "Client Connection Defaults"),
13698        ("client_encoding", "UTF8", "Client Connection Defaults"),
13699        ("DateStyle", "ISO, MDY", "Client Connection Defaults"),
13700        ("TimeZone", "UTC", "Client Connection Defaults"),
13701        ("standard_conforming_strings", "on", "Compatibility"),
13702        ("integer_datetimes", "on", "Compatibility"),
13703        ("max_connections", "100", "Connections and Authentication"),
13704    ];
13705    for &(name, val, cat) in defaults {
13706        rows.push(Row::new(alloc::vec![
13707            Value::Text(name.into()),
13708            Value::Text(val.into()),
13709            Value::Text(cat.into()),
13710        ]));
13711    }
13712    // Session-set params override the static defaults.
13713    for (k, v) in &engine.session_params {
13714        if !defaults
13715            .iter()
13716            .any(|(n, _, _)| (*n).eq_ignore_ascii_case(k))
13717        {
13718            rows.push(Row::new(alloc::vec![
13719                Value::Text(k.clone()),
13720                Value::Text(v.clone()),
13721                Value::Text("Session".into()),
13722            ]));
13723        }
13724    }
13725    (schema, rows)
13726}
13727
13728/// v7.17.0 Phase 3.P0-53 — synthesise `pg_catalog.pg_indexes`.
13729/// PG's pg_indexes is a real view on pg_index + pg_class + pg_attribute.
13730/// SPG ships it as a synthesised flat table so admin tools (pgAdmin,
13731/// DataGrip) can list indexes by tablename without joining four catalogs.
13732///
13733/// Schema columns exposed:
13734///   * schemaname (Text) — always `public`
13735///   * tablename (Text)
13736///   * indexname (Text)
13737///   * indexdef (Text) — best-effort CREATE INDEX DDL
13738fn synth_pg_indexes(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13739    let schema = alloc::vec![
13740        ColumnSchema::new("schemaname", DataType::Text, false),
13741        ColumnSchema::new("tablename", DataType::Text, false),
13742        ColumnSchema::new("indexname", DataType::Text, false),
13743        ColumnSchema::new("indexdef", DataType::Text, false),
13744    ];
13745    let mut rows: Vec<Row> = Vec::new();
13746    for tname in cat.table_names() {
13747        let Some(t) = cat.get(&tname) else { continue };
13748        for idx in t.indices() {
13749            let col_name = t
13750                .schema()
13751                .columns
13752                .get(idx.column_position)
13753                .map_or("?".into(), |c| c.name.clone());
13754            let unique_kw = if idx.is_unique { "UNIQUE " } else { "" };
13755            let indexdef = alloc::format!(
13756                "CREATE {unique_kw}INDEX {} ON public.{} ({})",
13757                idx.name,
13758                tname,
13759                col_name
13760            );
13761            rows.push(Row::new(alloc::vec![
13762                Value::Text("public".into()),
13763                Value::Text(tname.clone()),
13764                Value::Text(idx.name.clone()),
13765                Value::Text(indexdef),
13766            ]));
13767        }
13768    }
13769    (schema, rows)
13770}
13771
13772/// v7.17.0 Phase 3.P0-53 — synthesise `pg_catalog.pg_index`. The
13773/// "raw" pg_index catalog used by PG-internal tooling for index
13774/// flags and ordinal information. SPG ships the columns ORM probes
13775/// actually filter on.
13776///
13777/// Schema columns exposed:
13778///   * indexrelid (BigInt) — index OID (synthetic = position+1)
13779///   * indrelid (BigInt) — table OID (synthetic = position+1)
13780///   * indnatts (Int) — number of indexed columns
13781///   * indisunique (Bool)
13782///   * indisprimary (Bool)
13783fn synth_pg_index_raw(cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13784    let schema = alloc::vec![
13785        ColumnSchema::new("indexrelid", DataType::BigInt, false),
13786        ColumnSchema::new("indrelid", DataType::BigInt, false),
13787        ColumnSchema::new("indnatts", DataType::Int, false),
13788        ColumnSchema::new("indisunique", DataType::Bool, false),
13789        ColumnSchema::new("indisprimary", DataType::Bool, false),
13790    ];
13791    let mut rows: Vec<Row> = Vec::new();
13792    let mut idx_oid: i64 = 100_000;
13793    for (table_idx, tname) in cat.table_names().iter().enumerate() {
13794        let Some(t) = cat.get(tname) else { continue };
13795        for idx in t.indices() {
13796            idx_oid += 1;
13797            #[allow(clippy::cast_possible_wrap)]
13798            let nattrs = (1 + idx.extra_column_positions.len()) as i32;
13799            // is_primary: SPG / PG flag the primary via the
13800            // index name convention `<table>_pkey`.
13801            let is_primary = idx.name.ends_with("_pkey");
13802            rows.push(Row::new(alloc::vec![
13803                Value::BigInt(idx_oid),
13804                Value::BigInt((table_idx + 1) as i64),
13805                Value::Int(nattrs),
13806                Value::Bool(idx.is_unique),
13807                Value::Bool(is_primary),
13808            ]));
13809        }
13810    }
13811    (schema, rows)
13812}
13813
13814/// v7.17.0 Phase 3.P0-52 — synthesise `pg_catalog.pg_namespace`.
13815/// SPG is single-schema so we expose the canonical PG schemas:
13816/// `public` (user-facing), `pg_catalog` (built-in), and
13817/// `information_schema` (PG meta).
13818fn synth_pg_namespace(_cat: &Catalog) -> (Vec<ColumnSchema>, Vec<Row>) {
13819    let schema = alloc::vec![
13820        ColumnSchema::new("oid", DataType::BigInt, false),
13821        ColumnSchema::new("nspname", DataType::Text, false),
13822        ColumnSchema::new("nspowner", DataType::BigInt, false),
13823    ];
13824    let rows = alloc::vec![
13825        Row::new(alloc::vec![
13826            Value::BigInt(11),
13827            Value::Text("pg_catalog".into()),
13828            Value::BigInt(10),
13829        ]),
13830        Row::new(alloc::vec![
13831            Value::BigInt(2200),
13832            Value::Text("public".into()),
13833            Value::BigInt(10),
13834        ]),
13835        Row::new(alloc::vec![
13836            Value::BigInt(13000),
13837            Value::Text("information_schema".into()),
13838            Value::BigInt(10),
13839        ]),
13840    ];
13841    (schema, rows)
13842}
13843
13844/// v7.16.2 — drop the synthesised meta view into the enriched
13845/// catalog so the regular FROM-resolution path can see it.
13846fn materialise_meta_view(
13847    catalog: &mut Catalog,
13848    name: &str,
13849    columns: Vec<ColumnSchema>,
13850    rows: Vec<Row>,
13851) -> Result<(), EngineError> {
13852    let schema = TableSchema::new(name.to_string(), columns);
13853    catalog.create_table(schema).map_err(EngineError::Storage)?;
13854    let table = catalog
13855        .get_mut(name)
13856        .expect("just-created meta view must exist");
13857    for row in rows {
13858        table.insert(row).map_err(EngineError::Storage)?;
13859    }
13860    Ok(())
13861}
13862
13863/// v7.16.2 — true when the SELECT statement references any
13864/// `__spg_info_*` or `__spg_pg_*` synthetic table name (the
13865/// parser produces these for `information_schema.X` /
13866/// `pg_catalog.X`). Used by `exec_select_cancel` to short-
13867/// circuit into the meta-view materialisation path.
13868/// v7.17.0 Phase 1.2 — append the names of any catalog-known
13869/// views referenced by `tref` to `into`. Helper for
13870/// `Engine::expand_views_in_select`. A view that's been already
13871/// materialised as a table (e.g. via the synthetic CTE pass for
13872/// SELECT FROM v) is skipped — the table form wins so the
13873/// recursive exec_select_cancel call inside exec_with_ctes
13874/// doesn't re-expand and trigger the CTE-shadow guard.
13875fn collect_view_refs(
13876    tref: &spg_sql::ast::TableRef,
13877    cat: &spg_storage::Catalog,
13878    into: &mut Vec<String>,
13879) {
13880    if cat.views().contains_key(&tref.name)
13881        && cat.get(&tref.name).is_none()
13882        && !into.iter().any(|n| n == &tref.name)
13883    {
13884        into.push(tref.name.clone());
13885    }
13886}
13887
13888fn select_references_meta_view(stmt: &SelectStatement) -> bool {
13889    fn is_meta(name: &str) -> bool {
13890        name.starts_with("__spg_info_")
13891            || name.starts_with("__spg_pg_")
13892            || name.starts_with("__spg_mysql_")
13893    }
13894    if let Some(from) = &stmt.from {
13895        if is_meta(&from.primary.name) {
13896            return true;
13897        }
13898        for j in &from.joins {
13899            if is_meta(&j.table.name) {
13900                return true;
13901            }
13902        }
13903    }
13904    for cte in &stmt.ctes {
13905        if select_references_meta_view(&cte.body) {
13906            return true;
13907        }
13908    }
13909    false
13910}
13911
13912/// v7.16.2 — collect every meta-view name a SELECT touches.
13913/// Returns a deduplicated, sorted list. Caller materialises
13914/// each one into the enriched catalog before re-running the
13915/// SELECT. Walks JOINs, CTEs, and the primary FROM.
13916fn collect_meta_view_names(
13917    stmt: &SelectStatement,
13918    into: &mut alloc::collections::BTreeSet<String>,
13919) {
13920    fn is_meta(name: &str) -> bool {
13921        name.starts_with("__spg_info_")
13922            || name.starts_with("__spg_pg_")
13923            || name.starts_with("__spg_mysql_")
13924    }
13925    if let Some(from) = &stmt.from {
13926        if is_meta(&from.primary.name) {
13927            into.insert(from.primary.name.clone());
13928        }
13929        for j in &from.joins {
13930            if is_meta(&j.table.name) {
13931                into.insert(j.table.name.clone());
13932            }
13933        }
13934    }
13935    for cte in &stmt.ctes {
13936        collect_meta_view_names(&cte.body, into);
13937    }
13938}
13939
13940fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
13941    let mut out = columns.to_vec();
13942    for (col_idx, col) in out.iter_mut().enumerate() {
13943        if col.ty != DataType::Text {
13944            continue;
13945        }
13946        let mut inferred: Option<DataType> = None;
13947        let mut all_null = true;
13948        for row in rows {
13949            let Some(v) = row.values.get(col_idx) else {
13950                continue;
13951            };
13952            let ty = match v {
13953                Value::Null => continue,
13954                Value::SmallInt(_) => DataType::SmallInt,
13955                Value::Int(_) => DataType::Int,
13956                Value::BigInt(_) => DataType::BigInt,
13957                Value::Float(_) => DataType::Float,
13958                Value::Bool(_) => DataType::Bool,
13959                Value::Vector(_) => DataType::Vector {
13960                    dim: 0,
13961                    encoding: VecEncoding::F32,
13962                },
13963                _ => DataType::Text,
13964            };
13965            all_null = false;
13966            inferred = Some(match inferred {
13967                None => ty,
13968                Some(prev) if prev == ty => prev,
13969                Some(_) => DataType::Text,
13970            });
13971        }
13972        if let Some(t) = inferred {
13973            col.ty = t;
13974            col.nullable = true;
13975        } else if all_null {
13976            col.nullable = true;
13977        }
13978    }
13979    out
13980}
13981
13982/// v4.26: render a human-readable plan tree for `EXPLAIN <select>`.
13983/// Lines are pushed into `out`; `depth` controls indentation. We
13984/// describe the rewritten SELECT — what the executor *would* do —
13985/// using the engine handle to spot indexed lookups and table shapes.
13986#[allow(clippy::too_many_lines, clippy::format_push_string)]
13987/// v6.2.4 — Walk every line of the rendered plan tree and append
13988/// per-operator stats. Lines that name a known operator get
13989/// `(rows=N)` (`actual_rows` of the top-level operator equals the
13990/// final result row count; scans report their catalog row count
13991/// as the rows-considered metric). Other lines — Filter / Join /
13992/// GroupBy / OrderBy etc. — are marked `(—)` so the surface is
13993/// complete-by-construction; v6.2.5 fills these in via inline
13994/// executor counters.
13995/// v6.8.3 — surface "CREATE INDEX …" suggestions for every
13996/// `(table, column)` pair the query touches via WHERE / JOIN
13997/// that doesn't already have an index on the owning table.
13998/// Walks the SELECT's FROM clauses + WHERE expression tree;
13999/// returns one line per missing index. Deterministic order:
14000/// FROM-clause iteration order, then column-reference walk
14001/// order inside each WHERE. Each suggestion is a copy-pastable
14002/// DDL string.
14003fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
14004    use alloc::collections::BTreeSet;
14005    let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
14006    let mut out: Vec<String> = Vec::new();
14007    let cat = engine.active_catalog();
14008    // Build a (table, qualifier-or-alias) list from the FROM clause
14009    // so unqualified column refs in WHERE resolve to the correct
14010    // table.
14011    let Some(from) = &stmt.from else {
14012        return out;
14013    };
14014    let mut tables: Vec<String> = Vec::new();
14015    tables.push(from.primary.name.clone());
14016    for j in &from.joins {
14017        tables.push(j.table.name.clone());
14018    }
14019    // Collect column refs from the WHERE expression. JOIN ON
14020    // predicates also feed in.
14021    let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
14022    if let Some(w) = &stmt.where_ {
14023        collect_column_refs(w, &mut col_refs);
14024    }
14025    for j in &from.joins {
14026        if let Some(on) = &j.on {
14027            collect_column_refs(on, &mut col_refs);
14028        }
14029    }
14030    for cn in &col_refs {
14031        // Resolve owner table: explicit qualifier first, else
14032        // first table in FROM that has a column of this name.
14033        let owner: Option<String> = if let Some(q) = &cn.qualifier {
14034            tables.iter().find(|t| t == &q).cloned()
14035        } else {
14036            tables.iter().find_map(|t| {
14037                cat.get(t).and_then(|tbl| {
14038                    if tbl.schema().column_position(&cn.name).is_some() {
14039                        Some(t.clone())
14040                    } else {
14041                        None
14042                    }
14043                })
14044            })
14045        };
14046        let Some(owner) = owner else {
14047            continue;
14048        };
14049        let Some(tbl) = cat.get(&owner) else {
14050            continue;
14051        };
14052        let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
14053            continue;
14054        };
14055        // Skip if any BTree index already covers this column as
14056        // its key.
14057        let already_indexed = tbl.indices().iter().any(|i| {
14058            matches!(i.kind, spg_storage::IndexKind::BTree(_))
14059                && i.column_position == col_pos
14060                && i.expression.is_none()
14061                && i.partial_predicate.is_none()
14062        });
14063        if already_indexed {
14064            continue;
14065        }
14066        if seen.insert((owner.clone(), cn.name.clone())) {
14067            out.push(alloc::format!(
14068                "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
14069                owner,
14070                cn.name,
14071                owner,
14072                cn.name
14073            ));
14074        }
14075    }
14076    out
14077}
14078
14079/// Walks an `Expr` and pushes every `ColumnName` it references.
14080/// Order is depth-first, left-to-right.
14081fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
14082    match expr {
14083        Expr::Column(cn) => out.push(cn.clone()),
14084        Expr::FunctionCall { args, .. } => {
14085            for a in args {
14086                collect_column_refs(a, out);
14087            }
14088        }
14089        Expr::Binary { lhs, rhs, .. } => {
14090            collect_column_refs(lhs, out);
14091            collect_column_refs(rhs, out);
14092        }
14093        Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
14094        _ => {}
14095    }
14096}
14097
14098fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
14099    let catalog = engine.active_catalog();
14100    let cold_ids = catalog.cold_segment_ids_global();
14101    let any_cold = !cold_ids.is_empty();
14102    let cold_ids_repr = if any_cold {
14103        let mut s = alloc::string::String::from("[");
14104        for (i, id) in cold_ids.iter().enumerate() {
14105            if i > 0 {
14106                s.push(',');
14107            }
14108            s.push_str(&alloc::format!("{id}"));
14109        }
14110        s.push(']');
14111        s
14112    } else {
14113        alloc::string::String::new()
14114    };
14115    for (idx, line) in lines.iter_mut().enumerate() {
14116        let trimmed = line.trim_start();
14117        let is_top_level = idx == 0;
14118        if is_top_level {
14119            line.push_str(&alloc::format!(" (rows={total_rows})"));
14120            continue;
14121        }
14122        if let Some(rest) = trimmed.strip_prefix("From: ") {
14123            let (name, scan_kind) = match rest.split_once(" [") {
14124                Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
14125                None => (rest.trim(), ""),
14126            };
14127            let bare = name.split_whitespace().next().unwrap_or(name);
14128            let hot = catalog.get(bare).map(|t| t.rows().len());
14129            // v6.2.7 — `cold_segments=[id0,id1,…]` enumerates every
14130            // cold-tier segment the scan COULD have walked. v6.2.x
14131            // can tighten to per-table by walking the table's
14132            // BTree-index cold locators.
14133            let annot = match (hot, scan_kind) {
14134                (Some(h), "full scan") => {
14135                    let mut s = alloc::format!(" (hot_rows={h}");
14136                    if any_cold {
14137                        s.push_str(&alloc::format!(
14138                            ", cold_tier=present, cold_segments={cold_ids_repr}"
14139                        ));
14140                    }
14141                    s.push(')');
14142                    s
14143                }
14144                (Some(h), "index seek") => {
14145                    let mut s = alloc::format!(" (hot_rows≤{h}");
14146                    if any_cold {
14147                        s.push_str(&alloc::format!(
14148                            ", cold_tier=present, cold_segments={cold_ids_repr}"
14149                        ));
14150                    }
14151                    s.push(')');
14152                    s
14153                }
14154                _ => " (rows=—)".to_string(),
14155            };
14156            line.push_str(&annot);
14157            continue;
14158        }
14159        // Filter / GroupBy / Having / OrderBy / Limit / Join etc.
14160        line.push_str(" (rows=—)");
14161    }
14162}
14163
14164fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
14165    let pad = "  ".repeat(depth);
14166    // 1) Top-level operator label.
14167    let top = if !stmt.ctes.is_empty() {
14168        if stmt.ctes.iter().any(|c| c.recursive) {
14169            "CTEScan (WITH RECURSIVE)"
14170        } else {
14171            "CTEScan (WITH)"
14172        }
14173    } else if !stmt.unions.is_empty() {
14174        "UnionScan"
14175    } else if select_has_window(stmt) {
14176        "WindowAgg"
14177    } else if aggregate::uses_aggregate(stmt) {
14178        "Aggregate"
14179    } else if stmt.distinct {
14180        "Distinct"
14181    } else if stmt.from.is_some() {
14182        "TableScan"
14183    } else {
14184        "Result"
14185    };
14186    out.push(alloc::format!("{pad}{top}"));
14187    let child = "  ".repeat(depth + 1);
14188    // 2) CTE bodies.
14189    for cte in &stmt.ctes {
14190        let head = if cte.recursive {
14191            alloc::format!("{child}CTE (recursive): {}", cte.name)
14192        } else {
14193            alloc::format!("{child}CTE: {}", cte.name)
14194        };
14195        out.push(head);
14196        explain_select(&cte.body, engine, depth + 2, out);
14197    }
14198    // 3) FROM details — primary table + joins, index hits.
14199    if let Some(from) = &stmt.from {
14200        let mut tag = alloc::format!("{child}From: {}", from.primary.name);
14201        if let Some(alias) = &from.primary.alias {
14202            tag.push_str(&alloc::format!(" AS {alias}"));
14203        }
14204        // Try to detect an index-seek opportunity on WHERE against
14205        // the primary table — same heuristic the executor uses.
14206        if let Some(w) = &stmt.where_
14207            && let Some(table) = engine.active_catalog().get(&from.primary.name)
14208        {
14209            let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
14210            let cols = &table.schema().columns;
14211            if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
14212                tag.push_str(" [index seek]");
14213            } else {
14214                tag.push_str(" [full scan]");
14215            }
14216        } else {
14217            tag.push_str(" [full scan]");
14218        }
14219        out.push(tag);
14220        for j in &from.joins {
14221            let kind = match j.kind {
14222                spg_sql::ast::JoinKind::Inner => "INNER JOIN",
14223                spg_sql::ast::JoinKind::Left => "LEFT JOIN",
14224                spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
14225            };
14226            let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
14227            if let Some(alias) = &j.table.alias {
14228                s.push_str(&alloc::format!(" AS {alias}"));
14229            }
14230            if j.on.is_some() {
14231                s.push_str(" (ON …)");
14232            }
14233            out.push(s);
14234        }
14235    }
14236    // 4) WHERE / GROUP BY / HAVING / ORDER BY / LIMIT / OFFSET.
14237    if let Some(w) = &stmt.where_ {
14238        let mut s = alloc::format!("{child}Filter: {w}");
14239        if expr_has_subquery(w) {
14240            s.push_str(" [subquery]");
14241        }
14242        out.push(s);
14243    }
14244    if let Some(gs) = &stmt.group_by {
14245        let mut parts = Vec::new();
14246        for g in gs {
14247            parts.push(alloc::format!("{g}"));
14248        }
14249        out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
14250    }
14251    if let Some(h) = &stmt.having {
14252        out.push(alloc::format!("{child}Having: {h}"));
14253    }
14254    for o in &stmt.order_by {
14255        let dir = if o.desc { "DESC" } else { "ASC" };
14256        out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
14257    }
14258    if let Some(lim) = stmt.limit {
14259        out.push(alloc::format!("{child}Limit: {lim}"));
14260    }
14261    if let Some(off) = stmt.offset {
14262        out.push(alloc::format!("{child}Offset: {off}"));
14263    }
14264    // 5) Projection — collapse Wildcard or render N items.
14265    if stmt
14266        .items
14267        .iter()
14268        .any(|it| matches!(it, SelectItem::Wildcard))
14269    {
14270        out.push(alloc::format!("{child}Project: *"));
14271    } else {
14272        out.push(alloc::format!(
14273            "{child}Project: {} item(s)",
14274            stmt.items.len()
14275        ));
14276    }
14277    // 6) Recurse into UNION peers.
14278    for (kind, peer) in &stmt.unions {
14279        let label = match kind {
14280            UnionKind::All => "UNION ALL",
14281            UnionKind::Distinct => "UNION",
14282        };
14283        out.push(alloc::format!("{child}{label}"));
14284        explain_select(peer, engine, depth + 2, out);
14285    }
14286}
14287
14288/// v4.23: recognise the engine errors that indicate the inner
14289/// SELECT couldn't be evaluated in isolation because it references
14290/// an outer column — used by `subquery_replacement` to skip
14291/// materialisation and let row-eval handle it instead.
14292fn is_correlation_error(e: &EngineError) -> bool {
14293    matches!(
14294        e,
14295        EngineError::Eval(
14296            eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
14297        )
14298    )
14299}
14300
// NOTE(review): the paragraph below describes an expression-rewriting
// helper (outer-column substitution for correlated subqueries), not the
// struct that follows it. It is kept as a plain comment so it no longer
// renders as part of `JoinedPeer`'s rustdoc — confirm which item it
// belongs to.
//
// v4.23: walk every Expr in `stmt` and replace each Column ref
// that targets the outer scope (qualifier matches the outer
// table alias) with a Literal carrying the outer row's value.
// Conservative: only qualified refs are substituted, so the user
// must write `outer_alias.col` to reference an outer column. This
// matches PG's lexical scoping for correlated subqueries and
// avoids accidentally rebinding inner columns of the same name.

/// v7.17.0 Phase 3.P0-41 — LATERAL peer descriptor. Either eagerly
/// materialised (every regular table / unnest / generate_series) or
/// lateral (subquery re-evaluated per outer row).
struct JoinedPeer<'a> {
    /// Rows materialised up front for an eagerly evaluated peer.
    /// NOTE(review): presumably `None` for lateral and
    /// index-nested-loop peers — confirm against the join executor.
    eager_rows: Option<Vec<Row>>,
    /// Column schema of the rows this peer produces.
    cols: Vec<ColumnSchema>,
    /// Name the peer is referred to by in the query.
    /// NOTE(review): presumably the explicit alias, else the table
    /// name — confirm at the construction site.
    alias: String,
    /// Join flavour linking this peer to the rows accumulated so far.
    kind: JoinKind,
    /// Borrowed `ON` predicate, when the join has one.
    on: Option<&'a Expr>,
    /// Borrowed LATERAL subquery body; `Some` only for lateral peers,
    /// which are re-evaluated per outer row.
    lateral: Option<&'a SelectStatement>,
    /// v7.28 (round-22) — plain-table name for the index-nested-loop
    /// path. None for unnest/lateral.
    join_table: Option<String>,
}
14322
/// v7.31 (perf campaign) — deferred-join row source: one per join
/// stage. The working set advances as row-index tuples instead of
/// cloned combined rows; each tuple slot indexes into one of these.
///
/// All three variants expose the same positional access through
/// `get` / `len`, so tuple resolution does not care where the rows
/// physically live.
enum JoinSrc<'a> {
    /// Owned by the join: the primary scan, a lazily-materialised
    /// peer, or the arena of per-outer-row LATERAL results.
    Owned(Vec<Row>),
    /// Peer rows materialised up front and still owned by `JoinedPeer`.
    /// Borrowed here, so no rows are cloned into the join.
    Eager(&'a [Row]),
    /// Index-nested-loop peer reading the stored table in place.
    Stored(&'a spg_storage::persistent::PersistentVec<Row>),
}
14335
14336impl JoinSrc<'_> {
14337    fn get(&self, i: usize) -> Option<&Row> {
14338        match self {
14339            Self::Owned(v) => v.get(i),
14340            Self::Eager(s) => s.get(i),
14341            Self::Stored(p) => p.get(i),
14342        }
14343    }
14344
14345    fn len(&self) -> usize {
14346        match self {
14347            Self::Owned(v) => v.len(),
14348            Self::Eager(s) => s.len(),
14349            Self::Stored(p) => p.len(),
14350        }
14351    }
14352}
14353
14354/// Resolve one combined-schema position against a row-index tuple.
14355/// `offsets` holds the prefix column offsets of the consumed sources
14356/// (`offsets.len() == tuple.len() + 1`). `None` means SQL NULL: a
14357/// LEFT-extended slot (`usize::MAX`), or a position past the row's
14358/// width.
14359fn tuple_value<'s>(
14360    sources: &'s [JoinSrc<'_>],
14361    offsets: &[usize],
14362    tuple: &[usize],
14363    pos: usize,
14364) -> Option<&'s Value> {
14365    let k = offsets.partition_point(|&o| o <= pos).checked_sub(1)?;
14366    let ri = *tuple.get(k)?;
14367    if ri == usize::MAX {
14368        return None;
14369    }
14370    sources.get(k)?.get(ri)?.values.get(pos - offsets[k])
14371}
14372
14373/// Clone a source row's values into a combined-row buffer. A mask
14374/// (per-column "is referenced anywhere in the statement") NULLs the
14375/// unreferenced columns instead of cloning them — the in-place
14376/// equivalent of `null_out_unreferenced` for sources that were never
14377/// pre-cloned.
14378fn extend_masked(vals: &mut Vec<Value>, row: &Row, mask: Option<&[bool]>) {
14379    match mask {
14380        Some(keep) => {
14381            for (i, v) in row.values.iter().enumerate() {
14382                if keep.get(i).copied().unwrap_or(false) {
14383                    vals.push(v.clone());
14384                } else {
14385                    vals.push(Value::Null);
14386                }
14387            }
14388        }
14389        None => vals.extend(row.values.iter().cloned()),
14390    }
14391}
14392
14393/// Materialise a row-index tuple into owned values, NULL-padding
14394/// LEFT-extended slots to the source's schema width.
14395fn materialise_tuple_vals(
14396    sources: &[JoinSrc<'_>],
14397    widths: &[usize],
14398    masks: &[Option<Vec<bool>>],
14399    tuple: &[usize],
14400    cap: usize,
14401) -> Vec<Value> {
14402    let mut vals: Vec<Value> = Vec::with_capacity(cap);
14403    for (k, &ri) in tuple.iter().enumerate() {
14404        let row = if ri == usize::MAX {
14405            None
14406        } else {
14407            sources[k].get(ri)
14408        };
14409        match row {
14410            Some(r) => extend_masked(&mut vals, r, masks[k].as_deref()),
14411            None => {
14412                for _ in 0..widths[k] {
14413                    vals.push(Value::Null);
14414                }
14415            }
14416        }
14417    }
14418    vals
14419}
14420
14421/// v7.17.0 Phase 3.P0-41 — synthesise a column name for a LATERAL
14422/// projection item that has no explicit alias. PG names anonymous
14423/// projection items by the function call's name or by `column<i>`.
14424/// SPG mirrors the latter (lower-overhead than walking arbitrary
14425/// Expr shapes) so the probe-schema fallback path produces stable
14426/// names for the lateral peer's columns.
14427fn synth_lateral_col_name(expr: &Expr, idx: usize) -> String {
14428    match expr {
14429        // Bare column reference — use the column's own name.
14430        Expr::Column(c) => c.name.clone(),
14431        // Function call — use the function name (PG canonical:
14432        // `count` / `max` / `lower` …).
14433        Expr::FunctionCall { name, .. } => name.clone(),
14434        // Cast — drill into the inner expression.
14435        Expr::Cast { expr: inner, .. } => synth_lateral_col_name(inner, idx),
14436        // Everything else falls back to PG's `column<N>` placeholder.
14437        _ => alloc::format!("column{}", idx + 1),
14438    }
14439}
14440
/// v7.17.0 Phase 3.P0-41 — substitute every `<alias>.<col>` Expr
/// reference whose `<alias>.<col>` exists in the outer composite
/// schema with the matching value from the outer row. Walks the
/// entire SELECT body (items, WHERE, GROUP BY, HAVING, ORDER BY,
/// UNION peers) so any depth of outer reference inside the
/// LATERAL subquery resolves before execution.
///
/// `stmt` is rewritten in place. `outer_row` and `outer_schema` are
/// positionally aligned: the value for schema entry `i` is taken from
/// `outer_row.values[i]`. This is a thin entry point; the walk itself
/// lives in `substitute_outer_in_select`.
fn substitute_outer_columns_multi(
    stmt: &mut SelectStatement,
    outer_row: &Row,
    outer_schema: &[ColumnSchema],
) {
    substitute_outer_in_select(stmt, outer_row, outer_schema);
}
14454
14455fn substitute_outer_in_select(
14456    stmt: &mut SelectStatement,
14457    outer_row: &Row,
14458    outer_schema: &[ColumnSchema],
14459) {
14460    for item in &mut stmt.items {
14461        if let SelectItem::Expr { expr, .. } = item {
14462            substitute_outer_in_expr(expr, outer_row, outer_schema);
14463        }
14464    }
14465    if let Some(w) = &mut stmt.where_ {
14466        substitute_outer_in_expr(w, outer_row, outer_schema);
14467    }
14468    if let Some(gs) = &mut stmt.group_by {
14469        for g in gs {
14470            substitute_outer_in_expr(g, outer_row, outer_schema);
14471        }
14472    }
14473    if let Some(h) = &mut stmt.having {
14474        substitute_outer_in_expr(h, outer_row, outer_schema);
14475    }
14476    for o in &mut stmt.order_by {
14477        substitute_outer_in_expr(&mut o.expr, outer_row, outer_schema);
14478    }
14479    for (_, peer) in &mut stmt.unions {
14480        substitute_outer_in_select(peer, outer_row, outer_schema);
14481    }
14482}
14483
14484fn substitute_outer_in_expr(e: &mut Expr, outer_row: &Row, outer_schema: &[ColumnSchema]) {
14485    if let Expr::Column(c) = e
14486        && let Some(qual) = &c.qualifier
14487    {
14488        let composite = alloc::format!("{qual}.{}", c.name);
14489        if let Some(idx) = outer_schema
14490            .iter()
14491            .position(|sc| sc.name.eq_ignore_ascii_case(&composite))
14492        {
14493            let v = outer_row.values.get(idx).cloned().unwrap_or(Value::Null);
14494            if let Ok(lit) = value_to_literal_expr(v) {
14495                *e = lit;
14496                return;
14497            }
14498        }
14499    }
14500    match e {
14501        Expr::Binary { lhs, rhs, .. } => {
14502            substitute_outer_in_expr(lhs, outer_row, outer_schema);
14503            substitute_outer_in_expr(rhs, outer_row, outer_schema);
14504        }
14505        Expr::Unary { expr: inner, .. } => {
14506            substitute_outer_in_expr(inner, outer_row, outer_schema);
14507        }
14508        Expr::FunctionCall { args, .. } => {
14509            for a in args {
14510                substitute_outer_in_expr(a, outer_row, outer_schema);
14511            }
14512        }
14513        Expr::Cast { expr: inner, .. } => {
14514            substitute_outer_in_expr(inner, outer_row, outer_schema);
14515        }
14516        Expr::Case {
14517            operand,
14518            branches,
14519            else_branch,
14520        } => {
14521            if let Some(op) = operand {
14522                substitute_outer_in_expr(op, outer_row, outer_schema);
14523            }
14524            for (cond, val) in branches {
14525                substitute_outer_in_expr(cond, outer_row, outer_schema);
14526                substitute_outer_in_expr(val, outer_row, outer_schema);
14527            }
14528            if let Some(e) = else_branch {
14529                substitute_outer_in_expr(e, outer_row, outer_schema);
14530            }
14531        }
14532        _ => {}
14533    }
14534}
14535
14536impl Engine {
14537    /// v7.29 (round-22 phase 3) — try to batch-evaluate a correlated
14538    /// scalar subquery of the shape
14539    ///   (SELECT expr FROM … WHERE inner_preds AND inner_col = outer_col
14540    ///    [ORDER BY o [DESC]] [LIMIT 1])
14541    /// by running the subquery ONCE without the correlation and
14542    /// folding rows into a key→value map (group top-1 when ordered).
14543    /// Returns None when the shape doesn't qualify; correctness then
14544    /// falls back to per-row execution.
14545    fn try_batch_correlated_scalar(
14546        &self,
14547        inner: &SelectStatement,
14548        cancel: CancelToken<'_>,
14549    ) -> Result<Option<memoize::GroupMap>, EngineError> {
14550        use spg_sql::ast::{BinOp, SelectItem as SI};
14551        if !inner.ctes.is_empty()
14552            || !inner.unions.is_empty()
14553            || inner.group_by.is_some()
14554            || inner.having.is_some()
14555            || inner.distinct
14556            || inner.items.len() != 1
14557            || inner.order_by.len() > 1
14558            || inner.offset.is_some()
14559        {
14560            return Ok(None);
14561        }
14562        // LIMIT must be absent or literally 1 (top-1 semantics).
14563        if let Some(le) = inner.limit
14564            && le.as_literal() != Some(1)
14565        {
14566            return Ok(None);
14567        }
14568        let Some(from) = &inner.from else {
14569            return Ok(None);
14570        };
14571        if from.primary.lateral_subquery.is_some() || from.primary.unnest_expr.is_some() {
14572            return Ok(None);
14573        }
14574        // Inner alias set.
14575        let mut inner_aliases: Vec<String> = Vec::new();
14576        inner_aliases.push(
14577            from.primary
14578                .alias
14579                .clone()
14580                .unwrap_or_else(|| from.primary.name.clone()),
14581        );
14582        for j in &from.joins {
14583            if j.table.lateral_subquery.is_some() || j.table.unnest_expr.is_some() {
14584                return Ok(None);
14585            }
14586            inner_aliases.push(
14587                j.table
14588                    .alias
14589                    .clone()
14590                    .unwrap_or_else(|| j.table.name.clone()),
14591            );
14592        }
14593        let is_inner = |c: &spg_sql::ast::ColumnName| -> bool {
14594            match &c.qualifier {
14595                Some(q) => inner_aliases.iter().any(|a| a.eq_ignore_ascii_case(q)),
14596                None => false,
14597            }
14598        };
14599        let is_outer = |c: &spg_sql::ast::ColumnName| -> bool {
14600            match &c.qualifier {
14601                Some(q) => !inner_aliases.iter().any(|a| a.eq_ignore_ascii_case(q)),
14602                // Synthetic group columns arrive bare after the
14603                // aggregate rewrite.
14604                None => c.name.starts_with("__grp_") || c.name.starts_with("__agg_"),
14605            }
14606        };
14607        // Every expression OTHER than the correlation conjunct must be
14608        // fully inner (qualified to inner aliases).
14609        let all_inner = |e: &Expr| -> bool {
14610            let mut cols: Vec<spg_sql::ast::ColumnName> = Vec::new();
14611            let mut subs: Vec<&SelectStatement> = Vec::new();
14612            visit_expr_columns_and_subqueries(e, &mut |c| cols.push(c.clone()), &mut |sub| {
14613                subs.push(sub)
14614            });
14615            subs.is_empty() && cols.iter().all(|c| is_inner(c) && !c.name.is_empty())
14616        };
14617        let Some(w) = &inner.where_ else {
14618            return Ok(None);
14619        };
14620        let conjuncts = reorder::split_and_conjunctions(w);
14621        let mut corr: Option<(spg_sql::ast::ColumnName, spg_sql::ast::ColumnName)> = None; // (inner, outer)
14622        let mut rest: Vec<&Expr> = Vec::new();
14623        for c in conjuncts {
14624            if let Expr::Binary {
14625                lhs,
14626                op: BinOp::Eq,
14627                rhs,
14628            } = c
14629                && let (Expr::Column(a), Expr::Column(b)) = (lhs.as_ref(), rhs.as_ref())
14630            {
14631                let pair = if is_inner(a) && is_outer(b) {
14632                    Some((a.clone(), b.clone()))
14633                } else if is_inner(b) && is_outer(a) {
14634                    Some((b.clone(), a.clone()))
14635                } else {
14636                    None
14637                };
14638                if let Some(p) = pair {
14639                    if corr.is_some() {
14640                        return Ok(None); // more than one correlation
14641                    }
14642                    corr = Some(p);
14643                    continue;
14644                }
14645            }
14646            if !all_inner(c) {
14647                return Ok(None);
14648            }
14649            rest.push(c);
14650        }
14651        let Some((inner_col, outer_col)) = corr else {
14652            return Ok(None);
14653        };
14654        let SI::Expr { expr: out_expr, .. } = &inner.items[0] else {
14655            return Ok(None);
14656        };
14657        if !all_inner(out_expr) {
14658            return Ok(None);
14659        }
14660        let order = inner.order_by.first();
14661        if let Some(o) = order
14662            && !all_inner(&o.expr)
14663        {
14664            return Ok(None);
14665        }
14666        // Build the batch statement: SELECT inner_col, [order], expr
14667        // FROM … WHERE rest — no correlation, no order, no limit.
14668        let mut batch = inner.clone();
14669        batch.limit = None;
14670        batch.offset = None;
14671        batch.order_by = Vec::new();
14672        batch.where_ = rest
14673            .iter()
14674            .map(|e| (*e).clone())
14675            .reduce(|a, b| Expr::Binary {
14676                lhs: alloc::boxed::Box::new(a),
14677                op: BinOp::And,
14678                rhs: alloc::boxed::Box::new(b),
14679            });
14680        let mut items: Vec<SI> = alloc::vec![SI::Expr {
14681            expr: Expr::Column(inner_col),
14682            alias: None,
14683        }];
14684        if let Some(o) = order {
14685            items.push(SI::Expr {
14686                expr: o.expr.clone(),
14687                alias: None,
14688            });
14689        }
14690        items.push(SI::Expr {
14691            expr: out_expr.clone(),
14692            alias: None,
14693        });
14694        batch.items = items;
14695        let r = self.exec_select_cancel(&batch, cancel)?;
14696        let QueryResult::Rows { rows, .. } = r else {
14697            return Ok(None);
14698        };
14699        let has_order = order.is_some();
14700        let (desc, nf) = order
14701            .map(|o| (o.desc, o.nulls_first))
14702            .unwrap_or((false, None));
14703        let mut best: alloc::collections::BTreeMap<String, (Option<Value>, Value)> =
14704            alloc::collections::BTreeMap::new();
14705        for row in rows {
14706            let key_v = row.values.first().cloned().unwrap_or(Value::Null);
14707            if matches!(key_v, Value::Null) {
14708                continue;
14709            }
14710            let key = aggregate::encode_key(core::slice::from_ref(&key_v));
14711            let (ord_v, out_v) = if has_order {
14712                (
14713                    Some(row.values.get(1).cloned().unwrap_or(Value::Null)),
14714                    row.values.get(2).cloned().unwrap_or(Value::Null),
14715                )
14716            } else {
14717                (None, row.values.get(1).cloned().unwrap_or(Value::Null))
14718            };
14719            match best.get(&key) {
14720                None => {
14721                    best.insert(key, (ord_v, out_v));
14722                }
14723                Some((cur_ord, _)) if has_order => {
14724                    // The sorted-first row wins: candidate beats the
14725                    // incumbent when it compares LESS under the key's
14726                    // ordering.
14727                    let cand = ord_v.clone().unwrap_or(Value::Null);
14728                    let cur = cur_ord.clone().unwrap_or(Value::Null);
14729                    if order_by_value_cmp(desc, nf, &cand, &cur) == core::cmp::Ordering::Less {
14730                        best.insert(key, (ord_v, out_v));
14731                    }
14732                }
14733                Some(_) => {} // unordered: first row stands (any row is valid)
14734            }
14735        }
14736        let map = best.into_iter().map(|(k, (_, v))| (k, v)).collect();
14737        Ok(Some((outer_col, map)))
14738    }
14739}
14740
14741/// v7.29 (3c) — pre-order collection of SCALAR subquery nodes in a
14742/// host expression (no descent into subquery bodies). The splice
14743/// walk below uses the same order; the pair must stay in lockstep.
14744fn collect_scalar_subqueries<'a>(e: &'a Expr, out: &mut Vec<&'a SelectStatement>) {
14745    match e {
14746        Expr::ScalarSubquery(s) => out.push(s),
14747        Expr::Exists { .. } | Expr::InSubquery { .. } => {}
14748        Expr::Binary { lhs, rhs, .. } => {
14749            collect_scalar_subqueries(lhs, out);
14750            collect_scalar_subqueries(rhs, out);
14751        }
14752        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14753            collect_scalar_subqueries(expr, out);
14754        }
14755        Expr::Like { expr, pattern, .. } => {
14756            collect_scalar_subqueries(expr, out);
14757            collect_scalar_subqueries(pattern, out);
14758        }
14759        Expr::FunctionCall { args, .. } => {
14760            for a in args {
14761                collect_scalar_subqueries(a, out);
14762            }
14763        }
14764        Expr::AggregateOrdered { call, order_by, .. } => {
14765            collect_scalar_subqueries(call, out);
14766            for o in order_by {
14767                collect_scalar_subqueries(&o.expr, out);
14768            }
14769        }
14770        Expr::Case {
14771            operand,
14772            branches,
14773            else_branch,
14774        } => {
14775            if let Some(op) = operand {
14776                collect_scalar_subqueries(op, out);
14777            }
14778            for (w, t) in branches {
14779                collect_scalar_subqueries(w, out);
14780                collect_scalar_subqueries(t, out);
14781            }
14782            if let Some(eb) = else_branch {
14783                collect_scalar_subqueries(eb, out);
14784            }
14785        }
14786        Expr::ArraySubscript { target, index } => {
14787            collect_scalar_subqueries(target, out);
14788            collect_scalar_subqueries(index, out);
14789        }
14790        Expr::InList { expr, list, .. } => {
14791            collect_scalar_subqueries(expr, out);
14792            for item in list {
14793                collect_scalar_subqueries(item, out);
14794            }
14795        }
14796        _ => {}
14797    }
14798}
14799
14800/// v7.29 (3d) — empty every scalar-subquery BODY in a host
14801/// expression (node kept so the splice pre-order still matches).
14802fn hollow_scalar_subqueries(e: &mut Expr) {
14803    match e {
14804        Expr::ScalarSubquery(s) => {
14805            let hollow = SelectStatement {
14806                items: Vec::new(),
14807                ..SelectStatement::default()
14808            };
14809            **s = hollow;
14810        }
14811        Expr::Exists { .. } | Expr::InSubquery { .. } => {}
14812        Expr::Binary { lhs, rhs, .. } => {
14813            hollow_scalar_subqueries(lhs);
14814            hollow_scalar_subqueries(rhs);
14815        }
14816        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14817            hollow_scalar_subqueries(expr);
14818        }
14819        Expr::Like { expr, pattern, .. } => {
14820            hollow_scalar_subqueries(expr);
14821            hollow_scalar_subqueries(pattern);
14822        }
14823        Expr::FunctionCall { args, .. } => {
14824            for a in args.iter_mut() {
14825                hollow_scalar_subqueries(a);
14826            }
14827        }
14828        Expr::AggregateOrdered { call, order_by, .. } => {
14829            hollow_scalar_subqueries(call);
14830            for o in order_by.iter_mut() {
14831                hollow_scalar_subqueries(&mut o.expr);
14832            }
14833        }
14834        Expr::Case {
14835            operand,
14836            branches,
14837            else_branch,
14838        } => {
14839            if let Some(op) = operand {
14840                hollow_scalar_subqueries(op);
14841            }
14842            for (w, t) in branches.iter_mut() {
14843                hollow_scalar_subqueries(w);
14844                hollow_scalar_subqueries(t);
14845            }
14846            if let Some(eb) = else_branch {
14847                hollow_scalar_subqueries(eb);
14848            }
14849        }
14850        Expr::ArraySubscript { target, index } => {
14851            hollow_scalar_subqueries(target);
14852            hollow_scalar_subqueries(index);
14853        }
14854        Expr::InList { expr, list, .. } => {
14855            hollow_scalar_subqueries(expr);
14856            for item in list.iter_mut() {
14857                hollow_scalar_subqueries(item);
14858            }
14859        }
14860        _ => {}
14861    }
14862}
14863
14864/// v7.29 (3c) — splice the i-th scalar subquery's batched value into
14865/// the cloned tree (same pre-order as collect_scalar_subqueries).
14866/// Returns Ok(false) if a literal conversion fails (caller falls
14867/// back to the resolver path).
14868fn splice_planned_subqueries(
14869    e: &mut Expr,
14870    plan: &[Option<alloc::rc::Rc<memoize::GroupMap>>],
14871    idx: &mut usize,
14872    row: &Row,
14873    ctx: &EvalContext<'_>,
14874) -> Result<bool, EngineError> {
14875    match e {
14876        Expr::ScalarSubquery(_) => {
14877            let Some(Some(gm)) = plan.get(*idx) else {
14878                return Ok(false);
14879            };
14880            *idx += 1;
14881            let (outer_col, map) = gm.as_ref();
14882            let key_v = eval::eval_expr(&Expr::Column(outer_col.clone()), row, ctx)
14883                .map_err(EngineError::Eval)?;
14884            let v = if matches!(key_v, Value::Null) {
14885                Value::Null
14886            } else {
14887                map.get(&aggregate::encode_key(core::slice::from_ref(&key_v)))
14888                    .cloned()
14889                    .unwrap_or(Value::Null)
14890            };
14891            *e = value_to_literal_expr(v)?;
14892            Ok(true)
14893        }
14894        Expr::Exists { .. } | Expr::InSubquery { .. } => Ok(true),
14895        Expr::Binary { lhs, rhs, .. } => Ok(splice_planned_subqueries(lhs, plan, idx, row, ctx)?
14896            && splice_planned_subqueries(rhs, plan, idx, row, ctx)?),
14897        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
14898            splice_planned_subqueries(expr, plan, idx, row, ctx)
14899        }
14900        Expr::Like { expr, pattern, .. } => {
14901            Ok(splice_planned_subqueries(expr, plan, idx, row, ctx)?
14902                && splice_planned_subqueries(pattern, plan, idx, row, ctx)?)
14903        }
14904        Expr::FunctionCall { args, .. } => {
14905            for a in args.iter_mut() {
14906                if !splice_planned_subqueries(a, plan, idx, row, ctx)? {
14907                    return Ok(false);
14908                }
14909            }
14910            Ok(true)
14911        }
14912        Expr::AggregateOrdered { call, order_by, .. } => {
14913            if !splice_planned_subqueries(call, plan, idx, row, ctx)? {
14914                return Ok(false);
14915            }
14916            for o in order_by.iter_mut() {
14917                if !splice_planned_subqueries(&mut o.expr, plan, idx, row, ctx)? {
14918                    return Ok(false);
14919                }
14920            }
14921            Ok(true)
14922        }
14923        Expr::Case {
14924            operand,
14925            branches,
14926            else_branch,
14927        } => {
14928            if let Some(op) = operand {
14929                if !splice_planned_subqueries(op, plan, idx, row, ctx)? {
14930                    return Ok(false);
14931                }
14932            }
14933            for (w, t) in branches.iter_mut() {
14934                if !splice_planned_subqueries(w, plan, idx, row, ctx)?
14935                    || !splice_planned_subqueries(t, plan, idx, row, ctx)?
14936                {
14937                    return Ok(false);
14938                }
14939            }
14940            if let Some(eb) = else_branch {
14941                if !splice_planned_subqueries(eb, plan, idx, row, ctx)? {
14942                    return Ok(false);
14943                }
14944            }
14945            Ok(true)
14946        }
14947        Expr::ArraySubscript { target, index } => {
14948            Ok(splice_planned_subqueries(target, plan, idx, row, ctx)?
14949                && splice_planned_subqueries(index, plan, idx, row, ctx)?)
14950        }
14951        Expr::InList { expr, list, .. } => {
14952            if !splice_planned_subqueries(expr, plan, idx, row, ctx)? {
14953                return Ok(false);
14954            }
14955            for item in list.iter_mut() {
14956                if !splice_planned_subqueries(item, plan, idx, row, ctx)? {
14957                    return Ok(false);
14958                }
14959            }
14960            Ok(true)
14961        }
14962        _ => Ok(true),
14963    }
14964}
14965
/// v7.30.2 (mailrs round-25) — minimum element count before an
/// all-literal `IN` list gets a per-query membership set. Below
/// this the linear scan wins on build cost.
///
/// The comparison against the list length is `>=`, so a list of
/// exactly this many elements is already large enough.
const INLIST_SET_THRESHOLD: usize = 64;
14970
14971/// Cheap pre-check: is a set-eligible `IN` list reachable on the
14972/// AND spine of this expression? Anything else keeps the plain
14973/// `eval_expr` path untouched.
14974fn expr_may_use_in_set(e: &Expr) -> bool {
14975    match e {
14976        Expr::InList { list, .. } => list.len() >= INLIST_SET_THRESHOLD,
14977        Expr::Binary {
14978            lhs,
14979            op: BinOp::And,
14980            rhs,
14981        } => expr_may_use_in_set(lhs) || expr_may_use_in_set(rhs),
14982        _ => false,
14983    }
14984}
14985
14986/// Analyse an `IN` list for set eligibility: every element a literal,
14987/// all of one family (integer or string, NULLs tracked separately).
14988fn build_in_list_set(list: &[Expr]) -> Option<memoize::InListSetEntry> {
14989    let mut has_null = false;
14990    let mut ints: alloc::collections::BTreeSet<i64> = alloc::collections::BTreeSet::new();
14991    let mut texts: alloc::collections::BTreeSet<String> = alloc::collections::BTreeSet::new();
14992    for item in list {
14993        let Expr::Literal(lit) = item else {
14994            return None;
14995        };
14996        match lit {
14997            Literal::Null => has_null = true,
14998            Literal::Integer(i) => {
14999                ints.insert(*i);
15000            }
15001            Literal::String(s) => {
15002                texts.insert(s.clone());
15003            }
15004            _ => return None,
15005        }
15006        if !ints.is_empty() && !texts.is_empty() {
15007            return None;
15008        }
15009    }
15010    let set = if !ints.is_empty() {
15011        memoize::InListSet::Int(ints)
15012    } else if !texts.is_empty() {
15013        memoize::InListSet::Text(texts)
15014    } else {
15015        return None;
15016    };
15017    Some(memoize::InListSetEntry { set, has_null })
15018}
15019
15020/// Subquery-free eval that serves large all-literal `IN` lists from
15021/// a per-query membership set (cached in the memo by node address).
15022/// Walks only the AND spine; every other node — and every needle
15023/// whose runtime family doesn't match the set — falls through to
15024/// `eval_expr`, so coercion and error semantics stay identical.
15025fn eval_with_in_sets(
15026    e: &Expr,
15027    row: &Row,
15028    ctx: &EvalContext<'_>,
15029    m: &mut memoize::MemoizeCache,
15030) -> Result<Value, EngineError> {
15031    match e {
15032        Expr::Binary {
15033            lhs,
15034            op: BinOp::And,
15035            rhs,
15036        } => {
15037            // Mirror eval_expr: both sides evaluate (no short
15038            // circuit), then SQL three-valued AND.
15039            let l = eval_with_in_sets(lhs, row, ctx, m)?;
15040            let r = eval_with_in_sets(rhs, row, ctx, m)?;
15041            eval::and_3vl(l, r).map_err(EngineError::Eval)
15042        }
15043        Expr::InList {
15044            expr: lhs,
15045            list,
15046            negated,
15047        } if list.len() >= INLIST_SET_THRESHOLD => {
15048            let key = core::ptr::from_ref::<Expr>(e) as usize;
15049            let Some(entry) = m
15050                .in_sets
15051                .entry(key)
15052                .or_insert_with(|| build_in_list_set(list))
15053            else {
15054                return eval::eval_expr(e, row, ctx).map_err(EngineError::Eval);
15055            };
15056            let needle = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
15057            let contained = match (&needle, &entry.set) {
15058                // Non-empty list + NULL needle → NULL (negation of
15059                // NULL is still NULL).
15060                (Value::Null, _) => return Ok(Value::Null),
15061                (Value::SmallInt(n), memoize::InListSet::Int(s)) => s.contains(&i64::from(*n)),
15062                (Value::Int(n), memoize::InListSet::Int(s)) => s.contains(&i64::from(*n)),
15063                (Value::BigInt(n), memoize::InListSet::Int(s)) => s.contains(n),
15064                (Value::Text(t), memoize::InListSet::Text(s)) => s.contains(t.as_str()),
15065                // Cross-family needle (e.g. Float vs integer list):
15066                // keep apply_binary's coercion / error behaviour.
15067                _ => return eval::eval_expr(e, row, ctx).map_err(EngineError::Eval),
15068            };
15069            let inner = if contained {
15070                Value::Bool(true)
15071            } else if entry.has_null {
15072                Value::Null
15073            } else {
15074                Value::Bool(false)
15075            };
15076            Ok(match (negated, inner) {
15077                (true, Value::Bool(b)) => Value::Bool(!b),
15078                (_, v) => v,
15079            })
15080        }
15081        _ => eval::eval_expr(e, row, ctx).map_err(EngineError::Eval),
15082    }
15083}
15084
15085fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
15086    // v7.24 (round-16 B) — joined outer contexts carry no single
15087    // table alias; their schemas use composite "alias.column" names
15088    // instead. Pass an unmatchable alias and let the composite
15089    // lookup in substitute_in_expr do the work (a correlated EXISTS
15090    // under a JOIN previously skipped substitution entirely and
15091    // died with "unknown table qualifier").
15092    let outer_alias = ctx.table_alias.unwrap_or("");
15093    substitute_in_select(stmt, row, ctx, outer_alias);
15094}
15095
15096fn substitute_in_select(
15097    stmt: &mut SelectStatement,
15098    row: &Row,
15099    ctx: &EvalContext<'_>,
15100    outer_alias: &str,
15101) {
15102    for item in &mut stmt.items {
15103        if let SelectItem::Expr { expr, .. } = item {
15104            substitute_in_expr(expr, row, ctx, outer_alias);
15105        }
15106    }
15107    if let Some(w) = &mut stmt.where_ {
15108        substitute_in_expr(w, row, ctx, outer_alias);
15109    }
15110    if let Some(gs) = &mut stmt.group_by {
15111        for g in gs {
15112            substitute_in_expr(g, row, ctx, outer_alias);
15113        }
15114    }
15115    if let Some(h) = &mut stmt.having {
15116        substitute_in_expr(h, row, ctx, outer_alias);
15117    }
15118    for o in &mut stmt.order_by {
15119        substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
15120    }
15121    for (_, peer) in &mut stmt.unions {
15122        substitute_in_select(peer, row, ctx, outer_alias);
15123    }
15124}
15125
/// Recursively replace outer-row column references inside `e` with
/// literal values read from `row`.
///
/// Two kinds of column are substituted. Both are located by name in
/// `ctx.columns` and read from the same index of `row.values` (a
/// missing slot reads as NULL):
/// - unqualified engine-generated `__grp_*` / `__agg_*` columns;
/// - qualified columns whose qualifier equals `outer_alias`
///   (ASCII case-insensitive), or whose composite `qualifier.name`
///   is itself a column name in `ctx.columns`.
///
/// A column that matches nothing, or whose value has no literal
/// form, is left untouched. Every other node kind is walked
/// recursively, including the bodies of nested subqueries.
fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
    // v7.25.2 (round-19 A) — bare synthetic columns. The aggregate
    // rewriter replaces group-key references INSIDE subquery bodies
    // with `__grp_N` so a correlated subquery in a GROUP BY select
    // list can resolve against the synthesised group row. The names
    // are engine-generated, so they can't shadow user columns.
    if let Expr::Column(c) = e
        && c.qualifier.is_none()
        && (c.name.starts_with("__grp_") || c.name.starts_with("__agg_"))
        && let Some(idx) = ctx.columns.iter().position(|sc| sc.name == c.name)
    {
        let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
        if let Ok(lit) = value_to_literal_expr(v) {
            *e = lit;
            return;
        }
    }
    if let Expr::Column(c) = e
        && let Some(qual) = &c.qualifier
    {
        // Look up the column's index in the outer schema: plain name
        // when the qualifier is the outer table's alias, composite
        // "alias.column" for joined outer schemas (v7.24).
        let idx = if !outer_alias.is_empty() && qual.eq_ignore_ascii_case(outer_alias) {
            ctx.columns
                .iter()
                .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
        } else {
            None
        }
        .or_else(|| {
            let composite = alloc::format!("{qual}.{name}", name = c.name);
            ctx.columns
                .iter()
                .position(|sc| sc.name.eq_ignore_ascii_case(&composite))
        });
        if let Some(idx) = idx {
            let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
            if let Ok(lit) = value_to_literal_expr(v) {
                *e = lit;
                return;
            }
        }
    }
    // Not substituted at this node: recurse into the children. A
    // column that reaches this point stays as it is.
    match e {
        Expr::AggregateOrdered { call, order_by, .. } => {
            substitute_in_expr(call, row, ctx, outer_alias);
            for o in order_by.iter_mut() {
                substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
            }
        }
        Expr::Binary { lhs, rhs, .. } => {
            substitute_in_expr(lhs, row, ctx, outer_alias);
            substitute_in_expr(rhs, row, ctx, outer_alias);
        }
        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
            substitute_in_expr(expr, row, ctx, outer_alias);
        }
        Expr::Like { expr, pattern, .. } => {
            substitute_in_expr(expr, row, ctx, outer_alias);
            substitute_in_expr(pattern, row, ctx, outer_alias);
        }
        Expr::FunctionCall { args, .. } => {
            for a in args {
                substitute_in_expr(a, row, ctx, outer_alias);
            }
        }
        Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
        Expr::WindowFunction {
            args,
            partition_by,
            order_by,
            ..
        } => {
            for a in args {
                substitute_in_expr(a, row, ctx, outer_alias);
            }
            for p in partition_by {
                substitute_in_expr(p, row, ctx, outer_alias);
            }
            for (o, _, _) in order_by {
                substitute_in_expr(o, row, ctx, outer_alias);
            }
        }
        // Nested subqueries are rewritten with the same outer row and alias.
        Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
        Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
            substitute_in_select(subquery, row, ctx, outer_alias);
        }
        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
        Expr::Array(items) => {
            for elem in items {
                substitute_in_expr(elem, row, ctx, outer_alias);
            }
        }
        Expr::ArraySubscript { target, index } => {
            substitute_in_expr(target, row, ctx, outer_alias);
            substitute_in_expr(index, row, ctx, outer_alias);
        }
        Expr::AnyAll { expr, array, .. } => {
            substitute_in_expr(expr, row, ctx, outer_alias);
            substitute_in_expr(array, row, ctx, outer_alias);
        }
        Expr::InList { expr, list, .. } => {
            substitute_in_expr(expr, row, ctx, outer_alias);
            for item in list {
                substitute_in_expr(item, row, ctx, outer_alias);
            }
        }
        Expr::Case {
            operand,
            branches,
            else_branch,
        } => {
            if let Some(o) = operand {
                substitute_in_expr(o, row, ctx, outer_alias);
            }
            for (w, t) in branches {
                substitute_in_expr(w, row, ctx, outer_alias);
                substitute_in_expr(t, row, ctx, outer_alias);
            }
            if let Some(e) = else_branch {
                substitute_in_expr(e, row, ctx, outer_alias);
            }
        }
    }
}
15252
15253/// v4.22: encode a Row to a comparable byte key for UNION-DISTINCT
15254/// dedup inside the recursive iteration. Crude but deterministic
15255/// — Debug prints embed type discriminants so NULL ≠ "" ≠ 0.
15256fn encode_row_key(row: &Row) -> Vec<u8> {
15257    let mut out = Vec::new();
15258    for v in &row.values {
15259        let s = alloc::format!("{v:?}|");
15260        out.extend_from_slice(s.as_bytes());
15261    }
15262    out
15263}
15264
15265fn select_has_window(stmt: &SelectStatement) -> bool {
15266    for item in &stmt.items {
15267        if let SelectItem::Expr { expr, .. } = item
15268            && expr_has_window(expr)
15269        {
15270            return true;
15271        }
15272    }
15273    false
15274}
15275
15276fn expr_has_window(e: &Expr) -> bool {
15277    match e {
15278        Expr::WindowFunction { .. } => true,
15279        Expr::AggregateOrdered { call, order_by, .. } => {
15280            expr_has_window(call) || order_by.iter().any(|o| expr_has_window(&o.expr))
15281        }
15282        Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
15283        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
15284            expr_has_window(expr)
15285        }
15286        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
15287        Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
15288        Expr::Extract { source, .. } => expr_has_window(source),
15289        Expr::ScalarSubquery(_)
15290        | Expr::Exists { .. }
15291        | Expr::InSubquery { .. }
15292        | Expr::Literal(_)
15293        | Expr::Placeholder(_)
15294        | Expr::Column(_) => false,
15295        Expr::Array(items) => items.iter().any(expr_has_window),
15296        Expr::ArraySubscript { target, index } => expr_has_window(target) || expr_has_window(index),
15297        Expr::AnyAll { expr, array, .. } => expr_has_window(expr) || expr_has_window(array),
15298        Expr::InList { expr, list, .. } => {
15299            expr_has_window(expr) || list.iter().any(expr_has_window)
15300        }
15301        Expr::Case {
15302            operand,
15303            branches,
15304            else_branch,
15305        } => {
15306            operand.as_deref().is_some_and(expr_has_window)
15307                || branches
15308                    .iter()
15309                    .any(|(w, t)| expr_has_window(w) || expr_has_window(t))
15310                || else_branch.as_deref().is_some_and(expr_has_window)
15311        }
15312    }
15313}
15314
15315fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
15316    if let Expr::WindowFunction { .. } = e {
15317        // Deduplicate by structural equality on the expression
15318        // (cheap because window args + partition + order are
15319        // small). Without dedup we'd recompute identical windows
15320        // once per occurrence in the projection.
15321        if !out.iter().any(|x| x == e) {
15322            out.push(e.clone());
15323        }
15324        return;
15325    }
15326    match e {
15327        // Already handled by the early-return at the top.
15328        Expr::WindowFunction { .. } => unreachable!(),
15329        Expr::Binary { lhs, rhs, .. } => {
15330            collect_window_nodes(lhs, out);
15331            collect_window_nodes(rhs, out);
15332        }
15333        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
15334            collect_window_nodes(expr, out);
15335        }
15336        Expr::FunctionCall { args, .. } => {
15337            for a in args {
15338                collect_window_nodes(a, out);
15339            }
15340        }
15341        Expr::Like { expr, pattern, .. } => {
15342            collect_window_nodes(expr, out);
15343            collect_window_nodes(pattern, out);
15344        }
15345        Expr::Extract { source, .. } => collect_window_nodes(source, out),
15346        _ => {}
15347    }
15348}
15349
15350fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
15351    if let Expr::WindowFunction { .. } = e
15352        && let Some(idx) = window_nodes.iter().position(|w| w == e)
15353    {
15354        *e = Expr::Column(spg_sql::ast::ColumnName {
15355            qualifier: None,
15356            name: alloc::format!("__win_{idx}"),
15357        });
15358        return;
15359    }
15360    match e {
15361        Expr::Binary { lhs, rhs, .. } => {
15362            rewrite_window_to_columns(lhs, window_nodes);
15363            rewrite_window_to_columns(rhs, window_nodes);
15364        }
15365        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
15366            rewrite_window_to_columns(expr, window_nodes);
15367        }
15368        Expr::FunctionCall { args, .. } => {
15369            for a in args {
15370                rewrite_window_to_columns(a, window_nodes);
15371            }
15372        }
15373        Expr::Like { expr, pattern, .. } => {
15374            rewrite_window_to_columns(expr, window_nodes);
15375            rewrite_window_to_columns(pattern, window_nodes);
15376        }
15377        Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
15378        _ => {}
15379    }
15380}
15381
15382/// Total order over partition-key tuples. NULL sorts as the
15383/// lowest value (matches the `<` partial order's NULL-last
15384/// behaviour with `INFINITY` flipped).
15385fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
15386    for (x, y) in a.iter().zip(b.iter()) {
15387        let c = value_cmp(x, y);
15388        if c != core::cmp::Ordering::Equal {
15389            return c;
15390        }
15391    }
15392    a.len().cmp(&b.len())
15393}
15394
15395fn order_key_cmp(
15396    a: &[(Value, bool, Option<bool>)],
15397    b: &[(Value, bool, Option<bool>)],
15398) -> core::cmp::Ordering {
15399    // v7.24.1 — per-key DESC + effective NULLS placement (shared
15400    // contract with order_by_value_cmp).
15401    for ((va, desc, nf), (vb, _, _)) in a.iter().zip(b.iter()) {
15402        let c = order_by_value_cmp(*desc, *nf, va, vb);
15403        if c != core::cmp::Ordering::Equal {
15404            return c;
15405        }
15406    }
15407    a.len().cmp(&b.len())
15408}
15409
15410/// v7.17.0 Phase 3.10 — true when the Value is one of the
15411/// integer-shaped variants `generate_series` accepts as a start
15412/// / stop / step component. Float / NUMERIC are rejected — PG's
15413/// `generate_series(numeric, numeric)` overload is out of v7.17
15414/// scope.
15415const fn value_is_integer(v: &Value) -> bool {
15416    matches!(v, Value::SmallInt(_) | Value::Int(_) | Value::BigInt(_))
15417}
15418
15419/// v7.17.0 Phase 3.10 — widen any integer-shaped Value to i64 for
15420/// the generate_series iteration loop. Non-integer inputs panic;
15421/// caller guards via `value_is_integer`.
15422const fn value_to_i64(v: &Value) -> i64 {
15423    match v {
15424        Value::SmallInt(n) => *n as i64,
15425        Value::Int(n) => *n as i64,
15426        Value::BigInt(n) => *n,
15427        _ => panic!("value_to_i64 called on non-integer Value"),
15428    }
15429}
15430
15431/// v7.17.0 Phase 3.10 — integer-mode generate_series materialiser.
15432/// Step direction follows the sign: positive step iterates upward
15433/// (stops when current > stop); negative iterates downward; zero
15434/// errors. Caller-facing row stream is `BigInt`-typed so a single
15435/// projection schema covers SmallInt / Int / BigInt callers.
15436fn generate_series_integers(
15437    start: i64,
15438    stop: i64,
15439    step: i64,
15440    cancel: &CancelToken<'_>,
15441) -> Result<alloc::vec::Vec<Row>, EngineError> {
15442    if step == 0 {
15443        return Err(EngineError::Unsupported(
15444            "generate_series(): step argument cannot be zero".into(),
15445        ));
15446    }
15447    let mut out = alloc::vec::Vec::new();
15448    let mut cur = start;
15449    // Hard cap to keep a runaway call from eating all memory. PG
15450    // has no such cap but does honour query timeout; SPG's cancel
15451    // token will fire too — this is a defense-in-depth backstop.
15452    const MAX_ROWS: usize = 10_000_000;
15453    loop {
15454        cancel.check()?;
15455        if step > 0 && cur > stop {
15456            break;
15457        }
15458        if step < 0 && cur < stop {
15459            break;
15460        }
15461        out.push(Row::new(alloc::vec![Value::BigInt(cur)]));
15462        if out.len() > MAX_ROWS {
15463            return Err(EngineError::Unsupported(alloc::format!(
15464                "generate_series(): exceeded {MAX_ROWS} rows; \
15465                 narrow start/stop or use a larger step"
15466            )));
15467        }
15468        cur = match cur.checked_add(step) {
15469            Some(n) => n,
15470            None => break,
15471        };
15472    }
15473    Ok(out)
15474}
15475
15476/// v7.17.0 Phase 3.10 — timestamp-mode generate_series. step is a
15477/// `Value::Interval { months, micros }` per the caller's guard;
15478/// each iteration adds the interval via `apply_binary_interval`
15479/// so month-shifting handles short-month rollover (PG semantics).
15480fn generate_series_timestamps(
15481    start: i64,
15482    stop: i64,
15483    step: Value,
15484    cancel: &CancelToken<'_>,
15485) -> Result<alloc::vec::Vec<Row>, EngineError> {
15486    let (months, micros) = match &step {
15487        Value::Interval { months, micros } => (*months, *micros),
15488        _ => unreachable!("caller guards step.is_interval"),
15489    };
15490    if months == 0 && micros == 0 {
15491        return Err(EngineError::Unsupported(
15492            "generate_series(): INTERVAL step cannot be zero".into(),
15493        ));
15494    }
15495    let ascending = months > 0 || micros > 0;
15496    let mut out = alloc::vec::Vec::new();
15497    let mut cur = Value::Timestamp(start);
15498    const MAX_ROWS: usize = 10_000_000;
15499    loop {
15500        cancel.check()?;
15501        let cur_t = match cur {
15502            Value::Timestamp(t) => t,
15503            _ => unreachable!("loop invariant: cur is Timestamp"),
15504        };
15505        if ascending && cur_t > stop {
15506            break;
15507        }
15508        if !ascending && cur_t < stop {
15509            break;
15510        }
15511        out.push(Row::new(alloc::vec![Value::Timestamp(cur_t)]));
15512        if out.len() > MAX_ROWS {
15513            return Err(EngineError::Unsupported(alloc::format!(
15514                "generate_series(): exceeded {MAX_ROWS} rows; \
15515                 narrow start/stop or use a larger step"
15516            )));
15517        }
15518        let next = eval::apply_binary_interval(
15519            spg_sql::ast::BinOp::Add,
15520            &cur,
15521            &Value::Interval { months, micros },
15522        )
15523        .map_err(EngineError::Eval)?;
15524        cur = match next {
15525            Some(v) => v,
15526            None => break,
15527        };
15528    }
15529    Ok(out)
15530}
15531
15532#[allow(clippy::match_same_arms)] // explicit arms per type document the supported pairs
15533/// v7.24 (round-16 A) — per-key ORDER BY comparator honouring DESC
15534/// and the effective NULLS placement (explicit NULLS FIRST/LAST,
15535/// else the PG default: NULLS LAST for ASC, NULLS FIRST for DESC).
15536/// NULL placement is absolute — it does not flip with DESC.
15537pub(crate) fn order_by_value_cmp(
15538    desc: bool,
15539    nulls_first: Option<bool>,
15540    a: &Value,
15541    b: &Value,
15542) -> core::cmp::Ordering {
15543    use core::cmp::Ordering;
15544    let nf = nulls_first.unwrap_or(desc);
15545    match (matches!(a, Value::Null), matches!(b, Value::Null)) {
15546        (true, true) => Ordering::Equal,
15547        (true, false) => {
15548            if nf {
15549                Ordering::Less
15550            } else {
15551                Ordering::Greater
15552            }
15553        }
15554        (false, true) => {
15555            if nf {
15556                Ordering::Greater
15557            } else {
15558                Ordering::Less
15559            }
15560        }
15561        (false, false) => {
15562            let c = value_cmp(a, b);
15563            if desc { c.reverse() } else { c }
15564        }
15565    }
15566}
15567
15568fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
15569    use core::cmp::Ordering;
15570    match (a, b) {
15571        (Value::Null, Value::Null) => Ordering::Equal,
15572        (Value::Null, _) => Ordering::Less,
15573        (_, Value::Null) => Ordering::Greater,
15574        (Value::Int(x), Value::Int(y)) => x.cmp(y),
15575        (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
15576        (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
15577        (Value::Text(x), Value::Text(y)) => x.cmp(y),
15578        (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
15579        (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
15580        (Value::Date(x), Value::Date(y)) => x.cmp(y),
15581        (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
15582        // Cross-type compare: fall back to the debug rendering —
15583        // same-partition is the goal, exact order is irrelevant.
15584        _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
15585    }
15586}
15587
15588/// Compute the window function's per-row output for one partition.
15589/// `slice` has (partition key, order key, original-row-index)
15590/// tuples already sorted by order key. `filtered_rows` is the
15591/// full row list indexed by original-row-index. `out_vals` is
15592/// the destination, also indexed by original-row-index.
15593#[allow(
15594    clippy::too_many_arguments,
15595    clippy::cast_possible_truncation,
15596    clippy::cast_possible_wrap,
15597    clippy::cast_precision_loss,
15598    clippy::cast_sign_loss,
15599    clippy::doc_markdown,
15600    clippy::too_many_lines,
15601    clippy::type_complexity,
15602    clippy::match_same_arms
15603)]
15604fn compute_window_partition(
15605    name: &str,
15606    args: &[Expr],
15607    ordered: bool,
15608    frame: Option<&WindowFrame>,
15609    null_treatment: spg_sql::ast::NullTreatment,
15610    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
15611    filtered_rows: &[&Row],
15612    ctx: &EvalContext<'_>,
15613    out_vals: &mut [Value],
15614) -> Result<(), EngineError> {
15615    let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
15616    let lower = name.to_ascii_lowercase();
15617    match lower.as_str() {
15618        "row_number" => {
15619            for (rank, (_, _, idx)) in slice.iter().enumerate() {
15620                out_vals[*idx] = Value::BigInt((rank + 1) as i64);
15621            }
15622            Ok(())
15623        }
15624        "rank" => {
15625            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15626            let mut current_rank: i64 = 1;
15627            for (i, (_, okey, idx)) in slice.iter().enumerate() {
15628                if let Some(p) = prev_key
15629                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
15630                {
15631                    current_rank = (i + 1) as i64;
15632                }
15633                if prev_key.is_none() {
15634                    current_rank = 1;
15635                }
15636                out_vals[*idx] = Value::BigInt(current_rank);
15637                prev_key = Some(okey.as_slice());
15638            }
15639            Ok(())
15640        }
15641        "dense_rank" => {
15642            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15643            let mut current_rank: i64 = 0;
15644            for (_, okey, idx) in slice {
15645                if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
15646                    current_rank += 1;
15647                }
15648                out_vals[*idx] = Value::BigInt(current_rank);
15649                prev_key = Some(okey.as_slice());
15650            }
15651            Ok(())
15652        }
15653        "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
15654            // Pre-evaluate the function arg per row in the slice
15655            // (count_star has no arg).
15656            let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
15657                slice.iter().map(|_| Value::Null).collect()
15658            } else {
15659                slice
15660                    .iter()
15661                    .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15662                    .collect::<Result<_, _>>()
15663                    .map_err(EngineError::Eval)?
15664            };
15665            // v4.20: pick the effective frame. Explicit frame
15666            // overrides the implicit default (running for ordered,
15667            // whole-partition for unordered).
15668            let eff = effective_frame(frame, ordered)?;
15669            #[allow(clippy::needless_range_loop)]
15670            for i in 0..slice.len() {
15671                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
15672                let mut sum: f64 = 0.0;
15673                let mut count: i64 = 0;
15674                let mut min_v: Option<f64> = None;
15675                let mut max_v: Option<f64> = None;
15676                let mut row_count: i64 = 0;
15677                if lo <= hi {
15678                    for j in lo..=hi {
15679                        let v = &arg_values[j];
15680                        match lower.as_str() {
15681                            "count_star" => row_count += 1,
15682                            "count" => {
15683                                if !v.is_null() {
15684                                    count += 1;
15685                                }
15686                            }
15687                            _ => {
15688                                if let Some(x) = value_to_f64(v) {
15689                                    sum += x;
15690                                    count += 1;
15691                                    min_v = Some(min_v.map_or(x, |m| m.min(x)));
15692                                    max_v = Some(max_v.map_or(x, |m| m.max(x)));
15693                                }
15694                            }
15695                        }
15696                    }
15697                }
15698                let value = match lower.as_str() {
15699                    "count_star" => Value::BigInt(row_count),
15700                    "count" => Value::BigInt(count),
15701                    "sum" => Value::Float(sum),
15702                    "avg" => {
15703                        if count == 0 {
15704                            Value::Null
15705                        } else {
15706                            Value::Float(sum / count as f64)
15707                        }
15708                    }
15709                    "min" => min_v.map_or(Value::Null, Value::Float),
15710                    "max" => max_v.map_or(Value::Null, Value::Float),
15711                    _ => unreachable!(),
15712                };
15713                let (_, _, idx) = &slice[i];
15714                out_vals[*idx] = value;
15715            }
15716            Ok(())
15717        }
15718        "lag" | "lead" => {
15719            // lag(expr [, offset [, default]])
15720            // lead(expr [, offset [, default]])
15721            if args.is_empty() {
15722                return Err(EngineError::Unsupported(alloc::format!(
15723                    "{lower}() requires at least one argument"
15724                )));
15725            }
15726            let offset: i64 = if args.len() >= 2 {
15727                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
15728                    .map_err(EngineError::Eval)?;
15729                match v {
15730                    Value::SmallInt(n) => i64::from(n),
15731                    Value::Int(n) => i64::from(n),
15732                    Value::BigInt(n) => n,
15733                    _ => {
15734                        return Err(EngineError::Unsupported(alloc::format!(
15735                            "{lower}() offset must be integer"
15736                        )));
15737                    }
15738                }
15739            } else {
15740                1
15741            };
15742            let default: Value = if args.len() >= 3 {
15743                eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
15744                    .map_err(EngineError::Eval)?
15745            } else {
15746                Value::Null
15747            };
15748            let values: Vec<Value> = slice
15749                .iter()
15750                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15751                .collect::<Result<_, _>>()
15752                .map_err(EngineError::Eval)?;
15753            let n = slice.len();
15754            for (i, (_, _, idx)) in slice.iter().enumerate() {
15755                let signed_offset = if lower == "lag" { -offset } else { offset };
15756                let v = if ignore_nulls {
15757                    // v6.4.2 — IGNORE NULLS: walk in the offset direction
15758                    // skipping NULL values; the `offset`-th non-NULL
15759                    // encountered is the result.
15760                    let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
15761                    let needed: i64 = signed_offset.abs();
15762                    if needed == 0 {
15763                        values[i].clone()
15764                    } else {
15765                        let mut j: i64 = i as i64;
15766                        let mut hits: i64 = 0;
15767                        let mut found: Option<Value> = None;
15768                        loop {
15769                            j += step;
15770                            if j < 0 || j >= n as i64 {
15771                                break;
15772                            }
15773                            #[allow(clippy::cast_sign_loss)]
15774                            let v = &values[j as usize];
15775                            if !v.is_null() {
15776                                hits += 1;
15777                                if hits == needed {
15778                                    found = Some(v.clone());
15779                                    break;
15780                                }
15781                            }
15782                        }
15783                        found.unwrap_or_else(|| default.clone())
15784                    }
15785                } else {
15786                    let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
15787                    if target_signed < 0 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX) {
15788                        default.clone()
15789                    } else {
15790                        #[allow(clippy::cast_sign_loss)]
15791                        {
15792                            values[target_signed as usize].clone()
15793                        }
15794                    }
15795                };
15796                out_vals[*idx] = v;
15797            }
15798            Ok(())
15799        }
15800        "first_value" | "last_value" | "nth_value" => {
15801            if args.is_empty() {
15802                return Err(EngineError::Unsupported(alloc::format!(
15803                    "{lower}() requires at least one argument"
15804                )));
15805            }
15806            let values: Vec<Value> = slice
15807                .iter()
15808                .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
15809                .collect::<Result<_, _>>()
15810                .map_err(EngineError::Eval)?;
15811            let nth: usize = if lower == "nth_value" {
15812                if args.len() < 2 {
15813                    return Err(EngineError::Unsupported(
15814                        "nth_value() requires (expr, n)".into(),
15815                    ));
15816                }
15817                let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
15818                    .map_err(EngineError::Eval)?;
15819                let raw = match v {
15820                    Value::SmallInt(n) => i64::from(n),
15821                    Value::Int(n) => i64::from(n),
15822                    Value::BigInt(n) => n,
15823                    _ => {
15824                        return Err(EngineError::Unsupported(
15825                            "nth_value() n must be integer".into(),
15826                        ));
15827                    }
15828                };
15829                if raw < 1 {
15830                    return Err(EngineError::Unsupported(
15831                        "nth_value() n must be >= 1".into(),
15832                    ));
15833                }
15834                #[allow(clippy::cast_sign_loss)]
15835                {
15836                    raw as usize
15837                }
15838            } else {
15839                0
15840            };
15841            let eff = effective_frame(frame, ordered)?;
15842            for i in 0..slice.len() {
15843                let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
15844                let (_, _, idx) = &slice[i];
15845                let v = if lo > hi {
15846                    Value::Null
15847                } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
15848                    // v6.4.2 — IGNORE NULLS: skip NULL cells when
15849                    // selecting the boundary value within the frame.
15850                    if lower == "first_value" {
15851                        (lo..=hi)
15852                            .find_map(|j| {
15853                                let v = &values[j];
15854                                (!v.is_null()).then(|| v.clone())
15855                            })
15856                            .unwrap_or(Value::Null)
15857                    } else {
15858                        (lo..=hi)
15859                            .rev()
15860                            .find_map(|j| {
15861                                let v = &values[j];
15862                                (!v.is_null()).then(|| v.clone())
15863                            })
15864                            .unwrap_or(Value::Null)
15865                    }
15866                } else {
15867                    match lower.as_str() {
15868                        "first_value" => values[lo].clone(),
15869                        "last_value" => values[hi].clone(),
15870                        "nth_value" => {
15871                            let pos = lo + nth - 1;
15872                            if pos > hi {
15873                                Value::Null
15874                            } else {
15875                                values[pos].clone()
15876                            }
15877                        }
15878                        _ => unreachable!(),
15879                    }
15880                };
15881                out_vals[*idx] = v;
15882            }
15883            Ok(())
15884        }
15885        "ntile" => {
15886            if args.is_empty() {
15887                return Err(EngineError::Unsupported(
15888                    "ntile(n) requires an integer argument".into(),
15889                ));
15890            }
15891            let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
15892                .map_err(EngineError::Eval)?;
15893            let bucket_count: i64 = match v {
15894                Value::SmallInt(n) => i64::from(n),
15895                Value::Int(n) => i64::from(n),
15896                Value::BigInt(n) => n,
15897                _ => {
15898                    return Err(EngineError::Unsupported(
15899                        "ntile() argument must be integer".into(),
15900                    ));
15901                }
15902            };
15903            if bucket_count < 1 {
15904                return Err(EngineError::Unsupported(
15905                    "ntile() argument must be >= 1".into(),
15906                ));
15907            }
15908            #[allow(clippy::cast_sign_loss)]
15909            let buckets = bucket_count as usize;
15910            let n = slice.len();
15911            // Each bucket gets `base` rows; the first `extras` buckets
15912            // get one extra. PG semantics.
15913            let base = n / buckets;
15914            let extras = n % buckets;
15915            let mut bucket: usize = 1;
15916            let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
15917            let mut buckets_with_extra_remaining = extras;
15918            for (_, _, idx) in slice {
15919                if remaining_in_bucket == 0 {
15920                    bucket += 1;
15921                    buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
15922                    remaining_in_bucket = if buckets_with_extra_remaining > 0 {
15923                        base + 1
15924                    } else {
15925                        base
15926                    };
15927                    // Edge: if base==0 and extras==0, all rows fit;
15928                    // shouldn't reach here, but guard anyway.
15929                    if remaining_in_bucket == 0 {
15930                        remaining_in_bucket = 1;
15931                    }
15932                }
15933                out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
15934                remaining_in_bucket -= 1;
15935            }
15936            Ok(())
15937        }
15938        "percent_rank" => {
15939            // (rank - 1) / (n - 1) where rank is the standard RANK().
15940            // Single-row partitions get 0.
15941            let n = slice.len();
15942            let mut prev_key: Option<&[(Value, bool, Option<bool>)]> = None;
15943            let mut current_rank: i64 = 1;
15944            for (i, (_, okey, idx)) in slice.iter().enumerate() {
15945                if let Some(p) = prev_key
15946                    && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
15947                {
15948                    current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
15949                }
15950                if prev_key.is_none() {
15951                    current_rank = 1;
15952                }
15953                #[allow(clippy::cast_precision_loss)]
15954                let pr = if n <= 1 {
15955                    0.0
15956                } else {
15957                    (current_rank - 1) as f64 / (n - 1) as f64
15958                };
15959                out_vals[*idx] = Value::Float(pr);
15960                prev_key = Some(okey.as_slice());
15961            }
15962            Ok(())
15963        }
15964        "cume_dist" => {
15965            // # rows up to and including this row's peer group / n.
15966            let n = slice.len();
15967            // First pass: find peer-group-end rank for each row.
15968            for i in 0..slice.len() {
15969                let peer_end = peer_group_end(slice, i);
15970                #[allow(clippy::cast_precision_loss)]
15971                let cd = (peer_end + 1) as f64 / n as f64;
15972                let (_, _, idx) = &slice[i];
15973                out_vals[*idx] = Value::Float(cd);
15974            }
15975            Ok(())
15976        }
15977        other => Err(EngineError::Unsupported(alloc::format!(
15978            "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
15979        ))),
15980    }
15981}
15982
15983/// v4.20: resolve the user-provided frame down to a normalised
15984/// `(kind, start, end)`. `None` means default — derive from
15985/// `ordered`: ordered ⇒ RANGE UNBOUNDED PRECEDING AND CURRENT ROW,
15986/// unordered ⇒ ROWS UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
15987/// Single-bound shorthand (e.g. `ROWS 5 PRECEDING`) normalises
15988/// end → CURRENT ROW per the PG spec.
15989fn effective_frame(
15990    frame: Option<&WindowFrame>,
15991    ordered: bool,
15992) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
15993    match frame {
15994        None => {
15995            if ordered {
15996                Ok((
15997                    FrameKind::Range,
15998                    FrameBound::UnboundedPreceding,
15999                    FrameBound::CurrentRow,
16000                ))
16001            } else {
16002                Ok((
16003                    FrameKind::Rows,
16004                    FrameBound::UnboundedPreceding,
16005                    FrameBound::UnboundedFollowing,
16006                ))
16007            }
16008        }
16009        Some(fr) => {
16010            let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
16011            // Reject start > end (a few impossible combinations).
16012            if matches!(fr.start, FrameBound::UnboundedFollowing)
16013                || matches!(end, FrameBound::UnboundedPreceding)
16014            {
16015                return Err(EngineError::Unsupported(alloc::format!(
16016                    "invalid frame: start={:?} end={:?}",
16017                    fr.start,
16018                    end
16019                )));
16020            }
16021            // RANGE OFFSET PRECEDING / FOLLOWING needs value-typed
16022            // arithmetic on the ORDER BY key (e.g. `RANGE BETWEEN
16023            // INTERVAL '1 day' PRECEDING AND CURRENT ROW`). Not
16024            // implemented in v4.20.
16025            if fr.kind == FrameKind::Range
16026                && (matches!(
16027                    fr.start,
16028                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
16029                ) || matches!(
16030                    end,
16031                    FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
16032                ))
16033            {
16034                return Err(EngineError::Unsupported(
16035                    "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
16036                ));
16037            }
16038            Ok((fr.kind, fr.start.clone(), end))
16039        }
16040    }
16041}
16042
16043/// Compute `(lo, hi)` row-index bounds inside the partition slice
16044/// for the row at position `i`. Inclusive, clamped to
16045/// `[0, slice.len()-1]`. Empty result if `lo > hi`.
16046#[allow(clippy::type_complexity)]
16047fn frame_bounds_for_row(
16048    eff: &(FrameKind, FrameBound, FrameBound),
16049    i: usize,
16050    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
16051) -> (usize, usize) {
16052    let (kind, start, end) = eff;
16053    let n = slice.len();
16054    let last = n.saturating_sub(1);
16055    let (mut lo, mut hi) = match kind {
16056        FrameKind::Rows => {
16057            let lo = match start {
16058                FrameBound::UnboundedPreceding => 0,
16059                FrameBound::OffsetPreceding(k) => {
16060                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
16061                    i.saturating_sub(k)
16062                }
16063                FrameBound::CurrentRow => i,
16064                FrameBound::OffsetFollowing(k) => {
16065                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
16066                    i.saturating_add(k).min(last)
16067                }
16068                FrameBound::UnboundedFollowing => last,
16069            };
16070            let hi = match end {
16071                FrameBound::UnboundedPreceding => 0,
16072                FrameBound::OffsetPreceding(k) => {
16073                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
16074                    i.saturating_sub(k)
16075                }
16076                FrameBound::CurrentRow => i,
16077                FrameBound::OffsetFollowing(k) => {
16078                    let k = usize::try_from(*k).unwrap_or(usize::MAX);
16079                    i.saturating_add(k).min(last)
16080                }
16081                FrameBound::UnboundedFollowing => last,
16082            };
16083            (lo, hi)
16084        }
16085        FrameKind::Range => {
16086            // RANGE bounds are peer-aware. With only UNBOUNDED and
16087            // CURRENT ROW supported (rejected at effective_frame for
16088            // explicit offsets), the start/end map to the
16089            // partition's full extent at the same-order-key peer
16090            // group boundary.
16091            let lo = match start {
16092                FrameBound::UnboundedPreceding => 0,
16093                FrameBound::CurrentRow => peer_group_start(slice, i),
16094                FrameBound::UnboundedFollowing => last,
16095                _ => unreachable!("offset bounds rejected for RANGE"),
16096            };
16097            let hi = match end {
16098                FrameBound::UnboundedPreceding => 0,
16099                FrameBound::CurrentRow => peer_group_end(slice, i),
16100                FrameBound::UnboundedFollowing => last,
16101                _ => unreachable!("offset bounds rejected for RANGE"),
16102            };
16103            (lo, hi)
16104        }
16105    };
16106    if hi >= n {
16107        hi = last;
16108    }
16109    if lo >= n {
16110        lo = last;
16111    }
16112    (lo, hi)
16113}
16114
16115/// Find the inclusive index of the first row with the same ORDER
16116/// BY key as `slice[i]`. Slice is already sorted by partition then
16117/// order, so peers are contiguous.
16118#[allow(clippy::type_complexity)]
16119fn peer_group_start(
16120    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
16121    i: usize,
16122) -> usize {
16123    let key = &slice[i].1;
16124    let mut j = i;
16125    while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
16126        j -= 1;
16127    }
16128    j
16129}
16130
16131/// Find the inclusive index of the last row with the same ORDER
16132/// BY key as `slice[i]`.
16133#[allow(clippy::type_complexity)]
16134fn peer_group_end(
16135    slice: &[(Vec<Value>, Vec<(Value, bool, Option<bool>)>, usize)],
16136    i: usize,
16137) -> usize {
16138    let key = &slice[i].1;
16139    let mut j = i;
16140    while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
16141        j += 1;
16142    }
16143    j
16144}
16145
16146fn value_to_f64(v: &Value) -> Option<f64> {
16147    match v {
16148        Value::SmallInt(n) => Some(f64::from(*n)),
16149        Value::Int(n) => Some(f64::from(*n)),
16150        #[allow(clippy::cast_precision_loss)]
16151        Value::BigInt(n) => Some(*n as f64),
16152        Value::Float(x) => Some(*x),
16153        _ => None,
16154    }
16155}
16156
16157/// Quick scan for any subquery-bearing node in a SELECT's WHERE /
16158/// projection / `order_by` — saves cloning the AST when there are
16159/// none (the common case).
16160fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
16161    let mut any = false;
16162    for item in &stmt.items {
16163        if let SelectItem::Expr { expr, .. } = item {
16164            any = any || expr_has_subquery(expr);
16165        }
16166    }
16167    if let Some(w) = &stmt.where_ {
16168        any = any || expr_has_subquery(w);
16169    }
16170    if let Some(h) = &stmt.having {
16171        any = any || expr_has_subquery(h);
16172    }
16173    for o in &stmt.order_by {
16174        any = any || expr_has_subquery(&o.expr);
16175    }
16176    for (_, peer) in &stmt.unions {
16177        any = any || expr_tree_has_subquery(peer);
16178    }
16179    any
16180}
16181
16182pub(crate) fn expr_has_subquery(e: &Expr) -> bool {
16183    match e {
16184        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
16185        Expr::AggregateOrdered { call, order_by, .. } => {
16186            expr_has_subquery(call) || order_by.iter().any(|o| expr_has_subquery(&o.expr))
16187        }
16188        Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
16189        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
16190            expr_has_subquery(expr)
16191        }
16192        Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
16193        Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
16194        Expr::Extract { source, .. } => expr_has_subquery(source),
16195        Expr::WindowFunction {
16196            args,
16197            partition_by,
16198            order_by,
16199            ..
16200        } => {
16201            args.iter().any(expr_has_subquery)
16202                || partition_by.iter().any(expr_has_subquery)
16203                || order_by.iter().any(|(e, _, _)| expr_has_subquery(e))
16204        }
16205        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
16206        Expr::Array(items) => items.iter().any(expr_has_subquery),
16207        Expr::ArraySubscript { target, index } => {
16208            expr_has_subquery(target) || expr_has_subquery(index)
16209        }
16210        Expr::AnyAll { expr, array, .. } => expr_has_subquery(expr) || expr_has_subquery(array),
16211        Expr::InList { expr, list, .. } => {
16212            expr_has_subquery(expr) || list.iter().any(expr_has_subquery)
16213        }
16214        Expr::Case {
16215            operand,
16216            branches,
16217            else_branch,
16218        } => {
16219            operand.as_deref().is_some_and(expr_has_subquery)
16220                || branches
16221                    .iter()
16222                    .any(|(w, t)| expr_has_subquery(w) || expr_has_subquery(t))
16223                || else_branch.as_deref().is_some_and(expr_has_subquery)
16224        }
16225    }
16226}
16227
16228/// v4.10 helper: materialise a runtime `Value` back into an AST
16229/// `Expr::Literal` for the subquery-rewrite path. Supports the
16230/// types `Literal` can represent (Integer / Float / Text / Bool /
16231/// Null). Date / Timestamp / Numeric / Vector / Interval / JSON
16232/// would lose precision through Literal and aren't supported in
16233/// uncorrelated-subquery results; they error with a clear hint.
16234fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
16235    let lit = match v {
16236        Value::Null => Literal::Null,
16237        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
16238        Value::Int(n) => Literal::Integer(i64::from(n)),
16239        Value::BigInt(n) => Literal::Integer(n),
16240        Value::Float(x) => Literal::Float(x),
16241        Value::Text(s) | Value::Json(s) => Literal::String(s),
16242        Value::Bool(b) => Literal::Bool(b),
16243        other => {
16244            return Err(EngineError::Unsupported(alloc::format!(
16245                "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
16246                other.data_type()
16247            )));
16248        }
16249    };
16250    Ok(Expr::Literal(lit))
16251}
16252
16253/// v7.13.0 — wider helper used by `INSERT … SELECT` (mailrs
16254/// round-5 G4). Covers the most common `Value` variants. Types
16255/// that need lossy textual round-trip (BYTEA, arrays, ts*)
16256/// surface as an Unsupported error so the caller can add a cast
16257/// in the inner SELECT.
16258fn value_to_literal_expr_permissive(v: Value) -> Result<Expr, EngineError> {
16259    let lit = match v {
16260        Value::Null => Literal::Null,
16261        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
16262        Value::Int(n) => Literal::Integer(i64::from(n)),
16263        Value::BigInt(n) => Literal::Integer(n),
16264        Value::Float(x) => Literal::Float(x),
16265        Value::Text(s) | Value::Json(s) => Literal::String(s),
16266        Value::Bool(b) => Literal::Bool(b),
16267        Value::Vector(xs) => Literal::Vector(xs),
16268        // Date / Timestamp / Timestamptz / Numeric round-trip
16269        // through a TEXT literal that `coerce_value` re-parses
16270        // against the target column type.
16271        Value::Date(days) => {
16272            let micros = (i64::from(days)) * 86_400_000_000;
16273            Literal::String(format_timestamp_micros_as_date(micros))
16274        }
16275        Value::Timestamp(us) => Literal::String(format_timestamp_micros(us)),
16276        Value::Numeric { scaled, scale } => Literal::String(format_numeric(scaled, scale)),
16277        other => {
16278            return Err(EngineError::Unsupported(alloc::format!(
16279                "INSERT … SELECT cannot materialise value of type {:?}; \
16280                 add an explicit CAST in the inner SELECT",
16281                other.data_type()
16282            )));
16283        }
16284    };
16285    Ok(Expr::Literal(lit))
16286}
16287
16288fn format_timestamp_micros(us: i64) -> String {
16289    // Same Y/M/D split used by the wire layer; epoch-relative.
16290    let days = us.div_euclid(86_400_000_000);
16291    let intra_day = us.rem_euclid(86_400_000_000);
16292    let date = format_timestamp_micros_as_date(days * 86_400_000_000);
16293    let secs = intra_day / 1_000_000;
16294    let us_rem = intra_day % 1_000_000;
16295    let h = (secs / 3600) % 24;
16296    let m = (secs / 60) % 60;
16297    let s = secs % 60;
16298    if us_rem == 0 {
16299        alloc::format!("{date} {h:02}:{m:02}:{s:02}")
16300    } else {
16301        alloc::format!("{date} {h:02}:{m:02}:{s:02}.{us_rem:06}")
16302    }
16303}
16304
16305fn format_timestamp_micros_as_date(us: i64) -> String {
16306    // Days since 1970-01-01 → calendar Y-M-D via the proleptic
16307    // Gregorian conversion used by spg-engine's date helpers.
16308    let days = us.div_euclid(86_400_000_000);
16309    // 1970-01-01 = JDN 2440588.
16310    let jdn = days + 2_440_588;
16311    let (y, mo, d) = jdn_to_ymd(jdn);
16312    alloc::format!("{y:04}-{mo:02}-{d:02}")
16313}
16314
/// Convert a Julian Day Number to a proleptic-Gregorian
/// `(year, month, day)` triple.
///
/// Integer-only Fliegel & Van Flandern (1968) algorithm — works for
/// all positive JDNs.
fn jdn_to_ymd(jdn: i64) -> (i64, u32, u32) {
    let shifted = jdn + 68569;
    // 146 097 days per 400 Gregorian years ⇒ this counts centuries.
    let centuries = (4 * shifted) / 146_097;
    let in_century = shifted - (146_097 * centuries + 3) / 4;
    // 1 461 001 / 4000 ≈ 365.25 days per year.
    let year_of_century = (4000 * (in_century + 1)) / 1_461_001;
    let day_offset = in_century - (1461 * year_of_century) / 4 + 31;
    // 2447 / 80 ≈ 30.6 days per month.
    let month_idx = (80 * day_offset) / 2447;
    let day = day_offset - (2447 * month_idx) / 80;
    // 1 when the month index wraps into the following year.
    let wrap = month_idx / 11;
    let month = month_idx + 2 - 12 * wrap;
    let year = 100 * (centuries - 49) + year_of_century + wrap;
    (year, month as u32, day as u32)
}
16329
/// Render a fixed-point numeric — the integer `scaled` interpreted
/// with `scale` digits after the decimal point — as decimal text.
///
/// With `scale == 0` the integer is printed as-is. Otherwise the
/// output always has at least one digit before the point and exactly
/// `scale` digits after it, with a leading `-` for negative values
/// (e.g. `(-5, 3)` → `-0.005`).
///
/// The split is done on the digit string rather than by dividing by
/// `10^scale`: that power overflows `u128` once `scale >= 39`, whereas
/// padding and slicing work for the whole `u8` range.
fn format_numeric(scaled: i128, scale: u8) -> String {
    if scale == 0 {
        return alloc::format!("{scaled}");
    }
    let frac_len = usize::from(scale);
    let sign = if scaled < 0 { "-" } else { "" };
    // Zero-pad to at least `scale + 1` digits so the whole part is
    // never empty; `unsigned_abs` also covers `i128::MIN`.
    let digits = alloc::format!("{:0width$}", scaled.unsigned_abs(), width = frac_len + 1);
    // ASCII digits only, so byte offsets are valid char boundaries.
    let (whole, frac) = digits.split_at(digits.len() - frac_len);
    alloc::format!("{sign}{whole}.{frac}")
}
16341
16342/// v6.1.1 — walk the prepared `Statement` AST and replace every
16343/// `Expr::Placeholder(n)` with `Expr::Literal(value_to_literal(
16344/// params[n-1]))`. The dispatch downstream sees a `Statement`
16345/// indistinguishable from a simple-query parse, so the exec path
16346/// stays unchanged.
16347///
16348/// Errors fall into one shape: a `$N` references past the bound
16349/// `params.len()`. Out-of-range happens when the Bind didn't
16350/// supply enough values; pgwire surfaces this as a protocol error
16351/// to the client.
16352/// v7.15.0 — rewrite every (potentially-qualified) column
16353/// identifier matching `old` to `new` in a stored SQL source
16354/// string. Used by `ALTER TABLE … RENAME COLUMN` to patch
16355/// CHECK predicate sources, partial-index predicate sources,
16356/// and runtime DEFAULT expression sources before they get
16357/// re-parsed on the next INSERT/UPDATE.
16358///
16359/// Round-trips through the parser, so the rewritten output is
16360/// the canonical Display form (matches what the engine stores
16361/// for fresh predicates). If the source doesn't parse, surfaces
16362/// the parse error — the invariant that stored predicates are
16363/// in canonical Display form means a parse failure here is a
16364/// real bug, not a user mistake to swallow.
16365fn rewrite_column_in_source(
16366    src: &str,
16367    old: &str,
16368    new: &str,
16369) -> Result<alloc::string::String, EngineError> {
16370    let mut expr = spg_sql::parser::parse_expression(src).map_err(|e| {
16371        EngineError::Unsupported(alloc::format!(
16372            "ALTER TABLE RENAME COLUMN: stored predicate source {src:?} \
16373             failed to parse for rewrite ({e})"
16374        ))
16375    })?;
16376    rewrite_column_in_expr(&mut expr, old, new);
16377    Ok(alloc::format!("{expr}"))
16378}
16379
16380/// v7.15.0 — Expr walker that swaps `Expr::Column { name: old, .. }`
16381/// for `Expr::Column { name: new, .. }`. Qualifier is preserved
16382/// (e.g. `t.old` → `t.new`); a foreign-table qualifier still
16383/// gets rewritten because the AST has no way to tell us this
16384/// predicate is on table T versus table T2 — predicate sources
16385/// in SPG are always scoped to the owning table, so any
16386/// qualifier present is either redundant or wrong.
16387fn rewrite_column_in_expr(e: &mut Expr, old: &str, new: &str) {
16388    match e {
16389        Expr::AggregateOrdered { call, order_by, .. } => {
16390            rewrite_column_in_expr(call, old, new);
16391            for o in order_by.iter_mut() {
16392                rewrite_column_in_expr(&mut o.expr, old, new);
16393            }
16394        }
16395        Expr::Column(c) => {
16396            if c.name.eq_ignore_ascii_case(old) {
16397                c.name = new.to_string();
16398            }
16399        }
16400        Expr::Binary { lhs, rhs, .. } => {
16401            rewrite_column_in_expr(lhs, old, new);
16402            rewrite_column_in_expr(rhs, old, new);
16403        }
16404        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
16405            rewrite_column_in_expr(expr, old, new);
16406        }
16407        Expr::FunctionCall { args, .. } => {
16408            for a in args {
16409                rewrite_column_in_expr(a, old, new);
16410            }
16411        }
16412        Expr::Like { expr, pattern, .. } => {
16413            rewrite_column_in_expr(expr, old, new);
16414            rewrite_column_in_expr(pattern, old, new);
16415        }
16416        Expr::Extract { source, .. } => rewrite_column_in_expr(source, old, new),
16417        Expr::WindowFunction {
16418            args,
16419            partition_by,
16420            order_by,
16421            ..
16422        } => {
16423            for a in args {
16424                rewrite_column_in_expr(a, old, new);
16425            }
16426            for p in partition_by {
16427                rewrite_column_in_expr(p, old, new);
16428            }
16429            for (o, _, _) in order_by {
16430                rewrite_column_in_expr(o, old, new);
16431            }
16432        }
16433        Expr::Array(items) => {
16434            for elem in items {
16435                rewrite_column_in_expr(elem, old, new);
16436            }
16437        }
16438        Expr::ArraySubscript { target, index } => {
16439            rewrite_column_in_expr(target, old, new);
16440            rewrite_column_in_expr(index, old, new);
16441        }
16442        Expr::AnyAll { expr, array, .. } => {
16443            rewrite_column_in_expr(expr, old, new);
16444            rewrite_column_in_expr(array, old, new);
16445        }
16446        Expr::InList { expr, list, .. } => {
16447            rewrite_column_in_expr(expr, old, new);
16448            for item in list {
16449                rewrite_column_in_expr(item, old, new);
16450            }
16451        }
16452        Expr::Case {
16453            operand,
16454            branches,
16455            else_branch,
16456        } => {
16457            if let Some(o) = operand {
16458                rewrite_column_in_expr(o, old, new);
16459            }
16460            for (w, t) in branches {
16461                rewrite_column_in_expr(w, old, new);
16462                rewrite_column_in_expr(t, old, new);
16463            }
16464            if let Some(e) = else_branch {
16465                rewrite_column_in_expr(e, old, new);
16466            }
16467        }
16468        // Stored predicate sources never contain subqueries —
16469        // CHECK / partial-index / runtime_default are all scalar.
16470        // If a future feature changes that, recurse here.
16471        Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => {}
16472        Expr::Literal(_) | Expr::Placeholder(_) => {}
16473    }
16474}
16475
16476/// v7.16.0 — walks a parsed statement and replaces every
16477/// `Expr::Placeholder(N)` with the corresponding `params[N-1]`
16478/// re-encoded as an `Expr::Literal`. Used internally by
16479/// `Engine::execute_prepared` AND surfaced for the spg-embedded
16480/// WAL path (which needs the bind-final AST so replay sees a
16481/// simple-query-shaped statement, not a `$1`-shaped one). Errors
16482/// when a placeholder references an index past the params slice.
16483pub fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
16484    match stmt {
16485        Statement::Select(s) => substitute_select(s, params)?,
16486        Statement::Insert(ins) => {
16487            for row in &mut ins.rows {
16488                for e in row {
16489                    substitute_expr(e, params)?;
16490                }
16491            }
16492            // ON CONFLICT DO UPDATE assignments / WHERE can carry
16493            // placeholders too (`… DO UPDATE SET reason = $2` —
16494            // mailrs embed round-12).
16495            if let Some(clause) = &mut ins.on_conflict
16496                && let spg_sql::ast::OnConflictAction::Update {
16497                    assignments,
16498                    where_,
16499                } = &mut clause.action
16500            {
16501                for (_, e) in assignments.iter_mut() {
16502                    substitute_expr(e, params)?;
16503                }
16504                if let Some(w) = where_ {
16505                    substitute_expr(w, params)?;
16506                }
16507            }
16508        }
16509        Statement::Update(u) => {
16510            for (_, e) in &mut u.assignments {
16511                substitute_expr(e, params)?;
16512            }
16513            if let Some(w) = &mut u.where_ {
16514                substitute_expr(w, params)?;
16515            }
16516        }
16517        Statement::Delete(d) => {
16518            if let Some(w) = &mut d.where_ {
16519                substitute_expr(w, params)?;
16520            }
16521        }
16522        Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
16523        // Other statements (CREATE / BEGIN / SHOW / …) have no
16524        // expression slots; no walk needed.
16525        _ => {}
16526    }
16527    Ok(())
16528}
16529
16530/// v7.25.1 (mailrs round-18) — THE canonical mutable traversal of
16531/// every expression slot in a SelectStatement, including every
16532/// nested SelectStatement (CTE bodies, UNION peers, LATERAL derived
16533/// tables) and the JOIN ON conditions. Round-12 #7b and round-18
16534/// were both "a hand-rolled Select walker forgot one subtree";
16535/// every whole-statement rewrite pass (placeholders, clock) must go
16536/// through here so a new AST slot only needs adding once.
16537/// Expression-INTERNAL recursion (into subquery nodes inside an
16538/// Expr) stays the visitor's own responsibility.
16539pub(crate) fn walk_select_exprs_mut(
16540    s: &mut SelectStatement,
16541    f: &mut impl FnMut(&mut Expr) -> Result<(), EngineError>,
16542) -> Result<(), EngineError> {
16543    for cte in &mut s.ctes {
16544        walk_select_exprs_mut(&mut cte.body, f)?;
16545    }
16546    for item in &mut s.items {
16547        if let SelectItem::Expr { expr, .. } = item {
16548            f(expr)?;
16549        }
16550    }
16551    if let Some(from) = &mut s.from {
16552        if let Some(sub) = &mut from.primary.lateral_subquery {
16553            walk_select_exprs_mut(sub, f)?;
16554        }
16555        for j in &mut from.joins {
16556            if let Some(sub) = &mut j.table.lateral_subquery {
16557                walk_select_exprs_mut(sub, f)?;
16558            }
16559            if let Some(on) = &mut j.on {
16560                f(on)?;
16561            }
16562        }
16563    }
16564    if let Some(w) = &mut s.where_ {
16565        f(w)?;
16566    }
16567    if let Some(gs) = &mut s.group_by {
16568        for g in gs {
16569            f(g)?;
16570        }
16571    }
16572    if let Some(h) = &mut s.having {
16573        f(h)?;
16574    }
16575    for o in &mut s.order_by {
16576        f(&mut o.expr)?;
16577    }
16578    for (_, peer) in &mut s.unions {
16579        walk_select_exprs_mut(peer, f)?;
16580    }
16581    Ok(())
16582}
16583
16584fn substitute_select(s: &mut SelectStatement, params: &[Value]) -> Result<(), EngineError> {
16585    walk_select_exprs_mut(s, &mut |e| substitute_expr(e, params))?;
16586    // v7.25.1 — LIMIT/OFFSET placeholders inside CTE bodies and
16587    // UNION peers resolve through their own recursion (the walker
16588    // above only visits Expr slots), so handle them per nested
16589    // statement here.
16590    for cte in &mut s.ctes {
16591        resolve_limit_offset_placeholders(&mut cte.body, params)?;
16592    }
16593    for (_, peer) in &mut s.unions {
16594        resolve_limit_offset_placeholders(peer, params)?;
16595    }
16596    // v7.9.24 — LIMIT $N / OFFSET $N placeholder resolution.
16597    // mailrs H2. After this pass each LIMIT/OFFSET that was a
16598    // Placeholder is rewritten to Literal so the existing
16599    // `LimitExpr::as_literal` path consumes a concrete u32.
16600    if let Some(le) = s.limit {
16601        s.limit = Some(resolve_limit_placeholder(le, params)?);
16602    }
16603    if let Some(le) = s.offset {
16604        s.offset = Some(resolve_limit_placeholder(le, params)?);
16605    }
16606    Ok(())
16607}
16608
16609/// v7.25.1 — recursive LIMIT/OFFSET placeholder resolution for
16610/// nested statements (CTE bodies / UNION peers).
16611fn resolve_limit_offset_placeholders(
16612    s: &mut SelectStatement,
16613    params: &[Value],
16614) -> Result<(), EngineError> {
16615    if let Some(le) = s.limit {
16616        s.limit = Some(resolve_limit_placeholder(le, params)?);
16617    }
16618    if let Some(le) = s.offset {
16619        s.offset = Some(resolve_limit_placeholder(le, params)?);
16620    }
16621    for cte in &mut s.ctes {
16622        resolve_limit_offset_placeholders(&mut cte.body, params)?;
16623    }
16624    for (_, peer) in &mut s.unions {
16625        resolve_limit_offset_placeholders(peer, params)?;
16626    }
16627    Ok(())
16628}
16629
16630fn resolve_limit_placeholder(
16631    le: spg_sql::ast::LimitExpr,
16632    params: &[Value],
16633) -> Result<spg_sql::ast::LimitExpr, EngineError> {
16634    use spg_sql::ast::LimitExpr;
16635    match le {
16636        LimitExpr::Literal(_) => Ok(le),
16637        LimitExpr::Placeholder(n) => {
16638            let idx = usize::from(n).saturating_sub(1);
16639            let v = params.get(idx).ok_or_else(|| {
16640                EngineError::Eval(EvalError::PlaceholderOutOfRange {
16641                    n,
16642                    bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
16643                })
16644            })?;
16645            let int = match v {
16646                Value::SmallInt(x) => Some(i64::from(*x)),
16647                Value::Int(x) => Some(i64::from(*x)),
16648                Value::BigInt(x) => Some(*x),
16649                _ => None,
16650            }
16651            .ok_or_else(|| {
16652                EngineError::Unsupported(alloc::format!(
16653                    "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
16654                ))
16655            })?;
16656            if int < 0 {
16657                return Err(EngineError::Unsupported(alloc::format!(
16658                    "LIMIT/OFFSET ${n} bound to negative value {int}"
16659                )));
16660            }
16661            let bounded = u32::try_from(int).map_err(|_| {
16662                EngineError::Unsupported(alloc::format!(
16663                    "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
16664                ))
16665            })?;
16666            Ok(LimitExpr::Literal(bounded))
16667        }
16668    }
16669}
16670
16671fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
16672    if let Expr::Placeholder(n) = e {
16673        let idx = usize::from(*n).saturating_sub(1);
16674        let v = params.get(idx).ok_or_else(|| {
16675            EngineError::Eval(EvalError::PlaceholderOutOfRange {
16676                n: *n,
16677                bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
16678            })
16679        })?;
16680        *e = Expr::Literal(value_to_literal(v.clone()));
16681        return Ok(());
16682    }
16683    match e {
16684        Expr::AggregateOrdered { call, order_by, .. } => {
16685            substitute_expr(call, params)?;
16686            for o in order_by.iter_mut() {
16687                substitute_expr(&mut o.expr, params)?;
16688            }
16689        }
16690        Expr::Binary { lhs, rhs, .. } => {
16691            substitute_expr(lhs, params)?;
16692            substitute_expr(rhs, params)?;
16693        }
16694        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
16695            substitute_expr(expr, params)?;
16696        }
16697        Expr::FunctionCall { args, .. } => {
16698            for a in args {
16699                substitute_expr(a, params)?;
16700            }
16701        }
16702        Expr::Like { expr, pattern, .. } => {
16703            substitute_expr(expr, params)?;
16704            substitute_expr(pattern, params)?;
16705        }
16706        Expr::Extract { source, .. } => substitute_expr(source, params)?,
16707        Expr::ScalarSubquery(s) => substitute_select(s, params)?,
16708        Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
16709        Expr::InSubquery { expr, subquery, .. } => {
16710            substitute_expr(expr, params)?;
16711            substitute_select(subquery, params)?;
16712        }
16713        Expr::WindowFunction {
16714            args,
16715            partition_by,
16716            order_by,
16717            ..
16718        } => {
16719            for a in args {
16720                substitute_expr(a, params)?;
16721            }
16722            for p in partition_by {
16723                substitute_expr(p, params)?;
16724            }
16725            for (e, _, _) in order_by {
16726                substitute_expr(e, params)?;
16727            }
16728        }
16729        Expr::Literal(_) | Expr::Column(_) => {}
16730        // Already handled above.
16731        Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
16732        Expr::Array(items) => {
16733            for elem in items {
16734                substitute_expr(elem, params)?;
16735            }
16736        }
16737        Expr::ArraySubscript { target, index } => {
16738            substitute_expr(target, params)?;
16739            substitute_expr(index, params)?;
16740        }
16741        Expr::AnyAll { expr, array, .. } => {
16742            substitute_expr(expr, params)?;
16743            substitute_expr(array, params)?;
16744        }
16745        Expr::InList { expr, list, .. } => {
16746            substitute_expr(expr, params)?;
16747            for item in list {
16748                substitute_expr(item, params)?;
16749            }
16750        }
16751        Expr::Case {
16752            operand,
16753            branches,
16754            else_branch,
16755        } => {
16756            if let Some(o) = operand {
16757                substitute_expr(o, params)?;
16758            }
16759            for (w, t) in branches {
16760                substitute_expr(w, params)?;
16761                substitute_expr(t, params)?;
16762            }
16763            if let Some(e) = else_branch {
16764                substitute_expr(e, params)?;
16765            }
16766        }
16767    }
16768    Ok(())
16769}
16770
16771/// v6.1.1 — convert a runtime `Value` into the closest matching
16772/// `Literal` for the substitute walker. Lossless for the simple
16773/// scalars (Int / Float / Text / Bool); Numeric / Date / Timestamp
16774/// / Json / Interval render as their canonical text form so the
16775/// downstream coerce_value can re-parse against the target column
16776/// type. SQ8 / HalfVector cells are NOT expected as bind params;
16777/// pgwire's Bind decodes vector params to the f32 representation
16778/// before they reach this helper.
16779/// v6.2.0 — total ordering on `Value`s used by ANALYZE to sort a
16780/// column's non-NULL sample before histogram building. Cross-type
16781/// pairs (Int vs Float, Date vs Timestamp, …) compare via the
16782/// same widening the eval-side `compare` operator uses; everything
16783/// else (the genuinely-incompatible pairs) falls back to ordering
16784/// by canonical string form so the sort is still total + stable.
16785/// Vector / SQ8 / Half / Json / Numeric / Interval values reach
16786/// here only via the string-fallback path because vector columns
16787/// are filtered out upstream.
16788fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
16789    use core::cmp::Ordering;
16790    match (a, b) {
16791        (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
16792        (Value::Int(a), Value::Int(b)) => a.cmp(b),
16793        (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
16794        (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
16795        (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
16796        (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
16797        (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
16798        (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
16799        (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
16800        (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
16801        (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
16802        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
16803        (Value::Date(a), Value::Date(b)) => a.cmp(b),
16804        (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
16805        // Mixed numeric/float — widen to f64 and compare.
16806        (Value::SmallInt(n), Value::Float(x)) => {
16807            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
16808        }
16809        (Value::Float(x), Value::SmallInt(n)) => {
16810            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
16811        }
16812        (Value::Int(n), Value::Float(x)) => {
16813            (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
16814        }
16815        (Value::Float(x), Value::Int(n)) => {
16816            x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
16817        }
16818        (Value::BigInt(n), Value::Float(x)) => {
16819            #[allow(clippy::cast_precision_loss)]
16820            let nf = *n as f64;
16821            nf.partial_cmp(x).unwrap_or(Ordering::Equal)
16822        }
16823        (Value::Float(x), Value::BigInt(n)) => {
16824            #[allow(clippy::cast_precision_loss)]
16825            let nf = *n as f64;
16826            x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
16827        }
16828        // Cross-type fallback: lexicographic on canonical form.
16829        // Total + stable so the sort is well-defined.
16830        _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
16831    }
16832}
16833
/// v6.2.0 — renders the histogram bounds list as a `[v0, v1, ...]`
/// string for the `spg_statistic.histogram_bounds` column.
///
/// A bound is wrapped in double quotes when it is empty or contains
/// `,`, `[`, `]` or `"`; inside the quotes, `"` and `\` are prefixed
/// with a backslash. Every other bound is emitted verbatim. v6.2.0
/// only uses the rendered form for human consumption, so the
/// escaping is deliberately conservative.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    /// Characters that force a bound to be quoted.
    const QUOTE_TRIGGERS: [char; 4] = [',', '[', ']', '"'];

    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    // Empty before the first element, ", " before every later one.
    let mut separator = "";
    for bound in bounds {
        rendered.push_str(separator);
        separator = ", ";

        if bound.is_empty() || bound.contains(QUOTE_TRIGGERS) {
            rendered.push('"');
            for ch in bound.chars() {
                if matches!(ch, '"' | '\\') {
                    rendered.push('\\');
                }
                rendered.push(ch);
            }
            rendered.push('"');
        } else {
            rendered.push_str(bound);
        }
    }
    rendered.push(']');
    rendered
}
16864
16865/// v6.2.0 — canonical textual form of a `Value` for histogram
16866/// bound storage. Strings used by ANALYZE for sort + bound output.
16867/// INT / BIGINT → decimal; FLOAT → shortest-round-trip via
16868/// `{:?}`; TEXT pass-through; BOOL → `t` / `f`; DATE / TIMESTAMP →
16869/// the same form `format_date` / `format_timestamp` produce for
16870/// SQL Display. Vector / SQ8 / Half / Json / Numeric / Interval
16871/// reach this only via a non-Vector column (vector columns are
16872/// skipped upstream); they fall back to a Debug-derived form so
16873/// stats still serialise without crashing.
16874pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
16875    match v {
16876        Value::Null => "NULL".to_string(),
16877        Value::SmallInt(n) => alloc::format!("{n}"),
16878        Value::Int(n) => alloc::format!("{n}"),
16879        Value::BigInt(n) => alloc::format!("{n}"),
16880        Value::Float(x) => alloc::format!("{x:?}"),
16881        Value::Text(s) | Value::Json(s) => s.clone(),
16882        Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
16883        Value::Date(d) => eval::format_date(*d),
16884        Value::Timestamp(t) => eval::format_timestamp(*t),
16885        // v7.17.0 Phase 3.P0-32 — PG TIME canonical text form.
16886        Value::Time(us) => eval::format_time(*us),
16887        // v7.17.0 Phase 3.P0-33 — MySQL YEAR 4-digit zero-padded.
16888        Value::Year(y) => alloc::format!("{y:04}"),
16889        // v7.17.0 Phase 3.P0-34 — PG TIMETZ canonical text form.
16890        Value::TimeTz { us, offset_secs } => eval::format_timetz(*us, *offset_secs),
16891        // v7.17.0 Phase 3.P0-35 — PG MONEY canonical en_US text form.
16892        Value::Money(c) => eval::format_money(*c),
16893        // v7.17.0 Phase 3.P0-38 — PG range canonical text form.
16894        v @ Value::Range { .. } => format_range_str(v),
16895        // v7.17.0 Phase 3.P0-39 — PG hstore canonical text form.
16896        Value::Hstore(pairs) => format_hstore_str(pairs),
16897        // v7.17.0 Phase 3.P0-40 — 2D array canonical text form.
16898        Value::IntArray2D(rows) => format_int_2d_text(rows),
16899        Value::BigIntArray2D(rows) => format_bigint_2d_text(rows),
16900        Value::TextArray2D(rows) => format_text_2d_text(rows),
16901        Value::Interval { months, micros } => eval::format_interval(*months, *micros),
16902        Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
16903        Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
16904            // Unreachable in practice (vector columns are filtered
16905            // out before this). Defensive fallback so a future
16906            // vector-stats path doesn't crash.
16907            alloc::format!("{v:?}")
16908        }
16909        // v7.5.0 — Value is #[non_exhaustive] for downstream
16910        // forward-compat. Future variants fall through to Debug
16911        // form here (same shape as the vector fallback above).
16912        _ => alloc::format!("{v:?}"),
16913    }
16914}
16915
/// v6.2.0 — reports whether `_name` is an engine-managed catalog
/// table that a bare `ANALYZE` (no target) should skip.
///
/// Currently always `false`: v6.2.0 has no internal tables yet
/// (publications / subscriptions / users / statistics all live as
/// engine fields, not catalog tables), so this is a reserved
/// future-proofing hook and every existing user table is analysed.
const fn is_internal_table_name(_name: &str) -> bool {
    // Flip (or replace with a real lookup) once internal catalog
    // tables exist.
    const HAS_INTERNAL_TABLES: bool = false;
    HAS_INTERNAL_TABLES
}
16925
16926fn value_to_literal(v: Value) -> Literal {
16927    match v {
16928        Value::Null => Literal::Null,
16929        Value::SmallInt(n) => Literal::Integer(i64::from(n)),
16930        Value::Int(n) => Literal::Integer(i64::from(n)),
16931        Value::BigInt(n) => Literal::Integer(n),
16932        Value::Float(x) => Literal::Float(x),
16933        Value::Text(s) | Value::Json(s) => Literal::String(s),
16934        Value::Bool(b) => Literal::Bool(b),
16935        Value::Vector(v) => Literal::Vector(v),
16936        Value::Numeric { scaled, scale } => Literal::String(eval::format_numeric(scaled, scale)),
16937        Value::Date(d) => Literal::String(eval::format_date(d)),
16938        Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
16939        // v7.17.0 Phase 3.P0-69 — UUID round-trips via canonical
16940        // hyphenated text. Without this arm the fallback below
16941        // renders `Debug` form ("Uuid([85, …])") which the
16942        // engine's Text → Uuid coerce can't parse, breaking
16943        // prepared-bind round-trip from the spg-sqlx adapter.
16944        Value::Uuid(b) => Literal::String(spg_storage::format_uuid(&b)),
16945        // v7.16.0 — BYTEA round-trip for the spg-sqlx Bind path.
16946        // PG-canonical text rep is `\x` + lowercase hex; the
16947        // engine's coerce_value already accepts that on the
16948        // text → bytea direction.
16949        Value::Bytes(b) => Literal::String(eval::format_bytea_hex(&b)),
16950        // Arrays ride the AST natively (mailrs embed round-12) —
16951        // the prior `{a,b,c}` text form only worked where a column
16952        // type drove the re-parse; `= ANY($1)` has no column
16953        // context and saw a bare Text value.
16954        Value::TextArray(items) => Literal::TextArray(items),
16955        Value::IntArray(items) => Literal::IntArray(items),
16956        Value::BigIntArray(items) => Literal::BigIntArray(items),
16957        Value::Interval { months, micros } => Literal::Interval {
16958            months,
16959            micros,
16960            text: eval::format_interval(months, micros),
16961        },
16962        // SQ8 / halfvec cells dequantise to f32 before reaching the
16963        // substitute walker; pgwire's Bind path handles that.
16964        Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
16965        Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
16966        // v7.5.0 — Value is #[non_exhaustive]; future variants
16967        // render as Debug-form String literal until explicit
16968        // mapping is added.
16969        v => Literal::String(alloc::format!("{v:?}")),
16970    }
16971}
16972
16973fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
16974    let Some(now) = now_micros else {
16975        return;
16976    };
16977    match stmt {
16978        Statement::Select(s) => rewrite_select_clock(s, now),
16979        Statement::Insert(ins) => {
16980            for row in &mut ins.rows {
16981                for e in row {
16982                    rewrite_expr_clock(e, now);
16983                }
16984            }
16985            // `ON CONFLICT … DO UPDATE SET created_at = NOW()` —
16986            // the upsert assignments carry clock calls too (mailrs
16987            // embed round-12).
16988            if let Some(clause) = &mut ins.on_conflict
16989                && let spg_sql::ast::OnConflictAction::Update {
16990                    assignments,
16991                    where_,
16992                } = &mut clause.action
16993            {
16994                for (_, e) in assignments.iter_mut() {
16995                    rewrite_expr_clock(e, now);
16996                }
16997                if let Some(w) = where_ {
16998                    rewrite_expr_clock(w, now);
16999                }
17000            }
17001        }
17002        // `UPDATE … SET seen_at = NOW() WHERE …` / `DELETE … WHERE
17003        // ts < NOW()` (mailrs embed round-12 — previously only
17004        // SELECT / INSERT-rows were walked).
17005        Statement::Update(u) => {
17006            for (_, e) in &mut u.assignments {
17007                rewrite_expr_clock(e, now);
17008            }
17009            if let Some(w) = &mut u.where_ {
17010                rewrite_expr_clock(w, now);
17011            }
17012        }
17013        Statement::Delete(d) => {
17014            if let Some(w) = &mut d.where_ {
17015                rewrite_expr_clock(w, now);
17016            }
17017        }
17018        _ => {}
17019    }
17020}
17021
17022fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
17023    // v7.25.1 (round-18) — shared traversal: CTE bodies, LATERAL
17024    // subqueries, JOIN ON, and UNION peers all get the clock
17025    // rewrite (NOW() inside a CTE previously survived to eval as
17026    // "unknown function `now`").
17027    let _ = walk_select_exprs_mut(s, &mut |e| {
17028        rewrite_expr_clock(e, now);
17029        Ok(())
17030    });
17031}
17032
/// v3.0.3 hot path: rewrites clock references inside `e` in place so
/// every occurrence in a statement observes the same instant `now`.
///
/// The node itself is tested first through `clock_replacement_for`
/// (zero-arg `NOW` / `CURRENT_TIMESTAMP` / `CURRENT_DATE` /
/// `UNIX_TIMESTAMP` calls and the bare `CURRENT_TIMESTAMP` /
/// `CURRENT_DATE` identifiers). On a hit the node is replaced
/// wholesale and the walk stops there. Otherwise the `match` below
/// recurses into the node's sub-expressions; subquery nodes recurse
/// through `rewrite_select_clock`. The match has no catch-all arm, so
/// a new `Expr` variant will not compile until it is handled here.
///
/// `now` is forwarded unchanged to every recursive call.
fn rewrite_expr_clock(e: &mut Expr, now: i64) {
    // Fast-path test on the no-recursion shapes first. We can't fold
    // them into the big match below because they need to *replace* `e`
    // outright; the recursive arms below match on its sub-fields.
    if let Some(replacement) = clock_replacement_for(e, now) {
        *e = replacement;
        return;
    }
    match e {
        // Ordered aggregate: both the wrapped call and its ORDER BY
        // keys may contain clock references.
        Expr::AggregateOrdered { call, order_by, .. } => {
            rewrite_expr_clock(call, now);
            for o in order_by.iter_mut() {
                rewrite_expr_clock(&mut o.expr, now);
            }
        }
        Expr::Binary { lhs, rhs, .. } => {
            rewrite_expr_clock(lhs, now);
            rewrite_expr_clock(rhs, now);
        }
        Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
            rewrite_expr_clock(expr, now);
        }
        // Zero-arg clock calls were already replaced above; any call
        // reaching this arm only needs its arguments walked.
        Expr::FunctionCall { args, .. } => {
            for a in args {
                rewrite_expr_clock(a, now);
            }
        }
        Expr::Like { expr, pattern, .. } => {
            rewrite_expr_clock(expr, now);
            rewrite_expr_clock(pattern, now);
        }
        Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
        // v4.10 subquery nodes — recurse into the inner SELECT's
        // expression slots so e.g. SELECT NOW() in a scalar
        // subquery picks up the same instant as the outer query.
        Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
        Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
        Expr::InSubquery { expr, subquery, .. } => {
            rewrite_expr_clock(expr, now);
            rewrite_select_clock(subquery, now);
        }
        // v4.12 window functions — args + PARTITION BY + ORDER BY
        // may all reference clock literals.
        Expr::WindowFunction {
            args,
            partition_by,
            order_by,
            ..
        } => {
            for a in args {
                rewrite_expr_clock(a, now);
            }
            for p in partition_by {
                rewrite_expr_clock(p, now);
            }
            for (e, _, _) in order_by {
                rewrite_expr_clock(e, now);
            }
        }
        // Leaves: nothing to recurse into. Bare clock identifiers
        // never reach this arm because they were replaced above.
        Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
        Expr::Array(items) => {
            for elem in items {
                rewrite_expr_clock(elem, now);
            }
        }
        Expr::ArraySubscript { target, index } => {
            rewrite_expr_clock(target, now);
            rewrite_expr_clock(index, now);
        }
        Expr::AnyAll { expr, array, .. } => {
            rewrite_expr_clock(expr, now);
            rewrite_expr_clock(array, now);
        }
        Expr::InList { expr, list, .. } => {
            rewrite_expr_clock(expr, now);
            for item in list {
                rewrite_expr_clock(item, now);
            }
        }
        Expr::Case {
            operand,
            branches,
            else_branch,
        } => {
            if let Some(o) = operand {
                rewrite_expr_clock(o, now);
            }
            // Each branch is a (WHEN, THEN) pair; both sides are walked.
            for (w, t) in branches {
                rewrite_expr_clock(w, now);
                rewrite_expr_clock(t, now);
            }
            if let Some(e) = else_branch {
                rewrite_expr_clock(e, now);
            }
        }
    }
}
17137
17138/// Returns `Some(Expr)` when `e` is one of the clock-call shapes that
17139/// must be rewritten; otherwise `None` so the caller falls through to
17140/// the recursive walk. Identifies both function-call forms (`NOW()` /
17141/// `CURRENT_TIMESTAMP()` / `CURRENT_DATE()`) and bare-identifier forms
17142/// (`CURRENT_TIMESTAMP` / `CURRENT_DATE` as unqualified column refs,
17143/// which is how PG accepts them without parens).
17144fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
17145    let (kind, name) = match e {
17146        Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
17147        Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
17148        _ => return None,
17149    };
17150    // ASCII case-insensitive name match. Each entry decides what
17151    // synthetic literal the call expands to.
17152    //
17153    // v7.17.0 Phase 3.P0-29 — `unix_timestamp` (no args) joins this
17154    // table as MySQL's epoch-seconds equivalent of `now()`. Folded
17155    // to a BigInt literal here so apply_function never needs a
17156    // clock dependency.
17157    enum ClockShape {
17158        Timestamp,
17159        Date,
17160        UnixSeconds,
17161    }
17162    let shape = match name.len() {
17163        3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => {
17164            Some(ClockShape::Timestamp)
17165        }
17166        12 if name.eq_ignore_ascii_case("current_date") => Some(ClockShape::Date),
17167        14 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("unix_timestamp") => {
17168            Some(ClockShape::UnixSeconds)
17169        }
17170        17 if name.eq_ignore_ascii_case("current_timestamp") => Some(ClockShape::Timestamp),
17171        _ => None,
17172    };
17173    let shape = shape?;
17174    let payload = match shape {
17175        ClockShape::Timestamp => now,
17176        ClockShape::Date => now.div_euclid(86_400_000_000),
17177        ClockShape::UnixSeconds => now.div_euclid(1_000_000),
17178    };
17179    let target = match shape {
17180        ClockShape::Timestamp => spg_sql::ast::CastTarget::Timestamp,
17181        ClockShape::Date => spg_sql::ast::CastTarget::Date,
17182        ClockShape::UnixSeconds => spg_sql::ast::CastTarget::BigInt,
17183    };
17184    Some(Expr::Cast {
17185        expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
17186        target,
17187    })
17188}
17189
/// Syntactic position in which `clock_replacement_for` found a
/// candidate clock name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// Zero-argument function call, e.g. `NOW()`.
    Fn,
    /// Unqualified column reference, e.g. bare `CURRENT_DATE`.
    BareIdent,
}
17195
17196/// `ORDER BY <integer>` references the N-th SELECT item (1-based).
17197/// Swap the integer literal for the matching item's expression so the
17198/// executor doesn't need a special-case branch. Recurses into UNION
17199/// peers because each peer keeps its own SELECT list.
17200/// v6.4.1 — expand `GROUP BY ALL` to every non-aggregate SELECT-list
17201/// item. Mirrors DuckDB / PG 19 semantics. Wildcards (`SELECT * …`)
17202/// are NOT expanded by GROUP BY ALL (PG 19 leaves the wildcard intact
17203/// and groups by whatever explicit non-aggregates remain — none in
17204/// the wildcard-only case, which still works for non-aggregate
17205/// queries).
17206fn expand_group_by_all(s: &mut SelectStatement) {
17207    if !s.group_by_all {
17208        for (_, peer) in &mut s.unions {
17209            expand_group_by_all(peer);
17210        }
17211        return;
17212    }
17213    let mut groups: Vec<Expr> = Vec::new();
17214    for item in &s.items {
17215        if let SelectItem::Expr { expr, .. } = item
17216            && !aggregate::contains_aggregate(expr)
17217        {
17218            groups.push(expr.clone());
17219        }
17220    }
17221    s.group_by = Some(groups);
17222    s.group_by_all = false;
17223    for (_, peer) in &mut s.unions {
17224        expand_group_by_all(peer);
17225    }
17226}
17227
17228fn resolve_order_by_position(s: &mut SelectStatement) {
17229    // v6.4.0 — iterate every ORDER BY key. Position references
17230    // (`ORDER BY 2`) bind to the 1-based projection index;
17231    // identifier references that match a SELECT-list alias bind to
17232    // the projected expression (Step 4 of L3a).
17233    for order in &mut s.order_by {
17234        match &order.expr {
17235            Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
17236                if let Ok(idx_one_based) = usize::try_from(*n) {
17237                    let idx = idx_one_based - 1;
17238                    if idx < s.items.len()
17239                        && let SelectItem::Expr { expr, .. } = &s.items[idx]
17240                    {
17241                        order.expr = expr.clone();
17242                    }
17243                }
17244            }
17245            Expr::Column(c) if c.qualifier.is_none() => {
17246                // Alias-in-ORDER-BY lookup.
17247                for item in &s.items {
17248                    if let SelectItem::Expr {
17249                        expr,
17250                        alias: Some(a),
17251                    } = item
17252                        && a == &c.name
17253                    {
17254                        order.expr = expr.clone();
17255                        break;
17256                    }
17257                }
17258            }
17259            _ => {}
17260        }
17261    }
17262    for (_, peer) in &mut s.unions {
17263        resolve_order_by_position(peer);
17264    }
17265}
17266
17267/// Sort `tagged` by `f64` key, reversing the comparator under DESC.
17268/// Used by the UNION ORDER BY path; per-block paths inline the same
17269/// comparator because they already hold `&OrderBy` directly.
17270/// v3.1.1: partial-sort helper. When `keep` (= offset + limit) is
17271/// strictly less than `tagged.len()`, run `select_nth_unstable_by` to
17272/// partition the prefix in O(n), then sort just that prefix in O(k
17273/// log k). Total O(n + k log k), vs O(n log n) for a full sort. The
17274/// caller decides what `keep` is; passing `None` (no LIMIT) keeps the
17275/// full-sort behaviour.
17276///
17277/// `tagged` holds `(Option<f64>, Row)` (the SELECT path) — `None` keys
17278/// sort last in ascending order, mirroring NULL-sorts-last in SQL.
17279fn partial_sort_tagged(tagged: &mut Vec<(Vec<f64>, Row)>, keep: Option<usize>, descs: &[bool]) {
17280    let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
17281    match keep {
17282        Some(k) if k < tagged.len() && k > 0 => {
17283            let pivot = k - 1;
17284            tagged.select_nth_unstable_by(pivot, cmp);
17285            tagged[..k].sort_by(cmp);
17286            tagged.truncate(k);
17287        }
17288        _ => {
17289            tagged.sort_by(cmp);
17290        }
17291    }
17292}
17293
17294fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
17295    tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
17296}
17297
/// v6.4.0 — multi-key ORDER BY comparator.
///
/// Keys are compared pairwise, left to right; the first pair that is
/// not equal decides the result. Each key's DESC flag in `descs` is
/// honoured independently, and a missing flag means ascending. If one
/// key slice is shorter than the other, only the common prefix is
/// compared.
///
/// NULL is encoded by the caller as `f64::INFINITY` so it sorts last
/// in ASC and first in DESC (matches PG default).
///
/// NaN keys get a defined place: a NaN ranks above every non-NaN key
/// (including the infinities) and equal to any other NaN. Treating an
/// incomparable pair as "equal" instead would make NaN equal to both
/// `1.0` and `2.0` while `1.0 < 2.0`, which is not a total order —
/// `sort_by` / `select_nth_unstable_by` are then free to produce an
/// unspecified order or panic.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        let ascending = match ka.partial_cmp(kb) {
            Some(ord) => ord,
            // `partial_cmp` is `None` only when at least one side is
            // NaN; `false < true` puts the NaN side last.
            None => ka.is_nan().cmp(&kb.is_nan()),
        };
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ascending.reverse()
        } else {
            ascending
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
17316
17317/// v6.4.0 — eval every ORDER BY expression for a row and pack the
17318/// resulting keys into a `Vec<f64>`. NULL → `f64::INFINITY`.
17319fn build_order_keys(
17320    order_by: &[OrderBy],
17321    row: &Row,
17322    ctx: &EvalContext,
17323) -> Result<Vec<f64>, EngineError> {
17324    let mut keys = Vec::with_capacity(order_by.len());
17325    for o in order_by {
17326        let v = eval::eval_expr(&o.expr, row, ctx)?;
17327        // v7.24 (round-16 A) — explicit NULLS FIRST/LAST. The f64
17328        // packing sorts ascending THEN applies the per-key DESC
17329        // reverse, so a NULL must land at +INF exactly when the
17330        // effective placement agrees with the reverse direction:
17331        // nf == desc → +INF (ASC default last / DESC default
17332        // first), nf != desc → -INF (the explicit flips).
17333        if matches!(v, Value::Null) {
17334            let nf = o.nulls_first.unwrap_or(o.desc);
17335            keys.push(if nf == o.desc {
17336                f64::INFINITY
17337            } else {
17338                f64::NEG_INFINITY
17339            });
17340        } else {
17341            keys.push(value_to_order_key(&v)?);
17342        }
17343    }
17344    Ok(keys)
17345}
17346
17347/// Drop the first `offset` rows then truncate to `limit`. PG / `MySQL`
17348/// agree: OFFSET applies *after* ORDER BY but *before* LIMIT (so
17349/// `LIMIT 10 OFFSET 5` keeps rows 6..=15).
17350fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
17351    if let Some(off) = offset {
17352        let off = off as usize;
17353        if off >= rows.len() {
17354            rows.clear();
17355        } else {
17356            rows.drain(..off);
17357        }
17358    }
17359    if let Some(n) = limit {
17360        rows.truncate(n as usize);
17361    }
17362}
17363
17364/// v7.17.0 Phase 3.P0-49 — offset + limit applied to a tagged
17365/// `(order_keys, row)` sequence, with optional SQL:2008 `WITH
17366/// TIES` extension. When `with_ties` is set, the truncated tail
17367/// is extended through every subsequent row whose order keys
17368/// equal the last-kept row's keys (so a "top 3 by score" with
17369/// WITH TIES emits row 4 too when row 4 ties row 3 on `score`).
17370///
17371/// The order-key vector is the per-row sort key the caller already
17372/// computed via `build_order_keys`; equal-key detection therefore
17373/// matches the sort comparator exactly.
17374fn apply_offset_and_limit_tagged(
17375    tagged: &mut Vec<(Vec<f64>, Row)>,
17376    offset: Option<u32>,
17377    limit: Option<u32>,
17378    with_ties: bool,
17379) {
17380    if let Some(off) = offset {
17381        let off = off as usize;
17382        if off >= tagged.len() {
17383            tagged.clear();
17384        } else {
17385            tagged.drain(..off);
17386        }
17387    }
17388    if let Some(n) = limit {
17389        let n = n as usize;
17390        if with_ties && n > 0 && n < tagged.len() {
17391            let cutoff_key = tagged[n - 1].0.clone();
17392            let mut end = n;
17393            while end < tagged.len() && tagged[end].0 == cutoff_key {
17394                end += 1;
17395            }
17396            tagged.truncate(end);
17397        } else {
17398            tagged.truncate(n);
17399        }
17400    }
17401}
17402
17403/// v7.17.0 Phase 3.P0-49 — PG-canonical: `FETCH FIRST <n> ROWS
17404/// WITH TIES` requires an `ORDER BY`. Without one, there's no
17405/// way to identify "ties" deterministically, so PG errors at
17406/// plan time. SPG mirrors that surface so the same DDL / app
17407/// behaviour holds on cutover.
17408fn check_with_ties_requires_order_by(stmt: &SelectStatement) -> Result<(), EngineError> {
17409    if stmt.limit_with_ties && stmt.order_by.is_empty() {
17410        return Err(EngineError::Unsupported(alloc::string::String::from(
17411            "FETCH FIRST … ROWS WITH TIES requires an ORDER BY clause",
17412        )));
17413    }
17414    Ok(())
17415}
17416
/// v7.6.1 — resolve a parser-level `ForeignKeyConstraint` (column
/// names + parent table name) into the storage-layer shape (column
/// indices + same parent table). Validates everything the engine
/// needs to know about the FK at CREATE TABLE time:
///
///   - local columns exist on the table being created
///   - parent table exists (catalog lookup, unless self-referencing)
///   - parent columns exist on the parent table
///   - parent column list matches the local arity (defaults to the
///     parent's primary index column when omitted; the default is
///     only available for single-column FKs)
///   - for non-self-referencing FKs, the FIRST parent column is
///     covered by a `BTree` index without a partial predicate
///     (SPG's stand-in for `PRIMARY KEY`/`UNIQUE`) — required so
///     the v7.6.2 INSERT path can do an O(log n) parent lookup
///
/// `local_cols` is the column list of the table being created; it also
/// serves as the parent column list when the FK is self-referencing,
/// because the catalog has no entry for that table yet.
///
/// # Errors
///
/// `EngineError::Storage(TableNotFound)` when the parent table is
/// missing; `EngineError::Unsupported` for every other validation
/// failure listed above.
fn resolve_foreign_key(
    local_table_name: &str,
    local_cols: &[ColumnSchema],
    fk: spg_sql::ast::ForeignKeyConstraint,
    catalog: &Catalog,
) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
    // Resolve local columns.
    let mut local_columns = Vec::with_capacity(fk.columns.len());
    for name in &fk.columns {
        let pos = local_cols
            .iter()
            .position(|c| c.name == *name)
            .ok_or_else(|| {
                EngineError::Unsupported(alloc::format!(
                    "FOREIGN KEY references unknown local column {name:?}"
                ))
            })?;
        local_columns.push(pos);
    }
    // Self-referencing FK: parent table is the one we're creating.
    // The parent column resolution uses the local column list since
    // the catalog doesn't have this table yet.
    let is_self_ref = fk.parent_table == local_table_name;
    let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
        (local_cols, local_table_name)
    } else {
        let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
            EngineError::Storage(StorageError::TableNotFound {
                name: fk.parent_table.clone(),
            })
        })?;
        (
            parent_table.schema().columns.as_slice(),
            fk.parent_table.as_str(),
        )
    };
    // Resolve parent column names → positions. If the FK omitted the
    // parent column list, fall back to the parent's primary index
    // column (single-column only — composite default is rejected
    // because there's no unambiguous "PK" in SPG's index list).
    let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
        if fk.columns.len() != 1 {
            return Err(EngineError::Unsupported(
                "composite FOREIGN KEY without explicit parent column list is not supported \
                 — list the parent columns explicitly"
                    .into(),
            ));
        }
        // Find a single BTree index on the parent and use its column.
        let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
            .ok_or_else(|| {
                EngineError::Unsupported(alloc::format!(
                    "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
                     to default the FOREIGN KEY against"
                ))
            })?;
        alloc::vec![pos]
    } else {
        let mut out = Vec::with_capacity(fk.parent_columns.len());
        for name in &fk.parent_columns {
            let pos = parent_cols_for_lookup
                .iter()
                .position(|c| c.name == *name)
                .ok_or_else(|| {
                    EngineError::Unsupported(alloc::format!(
                        "FOREIGN KEY references unknown parent column \
                         {name:?} on table {parent_table_str:?}"
                    ))
                })?;
            out.push(pos);
        }
        out
    };
    // Both branches above leave `parent_columns` non-empty, so this
    // check also rejects an FK with an empty local column list.
    if parent_columns.len() != local_columns.len() {
        return Err(EngineError::Unsupported(alloc::format!(
            "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
            local_columns.len(),
            parent_columns.len()
        )));
    }
    // For non-self-referencing FKs, verify the parent column set is
    // covered by a BTree index. SPG doesn't have a `PRIMARY KEY`
    // declaration; the convention is "the parent column for FK
    // purposes must have a BTree index" — which the user creates via
    // `CREATE INDEX ... USING btree (col)` (the default). We accept
    // any single-column BTree index that covers a parent column;
    // composite parent column lists require an index whose `column_position`
    // matches the first parent column (multi-column BTree indices
    // are not in the v7.x roadmap).
    // NOTE(review): unlike `pick_pk_index_column`, this check does not
    // exclude indexes with included columns or an expression — confirm
    // whether that difference is intended.
    if !is_self_ref {
        // The same lookup already succeeded when the parent column
        // slice was resolved above, so this cannot fail here.
        let parent_table = catalog.get(&fk.parent_table).expect("checked above");
        let primary_parent_col = parent_columns[0];
        let has_btree = parent_table
            .schema()
            .columns
            .get(primary_parent_col)
            .is_some()
            && parent_table.indices().iter().any(|idx| {
                matches!(idx.kind, spg_storage::IndexKind::BTree(_))
                    && idx.column_position == primary_parent_col
                    && idx.partial_predicate.is_none()
            });
        if !has_btree {
            return Err(EngineError::Unsupported(alloc::format!(
                "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
                 index — create one with `CREATE INDEX ... ON {} ({})` first",
                parent_table_str,
                parent_table_str,
                parent_table.schema().columns[primary_parent_col].name,
            )));
        }
    }
    let on_delete = fk_action_sql_to_storage(fk.on_delete);
    let on_update = fk_action_sql_to_storage(fk.on_update);
    Ok(spg_storage::ForeignKeyConstraint {
        name: fk.name,
        local_columns,
        parent_table: fk.parent_table,
        parent_columns,
        on_delete,
        on_update,
    })
}
17553
17554/// v7.6.1 — pick a sentinel "primary key" column from the parent
17555/// table when the FK didn't name parent columns. Picks the first
17556/// single-column unconditional BTree index — that's the closest
17557/// thing SPG has to a PRIMARY KEY today. Self-referencing FKs use
17558/// `local_cols` as the column source.
17559fn pick_pk_index_column(
17560    catalog: &Catalog,
17561    parent_name: &str,
17562    is_self_ref: bool,
17563    local_cols: &[ColumnSchema],
17564) -> Option<usize> {
17565    if is_self_ref {
17566        // Self-ref FK omitted parent columns: pick column 0 by
17567        // convention (no catalog entry yet). Engine will widen this
17568        // when v7.6.7 lands; v7.6.1 only handles the explicit form.
17569        let _ = local_cols;
17570        return Some(0);
17571    }
17572    let parent = catalog.get(parent_name)?;
17573    parent.indices().iter().find_map(|idx| {
17574        if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17575            && idx.partial_predicate.is_none()
17576            && idx.included_columns.is_empty()
17577            && idx.expression.is_none()
17578        {
17579            Some(idx.column_position)
17580        } else {
17581            None
17582        }
17583    })
17584}
17585
/// v7.9.8 / v7.9.10 — resolve the column positions that
/// identify a conflict for ON CONFLICT.
///
/// Returns the conflict-key column positions (1 element for the
/// single-column form, N for composite) plus whether the matched
/// constraint declares NULLS NOT DISTINCT (v7.29 — a NULL in the key
/// only rules out a conflict under the default NULLS DISTINCT
/// semantics).
///
/// `target` is the explicit conflict-target column list. When it is
/// empty (bare `ON CONFLICT DO …`), the table's first uniqueness
/// constraint supplies the full column tuple; if the table has none,
/// the first plain BTree index (no partial predicate, no included
/// columns, no expression) supplies a single column and the flag is
/// `false`.
///
/// # Errors
///
/// `EngineError::Storage(TableNotFound)` when the table is missing;
/// `EngineError::Unsupported` when no target was given and no usable
/// index exists, or when a target column is not on the table.
fn resolve_on_conflict_columns(
    catalog: &Catalog,
    table_name: &str,
    target: &[String],
) -> Result<(Vec<usize>, bool), EngineError> {
    let table = catalog.get(table_name).ok_or_else(|| {
        EngineError::Storage(StorageError::TableNotFound {
            name: table_name.into(),
        })
    })?;
    if target.is_empty() {
        // v7.13.2 — mailrs round-6 S5 follow-up. Composite UNIQUE
        // constraints carry a multi-column tuple; the prior code
        // path picked only the leading column of the first BTree
        // index, which caused `ON CONFLICT DO NOTHING` to dedup
        // by leading column alone (3 rows with same group_id but
        // different permission collapsed to 1). PG semantics use
        // the full tuple. Prefer a UniquenessConstraint's full
        // column list when one exists; fall back to the leading
        // BTree column for legacy single-column UNIQUE.
        if let Some(uc) = table.schema().uniqueness_constraints.first() {
            return Ok((uc.columns.clone(), uc.nulls_not_distinct));
        }
        let pos = table
            .indices()
            .iter()
            .find_map(|idx| {
                if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
                    && idx.partial_predicate.is_none()
                    && idx.included_columns.is_empty()
                    && idx.expression.is_none()
                {
                    Some(idx.column_position)
                } else {
                    None
                }
            })
            .ok_or_else(|| {
                EngineError::Unsupported(alloc::format!(
                    "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
                ))
            })?;
        return Ok((alloc::vec![pos], false));
    }
    // Explicit target: map each named column to its schema position,
    // preserving the order the user wrote.
    let mut out = Vec::with_capacity(target.len());
    for name in target {
        let pos = table
            .schema()
            .columns
            .iter()
            .position(|c| c.name == *name)
            .ok_or_else(|| {
                EngineError::Unsupported(alloc::format!(
                    "ON CONFLICT target column {name:?} not found on {table_name:?}"
                ))
            })?;
        out.push(pos);
    }
    // An explicit target matching a UNIQUE constraint inherits its
    // NULLS [NOT] DISTINCT declaration. Both sides are sorted first so
    // the match ignores column order.
    let mut sorted = out.clone();
    sorted.sort_unstable();
    let nnd = table.schema().uniqueness_constraints.iter().any(|uc| {
        let mut u = uc.columns.clone();
        u.sort_unstable();
        u == sorted && uc.nulls_not_distinct
    });
    Ok((out, nnd))
}
17665
17666/// v7.9.8 — check whether the BTree index on `column_pos` of
17667/// `table_name` already has a row with this key.
17668fn on_conflict_key_exists(
17669    catalog: &Catalog,
17670    table_name: &str,
17671    column_pos: usize,
17672    key: &Value,
17673) -> bool {
17674    let Some(table) = catalog.get(table_name) else {
17675        return false;
17676    };
17677    let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
17678        return false;
17679    };
17680    table.indices().iter().any(|idx| {
17681        matches!(idx.kind, spg_storage::IndexKind::BTree(_))
17682            && idx.column_position == column_pos
17683            && idx.partial_predicate.is_none()
17684            && !idx.lookup_eq(&idx_key).is_empty()
17685    })
17686}
17687
17688/// v7.9.9 / v7.9.10 — look up an existing row's position by
17689/// matching all `column_positions` against the incoming `key`
17690/// tuple. Single-column shape (one column) reduces to the
17691/// canonical PK lookup; composite shapes scan linearly until
17692/// every position matches.
17693fn lookup_row_position_by_keys(
17694    catalog: &Catalog,
17695    table_name: &str,
17696    column_positions: &[usize],
17697    key: &[&Value],
17698) -> Option<usize> {
17699    let table = catalog.get(table_name)?;
17700    table.rows().iter().position(|r| {
17701        column_positions
17702            .iter()
17703            .enumerate()
17704            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
17705    })
17706}
17707
17708/// v7.9.10 — does the table already contain a row whose
17709/// `column_positions` tuple equals `key`? Single-column shape
17710/// uses the existing BTree fast path; composite shapes fall
17711/// back to a row scan.
17712fn on_conflict_keys_exist(
17713    catalog: &Catalog,
17714    table_name: &str,
17715    column_positions: &[usize],
17716    key: &[&Value],
17717) -> bool {
17718    if column_positions.len() == 1 {
17719        return on_conflict_key_exists(catalog, table_name, column_positions[0], key[0]);
17720    }
17721    let Some(table) = catalog.get(table_name) else {
17722        return false;
17723    };
17724    table.rows().iter().any(|r| {
17725        column_positions
17726            .iter()
17727            .enumerate()
17728            .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
17729    })
17730}
17731
/// v7.9.9 — apply ON CONFLICT DO UPDATE SET assignments to an
/// existing row.
///
/// `incoming` is the rejected INSERT row (used to resolve
/// `EXCLUDED.col` references in the assignment exprs);
/// `target_pos` is the position of the existing row in the table.
/// Each assignment substitutes `EXCLUDED.col` with the matching
/// incoming value, evaluates the resulting expression against
/// the existing row, and writes the new value into the
/// corresponding column of the returned `Vec<Value>`. If
/// `where_` evaluates falsy, returns Ok(None) — PG behaviour:
/// the conflicting row is silently kept unchanged.
///
/// This function only computes the updated values; it takes the
/// catalog by shared reference and does not write the row back.
///
/// # Errors
///
/// `EngineError::Storage(TableNotFound)` when the table is missing,
/// `EngineError::Unsupported` when `target_pos` is out of bounds,
/// `EngineError::Eval(ColumnNotFound)` when an assignment names an
/// unknown column, plus any failure from expression evaluation,
/// `coerce_value` or `check_unsigned_range`.
fn apply_on_conflict_assignments(
    catalog: &Catalog,
    table_name: &str,
    target_pos: usize,
    incoming: &[Value],
    assignments: &[(String, Expr)],
    where_: Option<&Expr>,
) -> Result<Option<Vec<Value>>, EngineError> {
    let table = catalog.get(table_name).ok_or_else(|| {
        EngineError::Storage(StorageError::TableNotFound {
            name: table_name.into(),
        })
    })?;
    let schema_cols = table.schema().columns.clone();
    let existing = table
        .rows()
        .get(target_pos)
        .ok_or_else(|| {
            EngineError::Unsupported(alloc::format!(
                "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
            ))
        })?
        .clone();
    let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
    // Optional WHERE filter on the conflict row.
    if let Some(w) = where_ {
        let pred = w.clone();
        let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
        let v = eval::eval_expr(&pred, &existing, &ctx)?;
        // Anything other than boolean TRUE skips the update.
        if !matches!(v, Value::Bool(true)) {
            return Ok(None);
        }
    }
    // Start from the existing row; only assigned columns change.
    let mut new_values = existing.values.clone();
    for (col_name, expr) in assignments {
        let target_idx = schema_cols
            .iter()
            .position(|c| c.name == *col_name)
            .ok_or_else(|| {
                EngineError::Eval(EvalError::ColumnNotFound {
                    name: col_name.clone(),
                })
            })?;
        let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
        // Every assignment is evaluated against the ORIGINAL row, not
        // the partially updated `new_values`, so assignments cannot
        // observe each other's results.
        let v = eval::eval_expr(&sub, &existing, &ctx)?;
        let coerced = coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
        check_unsigned_range(&coerced, &schema_cols[target_idx], target_idx)?;
        new_values[target_idx] = coerced;
    }
    Ok(Some(new_values))
}
17795
17796/// v7.9.9 — walk an `Expr` tree replacing any `Column { qualifier:
17797/// "EXCLUDED", name }` reference with a `Literal` of the matching
17798/// value from the incoming-row vec. Resolution against the
17799/// child-table column list (by name).
17800fn substitute_excluded_refs(expr: Expr, schema_cols: &[ColumnSchema], incoming: &[Value]) -> Expr {
17801    use spg_sql::ast::ColumnName;
17802    match expr {
17803        Expr::Column(ColumnName { qualifier, name })
17804            if qualifier
17805                .as_deref()
17806                .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
17807        {
17808            let pos = schema_cols.iter().position(|c| c.name == name);
17809            match pos {
17810                Some(p) => {
17811                    let v = incoming.get(p).cloned().unwrap_or(Value::Null);
17812                    value_to_literal_expr(v)
17813                        .unwrap_or_else(|_| Expr::Literal(spg_sql::ast::Literal::Null))
17814                }
17815                None => Expr::Column(ColumnName { qualifier, name }),
17816            }
17817        }
17818        Expr::Binary { op, lhs, rhs } => Expr::Binary {
17819            op,
17820            lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
17821            rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
17822        },
17823        Expr::Unary { op, expr } => Expr::Unary {
17824            op,
17825            expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
17826        },
17827        Expr::FunctionCall { name, args } => Expr::FunctionCall {
17828            name,
17829            args: args
17830                .into_iter()
17831                .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
17832                .collect(),
17833        },
17834        other => other,
17835    }
17836}
17837
// NOTE: the paragraph below documents `enforce_fk_inserts`, which is
// defined further down in this file. It is kept as a plain comment so
// rustdoc does not attach it to `enforce_uniqueness_inserts`.
//
// v7.6.2 / v7.6.7 — INSERT-side FK enforcement. For every row
// about to be inserted into `child_table`, every FK declared on
// that table is checked: the row's FK columns must either be
// NULL (SQL spec skip) or match an existing parent row via the
// parent's BTree PK / UNIQUE index.
//
// Returns `EngineError::Unsupported` with a `FOREIGN KEY violation`
// payload on first failure.
//
// **Self-referencing FKs (v7.6.7 widening):** when `fk.parent_table
// == child_table`, the parent rows visible to this check are
//  (a) rows already committed to the table, plus
//  (b) earlier rows from the *same* `rows` batch.
// This makes `INSERT INTO tree VALUES (1, NULL), (2, 1), (3, 2)`
// work in a single statement — a common pattern for bulk-loading
// hierarchies.

17854/// v7.9.19 — enforce table-level UNIQUE / PRIMARY KEY tuple
17855/// constraints at INSERT time. For each constraint declared on
17856/// the target table, check that no existing row + no earlier row
17857/// in the same batch has the same full-column tuple. NULL in
17858/// any column lifts the row out of the check (SQL spec: NULL
17859/// ≠ NULL for uniqueness). mailrs G1 + G6.
17860fn enforce_uniqueness_inserts(
17861    catalog: &Catalog,
17862    child_table: &str,
17863    constraints: &[spg_storage::UniquenessConstraint],
17864    rows: &[Vec<Value>],
17865) -> Result<(), EngineError> {
17866    if constraints.is_empty() {
17867        return Ok(());
17868    }
17869    let table = catalog.get(child_table).ok_or_else(|| {
17870        EngineError::Storage(StorageError::TableNotFound {
17871            name: child_table.into(),
17872        })
17873    })?;
17874    let schema = table.schema();
17875    // v7.29 (mailrs round-23b) — set-based: ONE O(table) pass folds
17876    // existing keys into a hash set, then each batch row is a probe
17877    // + insert. The previous shape scanned the WHOLE table per
17878    // inserted row (and earlier batch rows per row), which made
17879    // bulk import O(n²) — a 104 MB dump extrapolated to ~1 hour
17880    // (PG: 2 min). Collation folding (Phase 3.P0-45) and
17881    // NULLS [NOT] DISTINCT semantics are unchanged: keys fold via
17882    // collated_key_cell before encoding, NULL-bearing keys skip the
17883    // set unless nulls_not_distinct.
17884    for uc in constraints {
17885        let fold_key = |values: &[Value]| -> Vec<Value> {
17886            uc.columns
17887                .iter()
17888                .map(|&i| {
17889                    let v = values.get(i).cloned().unwrap_or(Value::Null);
17890                    collated_key_cell(&v, i, schema)
17891                })
17892                .collect()
17893        };
17894        let mut seen: hashbrown::HashSet<String> =
17895            hashbrown::HashSet::with_capacity(table.rows().len() + rows.len());
17896        for prow in table.rows() {
17897            let key = fold_key(&prow.values);
17898            if key.iter().any(|v| matches!(v, Value::Null)) && !uc.nulls_not_distinct {
17899                continue;
17900            }
17901            seen.insert(aggregate::encode_key(&key));
17902        }
17903        for (batch_idx, row_values) in rows.iter().enumerate() {
17904            let key = fold_key(row_values);
17905            if key.iter().any(|v| matches!(v, Value::Null)) && !uc.nulls_not_distinct {
17906                continue;
17907            }
17908            if !seen.insert(aggregate::encode_key(&key)) {
17909                let kind = if uc.is_primary_key {
17910                    "PRIMARY KEY"
17911                } else {
17912                    "UNIQUE"
17913                };
17914                let col_names: Vec<String> = uc
17915                    .columns
17916                    .iter()
17917                    .map(|&i| table.schema().columns[i].name.clone())
17918                    .collect();
17919                return Err(EngineError::Unsupported(alloc::format!(
17920                    "{kind} violation on {child_table:?} columns {col_names:?}: \
17921                     row #{batch_idx} duplicates an existing key"
17922                )));
17923            }
17924        }
17925    }
17926    Ok(())
17927}
17928
17929/// v7.17.0 Phase 3.P0-45 — return a key cell folded by its column's
17930/// declared `Collation`. For `CaseInsensitive`, fold Text payloads to
17931/// ASCII lowercase (matches Phase 2.5's `*_ci` semantics: ASCII case-
17932/// fold only, non-ASCII bytes stay byte-wise). For `Binary` or non-Text
17933/// values, the cell passes through unchanged. The caller compares the
17934/// folded values with `==`.
17935fn collated_key_cell(
17936    v: &spg_storage::Value,
17937    column_position: usize,
17938    schema: &spg_storage::TableSchema,
17939) -> spg_storage::Value {
17940    match (v, schema.columns.get(column_position).map(|c| c.collation)) {
17941        (spg_storage::Value::Text(s), Some(spg_storage::Collation::CaseInsensitive)) => {
17942            spg_storage::Value::Text(s.to_ascii_lowercase())
17943        }
17944        _ => v.clone(),
17945    }
17946}
17947
17948/// v7.9.29 — `true` iff `v` counts as a truthy SQL value for a
17949/// WHERE-style predicate. NULL → false (three-valued logic
17950/// collapses to "skip this row" for index inclusion). Numeric
17951/// non-zero, BIGINT non-zero, TINYINT non-zero, BOOLEAN true → true.
17952/// Everything else (strings, vectors, JSON, …) is not a valid
17953/// predicate result and surfaces as `false` so a malformed
17954/// predicate degrades to "row not in index" rather than panicking.
17955fn predicate_truthy(v: &spg_storage::Value) -> bool {
17956    use spg_storage::Value as V;
17957    match v {
17958        V::Bool(b) => *b,
17959        V::Int(n) => *n != 0,
17960        V::BigInt(n) => *n != 0,
17961        V::SmallInt(n) => *n != 0,
17962        _ => false,
17963    }
17964}
17965
17966/// v7.9.29 — at CREATE UNIQUE INDEX time, scan the table's
17967/// committed rows for pre-existing duplicates. If any pair of rows
17968/// matches the predicate AND has the same index key, refuse to
17969/// create the index so the user fixes the data before retrying.
17970fn check_existing_unique_violation(
17971    idx: &spg_storage::Index,
17972    schema: &spg_storage::TableSchema,
17973    rows: &[spg_storage::Row],
17974) -> Result<(), EngineError> {
17975    let predicate_expr = match idx.partial_predicate.as_deref() {
17976        Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
17977            EngineError::Unsupported(alloc::format!(
17978                "stored partial predicate {s:?} failed to re-parse: {e:?}"
17979            ))
17980        })?),
17981        None => None,
17982    };
17983    let ctx = eval::EvalContext::new(&schema.columns, None);
17984    let key_positions = unique_key_positions(idx);
17985    let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
17986    for row in rows {
17987        if let Some(expr) = &predicate_expr {
17988            let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
17989                EngineError::Unsupported(alloc::format!(
17990                    "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
17991                ))
17992            })?;
17993            if !predicate_truthy(&v) {
17994                continue;
17995            }
17996        }
17997        let key: alloc::vec::Vec<spg_storage::Value> = key_positions
17998            .iter()
17999            .map(|&p| {
18000                let v = row
18001                    .values
18002                    .get(p)
18003                    .cloned()
18004                    .unwrap_or(spg_storage::Value::Null);
18005                collated_key_cell(&v, p, schema)
18006            })
18007            .collect();
18008        if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
18009            continue;
18010        }
18011        if seen.iter().any(|other| *other == key) {
18012            return Err(EngineError::Unsupported(alloc::format!(
18013                "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
18014                idx.name
18015            )));
18016        }
18017        seen.push(key);
18018    }
18019    Ok(())
18020}
18021
18022/// v7.9.29 — full key tuple for a UNIQUE INDEX (leading +
18023/// extra positions). For single-column indexes this is just
18024/// `[column_position]`.
18025fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
18026    let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
18027    out.push(idx.column_position);
18028    out.extend_from_slice(&idx.extra_column_positions);
18029    out
18030}
18031
18032/// v7.9.29 — at INSERT time, walk every `is_unique` index on the
18033/// target table. For each, eval the index's optional predicate
18034/// against (a) the candidate row and (b) every committed row plus
18035/// earlier batch rows; only rows where the predicate is truthy
18036/// participate. A duplicate key among predicate-matching rows is a
18037/// uniqueness violation. NULL keys lift the row out of the check
18038/// (matching PG's "UNIQUE allows multiple NULLs" semantics).
18039fn enforce_unique_index_inserts(
18040    catalog: &Catalog,
18041    table_name: &str,
18042    rows: &[alloc::vec::Vec<spg_storage::Value>],
18043) -> Result<(), EngineError> {
18044    let table = catalog.get(table_name).ok_or_else(|| {
18045        EngineError::Storage(StorageError::TableNotFound {
18046            name: table_name.into(),
18047        })
18048    })?;
18049    let schema = table.schema();
18050    let ctx = eval::EvalContext::new(&schema.columns, None);
18051    for idx in table.indices() {
18052        if !idx.is_unique {
18053            continue;
18054        }
18055        // Re-parse the predicate once per index per batch.
18056        let predicate_expr = match idx.partial_predicate.as_deref() {
18057            Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
18058                EngineError::Unsupported(alloc::format!(
18059                    "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
18060                    idx.name
18061                ))
18062            })?),
18063            None => None,
18064        };
18065        let key_positions = unique_key_positions(idx);
18066        let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
18067            key_positions
18068                .iter()
18069                .map(|&p| {
18070                    let v = values.get(p).cloned().unwrap_or(spg_storage::Value::Null);
18071                    collated_key_cell(&v, p, schema)
18072                })
18073                .collect()
18074        };
18075        let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
18076            let Some(expr) = &predicate_expr else {
18077                return Ok(true);
18078            };
18079            let tmp_row = spg_storage::Row {
18080                values: values.to_vec(),
18081            };
18082            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
18083                EngineError::Unsupported(alloc::format!(
18084                    "UNIQUE INDEX {:?} predicate eval: {e:?}",
18085                    idx.name
18086                ))
18087            })?;
18088            Ok(predicate_truthy(&v))
18089        };
18090        // v7.29 (mailrs round-23b) — set-based: one O(table) pass
18091        // (predicate evaluated once per existing row instead of once
18092        // per row PAIR), then probe per batch row. The previous
18093        // nested scans made bulk import O(n²).
18094        let mut seen: hashbrown::HashSet<String> =
18095            hashbrown::HashSet::with_capacity(table.rows().len() + rows.len());
18096        for prow in table.rows() {
18097            if !participates(&prow.values)? {
18098                continue;
18099            }
18100            let key = key_of(&prow.values);
18101            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
18102                continue;
18103            }
18104            seen.insert(aggregate::encode_key(&key));
18105        }
18106        for (batch_idx, row_values) in rows.iter().enumerate() {
18107            if !participates(row_values)? {
18108                continue;
18109            }
18110            let key = key_of(row_values);
18111            if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
18112                continue;
18113            }
18114            if !seen.insert(aggregate::encode_key(&key)) {
18115                return Err(EngineError::Unsupported(alloc::format!(
18116                    "UNIQUE INDEX {:?} violation on {table_name:?}: \
18117                     row #{batch_idx} duplicates an existing key",
18118                    idx.name
18119                )));
18120            }
18121        }
18122    }
18123    Ok(())
18124}
18125
18126/// v7.13.0 — `UPDATE OF cols` filter helper (mailrs round-5 G7).
18127/// Returns `true` when at least one of `filter_cols` has a
18128/// different value in `new_row` vs `old_row`. Column lookup is
18129/// case-insensitive against `schema_cols`; unknown filter columns
18130/// are treated as "not changed" (the trigger therefore won't
18131/// fire on them — surfacing a parse-time error would be too
18132/// strict for catalog reloads where the schema may have drifted).
18133fn any_column_changed(
18134    filter_cols: &[String],
18135    schema_cols: &[ColumnSchema],
18136    old_row: &Row,
18137    new_row: &Row,
18138) -> bool {
18139    for col_name in filter_cols {
18140        let Some(pos) = schema_cols
18141            .iter()
18142            .position(|c| c.name.eq_ignore_ascii_case(col_name))
18143        else {
18144            continue;
18145        };
18146        let old_v = old_row.values.get(pos);
18147        let new_v = new_row.values.get(pos);
18148        if old_v != new_v {
18149            return true;
18150        }
18151    }
18152    false
18153}
18154
18155/// v7.13.0 — evaluate every CHECK predicate on the schema against
18156/// each candidate row. Mirrors PG semantics: a `false` result
18157/// rejects the mutation; a NULL result *passes* (CHECK rejects
18158/// only on definite-false, not on unknown). mailrs round-5 G3.
18159fn enforce_check_constraints(
18160    catalog: &Catalog,
18161    table_name: &str,
18162    rows: &[alloc::vec::Vec<spg_storage::Value>],
18163) -> Result<(), EngineError> {
18164    let table = catalog.get(table_name).ok_or_else(|| {
18165        EngineError::Storage(StorageError::TableNotFound {
18166            name: table_name.into(),
18167        })
18168    })?;
18169    let schema = table.schema();
18170    // v7.17.0 Phase 1.5 — domain-level CHECKs are enforced in
18171    // parallel with table-level CHECKs. Collect both lists up
18172    // front; if neither exists we early-out.
18173    let mut domain_checks_per_col: alloc::vec::Vec<(usize, alloc::vec::Vec<Expr>)> =
18174        alloc::vec::Vec::new();
18175    for (idx, col) in schema.columns.iter().enumerate() {
18176        let Some(dname) = &col.user_domain_type else {
18177            continue;
18178        };
18179        let Some(dom) = catalog.domain_types().get(dname) else {
18180            continue;
18181        };
18182        let mut parsed_for_col: alloc::vec::Vec<Expr> =
18183            alloc::vec::Vec::with_capacity(dom.checks.len());
18184        for src in &dom.checks {
18185            let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
18186                EngineError::Unsupported(alloc::format!(
18187                    "DOMAIN {dname:?} CHECK ({src:?}) on column {:?}: re-parse failed: {e:?}",
18188                    col.name
18189                ))
18190            })?;
18191            parsed_for_col.push(expr);
18192        }
18193        if !parsed_for_col.is_empty() {
18194            domain_checks_per_col.push((idx, parsed_for_col));
18195        }
18196    }
18197    if schema.checks.is_empty() && domain_checks_per_col.is_empty() {
18198        return Ok(());
18199    }
18200    let ctx = eval::EvalContext::new(&schema.columns, None);
18201    let mut parsed: alloc::vec::Vec<(usize, Expr)> = alloc::vec::Vec::new();
18202    for (i, src) in schema.checks.iter().enumerate() {
18203        let expr = spg_sql::parser::parse_expression(src).map_err(|e| {
18204            EngineError::Unsupported(alloc::format!(
18205                "CHECK constraint #{i} on {table_name:?} ({src:?}) failed to re-parse: {e:?}"
18206            ))
18207        })?;
18208        parsed.push((i, expr));
18209    }
18210    for (batch_idx, row_values) in rows.iter().enumerate() {
18211        let tmp_row = spg_storage::Row {
18212            values: row_values.clone(),
18213        };
18214        for (i, expr) in &parsed {
18215            let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
18216                EngineError::Unsupported(alloc::format!(
18217                    "CHECK constraint #{i} on {table_name:?} eval at row #{batch_idx}: {e:?}"
18218                ))
18219            })?;
18220            // PG: NULL passes (CHECK rejects on definite-false only).
18221            if matches!(v, spg_storage::Value::Bool(false)) {
18222                return Err(EngineError::Unsupported(alloc::format!(
18223                    "CHECK constraint violation on {table_name:?} (row #{batch_idx}): {:?}",
18224                    schema.checks[*i]
18225                )));
18226            }
18227        }
18228        // v7.17.0 Phase 1.5 — domain-level CHECKs. Each CHECK
18229        // expression references VALUE as a column-name; we
18230        // substitute the per-row cell into the eval context by
18231        // synthesising a single-column row of just that value
18232        // under a temporary `value` column schema.
18233        for (col_idx, checks) in &domain_checks_per_col {
18234            let cell = row_values
18235                .get(*col_idx)
18236                .cloned()
18237                .unwrap_or(spg_storage::Value::Null);
18238            let synth_cols = alloc::vec![spg_storage::ColumnSchema::new(
18239                "value",
18240                schema.columns[*col_idx].ty,
18241                schema.columns[*col_idx].nullable,
18242            )];
18243            let synth_ctx = eval::EvalContext::new(&synth_cols, None);
18244            let synth_row = spg_storage::Row {
18245                values: alloc::vec![cell],
18246            };
18247            for (ci, expr) in checks.iter().enumerate() {
18248                let v = eval::eval_expr(expr, &synth_row, &synth_ctx).map_err(|e| {
18249                    EngineError::Unsupported(alloc::format!(
18250                        "DOMAIN CHECK #{ci} on column {:?} eval at row #{batch_idx}: {e:?}",
18251                        schema.columns[*col_idx].name
18252                    ))
18253                })?;
18254                if matches!(v, spg_storage::Value::Bool(false)) {
18255                    return Err(EngineError::Unsupported(alloc::format!(
18256                        "DOMAIN CHECK violation on column {:?} (row #{batch_idx})",
18257                        schema.columns[*col_idx].name
18258                    )));
18259                }
18260            }
18261        }
18262    }
18263    Ok(())
18264}
18265
18266fn enforce_fk_inserts(
18267    catalog: &Catalog,
18268    child_table: &str,
18269    fks: &[spg_storage::ForeignKeyConstraint],
18270    rows: &[Vec<Value>],
18271) -> Result<(), EngineError> {
18272    for fk in fks {
18273        let parent_is_self = fk.parent_table == child_table;
18274        let parent = if parent_is_self {
18275            // Self-ref: read the current state of the same table.
18276            // The mut borrow on child has been dropped by the caller.
18277            catalog.get(child_table).ok_or_else(|| {
18278                EngineError::Storage(StorageError::TableNotFound {
18279                    name: child_table.into(),
18280                })
18281            })?
18282        } else {
18283            catalog.get(&fk.parent_table).ok_or_else(|| {
18284                EngineError::Storage(StorageError::TableNotFound {
18285                    name: fk.parent_table.clone(),
18286                })
18287            })?
18288        };
18289        for (batch_idx, row_values) in rows.iter().enumerate() {
18290            // Single-column FK fast path: try the parent's BTree
18291            // index for an O(log n) lookup. Composite FKs fall back
18292            // to a parent-row scan.
18293            if fk.local_columns.len() == 1 {
18294                let v = &row_values[fk.local_columns[0]];
18295                if matches!(v, Value::Null) {
18296                    continue;
18297                }
18298                let parent_col = fk.parent_columns[0];
18299                let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
18300                    EngineError::Unsupported(alloc::format!(
18301                        "FOREIGN KEY column value of type {:?} is not index-eligible",
18302                        v.data_type()
18303                    ))
18304                })?;
18305                let present_committed = parent.indices().iter().any(|idx| {
18306                    matches!(idx.kind, spg_storage::IndexKind::BTree(_))
18307                        && idx.column_position == parent_col
18308                        && idx.partial_predicate.is_none()
18309                        && !idx.lookup_eq(&key).is_empty()
18310                });
18311                // v7.6.7 self-ref widening: also accept a match
18312                // against earlier rows in this same batch when the
18313                // FK points at the table being inserted into.
18314                let present_in_batch = parent_is_self
18315                    && rows[..batch_idx]
18316                        .iter()
18317                        .any(|earlier| earlier.get(parent_col) == Some(v));
18318                if !(present_committed || present_in_batch) {
18319                    return Err(EngineError::Unsupported(alloc::format!(
18320                        "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
18321                        fk.parent_table,
18322                        parent
18323                            .schema()
18324                            .columns
18325                            .get(parent_col)
18326                            .map_or("?", |c| c.name.as_str()),
18327                        v,
18328                    )));
18329                }
18330            } else {
18331                // Composite FK: scan parent rows. v7.6.7 also
18332                // accepts a match against earlier rows in the same
18333                // batch (self-ref bulk-loading of hierarchies).
18334                if fk
18335                    .local_columns
18336                    .iter()
18337                    .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
18338                {
18339                    continue;
18340                }
18341                let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
18342                let parent_match_committed = parent.rows().iter().any(|prow| {
18343                    fk.parent_columns
18344                        .iter()
18345                        .enumerate()
18346                        .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
18347                });
18348                let parent_match_in_batch = parent_is_self
18349                    && rows[..batch_idx].iter().any(|earlier| {
18350                        fk.parent_columns
18351                            .iter()
18352                            .enumerate()
18353                            .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
18354                    });
18355                if !(parent_match_committed || parent_match_in_batch) {
18356                    return Err(EngineError::Unsupported(alloc::format!(
18357                        "FOREIGN KEY violation: no parent row in {:?} matching composite key",
18358                        fk.parent_table,
18359                    )));
18360                }
18361            }
18362        }
18363    }
18364    Ok(())
18365}
18366
18367/// v7.6.4 / v7.6.5 — one step of the FK action plan computed for a
18368/// DELETE on a parent. The plan is a list of these steps, stacked
18369/// across the FK graph by `plan_fk_parent_deletions`.
18370#[derive(Debug, Clone)]
18371struct FkChildStep {
18372    child_table: String,
18373    action: FkChildAction,
18374}
18375
18376#[derive(Debug, Clone)]
18377enum FkChildAction {
18378    /// CASCADE — remove these rows. Sorted, deduplicated positions.
18379    Delete { positions: Vec<usize> },
18380    /// SET NULL — for each (row, column) in the flat list, write
18381    /// NULL into that child cell. Multiple FKs on the same row may
18382    /// produce overlapping entries (deduped at plan time).
18383    SetNull {
18384        positions: Vec<usize>,
18385        columns: Vec<usize>,
18386    },
18387    /// SET DEFAULT — same shape as SetNull but writes the column's
18388    /// declared DEFAULT value (resolved at plan time). Columns
18389    /// without a DEFAULT raise an error during planning.
18390    SetDefault {
18391        positions: Vec<usize>,
18392        columns: Vec<usize>,
18393        defaults: Vec<Value>,
18394    },
18395}
18396
18397/// v7.6.3 → v7.6.5 — plan FK fallout for a DELETE on a parent table.
18398///
18399/// Walks every table in the catalog looking for FKs whose
18400/// `parent_table` is `parent_table_name`. For each such FK + each
18401/// to-be-deleted parent row:
18402///
18403///   - RESTRICT / NoAction → error, no plan returned
18404///   - CASCADE → child rows get scheduled for deletion; recursive
18405///   - SetNull → child FK column(s) scheduled to be NULL-ed.
18406///     Verified NULL-able at plan time.
18407///   - SetDefault → child FK column(s) scheduled to be reset to
18408///     their declared DEFAULT. Columns without a DEFAULT raise.
18409///
18410/// SET NULL / SET DEFAULT do NOT cascade further — the child row
18411/// stays; only one of its columns mutates.
18412fn plan_fk_parent_deletions(
18413    catalog: &Catalog,
18414    parent_table_name: &str,
18415    to_delete_positions: &[usize],
18416    to_delete_rows: &[Vec<Value>],
18417) -> Result<Vec<FkChildStep>, EngineError> {
18418    use alloc::collections::{BTreeMap, BTreeSet};
18419    if to_delete_rows.is_empty() {
18420        return Ok(Vec::new());
18421    }
18422    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
18423    // setnull / setdefault keyed by child_table → (row_idx, col_idx) → optional default
18424    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
18425    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
18426    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
18427    for &p in to_delete_positions {
18428        visited.insert((parent_table_name.to_string(), p));
18429    }
18430    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
18431        .iter()
18432        .map(|r| (parent_table_name.to_string(), r.clone()))
18433        .collect();
18434    while let Some((cur_parent, parent_row)) = work.pop() {
18435        for child_name in catalog.table_names() {
18436            let child = catalog
18437                .get(&child_name)
18438                .expect("table_names → catalog.get round-trip is total");
18439            for fk in &child.schema().foreign_keys {
18440                if fk.parent_table != cur_parent {
18441                    continue;
18442                }
18443                let parent_key: Vec<&Value> = fk
18444                    .parent_columns
18445                    .iter()
18446                    .map(|&pi| &parent_row[pi])
18447                    .collect();
18448                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
18449                    continue;
18450                }
18451                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
18452                    if child_name == cur_parent
18453                        && visited.contains(&(child_name.clone(), child_row_idx))
18454                    {
18455                        continue;
18456                    }
18457                    let matches_key = fk
18458                        .local_columns
18459                        .iter()
18460                        .enumerate()
18461                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
18462                    if !matches_key {
18463                        continue;
18464                    }
18465                    match fk.on_delete {
18466                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
18467                            return Err(EngineError::Unsupported(alloc::format!(
18468                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
18469                                 restricted by FK from {child_name:?}.{:?}",
18470                                fk.local_columns,
18471                            )));
18472                        }
18473                        spg_storage::FkAction::Cascade => {
18474                            if visited.insert((child_name.clone(), child_row_idx)) {
18475                                delete_plan
18476                                    .entry(child_name.clone())
18477                                    .or_default()
18478                                    .insert(child_row_idx);
18479                                work.push((child_name.clone(), child_row.values.clone()));
18480                            }
18481                        }
18482                        spg_storage::FkAction::SetNull => {
18483                            // Verify every local FK column is NULL-able.
18484                            for &li in &fk.local_columns {
18485                                let col = child.schema().columns.get(li).ok_or_else(|| {
18486                                    EngineError::Unsupported(alloc::format!(
18487                                        "FK local column {li} missing in {child_name:?}"
18488                                    ))
18489                                })?;
18490                                if !col.nullable {
18491                                    return Err(EngineError::Unsupported(alloc::format!(
18492                                        "FOREIGN KEY ON DELETE SET NULL: column \
18493                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
18494                                        col.name,
18495                                    )));
18496                                }
18497                            }
18498                            let entry = setnull_plan.entry(child_name.clone()).or_default();
18499                            for &li in &fk.local_columns {
18500                                entry.insert((child_row_idx, li));
18501                            }
18502                        }
18503                        spg_storage::FkAction::SetDefault => {
18504                            // Resolve the DEFAULT for every local FK col.
18505                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
18506                            for &li in &fk.local_columns {
18507                                let col = child.schema().columns.get(li).ok_or_else(|| {
18508                                    EngineError::Unsupported(alloc::format!(
18509                                        "FK local column {li} missing in {child_name:?}"
18510                                    ))
18511                                })?;
18512                                let default = col.default.clone().ok_or_else(|| {
18513                                    EngineError::Unsupported(alloc::format!(
18514                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
18515                                         {child_name:?}.{:?} has no DEFAULT declared",
18516                                        col.name,
18517                                    ))
18518                                })?;
18519                                entry.insert((child_row_idx, li), default);
18520                            }
18521                        }
18522                    }
18523                }
18524            }
18525        }
18526    }
18527    // Flatten the three plans into the ordered `FkChildStep` list.
18528    // Deletes are applied last per child (after any null/default
18529    // re-writes on the same child) so a child row that's both
18530    // re-written and then cascade-deleted only ends up deleted —
18531    // but in v7.6.5 SetNull/Cascade never overlap on the same row
18532    // (a single FK chooses exactly one action), so the order is
18533    // mostly a precaution.
18534    let mut steps: Vec<FkChildStep> = Vec::new();
18535    for (child_table, entries) in setnull_plan {
18536        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
18537        steps.push(FkChildStep {
18538            child_table,
18539            action: FkChildAction::SetNull { positions, columns },
18540        });
18541    }
18542    for (child_table, entries) in setdefault_plan {
18543        let mut positions = Vec::with_capacity(entries.len());
18544        let mut columns = Vec::with_capacity(entries.len());
18545        let mut defaults = Vec::with_capacity(entries.len());
18546        for ((p, c), v) in entries {
18547            positions.push(p);
18548            columns.push(c);
18549            defaults.push(v);
18550        }
18551        steps.push(FkChildStep {
18552            child_table,
18553            action: FkChildAction::SetDefault {
18554                positions,
18555                columns,
18556                defaults,
18557            },
18558        });
18559    }
18560    for (child_table, positions) in delete_plan {
18561        steps.push(FkChildStep {
18562            child_table,
18563            action: FkChildAction::Delete {
18564                positions: positions.into_iter().collect(),
18565            },
18566        });
18567    }
18568    Ok(steps)
18569}
18570
18571/// v7.6.6 — plan FK fallout for an UPDATE that mutates parent-side
18572/// PK/UNIQUE columns. Walks every other table whose FK references
18573/// `parent_table_name`; for each FK whose parent_columns overlap a
18574/// mutated column, decides the action by `fk.on_update`.
18575///
18576///   - RESTRICT / NoAction → error if any child references the OLD
18577///     value
18578///   - CASCADE → child FK columns get rewritten to the NEW parent
18579///     value (a SetNull-style update step with the new value)
18580///   - SetNull → child FK columns set to NULL
18581///   - SetDefault → child FK columns set to declared default
18582///
18583/// `plan_with_old` is `(row_position, old_values, new_values)` so
18584/// the planner can detect "did this row's parent key actually
18585/// change?" — only rows where at least one referenced parent
18586/// column moved trigger inbound work.
18587fn plan_fk_parent_updates(
18588    catalog: &Catalog,
18589    parent_table_name: &str,
18590    plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
18591) -> Result<Vec<FkChildStep>, EngineError> {
18592    use alloc::collections::BTreeMap;
18593    if plan_with_old.is_empty() {
18594        return Ok(Vec::new());
18595    }
18596    // For each child table we may touch, build per-child step
18597    // lists. UPDATE never deletes children — `delete_plan` stays
18598    // empty here but is kept structurally aligned with
18599    // `plan_fk_parent_deletions` for future use.
18600    let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
18601    let mut setnull_plan: BTreeMap<String, alloc::collections::BTreeSet<(usize, usize)>> =
18602        BTreeMap::new();
18603    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
18604    // Cascade-update plan: child_table → row_idx → col_idx → new_value
18605    let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
18606
18607    for child_name in catalog.table_names() {
18608        let child = catalog
18609            .get(&child_name)
18610            .expect("table_names → catalog.get total");
18611        for fk in &child.schema().foreign_keys {
18612            if fk.parent_table != parent_table_name {
18613                continue;
18614            }
18615            for (_pos, old_row, new_row) in plan_with_old {
18616                // Did any parent FK column change?
18617                let key_changed = fk
18618                    .parent_columns
18619                    .iter()
18620                    .any(|&pi| old_row.get(pi) != new_row.get(pi));
18621                if !key_changed {
18622                    continue;
18623                }
18624                // The OLD parent key — used to find referring children.
18625                let old_key: Vec<&Value> =
18626                    fk.parent_columns.iter().map(|&pi| &old_row[pi]).collect();
18627                if old_key.iter().any(|v| matches!(v, Value::Null)) {
18628                    // NULL parent has no children — skip.
18629                    continue;
18630                }
18631                let new_key: Vec<&Value> =
18632                    fk.parent_columns.iter().map(|&pi| &new_row[pi]).collect();
18633                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
18634                    // Self-ref same-row updates: a row updating its
18635                    // own PK doesn't restrict itself.
18636                    if child_name == parent_table_name
18637                        && plan_with_old.iter().any(|(p, _, _)| *p == child_row_idx)
18638                    {
18639                        continue;
18640                    }
18641                    let matches_key = fk
18642                        .local_columns
18643                        .iter()
18644                        .enumerate()
18645                        .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
18646                    if !matches_key {
18647                        continue;
18648                    }
18649                    match fk.on_update {
18650                        spg_storage::FkAction::Restrict | spg_storage::FkAction::NoAction => {
18651                            return Err(EngineError::Unsupported(alloc::format!(
18652                                "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
18653                                 restricted by FK from {child_name:?}.{:?}",
18654                                fk.local_columns,
18655                            )));
18656                        }
18657                        spg_storage::FkAction::Cascade => {
18658                            // Rewrite child FK columns to new key.
18659                            let entry = cascade_plan.entry(child_name.clone()).or_default();
18660                            for (i, &li) in fk.local_columns.iter().enumerate() {
18661                                entry.insert((child_row_idx, li), new_key[i].clone());
18662                            }
18663                        }
18664                        spg_storage::FkAction::SetNull => {
18665                            for &li in &fk.local_columns {
18666                                let col = child.schema().columns.get(li).ok_or_else(|| {
18667                                    EngineError::Unsupported(alloc::format!(
18668                                        "FK local column {li} missing in {child_name:?}"
18669                                    ))
18670                                })?;
18671                                if !col.nullable {
18672                                    return Err(EngineError::Unsupported(alloc::format!(
18673                                        "FOREIGN KEY ON UPDATE SET NULL: column \
18674                                         {child_name:?}.{:?} is NOT NULL",
18675                                        col.name,
18676                                    )));
18677                                }
18678                            }
18679                            let entry = setnull_plan.entry(child_name.clone()).or_default();
18680                            for &li in &fk.local_columns {
18681                                entry.insert((child_row_idx, li));
18682                            }
18683                        }
18684                        spg_storage::FkAction::SetDefault => {
18685                            let entry = setdefault_plan.entry(child_name.clone()).or_default();
18686                            for &li in &fk.local_columns {
18687                                let col = child.schema().columns.get(li).ok_or_else(|| {
18688                                    EngineError::Unsupported(alloc::format!(
18689                                        "FK local column {li} missing in {child_name:?}"
18690                                    ))
18691                                })?;
18692                                let default = col.default.clone().ok_or_else(|| {
18693                                    EngineError::Unsupported(alloc::format!(
18694                                        "FOREIGN KEY ON UPDATE SET DEFAULT: column \
18695                                         {child_name:?}.{:?} has no DEFAULT",
18696                                        col.name,
18697                                    ))
18698                                })?;
18699                                entry.insert((child_row_idx, li), default);
18700                            }
18701                        }
18702                    }
18703                }
18704            }
18705        }
18706    }
18707    // Flatten into FkChildStep list. UPDATE doesn't produce
18708    // DeleteSteps (CASCADE on UPDATE just rewrites FK values).
18709    let mut steps: Vec<FkChildStep> = Vec::new();
18710    for (child_table, entries) in cascade_plan {
18711        let mut positions = Vec::with_capacity(entries.len());
18712        let mut columns = Vec::with_capacity(entries.len());
18713        let mut defaults = Vec::with_capacity(entries.len());
18714        for ((p, c), v) in entries {
18715            positions.push(p);
18716            columns.push(c);
18717            defaults.push(v);
18718        }
18719        // We reuse `FkChildAction::SetDefault` for cascade-update:
18720        // both shapes are "write a known value into specific cells"
18721        // — `apply_per_cell_writes` doesn't care whether the value
18722        // came from a DEFAULT declaration or a new parent key.
18723        steps.push(FkChildStep {
18724            child_table,
18725            action: FkChildAction::SetDefault {
18726                positions,
18727                columns,
18728                defaults,
18729            },
18730        });
18731    }
18732    for (child_table, entries) in setnull_plan {
18733        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
18734        steps.push(FkChildStep {
18735            child_table,
18736            action: FkChildAction::SetNull { positions, columns },
18737        });
18738    }
18739    for (child_table, entries) in setdefault_plan {
18740        let mut positions = Vec::with_capacity(entries.len());
18741        let mut columns = Vec::with_capacity(entries.len());
18742        let mut defaults = Vec::with_capacity(entries.len());
18743        for ((p, c), v) in entries {
18744            positions.push(p);
18745            columns.push(c);
18746            defaults.push(v);
18747        }
18748        steps.push(FkChildStep {
18749            child_table,
18750            action: FkChildAction::SetDefault {
18751                positions,
18752                columns,
18753                defaults,
18754            },
18755        });
18756    }
18757    let _ = delete_plan; // UPDATE never deletes children.
18758    Ok(steps)
18759}
18760
18761/// v7.6.5 — apply one FK child step to the catalog. Encapsulates
18762/// the three action variants so the DELETE executor stays a
18763/// simple loop over the planned steps.
18764fn apply_fk_child_step(catalog: &mut Catalog, step: &FkChildStep) -> Result<(), EngineError> {
18765    let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
18766        EngineError::Storage(StorageError::TableNotFound {
18767            name: step.child_table.clone(),
18768        })
18769    })?;
18770    match &step.action {
18771        FkChildAction::Delete { positions } => {
18772            let _ = child.delete_rows(positions);
18773        }
18774        FkChildAction::SetNull { positions, columns } => {
18775            apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
18776        }
18777        FkChildAction::SetDefault {
18778            positions,
18779            columns,
18780            defaults,
18781        } => {
18782            apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
18783        }
18784    }
18785    Ok(())
18786}
18787
18788/// v7.6.5 — write new values into selected child cells via
18789/// `Table::update_row` (the catalog's existing UPDATE entry).
18790/// Groups writes by row position so multi-column updates on the
18791/// same row only call `update_row` once. `value_for(i)` produces
18792/// the new value for the i-th (position, column) entry.
18793fn apply_per_cell_writes(
18794    child: &mut spg_storage::Table,
18795    positions: &[usize],
18796    columns: &[usize],
18797    mut value_for: impl FnMut(usize) -> Value,
18798) -> Result<(), EngineError> {
18799    use alloc::collections::BTreeMap;
18800    let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
18801    for i in 0..positions.len() {
18802        by_row
18803            .entry(positions[i])
18804            .or_default()
18805            .push((columns[i], value_for(i)));
18806    }
18807    for (pos, mutations) in by_row {
18808        let mut new_values = child.rows()[pos].values.clone();
18809        for (col, v) in mutations {
18810            if let Some(slot) = new_values.get_mut(col) {
18811                *slot = v;
18812            }
18813        }
18814        child
18815            .update_row(pos, new_values)
18816            .map_err(EngineError::Storage)?;
18817    }
18818    Ok(())
18819}
18820
18821fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
18822    match a {
18823        spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
18824        spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
18825        spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
18826        spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
18827        spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
18828    }
18829}
18830
18831/// v7.9.21 — resolve a column's DEFAULT for INSERT-time
18832/// default-fill. Free fn (rather than `&self`) so callers
18833/// with an active `&mut Table` borrow can still use it.
18834/// Literal defaults take the cached path (`col.default`);
18835/// runtime defaults hit `clock_fn` at each call. mailrs G4.
18836fn resolve_column_default_free(
18837    col: &ColumnSchema,
18838    clock_fn: Option<ClockFn>,
18839) -> Result<Value, EngineError> {
18840    if let Some(rt) = &col.runtime_default {
18841        return eval_runtime_default_free(rt, col.ty, clock_fn);
18842    }
18843    Ok(col.default.clone().unwrap_or(Value::Null))
18844}
18845
18846fn eval_runtime_default_free(
18847    rt: &str,
18848    ty: DataType,
18849    clock_fn: Option<ClockFn>,
18850) -> Result<Value, EngineError> {
18851    let s = rt.trim().to_ascii_lowercase();
18852    // v7.17.0 Phase 2.1 — also strip `(N)` precision suffix
18853    // so MySQL `CURRENT_TIMESTAMP(6)` resolves the same as
18854    // bare `CURRENT_TIMESTAMP`. SPG stores TIMESTAMP at fixed
18855    // microsecond resolution; the precision modifier is
18856    // parser-only.
18857    let with_no_parens = s.trim_end_matches("()");
18858    let canonical: &str = if let Some(open_idx) = with_no_parens.find('(') {
18859        if with_no_parens.ends_with(')') {
18860            &with_no_parens[..open_idx]
18861        } else {
18862            with_no_parens
18863        }
18864    } else {
18865        with_no_parens
18866    };
18867    let now_us = match clock_fn {
18868        Some(f) => f(),
18869        None => 0,
18870    };
18871    let v = match canonical {
18872        "now" | "current_timestamp" | "localtimestamp" => Value::Timestamp(now_us),
18873        "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
18874        "current_time" | "localtime" => Value::Timestamp(now_us),
18875        // v7.17.0 — UUID generators in DEFAULT clauses. Required
18876        // for the canonical Django / Rails / Hibernate `id UUID
18877        // PRIMARY KEY DEFAULT gen_random_uuid()` pattern. Each
18878        // INSERT evaluates the function fresh; the per-row UUID
18879        // is the storage value, not a cached literal.
18880        "gen_random_uuid" | "uuid_generate_v4" => Value::Uuid(eval::gen_random_uuid_bytes()),
18881        other => {
18882            return Err(EngineError::Unsupported(alloc::format!(
18883                "runtime DEFAULT expression {other:?} not supported \
18884                 (v7.17.0 whitelist: now() / current_timestamp / \
18885                 current_date / current_time / localtimestamp / \
18886                 localtime / gen_random_uuid() / \
18887                 uuid_generate_v4())"
18888            )));
18889        }
18890    };
18891    coerce_value(v, ty, "DEFAULT", 0)
18892}
18893
18894/// v7.9.21 — true when a DEFAULT expression needs INSERT-time
18895/// evaluation rather than being cacheable as a literal Value.
18896/// FunctionCall is the immediate case (`now()`,
18897/// `current_timestamp`). Literal expressions and simple sign-
18898/// flipped numerics still take the static-cache path.
18899fn is_runtime_default_expr(expr: &Expr) -> bool {
18900    match expr {
18901        Expr::FunctionCall { .. } => true,
18902        Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
18903        _ => false,
18904    }
18905}
18906
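// v7.17.0 Phase 1.4 — INSERT/UPDATE-time enum label check. When
// `col_idx` has a registered label list, the cell value must be
// NULL or one of the labels (case-sensitive per PG).
// (This paragraph describes `enforce_enum_label`, defined after
// `canonicalize_set_value`; it is kept as a plain comment so it
// does not attach to the wrong function's rustdoc.)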
18910/// v7.17.0 Phase 3.P0-37 — validate + canonicalise a MySQL inline
18911/// SET cell. For non-SET columns this is a no-op pass-through.
18912///
18913/// Semantics:
18914///   * NULL preserved.
18915///   * Empty string → `''` (zero flags).
18916///   * Otherwise split on ',', trim each token, validate every
18917///     token against the column's variant list (error on miss),
18918///     de-dup, then re-emit in DEFINITION order joined by ','.
18919fn canonicalize_set_value(
18920    lookup: &alloc::collections::BTreeMap<usize, Vec<String>>,
18921    col_idx: usize,
18922    col_name: &str,
18923    value: Value,
18924) -> Result<Value, EngineError> {
18925    let Some(variants) = lookup.get(&col_idx) else {
18926        return Ok(value);
18927    };
18928    match value {
18929        Value::Null => Ok(Value::Null),
18930        Value::Text(s) => {
18931            if s.is_empty() {
18932                return Ok(Value::Text(alloc::string::String::new()));
18933            }
18934            // Collect a presence-set of variant indices to keep
18935            // definition order + handle de-dup in one pass.
18936            let mut present = alloc::vec![false; variants.len()];
18937            for raw in s.split(',') {
18938                let tok = raw.trim();
18939                if tok.is_empty() {
18940                    continue;
18941                }
18942                let idx = variants.iter().position(|v| v == tok).ok_or_else(|| {
18943                    EngineError::Unsupported(alloc::format!(
18944                        "column {col_name:?}: invalid SET token {tok:?}; \
18945                         allowed: {variants:?}"
18946                    ))
18947                })?;
18948                present[idx] = true;
18949            }
18950            // Re-emit in definition order.
18951            let mut out = alloc::string::String::new();
18952            let mut first = true;
18953            for (i, keep) in present.iter().enumerate() {
18954                if !keep {
18955                    continue;
18956                }
18957                if !first {
18958                    out.push(',');
18959                }
18960                first = false;
18961                out.push_str(&variants[i]);
18962            }
18963            Ok(Value::Text(out))
18964        }
18965        other => Err(EngineError::Unsupported(alloc::format!(
18966            "column {col_name:?}: SET-typed column expects TEXT, got {:?}",
18967            other.data_type()
18968        ))),
18969    }
18970}
18971
18972fn enforce_enum_label(
18973    lookup: &alloc::collections::BTreeMap<usize, Vec<String>>,
18974    col_idx: usize,
18975    col_name: &str,
18976    value: &Value,
18977) -> Result<(), EngineError> {
18978    if let Some(labels) = lookup.get(&col_idx) {
18979        match value {
18980            Value::Null => Ok(()),
18981            Value::Text(s) => {
18982                if labels.iter().any(|l| l == s) {
18983                    Ok(())
18984                } else {
18985                    Err(EngineError::Unsupported(alloc::format!(
18986                        "column {col_name:?}: invalid enum label {s:?}; allowed: {labels:?}"
18987                    )))
18988                }
18989            }
18990            other => Err(EngineError::Unsupported(alloc::format!(
18991                "column {col_name:?}: enum-typed column expects TEXT, got {:?}",
18992                other.data_type()
18993            ))),
18994        }
18995    } else {
18996        Ok(())
18997    }
18998}
18999
19000fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
19001    let ty = column_type_to_data_type(c.ty);
19002    let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
19003    // user_type_ref is the raw ident the parser couldn't resolve
19004    // to a built-in; classification into enum vs domain happens
19005    // at exec_create_table where we have catalog access. We
19006    // park it temporarily as user_enum_type and the engine
19007    // promotes domain bindings to user_domain_type before the
19008    // table is stored.
19009    if let Some(name) = c.user_type_ref {
19010        schema.user_enum_type = Some(name);
19011    }
19012    // v7.17.0 Phase 2.1 — render the ON UPDATE expression to
19013    // canonical text (the engine re-parses at UPDATE time).
19014    if let Some(expr) = c.on_update_runtime {
19015        schema.on_update_runtime = Some(alloc::format!("{expr}"));
19016    }
19017    // v7.17.0 Phase 2.5 — bridge the AST `Collation` enum to the
19018    // storage one. Same variants, different crates (spg-storage
19019    // owns no dep on spg-sql).
19020    schema.collation = match c.collation {
19021        spg_sql::ast::Collation::Binary => spg_storage::Collation::Binary,
19022        spg_sql::ast::Collation::CaseInsensitive => spg_storage::Collation::CaseInsensitive,
19023    };
19024    // v7.17.0 Phase 4.4 — MySQL `UNSIGNED` flag propagates to
19025    // storage so engine INSERT / UPDATE can range-check.
19026    schema.is_unsigned = c.is_unsigned;
19027    // v7.17.0 Phase 3.P0-36 — MySQL inline ENUM variant list.
19028    // INSERT validation lives in coerce_value (Text → Text path
19029    // with the column's variant list as the accept-set).
19030    schema.inline_enum_variants = c.inline_enum_variants;
19031    // v7.17.0 Phase 3.P0-37 — MySQL inline SET variant list.
19032    // INSERT canonicalisation (de-dup + sort by definition order)
19033    // lives in the exec_insert path next to the ENUM check.
19034    schema.inline_set_variants = c.inline_set_variants;
19035    if let Some(default_expr) = c.default {
19036        // v7.9.21 — distinguish literal defaults (evaluated once
19037        // at CREATE TABLE) from expression defaults (deferred to
19038        // INSERT). Function calls (`now()`, `current_timestamp`
19039        // — see v7.9.20 keyword promotion) take the runtime path.
19040        // Literals continue to cache. mailrs G4.
19041        if is_runtime_default_expr(&default_expr) {
19042            let display = alloc::format!("{default_expr}");
19043            schema = schema.with_runtime_default(display);
19044        } else {
19045            let raw = literal_expr_to_value(default_expr)?;
19046            let coerced = coerce_value(raw, ty, &c.name, 0)?;
19047            schema = schema.with_default(coerced);
19048        }
19049    }
19050    if c.auto_increment {
19051        // AUTO_INCREMENT only makes sense on integer-shaped columns.
19052        if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
19053            return Err(EngineError::Unsupported(alloc::format!(
19054                "AUTO_INCREMENT requires an integer column type, got {ty:?}"
19055            )));
19056        }
19057        schema = schema.with_auto_increment();
19058    }
19059    Ok(schema)
19060}
19061
19062/// v7.10.4 — decode a BYTEA literal. Accepts:
19063///   * `\xDEADBEEF` (case-insensitive hex; whitespace stripped)
19064///   * `Hello\000world` (backslash escape form; `\\` for literal backslash)
19065///   * Anything else → raw UTF-8 bytes of the input (PG accepts this too).
19066fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
19067    let s = s.trim();
19068    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
19069        // Hex form. Each pair of hex digits → one byte.
19070        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
19071        if cleaned.len() % 2 != 0 {
19072            return Err("odd-length hex literal");
19073        }
19074        let mut out = alloc::vec::Vec::with_capacity(cleaned.len() / 2);
19075        let cleaned_bytes = cleaned.as_bytes();
19076        for i in (0..cleaned_bytes.len()).step_by(2) {
19077            let hi = hex_nibble(cleaned_bytes[i])?;
19078            let lo = hex_nibble(cleaned_bytes[i + 1])?;
19079            out.push((hi << 4) | lo);
19080        }
19081        return Ok(out);
19082    }
19083    // Escape form or raw. Walk char-by-char; `\\` and `\NNN` octal
19084    // sequences decode; anything else is a literal byte.
19085    let bytes = s.as_bytes();
19086    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
19087    let mut i = 0;
19088    while i < bytes.len() {
19089        let b = bytes[i];
19090        if b == b'\\' && i + 1 < bytes.len() {
19091            let n = bytes[i + 1];
19092            if n == b'\\' {
19093                out.push(b'\\');
19094                i += 2;
19095                continue;
19096            }
19097            if n.is_ascii_digit()
19098                && i + 3 < bytes.len()
19099                && bytes[i + 2].is_ascii_digit()
19100                && bytes[i + 3].is_ascii_digit()
19101            {
19102                let oct = |x: u8| (x - b'0') as u32;
19103                let v = oct(n) * 64 + oct(bytes[i + 2]) * 8 + oct(bytes[i + 3]);
19104                if v <= 0xFF {
19105                    out.push(v as u8);
19106                    i += 4;
19107                    continue;
19108                }
19109            }
19110        }
19111        out.push(b);
19112        i += 1;
19113    }
19114    Ok(out)
19115}
19116
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value
/// 0–15.
///
/// # Errors
/// `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    // Pick the byte to subtract so the digit lands on its value;
    // the letter ranges start at 10, hence the `- 10`.
    let base = if b.is_ascii_digit() {
        b'0'
    } else if (b'a'..=b'f').contains(&b) {
        b'a' - 10
    } else if (b'A'..=b'F').contains(&b) {
        b'A' - 10
    } else {
        return Err("invalid hex digit");
    };
    Ok(b - base)
}
19125
// v7.10.11 — decode a PG TEXT[] external array form
// (`{a,b,NULL}` with optional double-quoted elements). The
// decoder requires the enclosing `{`/`}` and splits at commas.
// Quoted elements (`"hello, world"`) preserve embedded commas;
// `\\` and `\"` decode to literal backslash / quote. Plain
// unquoted `NULL` (case-insensitive) maps to `None`.
// (This paragraph describes `decode_text_array_literal`, defined
// after `array_literal_widen`; it is kept as a plain comment so it
// does not attach to the wrong function's rustdoc.)
19132/// v7.11.13 — pick the array type for `ARRAY[lit, …]` from the
19133/// element values. Single-element-type rules:
19134///   - all NULL / all Text → TextArray
19135///   - all Int (or Int+NULL) → IntArray
19136///   - any BigInt without Text → BigIntArray (widening)
19137///   - any Text → TextArray (fallback; non-string elements
19138///     render as text)
19139fn array_literal_widen(items: alloc::vec::Vec<Value>) -> Value {
19140    let mut has_text = false;
19141    let mut has_bigint = false;
19142    let mut has_int = false;
19143    for v in &items {
19144        match v {
19145            Value::Null => {}
19146            Value::Text(_) | Value::Json(_) => has_text = true,
19147            Value::BigInt(_) => has_bigint = true,
19148            Value::Int(_) | Value::SmallInt(_) => has_int = true,
19149            _ => has_text = true,
19150        }
19151    }
19152    if has_text || (!has_bigint && !has_int) {
19153        let out: alloc::vec::Vec<Option<alloc::string::String>> = items
19154            .into_iter()
19155            .map(|v| match v {
19156                Value::Null => None,
19157                Value::Text(s) | Value::Json(s) => Some(s),
19158                other => Some(alloc::format!("{other:?}")),
19159            })
19160            .collect();
19161        return Value::TextArray(out);
19162    }
19163    if has_bigint {
19164        let out: alloc::vec::Vec<Option<i64>> = items
19165            .into_iter()
19166            .map(|v| match v {
19167                Value::Null => None,
19168                Value::Int(n) => Some(i64::from(n)),
19169                Value::SmallInt(n) => Some(i64::from(n)),
19170                Value::BigInt(n) => Some(n),
19171                _ => unreachable!("widen: unexpected non-integer in BigInt path"),
19172            })
19173            .collect();
19174        return Value::BigIntArray(out);
19175    }
19176    let out: alloc::vec::Vec<Option<i32>> = items
19177        .into_iter()
19178        .map(|v| match v {
19179            Value::Null => None,
19180            Value::Int(n) => Some(n),
19181            Value::SmallInt(n) => Some(i32::from(n)),
19182            _ => unreachable!("widen: unexpected non-i32-compatible in Int path"),
19183        })
19184        .collect();
19185    Value::IntArray(out)
19186}
19187
/// v7.10.11 — decode a PG TEXT[] external array form
/// (`{a,b,NULL}` with optional double-quoted elements).
///
/// The input must be wrapped in `{`/`}` (surrounding whitespace is
/// ignored); an empty body yields an empty vector. Elements are
/// separated by commas. Quoted elements (`"hello, world"`) keep
/// embedded commas, and a backslash inside quotes makes the next
/// byte literal (`\\` → `\`, `\"` → `"`). A plain unquoted `NULL`
/// (case-insensitive) maps to `None`; every other unquoted element
/// is whitespace-trimmed and taken verbatim.
///
/// # Errors
/// Returns a static message when the braces are missing, a quoted
/// element is unterminated, or something other than `,` follows a
/// quoted element.
fn decode_text_array_literal(
    s: &str,
) -> Result<alloc::vec::Vec<Option<alloc::string::String>>, &'static str> {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("TEXT[] literal must be enclosed in '{...}'")?;
    let mut out: alloc::vec::Vec<Option<alloc::string::String>> = alloc::vec::Vec::new();
    if inner.trim().is_empty() {
        return Ok(out);
    }
    let bytes = inner.as_bytes();
    let mut i = 0;
    // One iteration per element; the loop exits via `break` once the
    // input is exhausted after an element. A trailing comma therefore
    // still produces one final empty element, as before.
    loop {
        // Skip leading whitespace.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i < bytes.len() && bytes[i] == b'"' {
            // Quoted element.
            i += 1; // open quote
            // Accumulate raw bytes rather than `byte as char`: casting each
            // byte would re-interpret multi-byte UTF-8 sequences as Latin-1
            // and corrupt non-ASCII text.
            let mut buf: alloc::vec::Vec<u8> = alloc::vec::Vec::new();
            while i < bytes.len() && bytes[i] != b'"' {
                if bytes[i] == b'\\' && i + 1 < bytes.len() {
                    i += 1; // drop the backslash, keep the escaped byte
                }
                buf.push(bytes[i]);
                i += 1;
            }
            if i >= bytes.len() {
                return Err("unterminated quoted element");
            }
            i += 1; // close quote
            // Only ASCII bytes are ever removed, so the remainder is still
            // valid UTF-8; the error arm is purely defensive.
            let text = alloc::string::String::from_utf8(buf)
                .map_err(|_| "invalid UTF-8 in quoted element")?;
            out.push(Some(text));
        } else {
            // Unquoted element — read until next comma or end.
            let start = i;
            while i < bytes.len() && bytes[i] != b',' {
                i += 1;
            }
            let raw = inner[start..i].trim();
            if raw.eq_ignore_ascii_case("NULL") {
                out.push(None);
            } else {
                out.push(Some(alloc::string::String::from(raw)));
            }
        }
        // Skip whitespace, expect comma or end.
        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b',' {
            return Err("expected ',' between TEXT[] elements");
        }
        i += 1;
    }
    Ok(out)
}
19252
/// v7.10.11 — encode a TEXT[] back into the PG external array
/// form. NULL elements become the literal `NULL`. An element is
/// double-quoted (with `\\` / `\"` escapes) when it is empty,
/// spells `NULL` (any case), or contains a comma, brace, quote,
/// backslash, or any whitespace.
///
/// Quoting on *any* whitespace (not just space / tab) follows PG's
/// `array_out` and keeps a leading or trailing newline from being
/// trimmed away when the text is decoded again as an unquoted
/// element.
fn encode_text_array(items: &[Option<alloc::string::String>]) -> alloc::string::String {
    let mut out = alloc::string::String::with_capacity(2 + items.len() * 8);
    out.push('{');
    for (i, item) in items.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        let Some(s) = item else {
            out.push_str("NULL");
            continue;
        };
        let needs_quote = s.is_empty()
            || s.eq_ignore_ascii_case("NULL")
            || s.chars()
                .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
        if !needs_quote {
            out.push_str(s);
            continue;
        }
        out.push('"');
        for c in s.chars() {
            if c == '"' || c == '\\' {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }
    out.push('}');
    out
}
19289
19290/// v7.10.4 — encode BYTEA bytes in PG hex output format
19291/// (`\x` prefix, lowercase hex pairs). Used by Text-side
19292/// round-trip + the wire layer's text-mode encoder.
19293fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
19294    let mut out = alloc::string::String::with_capacity(2 + 2 * b.len());
19295    out.push_str("\\x");
19296    for byte in b {
19297        let hi = byte >> 4;
19298        let lo = byte & 0x0F;
19299        out.push(hex_digit(hi));
19300        out.push(hex_digit(lo));
19301    }
19302    out
19303}
19304
/// Map a nibble value (`0..=15`) to its lowercase ASCII hex digit.
/// Values above 15 yield `'?'`.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
19312
/// v7.17.0 Phase 3.P0-39 — parse a PG `hstore` text literal into
/// a flat key→value map. Empty string → empty map. Duplicate
/// keys take last-write-wins (matches PG `hstore_in`).
///
/// Accepted shapes (minimal subset):
///   * `'a=>1, b=>2'`            — bareword keys/values
///   * `'"a"=>"1", "b"=>"2"'`    — quoted keys/values; inside
///     quotes a backslash makes the next byte literal
///   * `'a=>NULL'`               — case-insensitive NULL token
///     surfaces as `None` (no quotes around NULL)
///
/// Quoted tokens are accumulated as raw bytes and validated as
/// UTF-8 once complete, so non-ASCII keys and values round-trip
/// intact.
///
/// Returns None on parse failure → caller surfaces as hard error.
fn parse_hstore_str(
    s: &str,
) -> Option<Vec<(alloc::string::String, Option<alloc::string::String>)>> {
    let bytes = s.as_bytes();
    let mut i = 0;
    let mut out: Vec<(alloc::string::String, Option<alloc::string::String>)> = Vec::new();
    let skip_ws = |bytes: &[u8], i: &mut usize| {
        while *i < bytes.len() && matches!(bytes[*i], b' ' | b'\t' | b'\n' | b'\r') {
            *i += 1;
        }
    };
    // Reads one key/value token starting at `*i` and advances past it.
    let parse_token = |bytes: &[u8], i: &mut usize| -> Option<alloc::string::String> {
        if *i >= bytes.len() {
            return None;
        }
        if bytes[*i] == b'"' {
            *i += 1;
            // Collect bytes, not `byte as char`: the cast would turn each
            // byte of a multi-byte UTF-8 sequence into a separate Latin-1
            // char and corrupt non-ASCII text.
            let mut raw: Vec<u8> = Vec::new();
            while *i < bytes.len() {
                match bytes[*i] {
                    b'"' => {
                        *i += 1;
                        return alloc::string::String::from_utf8(raw).ok();
                    }
                    b'\\' if *i + 1 < bytes.len() => {
                        raw.push(bytes[*i + 1]);
                        *i += 2;
                    }
                    c => {
                        raw.push(c);
                        *i += 1;
                    }
                }
            }
            // Ran off the end without a closing quote.
            None
        } else {
            let start = *i;
            while *i < bytes.len()
                && !matches!(bytes[*i], b' ' | b'\t' | b'\n' | b'\r' | b',' | b'=')
            {
                *i += 1;
            }
            if *i == start {
                return None;
            }
            Some(alloc::str::from_utf8(&bytes[start..*i]).ok()?.to_string())
        }
    };
    skip_ws(bytes, &mut i);
    while i < bytes.len() {
        let key = parse_token(bytes, &mut i)?;
        skip_ws(bytes, &mut i);
        if i + 1 >= bytes.len() || bytes[i] != b'=' || bytes[i + 1] != b'>' {
            return None;
        }
        i += 2;
        skip_ws(bytes, &mut i);
        // Check for unquoted NULL token (case-insensitive).
        let val_token = if i + 4 <= bytes.len()
            && bytes[i..i + 4].eq_ignore_ascii_case(b"NULL")
            && (i + 4 == bytes.len() || matches!(bytes[i + 4], b' ' | b'\t' | b',' | b'\n' | b'\r'))
        {
            i += 4;
            None
        } else {
            Some(parse_token(bytes, &mut i)?)
        };
        // Replace any existing entry with the same key (last-wins).
        if let Some(pos) = out.iter().position(|(k, _)| k == &key) {
            out[pos] = (key, val_token);
        } else {
            out.push((key, val_token));
        }
        skip_ws(bytes, &mut i);
        if i >= bytes.len() {
            break;
        }
        if bytes[i] == b',' {
            i += 1;
            skip_ws(bytes, &mut i);
            continue;
        }
        return None;
    }
    Some(out)
}
19410
/// v7.17.0 Phase 3.P0-39 — render a hstore as canonical PG text
/// form `"k"=>"v"` (keys and non-NULL values always quoted;
/// NULL token is bare). Pairs are joined with `", "`.
///
/// Double quotes and backslashes inside a key or value are
/// backslash-escaped; without that a `"` in the data would end the
/// quoted token early and the output could not be parsed back.
fn format_hstore_str(
    pairs: &[(alloc::string::String, Option<alloc::string::String>)],
) -> alloc::string::String {
    /// Append `text` wrapped in double quotes, escaping `"` and `\`.
    fn push_quoted(out: &mut alloc::string::String, text: &str) {
        out.push('"');
        for c in text.chars() {
            if matches!(c, '"' | '\\') {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }

    let mut out = alloc::string::String::new();
    for (i, (k, v)) in pairs.iter().enumerate() {
        if i > 0 {
            out.push_str(", ");
        }
        push_quoted(&mut out, k);
        out.push_str("=>");
        match v {
            None => out.push_str("NULL"),
            Some(val) => push_quoted(&mut out, val),
        }
    }
    out
}
19436
/// v7.17.0 Phase 3.P0-39 — public wrapper around the private
/// `format_hstore_str`, so pgwire + sqllogictest share the single
/// hstore renderer.
///
/// `pairs` is the flat key → optional-value list (`None` renders
/// as the bare `NULL` token); the returned string is exactly what
/// `format_hstore_str` produces.
pub fn format_hstore_text(
    pairs: &[(alloc::string::String, Option<alloc::string::String>)],
) -> alloc::string::String {
    format_hstore_str(pairs)
}
19444
19445// ─── v7.17.0 Phase 3.P0-40 — 2D array parse + display ─────────
19446
/// Split a PG external 2D-array literal `'{{a,b},{c,d}}'` into
/// per-row token lists. Returns Err on shape mismatch.
///
/// Cells are returned whitespace-trimmed but otherwise raw: a
/// double-quoted cell keeps its quotes and backslash escapes, so
/// the typed parsers decide how to decode it. Braces and commas
/// inside a double-quoted cell are treated as data (a backslash
/// inside quotes protects the next byte), so `{{"a,b",c}}` is one
/// row of two cells. Outside quotes every comma separates cells.
///
/// # Errors
/// * the outer `{...}` is missing,
/// * a row does not start with `{`,
/// * a row's braces or quotes are not closed,
/// * rows have different cell counts (ragged array).
fn split_2d_literal(s: &str) -> Result<Vec<Vec<alloc::string::String>>, &'static str> {
    let s = s.trim();
    let outer = s
        .strip_prefix('{')
        .and_then(|x| x.strip_suffix('}'))
        .ok_or("missing outer '{...}' braces")?;
    let trimmed = outer.trim();
    if trimmed.is_empty() {
        return Ok(Vec::new());
    }
    let bytes = trimmed.as_bytes();
    let mut rows: Vec<Vec<alloc::string::String>> = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        // Skip whitespace and row separators between rows.
        while i < bytes.len() && matches!(bytes[i], b' ' | b'\t' | b'\n' | b'\r' | b',') {
            i += 1;
        }
        if i >= bytes.len() {
            break;
        }
        if bytes[i] != b'{' {
            return Err("expected '{' opening a row");
        }
        i += 1;
        let row_start = i;
        let mut cell_start = i;
        let mut cells: Vec<alloc::string::String> = Vec::new();
        let mut depth = 1usize;
        let mut in_quotes = false;
        // Scan to the `}` closing this row, cutting cells at commas.
        // All delimiters are ASCII, so every slice boundary below falls
        // on a UTF-8 character boundary.
        while i < bytes.len() {
            match bytes[i] {
                // Escaped byte inside quotes: step over the backslash here,
                // the shared `i += 1` below steps over the escaped byte.
                b'\\' if in_quotes => i += 1,
                b'"' => in_quotes = !in_quotes,
                _ if in_quotes => {}
                b'{' => depth += 1,
                b'}' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                b',' => {
                    cells.push(trimmed[cell_start..i].trim().to_string());
                    cell_start = i + 1;
                }
                _ => {}
            }
            i += 1;
        }
        // Reached the end with an open brace (or an open quote, which
        // prevents the closing brace from being recognised).
        if depth != 0 {
            return Err("unbalanced '{...}' in row");
        }
        // A blank row body (`{}` / `{  }`) is a row with zero cells.
        if !trimmed[row_start..i].trim().is_empty() {
            cells.push(trimmed[cell_start..i].trim().to_string());
        }
        i += 1; // step past the closing '}'
        rows.push(cells);
    }
    if let Some(first) = rows.first() {
        let cols = first.len();
        if rows.iter().any(|r| r.len() != cols) {
            return Err("ragged 2D array (rows have different column counts)");
        }
    }
    Ok(rows)
}
19507
19508fn parse_int_2d_literal(s: &str) -> Result<Vec<Vec<Option<i32>>>, &'static str> {
19509    let raw = split_2d_literal(s)?;
19510    raw.into_iter()
19511        .map(|row| {
19512            row.into_iter()
19513                .map(|cell| {
19514                    if cell.eq_ignore_ascii_case("NULL") {
19515                        Ok(None)
19516                    } else {
19517                        cell.parse::<i32>()
19518                            .map(Some)
19519                            .map_err(|_| "invalid int element")
19520                    }
19521                })
19522                .collect()
19523        })
19524        .collect()
19525}
19526
19527fn parse_bigint_2d_literal(s: &str) -> Result<Vec<Vec<Option<i64>>>, &'static str> {
19528    let raw = split_2d_literal(s)?;
19529    raw.into_iter()
19530        .map(|row| {
19531            row.into_iter()
19532                .map(|cell| {
19533                    if cell.eq_ignore_ascii_case("NULL") {
19534                        Ok(None)
19535                    } else {
19536                        cell.parse::<i64>()
19537                            .map(Some)
19538                            .map_err(|_| "invalid bigint element")
19539                    }
19540                })
19541                .collect()
19542        })
19543        .collect()
19544}
19545
19546fn parse_text_2d_literal(s: &str) -> Result<Vec<Vec<Option<alloc::string::String>>>, &'static str> {
19547    let raw = split_2d_literal(s)?;
19548    Ok(raw
19549        .into_iter()
19550        .map(|row| {
19551            row.into_iter()
19552                .map(|cell| {
19553                    if cell.eq_ignore_ascii_case("NULL") {
19554                        None
19555                    } else {
19556                        Some(cell.trim_matches('"').to_string())
19557                    }
19558                })
19559                .collect()
19560        })
19561        .collect())
19562}
19563
/// Render a 2D `i32` array in PG external form, e.g.
/// `{{1,NULL},{3,4}}`. `None` cells print as `NULL`; an empty
/// outer slice prints as `{}`.
fn format_int_2d_text(rows: &[Vec<Option<i32>>]) -> alloc::string::String {
    let rendered_rows: Vec<alloc::string::String> = rows
        .iter()
        .map(|row| {
            let cells: Vec<alloc::string::String> = row
                .iter()
                .map(|cell| cell.map_or_else(|| "NULL".to_string(), |n| n.to_string()))
                .collect();
            alloc::format!("{{{}}}", cells.join(","))
        })
        .collect();
    alloc::format!("{{{}}}", rendered_rows.join(","))
}
19585
/// Render a 2D `i64` array in PG external form, e.g.
/// `{{1,NULL},{3,4}}`. `None` cells print as `NULL`; an empty
/// outer slice prints as `{}`.
fn format_bigint_2d_text(rows: &[Vec<Option<i64>>]) -> alloc::string::String {
    let render_row = |row: &[Option<i64>]| -> alloc::string::String {
        let cells: Vec<alloc::string::String> = row
            .iter()
            .map(|cell| match cell {
                Some(n) => n.to_string(),
                None => "NULL".to_string(),
            })
            .collect();
        alloc::format!("{{{}}}", cells.join(","))
    };
    let body: Vec<alloc::string::String> = rows.iter().map(|row| render_row(row)).collect();
    alloc::format!("{{{}}}", body.join(","))
}
19607
/// Render a 2D text array in PG external form, e.g.
/// `{{a,NULL},{"b,c",d}}`. `None` cells print as the bare `NULL`
/// token; an empty outer slice prints as `{}`.
///
/// A text cell is double-quoted (with `\\` / `\"` escapes) when it
/// is empty, spells `NULL` (any case), or contains a comma, brace,
/// quote, backslash, or whitespace. Without the quoting a text
/// `"NULL"` is indistinguishable from SQL NULL and a cell holding
/// `,` or `}` changes the apparent shape of the array.
fn format_text_2d_text(rows: &[Vec<Option<alloc::string::String>>]) -> alloc::string::String {
    /// Append one non-NULL element, quoting and escaping when required.
    fn push_element(out: &mut alloc::string::String, s: &str) {
        let needs_quote = s.is_empty()
            || s.eq_ignore_ascii_case("NULL")
            || s.chars()
                .any(|c| matches!(c, ',' | '{' | '}' | '"' | '\\') || c.is_whitespace());
        if !needs_quote {
            out.push_str(s);
            return;
        }
        out.push('"');
        for c in s.chars() {
            if c == '"' || c == '\\' {
                out.push('\\');
            }
            out.push(c);
        }
        out.push('"');
    }

    let mut out = alloc::string::String::from("{");
    for (i, row) in rows.iter().enumerate() {
        if i > 0 {
            out.push(',');
        }
        out.push('{');
        for (j, cell) in row.iter().enumerate() {
            if j > 0 {
                out.push(',');
            }
            match cell {
                None => out.push_str("NULL"),
                Some(s) => push_element(&mut out, s),
            }
        }
        out.push('}');
    }
    out.push('}');
    out
}
19629
/// v7.17.0 Phase 3.P0-40 — pub re-exports so pgwire + sqllogictest
/// share the single 2D-array renderer.
///
/// Public wrapper for 2D `i32` arrays: forwards `rows` unchanged to
/// the private `format_int_2d_text` and returns its result.
pub fn format_int_2d_text_pub(rows: &[Vec<Option<i32>>]) -> alloc::string::String {
    format_int_2d_text(rows)
}
/// Public wrapper for 2D `i64` arrays: forwards `rows` unchanged to
/// the private `format_bigint_2d_text` and returns its result.
pub fn format_bigint_2d_text_pub(rows: &[Vec<Option<i64>>]) -> alloc::string::String {
    format_bigint_2d_text(rows)
}
/// Public wrapper for 2D text arrays: forwards `rows` unchanged to
/// the private `format_text_2d_text` and returns its result.
pub fn format_text_2d_text_pub(
    rows: &[Vec<Option<alloc::string::String>>],
) -> alloc::string::String {
    format_text_2d_text(rows)
}
19643
19644/// v7.17.0 Phase 3.P0-38 — parse a PG range literal of the form
19645/// `'[lo,up)'` / `'(lo,up]'` / `'[lo,up]'` / `'(lo,up)'` /
19646/// `'empty'`. Lower / upper may be empty (unbounded). Returns
19647/// `None` on any parse failure; caller surfaces as hard error.
19648fn parse_range_str(s: &str, kind: spg_storage::RangeKind) -> Option<Value> {
19649    let s = s.trim();
19650    if s.eq_ignore_ascii_case("empty") {
19651        return Some(Value::Range {
19652            kind,
19653            lower: None,
19654            upper: None,
19655            lower_inc: false,
19656            upper_inc: false,
19657            empty: true,
19658        });
19659    }
19660    let bytes = s.as_bytes();
19661    if bytes.len() < 3 {
19662        return None;
19663    }
19664    let lower_inc = match bytes[0] {
19665        b'[' => true,
19666        b'(' => false,
19667        _ => return None,
19668    };
19669    let upper_inc = match bytes[bytes.len() - 1] {
19670        b']' => true,
19671        b')' => false,
19672        _ => return None,
19673    };
19674    let inner = &s[1..s.len() - 1];
19675    let (lo_text, up_text) = inner.split_once(',')?;
19676    let lower = if lo_text.is_empty() {
19677        None
19678    } else {
19679        Some(alloc::boxed::Box::new(parse_range_element(lo_text, kind)?))
19680    };
19681    let upper = if up_text.is_empty() {
19682        None
19683    } else {
19684        Some(alloc::boxed::Box::new(parse_range_element(up_text, kind)?))
19685    };
19686    Some(Value::Range {
19687        kind,
19688        lower,
19689        upper,
19690        lower_inc,
19691        upper_inc,
19692        empty: false,
19693    })
19694}
19695
19696/// v7.17.0 Phase 3.P0-38 — parse a single range bound text into
19697/// the matching element Value for the RangeKind.
19698fn parse_range_element(text: &str, kind: spg_storage::RangeKind) -> Option<Value> {
19699    let text = text.trim().trim_matches('"');
19700    use spg_storage::RangeKind as K;
19701    match kind {
19702        K::Int4 => text.parse::<i32>().ok().map(Value::Int),
19703        K::Int8 => text.parse::<i64>().ok().map(Value::BigInt),
19704        K::Num => {
19705            // Reuse the Numeric parse via the engine's text-coercion
19706            // path; bail to None on failure.
19707            let dot = text.find('.');
19708            let scale: u8 = dot.map_or(0, |p| (text.len() - p - 1) as u8);
19709            let digits: alloc::string::String = text
19710                .chars()
19711                .filter(|c| *c == '-' || c.is_ascii_digit())
19712                .collect();
19713            let scaled: i128 = digits.parse().ok()?;
19714            Some(Value::Numeric { scaled, scale })
19715        }
19716        K::Ts | K::TsTz => {
19717            // Reuse the existing timestamp parse path. v7.17.0
19718            // expects `'YYYY-MM-DD HH:MM:SS[.ffffff]'` in range
19719            // bounds (TZ offset on TsTz is OOS for the initial
19720            // P0-38; ship plain Timestamp shape).
19721            crate::eval::parse_timestamp_literal(text).map(Value::Timestamp)
19722        }
19723        K::Date => crate::eval::parse_date_literal(text).map(Value::Date),
19724    }
19725}
19726
/// v7.17.0 Phase 3.P0-38 — render a Range value as its canonical
/// PG text form. Public wrapper around the private
/// `format_range_str`, for use from spg-server's pgwire layer.
///
/// `v` is forwarded unchanged; see `format_range_str` for how
/// non-range values and empty ranges are rendered.
pub fn format_range_text(v: &Value) -> alloc::string::String {
    format_range_str(v)
}
19733
19734fn format_range_str(v: &Value) -> alloc::string::String {
19735    let Value::Range {
19736        lower,
19737        upper,
19738        lower_inc,
19739        upper_inc,
19740        empty,
19741        ..
19742    } = v
19743    else {
19744        return alloc::string::String::new();
19745    };
19746    if *empty {
19747        return "empty".into();
19748    }
19749    let mut out = alloc::string::String::new();
19750    out.push(if *lower_inc { '[' } else { '(' });
19751    if let Some(l) = lower {
19752        out.push_str(&format_range_element(l));
19753    }
19754    out.push(',');
19755    if let Some(u) = upper {
19756        out.push_str(&format_range_element(u));
19757    }
19758    out.push(if *upper_inc { ']' } else { ')' });
19759    out
19760}
19761
19762fn format_range_element(v: &Value) -> alloc::string::String {
19763    match v {
19764        Value::Int(n) => alloc::format!("{n}"),
19765        Value::BigInt(n) => alloc::format!("{n}"),
19766        Value::Date(d) => crate::eval::format_date(*d),
19767        Value::Timestamp(t) => crate::eval::format_timestamp(*t),
19768        Value::Numeric { scaled, scale } => crate::eval::format_numeric(*scaled, *scale),
19769        other => alloc::format!("{other:?}"),
19770    }
19771}
19772
/// v7.17.0 Phase 3.P0-35 — parse a PG `money` literal into i64
/// cents. Grammar, after trimming surrounding whitespace:
///   * optional leading `-` (negative)
///   * optional `$` prefix
///   * integer portion: at least one digit, `,` separators are
///     ignored wherever they appear
///   * optional `.` followed by 1-2 digits (cents); a single digit
///     counts as tens of cents (`.5` → 50 cents).
///
/// Returns None on any parse failure (including i64 overflow) —
/// caller surfaces as hard SQL error.
fn parse_money_str(s: &str) -> Option<i64> {
    let trimmed = s.trim();
    let (negative, unsigned) = match trimmed.strip_prefix('-') {
        Some(after_sign) => (true, after_sign.trim_start()),
        None => (false, trimmed),
    };
    let amount = unsigned.strip_prefix('$').unwrap_or(unsigned).trim_start();
    let (whole, fraction) = match amount.split_once('.') {
        Some((w, f)) => (w, Some(f)),
        None => (amount, None),
    };

    // Accumulate the integer portion digit by digit, skipping commas.
    let mut dollars: i64 = 0;
    let mut saw_digit = false;
    for byte in whole.bytes() {
        match byte {
            b',' => {}
            b'0'..=b'9' => {
                saw_digit = true;
                dollars = dollars
                    .checked_mul(10)?
                    .checked_add(i64::from(byte - b'0'))?;
            }
            _ => return None,
        }
    }
    if !saw_digit {
        return None;
    }

    let cents: i64 = match fraction {
        None => 0,
        Some(f) => {
            let well_formed =
                (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit());
            if !well_formed {
                return None;
            }
            let value: i64 = f.parse().ok()?;
            // One fractional digit is tenths of a dollar.
            if f.len() == 1 {
                value * 10
            } else {
                value
            }
        }
    };

    let total = dollars.checked_mul(100)?.checked_add(cents)?;
    Some(if negative { -total } else { total })
}
19827
19828/// v7.17.0 Phase 3.P0-34 — parse a PG `timetz` literal
19829/// `HH:MM:SS[.fraction]±HH[:MM]` into (us, offset_secs).
19830///
19831/// The offset suffix is MANDATORY: SPG doesn't have a session TZ
19832/// wired into eval, so a bare `HH:MM:SS` literal would be
19833/// ambiguous. Returns None for any parse failure or out-of-range
19834/// component — caller surfaces as a hard SQL error.
19835///
19836/// Offset range: ±14 hours (±50400 seconds), matching PG's
19837/// internal limit.
19838fn parse_timetz_str(s: &str) -> Option<(i64, i32)> {
19839    let s = s.trim();
19840    // Find the offset sign — scan from right since the time part
19841    // never contains '+' / '-' (after the optional fractional dot
19842    // it's all digits and ':').
19843    let bytes = s.as_bytes();
19844    let sign_pos = bytes
19845        .iter()
19846        .enumerate()
19847        .rev()
19848        .find(|&(_, &b)| b == b'+' || b == b'-')
19849        .map(|(i, _)| i)?;
19850    if sign_pos == 0 {
19851        return None; // bare sign — no time component
19852    }
19853    let time_part = &s[..sign_pos];
19854    let offset_part = &s[sign_pos..];
19855    let us = parse_time_str(time_part)?;
19856    let sign: i32 = if offset_part.starts_with('+') { 1 } else { -1 };
19857    let offset_body = &offset_part[1..];
19858    let (hh_str, mm_str) = match offset_body.split_once(':') {
19859        Some((h, m)) => (h, m),
19860        None => (offset_body, "0"),
19861    };
19862    let hh: i32 = hh_str.parse().ok()?;
19863    let mm: i32 = mm_str.parse().ok()?;
19864    if !(0..=14).contains(&hh) || !(0..=59).contains(&mm) {
19865        return None;
19866    }
19867    let total = sign * (hh * 3600 + mm * 60);
19868    if total.abs() > 50_400 {
19869        return None;
19870    }
19871    Some((us, total))
19872}
19873
19874/// v7.17.0 Phase 3.P0-33 — funnel an integer literal through MySQL
19875/// YEAR range validation: 0 sentinel or 1901..=2155. Out-of-range
19876/// surfaces as a hard SQL error (no silent truncation, mirrors PG
19877/// `time_in` / `uuid_in` discipline).
19878fn coerce_int_to_year(n: i64, col_name: &str) -> Result<Value, EngineError> {
19879    if n == 0 || (1901..=2155).contains(&n) {
19880        // u16::try_from cannot fail in this range; the cast also
19881        // covers the 0 sentinel.
19882        return Ok(Value::Year(n as u16));
19883    }
19884    Err(EngineError::Eval(EvalError::TypeMismatch {
19885        detail: alloc::format!(
19886            "year value out of range: {n} (column `{col_name}`; \
19887             MySQL accepts 0 or 1901..=2155)"
19888        ),
19889    }))
19890}
19891
/// v7.17.0 Phase 3.P0-32 — parse a PG `time` literal
/// `HH:MM:SS[.fraction]` into microseconds since 00:00:00.
///
/// Accepts:
///   * `HH:MM:SS`            — exact-second precision
///   * `HH:MM:SS.f` .. `.ffffff` — 1-6 fractional digits, right-padded
///     with zeros to microseconds
///
/// Every field must consist of ASCII digits only. In particular a
/// sign-prefixed field such as `+1:00:00` is rejected, even though
/// Rust's integer `FromStr` would accept the leading `+`.
///
/// Range: hour 0..=23, minute 0..=59, second 0..=59. Anything else
/// returns None — caller surfaces as a hard SQL error (no silent
/// truncation, matches PG's `time_in` behaviour).
fn parse_time_str(s: &str) -> Option<i64> {
    /// Parse one non-empty, digits-only clock field.
    fn field(text: &str) -> Option<u32> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }

    let s = s.trim();
    let (hms, frac) = match s.split_once('.') {
        Some((h, f)) => (h, Some(f)),
        None => (s, None),
    };
    let mut parts = hms.split(':');
    let hh = field(parts.next()?)?;
    let mm = field(parts.next()?)?;
    let ss = field(parts.next()?)?;
    if parts.next().is_some() {
        return None;
    }
    if hh > 23 || mm > 59 || ss > 59 {
        return None;
    }
    let frac_us: i64 = match frac {
        None => 0,
        Some(f) => {
            if f.is_empty() || f.len() > 6 || !f.bytes().all(|b| b.is_ascii_digit()) {
                return None;
            }
            // Scale up to microseconds so '.5' = 500000 µsec.
            let mut us: i64 = f.parse().ok()?;
            for _ in f.len()..6 {
                us *= 10;
            }
            us
        }
    };
    Some(
        i64::from(hh) * 3_600_000_000
            + i64::from(mm) * 60_000_000
            + i64::from(ss) * 1_000_000
            + frac_us,
    )
}
19941
/// Map a parsed SQL column type (`ColumnTypeName`, from the SQL
/// AST) to the storage layer's `DataType`.
///
/// The mapping is one-to-one. Parameterised types carry their
/// parameters across unchanged (`Varchar(n)`, `Char(n)`,
/// `Numeric(precision, scale)`, the vector `dim`); the vector
/// encoding and the range kind are translated variant-by-variant
/// from their AST enums to the storage enums of the same name.
///
/// There is no wildcard arm, so adding a `ColumnTypeName` variant
/// without extending this function fails to compile.
const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
    match t {
        ColumnTypeName::SmallInt => DataType::SmallInt,
        ColumnTypeName::Int => DataType::Int,
        ColumnTypeName::BigInt => DataType::BigInt,
        ColumnTypeName::Float => DataType::Float,
        ColumnTypeName::Text => DataType::Text,
        ColumnTypeName::Varchar(n) => DataType::Varchar(n),
        ColumnTypeName::Char(n) => DataType::Char(n),
        ColumnTypeName::Bool => DataType::Bool,
        ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
            dim,
            // AST encoding enum → storage encoding enum.
            encoding: match encoding {
                SqlVecEncoding::F32 => VecEncoding::F32,
                SqlVecEncoding::Sq8 => VecEncoding::Sq8,
                SqlVecEncoding::F16 => VecEncoding::F16,
            },
        },
        ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
        ColumnTypeName::Date => DataType::Date,
        ColumnTypeName::Timestamp => DataType::Timestamp,
        ColumnTypeName::Timestamptz => DataType::Timestamptz,
        ColumnTypeName::Json => DataType::Json,
        ColumnTypeName::Jsonb => DataType::Jsonb,
        ColumnTypeName::Bytes => DataType::Bytes,
        ColumnTypeName::TextArray => DataType::TextArray,
        ColumnTypeName::IntArray => DataType::IntArray,
        ColumnTypeName::BigIntArray => DataType::BigIntArray,
        ColumnTypeName::TsVector => DataType::TsVector,
        ColumnTypeName::TsQuery => DataType::TsQuery,
        ColumnTypeName::Uuid => DataType::Uuid,
        ColumnTypeName::Time => DataType::Time,
        ColumnTypeName::Year => DataType::Year,
        ColumnTypeName::TimeTz => DataType::TimeTz,
        ColumnTypeName::Money => DataType::Money,
        // AST range kind → storage range kind.
        ColumnTypeName::Range(k) => DataType::Range(match k {
            spg_sql::ast::RangeKindAst::Int4 => spg_storage::RangeKind::Int4,
            spg_sql::ast::RangeKindAst::Int8 => spg_storage::RangeKind::Int8,
            spg_sql::ast::RangeKindAst::Num => spg_storage::RangeKind::Num,
            spg_sql::ast::RangeKindAst::Ts => spg_storage::RangeKind::Ts,
            spg_sql::ast::RangeKindAst::TsTz => spg_storage::RangeKind::TsTz,
            spg_sql::ast::RangeKindAst::Date => spg_storage::RangeKind::Date,
        }),
        ColumnTypeName::Hstore => DataType::Hstore,
        ColumnTypeName::IntArray2D => DataType::IntArray2D,
        ColumnTypeName::BigIntArray2D => DataType::BigIntArray2D,
        ColumnTypeName::TextArray2D => DataType::TextArray2D,
    }
}
19991
19992/// Convert an INSERT VALUES expression to a storage Value. Supports literal
19993/// expressions, unary-minus over numeric literals, and pgvector-style
19994/// `'[..]'::vector` cast (v1.2). Anything more complex returns `Unsupported`.
19995fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
19996    match expr {
19997        Expr::Literal(l) => Ok(literal_to_value(l)),
19998        Expr::Cast { expr, target } => {
19999            let inner_value = literal_expr_to_value(*expr)?;
20000            crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
20001        }
20002        Expr::Unary {
20003            op: UnOp::Neg,
20004            expr,
20005        } => match *expr {
20006            Expr::Literal(Literal::Integer(n)) => {
20007                // Fold to i32 if it fits, else BigInt. Parser emits Integer(i64)
20008                // — overflow on negate of i64::MIN is the one edge case.
20009                let neg = n.checked_neg().ok_or_else(|| {
20010                    EngineError::Unsupported("integer literal overflow on negation".into())
20011                })?;
20012                Ok(int_value_for(neg))
20013            }
20014            Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
20015            other => Err(EngineError::Unsupported(alloc::format!(
20016                "unary minus over non-literal expression: {other:?}"
20017            ))),
20018        },
20019        // v7.10.10 — `ARRAY[lit, lit, …]` constructor accepted at
20020        // INSERT-time. Each element must reduce to a Value through
20021        // `literal_expr_to_value`; NULL elements become `None`.
20022        // v7.11.13 — deduce shape from element values: all Int →
20023        // IntArray; any BigInt → BigIntArray (widening); any Text
20024        // → TextArray. Cast targets (`ARRAY[]::INT[]`) flow through
20025        // the outer Cast arm before reaching here and re-coerce.
20026        Expr::Array(items) => {
20027            let mut materialised: alloc::vec::Vec<Value> =
20028                alloc::vec::Vec::with_capacity(items.len());
20029            for elem in items {
20030                materialised.push(literal_expr_to_value(elem)?);
20031            }
20032            Ok(array_literal_widen(materialised))
20033        }
20034        // Any other Expr shape — fall back to a general evaluation
20035        // against an empty row + empty schema. This unblocks the
20036        // app-common patterns where INSERT VALUES carries a
20037        // non-correlated function call:
20038        //   INSERT INTO t VALUES (concat('U-', 42))
20039        //   INSERT INTO t VALUES (now())
20040        //   INSERT INTO t VALUES (format('%s-%s', 'a', 'b'))
20041        // Any expression that references a column or `$N`
20042        // placeholder fails cleanly inside `eval_expr` with a
20043        // descriptive error; literals + casts + ARRAY[…] continue
20044        // to take the fast paths above so the hot INSERT path is
20045        // unchanged on the common case.
20046        other => {
20047            let empty_schema: alloc::vec::Vec<spg_storage::ColumnSchema> = alloc::vec::Vec::new();
20048            let ctx = EvalContext::new(&empty_schema, None);
20049            let empty_row = spg_storage::Row::new(alloc::vec::Vec::new());
20050            crate::eval::eval_expr(&other, &empty_row, &ctx).map_err(EngineError::Eval)
20051        }
20052    }
20053}
20054
20055fn literal_to_value(l: Literal) -> Value {
20056    match l {
20057        Literal::Integer(n) => int_value_for(n),
20058        Literal::Float(x) => Value::Float(x),
20059        Literal::String(s) => Value::Text(s),
20060        Literal::Bool(b) => Value::Bool(b),
20061        Literal::Null => Value::Null,
20062        Literal::Vector(v) => Value::Vector(v),
20063        Literal::TextArray(items) => Value::TextArray(items),
20064        Literal::IntArray(items) => Value::IntArray(items),
20065        Literal::BigIntArray(items) => Value::BigIntArray(items),
20066        Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
20067    }
20068}
20069
20070/// Pick `Int` (`i32`) when the literal fits, else `BigInt`. `INT` vs `BIGINT`
20071/// columns will still enforce the right tag downstream — this is just the
20072/// default we synthesise from an unannotated integer literal.
20073fn int_value_for(n: i64) -> Value {
20074    if let Ok(small) = i32::try_from(n) {
20075        Value::Int(small)
20076    } else {
20077        Value::BigInt(n)
20078    }
20079}
20080
20081/// Widen / narrow `v` to fit `expected`. Numerics permit safe widening
20082/// (`Int → BigInt`, `Int/BigInt → Float`) and best-effort narrowing
20083/// (`BigInt → Int` succeeds only when the value fits in `i32`). Everything
20084/// else returns `TypeMismatch` carrying the column name for caller diagnostics.
20085/// `NULL` is always permitted; the nullability check happens later in storage.
20086#[allow(clippy::too_many_lines)]
20087/// v7.17.0 Phase 4.4 — reject negative integer values on UNSIGNED
20088/// columns. Called after `coerce_value` at each INSERT / UPDATE
20089/// site that has ColumnSchema context. NULL passes through (a
20090/// nullable UNSIGNED column can legitimately hold NULL).
20091fn check_unsigned_range(
20092    v: &Value,
20093    schema: &ColumnSchema,
20094    position: usize,
20095) -> Result<(), EngineError> {
20096    if !schema.is_unsigned {
20097        return Ok(());
20098    }
20099    let n = match v {
20100        Value::SmallInt(x) => i64::from(*x),
20101        Value::Int(x) => i64::from(*x),
20102        Value::BigInt(x) => *x,
20103        _ => return Ok(()), // non-integer cells (NULL, default) skip
20104    };
20105    if n < 0 {
20106        return Err(EngineError::Unsupported(alloc::format!(
20107            "column {:?} is UNSIGNED but got negative value {n} at position {position}",
20108            schema.name
20109        )));
20110    }
20111    Ok(())
20112}
20113
20114fn coerce_value(
20115    v: Value,
20116    expected: DataType,
20117    col_name: &str,
20118    position: usize,
20119) -> Result<Value, EngineError> {
20120    if v.is_null() {
20121        return Ok(Value::Null);
20122    }
20123    let actual = v.data_type().expect("non-null");
20124    if actual == expected {
20125        return Ok(v);
20126    }
20127    let coerced = match (v, expected) {
20128        (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
20129        (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
20130        (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
20131        (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
20132            i128::from(n),
20133            precision,
20134            scale,
20135            col_name,
20136        )?),
20137        (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
20138        (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
20139        (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
20140        (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
20141            i128::from(n),
20142            precision,
20143            scale,
20144            col_name,
20145        )?),
20146        (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
20147        (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
20148        #[allow(clippy::cast_precision_loss)]
20149        (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
20150        (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
20151            i128::from(n),
20152            precision,
20153            scale,
20154            col_name,
20155        )?),
20156        (Value::Float(x), DataType::Numeric { precision, scale }) => {
20157            Some(numeric_from_float(x, precision, scale, col_name)?)
20158        }
20159        // v7.17.0 Phase 3.P0-67 — Text → NUMERIC. Parse a
20160        // canonical decimal text (`"-1234.56"` / `"42"` /
20161        // `"0.0001"`) into `(mantissa, source_scale)` and rescale
20162        // to the column's declared scale. Required for prepared
20163        // binds: `value_to_literal` flattens a Value::Numeric
20164        // into a TEXT literal because Literal carries no native
20165        // Numeric variant, so the placeholder substitution path
20166        // reaches coerce_value as Text → Numeric. Without this
20167        // arm the round-trip surfaces a TypeMismatch even though
20168        // the cell already left the engine as a valid Numeric.
20169        (Value::Text(s), DataType::Numeric { precision, scale }) => {
20170            let Some((mantissa, src_scale)) = parse_numeric_text(&s) else {
20171                return Err(EngineError::Eval(EvalError::TypeMismatch {
20172                    detail: alloc::format!("cannot parse {s:?} as NUMERIC for column `{col_name}`"),
20173                }));
20174            };
20175            Some(numeric_rescale(
20176                mantissa, src_scale, precision, scale, col_name,
20177            )?)
20178        }
20179        // Text → DATE / TIMESTAMP: parse canonical text forms.
20180        (Value::Text(s), DataType::Date) => {
20181            let d = eval::parse_date_literal(&s).ok_or_else(|| {
20182                EngineError::Eval(EvalError::TypeMismatch {
20183                    detail: alloc::format!("cannot parse {s:?} as DATE for column `{col_name}`"),
20184                })
20185            })?;
20186            Some(Value::Date(d))
20187        }
20188        // v7.14.0 — MySQL DEFAULT clauses quote integer / float
20189        // / boolean literals (`DEFAULT '0'`, `DEFAULT '1'`,
20190        // `DEFAULT '3.14'`, `DEFAULT 'true'`). Coerce the text
20191        // form to the column's numeric / bool type at DEFAULT-
20192        // installation time so the storage check sees a typed
20193        // value. Parse failures fall through to TypeMismatch.
20194        (Value::Text(s), DataType::SmallInt) => s.parse::<i16>().ok().map(Value::SmallInt),
20195        (Value::Text(s), DataType::Int) => s.parse::<i32>().ok().map(Value::Int),
20196        (Value::Text(s), DataType::BigInt) => s.parse::<i64>().ok().map(Value::BigInt),
20197        (Value::Text(s), DataType::Float) => s.parse::<f64>().ok().map(Value::Float),
20198        (Value::Text(s), DataType::Bool) => match s.to_ascii_lowercase().as_str() {
20199            "0" | "false" | "f" | "no" | "off" => Some(Value::Bool(false)),
20200            "1" | "true" | "t" | "yes" | "on" => Some(Value::Bool(true)),
20201            _ => None,
20202        },
20203        // v7.17.0 Phase 3.P0-46 — MySQL TINYINT(1) (which Phase 4.3
20204        // classifies as DataType::Bool) is the storage shape every
20205        // mysqldump-restored boolean column lands in. mysqldump emits
20206        // the values as integer `0` / `1` literals, so int → bool
20207        // coerce on INSERT is required for a 0-change cutover. MySQL's
20208        // rule is "any non-zero is truthy"; we follow that for all
20209        // signed int widths so the same coerce path serves an
20210        // explicit `BOOLEAN` column too.
20211        (Value::Int(n), DataType::Bool) => Some(Value::Bool(n != 0)),
20212        (Value::SmallInt(n), DataType::Bool) => Some(Value::Bool(n != 0)),
20213        (Value::BigInt(n), DataType::Bool) => Some(Value::Bool(n != 0)),
20214        // v4.9: Text ↔ JSON coercion. No structural validation —
20215        // any text literal is accepted; the responsibility for
20216        // valid JSON lies with the producer.
20217        (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
20218        (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
20219        // v7.13.3 — mailrs round-7 S10. SPG's storage represents
20220        // both JSON and JSONB on-disk as `Value::Json(String)` —
20221        // they share the underlying text payload. The cast
20222        // `'<text>'::jsonb` produces a Value::Json that needs to
20223        // satisfy a DataType::Jsonb column. Identity coerce in
20224        // both directions so JSON ↔ JSONB assignments work at all
20225        // INSERT / ALTER COLUMN TYPE / DEFAULT contexts.
20226        (Value::Json(s), DataType::Jsonb | DataType::Json) => Some(Value::Json(s)),
20227        // v7.10.4 — Text → BYTEA. Decode PG-style literal forms:
20228        //   - Hex:    `\x48656c6c6f`  (case-insensitive hex pairs)
20229        //   - Escape: `Hello\\000world`  (backslash + octal triples)
20230        //   - Plain:  any string → raw UTF-8 bytes (PG also accepts)
20231        // Errors surface as TypeMismatch so the operator gets a
20232        // clear "this literal isn't a bytea literal" hint.
20233        (Value::Text(s), DataType::Bytes) => {
20234            let bytes = decode_bytea_literal(&s).map_err(|e| {
20235                EngineError::Eval(EvalError::TypeMismatch {
20236                    detail: alloc::format!(
20237                        "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
20238                    ),
20239                })
20240            })?;
20241            Some(Value::Bytes(bytes))
20242        }
20243        // v7.10.4 — BYTEA → Text round-trip uses the PG hex
20244        // output (lowercase, `\x` prefix). Important when a
20245        // SELECT pulls a bytea cell through a Text column path.
20246        (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
20247        // v7.17.0 — Text → UUID. PG accepts canonical hyphenated,
20248        // unhyphenated, uppercase, and `{...}`-braced forms; we
20249        // funnel all four through `spg_storage::parse_uuid_str`.
20250        // A malformed literal surfaces as a SQL TypeMismatch
20251        // rather than silently inserting garbage — `0-change
20252        // cutover` requires that an app inserting bad UUID text
20253        // sees the same hard error PG would raise.
20254        (Value::Text(s), DataType::Uuid) => match spg_storage::parse_uuid_str(&s) {
20255            Some(b) => Some(Value::Uuid(b)),
20256            None => {
20257                return Err(EngineError::Eval(EvalError::TypeMismatch {
20258                    detail: alloc::format!(
20259                        "invalid input syntax for type uuid: {s:?} (column `{col_name}`)"
20260                    ),
20261                }));
20262            }
20263        },
20264        // v7.17.0 — UUID → Text canonical 8-4-4-4-12 lowercase.
20265        // Surfaces when a SELECT plucks a uuid cell through a
20266        // Text column path (e.g. INSERT INTO log SELECT id::text
20267        // FROM other_table).
20268        (Value::Uuid(b), DataType::Text) => Some(Value::Text(spg_storage::format_uuid(&b))),
20269        // v7.17.0 Phase 3.P0-32 — Text → TIME. Accepts
20270        // `HH:MM:SS` and `HH:MM:SS.ffffff` (1-6 fractional digits).
20271        // Out-of-range hour/min/sec is a hard SQL error (no
20272        // silent truncation — same 0-change-cutover discipline
20273        // we apply to UUID).
20274        (Value::Text(s), DataType::Time) => match parse_time_str(&s) {
20275            Some(us) => Some(Value::Time(us)),
20276            None => {
20277                return Err(EngineError::Eval(EvalError::TypeMismatch {
20278                    detail: alloc::format!(
20279                        "invalid input syntax for type time: {s:?} (column `{col_name}`)"
20280                    ),
20281                }));
20282            }
20283        },
20284        // v7.17.0 Phase 3.P0-32 — TIME → Text canonical `HH:MM:SS[.ffffff]`.
20285        (Value::Time(us), DataType::Text) => Some(Value::Text(eval::format_time(us))),
20286        // v7.17.0 Phase 3.P0-33 — int / bigint → YEAR. Range
20287        // check enforces the MySQL canonical 1901..=2155 + 0
20288        // sentinel; out-of-range is a hard SQL error (no silent
20289        // truncation, mirrors P0-32 / P0-25 discipline).
20290        (Value::SmallInt(n), DataType::Year) => Some(coerce_int_to_year(i64::from(n), col_name)?),
20291        (Value::Int(n), DataType::Year) => Some(coerce_int_to_year(i64::from(n), col_name)?),
20292        (Value::BigInt(n), DataType::Year) => Some(coerce_int_to_year(n, col_name)?),
20293        // Text → YEAR. Accepts the 4-digit decimal form only;
20294        // two-digit YEAR (`'99'` → 1999) was deprecated in MySQL
20295        // 5.7 and is out of scope for v7.17.0.
20296        (Value::Text(s), DataType::Year) => match s.trim().parse::<i64>() {
20297            Ok(n) => Some(coerce_int_to_year(n, col_name)?),
20298            Err(_) => {
20299                return Err(EngineError::Eval(EvalError::TypeMismatch {
20300                    detail: alloc::format!(
20301                        "invalid input syntax for type year: {s:?} (column `{col_name}`)"
20302                    ),
20303                }));
20304            }
20305        },
20306        // YEAR → Text 4-digit zero-padded.
20307        (Value::Year(y), DataType::Text) => Some(Value::Text(alloc::format!("{y:04}"))),
20308        // v7.17.0 Phase 3.P0-34 — Text → TIMETZ. Mandatory
20309        // signed offset suffix; missing offset is a hard error
20310        // (SPG has no session TZ wired into eval, unlike PG).
20311        (Value::Text(s), DataType::TimeTz) => match parse_timetz_str(&s) {
20312            Some((us, offset_secs)) => Some(Value::TimeTz { us, offset_secs }),
20313            None => {
20314                return Err(EngineError::Eval(EvalError::TypeMismatch {
20315                    detail: alloc::format!(
20316                        "invalid input syntax for type time with time zone: \
20317                         {s:?} (column `{col_name}`)"
20318                    ),
20319                }));
20320            }
20321        },
20322        // TIMETZ → Text canonical `HH:MM:SS[.ffffff]±HH[:MM]`.
20323        (Value::TimeTz { us, offset_secs }, DataType::Text) => {
20324            Some(Value::Text(eval::format_timetz(us, offset_secs)))
20325        }
20326        // v7.17.0 Phase 3.P0-35 — Text → MONEY. Accepts `$N.NN`,
20327        // `$N,NNN.NN`, optional leading `-`. Bare numeric literals
20328        // arrive via the Int/BigInt/Float/Numeric arms below.
20329        (Value::Text(s), DataType::Money) => match parse_money_str(&s) {
20330            Some(c) => Some(Value::Money(c)),
20331            None => {
20332                return Err(EngineError::Eval(EvalError::TypeMismatch {
20333                    detail: alloc::format!(
20334                        "invalid input syntax for type money: {s:?} (column `{col_name}`)"
20335                    ),
20336                }));
20337            }
20338        },
20339        // Int / BigInt / SmallInt / Float / Numeric → MONEY.
20340        // Bare numeric literal is interpreted as a major-unit
20341        // amount (matches PG: `100`::money → $100.00 = 10000 cents).
20342        (Value::SmallInt(n), DataType::Money) => {
20343            Some(Value::Money(i64::from(n).saturating_mul(100)))
20344        }
20345        (Value::Int(n), DataType::Money) => Some(Value::Money(i64::from(n).saturating_mul(100))),
20346        (Value::BigInt(n), DataType::Money) => Some(Value::Money(n.saturating_mul(100))),
20347        (Value::Float(x), DataType::Money) => {
20348            // Round half-away-from-zero to cents (no_std — no
20349            // `f64::round`, so hand-roll via biased truncation).
20350            let scaled = x * 100.0;
20351            let cents = if scaled >= 0.0 {
20352                (scaled + 0.5) as i64
20353            } else {
20354                (scaled - 0.5) as i64
20355            };
20356            Some(Value::Money(cents))
20357        }
20358        (Value::Numeric { scaled, scale }, DataType::Money) => {
20359            // Convert exact decimal to cents (scale 2). If scale > 2,
20360            // round half-away-from-zero. If scale < 2, multiply up.
20361            let cents = if scale == 2 {
20362                scaled
20363            } else if scale < 2 {
20364                let mult = 10_i128.pow(u32::from(2 - scale));
20365                scaled.saturating_mul(mult)
20366            } else {
20367                let div = 10_i128.pow(u32::from(scale - 2));
20368                let half = div / 2;
20369                let bias = if scaled >= 0 { half } else { -half };
20370                (scaled + bias) / div
20371            };
20372            Some(Value::Money(i64::try_from(cents).unwrap_or(i64::MAX)))
20373        }
20374        // MONEY → Text canonical `$N,NNN.CC`.
20375        (Value::Money(c), DataType::Text) => Some(Value::Text(eval::format_money(c))),
20376        // v7.17.0 Phase 3.P0-38 — Text → Range. Accepts canonical
20377        // PG forms: `'empty'`, `'[a,b)'`, `'(a,b]'`, `'[a,b]'`,
20378        // `'(a,b)'`, with empty lower or upper for unbounded.
20379        (Value::Text(s), DataType::Range(kind)) => match parse_range_str(&s, kind) {
20380            Some(v) => Some(v),
20381            None => {
20382                return Err(EngineError::Eval(EvalError::TypeMismatch {
20383                    detail: alloc::format!(
20384                        "invalid input syntax for range type: {s:?} (column `{col_name}`)"
20385                    ),
20386                }));
20387            }
20388        },
20389        // Range → Text canonical form (`[a,b)`, `'empty'`, etc).
20390        (v @ Value::Range { .. }, DataType::Text) => Some(Value::Text(format_range_str(&v))),
20391        // v7.17.0 Phase 3.P0-39 — Text → Hstore.
20392        (Value::Text(s), DataType::Hstore) => match parse_hstore_str(&s) {
20393            Some(pairs) => Some(Value::Hstore(pairs)),
20394            None => {
20395                return Err(EngineError::Eval(EvalError::TypeMismatch {
20396                    detail: alloc::format!(
20397                        "invalid input syntax for type hstore: {s:?} (column `{col_name}`)"
20398                    ),
20399                }));
20400            }
20401        },
20402        // Hstore → Text canonical `"k"=>"v"` form.
20403        (Value::Hstore(pairs), DataType::Text) => Some(Value::Text(format_hstore_str(&pairs))),
20404        // v7.17.0 Phase 3.P0-40 — Text → 2D arrays via PG
20405        // external `'{{a,b},{c,d}}'` literal.
20406        (Value::Text(s), DataType::IntArray2D) => match parse_int_2d_literal(&s) {
20407            Ok(m) => Some(Value::IntArray2D(m)),
20408            Err(e) => {
20409                return Err(EngineError::Eval(EvalError::TypeMismatch {
20410                    detail: alloc::format!(
20411                        "invalid input syntax for INT[][]: {s:?} (column `{col_name}`): {e}"
20412                    ),
20413                }));
20414            }
20415        },
20416        (Value::Text(s), DataType::BigIntArray2D) => match parse_bigint_2d_literal(&s) {
20417            Ok(m) => Some(Value::BigIntArray2D(m)),
20418            Err(e) => {
20419                return Err(EngineError::Eval(EvalError::TypeMismatch {
20420                    detail: alloc::format!(
20421                        "invalid input syntax for BIGINT[][]: {s:?} (column `{col_name}`): {e}"
20422                    ),
20423                }));
20424            }
20425        },
20426        (Value::Text(s), DataType::TextArray2D) => match parse_text_2d_literal(&s) {
20427            Ok(m) => Some(Value::TextArray2D(m)),
20428            Err(e) => {
20429                return Err(EngineError::Eval(EvalError::TypeMismatch {
20430                    detail: alloc::format!(
20431                        "invalid input syntax for TEXT[][]: {s:?} (column `{col_name}`): {e}"
20432                    ),
20433                }));
20434            }
20435        },
20436        // 2D arrays → Text canonical nested form.
20437        (Value::IntArray2D(rows), DataType::Text) => Some(Value::Text(format_int_2d_text(&rows))),
20438        (Value::BigIntArray2D(rows), DataType::Text) => {
20439            Some(Value::Text(format_bigint_2d_text(&rows)))
20440        }
20441        (Value::TextArray2D(rows), DataType::Text) => Some(Value::Text(format_text_2d_text(&rows))),
20442        // v7.10.11 — Text → TEXT[]. Decode PG's external array
20443        // form `'{a,b,NULL}'`. NULL element token (case-insensitive)
20444        // is the literal `NULL`; everything else is a quoted or
20445        // unquoted text element. mailrs `'{label1,label2}'::TEXT[]`.
20446        (Value::Text(s), DataType::TextArray) => {
20447            let arr = decode_text_array_literal(&s).map_err(|e| {
20448                EngineError::Eval(EvalError::TypeMismatch {
20449                    detail: alloc::format!(
20450                        "cannot parse {s:?} as TEXT[] for column `{col_name}`: {e}"
20451                    ),
20452                })
20453            })?;
20454            Some(Value::TextArray(arr))
20455        }
20456        // v7.16.0 — Text → IntArray / BigIntArray for the
20457        // spg-sqlx Bind path. Decode the PG external form
20458        // `{1,2,3}` as a TEXT array first, then parse each
20459        // element as int. Same shape as the TextArray decode
20460        // above with an element-wise narrow.
20461        (Value::Text(s), DataType::IntArray) => {
20462            let arr = decode_text_array_literal(&s).map_err(|e| {
20463                EngineError::Eval(EvalError::TypeMismatch {
20464                    detail: alloc::format!(
20465                        "cannot parse {s:?} as INT[] for column `{col_name}`: {e}"
20466                    ),
20467                })
20468            })?;
20469            let mut out: Vec<Option<i32>> = Vec::with_capacity(arr.len());
20470            for elem in arr {
20471                match elem {
20472                    None => out.push(None),
20473                    Some(t) => {
20474                        let n: i32 = t.parse().map_err(|_| {
20475                            EngineError::Eval(EvalError::TypeMismatch {
20476                                detail: alloc::format!(
20477                                    "cannot parse {t:?} as INT element for `{col_name}`"
20478                                ),
20479                            })
20480                        })?;
20481                        out.push(Some(n));
20482                    }
20483                }
20484            }
20485            Some(Value::IntArray(out))
20486        }
20487        (Value::Text(s), DataType::BigIntArray) => {
20488            let arr = decode_text_array_literal(&s).map_err(|e| {
20489                EngineError::Eval(EvalError::TypeMismatch {
20490                    detail: alloc::format!(
20491                        "cannot parse {s:?} as BIGINT[] for column `{col_name}`: {e}"
20492                    ),
20493                })
20494            })?;
20495            let mut out: Vec<Option<i64>> = Vec::with_capacity(arr.len());
20496            for elem in arr {
20497                match elem {
20498                    None => out.push(None),
20499                    Some(t) => {
20500                        let n: i64 = t.parse().map_err(|_| {
20501                            EngineError::Eval(EvalError::TypeMismatch {
20502                                detail: alloc::format!(
20503                                    "cannot parse {t:?} as BIGINT element for `{col_name}`"
20504                                ),
20505                            })
20506                        })?;
20507                        out.push(Some(n));
20508                    }
20509                }
20510            }
20511            Some(Value::BigIntArray(out))
20512        }
20513        // v7.10.11 — TEXT[] → Text round-trip uses PG's
20514        // external array form (`{a,b,NULL}`). Lets a SELECT
20515        // pull an array column through any Text-side codepath.
20516        (Value::TextArray(items), DataType::Text) => Some(Value::Text(encode_text_array(&items))),
20517        // v7.17.0 Phase 3.P0-68 — Text → VECTOR auto-coerce.
20518        // Matches the existing Text → TsVector arm and the
20519        // `::vector` cast: PG-canonical pgvector external form
20520        // (`'[1, 2, -3]'`) becomes a typed Vector value at the
20521        // column boundary. Dim mismatch surfaces as TypeMismatch.
20522        // For SQ8 / HALF encodings we chain through the standard
20523        // quantise helpers so the storage shape matches the
20524        // declared encoding without a second coerce pass.
20525        (Value::Text(s), DataType::Vector { dim, encoding }) => {
20526            let parsed = eval::parse_vector_text(&s).ok_or_else(|| {
20527                EngineError::Eval(EvalError::TypeMismatch {
20528                    detail: alloc::format!("cannot parse {s:?} as VECTOR for column `{col_name}`"),
20529                })
20530            })?;
20531            if parsed.len() != dim as usize {
20532                return Err(EngineError::Eval(EvalError::TypeMismatch {
20533                    detail: alloc::format!(
20534                        "VECTOR({dim}) column `{col_name}` rejects literal of length {}",
20535                        parsed.len()
20536                    ),
20537                }));
20538            }
20539            Some(match encoding {
20540                VecEncoding::F32 => Value::Vector(parsed),
20541                VecEncoding::Sq8 => Value::Sq8Vector(spg_storage::quantize::quantize(&parsed)),
20542                VecEncoding::F16 => {
20543                    Value::HalfVector(spg_storage::halfvec::HalfVector::from_f32_slice(&parsed))
20544                }
20545            })
20546        }
20547        // v7.16.1 — Text → TSVECTOR auto-coerce for the
20548        // INSERT-side wire path (mailrs round-9 A.2.a). PG
20549        // implicitly promotes the TEXT literal at INSERT into a
20550        // TSVECTOR column; SPG previously rejected with a hard
20551        // type mismatch, blocking 23,276 pg_dump rows into
20552        // `messages.search_vector`. We route through the same
20553        // `decode_tsvector_external` the `::tsvector` cast
20554        // already uses, so PG-canonical forms (`'word'`,
20555        // `'word:1A,2B'`, multi-lexeme, empty `''`) all parse.
20556        (Value::Text(s), DataType::TsVector) => {
20557            let lexs = eval::decode_tsvector_external(&s).map_err(|e| {
20558                EngineError::Eval(EvalError::TypeMismatch {
20559                    detail: alloc::format!(
20560                        "cannot parse {s:?} as TSVECTOR for column `{col_name}`: {e}"
20561                    ),
20562                })
20563            })?;
20564            Some(Value::TsVector(lexs))
20565        }
20566        (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
20567            let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
20568                EngineError::Eval(EvalError::TypeMismatch {
20569                    detail: alloc::format!(
20570                        "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
20571                    ),
20572                })
20573            })?;
20574            Some(Value::Timestamp(t))
20575        }
20576        // DATE ↔ TIMESTAMP convertibility (DATE → midnight,
20577        // TIMESTAMP → day truncation).
20578        (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
20579            Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
20580        }
20581        // v7.9.21 — Value::Timestamp lands in either Timestamp
20582        // or Timestamptz columns; the on-disk layout is the
20583        // same i64 microseconds UTC.
20584        (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
20585        (Value::Timestamp(t), DataType::Date) => {
20586            let days = t.div_euclid(86_400_000_000);
20587            i32::try_from(days).ok().map(Value::Date)
20588        }
20589        (
20590            Value::Numeric {
20591                scaled,
20592                scale: src_scale,
20593            },
20594            DataType::Numeric { precision, scale },
20595        ) => Some(numeric_rescale(
20596            scaled, src_scale, precision, scale, col_name,
20597        )?),
20598        #[allow(clippy::cast_precision_loss)]
20599        (Value::Numeric { scaled, scale }, DataType::Float) => {
20600            let mut div = 1.0_f64;
20601            for _ in 0..scale {
20602                div *= 10.0;
20603            }
20604            Some(Value::Float((scaled as f64) / div))
20605        }
20606        (Value::Numeric { scaled, scale }, DataType::Int) => {
20607            let truncated = numeric_truncate_to_integer(scaled, scale);
20608            i32::try_from(truncated).ok().map(Value::Int)
20609        }
20610        (Value::Numeric { scaled, scale }, DataType::BigInt) => {
20611            let truncated = numeric_truncate_to_integer(scaled, scale);
20612            i64::try_from(truncated).ok().map(Value::BigInt)
20613        }
20614        (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
20615            let truncated = numeric_truncate_to_integer(scaled, scale);
20616            i16::try_from(truncated).ok().map(Value::SmallInt)
20617        }
20618        // VARCHAR(n) enforces an upper bound on character count.
20619        (Value::Text(s), DataType::Varchar(max)) => {
20620            if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
20621                Some(Value::Text(s))
20622            } else {
20623                return Err(EngineError::Unsupported(alloc::format!(
20624                    "value for VARCHAR({max}) column `{col_name}` exceeds length: \
20625                     {} chars",
20626                    s.chars().count()
20627                )));
20628            }
20629        }
20630        // v6.0.1: f32 → SQ8 INSERT-time quantisation. Triggered
20631        // when the column declares `VECTOR(N) USING SQ8` and
20632        // the INSERT VALUES expression yields a raw f32 vector
20633        // (the normal pgvector-shape literal). Dim mismatch
20634        // falls through the `_ => None` arm and surfaces as
20635        // `TypeMismatch` with the expected SQ8 column type —
20636        // matching the F32 path's existing error.
20637        (
20638            Value::Vector(v),
20639            DataType::Vector {
20640                dim,
20641                encoding: VecEncoding::Sq8,
20642            },
20643        ) if v.len() == dim as usize => Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v))),
20644        // v6.0.3: f32 → f16 INSERT-time conversion for HALF
20645        // columns. Bit-exact at the storage layer (modulo
20646        // half-precision rounding); no rerank pass needed at
20647        // search time.
20648        (
20649            Value::Vector(v),
20650            DataType::Vector {
20651                dim,
20652                encoding: VecEncoding::F16,
20653            },
20654        ) if v.len() == dim as usize => Some(Value::HalfVector(
20655            spg_storage::halfvec::HalfVector::from_f32_slice(&v),
20656        )),
20657        // CHAR(n) right-pads with U+0020 to exactly n chars; if the input
20658        // is already longer we reject (PG truncates trailing-space-only;
20659        // staying strict for v1).
20660        (Value::Text(s), DataType::Char(size)) => {
20661            let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
20662            if len > size {
20663                return Err(EngineError::Unsupported(alloc::format!(
20664                    "value for CHAR({size}) column `{col_name}` exceeds length: \
20665                     {len} chars"
20666                )));
20667            }
20668            let need = (size - len) as usize;
20669            let mut padded = s;
20670            padded.reserve(need);
20671            for _ in 0..need {
20672                padded.push(' ');
20673            }
20674            Some(Value::Text(padded))
20675        }
20676        _ => None,
20677    };
20678    coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
20679        column: col_name.into(),
20680        expected,
20681        actual,
20682        position,
20683    }))
20684}
20685
20686/// v7.12.4 — render a function arg list into the
20687/// canonical form the storage layer caches as
20688/// [`spg_storage::FunctionDef::args_repr`]. The catalogue uses
20689/// this string for both display + as a coarse signature key
20690/// for the (deferred) overload resolution v7.12.5+ adds.
20691fn render_function_args(args: &[spg_sql::ast::FunctionArg]) -> alloc::string::String {
20692    use core::fmt::Write;
20693    let mut out = alloc::string::String::from("(");
20694    for (i, a) in args.iter().enumerate() {
20695        if i > 0 {
20696            out.push_str(", ");
20697        }
20698        match a.mode {
20699            spg_sql::ast::FunctionArgMode::In => {}
20700            spg_sql::ast::FunctionArgMode::Out => out.push_str("OUT "),
20701            spg_sql::ast::FunctionArgMode::InOut => out.push_str("INOUT "),
20702        }
20703        if let Some(n) = &a.name {
20704            out.push_str(n);
20705            out.push(' ');
20706        }
20707        match &a.ty {
20708            spg_sql::ast::FunctionArgType::Typed(t) => {
20709                let _ = write!(out, "{t}");
20710            }
20711            spg_sql::ast::FunctionArgType::Raw(s) => out.push_str(s),
20712        }
20713    }
20714    out.push(')');
20715    out
20716}
20717
20718/// v7.19 P5 — true iff `expr` is `unnest(arg)` at the top level
20719/// (case-insensitive). Used by `exec_select_cancel`'s
20720/// projection loop to detect Set-Returning-Function rows that
20721/// need per-row expansion. Only the top-level call counts —
20722/// `coalesce(unnest(arr), 'x')` is NOT a SRF row from the
20723/// projection's perspective; it would surface as an "unknown
20724/// function" mismatch downstream, which is what we want
20725/// (multi-SRF / nested SRF is documented carve-out for v7.19).
20726fn is_top_level_unnest(expr: &spg_sql::ast::Expr) -> bool {
20727    match expr {
20728        spg_sql::ast::Expr::FunctionCall { name, args } => {
20729            name.eq_ignore_ascii_case("unnest") && args.len() == 1
20730        }
20731        _ => false,
20732    }
20733}
20734
20735/// v7.19 P5 — extract the array argument out of a top-level
20736/// `unnest(arg)` call. `None` if `expr` isn't a `unnest` call
20737/// of arity 1 (mirrors `is_top_level_unnest`).
20738fn top_level_unnest_arg(expr: &spg_sql::ast::Expr) -> Option<&spg_sql::ast::Expr> {
20739    match expr {
20740        spg_sql::ast::Expr::FunctionCall { name, args }
20741            if name.eq_ignore_ascii_case("unnest") && args.len() == 1 =>
20742        {
20743            Some(&args[0])
20744        }
20745        _ => None,
20746    }
20747}
20748
20749/// v7.19 P5 — turn an array-typed `Value` into the element list
20750/// `unnest()` projection emits. NULL → empty list (PG: `unnest(NULL)
20751/// = (no rows)`). Non-array values fall through to a type-mismatch
20752/// error.
20753fn array_value_to_elements(v: &Value) -> Result<Vec<Value>, EngineError> {
20754    match v {
20755        Value::Null => Ok(Vec::new()),
20756        Value::TextArray(items) => Ok(items
20757            .iter()
20758            .map(|opt| {
20759                opt.as_ref()
20760                    .map(|s| Value::Text(s.clone()))
20761                    .unwrap_or(Value::Null)
20762            })
20763            .collect()),
20764        Value::IntArray(items) => Ok(items
20765            .iter()
20766            .map(|opt| opt.map(Value::Int).unwrap_or(Value::Null))
20767            .collect()),
20768        Value::BigIntArray(items) => Ok(items
20769            .iter()
20770            .map(|opt| opt.map(Value::BigInt).unwrap_or(Value::Null))
20771            .collect()),
20772        other => Err(EngineError::Eval(EvalError::TypeMismatch {
20773            detail: alloc::format!(
20774                "unnest() expects an array argument, got {:?}",
20775                other.data_type()
20776            ),
20777        })),
20778    }
20779}
20780
20781#[cfg(test)]
20782mod tests {
20783    use super::*;
20784    use alloc::vec;
20785
20786    fn unwrap_command_ok(r: &QueryResult) -> usize {
20787        match r {
20788            QueryResult::CommandOk { affected, .. } => *affected,
20789            QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
20790        }
20791    }
20792
#[test]
fn update_seek_positions_engages_on_indexed_eq() {
    // Fixture: table `b` with an index on `id`, holding rows
    // (0, 0) through (99, 99).
    let mut e = Engine::new();
    e.execute("CREATE TABLE b (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    e.execute("CREATE INDEX b_id ON b (id)").unwrap();
    for i in 0..100 {
        let insert_sql = alloc::format!("INSERT INTO b VALUES ({i}, {i})");
        e.execute(&insert_sql).unwrap();
    }

    let stmt = spg_sql::parser::parse_statement("UPDATE b SET v = v + 1 WHERE id = 42")
        .expect("parse");
    let Statement::Update(u) = stmt else {
        panic!("expected Update, got {stmt:?}");
    };
    let w = u.where_.as_ref().expect("where");
    let table = e.catalog().get("b").unwrap();
    let schema_cols = table.schema().columns.clone();

    // Walk the seek pipeline one stage at a time so a failure
    // pinpoints the stage that refused to resolve.
    let Expr::Binary { lhs, op, rhs } = w else {
        panic!("WHERE not Binary: {w:?}");
    };
    assert_eq!(*op, BinOp::Eq, "op not Eq");

    let Some((col_pos, value)) = resolve_col_literal_pair(lhs, rhs, &schema_cols, "b") else {
        panic!("resolve_col_literal_pair None: lhs={lhs:?} rhs={rhs:?}");
    };
    if table.index_on(col_pos).is_none() {
        panic!("no index on col {col_pos}");
    }
    if IndexKey::from_value(&value).is_none() {
        panic!("IndexKey::from_value None for {value:?}");
    }

    // End to end: the seek must yield exactly position 42.
    let positions = try_index_seek_positions(w, &schema_cols, table, "b");
    assert_eq!(positions, Some(vec![42]), "seek did not engage");
}
20833
#[test]
fn create_table_registers_schema() {
    // CREATE TABLE must register one table whose two columns
    // carry the declared types and nullability.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
        .unwrap();
    assert_eq!(engine.catalog().table_count(), 1);

    let table = engine.catalog().get("foo").unwrap();
    let cols = &table.schema().columns;
    assert_eq!(cols.len(), 2);
    assert_eq!(cols[0].ty, DataType::Int);
    assert!(!cols[0].nullable);
    assert_eq!(cols[1].ty, DataType::Text);
}
20846
#[test]
fn create_table_vector_default_is_f32_encoded() {
    // A VECTOR(N) column declared without a USING clause is
    // recorded with the F32 encoding.
    let expected_ty = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::F32,
    };
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, expected_ty);
}
20860
#[test]
fn create_table_vector_using_sq8_succeeds() {
    // v6.0.1 step 3: with the step-1 fence in
    // `column_def_to_schema` lifted, CREATE TABLE stores an SQ8
    // column type in the catalog. Quantisation of raw f32 input
    // at INSERT time is covered by a separate test.
    let expected_ty = DataType::Vector {
        dim: 8,
        encoding: VecEncoding::Sq8,
    };
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(8) USING SQ8)")
        .unwrap();
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[0].ty, expected_ty);
}
20877
#[test]
fn insert_into_sq8_column_quantises_f32_payload() {
    // v6.0.1 step 3: at INSERT time `coerce_value` turns a raw
    // `Value::Vector(Vec<f32>)` literal into the column's
    // quantised form, so the stored cell has to be a
    // `Value::Sq8Vector` rather than the original f32 buffer —
    // that is what delivers the 4× compression target.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING SQ8)")
        .unwrap();
    engine
        .execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
        .unwrap();

    let table = engine.catalog().get("t").unwrap();
    let rows = table.rows();
    assert_eq!(rows.len(), 1);

    let cell = &rows[0].values[0];
    let Value::Sq8Vector(q) = cell else {
        panic!("expected Sq8Vector cell, got {cell:?}")
    };
    assert_eq!(q.bytes.len(), 4);
    // min/max come from the payload itself: min=0.0, max=1.0.
    assert!((q.min - 0.0).abs() < 1e-6);
    assert!((q.max - 1.0).abs() < 1e-6);
}
20902
#[test]
fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
    // v6.0.3: CREATE TABLE accepts USING HALF, and the INSERT
    // path's `coerce_value` arm turns the incoming
    // `Value::Vector(Vec<f32>)` cell into a `Value::HalfVector`.
    // The inputs 0.0 / 0.25 / 0.5 / 1.0 are f16-representable,
    // so converting back to f32 must reproduce them exactly.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
        .unwrap();
    engine
        .execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
        .unwrap();

    let table = engine.catalog().get("t").unwrap();
    let rows = table.rows();
    assert_eq!(rows.len(), 1);

    let cell = &rows[0].values[0];
    let Value::HalfVector(h) = cell else {
        panic!("expected HalfVector cell, got {cell:?}")
    };
    assert_eq!(h.dim(), 4);

    let round_tripped = h.to_f32_vec();
    let expected = [0.0_f32, 0.25, 0.5, 1.0];
    for (got, want) in round_tripped.iter().zip(expected.iter()) {
        assert!(
            (got - want).abs() < 1e-6,
            "{g} vs {e} should be exact on f16 grid",
            g = got,
            e = want
        );
    }
}
20933
#[test]
fn alter_index_rebuild_in_place_succeeds() {
    // v6.0.4: a bare REBUILD (no encoding switch) re-walks every
    // row to rebuild the NSW graph. This exercises the engine
    // dispatch and storage helper plumbing while leaving every
    // cell encoding untouched.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
        .unwrap();
    for i in 0..8_i32 {
        #[allow(clippy::cast_precision_loss)]
        let base = (i as f32) * 0.1;
        let b1 = base + 0.01;
        let b2 = base + 0.02;
        let insert_sql = alloc::format!("INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])");
        engine.execute(&insert_sql).unwrap();
    }
    engine
        .execute("CREATE INDEX t_idx ON t USING hnsw (v)")
        .unwrap();
    engine.execute("ALTER INDEX t_idx REBUILD").unwrap();

    // No encoding clause was given, so the schema must still say F32.
    let expected_ty = DataType::Vector {
        dim: 3,
        encoding: VecEncoding::F32,
    };
    let table = engine.catalog().get("t").unwrap();
    assert_eq!(table.schema().columns[1].ty, expected_ty);
}
20964
#[test]
fn alter_index_rebuild_with_encoding_switches_cell_type() {
    // v6.0.4: REBUILD WITH (encoding = SQ8) recodes each stored
    // cell from F32 to SQ8 and rebuilds the graph on the new
    // encoding. Afterwards the schema must report Sq8 and the
    // stored cell must be an Sq8Vector.
    let mut engine = Engine::new();
    let setup = [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)",
        "INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])",
        "CREATE INDEX t_idx ON t USING hnsw (v)",
        "ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)",
    ];
    for sql in setup {
        engine.execute(sql).unwrap();
    }

    let table = engine.catalog().get("t").unwrap();
    let expected_ty = DataType::Vector {
        dim: 4,
        encoding: VecEncoding::Sq8,
    };
    assert_eq!(table.schema().columns[1].ty, expected_ty);

    let cell_is_sq8 = matches!(table.rows()[0].values[1], Value::Sq8Vector(_));
    assert!(cell_is_sq8);
}
20989
#[test]
fn alter_index_rebuild_unknown_index_errors() {
    // REBUILD against a name with no index behind it must report
    // `IndexNotFound` carrying that name.
    let mut engine = Engine::new();
    let err = engine.execute("ALTER INDEX nope REBUILD").unwrap_err();
    match &err {
        EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope" => {}
        _ => panic!("got: {err}"),
    }
}
21002
#[test]
fn alter_index_rebuild_on_btree_index_errors() {
    // In v6.0.4 REBUILD has no meaning for a B-tree index; the
    // storage layer rejects it with `Unsupported`.
    let mut engine = Engine::new();
    for sql in [
        "CREATE TABLE t (id INT NOT NULL)",
        "INSERT INTO t VALUES (1)",
        "CREATE INDEX t_idx ON t (id)",
    ] {
        engine.execute(sql).unwrap();
    }
    let err = engine.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
    if !matches!(&err, EngineError::Storage(StorageError::Unsupported(_))) {
        panic!("got: {err}");
    }
}
21017
#[test]
fn prepared_insert_substitutes_placeholders() {
    // v6.1.1: prepare() parses once; execute_prepared() walks the
    // AST and swaps $1/$2 for the supplied param Values BEFORE
    // dispatch sees them. The logical outcome matches a
    // simple-query INSERT, but parsing happens once per
    // *statement* rather than once per execution.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
        .unwrap();
    let stmt = engine.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();

    let people = [(1, "alice"), (2, "bob"), (3, "carol")];
    for (id, name) in people {
        let params = [Value::Int(id), Value::Text(name.into())];
        engine.execute_prepared(stmt.clone(), &params).unwrap();
    }

    // Verify through an ordinary simple-query SELECT.
    let rows = match engine.execute("SELECT id, name FROM t").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 3);
}
21040
#[test]
fn prepared_select_with_placeholder_filters_rows() {
    // Rows are (i, i * 7) for i in 0..10; binding $1 = 35 must
    // select only the row with i = 5.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    for i in 0..10_i32 {
        let insert_sql = alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7);
        engine.execute(&insert_sql).unwrap();
    }

    let stmt = engine.prepare("SELECT id FROM t WHERE v = $1").unwrap();
    let result = engine.execute_prepared(stmt, &[Value::Int(35)]).unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(5));
}
21059
#[test]
fn prepared_too_few_params_errors() {
    // Executing a statement that references $1 with an empty
    // parameter list must fail with `PlaceholderOutOfRange`
    // (n = 1, bound = 0).
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    let stmt = engine.prepare("INSERT INTO t VALUES ($1)").unwrap();
    let err = engine.execute_prepared(stmt, &[]).unwrap_err();
    match &err {
        EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 }) => {}
        _ => panic!("got: {err}"),
    }
}
21074
#[test]
fn bytea_cast_round_trips_text_input() {
    // v7.18 — `'hello'::bytea` yields the raw bytes of the text.
    // Closes the mailrs D-pre #3 reverse-acceptance gap.
    let engine = Engine::new();
    let rows = match engine.execute_readonly("SELECT 'hello'::bytea").unwrap() {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 1);
    let expected = Value::Bytes(b"hello".to_vec());
    assert_eq!(rows[0].values[0], expected);
}
21087
#[test]
fn bytea_cast_pg_escape_hex_form() {
    // E'\\xdeadbeef'::bytea — the E-string decodes to the
    // 10-character text `\xdeadbeef`, which ::bytea then reads as
    // a PG hex-form bytea literal, giving 4 bytes.
    let engine = Engine::new();
    let result = engine
        .execute_readonly(r"SELECT E'\\xdeadbeef'::bytea")
        .unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    let expected = Value::Bytes(vec![0xde, 0xad, 0xbe, 0xef]);
    assert_eq!(rows[0].values[0], expected);
}
21103
#[test]
fn bytea_cast_chains_through_octet_length() {
    // octet_length('hello'::bytea) → 5. Shows the cast composes
    // inside a larger expression, not only at the top level.
    let engine = Engine::new();
    let result = engine
        .execute_readonly("SELECT octet_length('hello'::bytea)")
        .unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    // Either integer width is acceptable; widen before comparing.
    let length = match &rows[0].values[0] {
        Value::Int(n) => i64::from(*n),
        Value::BigInt(n) => *n,
        other => panic!("expected integer length, got {other:?}"),
    };
    assert_eq!(length, 5);
}
21122
#[test]
fn readonly_prepared_on_snapshot_select_with_placeholder() {
    // v7.18 — the sqlx Pool fan-out needs to run prepared SELECTs
    // against a frozen snapshot without going back through the
    // writer engine. This mirrors the simple-query SELECT path in
    // `execute_readonly_on_snapshot`, but takes a Statement plus
    // bound params (the shape sqlx's Execute path produces).
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
        .unwrap();
    for i in 0..10_i32 {
        let insert_sql = alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7);
        engine.execute(&insert_sql).unwrap();
    }

    let snapshot = engine.clone_snapshot();
    let stmt = engine.prepare("SELECT id FROM t WHERE v = $1").unwrap();
    let result =
        Engine::execute_readonly_prepared_on_snapshot(&snapshot, stmt, &[Value::Int(35)])
            .unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].values[0], Value::Int(5));
}
21148
#[test]
fn readonly_prepared_on_snapshot_rejects_writes() {
    // Prepared DDL / DML sent down the readonly path has to come
    // back as `WriteRequired`, so the spg-sqlx connection layer
    // can route it to the writer mutex instead of the snapshot.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    let snapshot = engine.clone_snapshot();
    let stmt = engine.prepare("INSERT INTO t VALUES ($1)").unwrap();

    let outcome =
        Engine::execute_readonly_prepared_on_snapshot(&snapshot, stmt, &[Value::Int(1)]);
    let err = outcome.unwrap_err();
    if !matches!(&err, EngineError::WriteRequired) {
        panic!("got: {err}");
    }
}
21162
#[test]
fn readonly_prepared_on_snapshot_frozen_view() {
    // A snapshot captures engine state as of clone_snapshot().
    // Later writes are NOT visible through it — the caller must
    // take a fresh snapshot (or `AsyncReadHandle::refresh()`) to
    // observe them. The per-statement refresh in spg-sqlx relies
    // on exactly this contract.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    engine.execute("INSERT INTO t VALUES (1)").unwrap();
    let snapshot = engine.clone_snapshot();
    // This row lands after the snapshot was taken.
    engine.execute("INSERT INTO t VALUES (2)").unwrap();

    let stmt = engine.prepare("SELECT id FROM t WHERE id = $1").unwrap();
    let result =
        Engine::execute_readonly_prepared_on_snapshot(&snapshot, stmt, &[Value::Int(2)])
            .unwrap();
    let rows = match result {
        QueryResult::Rows { rows, .. } => rows,
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert!(rows.is_empty(), "id=2 was inserted after snapshot");
}
21184
#[test]
fn describe_prepared_on_snapshot_resolves_columns() {
    // v7.18 — sqlx's Executor::describe path on the readonly
    // fan-out must resolve column names and types against the
    // snapshot's catalog, not the live engine's catalog (which
    // may have moved on since).
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
        .unwrap();
    let snapshot = engine.clone_snapshot();
    let stmt = engine
        .prepare("SELECT id, name FROM t WHERE id = $1")
        .unwrap();

    let (_params, cols) = Engine::describe_prepared_on_snapshot(&snapshot, &stmt);
    assert_eq!(cols.len(), 2);
    let expected = [("id", DataType::Int), ("name", DataType::Text)];
    for (col, (want_name, want_ty)) in cols.iter().zip(expected) {
        assert_eq!(col.name, want_name);
        assert_eq!(col.ty, want_ty);
    }
}
21203
#[test]
fn insert_into_half_column_dim_mismatch_errors() {
    // A 2-element vector literal aimed at a VECTOR(4) USING HALF
    // column must be rejected with `StorageError::TypeMismatch`.
    // The assertion prints the actual error on failure, following
    // the `got: {err}` convention of the other error-path tests
    // in this module.
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
        .unwrap();
    let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
    assert!(
        matches!(
            &err,
            EngineError::Storage(StorageError::TypeMismatch { .. })
        ),
        "got: {err}",
    );
}
21215
#[test]
fn insert_into_sq8_column_dim_mismatch_errors() {
    // A dimension mismatch fails the guard on `coerce_value`'s
    // Vector→Sq8 arm and comes out as `TypeMismatch` — the same
    // error the F32 path produces today, which keeps client error
    // handling uniform across encodings.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (v VECTOR(4) USING SQ8)")
        .unwrap();
    let err = engine
        .execute("INSERT INTO t VALUES ([1.0, 2.0])")
        .unwrap_err();
    match &err {
        EngineError::Storage(StorageError::TypeMismatch { .. }) => {}
        _ => panic!("got: {err}"),
    }
}
21233
#[test]
fn create_table_duplicate_errors() {
    // Creating `foo` a second time must fail with
    // `DuplicateTable` naming the table. On failure the assertion
    // prints the error that was actually returned.
    let mut e = Engine::new();
    e.execute("CREATE TABLE foo (a INT)").unwrap();
    let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
    assert!(
        matches!(
            &err,
            EngineError::Storage(StorageError::DuplicateTable { name }) if name == "foo"
        ),
        "got: {err}"
    );
}
21244
#[test]
fn insert_into_unknown_table_errors() {
    // INSERT into a table that was never created must fail with
    // `TableNotFound` naming the table. On failure the assertion
    // prints the error that was actually returned.
    let mut e = Engine::new();
    let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
    assert!(
        matches!(
            &err,
            EngineError::Storage(StorageError::TableNotFound { name }) if name == "ghost"
        ),
        "got: {err}"
    );
}
21254
#[test]
fn insert_happy_path_reports_one_affected() {
    // A single-row INSERT reports one affected row and the table
    // then holds exactly one row.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
    let outcome = engine.execute("INSERT INTO foo VALUES (42)").unwrap();
    let affected = unwrap_command_ok(&outcome);
    assert_eq!(affected, 1);
    let table = engine.catalog().get("foo").unwrap();
    assert_eq!(table.row_count(), 1);
}
21263
#[test]
fn insert_arity_mismatch_propagates() {
    // Supplying one value for a two-column table must surface the
    // storage layer's `ArityMismatch`. On failure the assertion
    // prints the error that was actually returned.
    let mut e = Engine::new();
    e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
    let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
    assert!(
        matches!(
            &err,
            EngineError::Storage(StorageError::ArityMismatch { .. })
        ),
        "got: {err}"
    );
}
21274
#[test]
fn insert_negative_integer_via_unary_minus() {
    // `-7` in VALUES must be stored as `Value::Int(-7)`.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
    engine.execute("INSERT INTO foo VALUES (-7)").unwrap();
    let table = engine.catalog().get("foo").unwrap();
    let stored = table.rows();
    assert_eq!(stored[0].values[0], Value::Int(-7));
}
21283
#[test]
fn insert_expression_evaluated_against_empty_context() {
    // PG-canonical: INSERT VALUES takes an arbitrary scalar
    // expression. The engine evaluates it against an empty row
    // context, so column references would error while pure
    // arithmetic / function calls work.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
    engine.execute("INSERT INTO foo VALUES (1 + 2)").unwrap();
    let table = engine.catalog().get("foo").unwrap();
    let stored = table.rows();
    assert_eq!(stored[0].values[0], Value::Int(3));
}
21296
#[test]
fn select_star_returns_all_rows_in_insertion_order() {
    // Three inserts followed by SELECT * must return both columns
    // and all three rows, in the order they were inserted.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
        .unwrap();
    for sql in [
        "INSERT INTO foo VALUES (1, 'one')",
        "INSERT INTO foo VALUES (2, 'two')",
        "INSERT INTO foo VALUES (3, 'three')",
    ] {
        engine.execute(sql).unwrap();
    }

    let (columns, rows) = match engine.execute("SELECT * FROM foo").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        QueryResult::CommandOk { .. } => panic!("expected Rows"),
    };
    assert_eq!(columns.len(), 2);
    assert_eq!(columns[0].name, "a");
    assert_eq!(rows.len(), 3);
    let second_row = vec![Value::Int(2), Value::Text("two".into())];
    assert_eq!(rows[1].values, second_row);
}
21318
21319    #[test]
21320    fn select_star_on_empty_table_returns_zero_rows() {
21321        let mut e = Engine::new();
21322        e.execute("CREATE TABLE foo (a INT)").unwrap();
21323        let r = e.execute("SELECT * FROM foo").unwrap();
21324        match r {
21325            QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
21326            QueryResult::CommandOk { .. } => panic!("expected Rows"),
21327        }
21328    }
21329
21330    // --- v0.4: WHERE + projection ------------------------------------------
21331
21332    fn make_three_row_users(e: &mut Engine) {
21333        e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
21334            .unwrap();
21335        e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
21336            .unwrap();
21337        e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
21338            .unwrap();
21339        e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
21340            .unwrap();
21341    }
21342
21343    fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
21344        match r {
21345            QueryResult::Rows { columns, rows } => (columns, rows),
21346            QueryResult::CommandOk { .. } => panic!("expected Rows"),
21347        }
21348    }
21349
21350    #[test]
21351    fn where_filter_passes_only_true_rows() {
21352        let mut e = Engine::new();
21353        make_three_row_users(&mut e);
21354        let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
21355        let (_, rows) = unwrap_rows(r);
21356        assert_eq!(rows.len(), 2);
21357        assert_eq!(rows[0].values[0], Value::Int(2));
21358        assert_eq!(rows[1].values[0], Value::Int(3));
21359    }
21360
21361    #[test]
21362    fn where_with_null_result_filters_out_row() {
21363        let mut e = Engine::new();
21364        make_three_row_users(&mut e);
21365        // score is NULL for bob → score > 80 is NULL → row excluded
21366        let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
21367        let (_, rows) = unwrap_rows(r);
21368        assert_eq!(rows.len(), 1);
21369        assert_eq!(rows[0].values[1], Value::Text("alice".into()));
21370    }
21371
21372    #[test]
21373    fn projection_named_columns() {
21374        let mut e = Engine::new();
21375        make_three_row_users(&mut e);
21376        let r = e.execute("SELECT name, score FROM users").unwrap();
21377        let (cols, rows) = unwrap_rows(r);
21378        assert_eq!(cols.len(), 2);
21379        assert_eq!(cols[0].name, "name");
21380        assert_eq!(cols[1].name, "score");
21381        assert_eq!(rows.len(), 3);
21382        assert_eq!(
21383            rows[0].values,
21384            vec![Value::Text("alice".into()), Value::Int(90)]
21385        );
21386    }
21387
21388    #[test]
21389    fn projection_with_column_alias() {
21390        let mut e = Engine::new();
21391        make_three_row_users(&mut e);
21392        let r = e
21393            .execute("SELECT name AS who FROM users WHERE id = 1")
21394            .unwrap();
21395        let (cols, rows) = unwrap_rows(r);
21396        assert_eq!(cols[0].name, "who");
21397        assert_eq!(rows.len(), 1);
21398        assert_eq!(rows[0].values[0], Value::Text("alice".into()));
21399    }
21400
21401    #[test]
21402    fn qualified_column_with_table_alias_resolves() {
21403        let mut e = Engine::new();
21404        make_three_row_users(&mut e);
21405        let r = e
21406            .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
21407            .unwrap();
21408        let (cols, rows) = unwrap_rows(r);
21409        assert_eq!(cols.len(), 2);
21410        assert_eq!(rows.len(), 2);
21411    }
21412
21413    #[test]
21414    fn qualified_column_with_wrong_alias_errors() {
21415        let mut e = Engine::new();
21416        make_three_row_users(&mut e);
21417        let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
21418        assert!(matches!(
21419            err,
21420            EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
21421        ));
21422    }
21423
21424    #[test]
21425    fn select_unknown_column_errors_in_projection() {
21426        let mut e = Engine::new();
21427        make_three_row_users(&mut e);
21428        let err = e.execute("SELECT ghost FROM users").unwrap_err();
21429        assert!(matches!(
21430            err,
21431            EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
21432        ));
21433    }
21434
21435    #[test]
21436    fn where_unknown_column_errors() {
21437        let mut e = Engine::new();
21438        make_three_row_users(&mut e);
21439        let err = e
21440            .execute("SELECT * FROM users WHERE ghost = 1")
21441            .unwrap_err();
21442        assert!(matches!(
21443            err,
21444            EngineError::Eval(EvalError::ColumnNotFound { .. })
21445        ));
21446    }
21447
21448    #[test]
21449    fn expression_projection_evaluates_and_renders() {
21450        // Compound expressions in the SELECT list are evaluated per row;
21451        // the output column is typed TEXT, name defaults to the expression.
21452        let mut e = Engine::new();
21453        e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
21454        e.execute("INSERT INTO t VALUES (3)").unwrap();
21455        let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
21456        assert_eq!(rows.len(), 1);
21457        // The expression evaluates to integer 3; rendered as the cell value
21458        // (storage::Value::Int(3) since arithmetic kept ints).
21459        assert_eq!(rows[0].values[0], Value::Int(3));
21460    }
21461
21462    #[test]
21463    fn select_unknown_table_errors() {
21464        let mut e = Engine::new();
21465        let err = e.execute("SELECT * FROM ghost").unwrap_err();
21466        assert!(matches!(
21467            err,
21468            EngineError::Storage(StorageError::TableNotFound { .. })
21469        ));
21470    }
21471
21472    #[test]
21473    fn invalid_sql_returns_parse_error() {
21474        // v4.4: UPDATE is now real SQL, so use a true syntactic
21475        // garbage payload for the parse-error path.
21476        let mut e = Engine::new();
21477        let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
21478        assert!(matches!(err, EngineError::Parse(_)));
21479    }
21480
21481    // --- v0.8 CREATE INDEX + index seek ------------------------------------
21482
21483    #[test]
21484    fn create_index_registers_on_table() {
21485        let mut e = Engine::new();
21486        make_three_row_users(&mut e);
21487        e.execute("CREATE INDEX by_name ON users (name)").unwrap();
21488        let t = e.catalog().get("users").unwrap();
21489        assert_eq!(t.indices().len(), 1);
21490        assert_eq!(t.indices()[0].name, "by_name");
21491    }
21492
21493    #[test]
21494    fn create_index_on_unknown_table_errors() {
21495        let mut e = Engine::new();
21496        let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
21497        assert!(matches!(
21498            err,
21499            EngineError::Storage(StorageError::TableNotFound { .. })
21500        ));
21501    }
21502
21503    #[test]
21504    fn create_index_on_unknown_column_errors() {
21505        let mut e = Engine::new();
21506        make_three_row_users(&mut e);
21507        let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
21508        assert!(matches!(
21509            err,
21510            EngineError::Storage(StorageError::ColumnNotFound { .. })
21511        ));
21512    }
21513
21514    #[test]
21515    fn select_eq_uses_index_returns_same_rows_as_scan() {
21516        // Build two engines: one with an index, one without. Same query →
21517        // same row set (index is a planner optimisation, not a semantic
21518        // change).
21519        let mut without = Engine::new();
21520        make_three_row_users(&mut without);
21521        let mut with = Engine::new();
21522        make_three_row_users(&mut with);
21523        with.execute("CREATE INDEX by_id ON users (id)").unwrap();
21524
21525        let q = "SELECT * FROM users WHERE id = 2";
21526        let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
21527        let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
21528        assert_eq!(no_idx_rows, idx_rows);
21529        assert_eq!(idx_rows.len(), 1);
21530    }
21531
21532    #[test]
21533    fn select_eq_with_no_matching_index_value_returns_empty() {
21534        let mut e = Engine::new();
21535        make_three_row_users(&mut e);
21536        e.execute("CREATE INDEX by_id ON users (id)").unwrap();
21537        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
21538        assert_eq!(rows.len(), 0);
21539    }
21540
21541    // --- v0.9 transactions -------------------------------------------------
21542
21543    #[test]
21544    fn begin_sets_in_transaction_flag() {
21545        let mut e = Engine::new();
21546        assert!(!e.in_transaction());
21547        e.execute("BEGIN").unwrap();
21548        assert!(e.in_transaction());
21549    }
21550
21551    #[test]
21552    fn double_begin_errors() {
21553        let mut e = Engine::new();
21554        e.execute("BEGIN").unwrap();
21555        let err = e.execute("BEGIN").unwrap_err();
21556        assert_eq!(err, EngineError::TransactionAlreadyOpen);
21557    }
21558
21559    #[test]
21560    fn commit_without_begin_errors() {
21561        let mut e = Engine::new();
21562        let err = e.execute("COMMIT").unwrap_err();
21563        assert_eq!(err, EngineError::NoActiveTransaction);
21564    }
21565
21566    #[test]
21567    fn rollback_without_begin_errors() {
21568        let mut e = Engine::new();
21569        let err = e.execute("ROLLBACK").unwrap_err();
21570        assert_eq!(err, EngineError::NoActiveTransaction);
21571    }
21572
21573    #[test]
21574    fn commit_applies_shadow_to_committed_catalog() {
21575        let mut e = Engine::new();
21576        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21577        e.execute("BEGIN").unwrap();
21578        e.execute("INSERT INTO t VALUES (1)").unwrap();
21579        e.execute("INSERT INTO t VALUES (2)").unwrap();
21580        e.execute("COMMIT").unwrap();
21581        assert!(!e.in_transaction());
21582        assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
21583    }
21584
21585    #[test]
21586    fn rollback_discards_shadow() {
21587        let mut e = Engine::new();
21588        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21589        e.execute("BEGIN").unwrap();
21590        e.execute("INSERT INTO t VALUES (1)").unwrap();
21591        e.execute("INSERT INTO t VALUES (2)").unwrap();
21592        e.execute("ROLLBACK").unwrap();
21593        assert!(!e.in_transaction());
21594        assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
21595    }
21596
21597    #[test]
21598    fn select_during_tx_sees_uncommitted_writes_own_session() {
21599        // The shadow catalog is read by SELECTs while a TX is open — the
21600        // session can see its own pending writes.
21601        let mut e = Engine::new();
21602        e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
21603        e.execute("BEGIN").unwrap();
21604        e.execute("INSERT INTO t VALUES (42)").unwrap();
21605        let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
21606        assert_eq!(rows.len(), 1);
21607        assert_eq!(rows[0].values[0], Value::Int(42));
21608    }
21609
21610    #[test]
21611    fn snapshot_with_no_users_is_bare_catalog_format() {
21612        let mut e = Engine::new();
21613        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21614        let bytes = e.snapshot();
21615        assert_eq!(
21616            &bytes[..8],
21617            b"SPGDB001",
21618            "must be the bare v3.x catalog magic"
21619        );
21620        let e2 = Engine::restore_envelope(&bytes).unwrap();
21621        assert!(e2.users().is_empty());
21622        assert_eq!(e2.catalog().table_count(), 1);
21623    }
21624
21625    #[test]
21626    fn snapshot_with_users_round_trips_both_via_envelope() {
21627        let mut e = Engine::new();
21628        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
21629        e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
21630        e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
21631            .unwrap();
21632        let bytes = e.snapshot();
21633        assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
21634        let e2 = Engine::restore_envelope(&bytes).unwrap();
21635        assert_eq!(e2.users().len(), 2);
21636        assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
21637        assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
21638        assert_eq!(e2.verify_user("alice", "wrong"), None);
21639        assert_eq!(e2.catalog().table_count(), 1);
21640    }
21641
21642    #[test]
21643    fn ddl_inside_tx_also_rolled_back() {
21644        let mut e = Engine::new();
21645        e.execute("BEGIN").unwrap();
21646        e.execute("CREATE TABLE t (v INT)").unwrap();
21647        // Visible inside the TX.
21648        e.execute("SELECT * FROM t").unwrap();
21649        e.execute("ROLLBACK").unwrap();
21650        // Gone after rollback.
21651        let err = e.execute("SELECT * FROM t").unwrap_err();
21652        assert!(matches!(
21653            err,
21654            EngineError::Storage(StorageError::TableNotFound { .. })
21655        ));
21656    }
21657
21658    // ── v6.1.2: CREATE / DROP PUBLICATION (engine-side) ──────
21659
21660    #[test]
21661    fn create_publication_lands_in_catalog() {
21662        let mut e = Engine::new();
21663        assert!(e.publications().is_empty());
21664        e.execute("CREATE PUBLICATION pub_a").unwrap();
21665        assert_eq!(e.publications().len(), 1);
21666        assert!(e.publications().contains("pub_a"));
21667    }
21668
21669    #[test]
21670    fn create_publication_duplicate_errors() {
21671        let mut e = Engine::new();
21672        e.execute("CREATE PUBLICATION pub_a").unwrap();
21673        let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
21674        assert!(
21675            alloc::format!("{err:?}").contains("DuplicateName"),
21676            "got {err:?}"
21677        );
21678    }
21679
21680    #[test]
21681    fn drop_publication_silent_when_absent() {
21682        let mut e = Engine::new();
21683        // PG-compatible: DROP a publication that doesn't exist
21684        // succeeds (no-op) but reports zero affected.
21685        let r = e.execute("DROP PUBLICATION nope").unwrap();
21686        match r {
21687            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
21688            other => panic!("expected CommandOk, got {other:?}"),
21689        }
21690    }
21691
21692    #[test]
21693    fn drop_publication_present_reports_one_affected() {
21694        let mut e = Engine::new();
21695        e.execute("CREATE PUBLICATION pub_a").unwrap();
21696        let r = e.execute("DROP PUBLICATION pub_a").unwrap();
21697        match r {
21698            QueryResult::CommandOk {
21699                affected,
21700                modified_catalog,
21701            } => {
21702                assert_eq!(affected, 1);
21703                assert!(modified_catalog);
21704            }
21705            other => panic!("expected CommandOk, got {other:?}"),
21706        }
21707        assert!(e.publications().is_empty());
21708    }
21709
21710    #[test]
21711    fn publications_persist_across_snapshot_restore() {
21712        // The persist-across-restart ship-gate at the engine layer —
21713        // snapshot → restore_envelope round trip must preserve the
21714        // publication catalog. The spg-server e2e covers the
21715        // process-restart variant.
21716        let mut e = Engine::new();
21717        e.execute("CREATE PUBLICATION pub_a").unwrap();
21718        e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES")
21719            .unwrap();
21720        let snap = e.snapshot();
21721        let e2 = Engine::restore_envelope(&snap).unwrap();
21722        assert_eq!(e2.publications().len(), 2);
21723        assert!(e2.publications().contains("pub_a"));
21724        assert!(e2.publications().contains("pub_b"));
21725    }
21726
21727    #[test]
21728    fn create_publication_allowed_inside_transaction() {
21729        // v6.1.4 dropped the v6.1.2 in-TX guard — PG allows
21730        // CREATE PUBLICATION inside a TX and the auto-commit
21731        // wrap path needs the same allowance.
21732        let mut e = Engine::new();
21733        e.execute("BEGIN").unwrap();
21734        e.execute("CREATE PUBLICATION pub_a").unwrap();
21735        e.execute("COMMIT").unwrap();
21736        assert!(e.publications().contains("pub_a"));
21737    }
21738
21739    // ── v6.1.3: SHOW PUBLICATIONS + FOR-list variants ───────
21740
21741    #[test]
21742    fn create_publication_for_table_list_lands_with_scope() {
21743        let mut e = Engine::new();
21744        e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
21745        e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
21746        e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
21747            .unwrap();
21748        let scope = e.publications().get("pub_a").cloned();
21749        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
21750            panic!("expected ForTables scope, got {scope:?}")
21751        };
21752        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
21753    }
21754
21755    #[test]
21756    fn create_publication_all_tables_except_lands_with_scope() {
21757        let mut e = Engine::new();
21758        e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
21759            .unwrap();
21760        let scope = e.publications().get("pub_a").cloned();
21761        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
21762            panic!("expected AllTablesExcept scope, got {scope:?}")
21763        };
21764        assert_eq!(ts, alloc::vec!["t3".to_string()]);
21765    }
21766
21767    #[test]
21768    fn show_publications_empty_returns_zero_rows() {
21769        let e = Engine::new();
21770        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
21771        let QueryResult::Rows { rows, columns } = r else {
21772            panic!()
21773        };
21774        assert!(rows.is_empty());
21775        assert_eq!(columns.len(), 3);
21776        assert_eq!(columns[0].name, "name");
21777        assert_eq!(columns[1].name, "scope");
21778        assert_eq!(columns[2].name, "table_count");
21779    }
21780
21781    #[test]
21782    fn show_publications_returns_one_row_per_publication_ordered_by_name() {
21783        let mut e = Engine::new();
21784        e.execute("CREATE PUBLICATION z_pub").unwrap();
21785        e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
21786            .unwrap();
21787        e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
21788            .unwrap();
21789        let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
21790        let QueryResult::Rows { rows, .. } = r else {
21791            panic!()
21792        };
21793        assert_eq!(rows.len(), 3);
21794        // Alphabetical order: a_pub, m_pub, z_pub.
21795        let names: Vec<&str> = rows
21796            .iter()
21797            .map(|r| {
21798                if let Value::Text(s) = &r.values[0] {
21799                    s.as_str()
21800                } else {
21801                    panic!()
21802                }
21803            })
21804            .collect();
21805        assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
21806        // Row 0 — a_pub scope summary + table_count = 2.
21807        match &rows[0].values[1] {
21808            Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
21809            other => panic!("expected Text, got {other:?}"),
21810        }
21811        assert_eq!(rows[0].values[2], Value::Int(2));
21812        // Row 1 — m_pub.
21813        match &rows[1].values[1] {
21814            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
21815            other => panic!("expected Text, got {other:?}"),
21816        }
21817        assert_eq!(rows[1].values[2], Value::Int(1));
21818        // Row 2 — z_pub (AllTables → NULL count).
21819        match &rows[2].values[1] {
21820            Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
21821            other => panic!("expected Text, got {other:?}"),
21822        }
21823        assert_eq!(rows[2].values[2], Value::Null);
21824    }
21825
21826    #[test]
21827    fn for_list_scopes_persist_across_snapshot() {
21828        // The v6.1.2 envelope-v3 round-trip exercised AllTables;
21829        // v6.1.3 needs the scope-1 / scope-2 tags to survive too.
21830        let mut e = Engine::new();
21831        e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
21832        e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
21833            .unwrap();
21834        let snap = e.snapshot();
21835        let e2 = Engine::restore_envelope(&snap).unwrap();
21836        assert_eq!(e2.publications().len(), 2);
21837        let p1 = e2.publications().get("p1").cloned();
21838        let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
21839            panic!("p1 scope lost: {p1:?}")
21840        };
21841        assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
21842        let p2 = e2.publications().get("p2").cloned();
21843        let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
21844            panic!("p2 scope lost: {p2:?}")
21845        };
21846        assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
21847    }
21848
21849    // ── v6.1.4: CREATE / DROP SUBSCRIPTION + SHOW + envelope v4 ─
21850
21851    #[test]
21852    fn create_subscription_lands_in_catalog_with_defaults() {
21853        let mut e = Engine::new();
21854        e.execute(
21855            "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
21856        )
21857        .unwrap();
21858        let s = e.subscriptions().get("sub_a").cloned().expect("present");
21859        assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
21860        assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
21861        assert!(s.enabled);
21862        assert_eq!(s.last_received_pos, 0);
21863    }
21864
21865    #[test]
21866    fn create_subscription_duplicate_name_errors() {
21867        let mut e = Engine::new();
21868        e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
21869            .unwrap();
21870        let err = e
21871            .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
21872            .unwrap_err();
21873        assert!(
21874            alloc::format!("{err:?}").contains("DuplicateName"),
21875            "got {err:?}"
21876        );
21877    }
21878
21879    #[test]
21880    fn drop_subscription_silent_when_absent() {
21881        let mut e = Engine::new();
21882        let r = e.execute("DROP SUBSCRIPTION never").unwrap();
21883        match r {
21884            QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
21885            other => panic!("expected CommandOk, got {other:?}"),
21886        }
21887    }
21888
21889    #[test]
21890    fn subscription_advance_updates_last_pos_monotone() {
21891        let mut e = Engine::new();
21892        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
21893            .unwrap();
21894        assert!(e.subscription_advance("s", 100));
21895        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
21896        assert!(e.subscription_advance("s", 50)); // stale → ignored
21897        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
21898        assert!(e.subscription_advance("s", 200));
21899        assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
21900        assert!(!e.subscription_advance("missing", 1));
21901    }
21902
21903    #[test]
21904    fn show_subscriptions_returns_rows_ordered_by_name() {
21905        let mut e = Engine::new();
21906        e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
21907            .unwrap();
21908        e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
21909            .unwrap();
21910        let r = e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap();
21911        let QueryResult::Rows { rows, columns } = r else {
21912            panic!()
21913        };
21914        assert_eq!(rows.len(), 2);
21915        assert_eq!(columns.len(), 5);
21916        assert_eq!(columns[0].name, "name");
21917        assert_eq!(columns[4].name, "last_received_pos");
21918        // Alphabetical: a_sub, z_sub.
21919        let names: Vec<&str> = rows
21920            .iter()
21921            .map(|r| {
21922                if let Value::Text(s) = &r.values[0] {
21923                    s.as_str()
21924                } else {
21925                    panic!()
21926                }
21927            })
21928            .collect();
21929        assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);
21930        // Row 0: a_sub
21931        assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
21932        assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
21933        assert_eq!(rows[0].values[3], Value::Bool(true));
21934        assert_eq!(rows[0].values[4], Value::BigInt(0));
21935        // Row 1: z_sub — publications join with ", "
21936        assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
21937    }
21938
21939    #[test]
21940    fn subscriptions_persist_across_snapshot_envelope_v4() {
21941        let mut e = Engine::new();
21942        e.execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
21943            .unwrap();
21944        e.execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
21945            .unwrap();
21946        e.subscription_advance("s2", 42);
21947        let snap = e.snapshot();
21948        let e2 = Engine::restore_envelope(&snap).unwrap();
21949        assert_eq!(e2.subscriptions().len(), 2);
21950        let s1 = e2.subscriptions().get("s1").unwrap();
21951        assert_eq!(s1.conn_str, "h=A");
21952        assert_eq!(
21953            s1.publications,
21954            alloc::vec!["p1".to_string(), "p2".to_string()]
21955        );
21956        assert_eq!(s1.last_received_pos, 0);
21957        let s2 = e2.subscriptions().get("s2").unwrap();
21958        assert_eq!(s2.last_received_pos, 42);
21959    }
21960
21961    #[test]
21962    fn v3_envelope_loads_with_empty_subscriptions() {
21963        // v3 snapshot (publications-only). Forge it by hand so we
21964        // verify v6.1.4 readers don't panic — they must surface
21965        // empty subscriptions and a populated publication table.
21966        let mut e = Engine::new();
21967        e.execute("CREATE PUBLICATION pub_legacy").unwrap();
21968        let catalog = e.catalog.serialize();
21969        let users = crate::users::serialize_users(&e.users);
21970        let pubs = e.publications.serialize();
21971        let mut buf = Vec::new();
21972        buf.extend_from_slice(b"SPGENV01");
21973        buf.push(3u8); // v3
21974        buf.extend_from_slice(&u32::try_from(catalog.len()).unwrap().to_le_bytes());
21975        buf.extend_from_slice(&catalog);
21976        buf.extend_from_slice(&u32::try_from(users.len()).unwrap().to_le_bytes());
21977        buf.extend_from_slice(&users);
21978        buf.extend_from_slice(&u32::try_from(pubs.len()).unwrap().to_le_bytes());
21979        buf.extend_from_slice(&pubs);
21980        let crc = spg_crypto::crc32::crc32(&buf);
21981        buf.extend_from_slice(&crc.to_le_bytes());
21982
21983        let e2 = Engine::restore_envelope(&buf).expect("v3 envelope restores under v4 reader");
21984        assert!(e2.subscriptions().is_empty());
21985        assert!(e2.publications().contains("pub_legacy"));
21986    }
21987
21988    #[test]
21989    fn create_subscription_allowed_inside_transaction() {
21990        let mut e = Engine::new();
21991        e.execute("BEGIN").unwrap();
21992        e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
21993            .unwrap();
21994        e.execute("COMMIT").unwrap();
21995        assert!(e.subscriptions().contains("s"));
21996    }
21997
21998    // ── v6.2.0: ANALYZE + spg_statistic + envelope v5 ──────────
21999    #[test]
22000    fn analyze_populates_histogram_bounds() {
22001        let mut e = Engine::new();
22002        e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)")
22003            .unwrap();
22004        for i in 0..50 {
22005            e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
22006                .unwrap();
22007        }
22008        e.execute("ANALYZE t").unwrap();
22009        let stats = e.statistics();
22010        let id_stats = stats.get("t", "id").unwrap();
22011        assert!(id_stats.histogram_bounds.len() >= 2);
22012        assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
22013        assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
22014        assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
22015        assert_eq!(id_stats.n_distinct, 50);
22016    }
22017
22018    #[test]
22019    fn reanalyze_overwrites_prior_stats() {
22020        let mut e = Engine::new();
22021        e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
22022        for i in 0..10 {
22023            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
22024                .unwrap();
22025        }
22026        e.execute("ANALYZE t").unwrap();
22027        let n1 = e.statistics().get("t", "id").unwrap().n_distinct;
22028        assert_eq!(n1, 10);
22029        for i in 10..30 {
22030            e.execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
22031                .unwrap();
22032        }
22033        e.execute("ANALYZE t").unwrap();
22034        let n2 = e.statistics().get("t", "id").unwrap().n_distinct;
22035        assert_eq!(n2, 30);
22036    }
22037
22038    #[test]
22039    fn analyze_unknown_table_errors() {
22040        let mut e = Engine::new();
22041        let err = e.execute("ANALYZE nonexistent").unwrap_err();
22042        assert!(matches!(
22043            err,
22044            EngineError::Storage(StorageError::TableNotFound { .. })
22045        ));
22046    }
22047
#[test]
fn bare_analyze_covers_all_user_tables() {
    // ANALYZE with no table name must gather statistics for every user
    // table and report the number of tables it covered.
    let mut engine = Engine::new();
    for statement in [
        "CREATE TABLE t1 (id INT NOT NULL)",
        "CREATE TABLE t2 (name TEXT NOT NULL)",
        "INSERT INTO t1 VALUES (1)",
        "INSERT INTO t2 VALUES ('alice')",
    ] {
        engine.execute(statement).unwrap();
    }

    let (affected, modified_catalog) = match engine.execute("ANALYZE").unwrap() {
        QueryResult::CommandOk {
            affected,
            modified_catalog,
        } => (affected, modified_catalog),
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 2);
    assert!(modified_catalog);

    // Both tables must now have an entry for their single column.
    for (table, column) in [("t1", "id"), ("t2", "name")] {
        assert!(engine.statistics().get(table, column).is_some());
    }
}
22069
#[test]
fn select_from_spg_statistic_returns_rows_per_column() {
    // After ANALYZE, `SELECT * FROM spg_statistic` issued through the
    // read-only entry point must return one row per column of `t`.
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    e.execute("ANALYZE t").unwrap();

    // Report the unexpected variant on failure (same style as the
    // CommandOk check in `bare_analyze_covers_all_user_tables`) rather
    // than a bare `panic!()` that carries no diagnostic.
    let (rows, columns) = match e.execute_readonly("SELECT * FROM spg_statistic").unwrap() {
        QueryResult::Rows { rows, columns } => (rows, columns),
        other => panic!("expected Rows, got {other:?}"),
    };

    // v6.7.0 — spg_statistic gained a `cold_row_count` column.
    assert_eq!(columns.len(), 6);
    assert_eq!(columns[0].name, "table_name");
    assert_eq!(columns[4].name, "histogram_bounds");
    assert_eq!(columns[5].name, "cold_row_count");
    assert_eq!(rows.len(), 2, "one row per column of t");

    // Sorted by (table_name, column_name), so the first row is ("t", "id").
    match (&rows[0].values[0], &rows[0].values[1]) {
        (Value::Text(t), Value::Text(c)) => {
            assert_eq!(t, "t");
            // BTreeMap orders (table, column); columns "id" < "label".
            assert_eq!(c, "id");
        }
        _ => panic!("expected table_name and column_name to be Value::Text"),
    }
}
22098
#[test]
fn analyze_skips_vector_columns() {
    // Vector columns carry their own stats shape (the HNSW graph), so
    // ANALYZE records the scalar `id` column but leaves `v` out.
    let mut engine = Engine::new();
    for statement in [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)",
        "INSERT INTO t VALUES (1, [1, 2, 3])",
        "ANALYZE t",
    ] {
        engine.execute(statement).unwrap();
    }

    let stats = engine.statistics();
    assert!(stats.get("t", "id").is_some());
    assert!(stats.get("t", "v").is_none());
}
22111
#[test]
fn statistics_persist_across_envelope_v5_round_trip() {
    // Statistics gathered by ANALYZE must survive snapshot → restore:
    // the restored engine still reports 20 distinct ids for `t.id`.
    let mut source_engine = Engine::new();
    source_engine
        .execute("CREATE TABLE t (id INT NOT NULL)")
        .unwrap();
    (0..20).for_each(|i| {
        source_engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    source_engine.execute("ANALYZE").unwrap();

    let envelope = source_engine.snapshot();
    let restored = Engine::restore_envelope(&envelope).unwrap();
    let id_stats = restored.statistics().get("t", "id").unwrap();
    assert_eq!(id_stats.n_distinct, 20);
}
22126
22127    // ── v6.2.1 auto-analyze threshold ───────────────────────────
22128
#[test]
fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
    // An empty table that receives 10 inserts ends with modified=10 and
    // row_count=10. The threshold is 0.1 × max(10, 100) = 10, so nine
    // inserts stay below it and the tenth reaches it.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();

    (0..9).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    assert!(engine.tables_needing_analyze().is_empty(), "9 < threshold");

    engine.execute("INSERT INTO t VALUES (9)").unwrap();
    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
22145
#[test]
fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
    // Starting from a freshly analysed 1000-row table, each INSERT grows
    // both the modification counter and row_count, so the trigger needs
    // M ≥ 0.1 × (1000 + M), i.e. M ≥ 112. The first 50 inserts stay
    // below that; 150 more bring the total to 200 modifications against
    // a threshold of 0.1 × 1200 = 120, which fires.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();

    (0..1000).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    engine.execute("ANALYZE t").unwrap();
    assert!(engine.tables_needing_analyze().is_empty(), "fresh ANALYZE");

    (1000..1050).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    assert!(
        engine.tables_needing_analyze().is_empty(),
        "50 inserts < threshold of ~105"
    );

    (1050..1200).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });
    assert_eq!(
        engine.tables_needing_analyze(),
        alloc::vec!["t".to_string()],
        "200 inserts > 0.1 × 1200 threshold"
    );
}
22179
#[test]
fn auto_analyze_threshold_resets_after_analyze() {
    // 200 inserts flag the table for analysis; running ANALYZE must
    // clear that flag again.
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    (0..200).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i})"))
            .unwrap();
    });

    let pending_before = engine.tables_needing_analyze();
    assert!(!pending_before.is_empty());

    engine.execute("ANALYZE").unwrap();
    let pending_after = engine.tables_needing_analyze();
    assert!(pending_after.is_empty(), "ANALYZE must reset the counter");
}
22195
#[test]
fn auto_analyze_threshold_tracks_updates_and_deletes() {
    // UPDATE and DELETE must count towards the modification counter,
    // not just INSERT.
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    (0..50).for_each(|i| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'x')"))
            .unwrap();
    });
    engine.execute("ANALYZE t").unwrap();

    // The UPDATE touches 20 rows (id < 20) and the DELETE removes 5
    // (id >= 45): 25 modifications. With the 100-row floor the threshold
    // is 0.1 × 100 = 10, so 25 >= 10 flags the table.
    for statement in [
        "UPDATE t SET label = 'y' WHERE id < 20",
        "DELETE FROM t WHERE id >= 45",
    ] {
        engine.execute(statement).unwrap();
    }

    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
22212
#[test]
fn v4_envelope_loads_with_empty_statistics() {
    // Appends one envelope section: a little-endian u32 byte length
    // followed by the payload itself.
    fn append_section(out: &mut Vec<u8>, payload: &[u8]) {
        let byte_len = u32::try_from(payload.len()).unwrap();
        out.extend_from_slice(&byte_len.to_le_bytes());
        out.extend_from_slice(payload);
    }

    // Hand-forge a v4 envelope — catalog, users, publications and
    // subscriptions sections, but no statistics section. A v6.2.0
    // reader must accept it and come up with empty statistics.
    let mut engine = Engine::new();
    engine
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();

    let catalog_bytes = engine.catalog.serialize();
    let user_bytes = crate::users::serialize_users(&engine.users);
    let publication_bytes = engine.publications.serialize();
    let subscription_bytes = engine.subscriptions.serialize();

    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(4u8);
    append_section(&mut envelope, &catalog_bytes);
    append_section(&mut envelope, &user_bytes);
    append_section(&mut envelope, &publication_bytes);
    append_section(&mut envelope, &subscription_bytes);

    // Trailing CRC32 covers every byte written so far.
    let checksum = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&checksum.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v4 envelope restores");
    assert!(restored.statistics().is_empty());
}
22241
#[test]
fn v1_v2_envelope_loads_with_empty_publications() {
    // Writes one length-prefixed block: little-endian u32 size, then
    // the raw bytes.
    fn write_block(out: &mut Vec<u8>, bytes: &[u8]) {
        let size = u32::try_from(bytes.len()).unwrap();
        out.extend_from_slice(&size.to_le_bytes());
        out.extend_from_slice(bytes);
    }

    // Back-compat lock: an envelope from before v6.1.2 (version byte 2,
    // no publication trailer) must still deserialise, and the restored
    // engine must report zero publications. The v2 envelope is forged
    // by hand below rather than produced by the engine.
    let mut engine = Engine::new();
    // Create one user so the serialised users block is not empty.
    engine
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();

    // v2 layout: magic, version byte, catalog block, users block, CRC32.
    let catalog_bytes = engine.catalog.serialize();
    let user_bytes = crate::users::serialize_users(&engine.users);

    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(2u8); // v2
    write_block(&mut envelope, &catalog_bytes);
    write_block(&mut envelope, &user_bytes);

    let checksum = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&checksum.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v2 envelope restores");
    assert!(restored.publications().is_empty());
}
22272}