1#![no_std]
6
7extern crate alloc;
8
9pub mod aggregate;
10pub mod describe;
11pub mod eval;
12pub mod json;
13pub mod memoize;
14pub mod plan_cache;
15pub mod publications;
16pub mod query_stats;
17pub mod reorder;
18pub mod selectivity;
19pub mod statistics;
20pub mod subscriptions;
21pub mod users;
22
23pub use crate::users::{Role, ScramSecrets, UserError, UserStore};
24
25use alloc::borrow::Cow;
26use alloc::boxed::Box;
27use alloc::collections::BTreeMap;
28use alloc::string::{String, ToString};
29use alloc::vec::Vec;
30use core::fmt;
31
32use spg_sql::ast::{
33 BinOp, ColumnDef, ColumnName, ColumnTypeName, CreateIndexStatement,
34 CreatePublicationStatement, CreateSubscriptionStatement, CreateTableStatement,
35 CreateUserStatement, Expr, FrameBound, FrameKind, FromClause, IndexMethod, InsertStatement,
36 JoinKind, Literal, OrderBy, SelectItem, SelectStatement, Statement, UnOp, UnionKind,
37 VecEncoding as SqlVecEncoding, WindowFrame,
38};
39use spg_sql::parser::{self, ParseError};
40use spg_storage::{
41 Catalog, ColumnSchema, CompactReport, DataType, IndexKey, IndexKind, Row, StorageError, Table,
42 TableSchema, Value, VecEncoding,
43};
44
45use crate::eval::{EvalContext, EvalError};
46
/// Outcome of executing a single statement.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryResult {
    /// The statement completed without producing a row set.
    CommandOk {
        /// Count reported by the statement (e.g. `1` after a successful
        /// `CREATE PUBLICATION`, `0` for statements accepted as no-ops).
        affected: usize,
        /// `true` when the statement changed engine state (for example a
        /// `DROP PUBLICATION` that actually removed an entry), `false` for
        /// no-ops.
        modified_catalog: bool,
    },
    /// The statement produced a row set.
    Rows {
        /// Schema of the output columns.
        columns: Vec<ColumnSchema>,
        /// The result rows.
        rows: Vec<Row>,
    },
}
69
/// Errors returned by the engine's entry points.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum EngineError {
    /// The SQL text was rejected by the parser.
    Parse(ParseError),
    /// The storage layer reported a failure.
    Storage(StorageError),
    /// Expression evaluation failed.
    Eval(EvalError),
    /// The request is not handled by the engine; the payload is a
    /// human-readable explanation.
    Unsupported(String),
    /// Rendered as "a transaction is already open".
    TransactionAlreadyOpen,
    /// Rendered as "no active transaction".
    NoActiveTransaction,
    /// The read-only entry point was given a statement it does not handle;
    /// the caller should use `execute` instead of `execute_readonly`.
    WriteRequired,
    /// A result set held more rows than the configured `max_query_rows`;
    /// the payload is that configured cap.
    RowLimitExceeded(usize),
    /// The statement's [`CancelToken`] was observed set.
    Cancelled,
}
102
103impl fmt::Display for EngineError {
104 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 match self {
106 Self::Parse(e) => write!(f, "parse: {e}"),
107 Self::Storage(e) => write!(f, "storage: {e}"),
108 Self::Eval(e) => write!(f, "eval: {e}"),
109 Self::Unsupported(s) => write!(f, "unsupported: {s}"),
110 Self::TransactionAlreadyOpen => f.write_str("a transaction is already open"),
111 Self::NoActiveTransaction => f.write_str("no active transaction"),
112 Self::WriteRequired => {
113 f.write_str("statement requires a write lock (use execute, not execute_readonly)")
114 }
115 Self::RowLimitExceeded(n) => {
116 write!(f, "query exceeded max_query_rows={n}")
117 }
118 Self::Cancelled => f.write_str("query cancelled (timeout or client request)"),
119 }
120 }
121}
122
123impl From<ParseError> for EngineError {
124 fn from(e: ParseError) -> Self {
125 Self::Parse(e)
126 }
127}
128impl From<StorageError> for EngineError {
129 fn from(e: StorageError) -> Self {
130 Self::Storage(e)
131 }
132}
133impl From<EvalError> for EngineError {
134 fn from(e: EvalError) -> Self {
135 Self::Eval(e)
136 }
137}
138
/// Host-supplied clock callback.
///
/// The engine calls it when preparing a statement (the value is handed to
/// `rewrite_clock_calls`) and around statement execution to measure elapsed
/// time.
// NOTE(review): call sites name the value `now_micros`, so the unit is
// presumably microseconds — confirm the epoch with the embedder.
pub type ClockFn = fn() -> i64;

/// Host-supplied source of 16-byte salts, used by `Engine::create_user` for
/// the SCRAM salt when one is configured.
pub type SaltFn = fn() -> [u8; 16];
156
157#[derive(Debug, Clone, Copy)]
168pub struct CancelToken<'a> {
169 flag: Option<&'a core::sync::atomic::AtomicBool>,
170}
171
172impl<'a> CancelToken<'a> {
173 #[must_use]
174 pub const fn none() -> Self {
175 Self { flag: None }
176 }
177
178 #[must_use]
179 pub const fn from_flag(f: &'a core::sync::atomic::AtomicBool) -> Self {
180 Self { flag: Some(f) }
181 }
182
183 #[must_use]
184 pub fn is_cancelled(self) -> bool {
185 self.flag
186 .is_some_and(|f| f.load(core::sync::atomic::Ordering::Relaxed))
187 }
188
189 #[inline]
193 pub fn check(self) -> Result<(), EngineError> {
194 if self.is_cancelled() {
195 Err(EngineError::Cancelled)
196 } else {
197 Ok(())
198 }
199 }
200}
201
/// Eight-byte tag at the start of every snapshot envelope. A buffer that does
/// not begin with it is treated as a bare catalog.
const ENVELOPE_MAGIC: &[u8; 8] = b"SPGENV01";
/// Envelope layout: catalog + users, no CRC trailer.
const ENVELOPE_VERSION_V1: u8 = 1;
/// V1 plus a trailing CRC32 over everything before it.
const ENVELOPE_VERSION_V2: u8 = 2;
/// V2 plus a publications section after users.
const ENVELOPE_VERSION_V3: u8 = 3;
/// V3 plus a subscriptions section after publications.
const ENVELOPE_VERSION_V4: u8 = 4;
/// V4 plus a statistics section after subscriptions; the version written by
/// `build_envelope`.
const ENVELOPE_VERSION_V5: u8 = 5;
265
266fn build_envelope(
267 catalog: &[u8],
268 users: &[u8],
269 pubs: &[u8],
270 subs: &[u8],
271 stats: &[u8],
272) -> Vec<u8> {
273 let mut out = Vec::with_capacity(
274 8 + 1
275 + 4
276 + catalog.len()
277 + 4
278 + users.len()
279 + 4
280 + pubs.len()
281 + 4
282 + subs.len()
283 + 4
284 + stats.len()
285 + 4,
286 );
287 out.extend_from_slice(ENVELOPE_MAGIC);
288 out.push(ENVELOPE_VERSION_V5);
289 out.extend_from_slice(
290 &u32::try_from(catalog.len())
291 .expect("≤ 4G catalog")
292 .to_le_bytes(),
293 );
294 out.extend_from_slice(catalog);
295 out.extend_from_slice(
296 &u32::try_from(users.len())
297 .expect("≤ 4G users")
298 .to_le_bytes(),
299 );
300 out.extend_from_slice(users);
301 out.extend_from_slice(
302 &u32::try_from(pubs.len())
303 .expect("≤ 4G publications")
304 .to_le_bytes(),
305 );
306 out.extend_from_slice(pubs);
307 out.extend_from_slice(
308 &u32::try_from(subs.len())
309 .expect("≤ 4G subscriptions")
310 .to_le_bytes(),
311 );
312 out.extend_from_slice(subs);
313 out.extend_from_slice(
314 &u32::try_from(stats.len())
315 .expect("≤ 4G statistics")
316 .to_le_bytes(),
317 );
318 out.extend_from_slice(stats);
319 let crc = spg_crypto::crc32::crc32(&out);
320 out.extend_from_slice(&crc.to_le_bytes());
321 out
322}
323
/// Result of trying to interpret a snapshot buffer as an envelope.
enum EnvelopeParse<'a> {
    /// The buffer is not a well-formed envelope (wrong magic, unknown
    /// version, or inconsistent lengths). `Engine::restore_envelope` then
    /// tries to deserialize the whole buffer as a bare catalog.
    Bare,
    /// A well-formed envelope; every slice borrows from the input buffer.
    Pair {
        /// Serialized catalog section.
        catalog: &'a [u8],
        /// Serialized user-store section.
        users: &'a [u8],
        /// Publications section; `None` for envelope versions below 3.
        publications: Option<&'a [u8]>,
        /// Subscriptions section; `None` for envelope versions below 4.
        subscriptions: Option<&'a [u8]>,
        /// Statistics section; `None` for envelope versions below 5.
        statistics: Option<&'a [u8]>,
    },
    /// The framing was valid but the trailing CRC32 did not match.
    CrcMismatch {
        /// CRC stored in the envelope trailer.
        expected: u32,
        /// CRC computed over the bytes preceding the trailer.
        computed: u32,
    },
}
344
345fn split_envelope(buf: &[u8]) -> EnvelopeParse<'_> {
350 if buf.len() < 8 + 1 + 4 || &buf[..8] != ENVELOPE_MAGIC {
351 return EnvelopeParse::Bare;
352 }
353 let version = buf[8];
354 if !matches!(
355 version,
356 ENVELOPE_VERSION_V1
357 | ENVELOPE_VERSION_V2
358 | ENVELOPE_VERSION_V3
359 | ENVELOPE_VERSION_V4
360 | ENVELOPE_VERSION_V5
361 ) {
362 return EnvelopeParse::Bare;
363 }
364 let mut p = 9usize;
365 let Some(cat_len_bytes) = buf.get(p..p + 4) else {
366 return EnvelopeParse::Bare;
367 };
368 let Ok(cat_len_arr) = cat_len_bytes.try_into() else {
369 return EnvelopeParse::Bare;
370 };
371 let cat_len = u32::from_le_bytes(cat_len_arr) as usize;
372 p += 4;
373 if p + cat_len + 4 > buf.len() {
374 return EnvelopeParse::Bare;
375 }
376 let catalog = &buf[p..p + cat_len];
377 p += cat_len;
378 let Some(user_len_bytes) = buf.get(p..p + 4) else {
379 return EnvelopeParse::Bare;
380 };
381 let Ok(user_len_arr) = user_len_bytes.try_into() else {
382 return EnvelopeParse::Bare;
383 };
384 let user_len = u32::from_le_bytes(user_len_arr) as usize;
385 p += 4;
386 if p + user_len > buf.len() {
387 return EnvelopeParse::Bare;
388 }
389 let users = &buf[p..p + user_len];
390 p += user_len;
391 let publications = if matches!(
392 version,
393 ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
394 ) {
395 let Some(pubs_len_bytes) = buf.get(p..p + 4) else {
397 return EnvelopeParse::Bare;
398 };
399 let Ok(pubs_len_arr) = pubs_len_bytes.try_into() else {
400 return EnvelopeParse::Bare;
401 };
402 let pubs_len = u32::from_le_bytes(pubs_len_arr) as usize;
403 p += 4;
404 if p + pubs_len > buf.len() {
405 return EnvelopeParse::Bare;
406 }
407 let pubs_slice = &buf[p..p + pubs_len];
408 p += pubs_len;
409 Some(pubs_slice)
410 } else {
411 None
412 };
413 let subscriptions = if matches!(version, ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5) {
414 let Some(subs_len_bytes) = buf.get(p..p + 4) else {
416 return EnvelopeParse::Bare;
417 };
418 let Ok(subs_len_arr) = subs_len_bytes.try_into() else {
419 return EnvelopeParse::Bare;
420 };
421 let subs_len = u32::from_le_bytes(subs_len_arr) as usize;
422 p += 4;
423 if p + subs_len > buf.len() {
424 return EnvelopeParse::Bare;
425 }
426 let subs_slice = &buf[p..p + subs_len];
427 p += subs_len;
428 Some(subs_slice)
429 } else {
430 None
431 };
432 let statistics = if version == ENVELOPE_VERSION_V5 {
433 let Some(stats_len_bytes) = buf.get(p..p + 4) else {
435 return EnvelopeParse::Bare;
436 };
437 let Ok(stats_len_arr) = stats_len_bytes.try_into() else {
438 return EnvelopeParse::Bare;
439 };
440 let stats_len = u32::from_le_bytes(stats_len_arr) as usize;
441 p += 4;
442 if p + stats_len > buf.len() {
443 return EnvelopeParse::Bare;
444 }
445 let stats_slice = &buf[p..p + stats_len];
446 p += stats_len;
447 Some(stats_slice)
448 } else {
449 None
450 };
451 if matches!(
452 version,
453 ENVELOPE_VERSION_V2 | ENVELOPE_VERSION_V3 | ENVELOPE_VERSION_V4 | ENVELOPE_VERSION_V5
454 ) {
455 if p + 4 != buf.len() {
456 return EnvelopeParse::Bare;
457 }
458 let Ok(crc_arr) = buf[p..p + 4].try_into() else {
459 return EnvelopeParse::Bare;
460 };
461 let expected = u32::from_le_bytes(crc_arr);
462 let computed = spg_crypto::crc32::crc32(&buf[..p]);
463 if expected != computed {
464 return EnvelopeParse::CrcMismatch { expected, computed };
465 }
466 } else if p != buf.len() {
467 return EnvelopeParse::Bare;
469 }
470 EnvelopeParse::Pair {
471 catalog,
472 users,
473 publications,
474 subscriptions,
475 statistics,
476 }
477}
478
/// Identifier of a transaction slot; a thin wrapper over `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TxId(pub u64);

/// Transaction id used by `Engine::execute` and `Engine::execute_with_cancel`,
/// i.e. by callers that do not pass an explicit id.
pub const IMPLICIT_TX: TxId = TxId(0);

/// 4 MiB.
// NOTE(review): presumably the default `target_segment_bytes` for cold-segment
// compaction; the use site is outside this part of the file — confirm.
pub const COMPACTION_TARGET_DEFAULT_BYTES: u64 = 4 * 1024 * 1024;
501
/// Per-transaction state stored in `Engine::tx_catalogs`.
#[derive(Debug, Default, Clone)]
struct TxState {
    /// Catalog used instead of the engine's own while this transaction is the
    /// current one (see `Engine::active_catalog`).
    catalog: Catalog,
    /// Named catalog copies.
    // NOTE(review): presumably one entry per SAVEPOINT, in creation order —
    // confirm against the savepoint handlers.
    savepoints: Vec<(String, Catalog)>,
}
520
521#[derive(Debug, Default)]
522pub struct Engine {
523 catalog: Catalog,
526 tx_catalogs: BTreeMap<TxId, TxState>,
531 current_tx: Option<TxId>,
536 next_tx_id: u64,
539 clock: Option<ClockFn>,
542 salt_fn: Option<SaltFn>,
546 max_query_rows: Option<usize>,
552 users: UserStore,
558 publications: publications::Publications,
562 subscriptions: subscriptions::Subscriptions,
566 statistics: statistics::Statistics,
570 plan_cache: plan_cache::PlanCache,
574 query_stats: query_stats::QueryStats,
578 activity_provider: Option<ActivityProvider>,
585 audit_chain_provider: Option<AuditChainProvider>,
590 audit_verifier: Option<AuditVerifier>,
591 slow_query_threshold_us: Option<u64>,
597 slow_query_logger: Option<SlowQueryLogger>,
598}
599
/// Host callback for slow statements. The engine calls it with the SQL text
/// and the measured elapsed time once that time reaches the configured
/// threshold.
// NOTE(review): elapsed is a difference of two `ClockFn` readings, presumably
// microseconds — confirm.
pub type SlowQueryLogger = fn(&str, u64);
604
605fn render_create_table(name: &str, columns: &[ColumnSchema]) -> String {
610 let mut out = alloc::format!("CREATE TABLE {name} (");
611 for (i, col) in columns.iter().enumerate() {
612 if i > 0 {
613 out.push_str(", ");
614 }
615 out.push_str(&col.name);
616 out.push(' ');
617 out.push_str(&render_data_type(col.ty));
618 if !col.nullable {
619 out.push_str(" NOT NULL");
620 }
621 if col.auto_increment {
622 out.push_str(" AUTO_INCREMENT");
623 }
624 }
625 out.push(')');
626 out
627}
628
629fn render_data_type(ty: DataType) -> String {
630 match ty {
631 DataType::SmallInt => "SMALLINT".into(),
632 DataType::Int => "INT".into(),
633 DataType::BigInt => "BIGINT".into(),
634 DataType::Float => "FLOAT".into(),
635 DataType::Text => "TEXT".into(),
636 DataType::Varchar(n) => alloc::format!("VARCHAR({n})"),
637 DataType::Char(n) => alloc::format!("CHAR({n})"),
638 DataType::Bool => "BOOL".into(),
639 DataType::Vector { dim, encoding } => match encoding {
640 spg_storage::VecEncoding::F32 => alloc::format!("VECTOR({dim})"),
641 spg_storage::VecEncoding::Sq8 => alloc::format!("VECTOR({dim}) USING SQ8"),
642 spg_storage::VecEncoding::F16 => alloc::format!("VECTOR({dim}) USING HALF"),
643 },
644 DataType::Numeric { precision, scale } => {
645 alloc::format!("NUMERIC({precision},{scale})")
646 }
647 DataType::Date => "DATE".into(),
648 DataType::Timestamp => "TIMESTAMP".into(),
649 DataType::Interval => "INTERVAL".into(),
650 DataType::Json => "JSON".into(),
651 DataType::Jsonb => "JSONB".into(),
652 DataType::Timestamptz => "TIMESTAMPTZ".into(),
653 DataType::Bytes => "BYTEA".into(),
654 }
655}
656
/// One row of session activity as reported by the host.
// NOTE(review): field meanings below are inferred from their names; the
// producer lives outside the engine — confirm with the server layer.
#[derive(Debug, Clone)]
pub struct ActivityRow {
    /// Session identifier.
    pub pid: u32,
    /// User name of the session.
    pub user: String,
    /// Start timestamp (presumably microseconds — confirm).
    pub started_at_us: i64,
    /// SQL text the session is running.
    pub current_sql: String,
    /// What the session is waiting on, as free text.
    pub wait_event: String,
    /// Elapsed time (presumably microseconds — confirm).
    pub elapsed_us: i64,
    /// Whether the session has a transaction open.
    pub in_transaction: bool,
}

/// Host callback returning the current activity rows.
pub type ActivityProvider = fn() -> Vec<ActivityRow>;
674
/// One audit-chain entry as reported by the host.
// NOTE(review): field meanings below are inferred from their names; the
// producer lives outside the engine — confirm with the server layer.
#[derive(Debug, Clone)]
pub struct AuditRow {
    /// Sequence number of the entry.
    pub seq: i64,
    /// Timestamp (presumably milliseconds — confirm).
    pub ts_ms: i64,
    /// Hex-encoded hash of the previous entry.
    pub prev_hash_hex: String,
    /// Hex-encoded hash of this entry.
    pub entry_hash_hex: String,
    /// SQL text recorded by the entry.
    pub sql: String,
}

/// Host callback returning the audit-chain rows.
pub type AuditChainProvider = fn() -> Vec<AuditRow>;
/// Host callback verifying the audit chain.
// NOTE(review): the meaning of the two returned integers is not visible in
// this part of the file — confirm before relying on it.
pub type AuditVerifier = fn() -> (i64, i64);
692
693impl Engine {
694 pub fn new() -> Self {
695 Self {
696 catalog: Catalog::new(),
697 tx_catalogs: BTreeMap::new(),
698 current_tx: None,
699 next_tx_id: 1,
700 clock: None,
701 salt_fn: None,
702 max_query_rows: None,
703 users: UserStore::new(),
704 publications: publications::Publications::new(),
705 subscriptions: subscriptions::Subscriptions::new(),
706 statistics: statistics::Statistics::new(),
707 plan_cache: plan_cache::PlanCache::new(),
708 query_stats: query_stats::QueryStats::new(),
709 activity_provider: None,
710 audit_chain_provider: None,
711 audit_verifier: None,
712 slow_query_threshold_us: None,
713 slow_query_logger: None,
714 }
715 }
716
717 pub fn restore(catalog: Catalog) -> Self {
720 Self {
721 catalog,
722 tx_catalogs: BTreeMap::new(),
723 current_tx: None,
724 next_tx_id: 1,
725 clock: None,
726 salt_fn: None,
727 max_query_rows: None,
728 users: UserStore::new(),
729 publications: publications::Publications::new(),
730 subscriptions: subscriptions::Subscriptions::new(),
731 statistics: statistics::Statistics::new(),
732 plan_cache: plan_cache::PlanCache::new(),
733 query_stats: query_stats::QueryStats::new(),
734 activity_provider: None,
735 audit_chain_provider: None,
736 audit_verifier: None,
737 slow_query_threshold_us: None,
738 slow_query_logger: None,
739 }
740 }
741
742 pub fn restore_envelope(buf: &[u8]) -> Result<Self, EngineError> {
749 match split_envelope(buf) {
750 EnvelopeParse::Pair {
751 catalog: catalog_bytes,
752 users: user_bytes,
753 publications: pub_bytes,
754 subscriptions: sub_bytes,
755 statistics: stats_bytes,
756 } => {
757 let catalog = Catalog::deserialize(catalog_bytes).map_err(EngineError::Storage)?;
758 let users = users::deserialize_users(user_bytes)
759 .map_err(|e| EngineError::Unsupported(alloc::format!("users restore: {e}")))?;
760 let publications = match pub_bytes {
761 Some(b) => publications::Publications::deserialize(b).map_err(|e| {
762 EngineError::Unsupported(alloc::format!("publications restore: {e:?}"))
763 })?,
764 None => publications::Publications::new(),
765 };
766 let subscriptions = match sub_bytes {
767 Some(b) => subscriptions::Subscriptions::deserialize(b).map_err(|e| {
768 EngineError::Unsupported(alloc::format!("subscriptions restore: {e:?}"))
769 })?,
770 None => subscriptions::Subscriptions::new(),
771 };
772 let statistics = match stats_bytes {
773 Some(b) => statistics::Statistics::deserialize(b).map_err(|e| {
774 EngineError::Unsupported(alloc::format!("statistics restore: {e:?}"))
775 })?,
776 None => statistics::Statistics::new(),
777 };
778 Ok(Self {
779 catalog,
780 tx_catalogs: BTreeMap::new(),
781 current_tx: None,
782 next_tx_id: 1,
783 clock: None,
784 salt_fn: None,
785 max_query_rows: None,
786 users,
787 publications,
788 subscriptions,
789 statistics,
790 plan_cache: plan_cache::PlanCache::new(),
791 query_stats: query_stats::QueryStats::new(),
792 activity_provider: None,
793 audit_chain_provider: None,
794 audit_verifier: None,
795 slow_query_threshold_us: None,
796 slow_query_logger: None,
797 })
798 }
799 EnvelopeParse::CrcMismatch { expected, computed } => {
800 Err(EngineError::Storage(StorageError::Corrupt(alloc::format!(
801 "snapshot envelope CRC32 mismatch (expected={expected:#010x}, computed={computed:#010x})"
802 ))))
803 }
804 EnvelopeParse::Bare => {
805 let catalog = Catalog::deserialize(buf).map_err(EngineError::Storage)?;
806 Ok(Self::restore(catalog))
807 }
808 }
809 }
810
    /// Read-only access to the user store.
    pub const fn users(&self) -> &UserStore {
        &self.users
    }
814
815 pub fn create_user(
819 &mut self,
820 name: &str,
821 password: &str,
822 role: Role,
823 salt: [u8; 16],
824 ) -> Result<(), UserError> {
825 self.users.create(name, password, role, salt)?;
826 let scram_salt = self.salt_fn.map_or_else(
832 || {
833 let mut s = [0u8; users::SCRAM_SALT_LEN];
834 let digest = spg_crypto::hash(name.as_bytes());
835 s.copy_from_slice(&digest[16..32]);
838 s
839 },
840 |f| f(),
841 );
842 self.users
843 .enable_scram(name, password, scram_salt, users::SCRAM_DEFAULT_ITERS)?;
844 Ok(())
845 }
846
    /// Removes the user `name` from the user store.
    ///
    /// # Errors
    /// Returns whatever [`UserError`] the user store reports.
    pub fn drop_user(&mut self, name: &str) -> Result<(), UserError> {
        self.users.drop(name)
    }
850
    /// Checks `name`/`password` against the user store and returns the
    /// store's answer: the user's [`Role`] on success, `None` otherwise.
    pub fn verify_user(&self, name: &str, password: &str) -> Option<Role> {
        self.users.verify(name, password)
    }
854
    /// Builder-style setter: installs the host clock and returns the engine.
    ///
    /// With a clock installed, statements are timed and clock calls in SQL
    /// are rewritten using its value.
    #[must_use]
    pub const fn with_clock(mut self, clock: ClockFn) -> Self {
        self.clock = Some(clock);
        self
    }
862
    /// Builder-style setter: installs the salt source used for SCRAM salts in
    /// `create_user` and returns the engine.
    #[must_use]
    pub const fn with_salt_fn(mut self, f: SaltFn) -> Self {
        self.salt_fn = Some(f);
        self
    }
870
    /// Builder-style setter: caps result sets at `n` rows and returns the
    /// engine. A result with more than `n` rows is turned into
    /// [`EngineError::RowLimitExceeded`].
    #[must_use]
    pub const fn with_max_query_rows(mut self, n: usize) -> Self {
        self.max_query_rows = Some(n);
        self
    }
881
    /// Read-only access to the engine's own catalog (not a
    /// transaction-private one).
    pub const fn catalog(&self) -> &Catalog {
        &self.catalog
    }
888
889 pub fn snapshot(&self) -> Vec<u8> {
897 if self.users.is_empty()
898 && self.publications.is_empty()
899 && self.subscriptions.is_empty()
900 && self.statistics.is_empty()
901 {
902 self.catalog.serialize()
903 } else {
904 build_envelope(
905 &self.catalog.serialize(),
906 &users::serialize_users(&self.users),
907 &self.publications.serialize(),
908 &self.subscriptions.serialize(),
909 &self.statistics.serialize(),
910 )
911 }
912 }
913
    /// Returns `true` when at least one transaction has private state
    /// registered in `tx_catalogs`.
    pub fn in_transaction(&self) -> bool {
        !self.tx_catalogs.is_empty()
    }
921
922 pub fn alloc_tx_id(&mut self) -> TxId {
931 let id = TxId(self.next_tx_id);
932 self.next_tx_id = self.next_tx_id.saturating_add(1);
933 id
934 }
935
    /// Replaces the engine's own catalog wholesale. Transaction-private
    /// catalogs are not touched.
    pub fn replace_catalog(&mut self, catalog: Catalog) {
        self.catalog = catalog;
    }
958
959 pub fn freeze_oldest_to_cold(
967 &mut self,
968 table_name: &str,
969 index_name: &str,
970 max_rows: usize,
971 ) -> Result<spg_storage::FreezeReport, EngineError> {
972 let report = self
973 .active_catalog_mut()
974 .freeze_oldest_to_cold(table_name, index_name, max_rows)
975 .map_err(EngineError::Storage)?;
976 if let Some(t) = self.active_catalog_mut().get_mut(table_name) {
977 t.mark_cold_row_count_stale();
978 }
979 Ok(report)
980 }
981
982 pub fn receive_cold_segment(
996 &mut self,
997 segment_id: u32,
998 bytes: Vec<u8>,
999 ) -> Result<(), EngineError> {
1000 let mut new_cat = self.catalog.clone();
1001 match new_cat.load_segment_bytes_at(segment_id, bytes) {
1002 Ok(()) => {
1003 self.replace_catalog(new_cat);
1004 Ok(())
1005 }
1006 Err(StorageError::Corrupt(msg)) if msg.contains("already occupied") => Ok(()),
1007 Err(e) => Err(EngineError::Storage(e)),
1008 }
1009 }
1010
1011 pub fn compact_cold_segments_with_target(
1025 &mut self,
1026 target_segment_bytes: u64,
1027 ) -> Result<Vec<(String, String, CompactReport)>, EngineError> {
1028 let table_names = self.active_catalog().table_names();
1029 let mut reports: Vec<(String, String, CompactReport)> = Vec::new();
1030 for tname in table_names {
1031 if is_internal_table_name(&tname) {
1032 continue;
1033 }
1034 let idx_names: Vec<String> = {
1035 let Some(t) = self.active_catalog().get(&tname) else {
1036 continue;
1037 };
1038 t.indices()
1039 .iter()
1040 .filter(|i| matches!(i.kind, IndexKind::BTree(_)))
1041 .map(|i| i.name.clone())
1042 .collect()
1043 };
1044 for iname in idx_names {
1045 let report = self
1046 .active_catalog_mut()
1047 .compact_cold_segments(&tname, &iname, target_segment_bytes)
1048 .map_err(EngineError::Storage)?;
1049 if report.merged_segment_id.is_some() {
1050 if let Some(t) = self.active_catalog_mut().get_mut(&tname) {
1051 t.mark_cold_row_count_stale();
1052 }
1053 reports.push((tname.clone(), iname, report));
1054 }
1055 }
1056 }
1057 Ok(reports)
1058 }
1059
1060 fn active_catalog(&self) -> &Catalog {
1061 match self.current_tx {
1062 Some(t) => self
1063 .tx_catalogs
1064 .get(&t)
1065 .map_or(&self.catalog, |s| &s.catalog),
1066 None => &self.catalog,
1067 }
1068 }
1069
1070 fn active_catalog_mut(&mut self) -> &mut Catalog {
1071 let tx = self.current_tx;
1072 match tx {
1073 Some(t) => match self.tx_catalogs.get_mut(&t) {
1074 Some(s) => &mut s.catalog,
1075 None => &mut self.catalog,
1076 },
1077 None => &mut self.catalog,
1078 }
1079 }
1080
    /// Executes a read-only statement without a cancellation flag.
    ///
    /// # Errors
    /// See [`Engine::execute_readonly_with_cancel`].
    pub fn execute_readonly(&self, sql: &str) -> Result<QueryResult, EngineError> {
        self.execute_readonly_with_cancel(sql, CancelToken::none())
    }
1095
1096 pub fn execute_readonly_with_cancel(
1102 &self,
1103 sql: &str,
1104 cancel: CancelToken<'_>,
1105 ) -> Result<QueryResult, EngineError> {
1106 cancel.check()?;
1107 let mut stmt = parser::parse_statement(sql)?;
1108 let now_micros = self.clock.map(|f| f());
1109 rewrite_clock_calls(&mut stmt, now_micros);
1110 if let Statement::Select(s) = &mut stmt {
1111 resolve_order_by_position(s);
1112 reorder::reorder_joins(s, &self.catalog, &self.statistics);
1114 }
1115 let result = match stmt {
1116 Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1117 Statement::ShowTables => Ok(self.exec_show_tables()),
1118 Statement::ShowColumns(table) => self.exec_show_columns(&table),
1119 Statement::ShowUsers => Ok(self.exec_show_users()),
1120 Statement::ShowPublications => Ok(self.exec_show_publications()),
1121 Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1122 Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1123 "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1124 )),
1125 Statement::Explain(e) => self.exec_explain(&e, cancel),
1126 _ => Err(EngineError::WriteRequired),
1127 };
1128 self.enforce_row_limit(result)
1129 }
1130
1131 fn enforce_row_limit(
1135 &self,
1136 result: Result<QueryResult, EngineError>,
1137 ) -> Result<QueryResult, EngineError> {
1138 if let (Ok(QueryResult::Rows { rows, .. }), Some(cap)) = (&result, self.max_query_rows)
1139 && rows.len() > cap
1140 {
1141 return Err(EngineError::RowLimitExceeded(cap));
1142 }
1143 result
1144 }
1145
    /// Executes `sql` under [`IMPLICIT_TX`] without a cancellation flag.
    ///
    /// # Errors
    /// See [`Engine::execute_in_with_cancel`].
    pub fn execute(&mut self, sql: &str) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, IMPLICIT_TX, CancelToken::none())
    }
1149
    /// Executes `sql` under [`IMPLICIT_TX`], observing `cancel`.
    ///
    /// # Errors
    /// See [`Engine::execute_in_with_cancel`].
    pub fn execute_with_cancel(
        &mut self,
        sql: &str,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, IMPLICIT_TX, cancel)
    }
1161
    /// Executes `sql` under the transaction slot `tx_id` without a
    /// cancellation flag.
    ///
    /// # Errors
    /// See [`Engine::execute_in_with_cancel`].
    pub fn execute_in(&mut self, sql: &str, tx_id: TxId) -> Result<QueryResult, EngineError> {
        self.execute_in_with_cancel(sql, tx_id, CancelToken::none())
    }
1171
1172 pub fn execute_in_with_cancel(
1178 &mut self,
1179 sql: &str,
1180 tx_id: TxId,
1181 cancel: CancelToken<'_>,
1182 ) -> Result<QueryResult, EngineError> {
1183 let saved = self.current_tx;
1184 self.current_tx = Some(tx_id);
1185 let result = self.execute_inner_with_cancel(sql, cancel);
1186 self.current_tx = saved;
1187 result
1188 }
1189
    /// Parses `sql` and applies the engine's statement rewrites.
    ///
    /// Clock calls are rewritten using the host clock's current value (when a
    /// clock is installed). For `SELECT` statements, `GROUP BY ALL` is
    /// expanded, positional `ORDER BY` entries are resolved, and joins are
    /// reordered using the engine catalog and statistics — in that order.
    ///
    /// # Errors
    /// Returns the parser's [`ParseError`] if `sql` does not parse.
    pub fn prepare(&self, sql: &str) -> Result<Statement, ParseError> {
        let mut stmt = parser::parse_statement(sql)?;
        // `None` when no clock is installed.
        let now_micros = self.clock.map(|f| f());
        rewrite_clock_calls(&mut stmt, now_micros);
        if let Statement::Select(s) = &mut stmt {
            expand_group_by_all(s);
            resolve_order_by_position(s);
            reorder::reorder_joins(s, &self.catalog, &self.statistics);
        }
        Ok(stmt)
    }
1217
1218 pub fn prepare_cached(&mut self, sql: &str) -> Result<Statement, ParseError> {
1230 let current_version = self.statistics.version();
1233 if let Some(plan) = self.plan_cache.get(sql) {
1234 if plan.statistics_version == current_version {
1235 return Ok(plan.stmt.clone());
1236 }
1237 }
1239 self.plan_cache.evict(sql);
1240 let stmt = self.prepare(sql)?;
1241 let source_tables = plan_cache::collect_source_tables(&stmt);
1242 let plan = plan_cache::PreparedPlan {
1243 stmt: stmt.clone(),
1244 statistics_version: current_version,
1245 source_tables,
1246 describe_columns: alloc::vec::Vec::new(),
1247 };
1248 self.plan_cache.insert(String::from(sql), plan);
1249 Ok(stmt)
1250 }
1251
    /// Read-only access to the plan cache.
    pub fn plan_cache(&self) -> &plan_cache::PlanCache {
        &self.plan_cache
    }
1256
    /// Mutable access to the plan cache.
    pub fn plan_cache_mut(&mut self) -> &mut plan_cache::PlanCache {
        &mut self.plan_cache
    }
1261
    /// Describes a prepared statement against the active catalog by
    /// delegating to `describe::describe_prepared`.
    // NOTE(review): the returned pair is presumably (parameter type ids,
    // result columns) — confirm against the `describe` module.
    pub fn describe_prepared(
        &self,
        stmt: &Statement,
    ) -> (Vec<u32>, Vec<ColumnSchema>) {
        describe::describe_prepared(stmt, self.active_catalog())
    }
1273
    /// Substitutes `params` for the placeholders in `stmt` and executes it
    /// without a cancellation flag.
    ///
    /// # Errors
    /// Returns an error if placeholder substitution fails, or whatever
    /// executing the statement reports.
    pub fn execute_prepared(
        &mut self,
        mut stmt: Statement,
        params: &[Value],
    ) -> Result<QueryResult, EngineError> {
        substitute_placeholders(&mut stmt, params)?;
        self.execute_stmt_with_cancel(stmt, CancelToken::none())
    }
1291
1292 fn execute_inner_with_cancel(
1293 &mut self,
1294 sql: &str,
1295 cancel: CancelToken<'_>,
1296 ) -> Result<QueryResult, EngineError> {
1297 cancel.check()?;
1298 let stmt = self.prepare(sql)?;
1299 let start_us = self.clock.map(|f| f());
1303 let result = self.execute_stmt_with_cancel(stmt, cancel);
1304 if let (Some(t0), Ok(_)) = (start_us, &result) {
1305 let now = self.clock.map_or(t0, |f| f());
1306 let elapsed = now.saturating_sub(t0).max(0) as u64;
1307 self.query_stats.record(sql, elapsed, now as u64);
1308 if let (Some(threshold), Some(logger)) =
1311 (self.slow_query_threshold_us, self.slow_query_logger)
1312 && elapsed >= threshold
1313 {
1314 logger(sql, elapsed);
1315 }
1316 }
1317 result
1318 }
1319
1320 fn execute_stmt_with_cancel(
1321 &mut self,
1322 stmt: Statement,
1323 cancel: CancelToken<'_>,
1324 ) -> Result<QueryResult, EngineError> {
1325 cancel.check()?;
1326 let result = match stmt {
1327 Statement::CreateTable(s) => self.exec_create_table(s),
1328 Statement::CreateExtension(_) => Ok(QueryResult::CommandOk {
1332 affected: 0,
1333 modified_catalog: false,
1334 }),
1335 Statement::DoBlock => Ok(QueryResult::CommandOk {
1338 affected: 0,
1339 modified_catalog: false,
1340 }),
1341 Statement::CreateIndex(s) => self.exec_create_index(s),
1342 Statement::Insert(s) => self.exec_insert(s),
1343 Statement::Update(s) => self.exec_update_cancel(&s, cancel),
1344 Statement::Delete(s) => self.exec_delete_cancel(&s, cancel),
1345 Statement::Select(s) => self.exec_select_cancel(&s, cancel),
1346 Statement::Begin => self.exec_begin(),
1347 Statement::Commit => self.exec_commit(),
1348 Statement::Rollback => self.exec_rollback(),
1349 Statement::Savepoint(name) => self.exec_savepoint(name),
1350 Statement::RollbackToSavepoint(name) => self.exec_rollback_to_savepoint(&name),
1351 Statement::ReleaseSavepoint(name) => self.exec_release_savepoint(&name),
1352 Statement::ShowTables => Ok(self.exec_show_tables()),
1353 Statement::ShowColumns(table) => self.exec_show_columns(&table),
1354 Statement::ShowUsers => Ok(self.exec_show_users()),
1355 Statement::ShowPublications => Ok(self.exec_show_publications()),
1356 Statement::ShowSubscriptions => Ok(self.exec_show_subscriptions()),
1357 Statement::CreateUser(s) => self.exec_create_user(&s),
1358 Statement::DropUser(name) => self.exec_drop_user(&name),
1359 Statement::Explain(e) => self.exec_explain(&e, cancel),
1360 Statement::AlterIndex(s) => self.exec_alter_index(s),
1361 Statement::AlterTable(s) => self.exec_alter_table(s),
1362 Statement::CreatePublication(s) => self.exec_create_publication(s),
1363 Statement::DropPublication(name) => self.exec_drop_publication(&name),
1364 Statement::CreateSubscription(s) => self.exec_create_subscription(s),
1365 Statement::DropSubscription(name) => self.exec_drop_subscription(&name),
1366 Statement::WaitForWalPosition { .. } => Err(EngineError::Unsupported(
1373 "WAIT FOR WAL POSITION must be handled by the server layer".into(),
1374 )),
1375 Statement::Analyze(target) => self.exec_analyze(target.as_deref()),
1377 Statement::CompactColdSegments => self.exec_compact_cold_segments(),
1379 };
1380 self.enforce_row_limit(result)
1381 }
1382
1383 fn exec_create_publication(
1391 &mut self,
1392 s: CreatePublicationStatement,
1393 ) -> Result<QueryResult, EngineError> {
1394 self.publications
1400 .create(s.name, s.scope)
1401 .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE PUBLICATION: {e:?}")))?;
1402 Ok(QueryResult::CommandOk {
1403 affected: 1,
1404 modified_catalog: true,
1405 })
1406 }
1407
1408 fn exec_drop_publication(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1413 let removed = self.publications.drop(name);
1414 Ok(QueryResult::CommandOk {
1415 affected: usize::from(removed),
1416 modified_catalog: removed,
1417 })
1418 }
1419
    /// Read-only access to the publication store.
    pub const fn publications(&self) -> &publications::Publications {
        &self.publications
    }
1427
1428 fn exec_create_subscription(
1433 &mut self,
1434 s: CreateSubscriptionStatement,
1435 ) -> Result<QueryResult, EngineError> {
1436 let sub = subscriptions::Subscription {
1440 conn_str: s.conn_str,
1441 publications: s.publications,
1442 enabled: true,
1443 last_received_pos: 0,
1444 };
1445 self.subscriptions
1446 .create(s.name, sub)
1447 .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE SUBSCRIPTION: {e:?}")))?;
1448 Ok(QueryResult::CommandOk {
1449 affected: 1,
1450 modified_catalog: true,
1451 })
1452 }
1453
1454 fn exec_drop_subscription(&mut self, name: &str) -> Result<QueryResult, EngineError> {
1462 let removed = self.subscriptions.drop(name);
1463 Ok(QueryResult::CommandOk {
1464 affected: usize::from(removed),
1465 modified_catalog: removed,
1466 })
1467 }
1468
    /// Read-only access to the subscription store.
    pub const fn subscriptions(&self) -> &subscriptions::Subscriptions {
        &self.subscriptions
    }
1476
    /// Records `pos` as the last received position of subscription `name`
    /// and returns the store's result.
    // NOTE(review): the returned bool presumably reports whether the
    // subscription exists / was updated — confirm in `subscriptions`.
    pub fn subscription_advance(&mut self, name: &str, pos: u64) -> bool {
        self.subscriptions.update_last_received_pos(name, pos)
    }
1485
1486 fn exec_show_subscriptions(&self) -> QueryResult {
1492 let columns = alloc::vec![
1493 ColumnSchema::new("name", DataType::Text, false),
1494 ColumnSchema::new("conn_str", DataType::Text, false),
1495 ColumnSchema::new("publications", DataType::Text, false),
1496 ColumnSchema::new("enabled", DataType::Bool, false),
1497 ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1498 ];
1499 let rows: Vec<Row> = self
1500 .subscriptions
1501 .iter()
1502 .map(|(name, sub)| {
1503 Row::new(alloc::vec![
1504 Value::Text(name.clone()),
1505 Value::Text(sub.conn_str.clone()),
1506 Value::Text(sub.publications.join(", ")),
1507 Value::Bool(sub.enabled),
1508 Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1509 ])
1510 })
1511 .collect();
1512 QueryResult::Rows { columns, rows }
1513 }
1514
1515 fn exec_spg_statistic(&self) -> QueryResult {
1520 let columns = alloc::vec![
1521 ColumnSchema::new("table_name", DataType::Text, false),
1522 ColumnSchema::new("column_name", DataType::Text, false),
1523 ColumnSchema::new("null_frac", DataType::Float, false),
1524 ColumnSchema::new("n_distinct", DataType::BigInt, false),
1525 ColumnSchema::new("histogram_bounds", DataType::Text, false),
1526 ColumnSchema::new("cold_row_count", DataType::BigInt, false),
1531 ];
1532 let rows: Vec<Row> = self
1533 .statistics
1534 .iter()
1535 .map(|((t, c), s)| {
1536 let cold = self
1537 .catalog
1538 .get(t)
1539 .map_or(0, |table| table.cold_row_count());
1540 Row::new(alloc::vec![
1541 Value::Text(t.clone()),
1542 Value::Text(c.clone()),
1543 Value::Float(f64::from(s.null_frac)),
1544 Value::BigInt(i64::try_from(s.n_distinct).unwrap_or(i64::MAX)),
1545 Value::Text(render_histogram_bounds(&s.histogram_bounds)),
1546 Value::BigInt(i64::try_from(cold).unwrap_or(i64::MAX)),
1547 ])
1548 })
1549 .collect();
1550 QueryResult::Rows { columns, rows }
1551 }
1552
1553 fn exec_spg_stat_replication(&self) -> QueryResult {
1560 let columns = alloc::vec![
1561 ColumnSchema::new("name", DataType::Text, false),
1562 ColumnSchema::new("conn_str", DataType::Text, false),
1563 ColumnSchema::new("publications", DataType::Text, false),
1564 ColumnSchema::new("last_received_pos", DataType::BigInt, false),
1565 ColumnSchema::new("enabled", DataType::Bool, false),
1566 ];
1567 let rows: Vec<Row> = self
1568 .subscriptions
1569 .iter()
1570 .map(|(name, sub)| {
1571 Row::new(alloc::vec![
1572 Value::Text(name.clone()),
1573 Value::Text(sub.conn_str.clone()),
1574 Value::Text(sub.publications.join(",")),
1575 Value::BigInt(i64::try_from(sub.last_received_pos).unwrap_or(i64::MAX)),
1576 Value::Bool(sub.enabled),
1577 ])
1578 })
1579 .collect();
1580 QueryResult::Rows { columns, rows }
1581 }
1582
1583 fn exec_spg_stat_segment(&self) -> QueryResult {
1595 let columns = alloc::vec![
1596 ColumnSchema::new("segment_id", DataType::BigInt, false),
1597 ColumnSchema::new("table_name", DataType::Text, false),
1598 ColumnSchema::new("num_rows", DataType::BigInt, false),
1599 ColumnSchema::new("num_pages", DataType::BigInt, false),
1600 ColumnSchema::new("total_bytes", DataType::BigInt, false),
1601 ];
1602 let mut segment_owners: alloc::collections::BTreeMap<u32, String> = BTreeMap::new();
1608 for tname in self.catalog.table_names() {
1609 if is_internal_table_name(&tname) {
1610 continue;
1611 }
1612 let Some(t) = self.catalog.get(&tname) else {
1613 continue;
1614 };
1615 for idx in t.indices() {
1616 if let spg_storage::IndexKind::BTree(map) = &idx.kind {
1617 for (_, locs) in map.iter() {
1618 for loc in locs {
1619 if let spg_storage::RowLocator::Cold { segment_id, .. } = loc {
1620 segment_owners.entry(*segment_id).or_insert_with(|| tname.clone());
1621 }
1622 }
1623 }
1624 }
1625 }
1626 }
1627 let rows: Vec<Row> = self
1628 .catalog
1629 .cold_segment_ids_global()
1630 .iter()
1631 .filter_map(|&id| {
1632 let seg = self.catalog.cold_segment(id)?;
1633 let meta = seg.meta();
1634 let owner = segment_owners
1635 .get(&id)
1636 .cloned()
1637 .unwrap_or_default();
1638 Some(Row::new(alloc::vec![
1639 Value::BigInt(i64::from(id)),
1640 Value::Text(owner),
1641 Value::BigInt(i64::try_from(meta.num_rows).unwrap_or(i64::MAX)),
1642 Value::BigInt(i64::from(meta.num_pages)),
1643 Value::BigInt(i64::try_from(meta.total_bytes).unwrap_or(i64::MAX)),
1644 ]))
1645 })
1646 .collect();
1647 QueryResult::Rows { columns, rows }
1648 }
1649
1650 fn exec_spg_stat_query(&self) -> QueryResult {
1656 let columns = alloc::vec![
1657 ColumnSchema::new("sql", DataType::Text, false),
1658 ColumnSchema::new("exec_count", DataType::BigInt, false),
1659 ColumnSchema::new("total_us", DataType::BigInt, false),
1660 ColumnSchema::new("mean_us", DataType::BigInt, false),
1661 ColumnSchema::new("max_us", DataType::BigInt, false),
1662 ColumnSchema::new("last_seen_us", DataType::BigInt, false),
1663 ];
1664 let rows: Vec<Row> = self
1665 .query_stats
1666 .snapshot()
1667 .into_iter()
1668 .map(|(sql, s)| {
1669 let mean = if s.exec_count == 0 {
1670 0
1671 } else {
1672 s.total_us / s.exec_count
1673 };
1674 Row::new(alloc::vec![
1675 Value::Text(sql),
1676 Value::BigInt(i64::try_from(s.exec_count).unwrap_or(i64::MAX)),
1677 Value::BigInt(i64::try_from(s.total_us).unwrap_or(i64::MAX)),
1678 Value::BigInt(i64::try_from(mean).unwrap_or(i64::MAX)),
1679 Value::BigInt(i64::try_from(s.max_us).unwrap_or(i64::MAX)),
1680 Value::BigInt(i64::try_from(s.last_seen_us).unwrap_or(i64::MAX)),
1681 ])
1682 })
1683 .collect();
1684 QueryResult::Rows { columns, rows }
1685 }
1686
    /// Stores `f` as the engine's activity provider and returns the engine,
    /// builder-style.
    ///
    /// `exec_spg_stat_activity` calls the stored provider to obtain its rows;
    /// when no provider has been installed it produces an empty row set.
    #[must_use]
    pub const fn with_activity_provider(mut self, f: ActivityProvider) -> Self {
        self.activity_provider = Some(f);
        self
    }
1696
    /// Stores the audit callbacks and returns the engine, builder-style.
    ///
    /// * `chain` - kept in `audit_chain_provider`; `exec_spg_audit_chain`
    ///   calls it to obtain the rows it emits.
    /// * `verify` - kept in `audit_verifier`; `exec_spg_audit_verify` calls it
    ///   to obtain the `(verified_count, broken_at_seq)` pair it emits.
    #[must_use]
    pub const fn with_audit_providers(
        mut self,
        chain: AuditChainProvider,
        verify: AuditVerifier,
    ) -> Self {
        self.audit_chain_provider = Some(chain);
        self.audit_verifier = Some(verify);
        self
    }
1708
    /// Stores a slow-query threshold and logger and returns the engine,
    /// builder-style.
    ///
    /// * `threshold_us` - kept in `slow_query_threshold_us`; judging by the
    ///   name the unit is microseconds.
    /// * `logger` - kept in `slow_query_logger`.
    ///
    /// NOTE(review): how the two values are consumed is not visible in this
    /// part of the file; confirm against the statement-execution path.
    #[must_use]
    pub const fn with_slow_query_log(
        mut self,
        threshold_us: u64,
        logger: SlowQueryLogger,
    ) -> Self {
        self.slow_query_threshold_us = Some(threshold_us);
        self.slow_query_logger = Some(logger);
        self
    }
1723
    /// Forwards `n` to the plan cache as its new maximum entry count
    /// (`plan_cache.set_max_entries(n)`).
    pub fn set_plan_cache_max(&mut self, n: usize) {
        self.plan_cache.set_max_entries(n);
    }
1730
1731 fn exec_spg_stat_activity(&self) -> QueryResult {
1736 let columns = alloc::vec![
1737 ColumnSchema::new("pid", DataType::Int, false),
1738 ColumnSchema::new("user", DataType::Text, false),
1739 ColumnSchema::new("started_at_us", DataType::BigInt, false),
1740 ColumnSchema::new("current_sql", DataType::Text, false),
1741 ColumnSchema::new("wait_event", DataType::Text, false),
1742 ColumnSchema::new("elapsed_us", DataType::BigInt, false),
1743 ColumnSchema::new("in_transaction", DataType::Bool, false),
1744 ];
1745 let rows: Vec<Row> = self
1746 .activity_provider
1747 .map(|f| f())
1748 .unwrap_or_default()
1749 .into_iter()
1750 .map(|r| {
1751 Row::new(alloc::vec![
1752 Value::Int(i32::try_from(r.pid).unwrap_or(i32::MAX)),
1753 Value::Text(r.user),
1754 Value::BigInt(r.started_at_us),
1755 Value::Text(r.current_sql),
1756 Value::Text(r.wait_event),
1757 Value::BigInt(r.elapsed_us),
1758 Value::Bool(r.in_transaction),
1759 ])
1760 })
1761 .collect();
1762 QueryResult::Rows { columns, rows }
1763 }
1764
1765 fn exec_spg_table_ddl(&self) -> QueryResult {
1769 let columns = alloc::vec![
1770 ColumnSchema::new("table_name", DataType::Text, false),
1771 ColumnSchema::new("ddl", DataType::Text, false),
1772 ];
1773 let rows: Vec<Row> = self
1774 .catalog
1775 .table_names()
1776 .into_iter()
1777 .filter(|n| !is_internal_table_name(n))
1778 .filter_map(|name| {
1779 let table = self.catalog.get(&name)?;
1780 let ddl = render_create_table(&name, &table.schema().columns);
1781 Some(Row::new(alloc::vec![
1782 Value::Text(name),
1783 Value::Text(ddl),
1784 ]))
1785 })
1786 .collect();
1787 QueryResult::Rows { columns, rows }
1788 }
1789
1790 fn exec_spg_role_ddl(&self) -> QueryResult {
1794 let columns = alloc::vec![
1795 ColumnSchema::new("role_name", DataType::Text, false),
1796 ColumnSchema::new("ddl", DataType::Text, false),
1797 ];
1798 let rows: Vec<Row> = self
1799 .users
1800 .iter()
1801 .map(|(name, rec)| {
1802 let ddl = alloc::format!(
1803 "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}'",
1804 rec.role.as_str(),
1805 );
1806 Row::new(alloc::vec![Value::Text(String::from(name)), Value::Text(ddl)])
1807 })
1808 .collect();
1809 QueryResult::Rows { columns, rows }
1810 }
1811
1812 fn exec_spg_database_ddl(&self) -> QueryResult {
1818 let columns = alloc::vec![ColumnSchema::new("ddl", DataType::Text, false)];
1819 let mut out = String::new();
1820 for (name, rec) in self.users.iter() {
1821 out.push_str(&alloc::format!(
1822 "CREATE USER {name} WITH PASSWORD '<redacted>' ROLE '{}';\n",
1823 rec.role.as_str(),
1824 ));
1825 }
1826 for name in self.catalog.table_names() {
1827 if is_internal_table_name(&name) {
1828 continue;
1829 }
1830 if let Some(table) = self.catalog.get(&name) {
1831 out.push_str(&render_create_table(&name, &table.schema().columns));
1832 out.push_str(";\n");
1833 }
1834 }
1835 QueryResult::Rows {
1836 columns,
1837 rows: alloc::vec![Row::new(alloc::vec![Value::Text(out)])],
1838 }
1839 }
1840
1841 fn exec_spg_audit_chain(&self) -> QueryResult {
1845 let columns = alloc::vec![
1846 ColumnSchema::new("seq", DataType::BigInt, false),
1847 ColumnSchema::new("ts_ms", DataType::BigInt, false),
1848 ColumnSchema::new("prev_hash", DataType::Text, false),
1849 ColumnSchema::new("entry_hash", DataType::Text, false),
1850 ColumnSchema::new("sql", DataType::Text, false),
1851 ];
1852 let rows: Vec<Row> = self
1853 .audit_chain_provider
1854 .map(|f| f())
1855 .unwrap_or_default()
1856 .into_iter()
1857 .map(|r| {
1858 Row::new(alloc::vec![
1859 Value::BigInt(r.seq),
1860 Value::BigInt(r.ts_ms),
1861 Value::Text(r.prev_hash_hex),
1862 Value::Text(r.entry_hash_hex),
1863 Value::Text(r.sql),
1864 ])
1865 })
1866 .collect();
1867 QueryResult::Rows { columns, rows }
1868 }
1869
1870 fn exec_spg_audit_verify(&self) -> QueryResult {
1876 let columns = alloc::vec![
1877 ColumnSchema::new("verified_count", DataType::BigInt, false),
1878 ColumnSchema::new("broken_at_seq", DataType::BigInt, false),
1879 ];
1880 let (verified, broken) = self.audit_verifier.map(|f| f()).unwrap_or((0, -1));
1881 let row = Row::new(alloc::vec![
1882 Value::BigInt(verified),
1883 Value::BigInt(broken),
1884 ]);
1885 QueryResult::Rows {
1886 columns,
1887 rows: alloc::vec![row],
1888 }
1889 }
1890
    /// Returns a shared reference to the engine's query statistics store.
    pub fn query_stats(&self) -> &query_stats::QueryStats {
        &self.query_stats
    }
1895
    /// Returns a mutable reference to the engine's query statistics store.
    pub fn query_stats_mut(&mut self) -> &mut query_stats::QueryStats {
        &mut self.query_stats
    }
1900
    /// Returns a shared reference to the engine's column statistics, the store
    /// that `ANALYZE` (`analyze_one_table`) writes into.
    pub const fn statistics(&self) -> &statistics::Statistics {
        &self.statistics
    }
1907
1908 pub fn tables_needing_analyze(&self) -> Vec<String> {
1921 const MIN_ROWS: u64 = 100;
1922 let mut out = Vec::new();
1923 for name in self.catalog.table_names() {
1924 if is_internal_table_name(&name) {
1925 continue;
1926 }
1927 let Some(table) = self.catalog.get(&name) else {
1928 continue;
1929 };
1930 let row_count = table.rows().len() as u64;
1931 let modified = self.statistics.modified_since_last_analyze(&name);
1932 let base = row_count.max(MIN_ROWS);
1937 let threshold = base.saturating_add(9) / 10;
1938 if modified >= threshold {
1939 out.push(name);
1940 }
1941 }
1942 out
1943 }
1944
1945 fn exec_analyze(&mut self, target: Option<&str>) -> Result<QueryResult, EngineError> {
1956 let names: Vec<String> = if let Some(name) = target {
1957 if self.catalog.get(name).is_none() {
1959 return Err(EngineError::Storage(StorageError::TableNotFound {
1960 name: name.to_string(),
1961 }));
1962 }
1963 alloc::vec![name.to_string()]
1964 } else {
1965 self.catalog
1966 .table_names()
1967 .into_iter()
1968 .filter(|n| !is_internal_table_name(n))
1969 .collect()
1970 };
1971 let mut analysed = 0usize;
1972 for table_name in &names {
1973 self.analyze_one_table(table_name)?;
1974 analysed += 1;
1975 }
1976 if analysed > 0 {
1982 self.statistics.bump_version();
1983 if target.is_some() {
1984 for t in &names {
1985 self.plan_cache.evict_referencing(t);
1986 }
1987 } else {
1988 self.plan_cache.clear();
1989 }
1990 }
1991 Ok(QueryResult::CommandOk {
1992 affected: analysed,
1993 modified_catalog: true,
1994 })
1995 }
1996
1997 fn exec_compact_cold_segments(&mut self) -> Result<QueryResult, EngineError> {
2008 let target = COMPACTION_TARGET_DEFAULT_BYTES;
2009 let reports = self.compact_cold_segments_with_target(target)?;
2010 let columns = alloc::vec![
2011 ColumnSchema::new("table_name", DataType::Text, false),
2012 ColumnSchema::new("index_name", DataType::Text, false),
2013 ColumnSchema::new("sources_merged", DataType::BigInt, false),
2014 ColumnSchema::new("merged_segment_id", DataType::BigInt, false),
2015 ColumnSchema::new("merged_rows", DataType::BigInt, false),
2016 ColumnSchema::new("deleted_rows_pruned", DataType::BigInt, false),
2017 ColumnSchema::new("bytes_reclaimed_estimate", DataType::BigInt, false),
2018 ];
2019 let rows: Vec<Row> = reports
2020 .into_iter()
2021 .map(|(tname, iname, report)| {
2022 Row::new(alloc::vec![
2023 Value::Text(tname),
2024 Value::Text(iname),
2025 Value::BigInt(i64::try_from(report.sources.len()).unwrap_or(i64::MAX)),
2026 Value::BigInt(i64::from(report.merged_segment_id.unwrap_or(0))),
2027 Value::BigInt(i64::try_from(report.merged_rows).unwrap_or(i64::MAX)),
2028 Value::BigInt(
2029 i64::try_from(report.deleted_rows_pruned).unwrap_or(i64::MAX),
2030 ),
2031 Value::BigInt(
2032 i64::try_from(report.bytes_reclaimed_estimate).unwrap_or(i64::MAX),
2033 ),
2034 ])
2035 })
2036 .collect();
2037 Ok(QueryResult::Rows { columns, rows })
2038 }
2039
2040 fn analyze_one_table(&mut self, table_name: &str) -> Result<(), EngineError> {
2045 let table = self.catalog.get(table_name).ok_or_else(|| {
2046 EngineError::Storage(StorageError::TableNotFound {
2047 name: table_name.to_string(),
2048 })
2049 })?;
2050 let schema = table.schema().clone();
2051 let row_count = table.rows().len();
2052 self.statistics.clear_table(table_name);
2057 for (col_pos, col_schema) in schema.columns.iter().enumerate() {
2058 if matches!(col_schema.ty, DataType::Vector { .. }) {
2061 continue;
2062 }
2063 let mut non_null_values: Vec<Value> = Vec::with_capacity(row_count);
2064 let mut nulls: u64 = 0;
2065 for row in table.rows() {
2066 match row.values.get(col_pos) {
2067 Some(Value::Null) | None => nulls += 1,
2068 Some(v) => non_null_values.push(v.clone()),
2069 }
2070 }
2071 non_null_values.sort_by(|a, b| sort_values_for_histogram(a, b));
2076 let non_null: Vec<String> = non_null_values
2077 .iter()
2078 .map(canonical_value_repr)
2079 .collect();
2080 let null_frac = if row_count == 0 {
2081 0.0
2082 } else {
2083 #[allow(clippy::cast_precision_loss)]
2084 let f = nulls as f32 / row_count as f32;
2085 f
2086 };
2087 let n_distinct = statistics::estimate_n_distinct(&non_null);
2088 let histogram_bounds = statistics::build_histogram(&non_null);
2089 self.statistics.set(
2090 table_name.to_string(),
2091 col_schema.name.clone(),
2092 statistics::ColumnStats {
2093 null_frac,
2094 n_distinct,
2095 histogram_bounds,
2096 },
2097 );
2098 }
2099 self.statistics.reset_modified(table_name);
2100 let cold_count = {
2106 let table = self
2107 .active_catalog()
2108 .get(table_name)
2109 .expect("table still present");
2110 table.count_cold_locators()
2111 };
2112 let table_mut = self
2113 .active_catalog_mut()
2114 .get_mut(table_name)
2115 .expect("table still present");
2116 table_mut.set_cold_row_count(cold_count);
2117 Ok(())
2118 }
2119
2120 fn exec_show_publications(&self) -> QueryResult {
2132 let columns = alloc::vec![
2133 ColumnSchema::new("name", DataType::Text, false),
2134 ColumnSchema::new("scope", DataType::Text, false),
2135 ColumnSchema::new("table_count", DataType::Int, true),
2136 ];
2137 let rows: Vec<Row> = self
2138 .publications
2139 .iter()
2140 .map(|(name, scope)| {
2141 let (scope_str, count_val) = match scope {
2142 spg_sql::ast::PublicationScope::AllTables => {
2143 ("FOR ALL TABLES".to_string(), Value::Null)
2144 }
2145 spg_sql::ast::PublicationScope::ForTables(ts) => (
2146 alloc::format!("FOR TABLE {}", ts.join(", ")),
2147 Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2148 ),
2149 spg_sql::ast::PublicationScope::AllTablesExcept(ts) => (
2150 alloc::format!("FOR ALL TABLES EXCEPT {}", ts.join(", ")),
2151 Value::Int(i32::try_from(ts.len()).unwrap_or(i32::MAX)),
2152 ),
2153 };
2154 Row::new(alloc::vec![
2155 Value::Text(name.clone()),
2156 Value::Text(scope_str),
2157 count_val,
2158 ])
2159 })
2160 .collect();
2161 QueryResult::Rows { columns, rows }
2162 }
2163
2164 fn exec_show_users(&self) -> QueryResult {
2166 let columns = alloc::vec![
2167 ColumnSchema::new("name", DataType::Text, false),
2168 ColumnSchema::new("role", DataType::Text, false),
2169 ];
2170 let rows: Vec<Row> = self
2171 .users
2172 .iter()
2173 .map(|(name, rec)| {
2174 Row::new(alloc::vec![
2175 Value::Text(name.to_string()),
2176 Value::Text(rec.role.as_str().to_string()),
2177 ])
2178 })
2179 .collect();
2180 QueryResult::Rows { columns, rows }
2181 }
2182
2183 fn exec_create_user(&mut self, s: &CreateUserStatement) -> Result<QueryResult, EngineError> {
2184 if self.in_transaction() {
2185 return Err(EngineError::Unsupported(
2186 "CREATE USER is not allowed inside a transaction".into(),
2187 ));
2188 }
2189 let role = users::Role::parse(&s.role).ok_or_else(|| {
2190 EngineError::Unsupported(alloc::format!("invalid role: {:?}", s.role))
2191 })?;
2192 let salt = self.salt_fn.map_or_else(
2196 || {
2197 let mut s_bytes = [0u8; 16];
2198 let digest = spg_crypto::hash(s.name.as_bytes());
2199 s_bytes.copy_from_slice(&digest[..16]);
2200 s_bytes
2201 },
2202 |f| f(),
2203 );
2204 self.users
2205 .create(&s.name, &s.password, role, salt)
2206 .map_err(|e| EngineError::Unsupported(alloc::format!("CREATE USER: {e}")))?;
2207 Ok(QueryResult::CommandOk {
2208 affected: 1,
2209 modified_catalog: true,
2210 })
2211 }
2212
2213 fn exec_drop_user(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2214 if self.in_transaction() {
2215 return Err(EngineError::Unsupported(
2216 "DROP USER is not allowed inside a transaction".into(),
2217 ));
2218 }
2219 self.users
2220 .drop(name)
2221 .map_err(|e| EngineError::Unsupported(alloc::format!("DROP USER: {e}")))?;
2222 Ok(QueryResult::CommandOk {
2223 affected: 1,
2224 modified_catalog: true,
2225 })
2226 }
2227
    /// Executes an `UPDATE` against the active catalog, polling `cancel`
    /// while the table is scanned.
    ///
    /// Phases, in order:
    /// 1. With a `WHERE` clause that `try_pk_predicate` recognises and an
    ///    index on that column, ask the catalog to promote the matching cold
    ///    row (result ignored).
    /// 2. Resolve every assignment target to a column position.
    /// 3. Scan the rows, evaluate `WHERE`, and plan the new values for each
    ///    matching row without mutating anything yet.
    /// 4. Check the table's own foreign keys against the planned rows, then
    ///    plan and apply the steps for referencing (child) tables.
    /// 5. Apply the planned rows, record the modification count (outside a
    ///    transaction only) and build the `RETURNING` result if requested.
    ///
    /// # Errors
    ///
    /// `StorageError::TableNotFound` for a missing table,
    /// `EvalError::ColumnNotFound` for an unknown assignment column, and any
    /// error propagated from cancellation, expression evaluation, coercion,
    /// foreign-key enforcement or `update_row`.
    fn exec_update_cancel(
        &mut self,
        stmt: &spg_sql::ast::UpdateStatement,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        if let Some(w) = &stmt.where_ {
            // Column schema is cloned so the catalog borrow ends before the
            // mutable access below.
            let schema_cols = self
                .active_catalog()
                .get(&stmt.table)
                .ok_or_else(|| {
                    EngineError::Storage(StorageError::TableNotFound {
                        name: stmt.table.clone(),
                    })
                })?
                .schema()
                .columns
                .clone();
            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
                && let Some(idx_name) = self
                    .active_catalog()
                    .get(&stmt.table)
                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
            {
                // Best effort: the outcome of the promotion is deliberately
                // discarded. Presumably this brings a cold-tier row into
                // `rows()` so the scan below can see it — confirm against
                // `promote_cold_row`.
                let _ = self
                    .active_catalog_mut()
                    .promote_cold_row(&stmt.table, &idx_name, &key);
            }
        }

        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
        // (column position, assignment expression) for every SET item.
        let mut targets: Vec<(usize, &Expr)> = Vec::with_capacity(stmt.assignments.len());
        for (col, expr) in &stmt.assignments {
            let pos = schema_cols
                .iter()
                .position(|c| c.name == *col)
                .ok_or_else(|| {
                    EngineError::Eval(EvalError::ColumnNotFound { name: col.clone() })
                })?;
            targets.push((pos, expr));
        }
        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
        // (row position, full new value list) for every row that matches.
        let mut planned: Vec<(usize, Vec<Value>)> = Vec::new();
        for (i, row) in table.rows().iter().enumerate() {
            // Poll for cancellation once every 256 rows.
            if i.is_multiple_of(256) {
                cancel.check()?;
            }
            if let Some(w) = &stmt.where_ {
                let cond = eval::eval_expr(w, row, &ctx)?;
                // Only an explicit TRUE selects the row; any other value skips it.
                if !matches!(cond, Value::Bool(true)) {
                    continue;
                }
            }
            let mut new_vals = row.values.clone();
            for (pos, expr) in &targets {
                // Assignments are evaluated against the original row, not
                // against values already assigned in this statement.
                let v = eval::eval_expr(expr, row, &ctx)?;
                new_vals[*pos] =
                    coerce_value(v, schema_cols[*pos].ty, &schema_cols[*pos].name, *pos)?;
            }
            planned.push((i, new_vals));
        }
        // (row position, old values, new values), consumed by the FK planner.
        let plan_with_old: Vec<(usize, Vec<Value>, Vec<Value>)> = planned
            .iter()
            .map(|(pos, new_vals)| (*pos, table.rows()[*pos].values.clone(), new_vals.clone()))
            .collect();
        let self_fks = table.schema().foreign_keys.clone();
        let affected = planned.len();
        // Last use of this `table` borrow; the catalog is re-borrowed below.
        let _ = table;
        if !self_fks.is_empty() {
            // Validate this table's own foreign keys against the planned rows.
            let new_rows: Vec<Vec<Value>> = planned
                .iter()
                .map(|(_pos, new_vals)| new_vals.clone())
                .collect();
            enforce_fk_inserts(self.active_catalog(), &stmt.table, &self_fks, &new_rows)?;
        }
        let child_plan =
            plan_fk_parent_updates(self.active_catalog(), &stmt.table, &plan_with_old)?;
        // Child-table steps are applied before the parent rows are rewritten.
        for step in &child_plan {
            apply_fk_child_step(self.active_catalog_mut(), step)?;
        }
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        // Keep a copy of the new rows only when RETURNING needs them, since
        // `planned` is consumed by the update loop.
        let updated_for_returning: Vec<Vec<Value>> =
            if stmt.returning.is_some() {
                planned.iter().map(|(_pos, vals)| vals.clone()).collect()
            } else {
                Vec::new()
            };
        for (pos, vals) in planned {
            table.update_row(pos, vals)?;
        }
        let _ = table;
        // Modification counts are recorded only outside a transaction.
        if !self.in_transaction() && affected > 0 {
            self.statistics
                .record_modifications(&stmt.table, affected as u64);
        }
        if let Some(items) = &stmt.returning {
            return self.build_returning_rows(
                &stmt.table,
                items,
                updated_for_returning,
            );
        }
        Ok(QueryResult::CommandOk {
            affected,
            modified_catalog: !self.in_transaction(),
        })
    }
2390
    /// Executes a `DELETE` against the active catalog, polling `cancel` while
    /// the table is scanned.
    ///
    /// Phases, in order:
    /// 1. With a `WHERE` clause that `try_pk_predicate` recognises and an
    ///    index on that column, shadow the matching cold row and remember how
    ///    many rows that affected.
    /// 2. Scan the rows and collect the positions and values of every row to
    ///    delete (all rows when there is no `WHERE`).
    /// 3. Plan and apply the foreign-key steps for referencing (child) tables.
    /// 4. Delete the collected rows, record the modification count (outside a
    ///    transaction only) and build the `RETURNING` result if requested.
    ///
    /// The reported `affected` count is the number of rows removed by
    /// `delete_rows` plus the cold-shadow count from phase 1.
    ///
    /// # Errors
    ///
    /// `StorageError::TableNotFound` for a missing table, and any error
    /// propagated from cancellation, expression evaluation or foreign-key
    /// planning and application.
    fn exec_delete_cancel(
        &mut self,
        stmt: &spg_sql::ast::DeleteStatement,
        cancel: CancelToken<'_>,
    ) -> Result<QueryResult, EngineError> {
        let mut cold_shadow_count: usize = 0;
        if let Some(w) = &stmt.where_ {
            // Column schema is cloned so the catalog borrow ends before the
            // mutable access below.
            let schema_cols = self
                .active_catalog()
                .get(&stmt.table)
                .ok_or_else(|| {
                    EngineError::Storage(StorageError::TableNotFound {
                        name: stmt.table.clone(),
                    })
                })?
                .schema()
                .columns
                .clone();
            if let Some((col_pos, key)) = try_pk_predicate(w, &schema_cols, stmt.table.as_str())
                && let Some(idx_name) = self
                    .active_catalog()
                    .get(&stmt.table)
                    .and_then(|t| t.index_on(col_pos).map(|i| i.name.clone()))
            {
                // Best effort: when the call yields no count, treat it as zero
                // rows shadowed instead of failing the statement.
                cold_shadow_count = self
                    .active_catalog_mut()
                    .shadow_cold_row(&stmt.table, &idx_name, &key)
                    .unwrap_or(0);
            }
        }

        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        let schema_cols: Vec<ColumnSchema> = table.schema().columns.clone();
        let ctx = EvalContext::new(&schema_cols, Some(stmt.table.as_str()));
        // Parallel vectors: row positions to delete and a copy of their values.
        let mut positions: Vec<usize> = Vec::new();
        let mut to_delete_rows: Vec<Vec<Value>> = Vec::new();
        for (i, row) in table.rows().iter().enumerate() {
            // Poll for cancellation once every 256 rows.
            if i.is_multiple_of(256) {
                cancel.check()?;
            }
            // A row is kept unless WHERE evaluates to an explicit TRUE;
            // without a WHERE clause nothing is kept.
            let keep = if let Some(w) = &stmt.where_ {
                let cond = eval::eval_expr(w, row, &ctx)?;
                !matches!(cond, Value::Bool(true))
            } else {
                false
            };
            if !keep {
                positions.push(i);
                to_delete_rows.push(row.values.clone());
            }
        }
        // Last use of this `table` borrow; the catalog is re-borrowed below.
        let _ = table;
        let cascade_plan = plan_fk_parent_deletions(
            self.active_catalog(),
            &stmt.table,
            &positions,
            &to_delete_rows,
        )?;
        // Child-table steps are applied before the parent rows are removed.
        for step in &cascade_plan {
            apply_fk_child_step(self.active_catalog_mut(), step)?;
        }
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        let affected = table.delete_rows(&positions) + cold_shadow_count;
        let _ = table;
        // Modification counts are recorded only outside a transaction.
        if !self.in_transaction() && affected > 0 {
            self.statistics
                .record_modifications(&stmt.table, affected as u64);
        }
        if let Some(items) = &stmt.returning {
            // RETURNING is built from the values captured during the scan.
            // NOTE(review): rows counted via `cold_shadow_count` are not part
            // of `to_delete_rows` — confirm this is intended.
            return self.build_returning_rows(
                &stmt.table,
                items,
                to_delete_rows,
            );
        }
        Ok(QueryResult::CommandOk {
            affected,
            modified_catalog: !self.in_transaction(),
        })
    }
2517
2518 #[allow(clippy::format_push_string)]
2528 fn exec_explain(
2529 &self,
2530 e: &spg_sql::ast::ExplainStatement,
2531 cancel: CancelToken<'_>,
2532 ) -> Result<QueryResult, EngineError> {
2533 let mut lines = Vec::<String>::new();
2534 explain_select(&e.inner, self, 0, &mut lines);
2535 if e.suggest {
2536 let suggestions = build_index_suggestions(&e.inner, self);
2545 for s in suggestions {
2546 lines.push(s);
2547 }
2548 } else if e.analyze {
2549 let started = self.clock.map(|f| f());
2566 let exec = self.exec_select_cancel(&e.inner, cancel)?;
2567 let elapsed_micros = match (self.clock, started) {
2568 (Some(f), Some(s)) => Some(f().saturating_sub(s)),
2569 _ => None,
2570 };
2571 let row_count = if let QueryResult::Rows { rows, .. } = &exec {
2572 rows.len()
2573 } else {
2574 0
2575 };
2576 annotate_explain_lines(&mut lines, row_count, self);
2577 let mut total = alloc::format!("Total: rows={row_count}");
2578 if let Some(us) = elapsed_micros {
2579 total.push_str(&alloc::format!(" elapsed={us}us"));
2580 }
2581 lines.push(total);
2582 }
2583 let columns = alloc::vec![ColumnSchema::new("QUERY PLAN", DataType::Text, false)];
2584 let rows: Vec<Row> = lines
2585 .into_iter()
2586 .map(|l| Row::new(alloc::vec![Value::Text(l)]))
2587 .collect();
2588 Ok(QueryResult::Rows { columns, rows })
2589 }
2590
2591 fn exec_show_tables(&self) -> QueryResult {
2592 let columns = alloc::vec![ColumnSchema::new("name", DataType::Text, false)];
2593 let rows: Vec<Row> = self
2594 .active_catalog()
2595 .table_names()
2596 .into_iter()
2597 .map(|n| Row::new(alloc::vec![Value::Text(n)]))
2598 .collect();
2599 QueryResult::Rows { columns, rows }
2600 }
2601
2602 fn exec_show_columns(&self, table_name: &str) -> Result<QueryResult, EngineError> {
2605 let table =
2606 self.active_catalog()
2607 .get(table_name)
2608 .ok_or_else(|| StorageError::TableNotFound {
2609 name: table_name.into(),
2610 })?;
2611 let columns = alloc::vec![
2612 ColumnSchema::new("name", DataType::Text, false),
2613 ColumnSchema::new("type", DataType::Text, false),
2614 ColumnSchema::new("nullable", DataType::Bool, false),
2615 ];
2616 let rows: Vec<Row> = table
2617 .schema()
2618 .columns
2619 .iter()
2620 .map(|c| {
2621 Row::new(alloc::vec![
2622 Value::Text(c.name.clone()),
2623 Value::Text(alloc::format!("{}", c.ty)),
2624 Value::Bool(c.nullable),
2625 ])
2626 })
2627 .collect();
2628 Ok(QueryResult::Rows { columns, rows })
2629 }
2630
2631 fn exec_begin(&mut self) -> Result<QueryResult, EngineError> {
2632 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2633 if self.tx_catalogs.contains_key(&tx_id) {
2634 return Err(EngineError::TransactionAlreadyOpen);
2635 }
2636 self.tx_catalogs.insert(
2637 tx_id,
2638 TxState {
2639 catalog: self.catalog.clone(),
2640 savepoints: Vec::new(),
2641 },
2642 );
2643 Ok(QueryResult::CommandOk {
2644 affected: 0,
2645 modified_catalog: false,
2646 })
2647 }
2648
2649 fn exec_commit(&mut self) -> Result<QueryResult, EngineError> {
2650 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2651 let state = self
2652 .tx_catalogs
2653 .remove(&tx_id)
2654 .ok_or(EngineError::NoActiveTransaction)?;
2655 self.catalog = state.catalog;
2656 Ok(QueryResult::CommandOk {
2660 affected: 0,
2661 modified_catalog: true,
2662 })
2663 }
2664
2665 fn exec_rollback(&mut self) -> Result<QueryResult, EngineError> {
2666 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2667 if self.tx_catalogs.remove(&tx_id).is_none() {
2668 return Err(EngineError::NoActiveTransaction);
2669 }
2670 Ok(QueryResult::CommandOk {
2672 affected: 0,
2673 modified_catalog: false,
2674 })
2675 }
2676
2677 fn exec_savepoint(&mut self, name: String) -> Result<QueryResult, EngineError> {
2678 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2679 let state = self
2680 .tx_catalogs
2681 .get_mut(&tx_id)
2682 .ok_or(EngineError::NoActiveTransaction)?;
2683 state.savepoints.retain(|(n, _)| n != &name);
2687 let snapshot = state.catalog.clone();
2688 state.savepoints.push((name, snapshot));
2689 Ok(QueryResult::CommandOk {
2690 affected: 0,
2691 modified_catalog: false,
2692 })
2693 }
2694
2695 fn exec_rollback_to_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2696 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2697 let state = self
2698 .tx_catalogs
2699 .get_mut(&tx_id)
2700 .ok_or(EngineError::NoActiveTransaction)?;
2701 let pos = state
2702 .savepoints
2703 .iter()
2704 .rposition(|(n, _)| n == name)
2705 .ok_or_else(|| {
2706 EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2707 })?;
2708 let snapshot = state.savepoints[pos].1.clone();
2712 state.savepoints.truncate(pos + 1);
2713 state.catalog = snapshot;
2714 Ok(QueryResult::CommandOk {
2715 affected: 0,
2716 modified_catalog: false,
2717 })
2718 }
2719
2720 fn exec_release_savepoint(&mut self, name: &str) -> Result<QueryResult, EngineError> {
2721 let tx_id = self.current_tx.ok_or(EngineError::NoActiveTransaction)?;
2722 let state = self
2723 .tx_catalogs
2724 .get_mut(&tx_id)
2725 .ok_or(EngineError::NoActiveTransaction)?;
2726 let pos = state
2727 .savepoints
2728 .iter()
2729 .rposition(|(n, _)| n == name)
2730 .ok_or_else(|| {
2731 EngineError::Unsupported(alloc::format!("savepoint not found: {name}"))
2732 })?;
2733 state.savepoints.truncate(pos);
2736 Ok(QueryResult::CommandOk {
2737 affected: 0,
2738 modified_catalog: false,
2739 })
2740 }
2741
2742 fn exec_alter_table(
2753 &mut self,
2754 s: spg_sql::ast::AlterTableStatement,
2755 ) -> Result<QueryResult, EngineError> {
2756 match s.target {
2757 spg_sql::ast::AlterTableTarget::SetHotTierBytes(n) => {
2758 let table = self
2759 .active_catalog_mut()
2760 .get_mut(&s.name)
2761 .ok_or_else(|| {
2762 EngineError::Storage(StorageError::TableNotFound {
2763 name: s.name.clone(),
2764 })
2765 })?;
2766 table.schema_mut().hot_tier_bytes = Some(n);
2767 }
2768 spg_sql::ast::AlterTableTarget::AddForeignKey(fk) => {
2769 let cols_snapshot = self
2774 .active_catalog()
2775 .get(&s.name)
2776 .ok_or_else(|| {
2777 EngineError::Storage(StorageError::TableNotFound {
2778 name: s.name.clone(),
2779 })
2780 })?
2781 .schema()
2782 .columns
2783 .clone();
2784 let storage_fk = resolve_foreign_key(
2785 &s.name,
2786 &cols_snapshot,
2787 fk,
2788 self.active_catalog(),
2789 )?;
2790 let existing_rows: Vec<Vec<Value>> = self
2793 .active_catalog()
2794 .get(&s.name)
2795 .expect("checked above")
2796 .rows()
2797 .iter()
2798 .map(|r| r.values.clone())
2799 .collect();
2800 enforce_fk_inserts(
2801 self.active_catalog(),
2802 &s.name,
2803 core::slice::from_ref(&storage_fk),
2804 &existing_rows,
2805 )?;
2806 let table = self
2808 .active_catalog_mut()
2809 .get_mut(&s.name)
2810 .expect("checked above");
2811 if let Some(name) = &storage_fk.name
2812 && table
2813 .schema()
2814 .foreign_keys
2815 .iter()
2816 .any(|f| f.name.as_ref() == Some(name))
2817 {
2818 return Err(EngineError::Unsupported(alloc::format!(
2819 "ALTER TABLE ADD CONSTRAINT: a constraint named {name:?} already exists"
2820 )));
2821 }
2822 table.schema_mut().foreign_keys.push(storage_fk);
2823 }
2824 spg_sql::ast::AlterTableTarget::DropForeignKey(name) => {
2825 let table = self
2826 .active_catalog_mut()
2827 .get_mut(&s.name)
2828 .ok_or_else(|| {
2829 EngineError::Storage(StorageError::TableNotFound {
2830 name: s.name.clone(),
2831 })
2832 })?;
2833 let fks = &mut table.schema_mut().foreign_keys;
2834 let before = fks.len();
2835 fks.retain(|f| f.name.as_ref() != Some(&name));
2836 if fks.len() == before {
2837 return Err(EngineError::Unsupported(alloc::format!(
2838 "ALTER TABLE DROP CONSTRAINT: no FK named {name:?} on {:?}",
2839 s.name
2840 )));
2841 }
2842 }
2843 }
2844 Ok(QueryResult::CommandOk {
2845 affected: 0,
2846 modified_catalog: !self.in_transaction(),
2847 })
2848 }
2849
2850 fn exec_alter_index(
2851 &mut self,
2852 stmt: spg_sql::ast::AlterIndexStatement,
2853 ) -> Result<QueryResult, EngineError> {
2854 let spg_sql::ast::AlterIndexStatement {
2858 name: idx_name,
2859 target,
2860 } = stmt;
2861 let spg_sql::ast::AlterIndexTarget::Rebuild { encoding } = target;
2862 let target = encoding.map(|e| match e {
2863 SqlVecEncoding::F32 => VecEncoding::F32,
2864 SqlVecEncoding::Sq8 => VecEncoding::Sq8,
2865 SqlVecEncoding::F16 => VecEncoding::F16,
2866 });
2867 let table_name = {
2872 let cat = self.active_catalog();
2873 let mut found: Option<String> = None;
2874 for tname in cat.table_names() {
2875 if let Some(t) = cat.get(&tname)
2876 && t.indices().iter().any(|i| i.name == idx_name)
2877 {
2878 found = Some(tname);
2879 break;
2880 }
2881 }
2882 found.ok_or_else(|| {
2883 EngineError::Storage(StorageError::IndexNotFound {
2884 name: idx_name.clone(),
2885 })
2886 })?
2887 };
2888 let table = self
2889 .active_catalog_mut()
2890 .get_mut(&table_name)
2891 .expect("table found above");
2892 table.rebuild_nsw_index(&idx_name, target)?;
2893 self.plan_cache.evict_referencing(&table_name);
2896 Ok(QueryResult::CommandOk {
2897 affected: 0,
2898 modified_catalog: !self.in_transaction(),
2899 })
2900 }
2901
2902 fn exec_create_index(
2903 &mut self,
2904 stmt: CreateIndexStatement,
2905 ) -> Result<QueryResult, EngineError> {
2906 let table = self
2907 .active_catalog_mut()
2908 .get_mut(&stmt.table)
2909 .ok_or_else(|| {
2910 EngineError::Storage(StorageError::TableNotFound {
2911 name: stmt.table.clone(),
2912 })
2913 })?;
2914 if stmt.if_not_exists && table.indices().iter().any(|i| i.name == stmt.name) {
2916 return Ok(QueryResult::CommandOk {
2917 affected: 0,
2918 modified_catalog: false,
2919 });
2920 }
2921 let _ = &stmt.extra_columns; let table_name = stmt.table.clone();
2928 let included_positions: Vec<usize> = if stmt.included_columns.is_empty() {
2932 Vec::new()
2933 } else {
2934 let schema = table.schema();
2935 stmt.included_columns
2936 .iter()
2937 .map(|c| {
2938 schema.column_position(c).ok_or_else(|| {
2939 EngineError::Storage(StorageError::ColumnNotFound {
2940 column: c.clone(),
2941 })
2942 })
2943 })
2944 .collect::<Result<Vec<_>, _>>()?
2945 };
2946 match stmt.method {
2947 IndexMethod::BTree => table.add_index(stmt.name.clone(), &stmt.column)?,
2948 IndexMethod::Hnsw => {
2949 if !included_positions.is_empty() {
2950 return Err(EngineError::Unsupported(
2951 "INCLUDE columns are not supported on HNSW indexes".into(),
2952 ));
2953 }
2954 table.add_nsw_index(stmt.name.clone(), &stmt.column, spg_storage::NSW_DEFAULT_M)?;
2955 }
2956 IndexMethod::Brin => {
2958 if !included_positions.is_empty() {
2959 return Err(EngineError::Unsupported(
2960 "INCLUDE columns are not supported on BRIN indexes".into(),
2961 ));
2962 }
2963 table.add_brin_index(stmt.name.clone(), &stmt.column)?;
2964 }
2965 }
2966 if !included_positions.is_empty()
2967 && let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name)
2968 {
2969 idx.included_columns = included_positions;
2970 }
2971 if let Some(pred_expr) = &stmt.partial_predicate {
2979 let canonical = pred_expr.to_string();
2980 if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
2981 return Err(EngineError::Unsupported(
2982 "WHERE predicates are not supported on HNSW or BRIN indexes".into(),
2983 ));
2984 }
2985 if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
2986 idx.partial_predicate = Some(canonical);
2987 }
2988 }
2989 if let Some(key_expr) = &stmt.expression {
2997 if matches!(stmt.method, IndexMethod::Hnsw | IndexMethod::Brin) {
2998 return Err(EngineError::Unsupported(
2999 "Expression keys are not supported on HNSW or BRIN indexes".into(),
3000 ));
3001 }
3002 let canonical = key_expr.to_string();
3003 if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3004 idx.expression = Some(canonical);
3005 }
3006 }
3007 if stmt.is_unique {
3016 let mut extra_positions: alloc::vec::Vec<usize> = alloc::vec::Vec::new();
3017 for col_name in &stmt.extra_columns {
3018 let pos = table
3019 .schema()
3020 .columns
3021 .iter()
3022 .position(|c| c.name.eq_ignore_ascii_case(col_name))
3023 .ok_or_else(|| {
3024 EngineError::Unsupported(alloc::format!(
3025 "UNIQUE INDEX {:?}: extra column {col_name:?} not in table {:?}",
3026 stmt.name, stmt.table
3027 ))
3028 })?;
3029 extra_positions.push(pos);
3030 }
3031 if let Some(idx) = table.indices_mut().iter_mut().find(|i| i.name == stmt.name) {
3032 idx.is_unique = true;
3033 idx.extra_column_positions = extra_positions;
3034 }
3035 let snapshot_indices = table.indices().to_vec();
3040 let snapshot_rows: alloc::vec::Vec<spg_storage::Row> =
3041 table.rows().iter().cloned().collect();
3042 let snapshot_schema = table.schema().clone();
3043 let idx_ref = snapshot_indices
3044 .iter()
3045 .find(|i| i.name == stmt.name)
3046 .expect("just-added index");
3047 check_existing_unique_violation(idx_ref, &snapshot_schema, &snapshot_rows)?;
3048 }
3049 self.plan_cache.evict_referencing(&table_name);
3052 Ok(QueryResult::CommandOk {
3053 affected: 0,
3054 modified_catalog: !self.in_transaction(),
3055 })
3056 }
3057
3058 fn exec_create_table(
3059 &mut self,
3060 stmt: CreateTableStatement,
3061 ) -> Result<QueryResult, EngineError> {
3062 if stmt.if_not_exists && self.active_catalog().get(&stmt.name).is_some() {
3063 return Ok(QueryResult::CommandOk {
3064 affected: 0,
3065 modified_catalog: false,
3066 });
3067 }
3068 let table_name = stmt.name.clone();
3069 let inline_pk_columns: Vec<String> = stmt
3073 .columns
3074 .iter()
3075 .filter(|c| c.is_primary_key)
3076 .map(|c| c.name.clone())
3077 .collect();
3078 let cols = stmt
3084 .columns
3085 .into_iter()
3086 .map(column_def_to_schema)
3087 .collect::<Result<Vec<_>, _>>()?;
3088 let mut cols = cols;
3090 for tc in &stmt.table_constraints {
3091 if let spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } = tc {
3092 for col_name in columns {
3093 if let Some(col) = cols.iter_mut().find(|c| c.name == *col_name) {
3094 col.nullable = false;
3095 }
3096 }
3097 }
3098 }
3099 let mut fks: Vec<spg_storage::ForeignKeyConstraint> =
3106 Vec::with_capacity(stmt.foreign_keys.len());
3107 for fk in stmt.foreign_keys {
3108 fks.push(resolve_foreign_key(
3109 &table_name,
3110 &cols,
3111 fk,
3112 self.active_catalog(),
3113 )?);
3114 }
3115 let mut schema = TableSchema::new(table_name.clone(), cols);
3116 schema.foreign_keys = fks;
3117 let mut uc_storage: Vec<spg_storage::UniquenessConstraint> = Vec::new();
3121 for tc in &stmt.table_constraints {
3122 let (is_pk, names) = match tc {
3123 spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
3124 (true, columns.clone())
3125 }
3126 spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3127 (false, columns.clone())
3128 }
3129 };
3130 let mut positions = Vec::with_capacity(names.len());
3131 for n in &names {
3132 let pos = schema
3133 .columns
3134 .iter()
3135 .position(|c| c.name == *n)
3136 .ok_or_else(|| {
3137 EngineError::Unsupported(alloc::format!(
3138 "table constraint references unknown column {n:?}"
3139 ))
3140 })?;
3141 positions.push(pos);
3142 }
3143 uc_storage.push(spg_storage::UniquenessConstraint {
3144 is_primary_key: is_pk,
3145 columns: positions,
3146 });
3147 }
3148 schema.uniqueness_constraints = uc_storage.clone();
3149 self.active_catalog_mut().create_table(schema)?;
3150 let table = self
3154 .active_catalog_mut()
3155 .get_mut(&table_name)
3156 .expect("just created");
3157 for (i, col_name) in inline_pk_columns.iter().enumerate() {
3158 let idx_name = if inline_pk_columns.len() == 1 {
3159 alloc::format!("{table_name}_pkey")
3160 } else {
3161 alloc::format!("{table_name}_pkey_{i}")
3162 };
3163 if let Err(e) = table.add_index(idx_name, col_name) {
3164 return Err(EngineError::Storage(e));
3165 }
3166 }
3167 for (i, tc) in stmt.table_constraints.iter().enumerate() {
3168 let (is_pk, names) = match tc {
3169 spg_sql::ast::TableConstraint::PrimaryKey { columns, .. } => {
3170 (true, columns)
3171 }
3172 spg_sql::ast::TableConstraint::Unique { columns, .. } => {
3173 (false, columns)
3174 }
3175 };
3176 let leading = &names[0];
3177 let already = table
3180 .indices()
3181 .iter()
3182 .any(|idx| {
3183 matches!(idx.kind, spg_storage::IndexKind::BTree(_))
3184 && table.schema().columns[idx.column_position].name == *leading
3185 });
3186 if already {
3187 continue;
3188 }
3189 let suffix = if is_pk { "pkey" } else { "key" };
3190 let idx_name = if names.len() == 1 {
3191 alloc::format!("{table_name}_{leading}_{suffix}")
3192 } else {
3193 alloc::format!("{table_name}_{leading}_{suffix}_{i}")
3194 };
3195 if let Err(e) = table.add_index(idx_name, leading) {
3196 return Err(EngineError::Storage(e));
3197 }
3198 }
3199 Ok(QueryResult::CommandOk {
3200 affected: 0,
3201 modified_catalog: !self.in_transaction(),
3202 })
3203 }
3204
    /// Executes `INSERT`, including the optional column list, `ON CONFLICT`
    /// clause and `RETURNING` list.
    ///
    /// The work happens in phases:
    /// 1. map the statement's column list onto schema positions;
    /// 2. turn every value tuple into a full, coerced row (filling defaults
    ///    and auto-increment values);
    /// 3. run foreign-key and uniqueness checks against the catalog;
    /// 4. apply `ON CONFLICT` handling, splitting rows into inserts and
    ///    pending updates;
    /// 5. write inserts and updates to the table;
    /// 6. either build the `RETURNING` result or report the affected count.
    ///
    /// # Errors
    /// Returns `StorageError::TableNotFound`, `EvalError::ColumnNotFound`,
    /// `StorageError::ArityMismatch`, `StorageError::NullInNotNull`,
    /// `EngineError::Unsupported`, or whatever the helper checks and the
    /// storage layer raise.
    fn exec_insert(&mut self, stmt: InsertStatement) -> Result<QueryResult, EngineError> {
        // Copied out before the catalog is mutably borrowed; used below when
        // resolving column defaults.
        let clock = self.clock;
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        // Owned copy of the column schemas so they can be read while `table`
        // is used mutably for auto-increment values.
        let column_meta: Vec<ColumnSchema> = table.schema().columns.clone();
        let schema_cols_len = column_meta.len();
        // With an explicit column list: for each schema column, the position
        // of its value inside an INSERT tuple (`None` = column not listed).
        // Without a column list this stays `None` and tuples are positional.
        let tuple_pos: Option<Vec<Option<usize>>> = match &stmt.columns {
            None => None,
            Some(cols) => {
                let mut map = alloc::vec![None; schema_cols_len];
                for (j, name) in cols.iter().enumerate() {
                    let idx = column_meta
                        .iter()
                        .position(|c| c.name == *name)
                        .ok_or_else(|| {
                            EngineError::Eval(EvalError::ColumnNotFound { name: name.clone() })
                        })?;
                    // A column listed twice is reported as an arity mismatch.
                    if map[idx].is_some() {
                        return Err(EngineError::Storage(StorageError::ArityMismatch {
                            expected: schema_cols_len,
                            actual: cols.len(),
                        }));
                    }
                    map[idx] = Some(j);
                }
                // An omitted column must be nullable, have a (runtime)
                // default, or be auto-increment.
                for (i, col) in column_meta.iter().enumerate() {
                    if map[i].is_none()
                        && !col.nullable
                        && col.default.is_none()
                        && col.runtime_default.is_none()
                        && !col.auto_increment
                    {
                        return Err(EngineError::Storage(StorageError::NullInNotNull {
                            column: col.name.clone(),
                        }));
                    }
                }
                Some(map)
            }
        };
        // Every tuple must have as many values as the column list (or the
        // whole schema when no list was given).
        let expected_tuple_len = stmt.columns.as_ref().map_or(schema_cols_len, Vec::len);
        let fks = table.schema().foreign_keys.clone();
        let mut affected = 0usize;
        // Fully materialised rows, in statement order.
        let mut all_values: Vec<Vec<Value>> = Vec::with_capacity(stmt.rows.len());
        for tuple in stmt.rows {
            if tuple.len() != expected_tuple_len {
                return Err(EngineError::Storage(StorageError::ArityMismatch {
                    expected: expected_tuple_len,
                    actual: tuple.len(),
                }));
            }
            let values: Vec<Value> = if let Some(map) = &tuple_pos {
                // Column-list form: evaluate the tuple once, then place each
                // value (or the column default) at its schema position.
                let raw_tuple: Vec<Value> = tuple
                    .into_iter()
                    .map(literal_expr_to_value)
                    .collect::<Result<_, _>>()?;
                let mut out = Vec::with_capacity(schema_cols_len);
                for (i, col) in column_meta.iter().enumerate() {
                    let mut raw = match map[i] {
                        Some(j) => raw_tuple[j].clone(),
                        None => resolve_column_default_free(col, clock)?,
                    };
                    // A NULL in an auto-increment column is replaced by the
                    // table's next value for that column.
                    if col.auto_increment && raw.is_null() {
                        let next = table.next_auto_value(i).ok_or_else(|| {
                            EngineError::Unsupported(alloc::format!(
                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
                                col.name
                            ))
                        })?;
                        raw = Value::BigInt(next);
                    }
                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
                }
                out
            } else {
                // Positional form: the i-th value belongs to the i-th column.
                let mut out = Vec::with_capacity(schema_cols_len);
                for (i, (col, expr)) in column_meta.iter().zip(tuple).enumerate() {
                    let mut raw = literal_expr_to_value(expr)?;
                    if col.auto_increment && raw.is_null() {
                        let next = table.next_auto_value(i).ok_or_else(|| {
                            EngineError::Unsupported(alloc::format!(
                                "AUTO_INCREMENT applies to integer columns only (column `{}`)",
                                col.name
                            ))
                        })?;
                        raw = Value::BigInt(next);
                    }
                    out.push(coerce_value(raw, col.ty, &col.name, i)?);
                }
                out
            };
            all_values.push(values);
        }
        let uniqueness = table.schema().uniqueness_constraints.clone();
        // Last use of the mutable table borrow before the read-only checks.
        let _ = table;
        if !fks.is_empty() {
            enforce_fk_inserts(self.active_catalog(), &stmt.table, &fks, &all_values)?;
        }
        enforce_uniqueness_inserts(
            self.active_catalog(),
            &stmt.table,
            &uniqueness,
            &all_values,
        )?;
        enforce_unique_index_inserts(
            self.active_catalog(),
            &stmt.table,
            &all_values,
        )?;
        // Rows diverted by `ON CONFLICT DO UPDATE`: (row position, new row).
        let mut pending_updates: Vec<(usize, Vec<Value>)> = Vec::new();
        let mut skipped_count = 0usize;
        if let Some(clause) = &stmt.on_conflict {
            let conflict_cols = resolve_on_conflict_columns(
                self.active_catalog(),
                &stmt.table,
                clause.target_columns.as_slice(),
            )?;
            // Rows that survive conflict handling and will be inserted.
            let mut kept: Vec<Vec<Value>> = Vec::with_capacity(all_values.len());
            // Conflict keys of rows already kept from this same statement.
            let mut seen_keys: Vec<Vec<Value>> = Vec::new();
            for values in all_values {
                let key_tuple: Vec<&Value> =
                    conflict_cols.iter().map(|&c| &values[c]).collect();
                // A key containing NULL is never treated as a collision.
                let has_null_key = key_tuple.iter().any(|v| matches!(v, Value::Null));
                let collides_with_table = !has_null_key
                    && on_conflict_keys_exist(
                        self.active_catalog(),
                        &stmt.table,
                        &conflict_cols,
                        &key_tuple,
                    );
                let key_tuple_owned: Vec<Value> =
                    key_tuple.iter().map(|v| (*v).clone()).collect();
                let collides_with_batch = !has_null_key
                    && seen_keys.iter().any(|k| k == &key_tuple_owned);
                let collides = collides_with_table || collides_with_batch;
                match (&clause.action, collides) {
                    // No collision: the row is inserted normally.
                    (_, false) => {
                        seen_keys.push(key_tuple_owned);
                        kept.push(values);
                    }
                    // DO NOTHING: drop the colliding row.
                    (spg_sql::ast::OnConflictAction::Nothing, true) => {
                        skipped_count += 1;
                    }
                    (
                        spg_sql::ast::OnConflictAction::Update {
                            assignments,
                            where_,
                        },
                        true,
                    ) => {
                        // A collision only with an earlier row of this
                        // statement is skipped rather than updated.
                        if !collides_with_table {
                            skipped_count += 1;
                            continue;
                        }
                        let target_pos = lookup_row_position_by_keys(
                            self.active_catalog(),
                            &stmt.table,
                            &conflict_cols,
                            &key_tuple,
                        )
                        .ok_or_else(|| {
                            EngineError::Unsupported(
                                "ON CONFLICT DO UPDATE: conflict detected but row \
                                 position could not be resolved (cold-tier row?)"
                                    .into(),
                            )
                        })?;
                        // `None` from the helper means the row is skipped.
                        let updated = apply_on_conflict_assignments(
                            self.active_catalog(),
                            &stmt.table,
                            target_pos,
                            &values,
                            assignments,
                            where_.as_ref(),
                        )?;
                        if let Some(new_row) = updated {
                            pending_updates.push((target_pos, new_row));
                        } else {
                            skipped_count += 1;
                        }
                    }
                }
            }
            all_values = kept;
        }
        // Re-acquire the table mutably for the actual writes.
        let table = self
            .active_catalog_mut()
            .get_mut(&stmt.table)
            .ok_or_else(|| {
                EngineError::Storage(StorageError::TableNotFound {
                    name: stmt.table.clone(),
                })
            })?;
        // Rows to feed into RETURNING: inserted rows first, then updated rows.
        let mut returning_rows: Vec<Vec<Value>> = Vec::new();
        for values in all_values {
            if stmt.returning.is_some() {
                returning_rows.push(values.clone());
            }
            table.insert(Row::new(values))?;
            affected += 1;
        }
        for (pos, new_row) in pending_updates {
            if stmt.returning.is_some() {
                returning_rows.push(new_row.clone());
            }
            table.update_row(pos, new_row)?;
            affected += 1;
        }
        // The skip counter is tracked but not reported.
        let _ = skipped_count;
        if let Some(items) = &stmt.returning {
            let _ = table;
            return self.build_returning_rows(
                &stmt.table,
                items,
                returning_rows,
            );
        }
        // Modification statistics are only recorded outside a transaction.
        if !self.in_transaction() && affected > 0 {
            self.statistics
                .record_modifications(&stmt.table, affected as u64);
        }
        Ok(QueryResult::CommandOk {
            affected,
            modified_catalog: !self.in_transaction(),
        })
    }
3496
3497 fn exec_select_as_of_segment(
3510 &self,
3511 stmt: &SelectStatement,
3512 from: &spg_sql::ast::FromClause,
3513 segment_id: u32,
3514 ) -> Result<QueryResult, EngineError> {
3515 if !from.joins.is_empty()
3518 || stmt.group_by.is_some()
3519 || stmt.having.is_some()
3520 || !stmt.unions.is_empty()
3521 || !stmt.order_by.is_empty()
3522 || stmt.offset.is_some()
3523 || stmt.distinct
3524 || aggregate::uses_aggregate(stmt)
3525 {
3526 return Err(EngineError::Unsupported(
3527 "AS OF SEGMENT supports SELECT projection + WHERE + LIMIT only \
3528 (joins / aggregates / ORDER BY are STABILITY § \"Out of v6.10\")"
3529 .into(),
3530 ));
3531 }
3532 let table = self
3533 .active_catalog()
3534 .get(&from.primary.name)
3535 .ok_or_else(|| StorageError::TableNotFound {
3536 name: from.primary.name.clone(),
3537 })?;
3538 let schema = table.schema().clone();
3539 let schema_cols = &schema.columns;
3540 let alias = from
3541 .primary
3542 .alias
3543 .as_deref()
3544 .unwrap_or(from.primary.name.as_str());
3545 let ctx = EvalContext::new(schema_cols, Some(alias));
3546 let seg = self
3547 .active_catalog()
3548 .cold_segment(segment_id)
3549 .ok_or_else(|| {
3550 EngineError::Unsupported(alloc::format!(
3551 "AS OF SEGMENT: cold segment {segment_id} not registered"
3552 ))
3553 })?;
3554 let mut out_rows: Vec<Row> = Vec::new();
3555 let mut limit_remaining: Option<usize> =
3556 stmt.limit_literal().and_then(|n| usize::try_from(n).ok());
3557 for (_key, body) in seg.scan() {
3558 let (row, _consumed) = spg_storage::decode_row_body_dense(&body, &schema)
3559 .map_err(EngineError::Storage)?;
3560 if let Some(where_expr) = &stmt.where_ {
3561 let cond = self.eval_expr_simple(where_expr, &row, &ctx)?;
3562 if !matches!(cond, Value::Bool(true)) {
3563 continue;
3564 }
3565 }
3566 let projected = self.project_row_simple(&row, &stmt.items, schema_cols, alias)?;
3568 out_rows.push(projected);
3569 if let Some(rem) = limit_remaining.as_mut() {
3570 if *rem == 0 {
3571 out_rows.pop();
3572 break;
3573 }
3574 *rem -= 1;
3575 }
3576 }
3577 let columns = self.derive_output_columns(&stmt.items, schema_cols, alias);
3579 Ok(QueryResult::Rows {
3580 columns,
3581 rows: out_rows,
3582 })
3583 }
3584
3585 fn eval_expr_simple(
3590 &self,
3591 expr: &Expr,
3592 row: &Row,
3593 ctx: &EvalContext,
3594 ) -> Result<Value, EngineError> {
3595 let cancel = CancelToken::none();
3596 self.eval_expr_with_correlated(expr, row, ctx, cancel, None)
3597 }
3598
3599 fn build_returning_rows(
3606 &self,
3607 table_name: &str,
3608 items: &[SelectItem],
3609 mutated_rows: Vec<Vec<Value>>,
3610 ) -> Result<QueryResult, EngineError> {
3611 let table = self.active_catalog().get(table_name).ok_or_else(|| {
3612 EngineError::Storage(StorageError::TableNotFound {
3613 name: table_name.into(),
3614 })
3615 })?;
3616 let schema_cols = table.schema().columns.clone();
3617 let columns = self.derive_output_columns(items, &schema_cols, table_name);
3618 let mut out_rows: Vec<Row> = Vec::with_capacity(mutated_rows.len());
3619 for values in mutated_rows {
3620 let row = Row::new(values);
3621 let projected = self.project_row_simple(&row, items, &schema_cols, table_name)?;
3622 out_rows.push(projected);
3623 }
3624 Ok(QueryResult::Rows {
3625 columns,
3626 rows: out_rows,
3627 })
3628 }
3629
3630 fn project_row_simple(
3634 &self,
3635 row: &Row,
3636 items: &[SelectItem],
3637 schema_cols: &[ColumnSchema],
3638 alias: &str,
3639 ) -> Result<Row, EngineError> {
3640 let ctx = EvalContext::new(schema_cols, Some(alias));
3641 let cancel = CancelToken::none();
3642 let mut out_vals = Vec::new();
3643 for item in items {
3644 match item {
3645 SelectItem::Wildcard => {
3646 out_vals.extend(row.values.iter().cloned());
3647 }
3648 SelectItem::Expr { expr, .. } => {
3649 let v = self.eval_expr_with_correlated(expr, row, &ctx, cancel, None)?;
3650 out_vals.push(v);
3651 }
3652 }
3653 }
3654 Ok(Row::new(out_vals))
3655 }
3656
3657 fn derive_output_columns(
3662 &self,
3663 items: &[SelectItem],
3664 schema_cols: &[ColumnSchema],
3665 _alias: &str,
3666 ) -> Vec<ColumnSchema> {
3667 let mut out = Vec::new();
3668 for item in items {
3669 match item {
3670 SelectItem::Wildcard => {
3671 out.extend(schema_cols.iter().cloned());
3672 }
3673 SelectItem::Expr { alias, .. } => {
3674 let name = alias
3675 .clone()
3676 .unwrap_or_else(|| "?column?".to_string());
3677 out.push(ColumnSchema::new(name, DataType::Text, true));
3680 }
3681 }
3682 }
3683 out
3684 }
3685
3686 fn exec_select_cancel(
3687 &self,
3688 stmt: &SelectStatement,
3689 cancel: CancelToken<'_>,
3690 ) -> Result<QueryResult, EngineError> {
3691 cancel.check()?;
3692 if let Some(from) = &stmt.from
3701 && let Some(seg_id) = from.primary.as_of_segment
3702 {
3703 return self.exec_select_as_of_segment(stmt, from, seg_id);
3704 }
3705 if let Some(from) = &stmt.from
3709 && from.joins.is_empty()
3710 && stmt.where_.is_none()
3711 && stmt.group_by.is_none()
3712 && stmt.having.is_none()
3713 && stmt.unions.is_empty()
3714 && stmt.order_by.is_empty()
3715 && stmt.limit.is_none()
3716 && stmt.offset.is_none()
3717 && !stmt.distinct
3718 && stmt.items.iter().all(|i| matches!(i, SelectItem::Wildcard))
3719 {
3720 let lower = from.primary.name.to_ascii_lowercase();
3721 match lower.as_str() {
3722 "spg_statistic" => return Ok(self.exec_spg_statistic()),
3723 "spg_stat_replication" => return Ok(self.exec_spg_stat_replication()),
3725 "spg_stat_segment" => return Ok(self.exec_spg_stat_segment()),
3726 "spg_stat_query" => return Ok(self.exec_spg_stat_query()),
3727 "spg_stat_activity" => return Ok(self.exec_spg_stat_activity()),
3728 "spg_audit_chain" => return Ok(self.exec_spg_audit_chain()),
3729 "spg_audit_verify" => return Ok(self.exec_spg_audit_verify()),
3730 "spg_table_ddl" => return Ok(self.exec_spg_table_ddl()),
3731 "spg_role_ddl" => return Ok(self.exec_spg_role_ddl()),
3732 "spg_database_ddl" => return Ok(self.exec_spg_database_ddl()),
3733 _ => {}
3734 }
3735 }
3736 if !stmt.ctes.is_empty() {
3744 return self.exec_with_ctes(stmt, cancel);
3745 }
3746 let mut stmt_owned;
3753 let stmt_ref: &SelectStatement = if expr_tree_has_subquery(stmt) {
3754 stmt_owned = stmt.clone();
3755 self.resolve_select_subqueries(&mut stmt_owned, cancel)?;
3756 &stmt_owned
3757 } else {
3758 stmt
3759 };
3760 if stmt_ref.unions.is_empty() {
3761 return self.exec_bare_select_cancel(stmt_ref, cancel);
3762 }
3763 let mut head = stmt_ref.clone();
3768 head.unions = Vec::new();
3769 head.order_by = Vec::new();
3770 head.limit = None;
3771 let QueryResult::Rows { columns, mut rows } =
3772 self.exec_bare_select_cancel(&head, cancel)?
3773 else {
3774 unreachable!("bare SELECT cannot return CommandOk")
3775 };
3776 for (kind, peer) in &stmt_ref.unions {
3777 let QueryResult::Rows {
3778 columns: peer_cols,
3779 rows: peer_rows,
3780 } = self.exec_bare_select_cancel(peer, cancel)?
3781 else {
3782 unreachable!("bare SELECT cannot return CommandOk")
3783 };
3784 if peer_cols.len() != columns.len() {
3785 return Err(EngineError::Unsupported(alloc::format!(
3786 "UNION arity mismatch: head has {} columns, peer has {}",
3787 columns.len(),
3788 peer_cols.len()
3789 )));
3790 }
3791 rows.extend(peer_rows);
3792 if matches!(kind, UnionKind::Distinct) {
3793 rows = dedup_rows(rows);
3794 }
3795 }
3796 if !stmt.order_by.is_empty() {
3799 let synth_ctx = EvalContext::new(&columns, None);
3800 let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
3801 let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(rows.len());
3802 for r in rows {
3803 let keys = build_order_keys(&stmt.order_by, &r, &synth_ctx)?;
3804 tagged.push((keys, r));
3805 }
3806 sort_by_keys(&mut tagged, &descs);
3807 rows = tagged.into_iter().map(|(_, r)| r).collect();
3808 }
3809 apply_offset_and_limit(&mut rows, stmt.offset_literal(), stmt.limit_literal());
3810 Ok(QueryResult::Rows { columns, rows })
3811 }
3812
3813 #[allow(clippy::too_many_lines)]
3814 #[allow(clippy::too_many_lines)] fn exec_bare_select_cancel(
3816 &self,
3817 stmt: &SelectStatement,
3818 cancel: CancelToken<'_>,
3819 ) -> Result<QueryResult, EngineError> {
3820 if select_has_window(stmt) {
3825 return self.exec_select_with_window(stmt, cancel);
3826 }
3827 let Some(from) = &stmt.from else {
3832 let empty_schema: Vec<ColumnSchema> = Vec::new();
3833 let ctx = EvalContext::new(&empty_schema, None);
3834 let projection = build_projection(&stmt.items, &empty_schema, "")?;
3835 let dummy_row = Row::new(Vec::new());
3836 let mut values = Vec::with_capacity(projection.len());
3837 for p in &projection {
3838 values.push(eval::eval_expr(&p.expr, &dummy_row, &ctx)?);
3839 }
3840 let columns: Vec<ColumnSchema> = projection
3841 .into_iter()
3842 .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
3843 .collect();
3844 return Ok(QueryResult::Rows {
3845 columns,
3846 rows: alloc::vec![Row::new(values)],
3847 });
3848 };
3849 if !from.joins.is_empty() {
3853 return self.exec_joined_select(stmt, from);
3854 }
3855 let primary = &from.primary;
3856 let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
3857 StorageError::TableNotFound {
3858 name: primary.name.clone(),
3859 }
3860 })?;
3861 let schema_cols = &table.schema().columns;
3862 let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
3865 let ctx = EvalContext::new(schema_cols, Some(alias));
3866
3867 if let Some(nsw_rows) = try_nsw_knn(stmt, table, schema_cols, alias) {
3872 return materialise_in_order(stmt, table, schema_cols, alias, &nsw_rows);
3873 }
3874
3875 let indexed_rows: Option<Vec<Cow<'_, Row>>> = stmt
3883 .where_
3884 .as_ref()
3885 .and_then(|w| try_index_seek(w, schema_cols, self.active_catalog(), table, alias));
3886
3887 if aggregate::uses_aggregate(stmt) {
3890 let mut filtered: Vec<&Row> = Vec::new();
3891 let mut memo = memoize::MemoizeCache::new();
3895 if let Some(rows) = &indexed_rows {
3896 for cow in rows {
3897 let row = cow.as_ref();
3898 if let Some(where_expr) = &stmt.where_ {
3899 let cond = self.eval_expr_with_correlated(
3900 where_expr,
3901 row,
3902 &ctx,
3903 cancel,
3904 Some(&mut memo),
3905 )?;
3906 if !matches!(cond, Value::Bool(true)) {
3907 continue;
3908 }
3909 }
3910 filtered.push(row);
3911 }
3912 } else {
3913 for i in 0..table.row_count() {
3914 let row = &table.rows()[i];
3915 if let Some(where_expr) = &stmt.where_ {
3916 let cond = self.eval_expr_with_correlated(
3917 where_expr,
3918 row,
3919 &ctx,
3920 cancel,
3921 Some(&mut memo),
3922 )?;
3923 if !matches!(cond, Value::Bool(true)) {
3924 continue;
3925 }
3926 }
3927 filtered.push(row);
3928 }
3929 }
3930 let mut agg = aggregate::run(stmt, &filtered, schema_cols, Some(alias))?;
3931 apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
3932 return Ok(QueryResult::Rows {
3933 columns: agg.columns,
3934 rows: agg.rows,
3935 });
3936 }
3937
3938 let projection = build_projection(&stmt.items, schema_cols, alias)?;
3939
3940 let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
3943 let mut memo = memoize::MemoizeCache::new();
3945 let mut process_row = |row: &Row, loop_idx: usize| -> Result<(), EngineError> {
3948 if loop_idx.is_multiple_of(256) {
3949 cancel.check()?;
3950 }
3951 if let Some(where_expr) = &stmt.where_ {
3952 let cond = self.eval_expr_with_correlated(
3953 where_expr,
3954 row,
3955 &ctx,
3956 cancel,
3957 Some(&mut memo),
3958 )?;
3959 if !matches!(cond, Value::Bool(true)) {
3960 return Ok(());
3961 }
3962 }
3963 let mut values = Vec::with_capacity(projection.len());
3964 for p in &projection {
3965 values.push(eval::eval_expr(&p.expr, row, &ctx)?);
3966 }
3967 let order_keys = if stmt.order_by.is_empty() {
3968 Vec::new()
3969 } else {
3970 build_order_keys(&stmt.order_by, row, &ctx)?
3971 };
3972 tagged.push((order_keys, Row::new(values)));
3973 Ok(())
3974 };
3975 if let Some(rows) = &indexed_rows {
3976 for (loop_idx, cow) in rows.iter().enumerate() {
3977 process_row(cow.as_ref(), loop_idx)?;
3978 }
3979 } else {
3980 for i in 0..table.row_count() {
3981 process_row(&table.rows()[i], i)?;
3982 }
3983 }
3984
3985 if !stmt.order_by.is_empty() {
3986 let keep = if stmt.distinct {
3991 None
3992 } else {
3993 stmt.limit_literal()
3994 .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
3995 };
3996 let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
3997 partial_sort_tagged(&mut tagged, keep, &descs);
3998 }
3999
4000 let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4001 if stmt.distinct {
4002 output_rows = dedup_rows(output_rows);
4003 }
4004 apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4005
4006 let columns: Vec<ColumnSchema> = projection
4007 .into_iter()
4008 .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4009 .collect();
4010
4011 Ok(QueryResult::Rows {
4012 columns,
4013 rows: output_rows,
4014 })
4015 }
4016
4017 #[allow(clippy::too_many_lines)]
4024 fn exec_joined_select(
4025 &self,
4026 stmt: &SelectStatement,
4027 from: &FromClause,
4028 ) -> Result<QueryResult, EngineError> {
4029 let primary_table = self
4032 .active_catalog()
4033 .get(&from.primary.name)
4034 .ok_or_else(|| StorageError::TableNotFound {
4035 name: from.primary.name.clone(),
4036 })?;
4037 let primary_alias = from
4038 .primary
4039 .alias
4040 .as_deref()
4041 .unwrap_or(from.primary.name.as_str())
4042 .to_string();
4043 let mut joined_tables: Vec<(&Table, String, JoinKind, Option<&Expr>)> = Vec::new();
4044 for j in &from.joins {
4045 let t = self.active_catalog().get(&j.table.name).ok_or_else(|| {
4046 StorageError::TableNotFound {
4047 name: j.table.name.clone(),
4048 }
4049 })?;
4050 let a = j
4051 .table
4052 .alias
4053 .as_deref()
4054 .unwrap_or(j.table.name.as_str())
4055 .to_string();
4056 joined_tables.push((t, a, j.kind, j.on.as_ref()));
4057 }
4058
4059 let mut combined_schema: Vec<ColumnSchema> = Vec::new();
4062 for col in &primary_table.schema().columns {
4063 combined_schema.push(ColumnSchema::new(
4064 alloc::format!("{primary_alias}.{}", col.name),
4065 col.ty,
4066 col.nullable,
4067 ));
4068 }
4069 for (t, a, _, _) in &joined_tables {
4070 for col in &t.schema().columns {
4071 combined_schema.push(ColumnSchema::new(
4072 alloc::format!("{a}.{}", col.name),
4073 col.ty,
4074 col.nullable,
4075 ));
4076 }
4077 }
4078 let ctx = EvalContext::new(&combined_schema, None);
4079
4080 let mut working: Vec<Row> = primary_table.rows().iter().cloned().collect();
4083 let mut produced_len = primary_table.schema().columns.len();
4084 for (t, _, kind, on) in &joined_tables {
4085 let right_arity = t.schema().columns.len();
4086 let mut next: Vec<Row> = Vec::new();
4087 for left in &working {
4088 let mut left_matched = false;
4089 for right in t.rows() {
4090 let mut combined_vals = left.values.clone();
4091 combined_vals.extend(right.values.iter().cloned());
4092 let combined = Row::new(combined_vals);
4095 let keep = if let Some(on_expr) = on {
4096 let cond = eval::eval_expr(on_expr, &combined, &ctx)?;
4097 matches!(cond, Value::Bool(true))
4098 } else {
4099 true
4101 };
4102 if keep {
4103 next.push(combined);
4104 left_matched = true;
4105 }
4106 }
4107 if !left_matched && matches!(kind, JoinKind::Left) {
4108 let mut combined_vals = left.values.clone();
4111 for _ in 0..right_arity {
4112 combined_vals.push(Value::Null);
4113 }
4114 next.push(Row::new(combined_vals));
4115 }
4116 }
4117 working = next;
4118 produced_len += right_arity;
4119 debug_assert!(produced_len <= combined_schema.len());
4120 }
4121
4122 let mut filtered: Vec<Row> = Vec::new();
4124 for row in working {
4125 if let Some(where_expr) = &stmt.where_ {
4126 let cond = eval::eval_expr(where_expr, &row, &ctx)?;
4127 if !matches!(cond, Value::Bool(true)) {
4128 continue;
4129 }
4130 }
4131 filtered.push(row);
4132 }
4133
4134 if aggregate::uses_aggregate(stmt) {
4137 let refs: Vec<&Row> = filtered.iter().collect();
4138 let mut agg = aggregate::run(stmt, &refs, &combined_schema, None)?;
4139 apply_offset_and_limit(&mut agg.rows, stmt.offset_literal(), stmt.limit_literal());
4140 return Ok(QueryResult::Rows {
4141 columns: agg.columns,
4142 rows: agg.rows,
4143 });
4144 }
4145
4146 let projection = build_projection(&stmt.items, &combined_schema, "")?;
4147 let mut tagged: Vec<(Vec<f64>, Row)> = Vec::new();
4148 for row in &filtered {
4149 let mut values = Vec::with_capacity(projection.len());
4150 for p in &projection {
4151 values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4152 }
4153 let order_keys = if stmt.order_by.is_empty() {
4154 Vec::new()
4155 } else {
4156 build_order_keys(&stmt.order_by, row, &ctx)?
4157 };
4158 tagged.push((order_keys, Row::new(values)));
4159 }
4160 if !stmt.order_by.is_empty() {
4161 let keep = if stmt.distinct {
4162 None
4163 } else {
4164 stmt.limit_literal()
4165 .map(|l| l as usize + stmt.offset_literal().map_or(0, |o| o as usize))
4166 };
4167 let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4168 partial_sort_tagged(&mut tagged, keep, &descs);
4169 }
4170 let mut output_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4171 if stmt.distinct {
4172 output_rows = dedup_rows(output_rows);
4173 }
4174 apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4175 let columns: Vec<ColumnSchema> = projection
4176 .into_iter()
4177 .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4178 .collect();
4179 Ok(QueryResult::Rows {
4180 columns,
4181 rows: output_rows,
4182 })
4183 }
4184}
4185
/// One resolved entry of a SELECT list.
///
/// Built by `build_projection`; the executors evaluate `expr` once per input
/// row and turn the remaining fields into the result's `ColumnSchema`.
#[derive(Debug, Clone)]
struct ProjectedItem {
    /// Expression evaluated against each input row to produce the output value.
    expr: Expr,
    /// Column name reported in the result schema (alias, column name, or the
    /// expression's textual form).
    output_name: String,
    /// Data type reported in the result schema.
    ty: DataType,
    /// Whether the result schema marks the column as nullable.
    nullable: bool,
}
4195
4196fn dedup_rows(rows: Vec<Row>) -> Vec<Row> {
4202 let mut out: Vec<Row> = Vec::with_capacity(rows.len());
4203 for r in rows {
4204 if !out.iter().any(|seen| seen == &r) {
4205 out.push(r);
4206 }
4207 }
4208 out
4209}
4210
4211fn value_to_order_key(v: &Value) -> Result<f64, EngineError> {
4215 match v {
4216 Value::Null => Ok(f64::INFINITY),
4217 Value::SmallInt(n) => Ok(f64::from(*n)),
4218 Value::Int(n) => Ok(f64::from(*n)),
4219 Value::Date(d) => Ok(f64::from(*d)),
4220 #[allow(clippy::cast_precision_loss)]
4221 Value::Timestamp(t) => Ok(*t as f64),
4222 #[allow(clippy::cast_precision_loss)]
4223 Value::Numeric { scaled, scale } => {
4224 let mut divisor = 1.0_f64;
4230 for _ in 0..*scale {
4231 divisor *= 10.0;
4232 }
4233 Ok((*scaled as f64) / divisor)
4234 }
4235 #[allow(clippy::cast_precision_loss)]
4236 Value::BigInt(n) => Ok(*n as f64),
4237 Value::Float(x) => Ok(*x),
4238 Value::Bool(b) => Ok(if *b { 1.0 } else { 0.0 }),
4239 Value::Text(s) => {
4240 let mut key: u64 = 0;
4244 for &b in s.as_bytes().iter().take(8) {
4245 key = (key << 8) | u64::from(b);
4246 }
4247 #[allow(clippy::cast_precision_loss)]
4248 Ok(key as f64)
4249 }
4250 Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
4251 Err(EngineError::Unsupported(
4252 "ORDER BY of a raw vector column is not meaningful — use `<->`".into(),
4253 ))
4254 }
4255 Value::Interval { .. } => Err(EngineError::Unsupported(
4256 "ORDER BY of an INTERVAL is not supported in v2.11 \
4257 (months vs micros has no single canonical ordering)"
4258 .into(),
4259 )),
4260 Value::Json(_) => Err(EngineError::Unsupported(
4261 "ORDER BY of a JSON value is not supported — cast the document to text first".into(),
4262 )),
4263 _ => Err(EngineError::Unsupported(
4267 "ORDER BY of this value type is not supported".into(),
4268 )),
4269 }
4270}
4271
4272fn try_nsw_knn(
4286 stmt: &SelectStatement,
4287 table: &Table,
4288 schema_cols: &[ColumnSchema],
4289 table_alias: &str,
4290) -> Option<Vec<usize>> {
4291 if stmt.distinct {
4292 return None;
4293 }
4294 let limit = usize::try_from(stmt.limit_literal()?).ok()?;
4295 if limit == 0 {
4296 return None;
4297 }
4298 if stmt.order_by.len() != 1 {
4302 return None;
4303 }
4304 let order = &stmt.order_by[0];
4305 if order.desc {
4309 return None;
4310 }
4311 let Expr::Binary { lhs, op, rhs } = &order.expr else {
4312 return None;
4313 };
4314 let metric = match op {
4315 BinOp::L2Distance => spg_storage::NswMetric::L2,
4316 BinOp::InnerProduct => spg_storage::NswMetric::InnerProduct,
4317 BinOp::CosineDistance => spg_storage::NswMetric::Cosine,
4318 _ => return None,
4319 };
4320 let ((Expr::Column(col), literal) | (literal, Expr::Column(col))) =
4322 (lhs.as_ref(), rhs.as_ref())
4323 else {
4324 return None;
4325 };
4326 if let Some(q) = &col.qualifier
4327 && q != table_alias
4328 {
4329 return None;
4330 }
4331 let col_pos = schema_cols.iter().position(|s| s.name == col.name)?;
4332 let query = literal_to_vector(literal)?;
4333 let idx = spg_storage::nsw_index_on(table, col_pos)?;
4334 if let Some(where_expr) = &stmt.where_ {
4335 let over_fetch = limit.saturating_mul(10).max(NSW_OVER_FETCH_FLOOR);
4339 let candidates = spg_storage::nsw_query(table, &idx.name, &query, over_fetch, metric);
4340 let ctx = EvalContext::new(schema_cols, Some(table_alias));
4341 let mut kept: Vec<usize> = Vec::with_capacity(limit);
4342 for i in candidates {
4343 let row = &table.rows()[i];
4344 let cond = eval::eval_expr(where_expr, row, &ctx).ok()?;
4345 if matches!(cond, Value::Bool(true)) {
4346 kept.push(i);
4347 if kept.len() >= limit {
4348 break;
4349 }
4350 }
4351 }
4352 Some(kept)
4353 } else {
4354 Some(spg_storage::nsw_query(
4355 table, &idx.name, &query, limit, metric,
4356 ))
4357 }
4358}
4359
4360const NSW_OVER_FETCH_FLOOR: usize = 32;
4364
4365fn literal_to_vector(e: &Expr) -> Option<Vec<f32>> {
4368 match e {
4369 Expr::Literal(Literal::Vector(v)) => Some(v.clone()),
4370 Expr::Cast { expr, .. } => literal_to_vector(expr),
4371 _ => None,
4372 }
4373}
4374
4375fn materialise_in_order(
4379 stmt: &SelectStatement,
4380 table: &Table,
4381 schema_cols: &[ColumnSchema],
4382 table_alias: &str,
4383 ordered_rows: &[usize],
4384) -> Result<QueryResult, EngineError> {
4385 let ctx = EvalContext::new(schema_cols, Some(table_alias));
4386 let projection = build_projection(&stmt.items, schema_cols, table_alias)?;
4387 let mut output_rows: Vec<Row> = Vec::with_capacity(ordered_rows.len());
4388 for &i in ordered_rows {
4389 let row = &table.rows()[i];
4390 let mut values = Vec::with_capacity(projection.len());
4391 for p in &projection {
4392 values.push(eval::eval_expr(&p.expr, row, &ctx)?);
4393 }
4394 output_rows.push(Row::new(values));
4395 }
4396 apply_offset_and_limit(&mut output_rows, stmt.offset_literal(), stmt.limit_literal());
4397 let columns: Vec<ColumnSchema> = projection
4398 .into_iter()
4399 .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4400 .collect();
4401 Ok(QueryResult::Rows {
4402 columns,
4403 rows: output_rows,
4404 })
4405}
4406
4407fn try_index_seek<'a>(
4408 where_expr: &Expr,
4409 schema_cols: &[ColumnSchema],
4410 catalog: &'a Catalog,
4411 table: &'a Table,
4412 table_alias: &str,
4413) -> Option<Vec<Cow<'a, Row>>> {
4414 let Expr::Binary {
4415 lhs,
4416 op: BinOp::Eq,
4417 rhs,
4418 } = where_expr
4419 else {
4420 return None;
4421 };
4422 let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4423 .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4424 let idx = table.index_on(col_pos)?;
4425 let key = IndexKey::from_value(&value)?;
4426 let locators = idx.lookup_eq(&key);
4427 let table_name = table.schema().name.as_str();
4428 let mut out: Vec<Cow<'a, Row>> = Vec::with_capacity(locators.len());
4436 for loc in locators {
4437 match *loc {
4438 spg_storage::RowLocator::Hot(i) => {
4439 if let Some(row) = table.rows().get(i) {
4440 out.push(Cow::Borrowed(row));
4441 }
4442 }
4443 spg_storage::RowLocator::Cold { segment_id, .. } => {
4444 if let Some(row) = catalog.resolve_cold_locator(table_name, segment_id, &key) {
4445 out.push(Cow::Owned(row));
4446 }
4447 }
4448 }
4449 }
4450 Some(out)
4451}
4452
4453fn try_pk_predicate(
4465 where_expr: &Expr,
4466 schema_cols: &[ColumnSchema],
4467 table_alias: &str,
4468) -> Option<(usize, IndexKey)> {
4469 let Expr::Binary {
4470 lhs,
4471 op: BinOp::Eq,
4472 rhs,
4473 } = where_expr
4474 else {
4475 return None;
4476 };
4477 let (col_pos, value) = resolve_col_literal_pair(lhs, rhs, schema_cols, table_alias)
4478 .or_else(|| resolve_col_literal_pair(rhs, lhs, schema_cols, table_alias))?;
4479 let key = IndexKey::from_value(&value)?;
4480 Some((col_pos, key))
4481}
4482
4483fn resolve_col_literal_pair(
4484 col_side: &Expr,
4485 lit_side: &Expr,
4486 schema_cols: &[ColumnSchema],
4487 table_alias: &str,
4488) -> Option<(usize, Value)> {
4489 let Expr::Column(c) = col_side else {
4490 return None;
4491 };
4492 if let Some(q) = &c.qualifier
4493 && q != table_alias
4494 {
4495 return None;
4496 }
4497 let pos = schema_cols.iter().position(|s| s.name == c.name)?;
4498 let Expr::Literal(l) = lit_side else {
4499 return None;
4500 };
4501 let v = match l {
4502 Literal::Integer(n) => {
4503 if let Ok(small) = i32::try_from(*n) {
4504 Value::Int(small)
4505 } else {
4506 Value::BigInt(*n)
4507 }
4508 }
4509 Literal::Float(x) => Value::Float(*x),
4510 Literal::String(s) => Value::Text(s.clone()),
4511 Literal::Bool(b) => Value::Bool(*b),
4512 Literal::Null => Value::Null,
4513 Literal::Vector(_) | Literal::Interval { .. } => return None,
4516 };
4517 Some((pos, v))
4518}
4519
4520fn resolve_projection_column<'a>(
4525 c: &ColumnName,
4526 schema_cols: &'a [ColumnSchema],
4527 table_alias: &str,
4528) -> Result<&'a ColumnSchema, EngineError> {
4529 if let Some(q) = &c.qualifier {
4530 let composite = alloc::format!("{q}.{name}", name = c.name);
4531 if let Some(s) = schema_cols.iter().find(|s| s.name == composite) {
4532 return Ok(s);
4533 }
4534 if q == table_alias
4537 && let Some(s) = schema_cols.iter().find(|s| s.name == c.name)
4538 {
4539 return Ok(s);
4540 }
4541 let prefix = alloc::format!("{q}.");
4545 let qualifier_known =
4546 q == table_alias || schema_cols.iter().any(|s| s.name.starts_with(&prefix));
4547 if !qualifier_known {
4548 return Err(EngineError::Eval(EvalError::UnknownQualifier {
4549 qualifier: q.clone(),
4550 }));
4551 }
4552 return Err(EngineError::Eval(EvalError::ColumnNotFound {
4553 name: c.name.clone(),
4554 }));
4555 }
4556 if let Some(s) = schema_cols.iter().find(|s| s.name == c.name) {
4557 return Ok(s);
4558 }
4559 let suffix = alloc::format!(".{name}", name = c.name);
4560 let mut matches = schema_cols.iter().filter(|s| s.name.ends_with(&suffix));
4561 let first = matches.next();
4562 let extra = matches.next();
4563 match (first, extra) {
4564 (Some(s), None) => Ok(s),
4565 (Some(_), Some(_)) => Err(EngineError::Eval(EvalError::TypeMismatch {
4566 detail: alloc::format!("ambiguous column reference: {}", c.name),
4567 })),
4568 _ => Err(EngineError::Eval(EvalError::ColumnNotFound {
4569 name: c.name.clone(),
4570 })),
4571 }
4572}
4573
4574fn build_projection(
4575 items: &[SelectItem],
4576 schema_cols: &[ColumnSchema],
4577 table_alias: &str,
4578) -> Result<Vec<ProjectedItem>, EngineError> {
4579 let mut out = Vec::new();
4580 for item in items {
4581 match item {
4582 SelectItem::Wildcard => {
4583 for col in schema_cols {
4584 out.push(ProjectedItem {
4585 expr: Expr::Column(ColumnName {
4586 qualifier: None,
4587 name: col.name.clone(),
4588 }),
4589 output_name: col.name.clone(),
4590 ty: col.ty,
4591 nullable: col.nullable,
4592 });
4593 }
4594 }
4595 SelectItem::Expr { expr, alias } => {
4596 if let Expr::Column(c) = expr {
4601 let sch = resolve_projection_column(c, schema_cols, table_alias)?;
4602 let output_name = alias.clone().unwrap_or_else(|| c.name.clone());
4603 out.push(ProjectedItem {
4604 expr: expr.clone(),
4605 output_name,
4606 ty: sch.ty,
4607 nullable: sch.nullable,
4608 });
4609 } else {
4610 let output_name = alias.clone().unwrap_or_else(|| expr.to_string());
4611 out.push(ProjectedItem {
4612 expr: expr.clone(),
4613 output_name,
4614 ty: DataType::Text,
4615 nullable: true,
4616 });
4617 }
4618 }
4619 }
4620 }
4621 Ok(out)
4622}
4623
4624fn numeric_from_integer(
4628 n: i128,
4629 precision: u8,
4630 scale: u8,
4631 col_name: &str,
4632) -> Result<Value, EngineError> {
4633 let factor = pow10_i128(scale);
4634 let scaled = n.checked_mul(factor).ok_or_else(|| {
4635 EngineError::Unsupported(alloc::format!(
4636 "integer overflow scaling value for column `{col_name}` to scale {scale}"
4637 ))
4638 })?;
4639 check_precision(scaled, precision, col_name)?;
4640 Ok(Value::Numeric { scaled, scale })
4641}
4642
4643#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
4646fn numeric_from_float(
4647 x: f64,
4648 precision: u8,
4649 scale: u8,
4650 col_name: &str,
4651) -> Result<Value, EngineError> {
4652 if !x.is_finite() {
4653 return Err(EngineError::Unsupported(alloc::format!(
4654 "cannot store non-finite float in NUMERIC column `{col_name}`"
4655 )));
4656 }
4657 let mut factor = 1.0_f64;
4658 for _ in 0..scale {
4659 factor *= 10.0;
4660 }
4661 let shifted = x * factor;
4666 let biased = if shifted >= 0.0 {
4667 shifted + 0.5
4668 } else {
4669 shifted - 0.5
4670 };
4671 if !(-1e38..=1e38).contains(&biased) {
4674 return Err(EngineError::Unsupported(alloc::format!(
4675 "value {x} overflows NUMERIC range for column `{col_name}`"
4676 )));
4677 }
4678 let scaled = biased as i128;
4679 check_precision(scaled, precision, col_name)?;
4680 Ok(Value::Numeric { scaled, scale })
4681}
4682
4683fn numeric_rescale(
4686 scaled: i128,
4687 src_scale: u8,
4688 precision: u8,
4689 dst_scale: u8,
4690 col_name: &str,
4691) -> Result<Value, EngineError> {
4692 let new_scaled = if dst_scale >= src_scale {
4693 let bump = pow10_i128(dst_scale - src_scale);
4694 scaled.checked_mul(bump).ok_or_else(|| {
4695 EngineError::Unsupported(alloc::format!(
4696 "overflow rescaling NUMERIC for column `{col_name}`"
4697 ))
4698 })?
4699 } else {
4700 let drop = pow10_i128(src_scale - dst_scale);
4701 let half = drop / 2;
4702 if scaled >= 0 {
4703 (scaled + half) / drop
4704 } else {
4705 (scaled - half) / drop
4706 }
4707 };
4708 check_precision(new_scaled, precision, col_name)?;
4709 Ok(Value::Numeric {
4710 scaled: new_scaled,
4711 scale: dst_scale,
4712 })
4713}
4714
4715const fn numeric_truncate_to_integer(scaled: i128, scale: u8) -> i128 {
4718 if scale == 0 {
4719 return scaled;
4720 }
4721 let factor = pow10_i128_const(scale);
4722 scaled / factor
4723}
4724
4725fn check_precision(scaled: i128, precision: u8, col_name: &str) -> Result<(), EngineError> {
4729 if precision == 0 {
4730 return Ok(());
4731 }
4732 let limit = pow10_i128(precision);
4733 if scaled.unsigned_abs() >= limit.unsigned_abs() {
4734 return Err(EngineError::Unsupported(alloc::format!(
4735 "NUMERIC value exceeds precision {precision} for column `{col_name}`"
4736 )));
4737 }
4738 Ok(())
4739}
4740
/// Computes `10^p` as an `i128`, usable in constant contexts.
///
/// `10^38` is the largest power of ten an `i128` can hold. For `p >= 39`
/// the result saturates at `i128::MAX` instead of overflowing, which would
/// panic in debug builds and silently wrap in release builds.
const fn pow10_i128_const(p: u8) -> i128 {
    let mut acc: i128 = 1;
    let mut i = 0;
    while i < p {
        acc = acc.saturating_mul(10);
        i += 1;
    }
    acc
}

/// Computes `10^p` as an `i128`, saturating at `i128::MAX` for `p >= 39`.
///
/// Non-`const` entry point that forwards to `pow10_i128_const`.
fn pow10_i128(p: u8) -> i128 {
    pow10_i128_const(p)
}
4754
4755impl Engine {
4770 #[allow(
4781 clippy::too_many_lines,
4782 clippy::type_complexity,
4783 clippy::needless_range_loop
4784 )] fn exec_select_with_window(
4786 &self,
4787 stmt: &SelectStatement,
4788 cancel: CancelToken<'_>,
4789 ) -> Result<QueryResult, EngineError> {
4790 let from = stmt.from.as_ref().ok_or_else(|| {
4791 EngineError::Unsupported("window functions require a FROM clause".into())
4792 })?;
4793 if !from.joins.is_empty() {
4796 return Err(EngineError::Unsupported(
4797 "JOIN with window functions not yet supported".into(),
4798 ));
4799 }
4800 let primary = &from.primary;
4801 let table = self.active_catalog().get(&primary.name).ok_or_else(|| {
4802 StorageError::TableNotFound {
4803 name: primary.name.clone(),
4804 }
4805 })?;
4806 let alias = primary.alias.as_deref().unwrap_or(primary.name.as_str());
4807 let schema_cols = &table.schema().columns;
4808 let ctx = EvalContext::new(schema_cols, Some(alias));
4809
4810 let mut filtered: Vec<&Row> = Vec::new();
4812 for (i, row) in table.rows().iter().enumerate() {
4813 if i.is_multiple_of(256) {
4814 cancel.check()?;
4815 }
4816 if let Some(w) = &stmt.where_ {
4817 let cond = eval::eval_expr(w, row, &ctx)?;
4818 if !matches!(cond, Value::Bool(true)) {
4819 continue;
4820 }
4821 }
4822 filtered.push(row);
4823 }
4824 let n_rows = filtered.len();
4825
4826 let mut window_nodes: Vec<Expr> = Vec::new();
4828 for item in &stmt.items {
4829 if let SelectItem::Expr { expr, .. } = item {
4830 collect_window_nodes(expr, &mut window_nodes);
4831 }
4832 }
4833
4834 let mut win_vals: Vec<Vec<Value>> = Vec::with_capacity(window_nodes.len());
4837 for wnode in &window_nodes {
4838 let Expr::WindowFunction {
4839 name,
4840 args,
4841 partition_by,
4842 order_by,
4843 frame,
4844 null_treatment,
4845 } = wnode
4846 else {
4847 unreachable!("collect_window_nodes pushes only WindowFunction");
4848 };
4849 let mut indexed: Vec<(Vec<Value>, Vec<(Value, bool)>, usize)> =
4851 Vec::with_capacity(n_rows);
4852 for (i, row) in filtered.iter().enumerate() {
4853 let pkey: Vec<Value> = partition_by
4854 .iter()
4855 .map(|p| eval::eval_expr(p, row, &ctx))
4856 .collect::<Result<_, _>>()?;
4857 let okey: Vec<(Value, bool)> = order_by
4858 .iter()
4859 .map(|(e, desc)| eval::eval_expr(e, row, &ctx).map(|v| (v, *desc)))
4860 .collect::<Result<_, _>>()?;
4861 indexed.push((pkey, okey, i));
4862 }
4863 indexed.sort_by(|a, b| {
4866 let p_cmp = partition_key_cmp(&a.0, &b.0);
4867 if p_cmp != core::cmp::Ordering::Equal {
4868 return p_cmp;
4869 }
4870 order_key_cmp(&a.1, &b.1)
4871 });
4872 let mut out_vals: Vec<Value> = alloc::vec![Value::Null; n_rows];
4874 let mut p_start = 0;
4875 while p_start < indexed.len() {
4876 let mut p_end = p_start + 1;
4877 while p_end < indexed.len()
4878 && partition_key_cmp(&indexed[p_start].0, &indexed[p_end].0)
4879 == core::cmp::Ordering::Equal
4880 {
4881 p_end += 1;
4882 }
4883 compute_window_partition(
4885 name,
4886 args,
4887 !order_by.is_empty(),
4888 frame.as_ref(),
4889 *null_treatment,
4890 &indexed[p_start..p_end],
4891 &filtered,
4892 &ctx,
4893 &mut out_vals,
4894 )?;
4895 p_start = p_end;
4896 }
4897 win_vals.push(out_vals);
4898 }
4899
4900 let mut ext_cols = schema_cols.clone();
4902 for i in 0..window_nodes.len() {
4903 ext_cols.push(ColumnSchema::new(
4904 alloc::format!("__win_{i}"),
4905 DataType::Text, true,
4907 ));
4908 }
4909 let mut ext_rows: Vec<Row> = Vec::with_capacity(n_rows);
4911 for i in 0..n_rows {
4912 let mut values = filtered[i].values.clone();
4913 for w in 0..window_nodes.len() {
4914 values.push(win_vals[w][i].clone());
4915 }
4916 ext_rows.push(Row::new(values));
4917 }
4918 let mut rewritten_items: Vec<SelectItem> = Vec::with_capacity(stmt.items.len());
4920 for item in &stmt.items {
4921 let new_item = match item {
4922 SelectItem::Wildcard => SelectItem::Wildcard,
4923 SelectItem::Expr { expr, alias } => {
4924 let mut e = expr.clone();
4925 rewrite_window_to_columns(&mut e, &window_nodes);
4926 SelectItem::Expr {
4927 expr: e,
4928 alias: alias.clone(),
4929 }
4930 }
4931 };
4932 rewritten_items.push(new_item);
4933 }
4934
4935 let ext_ctx = EvalContext::new(&ext_cols, Some(alias));
4937 let projection = build_projection(&rewritten_items, &ext_cols, alias)?;
4938 let mut tagged: Vec<(Vec<f64>, Row)> = Vec::with_capacity(n_rows);
4939 for (i, row) in ext_rows.iter().enumerate() {
4940 if i.is_multiple_of(256) {
4941 cancel.check()?;
4942 }
4943 let mut values = Vec::with_capacity(projection.len());
4944 for p in &projection {
4945 values.push(eval::eval_expr(&p.expr, row, &ext_ctx)?);
4946 }
4947 let order_keys = if stmt.order_by.is_empty() {
4948 Vec::new()
4949 } else {
4950 let mut keys = Vec::with_capacity(stmt.order_by.len());
4951 for o in &stmt.order_by {
4952 let mut e = o.expr.clone();
4953 rewrite_window_to_columns(&mut e, &window_nodes);
4954 let key = eval::eval_expr(&e, row, &ext_ctx)?;
4955 keys.push(value_to_order_key(&key)?);
4956 }
4957 keys
4958 };
4959 tagged.push((order_keys, Row::new(values)));
4960 }
4961 if !stmt.order_by.is_empty() {
4963 let descs: Vec<bool> = stmt.order_by.iter().map(|o| o.desc).collect();
4964 sort_by_keys(&mut tagged, &descs);
4965 }
4966 let mut out_rows: Vec<Row> = tagged.into_iter().map(|(_, r)| r).collect();
4967 apply_offset_and_limit(&mut out_rows, stmt.offset_literal(), stmt.limit_literal());
4968 let final_cols: Vec<ColumnSchema> = projection
4969 .into_iter()
4970 .map(|p| ColumnSchema::new(p.output_name, p.ty, p.nullable))
4971 .collect();
4972 Ok(QueryResult::Rows {
4973 columns: final_cols,
4974 rows: out_rows,
4975 })
4976 }
4977
4978 fn exec_with_ctes(
4985 &self,
4986 stmt: &SelectStatement,
4987 cancel: CancelToken<'_>,
4988 ) -> Result<QueryResult, EngineError> {
4989 cancel.check()?;
4990 let mut catalog = self.active_catalog().clone();
4991 for cte in &stmt.ctes {
4992 if catalog.get(&cte.name).is_some() {
4993 return Err(EngineError::Unsupported(alloc::format!(
4994 "CTE name {:?} shadows an existing table; rename the CTE",
4995 cte.name
4996 )));
4997 }
4998 let (columns, rows) = if cte.recursive {
4999 self.materialise_recursive_cte(cte, &catalog, cancel)?
5000 } else {
5001 let body_result = self.exec_select_cancel(&cte.body, cancel)?;
5002 let QueryResult::Rows { columns, rows } = body_result else {
5003 return Err(EngineError::Unsupported(alloc::format!(
5004 "CTE {:?} body did not return rows",
5005 cte.name
5006 )));
5007 };
5008 (columns, rows)
5009 };
5010 let inferred = infer_column_types(&columns, &rows);
5015 let mut columns = inferred;
5016 if !cte.column_overrides.is_empty() {
5018 if cte.column_overrides.len() != columns.len() {
5019 return Err(EngineError::Unsupported(alloc::format!(
5020 "CTE {:?} column list has {} names but body returns {} columns",
5021 cte.name,
5022 cte.column_overrides.len(),
5023 columns.len()
5024 )));
5025 }
5026 for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5027 col.name.clone_from(name);
5028 }
5029 }
5030 let schema = TableSchema::new(cte.name.clone(), columns);
5031 catalog.create_table(schema).map_err(EngineError::Storage)?;
5032 let table = catalog
5033 .get_mut(&cte.name)
5034 .expect("just-created CTE table must exist");
5035 for row in rows {
5036 table.insert(row).map_err(EngineError::Storage)?;
5037 }
5038 }
5039 let mut body = stmt.clone();
5042 body.ctes = Vec::new();
5043 let mut temp = Engine::restore(catalog);
5044 if let Some(c) = self.clock {
5045 temp = temp.with_clock(c);
5046 }
5047 if let Some(f) = self.salt_fn {
5048 temp = temp.with_salt_fn(f);
5049 }
5050 temp.exec_select_cancel(&body, cancel)
5051 }
5052
5053 #[allow(clippy::too_many_lines)]
5063 fn materialise_recursive_cte(
5064 &self,
5065 cte: &spg_sql::ast::Cte,
5066 base_catalog: &Catalog,
5067 cancel: CancelToken<'_>,
5068 ) -> Result<(Vec<ColumnSchema>, Vec<Row>), EngineError> {
5069 const MAX_TOTAL_ROWS: usize = 1_000_000;
5070 const MAX_ITERATIONS: usize = 100_000;
5071 cancel.check()?;
5072 if cte.body.unions.is_empty() {
5073 return Err(EngineError::Unsupported(alloc::format!(
5074 "WITH RECURSIVE {:?} body must be a UNION of an anchor and a recursive term",
5075 cte.name
5076 )));
5077 }
5078 let mut anchor = cte.body.clone();
5080 let union_terms = core::mem::take(&mut anchor.unions);
5081 anchor.ctes = Vec::new();
5082 if select_refers_to(&anchor, &cte.name) {
5084 return Err(EngineError::Unsupported(alloc::format!(
5085 "WITH RECURSIVE {:?}: the anchor must not reference the CTE itself",
5086 cte.name
5087 )));
5088 }
5089 let anchor_result = self.exec_select_cancel(&anchor, cancel)?;
5090 let QueryResult::Rows {
5091 columns: anchor_cols,
5092 rows: anchor_rows,
5093 } = anchor_result
5094 else {
5095 return Err(EngineError::Unsupported(alloc::format!(
5096 "WITH RECURSIVE {:?}: anchor did not return rows",
5097 cte.name
5098 )));
5099 };
5100 let mut columns = infer_column_types(&anchor_cols, &anchor_rows);
5104 if !cte.column_overrides.is_empty() {
5105 if cte.column_overrides.len() != columns.len() {
5106 return Err(EngineError::Unsupported(alloc::format!(
5107 "CTE {:?} column list has {} names but anchor returns {} columns",
5108 cte.name,
5109 cte.column_overrides.len(),
5110 columns.len()
5111 )));
5112 }
5113 for (col, name) in columns.iter_mut().zip(cte.column_overrides.iter()) {
5114 col.name.clone_from(name);
5115 }
5116 }
5117 let mut all_rows: Vec<Row> = anchor_rows.clone();
5118 let mut working_set: Vec<Row> = anchor_rows;
5119 let mut seen: alloc::collections::BTreeSet<Vec<u8>> = alloc::collections::BTreeSet::new();
5120 let all_union_all = union_terms.iter().all(|(k, _)| matches!(k, UnionKind::All));
5123 if !all_union_all {
5124 for r in &all_rows {
5125 seen.insert(encode_row_key(r));
5126 }
5127 }
5128 for iter in 0..MAX_ITERATIONS {
5129 cancel.check()?;
5130 if working_set.is_empty() {
5131 break;
5132 }
5133 let mut iter_catalog = base_catalog.clone();
5135 let schema = TableSchema::new(cte.name.clone(), columns.clone());
5136 iter_catalog
5137 .create_table(schema)
5138 .map_err(EngineError::Storage)?;
5139 {
5140 let table = iter_catalog.get_mut(&cte.name).expect("just-created");
5141 for row in &working_set {
5142 table.insert(row.clone()).map_err(EngineError::Storage)?;
5143 }
5144 }
5145 let mut iter_engine = Engine::restore(iter_catalog);
5146 if let Some(c) = self.clock {
5147 iter_engine = iter_engine.with_clock(c);
5148 }
5149 if let Some(f) = self.salt_fn {
5150 iter_engine = iter_engine.with_salt_fn(f);
5151 }
5152 let mut next_set: Vec<Row> = Vec::new();
5154 for (_, term) in &union_terms {
5155 let mut term = term.clone();
5156 term.ctes = Vec::new();
5157 let r = iter_engine.exec_select_cancel(&term, cancel)?;
5158 let QueryResult::Rows {
5159 columns: rc,
5160 rows: rs,
5161 } = r
5162 else {
5163 return Err(EngineError::Unsupported(alloc::format!(
5164 "WITH RECURSIVE {:?}: recursive term did not return rows",
5165 cte.name
5166 )));
5167 };
5168 if rc.len() != columns.len() {
5169 return Err(EngineError::Unsupported(alloc::format!(
5170 "WITH RECURSIVE {:?}: column count of recursive term ({}) does not match anchor ({})",
5171 cte.name,
5172 rc.len(),
5173 columns.len()
5174 )));
5175 }
5176 for row in rs {
5177 if !all_union_all {
5178 let key = encode_row_key(&row);
5179 if !seen.insert(key) {
5180 continue;
5181 }
5182 }
5183 next_set.push(row);
5184 }
5185 }
5186 if next_set.is_empty() {
5187 break;
5188 }
5189 all_rows.extend(next_set.iter().cloned());
5190 working_set = next_set;
5191 if all_rows.len() > MAX_TOTAL_ROWS {
5192 return Err(EngineError::Unsupported(alloc::format!(
5193 "WITH RECURSIVE {:?}: produced more than {MAX_TOTAL_ROWS} rows — likely runaway recursion",
5194 cte.name
5195 )));
5196 }
5197 if iter + 1 == MAX_ITERATIONS {
5198 return Err(EngineError::Unsupported(alloc::format!(
5199 "WITH RECURSIVE {:?}: exceeded {MAX_ITERATIONS} iterations",
5200 cte.name
5201 )));
5202 }
5203 }
5204 Ok((columns, all_rows))
5205 }
5206
5207 fn resolve_select_subqueries(
5208 &self,
5209 stmt: &mut SelectStatement,
5210 cancel: CancelToken<'_>,
5211 ) -> Result<(), EngineError> {
5212 for item in &mut stmt.items {
5213 if let SelectItem::Expr { expr, .. } = item {
5214 self.resolve_expr_subqueries(expr, cancel)?;
5215 }
5216 }
5217 if let Some(w) = &mut stmt.where_ {
5218 self.resolve_expr_subqueries(w, cancel)?;
5219 }
5220 if let Some(gs) = &mut stmt.group_by {
5221 for g in gs {
5222 self.resolve_expr_subqueries(g, cancel)?;
5223 }
5224 }
5225 if let Some(h) = &mut stmt.having {
5226 self.resolve_expr_subqueries(h, cancel)?;
5227 }
5228 for o in &mut stmt.order_by {
5229 self.resolve_expr_subqueries(&mut o.expr, cancel)?;
5230 }
5231 for (_, peer) in &mut stmt.unions {
5232 self.resolve_select_subqueries(peer, cancel)?;
5233 }
5234 Ok(())
5235 }
5236
5237 #[allow(clippy::only_used_in_recursion)] fn resolve_expr_subqueries(
5239 &self,
5240 e: &mut Expr,
5241 cancel: CancelToken<'_>,
5242 ) -> Result<(), EngineError> {
5243 if let Some(replacement) = self.subquery_replacement(e, cancel)? {
5245 *e = replacement;
5246 return Ok(());
5247 }
5248 match e {
5249 Expr::Binary { lhs, rhs, .. } => {
5250 self.resolve_expr_subqueries(lhs, cancel)?;
5251 self.resolve_expr_subqueries(rhs, cancel)?;
5252 }
5253 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5254 self.resolve_expr_subqueries(expr, cancel)?;
5255 }
5256 Expr::FunctionCall { args, .. } => {
5257 for a in args {
5258 self.resolve_expr_subqueries(a, cancel)?;
5259 }
5260 }
5261 Expr::Like { expr, pattern, .. } => {
5262 self.resolve_expr_subqueries(expr, cancel)?;
5263 self.resolve_expr_subqueries(pattern, cancel)?;
5264 }
5265 Expr::Extract { source, .. } => self.resolve_expr_subqueries(source, cancel)?,
5266 Expr::WindowFunction {
5269 args,
5270 partition_by,
5271 order_by,
5272 ..
5273 } => {
5274 for a in args {
5275 self.resolve_expr_subqueries(a, cancel)?;
5276 }
5277 for p in partition_by {
5278 self.resolve_expr_subqueries(p, cancel)?;
5279 }
5280 for (e, _) in order_by {
5281 self.resolve_expr_subqueries(e, cancel)?;
5282 }
5283 }
5284 Expr::ScalarSubquery(_)
5288 | Expr::Exists { .. }
5289 | Expr::InSubquery { .. }
5290 | Expr::Literal(_)
5291 | Expr::Placeholder(_)
5292 | Expr::Column(_) => {}
5293 }
5294 Ok(())
5295 }
5296
5297 fn eval_expr_with_correlated(
5305 &self,
5306 expr: &Expr,
5307 row: &Row,
5308 ctx: &EvalContext<'_>,
5309 cancel: CancelToken<'_>,
5310 memo: Option<&mut memoize::MemoizeCache>,
5311 ) -> Result<Value, EngineError> {
5312 if !expr_has_subquery(expr) {
5313 return eval::eval_expr(expr, row, ctx).map_err(EngineError::Eval);
5314 }
5315 let mut e = expr.clone();
5316 self.resolve_correlated_in_expr(&mut e, row, ctx, cancel, memo)?;
5317 eval::eval_expr(&e, row, ctx).map_err(EngineError::Eval)
5318 }
5319
5320 fn resolve_correlated_in_expr(
5321 &self,
5322 e: &mut Expr,
5323 row: &Row,
5324 ctx: &EvalContext<'_>,
5325 cancel: CancelToken<'_>,
5326 mut memo: Option<&mut memoize::MemoizeCache>,
5327 ) -> Result<(), EngineError> {
5328 match e {
5329 Expr::ScalarSubquery(inner) => {
5330 let cache_key = memo.as_ref().map(|_| memoize::CacheKey {
5335 subquery_repr: alloc::format!("{}", **inner),
5336 outer_values: row.values.clone(),
5337 });
5338 if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key.as_ref())
5339 && let Some(cached) = cache.get(k)
5340 {
5341 *e = value_to_literal_expr(cached)?;
5342 return Ok(());
5343 }
5344 let mut s = (**inner).clone();
5345 substitute_outer_columns(&mut s, row, ctx);
5346 let r = self.exec_select_cancel(&s, cancel)?;
5347 let QueryResult::Rows { rows, .. } = r else {
5348 return Err(EngineError::Unsupported(
5349 "scalar subquery: inner did not return rows".into(),
5350 ));
5351 };
5352 let value = match rows.as_slice() {
5353 [] => Value::Null,
5354 [r0] => r0.values.first().cloned().unwrap_or(Value::Null),
5355 _ => {
5356 return Err(EngineError::Unsupported(alloc::format!(
5357 "scalar subquery returned {} rows; expected 0 or 1",
5358 rows.len()
5359 )));
5360 }
5361 };
5362 if let (Some(cache), Some(k)) = (memo.as_deref_mut(), cache_key) {
5363 cache.insert(k, value.clone());
5364 }
5365 *e = value_to_literal_expr(value)?;
5366 }
5367 Expr::Exists { subquery, negated } => {
5368 let mut s = (**subquery).clone();
5369 substitute_outer_columns(&mut s, row, ctx);
5370 let r = self.exec_select_cancel(&s, cancel)?;
5371 let exists = matches!(r, QueryResult::Rows { rows, .. } if !rows.is_empty());
5372 let bit = if *negated { !exists } else { exists };
5373 *e = Expr::Literal(Literal::Bool(bit));
5374 }
5375 Expr::InSubquery {
5376 expr: lhs,
5377 subquery,
5378 negated,
5379 } => {
5380 self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5381 let lhs_val = eval::eval_expr(lhs, row, ctx).map_err(EngineError::Eval)?;
5382 let mut s = (**subquery).clone();
5383 substitute_outer_columns(&mut s, row, ctx);
5384 let r = self.exec_select_cancel(&s, cancel)?;
5385 let QueryResult::Rows { columns, rows, .. } = r else {
5386 return Err(EngineError::Unsupported(
5387 "IN-subquery: inner did not return rows".into(),
5388 ));
5389 };
5390 if columns.len() != 1 {
5391 return Err(EngineError::Unsupported(alloc::format!(
5392 "IN-subquery must project exactly one column; got {}",
5393 columns.len()
5394 )));
5395 }
5396 let mut found = false;
5397 let mut any_null = false;
5398 for r0 in rows {
5399 let v = r0.values.into_iter().next().unwrap_or(Value::Null);
5400 if v.is_null() {
5401 any_null = true;
5402 continue;
5403 }
5404 if value_cmp(&v, &lhs_val) == core::cmp::Ordering::Equal {
5405 found = true;
5406 break;
5407 }
5408 }
5409 let bit = if found {
5410 !*negated
5411 } else if any_null {
5412 return Err(EngineError::Unsupported(
5413 "IN-subquery with NULL in result and no match: NULL semantics not yet implemented".into(),
5414 ));
5415 } else {
5416 *negated
5417 };
5418 *e = Expr::Literal(Literal::Bool(bit));
5419 }
5420 Expr::Binary { lhs, rhs, .. } => {
5421 self.resolve_correlated_in_expr(lhs, row, ctx, cancel, memo.as_deref_mut())?;
5422 self.resolve_correlated_in_expr(rhs, row, ctx, cancel, memo.as_deref_mut())?;
5423 }
5424 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5425 self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5426 }
5427 Expr::Like { expr, pattern, .. } => {
5428 self.resolve_correlated_in_expr(expr, row, ctx, cancel, memo.as_deref_mut())?;
5429 self.resolve_correlated_in_expr(pattern, row, ctx, cancel, memo.as_deref_mut())?;
5430 }
5431 Expr::FunctionCall { args, .. } => {
5432 for a in args {
5433 self.resolve_correlated_in_expr(a, row, ctx, cancel, memo.as_deref_mut())?;
5434 }
5435 }
5436 Expr::Extract { source, .. } => {
5437 self.resolve_correlated_in_expr(source, row, ctx, cancel, memo.as_deref_mut())?;
5438 }
5439 Expr::WindowFunction { .. } | Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
5440 }
5441 Ok(())
5442 }
5443
5444 fn subquery_replacement(
5445 &self,
5446 e: &Expr,
5447 cancel: CancelToken<'_>,
5448 ) -> Result<Option<Expr>, EngineError> {
5449 match e {
5450 Expr::ScalarSubquery(inner) => {
5451 let mut s = (**inner).clone();
5452 self.resolve_select_subqueries(&mut s, cancel)?;
5455 let r = match self.exec_bare_select_cancel(&s, cancel) {
5456 Ok(r) => r,
5457 Err(e) if is_correlation_error(&e) => return Ok(None),
5458 Err(e) => return Err(e),
5459 };
5460 let QueryResult::Rows { rows, .. } = r else {
5461 return Err(EngineError::Unsupported(
5462 "scalar subquery: inner statement did not return rows".into(),
5463 ));
5464 };
5465 let value = match rows.as_slice() {
5466 [] => Value::Null,
5467 [row] => row.values.first().cloned().unwrap_or(Value::Null),
5468 _ => {
5469 return Err(EngineError::Unsupported(alloc::format!(
5470 "scalar subquery returned {} rows; expected 0 or 1",
5471 rows.len()
5472 )));
5473 }
5474 };
5475 Ok(Some(value_to_literal_expr(value)?))
5476 }
5477 Expr::Exists { subquery, negated } => {
5478 let mut s = (**subquery).clone();
5479 self.resolve_select_subqueries(&mut s, cancel)?;
5480 let r = match self.exec_bare_select_cancel(&s, cancel) {
5481 Ok(r) => r,
5482 Err(e) if is_correlation_error(&e) => return Ok(None),
5483 Err(e) => return Err(e),
5484 };
5485 let exists = match r {
5486 QueryResult::Rows { rows, .. } => !rows.is_empty(),
5487 QueryResult::CommandOk { .. } => false,
5488 };
5489 let bit = if *negated { !exists } else { exists };
5490 Ok(Some(Expr::Literal(Literal::Bool(bit))))
5491 }
5492 Expr::InSubquery {
5493 expr,
5494 subquery,
5495 negated,
5496 } => {
5497 let mut s = (**subquery).clone();
5498 self.resolve_select_subqueries(&mut s, cancel)?;
5499 let r = match self.exec_bare_select_cancel(&s, cancel) {
5500 Ok(r) => r,
5501 Err(e) if is_correlation_error(&e) => return Ok(None),
5502 Err(e) => return Err(e),
5503 };
5504 let QueryResult::Rows { columns, rows, .. } = r else {
5505 return Err(EngineError::Unsupported(
5506 "IN-subquery: inner statement did not return rows".into(),
5507 ));
5508 };
5509 if columns.len() != 1 {
5510 return Err(EngineError::Unsupported(alloc::format!(
5511 "IN-subquery must project exactly one column; got {}",
5512 columns.len()
5513 )));
5514 }
5515 let mut acc: Option<Expr> = None;
5518 for row in rows {
5519 let v = row.values.into_iter().next().unwrap_or(Value::Null);
5520 let lit = value_to_literal_expr(v)?;
5521 let cmp = Expr::Binary {
5522 lhs: expr.clone(),
5523 op: BinOp::Eq,
5524 rhs: Box::new(lit),
5525 };
5526 acc = Some(match acc {
5527 None => cmp,
5528 Some(prev) => Expr::Binary {
5529 lhs: Box::new(prev),
5530 op: BinOp::Or,
5531 rhs: Box::new(cmp),
5532 },
5533 });
5534 }
5535 let combined = acc.unwrap_or(Expr::Literal(Literal::Bool(false)));
5536 let final_expr = if *negated {
5537 Expr::Unary {
5538 op: UnOp::Not,
5539 expr: Box::new(combined),
5540 }
5541 } else {
5542 combined
5543 };
5544 Ok(Some(final_expr))
5545 }
5546 _ => Ok(None),
5547 }
5548 }
5549}
5550
5551fn select_refers_to(stmt: &SelectStatement, target: &str) -> bool {
5563 if let Some(from) = &stmt.from
5564 && from_refers_to(from, target)
5565 {
5566 return true;
5567 }
5568 for (_, peer) in &stmt.unions {
5569 if select_refers_to(peer, target) {
5570 return true;
5571 }
5572 }
5573 for item in &stmt.items {
5574 if let SelectItem::Expr { expr, .. } = item
5575 && expr_refers_to(expr, target)
5576 {
5577 return true;
5578 }
5579 }
5580 if let Some(w) = &stmt.where_
5581 && expr_refers_to(w, target)
5582 {
5583 return true;
5584 }
5585 false
5586}
5587
5588fn from_refers_to(from: &FromClause, target: &str) -> bool {
5589 if from.primary.name.eq_ignore_ascii_case(target) {
5590 return true;
5591 }
5592 from.joins
5593 .iter()
5594 .any(|j| j.table.name.eq_ignore_ascii_case(target))
5595}
5596
5597fn expr_refers_to(e: &Expr, target: &str) -> bool {
5598 match e {
5599 Expr::ScalarSubquery(s) => select_refers_to(s, target),
5600 Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
5601 select_refers_to(subquery, target)
5602 }
5603 Expr::Binary { lhs, rhs, .. } => expr_refers_to(lhs, target) || expr_refers_to(rhs, target),
5604 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
5605 expr_refers_to(expr, target)
5606 }
5607 Expr::Like { expr, pattern, .. } => {
5608 expr_refers_to(expr, target) || expr_refers_to(pattern, target)
5609 }
5610 Expr::FunctionCall { args, .. } => args.iter().any(|a| expr_refers_to(a, target)),
5611 Expr::Extract { source, .. } => expr_refers_to(source, target),
5612 Expr::WindowFunction {
5613 args,
5614 partition_by,
5615 order_by,
5616 ..
5617 } => {
5618 args.iter().any(|a| expr_refers_to(a, target))
5619 || partition_by.iter().any(|p| expr_refers_to(p, target))
5620 || order_by.iter().any(|(o, _)| expr_refers_to(o, target))
5621 }
5622 Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
5623 }
5624}
5625
5626fn infer_column_types(columns: &[ColumnSchema], rows: &[Row]) -> Vec<ColumnSchema> {
5632 let mut out = columns.to_vec();
5633 for (col_idx, col) in out.iter_mut().enumerate() {
5634 if col.ty != DataType::Text {
5635 continue;
5636 }
5637 let mut inferred: Option<DataType> = None;
5638 let mut all_null = true;
5639 for row in rows {
5640 let Some(v) = row.values.get(col_idx) else {
5641 continue;
5642 };
5643 let ty = match v {
5644 Value::Null => continue,
5645 Value::SmallInt(_) => DataType::SmallInt,
5646 Value::Int(_) => DataType::Int,
5647 Value::BigInt(_) => DataType::BigInt,
5648 Value::Float(_) => DataType::Float,
5649 Value::Bool(_) => DataType::Bool,
5650 Value::Vector(_) => DataType::Vector {
5651 dim: 0,
5652 encoding: VecEncoding::F32,
5653 },
5654 _ => DataType::Text,
5655 };
5656 all_null = false;
5657 inferred = Some(match inferred {
5658 None => ty,
5659 Some(prev) if prev == ty => prev,
5660 Some(_) => DataType::Text,
5661 });
5662 }
5663 if let Some(t) = inferred {
5664 col.ty = t;
5665 col.nullable = true;
5666 } else if all_null {
5667 col.nullable = true;
5668 }
5669 }
5670 out
5671}
5672
5673#[allow(clippy::too_many_lines, clippy::format_push_string)]
5678fn build_index_suggestions(stmt: &SelectStatement, engine: &Engine) -> Vec<String> {
5695 use alloc::collections::BTreeSet;
5696 let mut seen: BTreeSet<(String, String)> = BTreeSet::new();
5697 let mut out: Vec<String> = Vec::new();
5698 let cat = engine.active_catalog();
5699 let Some(from) = &stmt.from else {
5703 return out;
5704 };
5705 let mut tables: Vec<String> = Vec::new();
5706 tables.push(from.primary.name.clone());
5707 for j in &from.joins {
5708 tables.push(j.table.name.clone());
5709 }
5710 let mut col_refs: Vec<spg_sql::ast::ColumnName> = Vec::new();
5713 if let Some(w) = &stmt.where_ {
5714 collect_column_refs(w, &mut col_refs);
5715 }
5716 for j in &from.joins {
5717 if let Some(on) = &j.on {
5718 collect_column_refs(on, &mut col_refs);
5719 }
5720 }
5721 for cn in &col_refs {
5722 let owner: Option<String> = if let Some(q) = &cn.qualifier {
5725 tables.iter().find(|t| t == &q).cloned()
5726 } else {
5727 tables.iter().find_map(|t| {
5728 cat.get(t).and_then(|tbl| {
5729 if tbl.schema().column_position(&cn.name).is_some() {
5730 Some(t.clone())
5731 } else {
5732 None
5733 }
5734 })
5735 })
5736 };
5737 let Some(owner) = owner else {
5738 continue;
5739 };
5740 let Some(tbl) = cat.get(&owner) else {
5741 continue;
5742 };
5743 let Some(col_pos) = tbl.schema().column_position(&cn.name) else {
5744 continue;
5745 };
5746 let already_indexed = tbl.indices().iter().any(|i| {
5749 matches!(i.kind, spg_storage::IndexKind::BTree(_))
5750 && i.column_position == col_pos
5751 && i.expression.is_none()
5752 && i.partial_predicate.is_none()
5753 });
5754 if already_indexed {
5755 continue;
5756 }
5757 if seen.insert((owner.clone(), cn.name.clone())) {
5758 out.push(alloc::format!(
5759 "SUGGEST: CREATE INDEX ix_{}_{} ON {} ({})",
5760 owner,
5761 cn.name,
5762 owner,
5763 cn.name
5764 ));
5765 }
5766 }
5767 out
5768}
5769
5770fn collect_column_refs(expr: &Expr, out: &mut Vec<spg_sql::ast::ColumnName>) {
5773 match expr {
5774 Expr::Column(cn) => out.push(cn.clone()),
5775 Expr::FunctionCall { args, .. } => {
5776 for a in args {
5777 collect_column_refs(a, out);
5778 }
5779 }
5780 Expr::Binary { lhs, rhs, .. } => {
5781 collect_column_refs(lhs, out);
5782 collect_column_refs(rhs, out);
5783 }
5784 Expr::Unary { expr: e, .. } => collect_column_refs(e, out),
5785 _ => {}
5786 }
5787}
5788
5789fn annotate_explain_lines(lines: &mut [String], total_rows: usize, engine: &Engine) {
5790 let catalog = engine.active_catalog();
5791 let cold_ids = catalog.cold_segment_ids_global();
5792 let any_cold = !cold_ids.is_empty();
5793 let cold_ids_repr = if any_cold {
5794 let mut s = alloc::string::String::from("[");
5795 for (i, id) in cold_ids.iter().enumerate() {
5796 if i > 0 {
5797 s.push(',');
5798 }
5799 s.push_str(&alloc::format!("{id}"));
5800 }
5801 s.push(']');
5802 s
5803 } else {
5804 alloc::string::String::new()
5805 };
5806 for (idx, line) in lines.iter_mut().enumerate() {
5807 let trimmed = line.trim_start();
5808 let is_top_level = idx == 0;
5809 if is_top_level {
5810 line.push_str(&alloc::format!(" (rows={total_rows})"));
5811 continue;
5812 }
5813 if let Some(rest) = trimmed.strip_prefix("From: ") {
5814 let (name, scan_kind) = match rest.split_once(" [") {
5815 Some((n, k)) => (n.trim(), k.trim_end_matches(']')),
5816 None => (rest.trim(), ""),
5817 };
5818 let bare = name.split_whitespace().next().unwrap_or(name);
5819 let hot = catalog.get(bare).map(|t| t.rows().len());
5820 let annot = match (hot, scan_kind) {
5825 (Some(h), "full scan") => {
5826 let mut s = alloc::format!(" (hot_rows={h}");
5827 if any_cold {
5828 s.push_str(&alloc::format!(
5829 ", cold_tier=present, cold_segments={cold_ids_repr}"
5830 ));
5831 }
5832 s.push(')');
5833 s
5834 }
5835 (Some(h), "index seek") => {
5836 let mut s = alloc::format!(" (hot_rows≤{h}");
5837 if any_cold {
5838 s.push_str(&alloc::format!(
5839 ", cold_tier=present, cold_segments={cold_ids_repr}"
5840 ));
5841 }
5842 s.push(')');
5843 s
5844 }
5845 _ => " (rows=—)".to_string(),
5846 };
5847 line.push_str(&annot);
5848 continue;
5849 }
5850 line.push_str(" (rows=—)");
5852 }
5853}
5854
5855fn explain_select(stmt: &SelectStatement, engine: &Engine, depth: usize, out: &mut Vec<String>) {
5856 let pad = " ".repeat(depth);
5857 let top = if !stmt.ctes.is_empty() {
5859 if stmt.ctes.iter().any(|c| c.recursive) {
5860 "CTEScan (WITH RECURSIVE)"
5861 } else {
5862 "CTEScan (WITH)"
5863 }
5864 } else if !stmt.unions.is_empty() {
5865 "UnionScan"
5866 } else if select_has_window(stmt) {
5867 "WindowAgg"
5868 } else if aggregate::uses_aggregate(stmt) {
5869 "Aggregate"
5870 } else if stmt.distinct {
5871 "Distinct"
5872 } else if stmt.from.is_some() {
5873 "TableScan"
5874 } else {
5875 "Result"
5876 };
5877 out.push(alloc::format!("{pad}{top}"));
5878 let child = " ".repeat(depth + 1);
5879 for cte in &stmt.ctes {
5881 let head = if cte.recursive {
5882 alloc::format!("{child}CTE (recursive): {}", cte.name)
5883 } else {
5884 alloc::format!("{child}CTE: {}", cte.name)
5885 };
5886 out.push(head);
5887 explain_select(&cte.body, engine, depth + 2, out);
5888 }
5889 if let Some(from) = &stmt.from {
5891 let mut tag = alloc::format!("{child}From: {}", from.primary.name);
5892 if let Some(alias) = &from.primary.alias {
5893 tag.push_str(&alloc::format!(" AS {alias}"));
5894 }
5895 if let Some(w) = &stmt.where_
5898 && let Some(table) = engine.active_catalog().get(&from.primary.name)
5899 {
5900 let alias = from.primary.alias.as_deref().unwrap_or(&from.primary.name);
5901 let cols = &table.schema().columns;
5902 if try_index_seek(w, cols, engine.active_catalog(), table, alias).is_some() {
5903 tag.push_str(" [index seek]");
5904 } else {
5905 tag.push_str(" [full scan]");
5906 }
5907 } else {
5908 tag.push_str(" [full scan]");
5909 }
5910 out.push(tag);
5911 for j in &from.joins {
5912 let kind = match j.kind {
5913 spg_sql::ast::JoinKind::Inner => "INNER JOIN",
5914 spg_sql::ast::JoinKind::Left => "LEFT JOIN",
5915 spg_sql::ast::JoinKind::Cross => "CROSS JOIN",
5916 };
5917 let mut s = alloc::format!("{child}{kind}: {}", j.table.name);
5918 if let Some(alias) = &j.table.alias {
5919 s.push_str(&alloc::format!(" AS {alias}"));
5920 }
5921 if j.on.is_some() {
5922 s.push_str(" (ON …)");
5923 }
5924 out.push(s);
5925 }
5926 }
5927 if let Some(w) = &stmt.where_ {
5929 let mut s = alloc::format!("{child}Filter: {w}");
5930 if expr_has_subquery(w) {
5931 s.push_str(" [subquery]");
5932 }
5933 out.push(s);
5934 }
5935 if let Some(gs) = &stmt.group_by {
5936 let mut parts = Vec::new();
5937 for g in gs {
5938 parts.push(alloc::format!("{g}"));
5939 }
5940 out.push(alloc::format!("{child}GroupBy: {}", parts.join(", ")));
5941 }
5942 if let Some(h) = &stmt.having {
5943 out.push(alloc::format!("{child}Having: {h}"));
5944 }
5945 for o in &stmt.order_by {
5946 let dir = if o.desc { "DESC" } else { "ASC" };
5947 out.push(alloc::format!("{child}OrderBy: {} {dir}", o.expr));
5948 }
5949 if let Some(lim) = stmt.limit {
5950 out.push(alloc::format!("{child}Limit: {lim}"));
5951 }
5952 if let Some(off) = stmt.offset {
5953 out.push(alloc::format!("{child}Offset: {off}"));
5954 }
5955 if stmt
5957 .items
5958 .iter()
5959 .any(|it| matches!(it, SelectItem::Wildcard))
5960 {
5961 out.push(alloc::format!("{child}Project: *"));
5962 } else {
5963 out.push(alloc::format!(
5964 "{child}Project: {} item(s)",
5965 stmt.items.len()
5966 ));
5967 }
5968 for (kind, peer) in &stmt.unions {
5970 let label = match kind {
5971 UnionKind::All => "UNION ALL",
5972 UnionKind::Distinct => "UNION",
5973 };
5974 out.push(alloc::format!("{child}{label}"));
5975 explain_select(peer, engine, depth + 2, out);
5976 }
5977}
5978
5979fn is_correlation_error(e: &EngineError) -> bool {
5984 matches!(
5985 e,
5986 EngineError::Eval(
5987 eval::EvalError::ColumnNotFound { .. } | eval::EvalError::UnknownQualifier { .. }
5988 )
5989 )
5990}
5991
5992fn substitute_outer_columns(stmt: &mut SelectStatement, row: &Row, ctx: &EvalContext<'_>) {
6000 let Some(outer_alias) = ctx.table_alias else {
6001 return;
6002 };
6003 substitute_in_select(stmt, row, ctx, outer_alias);
6004}
6005
6006fn substitute_in_select(
6007 stmt: &mut SelectStatement,
6008 row: &Row,
6009 ctx: &EvalContext<'_>,
6010 outer_alias: &str,
6011) {
6012 for item in &mut stmt.items {
6013 if let SelectItem::Expr { expr, .. } = item {
6014 substitute_in_expr(expr, row, ctx, outer_alias);
6015 }
6016 }
6017 if let Some(w) = &mut stmt.where_ {
6018 substitute_in_expr(w, row, ctx, outer_alias);
6019 }
6020 if let Some(gs) = &mut stmt.group_by {
6021 for g in gs {
6022 substitute_in_expr(g, row, ctx, outer_alias);
6023 }
6024 }
6025 if let Some(h) = &mut stmt.having {
6026 substitute_in_expr(h, row, ctx, outer_alias);
6027 }
6028 for o in &mut stmt.order_by {
6029 substitute_in_expr(&mut o.expr, row, ctx, outer_alias);
6030 }
6031 for (_, peer) in &mut stmt.unions {
6032 substitute_in_select(peer, row, ctx, outer_alias);
6033 }
6034}
6035
6036fn substitute_in_expr(e: &mut Expr, row: &Row, ctx: &EvalContext<'_>, outer_alias: &str) {
6037 if let Expr::Column(c) = e
6038 && let Some(qual) = &c.qualifier
6039 && qual.eq_ignore_ascii_case(outer_alias)
6040 {
6041 if let Some(idx) = ctx
6043 .columns
6044 .iter()
6045 .position(|sc| sc.name.eq_ignore_ascii_case(&c.name))
6046 {
6047 let v = row.values.get(idx).cloned().unwrap_or(Value::Null);
6048 if let Ok(lit) = value_to_literal_expr(v) {
6049 *e = lit;
6050 return;
6051 }
6052 }
6053 }
6054 match e {
6055 Expr::Binary { lhs, rhs, .. } => {
6056 substitute_in_expr(lhs, row, ctx, outer_alias);
6057 substitute_in_expr(rhs, row, ctx, outer_alias);
6058 }
6059 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6060 substitute_in_expr(expr, row, ctx, outer_alias);
6061 }
6062 Expr::Like { expr, pattern, .. } => {
6063 substitute_in_expr(expr, row, ctx, outer_alias);
6064 substitute_in_expr(pattern, row, ctx, outer_alias);
6065 }
6066 Expr::FunctionCall { args, .. } => {
6067 for a in args {
6068 substitute_in_expr(a, row, ctx, outer_alias);
6069 }
6070 }
6071 Expr::Extract { source, .. } => substitute_in_expr(source, row, ctx, outer_alias),
6072 Expr::WindowFunction {
6073 args,
6074 partition_by,
6075 order_by,
6076 ..
6077 } => {
6078 for a in args {
6079 substitute_in_expr(a, row, ctx, outer_alias);
6080 }
6081 for p in partition_by {
6082 substitute_in_expr(p, row, ctx, outer_alias);
6083 }
6084 for (o, _) in order_by {
6085 substitute_in_expr(o, row, ctx, outer_alias);
6086 }
6087 }
6088 Expr::ScalarSubquery(s) => substitute_in_select(s, row, ctx, outer_alias),
6089 Expr::Exists { subquery, .. } | Expr::InSubquery { subquery, .. } => {
6090 substitute_in_select(subquery, row, ctx, outer_alias);
6091 }
6092 Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
6093 }
6094}
6095
6096fn encode_row_key(row: &Row) -> Vec<u8> {
6100 let mut out = Vec::new();
6101 for v in &row.values {
6102 let s = alloc::format!("{v:?}|");
6103 out.extend_from_slice(s.as_bytes());
6104 }
6105 out
6106}
6107
6108fn select_has_window(stmt: &SelectStatement) -> bool {
6109 for item in &stmt.items {
6110 if let SelectItem::Expr { expr, .. } = item
6111 && expr_has_window(expr)
6112 {
6113 return true;
6114 }
6115 }
6116 false
6117}
6118
6119fn expr_has_window(e: &Expr) -> bool {
6120 match e {
6121 Expr::WindowFunction { .. } => true,
6122 Expr::Binary { lhs, rhs, .. } => expr_has_window(lhs) || expr_has_window(rhs),
6123 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6124 expr_has_window(expr)
6125 }
6126 Expr::FunctionCall { args, .. } => args.iter().any(expr_has_window),
6127 Expr::Like { expr, pattern, .. } => expr_has_window(expr) || expr_has_window(pattern),
6128 Expr::Extract { source, .. } => expr_has_window(source),
6129 Expr::ScalarSubquery(_)
6130 | Expr::Exists { .. }
6131 | Expr::InSubquery { .. }
6132 | Expr::Literal(_)
6133 | Expr::Placeholder(_)
6134 | Expr::Column(_) => false,
6135 }
6136}
6137
6138fn collect_window_nodes(e: &Expr, out: &mut Vec<Expr>) {
6139 if let Expr::WindowFunction { .. } = e {
6140 if !out.iter().any(|x| x == e) {
6145 out.push(e.clone());
6146 }
6147 return;
6148 }
6149 match e {
6150 Expr::WindowFunction { .. } => unreachable!(),
6152 Expr::Binary { lhs, rhs, .. } => {
6153 collect_window_nodes(lhs, out);
6154 collect_window_nodes(rhs, out);
6155 }
6156 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6157 collect_window_nodes(expr, out);
6158 }
6159 Expr::FunctionCall { args, .. } => {
6160 for a in args {
6161 collect_window_nodes(a, out);
6162 }
6163 }
6164 Expr::Like { expr, pattern, .. } => {
6165 collect_window_nodes(expr, out);
6166 collect_window_nodes(pattern, out);
6167 }
6168 Expr::Extract { source, .. } => collect_window_nodes(source, out),
6169 _ => {}
6170 }
6171}
6172
6173fn rewrite_window_to_columns(e: &mut Expr, window_nodes: &[Expr]) {
6174 if let Expr::WindowFunction { .. } = e
6175 && let Some(idx) = window_nodes.iter().position(|w| w == e)
6176 {
6177 *e = Expr::Column(spg_sql::ast::ColumnName {
6178 qualifier: None,
6179 name: alloc::format!("__win_{idx}"),
6180 });
6181 return;
6182 }
6183 match e {
6184 Expr::Binary { lhs, rhs, .. } => {
6185 rewrite_window_to_columns(lhs, window_nodes);
6186 rewrite_window_to_columns(rhs, window_nodes);
6187 }
6188 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6189 rewrite_window_to_columns(expr, window_nodes);
6190 }
6191 Expr::FunctionCall { args, .. } => {
6192 for a in args {
6193 rewrite_window_to_columns(a, window_nodes);
6194 }
6195 }
6196 Expr::Like { expr, pattern, .. } => {
6197 rewrite_window_to_columns(expr, window_nodes);
6198 rewrite_window_to_columns(pattern, window_nodes);
6199 }
6200 Expr::Extract { source, .. } => rewrite_window_to_columns(source, window_nodes),
6201 _ => {}
6202 }
6203}
6204
6205fn partition_key_cmp(a: &[Value], b: &[Value]) -> core::cmp::Ordering {
6209 for (x, y) in a.iter().zip(b.iter()) {
6210 let c = value_cmp(x, y);
6211 if c != core::cmp::Ordering::Equal {
6212 return c;
6213 }
6214 }
6215 a.len().cmp(&b.len())
6216}
6217
6218fn order_key_cmp(a: &[(Value, bool)], b: &[(Value, bool)]) -> core::cmp::Ordering {
6219 for ((va, desc), (vb, _)) in a.iter().zip(b.iter()) {
6220 let c = value_cmp(va, vb);
6221 let c = if *desc { c.reverse() } else { c };
6222 if c != core::cmp::Ordering::Equal {
6223 return c;
6224 }
6225 }
6226 a.len().cmp(&b.len())
6227}
6228
6229#[allow(clippy::match_same_arms)] fn value_cmp(a: &Value, b: &Value) -> core::cmp::Ordering {
6231 use core::cmp::Ordering;
6232 match (a, b) {
6233 (Value::Null, Value::Null) => Ordering::Equal,
6234 (Value::Null, _) => Ordering::Less,
6235 (_, Value::Null) => Ordering::Greater,
6236 (Value::Int(x), Value::Int(y)) => x.cmp(y),
6237 (Value::BigInt(x), Value::BigInt(y)) => x.cmp(y),
6238 (Value::SmallInt(x), Value::SmallInt(y)) => x.cmp(y),
6239 (Value::Text(x), Value::Text(y)) => x.cmp(y),
6240 (Value::Bool(x), Value::Bool(y)) => x.cmp(y),
6241 (Value::Float(x), Value::Float(y)) => x.partial_cmp(y).unwrap_or(Ordering::Equal),
6242 (Value::Date(x), Value::Date(y)) => x.cmp(y),
6243 (Value::Timestamp(x), Value::Timestamp(y)) => x.cmp(y),
6244 _ => alloc::format!("{a:?}").cmp(&alloc::format!("{b:?}")),
6247 }
6248}
6249
6250#[allow(
6256 clippy::too_many_arguments,
6257 clippy::cast_possible_truncation,
6258 clippy::cast_possible_wrap,
6259 clippy::cast_precision_loss,
6260 clippy::cast_sign_loss,
6261 clippy::doc_markdown,
6262 clippy::too_many_lines,
6263 clippy::type_complexity,
6264 clippy::match_same_arms
6265)]
6266fn compute_window_partition(
6267 name: &str,
6268 args: &[Expr],
6269 ordered: bool,
6270 frame: Option<&WindowFrame>,
6271 null_treatment: spg_sql::ast::NullTreatment,
6272 slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6273 filtered_rows: &[&Row],
6274 ctx: &EvalContext<'_>,
6275 out_vals: &mut [Value],
6276) -> Result<(), EngineError> {
6277 let ignore_nulls = matches!(null_treatment, spg_sql::ast::NullTreatment::Ignore);
6278 let lower = name.to_ascii_lowercase();
6279 match lower.as_str() {
6280 "row_number" => {
6281 for (rank, (_, _, idx)) in slice.iter().enumerate() {
6282 out_vals[*idx] = Value::BigInt((rank + 1) as i64);
6283 }
6284 Ok(())
6285 }
6286 "rank" => {
6287 let mut prev_key: Option<&[(Value, bool)]> = None;
6288 let mut current_rank: i64 = 1;
6289 for (i, (_, okey, idx)) in slice.iter().enumerate() {
6290 if let Some(p) = prev_key
6291 && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6292 {
6293 current_rank = (i + 1) as i64;
6294 }
6295 if prev_key.is_none() {
6296 current_rank = 1;
6297 }
6298 out_vals[*idx] = Value::BigInt(current_rank);
6299 prev_key = Some(okey.as_slice());
6300 }
6301 Ok(())
6302 }
6303 "dense_rank" => {
6304 let mut prev_key: Option<&[(Value, bool)]> = None;
6305 let mut current_rank: i64 = 0;
6306 for (_, okey, idx) in slice {
6307 if prev_key.is_none_or(|p| order_key_cmp(p, okey) != core::cmp::Ordering::Equal) {
6308 current_rank += 1;
6309 }
6310 out_vals[*idx] = Value::BigInt(current_rank);
6311 prev_key = Some(okey.as_slice());
6312 }
6313 Ok(())
6314 }
6315 "sum" | "avg" | "min" | "max" | "count" | "count_star" => {
6316 let arg_values: Vec<Value> = if lower == "count_star" || args.is_empty() {
6319 slice.iter().map(|_| Value::Null).collect()
6320 } else {
6321 slice
6322 .iter()
6323 .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6324 .collect::<Result<_, _>>()
6325 .map_err(EngineError::Eval)?
6326 };
6327 let eff = effective_frame(frame, ordered)?;
6331 #[allow(clippy::needless_range_loop)]
6332 for i in 0..slice.len() {
6333 let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6334 let mut sum: f64 = 0.0;
6335 let mut count: i64 = 0;
6336 let mut min_v: Option<f64> = None;
6337 let mut max_v: Option<f64> = None;
6338 let mut row_count: i64 = 0;
6339 if lo <= hi {
6340 for j in lo..=hi {
6341 let v = &arg_values[j];
6342 match lower.as_str() {
6343 "count_star" => row_count += 1,
6344 "count" => {
6345 if !v.is_null() {
6346 count += 1;
6347 }
6348 }
6349 _ => {
6350 if let Some(x) = value_to_f64(v) {
6351 sum += x;
6352 count += 1;
6353 min_v = Some(min_v.map_or(x, |m| m.min(x)));
6354 max_v = Some(max_v.map_or(x, |m| m.max(x)));
6355 }
6356 }
6357 }
6358 }
6359 }
6360 let value = match lower.as_str() {
6361 "count_star" => Value::BigInt(row_count),
6362 "count" => Value::BigInt(count),
6363 "sum" => Value::Float(sum),
6364 "avg" => {
6365 if count == 0 {
6366 Value::Null
6367 } else {
6368 Value::Float(sum / count as f64)
6369 }
6370 }
6371 "min" => min_v.map_or(Value::Null, Value::Float),
6372 "max" => max_v.map_or(Value::Null, Value::Float),
6373 _ => unreachable!(),
6374 };
6375 let (_, _, idx) = &slice[i];
6376 out_vals[*idx] = value;
6377 }
6378 Ok(())
6379 }
6380 "lag" | "lead" => {
6381 if args.is_empty() {
6384 return Err(EngineError::Unsupported(alloc::format!(
6385 "{lower}() requires at least one argument"
6386 )));
6387 }
6388 let offset: i64 = if args.len() >= 2 {
6389 let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6390 .map_err(EngineError::Eval)?;
6391 match v {
6392 Value::SmallInt(n) => i64::from(n),
6393 Value::Int(n) => i64::from(n),
6394 Value::BigInt(n) => n,
6395 _ => {
6396 return Err(EngineError::Unsupported(alloc::format!(
6397 "{lower}() offset must be integer"
6398 )));
6399 }
6400 }
6401 } else {
6402 1
6403 };
6404 let default: Value = if args.len() >= 3 {
6405 eval::eval_expr(&args[2], filtered_rows[slice[0].2], ctx)
6406 .map_err(EngineError::Eval)?
6407 } else {
6408 Value::Null
6409 };
6410 let values: Vec<Value> = slice
6411 .iter()
6412 .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6413 .collect::<Result<_, _>>()
6414 .map_err(EngineError::Eval)?;
6415 let n = slice.len();
6416 for (i, (_, _, idx)) in slice.iter().enumerate() {
6417 let signed_offset = if lower == "lag" { -offset } else { offset };
6418 let v = if ignore_nulls {
6419 let step: i64 = if signed_offset >= 0 { 1 } else { -1 };
6423 let needed: i64 = signed_offset.abs();
6424 if needed == 0 {
6425 values[i].clone()
6426 } else {
6427 let mut j: i64 = i as i64;
6428 let mut hits: i64 = 0;
6429 let mut found: Option<Value> = None;
6430 loop {
6431 j += step;
6432 if j < 0 || j >= n as i64 {
6433 break;
6434 }
6435 #[allow(clippy::cast_sign_loss)]
6436 let v = &values[j as usize];
6437 if !v.is_null() {
6438 hits += 1;
6439 if hits == needed {
6440 found = Some(v.clone());
6441 break;
6442 }
6443 }
6444 }
6445 found.unwrap_or_else(|| default.clone())
6446 }
6447 } else {
6448 let target_signed = i64::try_from(i).unwrap_or(i64::MAX) + signed_offset;
6449 if target_signed < 0
6450 || target_signed >= i64::try_from(n).unwrap_or(i64::MAX)
6451 {
6452 default.clone()
6453 } else {
6454 #[allow(clippy::cast_sign_loss)]
6455 {
6456 values[target_signed as usize].clone()
6457 }
6458 }
6459 };
6460 out_vals[*idx] = v;
6461 }
6462 Ok(())
6463 }
6464 "first_value" | "last_value" | "nth_value" => {
6465 if args.is_empty() {
6466 return Err(EngineError::Unsupported(alloc::format!(
6467 "{lower}() requires at least one argument"
6468 )));
6469 }
6470 let values: Vec<Value> = slice
6471 .iter()
6472 .map(|(_, _, idx)| eval::eval_expr(&args[0], filtered_rows[*idx], ctx))
6473 .collect::<Result<_, _>>()
6474 .map_err(EngineError::Eval)?;
6475 let nth: usize = if lower == "nth_value" {
6476 if args.len() < 2 {
6477 return Err(EngineError::Unsupported(
6478 "nth_value() requires (expr, n)".into(),
6479 ));
6480 }
6481 let v = eval::eval_expr(&args[1], filtered_rows[slice[0].2], ctx)
6482 .map_err(EngineError::Eval)?;
6483 let raw = match v {
6484 Value::SmallInt(n) => i64::from(n),
6485 Value::Int(n) => i64::from(n),
6486 Value::BigInt(n) => n,
6487 _ => {
6488 return Err(EngineError::Unsupported(
6489 "nth_value() n must be integer".into(),
6490 ));
6491 }
6492 };
6493 if raw < 1 {
6494 return Err(EngineError::Unsupported(
6495 "nth_value() n must be >= 1".into(),
6496 ));
6497 }
6498 #[allow(clippy::cast_sign_loss)]
6499 {
6500 raw as usize
6501 }
6502 } else {
6503 0
6504 };
6505 let eff = effective_frame(frame, ordered)?;
6506 for i in 0..slice.len() {
6507 let (lo, hi) = frame_bounds_for_row(&eff, i, slice);
6508 let (_, _, idx) = &slice[i];
6509 let v = if lo > hi {
6510 Value::Null
6511 } else if ignore_nulls && matches!(lower.as_str(), "first_value" | "last_value") {
6512 if lower == "first_value" {
6515 (lo..=hi)
6516 .find_map(|j| {
6517 let v = &values[j];
6518 (!v.is_null()).then(|| v.clone())
6519 })
6520 .unwrap_or(Value::Null)
6521 } else {
6522 (lo..=hi)
6523 .rev()
6524 .find_map(|j| {
6525 let v = &values[j];
6526 (!v.is_null()).then(|| v.clone())
6527 })
6528 .unwrap_or(Value::Null)
6529 }
6530 } else {
6531 match lower.as_str() {
6532 "first_value" => values[lo].clone(),
6533 "last_value" => values[hi].clone(),
6534 "nth_value" => {
6535 let pos = lo + nth - 1;
6536 if pos > hi {
6537 Value::Null
6538 } else {
6539 values[pos].clone()
6540 }
6541 }
6542 _ => unreachable!(),
6543 }
6544 };
6545 out_vals[*idx] = v;
6546 }
6547 Ok(())
6548 }
6549 "ntile" => {
6550 if args.is_empty() {
6551 return Err(EngineError::Unsupported(
6552 "ntile(n) requires an integer argument".into(),
6553 ));
6554 }
6555 let v = eval::eval_expr(&args[0], filtered_rows[slice[0].2], ctx)
6556 .map_err(EngineError::Eval)?;
6557 let bucket_count: i64 = match v {
6558 Value::SmallInt(n) => i64::from(n),
6559 Value::Int(n) => i64::from(n),
6560 Value::BigInt(n) => n,
6561 _ => {
6562 return Err(EngineError::Unsupported(
6563 "ntile() argument must be integer".into(),
6564 ));
6565 }
6566 };
6567 if bucket_count < 1 {
6568 return Err(EngineError::Unsupported(
6569 "ntile() argument must be >= 1".into(),
6570 ));
6571 }
6572 #[allow(clippy::cast_sign_loss)]
6573 let buckets = bucket_count as usize;
6574 let n = slice.len();
6575 let base = n / buckets;
6578 let extras = n % buckets;
6579 let mut bucket: usize = 1;
6580 let mut remaining_in_bucket = if extras > 0 { base + 1 } else { base };
6581 let mut buckets_with_extra_remaining = extras;
6582 for (_, _, idx) in slice {
6583 if remaining_in_bucket == 0 {
6584 bucket += 1;
6585 buckets_with_extra_remaining = buckets_with_extra_remaining.saturating_sub(1);
6586 remaining_in_bucket = if buckets_with_extra_remaining > 0 {
6587 base + 1
6588 } else {
6589 base
6590 };
6591 if remaining_in_bucket == 0 {
6594 remaining_in_bucket = 1;
6595 }
6596 }
6597 out_vals[*idx] = Value::BigInt(i64::try_from(bucket).unwrap_or(i64::MAX));
6598 remaining_in_bucket -= 1;
6599 }
6600 Ok(())
6601 }
6602 "percent_rank" => {
6603 let n = slice.len();
6606 let mut prev_key: Option<&[(Value, bool)]> = None;
6607 let mut current_rank: i64 = 1;
6608 for (i, (_, okey, idx)) in slice.iter().enumerate() {
6609 if let Some(p) = prev_key
6610 && order_key_cmp(p, okey) != core::cmp::Ordering::Equal
6611 {
6612 current_rank = i64::try_from(i + 1).unwrap_or(i64::MAX);
6613 }
6614 if prev_key.is_none() {
6615 current_rank = 1;
6616 }
6617 #[allow(clippy::cast_precision_loss)]
6618 let pr = if n <= 1 {
6619 0.0
6620 } else {
6621 (current_rank - 1) as f64 / (n - 1) as f64
6622 };
6623 out_vals[*idx] = Value::Float(pr);
6624 prev_key = Some(okey.as_slice());
6625 }
6626 Ok(())
6627 }
6628 "cume_dist" => {
6629 let n = slice.len();
6631 for i in 0..slice.len() {
6633 let peer_end = peer_group_end(slice, i);
6634 #[allow(clippy::cast_precision_loss)]
6635 let cd = (peer_end + 1) as f64 / n as f64;
6636 let (_, _, idx) = &slice[i];
6637 out_vals[*idx] = Value::Float(cd);
6638 }
6639 Ok(())
6640 }
6641 other => Err(EngineError::Unsupported(alloc::format!(
6642 "window function {other:?} not supported (v4.21: row_number/rank/dense_rank/sum/avg/count/min/max/lag/lead/first_value/last_value/nth_value/ntile/percent_rank/cume_dist)"
6643 ))),
6644 }
6645}
6646
6647fn effective_frame(
6654 frame: Option<&WindowFrame>,
6655 ordered: bool,
6656) -> Result<(FrameKind, FrameBound, FrameBound), EngineError> {
6657 match frame {
6658 None => {
6659 if ordered {
6660 Ok((
6661 FrameKind::Range,
6662 FrameBound::UnboundedPreceding,
6663 FrameBound::CurrentRow,
6664 ))
6665 } else {
6666 Ok((
6667 FrameKind::Rows,
6668 FrameBound::UnboundedPreceding,
6669 FrameBound::UnboundedFollowing,
6670 ))
6671 }
6672 }
6673 Some(fr) => {
6674 let end = fr.end.clone().unwrap_or(FrameBound::CurrentRow);
6675 if matches!(fr.start, FrameBound::UnboundedFollowing)
6677 || matches!(end, FrameBound::UnboundedPreceding)
6678 {
6679 return Err(EngineError::Unsupported(alloc::format!(
6680 "invalid frame: start={:?} end={:?}",
6681 fr.start,
6682 end
6683 )));
6684 }
6685 if fr.kind == FrameKind::Range
6690 && (matches!(
6691 fr.start,
6692 FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6693 ) || matches!(
6694 end,
6695 FrameBound::OffsetPreceding(_) | FrameBound::OffsetFollowing(_)
6696 ))
6697 {
6698 return Err(EngineError::Unsupported(
6699 "RANGE with explicit offset bounds is not supported (v4.20: only UNBOUNDED / CURRENT ROW for RANGE)".into(),
6700 ));
6701 }
6702 Ok((fr.kind, fr.start.clone(), end))
6703 }
6704 }
6705}
6706
6707#[allow(clippy::type_complexity)]
6711fn frame_bounds_for_row(
6712 eff: &(FrameKind, FrameBound, FrameBound),
6713 i: usize,
6714 slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)],
6715) -> (usize, usize) {
6716 let (kind, start, end) = eff;
6717 let n = slice.len();
6718 let last = n.saturating_sub(1);
6719 let (mut lo, mut hi) = match kind {
6720 FrameKind::Rows => {
6721 let lo = match start {
6722 FrameBound::UnboundedPreceding => 0,
6723 FrameBound::OffsetPreceding(k) => {
6724 let k = usize::try_from(*k).unwrap_or(usize::MAX);
6725 i.saturating_sub(k)
6726 }
6727 FrameBound::CurrentRow => i,
6728 FrameBound::OffsetFollowing(k) => {
6729 let k = usize::try_from(*k).unwrap_or(usize::MAX);
6730 i.saturating_add(k).min(last)
6731 }
6732 FrameBound::UnboundedFollowing => last,
6733 };
6734 let hi = match end {
6735 FrameBound::UnboundedPreceding => 0,
6736 FrameBound::OffsetPreceding(k) => {
6737 let k = usize::try_from(*k).unwrap_or(usize::MAX);
6738 i.saturating_sub(k)
6739 }
6740 FrameBound::CurrentRow => i,
6741 FrameBound::OffsetFollowing(k) => {
6742 let k = usize::try_from(*k).unwrap_or(usize::MAX);
6743 i.saturating_add(k).min(last)
6744 }
6745 FrameBound::UnboundedFollowing => last,
6746 };
6747 (lo, hi)
6748 }
6749 FrameKind::Range => {
6750 let lo = match start {
6756 FrameBound::UnboundedPreceding => 0,
6757 FrameBound::CurrentRow => peer_group_start(slice, i),
6758 FrameBound::UnboundedFollowing => last,
6759 _ => unreachable!("offset bounds rejected for RANGE"),
6760 };
6761 let hi = match end {
6762 FrameBound::UnboundedPreceding => 0,
6763 FrameBound::CurrentRow => peer_group_end(slice, i),
6764 FrameBound::UnboundedFollowing => last,
6765 _ => unreachable!("offset bounds rejected for RANGE"),
6766 };
6767 (lo, hi)
6768 }
6769 };
6770 if hi >= n {
6771 hi = last;
6772 }
6773 if lo >= n {
6774 lo = last;
6775 }
6776 (lo, hi)
6777}
6778
6779#[allow(clippy::type_complexity)]
6783fn peer_group_start(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
6784 let key = &slice[i].1;
6785 let mut j = i;
6786 while j > 0 && order_key_cmp(&slice[j - 1].1, key) == core::cmp::Ordering::Equal {
6787 j -= 1;
6788 }
6789 j
6790}
6791
6792#[allow(clippy::type_complexity)]
6795fn peer_group_end(slice: &[(Vec<Value>, Vec<(Value, bool)>, usize)], i: usize) -> usize {
6796 let key = &slice[i].1;
6797 let mut j = i;
6798 while j + 1 < slice.len() && order_key_cmp(&slice[j + 1].1, key) == core::cmp::Ordering::Equal {
6799 j += 1;
6800 }
6801 j
6802}
6803
6804fn value_to_f64(v: &Value) -> Option<f64> {
6805 match v {
6806 Value::SmallInt(n) => Some(f64::from(*n)),
6807 Value::Int(n) => Some(f64::from(*n)),
6808 #[allow(clippy::cast_precision_loss)]
6809 Value::BigInt(n) => Some(*n as f64),
6810 Value::Float(x) => Some(*x),
6811 _ => None,
6812 }
6813}
6814
6815fn expr_tree_has_subquery(stmt: &SelectStatement) -> bool {
6819 let mut any = false;
6820 for item in &stmt.items {
6821 if let SelectItem::Expr { expr, .. } = item {
6822 any = any || expr_has_subquery(expr);
6823 }
6824 }
6825 if let Some(w) = &stmt.where_ {
6826 any = any || expr_has_subquery(w);
6827 }
6828 if let Some(h) = &stmt.having {
6829 any = any || expr_has_subquery(h);
6830 }
6831 for o in &stmt.order_by {
6832 any = any || expr_has_subquery(&o.expr);
6833 }
6834 for (_, peer) in &stmt.unions {
6835 any = any || expr_tree_has_subquery(peer);
6836 }
6837 any
6838}
6839
6840fn expr_has_subquery(e: &Expr) -> bool {
6841 match e {
6842 Expr::ScalarSubquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => true,
6843 Expr::Binary { lhs, rhs, .. } => expr_has_subquery(lhs) || expr_has_subquery(rhs),
6844 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
6845 expr_has_subquery(expr)
6846 }
6847 Expr::FunctionCall { args, .. } => args.iter().any(expr_has_subquery),
6848 Expr::Like { expr, pattern, .. } => expr_has_subquery(expr) || expr_has_subquery(pattern),
6849 Expr::Extract { source, .. } => expr_has_subquery(source),
6850 Expr::WindowFunction {
6851 args,
6852 partition_by,
6853 order_by,
6854 ..
6855 } => {
6856 args.iter().any(expr_has_subquery)
6857 || partition_by.iter().any(expr_has_subquery)
6858 || order_by.iter().any(|(e, _)| expr_has_subquery(e))
6859 }
6860 Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => false,
6861 }
6862}
6863
6864fn value_to_literal_expr(v: Value) -> Result<Expr, EngineError> {
6871 let lit = match v {
6872 Value::Null => Literal::Null,
6873 Value::SmallInt(n) => Literal::Integer(i64::from(n)),
6874 Value::Int(n) => Literal::Integer(i64::from(n)),
6875 Value::BigInt(n) => Literal::Integer(n),
6876 Value::Float(x) => Literal::Float(x),
6877 Value::Text(s) | Value::Json(s) => Literal::String(s),
6878 Value::Bool(b) => Literal::Bool(b),
6879 other => {
6880 return Err(EngineError::Unsupported(alloc::format!(
6881 "subquery result type {:?} not yet materialisable; cast to text or integer in the inner SELECT",
6882 other.data_type()
6883 )));
6884 }
6885 };
6886 Ok(Expr::Literal(lit))
6887}
6888
6889fn substitute_placeholders(stmt: &mut Statement, params: &[Value]) -> Result<(), EngineError> {
6900 match stmt {
6901 Statement::Select(s) => substitute_select(s, params)?,
6902 Statement::Insert(ins) => {
6903 for row in &mut ins.rows {
6904 for e in row {
6905 substitute_expr(e, params)?;
6906 }
6907 }
6908 }
6909 Statement::Update(u) => {
6910 for (_, e) in &mut u.assignments {
6911 substitute_expr(e, params)?;
6912 }
6913 if let Some(w) = &mut u.where_ {
6914 substitute_expr(w, params)?;
6915 }
6916 }
6917 Statement::Delete(d) => {
6918 if let Some(w) = &mut d.where_ {
6919 substitute_expr(w, params)?;
6920 }
6921 }
6922 Statement::Explain(e) => substitute_select(&mut e.inner, params)?,
6923 _ => {}
6926 }
6927 Ok(())
6928}
6929
6930fn substitute_select(
6931 s: &mut SelectStatement,
6932 params: &[Value],
6933) -> Result<(), EngineError> {
6934 for item in &mut s.items {
6935 if let SelectItem::Expr { expr, .. } = item {
6936 substitute_expr(expr, params)?;
6937 }
6938 }
6939 if let Some(w) = &mut s.where_ {
6940 substitute_expr(w, params)?;
6941 }
6942 if let Some(gs) = &mut s.group_by {
6943 for g in gs {
6944 substitute_expr(g, params)?;
6945 }
6946 }
6947 if let Some(h) = &mut s.having {
6948 substitute_expr(h, params)?;
6949 }
6950 for o in &mut s.order_by {
6951 substitute_expr(&mut o.expr, params)?;
6952 }
6953 for (_, peer) in &mut s.unions {
6954 substitute_select(peer, params)?;
6955 }
6956 if let Some(le) = s.limit {
6961 s.limit = Some(resolve_limit_placeholder(le, params)?);
6962 }
6963 if let Some(le) = s.offset {
6964 s.offset = Some(resolve_limit_placeholder(le, params)?);
6965 }
6966 Ok(())
6967}
6968
6969fn resolve_limit_placeholder(
6970 le: spg_sql::ast::LimitExpr,
6971 params: &[Value],
6972) -> Result<spg_sql::ast::LimitExpr, EngineError> {
6973 use spg_sql::ast::LimitExpr;
6974 match le {
6975 LimitExpr::Literal(_) => Ok(le),
6976 LimitExpr::Placeholder(n) => {
6977 let idx = usize::from(n).saturating_sub(1);
6978 let v = params.get(idx).ok_or_else(|| {
6979 EngineError::Eval(EvalError::PlaceholderOutOfRange {
6980 n,
6981 bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
6982 })
6983 })?;
6984 let int = match v {
6985 Value::SmallInt(x) => Some(i64::from(*x)),
6986 Value::Int(x) => Some(i64::from(*x)),
6987 Value::BigInt(x) => Some(*x),
6988 _ => None,
6989 }
6990 .ok_or_else(|| {
6991 EngineError::Unsupported(alloc::format!(
6992 "LIMIT/OFFSET ${n} bound to non-integer {v:?}"
6993 ))
6994 })?;
6995 if int < 0 {
6996 return Err(EngineError::Unsupported(alloc::format!(
6997 "LIMIT/OFFSET ${n} bound to negative value {int}"
6998 )));
6999 }
7000 let bounded = u32::try_from(int).map_err(|_| {
7001 EngineError::Unsupported(alloc::format!(
7002 "LIMIT/OFFSET ${n} value {int} exceeds u32 range"
7003 ))
7004 })?;
7005 Ok(LimitExpr::Literal(bounded))
7006 }
7007 }
7008}
7009
7010fn substitute_expr(e: &mut Expr, params: &[Value]) -> Result<(), EngineError> {
7011 if let Expr::Placeholder(n) = e {
7012 let idx = usize::from(*n).saturating_sub(1);
7013 let v = params.get(idx).ok_or_else(|| {
7014 EngineError::Eval(EvalError::PlaceholderOutOfRange {
7015 n: *n,
7016 bound: u16::try_from(params.len()).unwrap_or(u16::MAX),
7017 })
7018 })?;
7019 *e = Expr::Literal(value_to_literal(v.clone()));
7020 return Ok(());
7021 }
7022 match e {
7023 Expr::Binary { lhs, rhs, .. } => {
7024 substitute_expr(lhs, params)?;
7025 substitute_expr(rhs, params)?;
7026 }
7027 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7028 substitute_expr(expr, params)?;
7029 }
7030 Expr::FunctionCall { args, .. } => {
7031 for a in args {
7032 substitute_expr(a, params)?;
7033 }
7034 }
7035 Expr::Like { expr, pattern, .. } => {
7036 substitute_expr(expr, params)?;
7037 substitute_expr(pattern, params)?;
7038 }
7039 Expr::Extract { source, .. } => substitute_expr(source, params)?,
7040 Expr::ScalarSubquery(s) => substitute_select(s, params)?,
7041 Expr::Exists { subquery, .. } => substitute_select(subquery, params)?,
7042 Expr::InSubquery { expr, subquery, .. } => {
7043 substitute_expr(expr, params)?;
7044 substitute_select(subquery, params)?;
7045 }
7046 Expr::WindowFunction {
7047 args,
7048 partition_by,
7049 order_by,
7050 ..
7051 } => {
7052 for a in args {
7053 substitute_expr(a, params)?;
7054 }
7055 for p in partition_by {
7056 substitute_expr(p, params)?;
7057 }
7058 for (e, _) in order_by {
7059 substitute_expr(e, params)?;
7060 }
7061 }
7062 Expr::Literal(_) | Expr::Column(_) => {}
7063 Expr::Placeholder(_) => unreachable!("Placeholder handled at top of fn"),
7065 }
7066 Ok(())
7067}
7068
7069fn sort_values_for_histogram(a: &Value, b: &Value) -> core::cmp::Ordering {
7087 use core::cmp::Ordering;
7088 match (a, b) {
7089 (Value::SmallInt(a), Value::SmallInt(b)) => a.cmp(b),
7090 (Value::Int(a), Value::Int(b)) => a.cmp(b),
7091 (Value::BigInt(a), Value::BigInt(b)) => a.cmp(b),
7092 (Value::SmallInt(a), Value::Int(b)) => i32::from(*a).cmp(b),
7093 (Value::Int(a), Value::SmallInt(b)) => a.cmp(&i32::from(*b)),
7094 (Value::Int(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7095 (Value::BigInt(a), Value::Int(b)) => a.cmp(&i64::from(*b)),
7096 (Value::SmallInt(a), Value::BigInt(b)) => i64::from(*a).cmp(b),
7097 (Value::BigInt(a), Value::SmallInt(b)) => a.cmp(&i64::from(*b)),
7098 (Value::Float(a), Value::Float(b)) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
7099 (Value::Text(a), Value::Text(b)) | (Value::Json(a), Value::Json(b)) => a.cmp(b),
7100 (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
7101 (Value::Date(a), Value::Date(b)) => a.cmp(b),
7102 (Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
7103 (Value::SmallInt(n), Value::Float(x)) => {
7105 (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7106 }
7107 (Value::Float(x), Value::SmallInt(n)) => {
7108 x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7109 }
7110 (Value::Int(n), Value::Float(x)) => {
7111 (f64::from(*n)).partial_cmp(x).unwrap_or(Ordering::Equal)
7112 }
7113 (Value::Float(x), Value::Int(n)) => {
7114 x.partial_cmp(&f64::from(*n)).unwrap_or(Ordering::Equal)
7115 }
7116 (Value::BigInt(n), Value::Float(x)) => {
7117 #[allow(clippy::cast_precision_loss)]
7118 let nf = *n as f64;
7119 nf.partial_cmp(x).unwrap_or(Ordering::Equal)
7120 }
7121 (Value::Float(x), Value::BigInt(n)) => {
7122 #[allow(clippy::cast_precision_loss)]
7123 let nf = *n as f64;
7124 x.partial_cmp(&nf).unwrap_or(Ordering::Equal)
7125 }
7126 _ => canonical_value_repr(a).cmp(&canonical_value_repr(b)),
7129 }
7130}
7131
/// Renders histogram bounds as a bracketed, `", "`-separated list.
///
/// A bound is wrapped in double quotes when it is empty or contains `,`, `[`,
/// `]` or `"`; inside quotes, `"` and `\` are escaped with a backslash. All
/// other bounds are emitted verbatim.
fn render_histogram_bounds(bounds: &[alloc::string::String]) -> alloc::string::String {
    let mut rendered = alloc::string::String::with_capacity(bounds.len() * 8 + 2);
    rendered.push('[');
    // Empty before the first element, ", " before every later one.
    let mut separator = "";
    for bound in bounds {
        rendered.push_str(separator);
        separator = ", ";

        let must_quote =
            bound.is_empty() || bound.chars().any(|c| matches!(c, ',' | '[' | ']' | '"'));
        if !must_quote {
            rendered.push_str(bound);
            continue;
        }
        rendered.push('"');
        for c in bound.chars() {
            if matches!(c, '"' | '\\') {
                rendered.push('\\');
            }
            rendered.push(c);
        }
        rendered.push('"');
    }
    rendered.push(']');
    rendered
}
7162
7163pub(crate) fn canonical_value_repr(v: &Value) -> alloc::string::String {
7173 match v {
7174 Value::Null => "NULL".to_string(),
7175 Value::SmallInt(n) => alloc::format!("{n}"),
7176 Value::Int(n) => alloc::format!("{n}"),
7177 Value::BigInt(n) => alloc::format!("{n}"),
7178 Value::Float(x) => alloc::format!("{x:?}"),
7179 Value::Text(s) | Value::Json(s) => s.clone(),
7180 Value::Bool(b) => if *b { "t" } else { "f" }.to_string(),
7181 Value::Date(d) => eval::format_date(*d),
7182 Value::Timestamp(t) => eval::format_timestamp(*t),
7183 Value::Interval { months, micros } => eval::format_interval(*months, *micros),
7184 Value::Numeric { scaled, scale } => eval::format_numeric(*scaled, *scale),
7185 Value::Vector(_) | Value::Sq8Vector(_) | Value::HalfVector(_) => {
7186 alloc::format!("{v:?}")
7190 }
7191 _ => alloc::format!("{v:?}"),
7195 }
7196}
7197
/// Reports whether `_name` names an engine-internal table.
///
/// Currently always `false`: the argument is ignored and no name is treated as
/// internal.
const fn is_internal_table_name(_name: &str) -> bool {
    false
}
7207
7208fn value_to_literal(v: Value) -> Literal {
7209 match v {
7210 Value::Null => Literal::Null,
7211 Value::SmallInt(n) => Literal::Integer(i64::from(n)),
7212 Value::Int(n) => Literal::Integer(i64::from(n)),
7213 Value::BigInt(n) => Literal::Integer(n),
7214 Value::Float(x) => Literal::Float(x),
7215 Value::Text(s) | Value::Json(s) => Literal::String(s),
7216 Value::Bool(b) => Literal::Bool(b),
7217 Value::Vector(v) => Literal::Vector(v),
7218 Value::Numeric { scaled, scale } => {
7219 Literal::String(eval::format_numeric(scaled, scale))
7220 }
7221 Value::Date(d) => Literal::String(eval::format_date(d)),
7222 Value::Timestamp(t) => Literal::String(eval::format_timestamp(t)),
7223 Value::Interval { months, micros } => Literal::Interval {
7224 months,
7225 micros,
7226 text: eval::format_interval(months, micros),
7227 },
7228 Value::Sq8Vector(q) => Literal::Vector(spg_storage::quantize::dequantize(&q)),
7231 Value::HalfVector(h) => Literal::Vector(h.to_f32_vec()),
7232 v => Literal::String(alloc::format!("{v:?}")),
7236 }
7237}
7238
7239fn rewrite_clock_calls(stmt: &mut Statement, now_micros: Option<i64>) {
7240 let Some(now) = now_micros else {
7241 return;
7242 };
7243 match stmt {
7244 Statement::Select(s) => rewrite_select_clock(s, now),
7245 Statement::Insert(ins) => {
7246 for row in &mut ins.rows {
7247 for e in row {
7248 rewrite_expr_clock(e, now);
7249 }
7250 }
7251 }
7252 _ => {}
7253 }
7254}
7255
7256fn rewrite_select_clock(s: &mut SelectStatement, now: i64) {
7257 for item in &mut s.items {
7258 if let SelectItem::Expr { expr, .. } = item {
7259 rewrite_expr_clock(expr, now);
7260 }
7261 }
7262 if let Some(w) = &mut s.where_ {
7263 rewrite_expr_clock(w, now);
7264 }
7265 if let Some(gs) = &mut s.group_by {
7266 for g in gs {
7267 rewrite_expr_clock(g, now);
7268 }
7269 }
7270 if let Some(h) = &mut s.having {
7271 rewrite_expr_clock(h, now);
7272 }
7273 for o in &mut s.order_by {
7274 rewrite_expr_clock(&mut o.expr, now);
7275 }
7276 for (_, peer) in &mut s.unions {
7277 rewrite_select_clock(peer, now);
7278 }
7279}
7280
7281fn rewrite_expr_clock(e: &mut Expr, now: i64) {
7289 if let Some(replacement) = clock_replacement_for(e, now) {
7293 *e = replacement;
7294 return;
7295 }
7296 match e {
7297 Expr::Binary { lhs, rhs, .. } => {
7298 rewrite_expr_clock(lhs, now);
7299 rewrite_expr_clock(rhs, now);
7300 }
7301 Expr::Unary { expr, .. } | Expr::Cast { expr, .. } | Expr::IsNull { expr, .. } => {
7302 rewrite_expr_clock(expr, now);
7303 }
7304 Expr::FunctionCall { args, .. } => {
7305 for a in args {
7306 rewrite_expr_clock(a, now);
7307 }
7308 }
7309 Expr::Like { expr, pattern, .. } => {
7310 rewrite_expr_clock(expr, now);
7311 rewrite_expr_clock(pattern, now);
7312 }
7313 Expr::Extract { source, .. } => rewrite_expr_clock(source, now),
7314 Expr::ScalarSubquery(s) => rewrite_select_clock(s, now),
7318 Expr::Exists { subquery, .. } => rewrite_select_clock(subquery, now),
7319 Expr::InSubquery { expr, subquery, .. } => {
7320 rewrite_expr_clock(expr, now);
7321 rewrite_select_clock(subquery, now);
7322 }
7323 Expr::WindowFunction {
7326 args,
7327 partition_by,
7328 order_by,
7329 ..
7330 } => {
7331 for a in args {
7332 rewrite_expr_clock(a, now);
7333 }
7334 for p in partition_by {
7335 rewrite_expr_clock(p, now);
7336 }
7337 for (e, _) in order_by {
7338 rewrite_expr_clock(e, now);
7339 }
7340 }
7341 Expr::Literal(_) | Expr::Placeholder(_) | Expr::Column(_) => {}
7342 }
7343}
7344
7345fn clock_replacement_for(e: &Expr, now: i64) -> Option<Expr> {
7352 let (kind, name) = match e {
7353 Expr::FunctionCall { name, args } if args.is_empty() => (ClockSite::Fn, name.as_str()),
7354 Expr::Column(c) if c.qualifier.is_none() => (ClockSite::BareIdent, c.name.as_str()),
7355 _ => return None,
7356 };
7357 let matched = match name.len() {
7360 3 if kind == ClockSite::Fn && name.eq_ignore_ascii_case("now") => Some(true),
7361 12 if name.eq_ignore_ascii_case("current_date") => Some(false),
7362 17 if name.eq_ignore_ascii_case("current_timestamp") => Some(true),
7363 _ => None,
7364 };
7365 let is_timestamp = matched?;
7366 let payload = if is_timestamp {
7367 now
7368 } else {
7369 now.div_euclid(86_400_000_000)
7370 };
7371 let target = if is_timestamp {
7372 spg_sql::ast::CastTarget::Timestamp
7373 } else {
7374 spg_sql::ast::CastTarget::Date
7375 };
7376 Some(Expr::Cast {
7377 expr: alloc::boxed::Box::new(Expr::Literal(spg_sql::ast::Literal::Integer(payload))),
7378 target,
7379 })
7380}
7381
/// Syntactic position in which a candidate clock reference was found.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClockSite {
    /// A function call with an empty argument list, e.g. `now()`.
    Fn,
    /// A bare, unqualified identifier, e.g. `current_date`.
    BareIdent,
}
7387
7388fn expand_group_by_all(s: &mut SelectStatement) {
7399 if !s.group_by_all {
7400 for (_, peer) in &mut s.unions {
7401 expand_group_by_all(peer);
7402 }
7403 return;
7404 }
7405 let mut groups: Vec<Expr> = Vec::new();
7406 for item in &s.items {
7407 if let SelectItem::Expr { expr, .. } = item
7408 && !aggregate::contains_aggregate(expr)
7409 {
7410 groups.push(expr.clone());
7411 }
7412 }
7413 s.group_by = Some(groups);
7414 s.group_by_all = false;
7415 for (_, peer) in &mut s.unions {
7416 expand_group_by_all(peer);
7417 }
7418}
7419
7420fn resolve_order_by_position(s: &mut SelectStatement) {
7421 for order in &mut s.order_by {
7426 match &order.expr {
7427 Expr::Literal(Literal::Integer(n)) if *n >= 1 => {
7428 if let Ok(idx_one_based) = usize::try_from(*n) {
7429 let idx = idx_one_based - 1;
7430 if idx < s.items.len()
7431 && let SelectItem::Expr { expr, .. } = &s.items[idx]
7432 {
7433 order.expr = expr.clone();
7434 }
7435 }
7436 }
7437 Expr::Column(c) if c.qualifier.is_none() => {
7438 for item in &s.items {
7440 if let SelectItem::Expr {
7441 expr,
7442 alias: Some(a),
7443 } = item
7444 && a == &c.name
7445 {
7446 order.expr = expr.clone();
7447 break;
7448 }
7449 }
7450 }
7451 _ => {}
7452 }
7453 }
7454 for (_, peer) in &mut s.unions {
7455 resolve_order_by_position(peer);
7456 }
7457}
7458
7459fn partial_sort_tagged(
7472 tagged: &mut Vec<(Vec<f64>, Row)>,
7473 keep: Option<usize>,
7474 descs: &[bool],
7475) {
7476 let cmp = |a: &(Vec<f64>, Row), b: &(Vec<f64>, Row)| cmp_multi_key(&a.0, &b.0, descs);
7477 match keep {
7478 Some(k) if k < tagged.len() && k > 0 => {
7479 let pivot = k - 1;
7480 tagged.select_nth_unstable_by(pivot, cmp);
7481 tagged[..k].sort_by(cmp);
7482 tagged.truncate(k);
7483 }
7484 _ => {
7485 tagged.sort_by(cmp);
7486 }
7487 }
7488}
7489
7490fn sort_by_keys(tagged: &mut [(Vec<f64>, Row)], descs: &[bool]) {
7491 tagged.sort_by(|a, b| cmp_multi_key(&a.0, &b.0, descs));
7492}
7493
/// Lexicographically compares two sort-key vectors.
///
/// Keys are compared pairwise; `descs[i]` (default `false` when absent)
/// reverses the comparison for key `i`. The first non-equal key decides.
/// Comparison stops at the shorter of `a` and `b`; if all compared keys are
/// equal the result is `Equal`.
///
/// NaN sorts after every other number (before, for a descending key) and equal
/// to itself. The previous `partial_cmp(..).unwrap_or(Equal)` made NaN equal to
/// *everything*, which is not a total order: `sort_by` and
/// `select_nth_unstable_by` may panic or misorder their input in that case.
fn cmp_multi_key(a: &[f64], b: &[f64], descs: &[bool]) -> core::cmp::Ordering {
    use core::cmp::Ordering;
    for (i, (ka, kb)) in a.iter().zip(b.iter()).enumerate() {
        // `partial_cmp` is `None` only when at least one side is NaN; ranking
        // by "is NaN" then puts NaN last and makes NaN == NaN.
        let ord = ka
            .partial_cmp(kb)
            .unwrap_or_else(|| ka.is_nan().cmp(&kb.is_nan()));
        let ord = if descs.get(i).copied().unwrap_or(false) {
            ord.reverse()
        } else {
            ord
        };
        if ord != Ordering::Equal {
            return ord;
        }
    }
    Ordering::Equal
}
7512
7513fn build_order_keys(
7516 order_by: &[OrderBy],
7517 row: &Row,
7518 ctx: &EvalContext,
7519) -> Result<Vec<f64>, EngineError> {
7520 let mut keys = Vec::with_capacity(order_by.len());
7521 for o in order_by {
7522 let v = eval::eval_expr(&o.expr, row, ctx)?;
7523 keys.push(value_to_order_key(&v)?);
7524 }
7525 Ok(keys)
7526}
7527
7528fn apply_offset_and_limit(rows: &mut Vec<Row>, offset: Option<u32>, limit: Option<u32>) {
7532 if let Some(off) = offset {
7533 let off = off as usize;
7534 if off >= rows.len() {
7535 rows.clear();
7536 } else {
7537 rows.drain(..off);
7538 }
7539 }
7540 if let Some(n) = limit {
7541 rows.truncate(n as usize);
7542 }
7543}
7544
7545fn resolve_foreign_key(
7559 local_table_name: &str,
7560 local_cols: &[ColumnSchema],
7561 fk: spg_sql::ast::ForeignKeyConstraint,
7562 catalog: &Catalog,
7563) -> Result<spg_storage::ForeignKeyConstraint, EngineError> {
7564 let mut local_columns = Vec::with_capacity(fk.columns.len());
7566 for name in &fk.columns {
7567 let pos = local_cols
7568 .iter()
7569 .position(|c| c.name == *name)
7570 .ok_or_else(|| {
7571 EngineError::Unsupported(alloc::format!(
7572 "FOREIGN KEY references unknown local column {name:?}"
7573 ))
7574 })?;
7575 local_columns.push(pos);
7576 }
7577 let is_self_ref = fk.parent_table == local_table_name;
7581 let (parent_cols_for_lookup, parent_table_str): (&[ColumnSchema], &str) = if is_self_ref {
7582 (local_cols, local_table_name)
7583 } else {
7584 let parent_table = catalog.get(&fk.parent_table).ok_or_else(|| {
7585 EngineError::Storage(StorageError::TableNotFound {
7586 name: fk.parent_table.clone(),
7587 })
7588 })?;
7589 (parent_table.schema().columns.as_slice(), fk.parent_table.as_str())
7590 };
7591 let parent_columns: Vec<usize> = if fk.parent_columns.is_empty() {
7596 if fk.columns.len() != 1 {
7597 return Err(EngineError::Unsupported(
7598 "composite FOREIGN KEY without explicit parent column list is not supported \
7599 — list the parent columns explicitly"
7600 .into(),
7601 ));
7602 }
7603 let pos = pick_pk_index_column(catalog, parent_table_str, is_self_ref, local_cols)
7605 .ok_or_else(|| {
7606 EngineError::Unsupported(alloc::format!(
7607 "parent table {parent_table_str:?} has no PRIMARY-key / UNIQUE BTree index \
7608 to default the FOREIGN KEY against"
7609 ))
7610 })?;
7611 alloc::vec![pos]
7612 } else {
7613 let mut out = Vec::with_capacity(fk.parent_columns.len());
7614 for name in &fk.parent_columns {
7615 let pos = parent_cols_for_lookup
7616 .iter()
7617 .position(|c| c.name == *name)
7618 .ok_or_else(|| {
7619 EngineError::Unsupported(alloc::format!(
7620 "FOREIGN KEY references unknown parent column \
7621 {name:?} on table {parent_table_str:?}"
7622 ))
7623 })?;
7624 out.push(pos);
7625 }
7626 out
7627 };
7628 if parent_columns.len() != local_columns.len() {
7629 return Err(EngineError::Unsupported(alloc::format!(
7630 "FOREIGN KEY arity mismatch: {} local columns vs {} parent columns",
7631 local_columns.len(),
7632 parent_columns.len()
7633 )));
7634 }
7635 if !is_self_ref {
7645 let parent_table = catalog
7646 .get(&fk.parent_table)
7647 .expect("checked above");
7648 let primary_parent_col = parent_columns[0];
7649 let has_btree = parent_table.schema().columns.get(primary_parent_col).is_some()
7650 && parent_table
7651 .indices()
7652 .iter()
7653 .any(|idx| {
7654 matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7655 && idx.column_position == primary_parent_col
7656 && idx.partial_predicate.is_none()
7657 });
7658 if !has_btree {
7659 return Err(EngineError::Unsupported(alloc::format!(
7660 "FOREIGN KEY parent column on {:?} is not covered by an unconditional BTree \
7661 index — create one with `CREATE INDEX ... ON {} ({})` first",
7662 parent_table_str,
7663 parent_table_str,
7664 parent_table.schema().columns[primary_parent_col].name,
7665 )));
7666 }
7667 }
7668 let on_delete = fk_action_sql_to_storage(fk.on_delete);
7669 let on_update = fk_action_sql_to_storage(fk.on_update);
7670 Ok(spg_storage::ForeignKeyConstraint {
7671 name: fk.name,
7672 local_columns,
7673 parent_table: fk.parent_table,
7674 parent_columns,
7675 on_delete,
7676 on_update,
7677 })
7678}
7679
7680fn pick_pk_index_column(
7686 catalog: &Catalog,
7687 parent_name: &str,
7688 is_self_ref: bool,
7689 local_cols: &[ColumnSchema],
7690) -> Option<usize> {
7691 if is_self_ref {
7692 let _ = local_cols;
7696 return Some(0);
7697 }
7698 let parent = catalog.get(parent_name)?;
7699 parent.indices().iter().find_map(|idx| {
7700 if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7701 && idx.partial_predicate.is_none()
7702 && idx.included_columns.is_empty()
7703 && idx.expression.is_none()
7704 {
7705 Some(idx.column_position)
7706 } else {
7707 None
7708 }
7709 })
7710}
7711
7712fn resolve_on_conflict_columns(
7719 catalog: &Catalog,
7720 table_name: &str,
7721 target: &[String],
7722) -> Result<Vec<usize>, EngineError> {
7723 let table = catalog.get(table_name).ok_or_else(|| {
7724 EngineError::Storage(StorageError::TableNotFound {
7725 name: table_name.into(),
7726 })
7727 })?;
7728 if target.is_empty() {
7729 let pos = table
7730 .indices()
7731 .iter()
7732 .find_map(|idx| {
7733 if matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7734 && idx.partial_predicate.is_none()
7735 && idx.included_columns.is_empty()
7736 && idx.expression.is_none()
7737 {
7738 Some(idx.column_position)
7739 } else {
7740 None
7741 }
7742 })
7743 .ok_or_else(|| {
7744 EngineError::Unsupported(alloc::format!(
7745 "ON CONFLICT without target requires a UNIQUE BTree index on {table_name:?}"
7746 ))
7747 })?;
7748 return Ok(alloc::vec![pos]);
7749 }
7750 let mut out = Vec::with_capacity(target.len());
7751 for name in target {
7752 let pos = table
7753 .schema()
7754 .columns
7755 .iter()
7756 .position(|c| c.name == *name)
7757 .ok_or_else(|| {
7758 EngineError::Unsupported(alloc::format!(
7759 "ON CONFLICT target column {name:?} not found on {table_name:?}"
7760 ))
7761 })?;
7762 out.push(pos);
7763 }
7764 Ok(out)
7765}
7766
7767fn on_conflict_key_exists(
7770 catalog: &Catalog,
7771 table_name: &str,
7772 column_pos: usize,
7773 key: &Value,
7774) -> bool {
7775 let Some(table) = catalog.get(table_name) else {
7776 return false;
7777 };
7778 let Some(idx_key) = spg_storage::IndexKey::from_value(key) else {
7779 return false;
7780 };
7781 table.indices().iter().any(|idx| {
7782 matches!(idx.kind, spg_storage::IndexKind::BTree(_))
7783 && idx.column_position == column_pos
7784 && idx.partial_predicate.is_none()
7785 && !idx.lookup_eq(&idx_key).is_empty()
7786 })
7787}
7788
7789fn lookup_row_position_by_keys(
7795 catalog: &Catalog,
7796 table_name: &str,
7797 column_positions: &[usize],
7798 key: &[&Value],
7799) -> Option<usize> {
7800 let table = catalog.get(table_name)?;
7801 table.rows().iter().position(|r| {
7802 column_positions
7803 .iter()
7804 .enumerate()
7805 .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
7806 })
7807}
7808
7809fn on_conflict_keys_exist(
7814 catalog: &Catalog,
7815 table_name: &str,
7816 column_positions: &[usize],
7817 key: &[&Value],
7818) -> bool {
7819 if column_positions.len() == 1 {
7820 return on_conflict_key_exists(
7821 catalog,
7822 table_name,
7823 column_positions[0],
7824 key[0],
7825 );
7826 }
7827 let Some(table) = catalog.get(table_name) else {
7828 return false;
7829 };
7830 table.rows().iter().any(|r| {
7831 column_positions
7832 .iter()
7833 .enumerate()
7834 .all(|(i, &pos)| r.values.get(pos) == Some(key[i]))
7835 })
7836}
7837
7838fn apply_on_conflict_assignments(
7851 catalog: &Catalog,
7852 table_name: &str,
7853 target_pos: usize,
7854 incoming: &[Value],
7855 assignments: &[(String, Expr)],
7856 where_: Option<&Expr>,
7857) -> Result<Option<Vec<Value>>, EngineError> {
7858 let table = catalog.get(table_name).ok_or_else(|| {
7859 EngineError::Storage(StorageError::TableNotFound {
7860 name: table_name.into(),
7861 })
7862 })?;
7863 let schema_cols = table.schema().columns.clone();
7864 let existing = table
7865 .rows()
7866 .get(target_pos)
7867 .ok_or_else(|| {
7868 EngineError::Unsupported(alloc::format!(
7869 "ON CONFLICT DO UPDATE: row position {target_pos} out of bounds on {table_name:?}"
7870 ))
7871 })?
7872 .clone();
7873 let ctx = eval::EvalContext::new(&schema_cols, Some(table_name));
7874 if let Some(w) = where_ {
7876 let pred = w.clone();
7877 let pred = substitute_excluded_refs(pred, &schema_cols, incoming);
7878 let v = eval::eval_expr(&pred, &existing, &ctx)?;
7879 if !matches!(v, Value::Bool(true)) {
7880 return Ok(None);
7881 }
7882 }
7883 let mut new_values = existing.values.clone();
7884 for (col_name, expr) in assignments {
7885 let target_idx = schema_cols
7886 .iter()
7887 .position(|c| c.name == *col_name)
7888 .ok_or_else(|| {
7889 EngineError::Eval(EvalError::ColumnNotFound {
7890 name: col_name.clone(),
7891 })
7892 })?;
7893 let sub = substitute_excluded_refs(expr.clone(), &schema_cols, incoming);
7894 let v = eval::eval_expr(&sub, &existing, &ctx)?;
7895 new_values[target_idx] =
7896 coerce_value(v, schema_cols[target_idx].ty, col_name, target_idx)?;
7897 }
7898 Ok(Some(new_values))
7899}
7900
7901fn substitute_excluded_refs(
7906 expr: Expr,
7907 schema_cols: &[ColumnSchema],
7908 incoming: &[Value],
7909) -> Expr {
7910 use spg_sql::ast::ColumnName;
7911 match expr {
7912 Expr::Column(ColumnName { qualifier, name })
7913 if qualifier
7914 .as_deref()
7915 .is_some_and(|q| q.eq_ignore_ascii_case("excluded")) =>
7916 {
7917 let pos = schema_cols.iter().position(|c| c.name == name);
7918 match pos {
7919 Some(p) => {
7920 let v = incoming.get(p).cloned().unwrap_or(Value::Null);
7921 value_to_literal_expr(v).unwrap_or_else(|_| {
7922 Expr::Literal(spg_sql::ast::Literal::Null)
7923 })
7924 }
7925 None => Expr::Column(ColumnName { qualifier, name }),
7926 }
7927 }
7928 Expr::Binary { op, lhs, rhs } => Expr::Binary {
7929 op,
7930 lhs: Box::new(substitute_excluded_refs(*lhs, schema_cols, incoming)),
7931 rhs: Box::new(substitute_excluded_refs(*rhs, schema_cols, incoming)),
7932 },
7933 Expr::Unary { op, expr } => Expr::Unary {
7934 op,
7935 expr: Box::new(substitute_excluded_refs(*expr, schema_cols, incoming)),
7936 },
7937 Expr::FunctionCall { name, args } => Expr::FunctionCall {
7938 name,
7939 args: args
7940 .into_iter()
7941 .map(|a| substitute_excluded_refs(a, schema_cols, incoming))
7942 .collect(),
7943 },
7944 other => other,
7945 }
7946}
7947
7948fn enforce_uniqueness_inserts(
7971 catalog: &Catalog,
7972 child_table: &str,
7973 constraints: &[spg_storage::UniquenessConstraint],
7974 rows: &[Vec<Value>],
7975) -> Result<(), EngineError> {
7976 if constraints.is_empty() {
7977 return Ok(());
7978 }
7979 let table = catalog.get(child_table).ok_or_else(|| {
7980 EngineError::Storage(StorageError::TableNotFound {
7981 name: child_table.into(),
7982 })
7983 })?;
7984 for uc in constraints {
7985 for (batch_idx, row_values) in rows.iter().enumerate() {
7986 let key: Vec<&Value> = uc.columns.iter().map(|&i| &row_values[i]).collect();
7987 let has_null = key.iter().any(|v| matches!(v, Value::Null));
7988 if has_null {
7989 continue;
7990 }
7991 let collides_in_table = table.rows().iter().any(|prow| {
7993 uc.columns
7994 .iter()
7995 .enumerate()
7996 .all(|(i, &p)| prow.values.get(p) == Some(key[i]))
7997 });
7998 let collides_in_batch = rows[..batch_idx].iter().any(|earlier| {
8000 uc.columns
8001 .iter()
8002 .enumerate()
8003 .all(|(i, &p)| earlier.get(p) == Some(key[i]))
8004 });
8005 if collides_in_table || collides_in_batch {
8006 let kind = if uc.is_primary_key { "PRIMARY KEY" } else { "UNIQUE" };
8007 let col_names: Vec<String> = uc
8008 .columns
8009 .iter()
8010 .map(|&i| table.schema().columns[i].name.clone())
8011 .collect();
8012 return Err(EngineError::Unsupported(alloc::format!(
8013 "{kind} violation on {child_table:?} columns {col_names:?}: \
8014 row #{batch_idx} duplicates an existing key"
8015 )));
8016 }
8017 }
8018 }
8019 Ok(())
8020}
8021
8022fn predicate_truthy(v: &spg_storage::Value) -> bool {
8030 use spg_storage::Value as V;
8031 match v {
8032 V::Bool(b) => *b,
8033 V::Int(n) => *n != 0,
8034 V::BigInt(n) => *n != 0,
8035 V::SmallInt(n) => *n != 0,
8036 _ => false,
8037 }
8038}
8039
8040fn check_existing_unique_violation(
8045 idx: &spg_storage::Index,
8046 schema: &spg_storage::TableSchema,
8047 rows: &[spg_storage::Row],
8048) -> Result<(), EngineError> {
8049 let predicate_expr = match idx.partial_predicate.as_deref() {
8050 Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8051 EngineError::Unsupported(alloc::format!(
8052 "stored partial predicate {s:?} failed to re-parse: {e:?}"
8053 ))
8054 })?),
8055 None => None,
8056 };
8057 let ctx = eval::EvalContext::new(&schema.columns, None);
8058 let key_positions = unique_key_positions(idx);
8059 let mut seen: alloc::vec::Vec<alloc::vec::Vec<spg_storage::Value>> = alloc::vec::Vec::new();
8060 for row in rows {
8061 if let Some(expr) = &predicate_expr {
8062 let v = eval::eval_expr(expr, row, &ctx).map_err(|e| {
8063 EngineError::Unsupported(alloc::format!(
8064 "evaluating UNIQUE INDEX predicate against existing row: {e:?}"
8065 ))
8066 })?;
8067 if !predicate_truthy(&v) {
8068 continue;
8069 }
8070 }
8071 let key: alloc::vec::Vec<spg_storage::Value> = key_positions
8072 .iter()
8073 .map(|&p| {
8074 row.values
8075 .get(p)
8076 .cloned()
8077 .unwrap_or(spg_storage::Value::Null)
8078 })
8079 .collect();
8080 if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8081 continue;
8082 }
8083 if seen.iter().any(|other| *other == key) {
8084 return Err(EngineError::Unsupported(alloc::format!(
8085 "CREATE UNIQUE INDEX {:?}: existing rows already violate the constraint",
8086 idx.name
8087 )));
8088 }
8089 seen.push(key);
8090 }
8091 Ok(())
8092}
8093
8094fn unique_key_positions(idx: &spg_storage::Index) -> alloc::vec::Vec<usize> {
8098 let mut out = alloc::vec::Vec::with_capacity(1 + idx.extra_column_positions.len());
8099 out.push(idx.column_position);
8100 out.extend_from_slice(&idx.extra_column_positions);
8101 out
8102}
8103
8104fn enforce_unique_index_inserts(
8112 catalog: &Catalog,
8113 table_name: &str,
8114 rows: &[alloc::vec::Vec<spg_storage::Value>],
8115) -> Result<(), EngineError> {
8116 let table = catalog.get(table_name).ok_or_else(|| {
8117 EngineError::Storage(StorageError::TableNotFound {
8118 name: table_name.into(),
8119 })
8120 })?;
8121 let schema = table.schema();
8122 let ctx = eval::EvalContext::new(&schema.columns, None);
8123 for idx in table.indices() {
8124 if !idx.is_unique {
8125 continue;
8126 }
8127 let predicate_expr = match idx.partial_predicate.as_deref() {
8129 Some(s) => Some(spg_sql::parser::parse_expression(s).map_err(|e| {
8130 EngineError::Unsupported(alloc::format!(
8131 "UNIQUE INDEX {:?} predicate {s:?} failed to re-parse: {e:?}",
8132 idx.name
8133 ))
8134 })?),
8135 None => None,
8136 };
8137 let key_positions = unique_key_positions(idx);
8138 let key_of = |values: &[spg_storage::Value]| -> alloc::vec::Vec<spg_storage::Value> {
8139 key_positions
8140 .iter()
8141 .map(|&p| {
8142 values
8143 .get(p)
8144 .cloned()
8145 .unwrap_or(spg_storage::Value::Null)
8146 })
8147 .collect()
8148 };
8149 let participates = |values: &[spg_storage::Value]| -> Result<bool, EngineError> {
8153 let Some(expr) = &predicate_expr else {
8154 return Ok(true);
8155 };
8156 let tmp_row = spg_storage::Row {
8157 values: values.to_vec(),
8158 };
8159 let v = eval::eval_expr(expr, &tmp_row, &ctx).map_err(|e| {
8160 EngineError::Unsupported(alloc::format!(
8161 "UNIQUE INDEX {:?} predicate eval: {e:?}",
8162 idx.name
8163 ))
8164 })?;
8165 Ok(predicate_truthy(&v))
8166 };
8167 for (batch_idx, row_values) in rows.iter().enumerate() {
8168 if !participates(row_values)? {
8169 continue;
8170 }
8171 let key = key_of(row_values);
8172 if key.iter().any(|v| matches!(v, spg_storage::Value::Null)) {
8173 continue;
8174 }
8175 for prow in table.rows() {
8177 if !participates(&prow.values)? {
8178 continue;
8179 }
8180 if key_of(&prow.values) == key {
8181 return Err(EngineError::Unsupported(alloc::format!(
8182 "UNIQUE INDEX {:?} violation on {table_name:?}: \
8183 row #{batch_idx} duplicates an existing key",
8184 idx.name
8185 )));
8186 }
8187 }
8188 for earlier in &rows[..batch_idx] {
8190 if !participates(earlier)? {
8191 continue;
8192 }
8193 if key_of(earlier) == key {
8194 return Err(EngineError::Unsupported(alloc::format!(
8195 "UNIQUE INDEX {:?} violation on {table_name:?}: \
8196 row #{batch_idx} duplicates an earlier row in the same batch",
8197 idx.name
8198 )));
8199 }
8200 }
8201 }
8202 }
8203 Ok(())
8204}
8205
8206fn enforce_fk_inserts(
8207 catalog: &Catalog,
8208 child_table: &str,
8209 fks: &[spg_storage::ForeignKeyConstraint],
8210 rows: &[Vec<Value>],
8211) -> Result<(), EngineError> {
8212 for fk in fks {
8213 let parent_is_self = fk.parent_table == child_table;
8214 let parent = if parent_is_self {
8215 catalog.get(child_table).ok_or_else(|| {
8218 EngineError::Storage(StorageError::TableNotFound {
8219 name: child_table.into(),
8220 })
8221 })?
8222 } else {
8223 catalog.get(&fk.parent_table).ok_or_else(|| {
8224 EngineError::Storage(StorageError::TableNotFound {
8225 name: fk.parent_table.clone(),
8226 })
8227 })?
8228 };
8229 for (batch_idx, row_values) in rows.iter().enumerate() {
8230 if fk.local_columns.len() == 1 {
8234 let v = &row_values[fk.local_columns[0]];
8235 if matches!(v, Value::Null) {
8236 continue;
8237 }
8238 let parent_col = fk.parent_columns[0];
8239 let key = spg_storage::IndexKey::from_value(v).ok_or_else(|| {
8240 EngineError::Unsupported(alloc::format!(
8241 "FOREIGN KEY column value of type {:?} is not index-eligible",
8242 v.data_type()
8243 ))
8244 })?;
8245 let present_committed = parent.indices().iter().any(|idx| {
8246 matches!(idx.kind, spg_storage::IndexKind::BTree(_))
8247 && idx.column_position == parent_col
8248 && idx.partial_predicate.is_none()
8249 && !idx.lookup_eq(&key).is_empty()
8250 });
8251 let present_in_batch = parent_is_self
8255 && rows[..batch_idx].iter().any(|earlier| {
8256 earlier.get(parent_col) == Some(v)
8257 });
8258 if !(present_committed || present_in_batch) {
8259 return Err(EngineError::Unsupported(alloc::format!(
8260 "FOREIGN KEY violation: no parent row in {:?} where {} = {:?}",
8261 fk.parent_table,
8262 parent
8263 .schema()
8264 .columns
8265 .get(parent_col)
8266 .map_or("?", |c| c.name.as_str()),
8267 v,
8268 )));
8269 }
8270 } else {
8271 if fk.local_columns
8275 .iter()
8276 .all(|&i| matches!(row_values.get(i), Some(Value::Null)))
8277 {
8278 continue;
8279 }
8280 let local: Vec<&Value> = fk.local_columns.iter().map(|&i| &row_values[i]).collect();
8281 let parent_match_committed = parent.rows().iter().any(|prow| {
8282 fk.parent_columns
8283 .iter()
8284 .enumerate()
8285 .all(|(i, &pi)| prow.values.get(pi) == Some(local[i]))
8286 });
8287 let parent_match_in_batch = parent_is_self
8288 && rows[..batch_idx].iter().any(|earlier| {
8289 fk.parent_columns
8290 .iter()
8291 .enumerate()
8292 .all(|(i, &pi)| earlier.get(pi) == Some(local[i]))
8293 });
8294 if !(parent_match_committed || parent_match_in_batch) {
8295 return Err(EngineError::Unsupported(alloc::format!(
8296 "FOREIGN KEY violation: no parent row in {:?} matching composite key",
8297 fk.parent_table,
8298 )));
8299 }
8300 }
8301 }
8302 }
8303 Ok(())
8304}
8305
/// One planned mutation of a single child table, produced by the foreign-key
/// planners (`plan_fk_parent_deletions` / `plan_fk_parent_updates`) and
/// executed by `apply_fk_child_step`.
#[derive(Debug, Clone)]
struct FkChildStep {
    /// Name of the table the action is applied to.
    child_table: String,
    /// What to do to that table's rows.
    action: FkChildAction,
}
8314
/// The row-level effect carried by an [`FkChildStep`].
///
/// In the two cell-writing variants the vectors are parallel: entry `i`
/// addresses the cell at row `positions[i]`, column `columns[i]`.
#[derive(Debug, Clone)]
enum FkChildAction {
    /// Delete the rows at these positions.
    Delete { positions: Vec<usize> },
    /// Overwrite each addressed cell with NULL.
    SetNull {
        positions: Vec<usize>,
        columns: Vec<usize>,
    },
    /// Overwrite each addressed cell with `defaults[i]`.
    /// `plan_fk_parent_updates` also uses this variant to carry the new key
    /// values of an `ON UPDATE CASCADE`.
    SetDefault {
        positions: Vec<usize>,
        columns: Vec<usize>,
        defaults: Vec<Value>,
    },
}
8335
/// Plans the child-table effects of deleting rows from `parent_table_name`.
///
/// * `to_delete_positions` – row positions being deleted from the parent;
///   used to avoid revisiting those rows when a table references itself.
/// * `to_delete_rows` – the values of the rows being deleted.
///
/// Every table in the catalog is scanned for foreign keys that point at the
/// table currently being processed, and each referencing child row is handled
/// according to the key's `on_delete` action:
///
/// * `Restrict` / `NoAction` – planning fails.
/// * `Cascade` – the child row is scheduled for deletion and pushed onto the
///   worklist so that its own referencing rows are processed as well.
/// * `SetNull` – the key columns are scheduled to become NULL; fails if any of
///   them is declared NOT NULL.
/// * `SetDefault` – the key columns are scheduled to take `col.default`; fails
///   if a column has none.
///
/// The returned steps list all set-null steps, then all set-default steps,
/// then all deletions. Nothing is modified here; the catalog is read-only.
///
/// # Errors
/// `EngineError::Unsupported` for the failure cases listed above.
///
/// # Panics
/// Indexes `parent_row[pi]` directly, so a parent column index outside the
/// row panics.
fn plan_fk_parent_deletions(
    catalog: &Catalog,
    parent_table_name: &str,
    to_delete_positions: &[usize],
    to_delete_rows: &[Vec<Value>],
) -> Result<Vec<FkChildStep>, EngineError> {
    use alloc::collections::{BTreeMap, BTreeSet};
    if to_delete_rows.is_empty() {
        return Ok(Vec::new());
    }
    // child table -> row positions to delete
    let mut delete_plan: BTreeMap<String, BTreeSet<usize>> = BTreeMap::new();
    // child table -> (row position, column index) cells to set to NULL
    let mut setnull_plan: BTreeMap<String, BTreeSet<(usize, usize)>> = BTreeMap::new();
    // child table -> (row position, column index) -> default value to write
    let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> =
        BTreeMap::new();
    // (table, row position) pairs already scheduled for deletion, seeded with
    // the rows the caller is deleting.
    let mut visited: BTreeSet<(String, usize)> = BTreeSet::new();
    for &p in to_delete_positions {
        visited.insert((parent_table_name.to_string(), p));
    }
    // Worklist of (table name, deleted row values) still to be examined.
    let mut work: Vec<(String, Vec<Value>)> = to_delete_rows
        .iter()
        .map(|r| (parent_table_name.to_string(), r.clone()))
        .collect();
    while let Some((cur_parent, parent_row)) = work.pop() {
        for child_name in catalog.table_names() {
            let child = catalog
                .get(&child_name)
                .expect("table_names → catalog.get round-trip is total");
            for fk in &child.schema().foreign_keys {
                if fk.parent_table != cur_parent {
                    continue;
                }
                let parent_key: Vec<&Value> = fk
                    .parent_columns
                    .iter()
                    .map(|&pi| &parent_row[pi])
                    .collect();
                // A parent key containing NULL is not matched against children.
                if parent_key.iter().any(|v| matches!(v, Value::Null)) {
                    continue;
                }
                for (child_row_idx, child_row) in child.rows().iter().enumerate() {
                    // Self-referencing table: rows already scheduled for
                    // deletion need no further action.
                    if child_name == cur_parent
                        && visited.contains(&(child_name.clone(), child_row_idx))
                    {
                        continue;
                    }
                    let matches_key = fk
                        .local_columns
                        .iter()
                        .enumerate()
                        .all(|(i, &li)| child_row.values.get(li) == Some(parent_key[i]));
                    if !matches_key {
                        continue;
                    }
                    match fk.on_delete {
                        spg_storage::FkAction::Restrict
                        | spg_storage::FkAction::NoAction => {
                            return Err(EngineError::Unsupported(alloc::format!(
                                "FOREIGN KEY violation: DELETE on {cur_parent:?} is \
                                 restricted by FK from {child_name:?}.{:?}",
                                fk.local_columns,
                            )));
                        }
                        spg_storage::FkAction::Cascade => {
                            // `insert` returns false for rows seen before, so
                            // each row is queued at most once.
                            if visited.insert((child_name.clone(), child_row_idx)) {
                                delete_plan
                                    .entry(child_name.clone())
                                    .or_default()
                                    .insert(child_row_idx);
                                work.push((child_name.clone(), child_row.values.clone()));
                            }
                        }
                        spg_storage::FkAction::SetNull => {
                            // Validate every key column before recording any of them.
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                if !col.nullable {
                                    return Err(EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET NULL: column \
                                         {child_name:?}.{:?} is NOT NULL — cannot SET NULL",
                                        col.name,
                                    )));
                                }
                            }
                            let entry = setnull_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                entry.insert((child_row_idx, li));
                            }
                        }
                        spg_storage::FkAction::SetDefault => {
                            let entry =
                                setdefault_plan.entry(child_name.clone()).or_default();
                            for &li in &fk.local_columns {
                                let col = child.schema().columns.get(li).ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FK local column {li} missing in {child_name:?}"
                                    ))
                                })?;
                                let default = col.default.clone().ok_or_else(|| {
                                    EngineError::Unsupported(alloc::format!(
                                        "FOREIGN KEY ON DELETE SET DEFAULT: column \
                                         {child_name:?}.{:?} has no DEFAULT declared",
                                        col.name,
                                    ))
                                })?;
                                entry.insert((child_row_idx, li), default);
                            }
                        }
                    }
                }
            }
        }
    }
    // Emit cell writes before deletions.
    let mut steps: Vec<FkChildStep> = Vec::new();
    for (child_table, entries) in setnull_plan {
        let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetNull { positions, columns },
        });
    }
    for (child_table, entries) in setdefault_plan {
        let mut positions = Vec::with_capacity(entries.len());
        let mut columns = Vec::with_capacity(entries.len());
        let mut defaults = Vec::with_capacity(entries.len());
        for ((p, c), v) in entries {
            positions.push(p);
            columns.push(c);
            defaults.push(v);
        }
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::SetDefault {
                positions,
                columns,
                defaults,
            },
        });
    }
    for (child_table, positions) in delete_plan {
        steps.push(FkChildStep {
            child_table,
            action: FkChildAction::Delete {
                positions: positions.into_iter().collect(),
            },
        });
    }
    Ok(steps)
}
8512
8513fn plan_fk_parent_updates(
8530 catalog: &Catalog,
8531 parent_table_name: &str,
8532 plan_with_old: &[(usize, Vec<Value>, Vec<Value>)],
8533) -> Result<Vec<FkChildStep>, EngineError> {
8534 use alloc::collections::BTreeMap;
8535 if plan_with_old.is_empty() {
8536 return Ok(Vec::new());
8537 }
8538 let delete_plan: BTreeMap<String, alloc::collections::BTreeSet<usize>> = BTreeMap::new();
8543 let mut setnull_plan: BTreeMap<
8544 String,
8545 alloc::collections::BTreeSet<(usize, usize)>,
8546 > = BTreeMap::new();
8547 let mut setdefault_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> =
8548 BTreeMap::new();
8549 let mut cascade_plan: BTreeMap<String, BTreeMap<(usize, usize), Value>> = BTreeMap::new();
8551
8552 for child_name in catalog.table_names() {
8553 let child = catalog
8554 .get(&child_name)
8555 .expect("table_names → catalog.get total");
8556 for fk in &child.schema().foreign_keys {
8557 if fk.parent_table != parent_table_name {
8558 continue;
8559 }
8560 for (_pos, old_row, new_row) in plan_with_old {
8561 let key_changed = fk
8563 .parent_columns
8564 .iter()
8565 .any(|&pi| old_row.get(pi) != new_row.get(pi));
8566 if !key_changed {
8567 continue;
8568 }
8569 let old_key: Vec<&Value> = fk
8571 .parent_columns
8572 .iter()
8573 .map(|&pi| &old_row[pi])
8574 .collect();
8575 if old_key.iter().any(|v| matches!(v, Value::Null)) {
8576 continue;
8578 }
8579 let new_key: Vec<&Value> = fk
8580 .parent_columns
8581 .iter()
8582 .map(|&pi| &new_row[pi])
8583 .collect();
8584 for (child_row_idx, child_row) in child.rows().iter().enumerate() {
8585 if child_name == parent_table_name
8588 && plan_with_old
8589 .iter()
8590 .any(|(p, _, _)| *p == child_row_idx)
8591 {
8592 continue;
8593 }
8594 let matches_key = fk
8595 .local_columns
8596 .iter()
8597 .enumerate()
8598 .all(|(i, &li)| child_row.values.get(li) == Some(old_key[i]));
8599 if !matches_key {
8600 continue;
8601 }
8602 match fk.on_update {
8603 spg_storage::FkAction::Restrict
8604 | spg_storage::FkAction::NoAction => {
8605 return Err(EngineError::Unsupported(alloc::format!(
8606 "FOREIGN KEY violation: UPDATE on {parent_table_name:?} PK is \
8607 restricted by FK from {child_name:?}.{:?}",
8608 fk.local_columns,
8609 )));
8610 }
8611 spg_storage::FkAction::Cascade => {
8612 let entry = cascade_plan.entry(child_name.clone()).or_default();
8614 for (i, &li) in fk.local_columns.iter().enumerate() {
8615 entry.insert((child_row_idx, li), new_key[i].clone());
8616 }
8617 }
8618 spg_storage::FkAction::SetNull => {
8619 for &li in &fk.local_columns {
8620 let col = child.schema().columns.get(li).ok_or_else(|| {
8621 EngineError::Unsupported(alloc::format!(
8622 "FK local column {li} missing in {child_name:?}"
8623 ))
8624 })?;
8625 if !col.nullable {
8626 return Err(EngineError::Unsupported(alloc::format!(
8627 "FOREIGN KEY ON UPDATE SET NULL: column \
8628 {child_name:?}.{:?} is NOT NULL",
8629 col.name,
8630 )));
8631 }
8632 }
8633 let entry = setnull_plan.entry(child_name.clone()).or_default();
8634 for &li in &fk.local_columns {
8635 entry.insert((child_row_idx, li));
8636 }
8637 }
8638 spg_storage::FkAction::SetDefault => {
8639 let entry =
8640 setdefault_plan.entry(child_name.clone()).or_default();
8641 for &li in &fk.local_columns {
8642 let col = child.schema().columns.get(li).ok_or_else(|| {
8643 EngineError::Unsupported(alloc::format!(
8644 "FK local column {li} missing in {child_name:?}"
8645 ))
8646 })?;
8647 let default = col.default.clone().ok_or_else(|| {
8648 EngineError::Unsupported(alloc::format!(
8649 "FOREIGN KEY ON UPDATE SET DEFAULT: column \
8650 {child_name:?}.{:?} has no DEFAULT",
8651 col.name,
8652 ))
8653 })?;
8654 entry.insert((child_row_idx, li), default);
8655 }
8656 }
8657 }
8658 }
8659 }
8660 }
8661 }
8662 let mut steps: Vec<FkChildStep> = Vec::new();
8665 for (child_table, entries) in cascade_plan {
8666 let mut positions = Vec::with_capacity(entries.len());
8667 let mut columns = Vec::with_capacity(entries.len());
8668 let mut defaults = Vec::with_capacity(entries.len());
8669 for ((p, c), v) in entries {
8670 positions.push(p);
8671 columns.push(c);
8672 defaults.push(v);
8673 }
8674 steps.push(FkChildStep {
8679 child_table,
8680 action: FkChildAction::SetDefault {
8681 positions,
8682 columns,
8683 defaults,
8684 },
8685 });
8686 }
8687 for (child_table, entries) in setnull_plan {
8688 let (positions, columns): (Vec<usize>, Vec<usize>) = entries.into_iter().unzip();
8689 steps.push(FkChildStep {
8690 child_table,
8691 action: FkChildAction::SetNull { positions, columns },
8692 });
8693 }
8694 for (child_table, entries) in setdefault_plan {
8695 let mut positions = Vec::with_capacity(entries.len());
8696 let mut columns = Vec::with_capacity(entries.len());
8697 let mut defaults = Vec::with_capacity(entries.len());
8698 for ((p, c), v) in entries {
8699 positions.push(p);
8700 columns.push(c);
8701 defaults.push(v);
8702 }
8703 steps.push(FkChildStep {
8704 child_table,
8705 action: FkChildAction::SetDefault {
8706 positions,
8707 columns,
8708 defaults,
8709 },
8710 });
8711 }
8712 let _ = delete_plan; Ok(steps)
8714}
8715
8716fn apply_fk_child_step(
8720 catalog: &mut Catalog,
8721 step: &FkChildStep,
8722) -> Result<(), EngineError> {
8723 let child = catalog.get_mut(&step.child_table).ok_or_else(|| {
8724 EngineError::Storage(StorageError::TableNotFound {
8725 name: step.child_table.clone(),
8726 })
8727 })?;
8728 match &step.action {
8729 FkChildAction::Delete { positions } => {
8730 let _ = child.delete_rows(positions);
8731 }
8732 FkChildAction::SetNull { positions, columns } => {
8733 apply_per_cell_writes(child, positions, columns, |_| Value::Null)?;
8734 }
8735 FkChildAction::SetDefault {
8736 positions,
8737 columns,
8738 defaults,
8739 } => {
8740 apply_per_cell_writes(child, positions, columns, |i| defaults[i].clone())?;
8741 }
8742 }
8743 Ok(())
8744}
8745
8746fn apply_per_cell_writes(
8752 child: &mut spg_storage::Table,
8753 positions: &[usize],
8754 columns: &[usize],
8755 mut value_for: impl FnMut(usize) -> Value,
8756) -> Result<(), EngineError> {
8757 use alloc::collections::BTreeMap;
8758 let mut by_row: BTreeMap<usize, Vec<(usize, Value)>> = BTreeMap::new();
8759 for i in 0..positions.len() {
8760 by_row
8761 .entry(positions[i])
8762 .or_default()
8763 .push((columns[i], value_for(i)));
8764 }
8765 for (pos, mutations) in by_row {
8766 let mut new_values = child.rows()[pos].values.clone();
8767 for (col, v) in mutations {
8768 if let Some(slot) = new_values.get_mut(col) {
8769 *slot = v;
8770 }
8771 }
8772 child
8773 .update_row(pos, new_values)
8774 .map_err(EngineError::Storage)?;
8775 }
8776 Ok(())
8777}
8778
8779fn fk_action_sql_to_storage(a: spg_sql::ast::FkAction) -> spg_storage::FkAction {
8780 match a {
8781 spg_sql::ast::FkAction::Restrict => spg_storage::FkAction::Restrict,
8782 spg_sql::ast::FkAction::Cascade => spg_storage::FkAction::Cascade,
8783 spg_sql::ast::FkAction::SetNull => spg_storage::FkAction::SetNull,
8784 spg_sql::ast::FkAction::SetDefault => spg_storage::FkAction::SetDefault,
8785 spg_sql::ast::FkAction::NoAction => spg_storage::FkAction::NoAction,
8786 }
8787}
8788
8789fn resolve_column_default_free(
8795 col: &ColumnSchema,
8796 clock_fn: Option<ClockFn>,
8797) -> Result<Value, EngineError> {
8798 if let Some(rt) = &col.runtime_default {
8799 return eval_runtime_default_free(rt, col.ty, clock_fn);
8800 }
8801 Ok(col.default.clone().unwrap_or(Value::Null))
8802}
8803
8804fn eval_runtime_default_free(
8805 rt: &str,
8806 ty: DataType,
8807 clock_fn: Option<ClockFn>,
8808) -> Result<Value, EngineError> {
8809 let s = rt.trim().to_ascii_lowercase();
8810 let canonical = s.trim_end_matches("()");
8811 let now_us = match clock_fn {
8812 Some(f) => f(),
8813 None => 0,
8814 };
8815 let v = match canonical {
8816 "now" | "current_timestamp" | "localtimestamp" => {
8817 Value::Timestamp(now_us)
8818 }
8819 "current_date" => Value::Date((now_us / 86_400_000_000) as i32),
8820 "current_time" | "localtime" => Value::Timestamp(now_us),
8821 other => {
8822 return Err(EngineError::Unsupported(alloc::format!(
8823 "runtime DEFAULT expression {other:?} not supported \
8824 (v7.9.21 whitelist: now() / current_timestamp / \
8825 current_date / current_time / localtimestamp / \
8826 localtime)"
8827 )));
8828 }
8829 };
8830 coerce_value(v, ty, "DEFAULT", 0)
8831}
8832
8833fn is_runtime_default_expr(expr: &Expr) -> bool {
8839 match expr {
8840 Expr::FunctionCall { .. } => true,
8841 Expr::Unary { expr, .. } => is_runtime_default_expr(expr),
8842 _ => false,
8843 }
8844}
8845
8846fn column_def_to_schema(c: ColumnDef) -> Result<ColumnSchema, EngineError> {
8847 let ty = column_type_to_data_type(c.ty);
8848 let mut schema = ColumnSchema::new(c.name.clone(), ty, c.nullable);
8849 if let Some(default_expr) = c.default {
8850 if is_runtime_default_expr(&default_expr) {
8856 let display = alloc::format!("{default_expr}");
8857 schema = schema.with_runtime_default(display);
8858 } else {
8859 let raw = literal_expr_to_value(default_expr)?;
8860 let coerced = coerce_value(raw, ty, &c.name, 0)?;
8861 schema = schema.with_default(coerced);
8862 }
8863 }
8864 if c.auto_increment {
8865 if !matches!(ty, DataType::SmallInt | DataType::Int | DataType::BigInt) {
8867 return Err(EngineError::Unsupported(alloc::format!(
8868 "AUTO_INCREMENT requires an integer column type, got {ty:?}"
8869 )));
8870 }
8871 schema = schema.with_auto_increment();
8872 }
8873 Ok(schema)
8874}
8875
/// Decodes the text form of a BYTEA value into raw bytes.
///
/// Surrounding whitespace is trimmed first. Two formats are understood:
///
/// * **Hex** – a leading `\x` (or `\X`) followed by an even number of hex
///   digits; whitespace between digits is ignored.
/// * **Escape** – `\\` yields one backslash and `\ooo` (three octal digits,
///   value at most `377`) yields that byte. Every other byte, including a
///   backslash that does not start a valid escape, is copied through as is.
///
/// # Errors
/// `"odd-length hex literal"` or `"invalid hex digit"` for malformed hex
/// input. The escape format never fails.
fn decode_bytea_literal(s: &str) -> Result<alloc::vec::Vec<u8>, &'static str> {
    let s = s.trim();
    if let Some(hex) = s.strip_prefix("\\x").or_else(|| s.strip_prefix("\\X")) {
        let cleaned: alloc::string::String = hex.chars().filter(|c| !c.is_whitespace()).collect();
        let digits = cleaned.as_bytes();
        if digits.len() % 2 != 0 {
            return Err("odd-length hex literal");
        }
        return digits
            .chunks_exact(2)
            .map(|pair| -> Result<u8, &'static str> {
                let hi = hex_nibble(pair[0])?;
                let lo = hex_nibble(pair[1])?;
                Ok((hi << 4) | lo)
            })
            .collect();
    }
    let bytes = s.as_bytes();
    let mut out = alloc::vec::Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i..] {
            [b'\\', b'\\', ..] => {
                out.push(b'\\');
                i += 2;
            }
            // Only genuine octal digits form an escape: the first is limited
            // to 0-3 so the value fits in a byte, the others to 0-7. Digits
            // `8` and `9` are not octal and must not be decoded.
            [b'\\', d0 @ b'0'..=b'3', d1 @ b'0'..=b'7', d2 @ b'0'..=b'7', ..] => {
                out.push(((d0 - b'0') << 6) | ((d1 - b'0') << 3) | (d2 - b'0'));
                i += 4;
            }
            _ => {
                out.push(bytes[i]);
                i += 1;
            }
        }
    }
    Ok(out)
}

/// Converts one ASCII hex digit (either case) to its value `0..=15`.
///
/// # Errors
/// `"invalid hex digit"` for any other byte.
fn hex_nibble(b: u8) -> Result<u8, &'static str> {
    match b {
        b'0'..=b'9' => Ok(b - b'0'),
        b'a'..=b'f' => Ok(b - b'a' + 10),
        b'A'..=b'F' => Ok(b - b'A' + 10),
        _ => Err("invalid hex digit"),
    }
}
8937
8938fn encode_bytea_hex(b: &[u8]) -> alloc::string::String {
8942 let mut out = alloc::string::String::with_capacity(2 + 2 * b.len());
8943 out.push_str("\\x");
8944 for byte in b {
8945 let hi = byte >> 4;
8946 let lo = byte & 0x0F;
8947 out.push(hex_digit(hi));
8948 out.push(hex_digit(lo));
8949 }
8950 out
8951}
8952
/// Returns the lowercase hex character for a nibble value `0..=15`, or `'?'`
/// for anything larger.
const fn hex_digit(n: u8) -> char {
    if n < 10 {
        (b'0' + n) as char
    } else if n < 16 {
        (b'a' + (n - 10)) as char
    } else {
        '?'
    }
}
8960
8961const fn column_type_to_data_type(t: ColumnTypeName) -> DataType {
8962 match t {
8963 ColumnTypeName::SmallInt => DataType::SmallInt,
8964 ColumnTypeName::Int => DataType::Int,
8965 ColumnTypeName::BigInt => DataType::BigInt,
8966 ColumnTypeName::Float => DataType::Float,
8967 ColumnTypeName::Text => DataType::Text,
8968 ColumnTypeName::Varchar(n) => DataType::Varchar(n),
8969 ColumnTypeName::Char(n) => DataType::Char(n),
8970 ColumnTypeName::Bool => DataType::Bool,
8971 ColumnTypeName::Vector { dim, encoding } => DataType::Vector {
8972 dim,
8973 encoding: match encoding {
8974 SqlVecEncoding::F32 => VecEncoding::F32,
8975 SqlVecEncoding::Sq8 => VecEncoding::Sq8,
8976 SqlVecEncoding::F16 => VecEncoding::F16,
8977 },
8978 },
8979 ColumnTypeName::Numeric(precision, scale) => DataType::Numeric { precision, scale },
8980 ColumnTypeName::Date => DataType::Date,
8981 ColumnTypeName::Timestamp => DataType::Timestamp,
8982 ColumnTypeName::Timestamptz => DataType::Timestamptz,
8983 ColumnTypeName::Json => DataType::Json,
8984 ColumnTypeName::Jsonb => DataType::Jsonb,
8985 ColumnTypeName::Bytes => DataType::Bytes,
8986 }
8987}
8988
8989fn literal_expr_to_value(expr: Expr) -> Result<Value, EngineError> {
8993 match expr {
8994 Expr::Literal(l) => Ok(literal_to_value(l)),
8995 Expr::Cast { expr, target } => {
8996 let inner_value = literal_expr_to_value(*expr)?;
8997 crate::eval::cast_value(inner_value, target).map_err(EngineError::Eval)
8998 }
8999 Expr::Unary {
9000 op: UnOp::Neg,
9001 expr,
9002 } => match *expr {
9003 Expr::Literal(Literal::Integer(n)) => {
9004 let neg = n.checked_neg().ok_or_else(|| {
9007 EngineError::Unsupported("integer literal overflow on negation".into())
9008 })?;
9009 Ok(int_value_for(neg))
9010 }
9011 Expr::Literal(Literal::Float(x)) => Ok(Value::Float(-x)),
9012 other => Err(EngineError::Unsupported(alloc::format!(
9013 "unary minus over non-literal expression: {other:?}"
9014 ))),
9015 },
9016 other => Err(EngineError::Unsupported(alloc::format!(
9017 "non-literal INSERT value expression: {other:?}"
9018 ))),
9019 }
9020}
9021
9022fn literal_to_value(l: Literal) -> Value {
9023 match l {
9024 Literal::Integer(n) => int_value_for(n),
9025 Literal::Float(x) => Value::Float(x),
9026 Literal::String(s) => Value::Text(s),
9027 Literal::Bool(b) => Value::Bool(b),
9028 Literal::Null => Value::Null,
9029 Literal::Vector(v) => Value::Vector(v),
9030 Literal::Interval { months, micros, .. } => Value::Interval { months, micros },
9031 }
9032}
9033
9034fn int_value_for(n: i64) -> Value {
9038 if let Ok(small) = i32::try_from(n) {
9039 Value::Int(small)
9040 } else {
9041 Value::BigInt(n)
9042 }
9043}
9044
9045#[allow(clippy::too_many_lines)]
9051fn coerce_value(
9052 v: Value,
9053 expected: DataType,
9054 col_name: &str,
9055 position: usize,
9056) -> Result<Value, EngineError> {
9057 if v.is_null() {
9058 return Ok(Value::Null);
9059 }
9060 let actual = v.data_type().expect("non-null");
9061 if actual == expected {
9062 return Ok(v);
9063 }
9064 let coerced =
9065 match (v, expected) {
9066 (Value::Int(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9067 (Value::Int(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9068 (Value::Int(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9069 (Value::Int(n), DataType::Numeric { precision, scale }) => Some(numeric_from_integer(
9070 i128::from(n),
9071 precision,
9072 scale,
9073 col_name,
9074 )?),
9075 (Value::SmallInt(n), DataType::Int) => Some(Value::Int(i32::from(n))),
9076 (Value::SmallInt(n), DataType::BigInt) => Some(Value::BigInt(i64::from(n))),
9077 (Value::SmallInt(n), DataType::Float) => Some(Value::Float(f64::from(n))),
9078 (Value::SmallInt(n), DataType::Numeric { precision, scale }) => Some(
9079 numeric_from_integer(i128::from(n), precision, scale, col_name)?,
9080 ),
9081 (Value::BigInt(n), DataType::Int) => i32::try_from(n).ok().map(Value::Int),
9082 (Value::BigInt(n), DataType::SmallInt) => i16::try_from(n).ok().map(Value::SmallInt),
9083 #[allow(clippy::cast_precision_loss)]
9084 (Value::BigInt(n), DataType::Float) => Some(Value::Float(n as f64)),
9085 (Value::BigInt(n), DataType::Numeric { precision, scale }) => Some(
9086 numeric_from_integer(i128::from(n), precision, scale, col_name)?,
9087 ),
9088 (Value::Float(x), DataType::Numeric { precision, scale }) => {
9089 Some(numeric_from_float(x, precision, scale, col_name)?)
9090 }
9091 (Value::Text(s), DataType::Date) => {
9093 let d = eval::parse_date_literal(&s).ok_or_else(|| {
9094 EngineError::Eval(EvalError::TypeMismatch {
9095 detail: alloc::format!(
9096 "cannot parse {s:?} as DATE for column `{col_name}`"
9097 ),
9098 })
9099 })?;
9100 Some(Value::Date(d))
9101 }
9102 (Value::Text(s), DataType::Json | DataType::Jsonb) => Some(Value::Json(s)),
9106 (Value::Json(s), DataType::Text) => Some(Value::Text(s)),
9107 (Value::Text(s), DataType::Bytes) => {
9114 let bytes = decode_bytea_literal(&s).map_err(|e| {
9115 EngineError::Eval(EvalError::TypeMismatch {
9116 detail: alloc::format!(
9117 "cannot parse {s:?} as BYTEA for column `{col_name}`: {e}"
9118 ),
9119 })
9120 })?;
9121 Some(Value::Bytes(bytes))
9122 }
9123 (Value::Bytes(b), DataType::Text) => Some(Value::Text(encode_bytea_hex(&b))),
9127 (Value::Text(s), DataType::Timestamp | DataType::Timestamptz) => {
9128 let t = eval::parse_timestamp_literal(&s).ok_or_else(|| {
9129 EngineError::Eval(EvalError::TypeMismatch {
9130 detail: alloc::format!(
9131 "cannot parse {s:?} as TIMESTAMP for column `{col_name}`"
9132 ),
9133 })
9134 })?;
9135 Some(Value::Timestamp(t))
9136 }
9137 (Value::Date(d), DataType::Timestamp | DataType::Timestamptz) => {
9140 Some(Value::Timestamp(i64::from(d) * 86_400_000_000))
9141 }
9142 (Value::Timestamp(t), DataType::Timestamptz) => Some(Value::Timestamp(t)),
9146 (Value::Timestamp(t), DataType::Date) => {
9147 let days = t.div_euclid(86_400_000_000);
9148 i32::try_from(days).ok().map(Value::Date)
9149 }
9150 (
9151 Value::Numeric {
9152 scaled,
9153 scale: src_scale,
9154 },
9155 DataType::Numeric { precision, scale },
9156 ) => Some(numeric_rescale(
9157 scaled, src_scale, precision, scale, col_name,
9158 )?),
9159 #[allow(clippy::cast_precision_loss)]
9160 (Value::Numeric { scaled, scale }, DataType::Float) => {
9161 let mut div = 1.0_f64;
9162 for _ in 0..scale {
9163 div *= 10.0;
9164 }
9165 Some(Value::Float((scaled as f64) / div))
9166 }
9167 (Value::Numeric { scaled, scale }, DataType::Int) => {
9168 let truncated = numeric_truncate_to_integer(scaled, scale);
9169 i32::try_from(truncated).ok().map(Value::Int)
9170 }
9171 (Value::Numeric { scaled, scale }, DataType::BigInt) => {
9172 let truncated = numeric_truncate_to_integer(scaled, scale);
9173 i64::try_from(truncated).ok().map(Value::BigInt)
9174 }
9175 (Value::Numeric { scaled, scale }, DataType::SmallInt) => {
9176 let truncated = numeric_truncate_to_integer(scaled, scale);
9177 i16::try_from(truncated).ok().map(Value::SmallInt)
9178 }
9179 (Value::Text(s), DataType::Varchar(max)) => {
9181 if u32::try_from(s.chars().count()).unwrap_or(u32::MAX) <= max {
9182 Some(Value::Text(s))
9183 } else {
9184 return Err(EngineError::Unsupported(alloc::format!(
9185 "value for VARCHAR({max}) column `{col_name}` exceeds length: \
9186 {} chars",
9187 s.chars().count()
9188 )));
9189 }
9190 }
9191 (
9199 Value::Vector(v),
9200 DataType::Vector {
9201 dim,
9202 encoding: VecEncoding::Sq8,
9203 },
9204 ) if v.len() == dim as usize => {
9205 Some(Value::Sq8Vector(spg_storage::quantize::quantize(&v)))
9206 }
9207 (
9212 Value::Vector(v),
9213 DataType::Vector {
9214 dim,
9215 encoding: VecEncoding::F16,
9216 },
9217 ) if v.len() == dim as usize => Some(Value::HalfVector(
9218 spg_storage::halfvec::HalfVector::from_f32_slice(&v),
9219 )),
9220 (Value::Text(s), DataType::Char(size)) => {
9224 let len = u32::try_from(s.chars().count()).unwrap_or(u32::MAX);
9225 if len > size {
9226 return Err(EngineError::Unsupported(alloc::format!(
9227 "value for CHAR({size}) column `{col_name}` exceeds length: \
9228 {len} chars"
9229 )));
9230 }
9231 let need = (size - len) as usize;
9232 let mut padded = s;
9233 padded.reserve(need);
9234 for _ in 0..need {
9235 padded.push(' ');
9236 }
9237 Some(Value::Text(padded))
9238 }
9239 _ => None,
9240 };
9241 coerced.ok_or(EngineError::Storage(StorageError::TypeMismatch {
9242 column: col_name.into(),
9243 expected,
9244 actual,
9245 position,
9246 }))
9247}
9248
9249#[cfg(test)]
9250mod tests {
9251 use super::*;
9252 use alloc::vec;
9253
9254 fn unwrap_command_ok(r: &QueryResult) -> usize {
9255 match r {
9256 QueryResult::CommandOk { affected, .. } => *affected,
9257 QueryResult::Rows { .. } => panic!("expected CommandOk, got Rows"),
9258 }
9259 }
9260
9261 #[test]
9262 fn create_table_registers_schema() {
9263 let mut e = Engine::new();
9264 e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT)")
9265 .unwrap();
9266 assert_eq!(e.catalog().table_count(), 1);
9267 let t = e.catalog().get("foo").unwrap();
9268 assert_eq!(t.schema().columns.len(), 2);
9269 assert_eq!(t.schema().columns[0].ty, DataType::Int);
9270 assert!(!t.schema().columns[0].nullable);
9271 assert_eq!(t.schema().columns[1].ty, DataType::Text);
9272 }
9273
9274 #[test]
9275 fn create_table_vector_default_is_f32_encoded() {
9276 let mut e = Engine::new();
9277 e.execute("CREATE TABLE t (v VECTOR(8))").unwrap();
9278 let t = e.catalog().get("t").unwrap();
9279 assert_eq!(
9280 t.schema().columns[0].ty,
9281 DataType::Vector {
9282 dim: 8,
9283 encoding: VecEncoding::F32,
9284 },
9285 );
9286 }
9287
9288 #[test]
9289 fn create_table_vector_using_sq8_succeeds() {
9290 let mut e = Engine::new();
9294 e.execute("CREATE TABLE t (v VECTOR(8) USING SQ8)").unwrap();
9295 let t = e.catalog().get("t").unwrap();
9296 assert_eq!(
9297 t.schema().columns[0].ty,
9298 DataType::Vector {
9299 dim: 8,
9300 encoding: VecEncoding::Sq8,
9301 },
9302 );
9303 }
9304
9305 #[test]
9306 fn insert_into_sq8_column_quantises_f32_payload() {
9307 let mut e = Engine::new();
9314 e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9315 e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9316 .unwrap();
9317 let t = e.catalog().get("t").unwrap();
9318 assert_eq!(t.rows().len(), 1);
9319 match &t.rows()[0].values[0] {
9320 Value::Sq8Vector(q) => {
9321 assert_eq!(q.bytes.len(), 4);
9322 assert!((q.min - 0.0).abs() < 1e-6);
9324 assert!((q.max - 1.0).abs() < 1e-6);
9325 }
9326 other => panic!("expected Sq8Vector cell, got {other:?}"),
9327 }
9328 }
9329
9330 #[test]
9331 fn create_table_vector_using_half_succeeds_and_insert_converts_to_f16() {
9332 let mut e = Engine::new();
9339 e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9340 .unwrap();
9341 e.execute("INSERT INTO t VALUES ([0.0, 0.25, 0.5, 1.0])")
9342 .unwrap();
9343 let t = e.catalog().get("t").unwrap();
9344 assert_eq!(t.rows().len(), 1);
9345 match &t.rows()[0].values[0] {
9346 Value::HalfVector(h) => {
9347 assert_eq!(h.dim(), 4);
9348 let back = h.to_f32_vec();
9349 let expected = alloc::vec![0.0_f32, 0.25, 0.5, 1.0];
9350 for (g, e) in back.iter().zip(expected.iter()) {
9351 assert!(
9352 (g - e).abs() < 1e-6,
9353 "{g} vs {e} should be exact on f16 grid"
9354 );
9355 }
9356 }
9357 other => panic!("expected HalfVector cell, got {other:?}"),
9358 }
9359 }
9360
9361 #[test]
9362 fn alter_index_rebuild_in_place_succeeds() {
9363 let mut e = Engine::new();
9368 e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)")
9369 .unwrap();
9370 for i in 0..8_i32 {
9371 #[allow(clippy::cast_precision_loss)]
9372 let base = (i as f32) * 0.1;
9373 e.execute(&alloc::format!(
9374 "INSERT INTO t VALUES ({i}, [{base}, {b1}, {b2}])",
9375 b1 = base + 0.01,
9376 b2 = base + 0.02,
9377 ))
9378 .unwrap();
9379 }
9380 e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9381 e.execute("ALTER INDEX t_idx REBUILD").unwrap();
9382 assert_eq!(
9384 e.catalog().get("t").unwrap().schema().columns[1].ty,
9385 DataType::Vector {
9386 dim: 3,
9387 encoding: VecEncoding::F32,
9388 },
9389 );
9390 }
9391
9392 #[test]
9393 fn alter_index_rebuild_with_encoding_switches_cell_type() {
9394 let mut e = Engine::new();
9399 e.execute("CREATE TABLE t (id INT NOT NULL, v VECTOR(4) NOT NULL)")
9400 .unwrap();
9401 e.execute("INSERT INTO t VALUES (1, [0.0, 0.25, 0.5, 1.0])")
9402 .unwrap();
9403 e.execute("CREATE INDEX t_idx ON t USING hnsw (v)").unwrap();
9404 e.execute("ALTER INDEX t_idx REBUILD WITH (encoding = SQ8)")
9405 .unwrap();
9406 let t = e.catalog().get("t").unwrap();
9407 assert_eq!(
9408 t.schema().columns[1].ty,
9409 DataType::Vector {
9410 dim: 4,
9411 encoding: VecEncoding::Sq8,
9412 },
9413 );
9414 assert!(matches!(t.rows()[0].values[1], Value::Sq8Vector(_)));
9415 }
9416
9417 #[test]
9418 fn alter_index_rebuild_unknown_index_errors() {
9419 let mut e = Engine::new();
9420 let err = e.execute("ALTER INDEX nope REBUILD").unwrap_err();
9421 assert!(
9422 matches!(
9423 &err,
9424 EngineError::Storage(StorageError::IndexNotFound { name }) if name == "nope"
9425 ),
9426 "got: {err}"
9427 );
9428 }
9429
9430 #[test]
9431 fn alter_index_rebuild_on_btree_index_errors() {
9432 let mut e = Engine::new();
9435 e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9436 e.execute("INSERT INTO t VALUES (1)").unwrap();
9437 e.execute("CREATE INDEX t_idx ON t (id)").unwrap();
9438 let err = e.execute("ALTER INDEX t_idx REBUILD").unwrap_err();
9439 assert!(
9440 matches!(&err, EngineError::Storage(StorageError::Unsupported(_))),
9441 "got: {err}"
9442 );
9443 }
9444
9445 #[test]
9446 fn prepared_insert_substitutes_placeholders() {
9447 let mut e = Engine::new();
9453 e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT NOT NULL)")
9454 .unwrap();
9455 let stmt = e.prepare("INSERT INTO t VALUES ($1, $2)").unwrap();
9456 for (id, name) in [(1, "alice"), (2, "bob"), (3, "carol")] {
9457 e.execute_prepared(
9458 stmt.clone(),
9459 &[Value::Int(id), Value::Text(name.into())],
9460 )
9461 .unwrap();
9462 }
9463 let rows_result = e.execute("SELECT id, name FROM t").unwrap();
9465 let QueryResult::Rows { rows, .. } = rows_result else {
9466 panic!("expected Rows")
9467 };
9468 assert_eq!(rows.len(), 3);
9469 }
9470
9471 #[test]
9472 fn prepared_select_with_placeholder_filters_rows() {
9473 let mut e = Engine::new();
9474 e.execute("CREATE TABLE t (id INT NOT NULL, v INT NOT NULL)")
9475 .unwrap();
9476 for i in 0..10_i32 {
9477 e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, {})", i * 7))
9478 .unwrap();
9479 }
9480 let stmt = e
9481 .prepare("SELECT id FROM t WHERE v = $1")
9482 .unwrap();
9483 let QueryResult::Rows { rows, .. } = e
9484 .execute_prepared(stmt, &[Value::Int(35)])
9485 .unwrap()
9486 else {
9487 panic!("expected Rows")
9488 };
9489 assert_eq!(rows.len(), 1);
9491 assert_eq!(rows[0].values[0], Value::Int(5));
9492 }
9493
9494 #[test]
9495 fn prepared_too_few_params_errors() {
9496 let mut e = Engine::new();
9497 e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9498 let stmt = e.prepare("INSERT INTO t VALUES ($1)").unwrap();
9499 let err = e.execute_prepared(stmt, &[]).unwrap_err();
9500 assert!(
9501 matches!(
9502 &err,
9503 EngineError::Eval(EvalError::PlaceholderOutOfRange { n: 1, bound: 0 })
9504 ),
9505 "got: {err}"
9506 );
9507 }
9508
9509 #[test]
9510 fn insert_into_half_column_dim_mismatch_errors() {
9511 let mut e = Engine::new();
9512 e.execute("CREATE TABLE t (v VECTOR(4) USING HALF)")
9513 .unwrap();
9514 let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9515 assert!(matches!(
9516 &err,
9517 EngineError::Storage(StorageError::TypeMismatch { .. })
9518 ));
9519 }
9520
9521 #[test]
9522 fn insert_into_sq8_column_dim_mismatch_errors() {
9523 let mut e = Engine::new();
9528 e.execute("CREATE TABLE t (v VECTOR(4) USING SQ8)").unwrap();
9529 let err = e.execute("INSERT INTO t VALUES ([1.0, 2.0])").unwrap_err();
9530 assert!(
9531 matches!(
9532 &err,
9533 EngineError::Storage(StorageError::TypeMismatch { .. })
9534 ),
9535 "got: {err}",
9536 );
9537 }
9538
9539 #[test]
9540 fn create_table_duplicate_errors() {
9541 let mut e = Engine::new();
9542 e.execute("CREATE TABLE foo (a INT)").unwrap();
9543 let err = e.execute("CREATE TABLE foo (a INT)").unwrap_err();
9544 assert!(matches!(
9545 err,
9546 EngineError::Storage(StorageError::DuplicateTable { ref name }) if name == "foo"
9547 ));
9548 }
9549
9550 #[test]
9551 fn insert_into_unknown_table_errors() {
9552 let mut e = Engine::new();
9553 let err = e.execute("INSERT INTO ghost VALUES (1)").unwrap_err();
9554 assert!(matches!(
9555 err,
9556 EngineError::Storage(StorageError::TableNotFound { ref name }) if name == "ghost"
9557 ));
9558 }
9559
9560 #[test]
9561 fn insert_happy_path_reports_one_affected() {
9562 let mut e = Engine::new();
9563 e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
9564 let r = e.execute("INSERT INTO foo VALUES (42)").unwrap();
9565 assert_eq!(unwrap_command_ok(&r), 1);
9566 assert_eq!(e.catalog().get("foo").unwrap().row_count(), 1);
9567 }
9568
9569 #[test]
9570 fn insert_arity_mismatch_propagates() {
9571 let mut e = Engine::new();
9572 e.execute("CREATE TABLE foo (a INT, b TEXT)").unwrap();
9573 let err = e.execute("INSERT INTO foo VALUES (1)").unwrap_err();
9574 assert!(matches!(
9575 err,
9576 EngineError::Storage(StorageError::ArityMismatch { .. })
9577 ));
9578 }
9579
9580 #[test]
9581 fn insert_negative_integer_via_unary_minus() {
9582 let mut e = Engine::new();
9583 e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
9584 e.execute("INSERT INTO foo VALUES (-7)").unwrap();
9585 let rows = e.catalog().get("foo").unwrap().rows();
9586 assert_eq!(rows[0].values[0], Value::Int(-7));
9587 }
9588
9589 #[test]
9590 fn insert_non_literal_expr_unsupported() {
9591 let mut e = Engine::new();
9592 e.execute("CREATE TABLE foo (a INT NOT NULL)").unwrap();
9593 let err = e.execute("INSERT INTO foo VALUES (1 + 2)").unwrap_err();
9594 assert!(matches!(err, EngineError::Unsupported(_)));
9595 }
9596
9597 #[test]
9598 fn select_star_returns_all_rows_in_insertion_order() {
9599 let mut e = Engine::new();
9600 e.execute("CREATE TABLE foo (a INT NOT NULL, b TEXT NOT NULL)")
9601 .unwrap();
9602 e.execute("INSERT INTO foo VALUES (1, 'one')").unwrap();
9603 e.execute("INSERT INTO foo VALUES (2, 'two')").unwrap();
9604 e.execute("INSERT INTO foo VALUES (3, 'three')").unwrap();
9605
9606 let r = e.execute("SELECT * FROM foo").unwrap();
9607 let QueryResult::Rows { columns, rows } = r else {
9608 panic!("expected Rows")
9609 };
9610 assert_eq!(columns.len(), 2);
9611 assert_eq!(columns[0].name, "a");
9612 assert_eq!(rows.len(), 3);
9613 assert_eq!(
9614 rows[1].values,
9615 vec![Value::Int(2), Value::Text("two".into())]
9616 );
9617 }
9618
9619 #[test]
9620 fn select_star_on_empty_table_returns_zero_rows() {
9621 let mut e = Engine::new();
9622 e.execute("CREATE TABLE foo (a INT)").unwrap();
9623 let r = e.execute("SELECT * FROM foo").unwrap();
9624 match r {
9625 QueryResult::Rows { rows, .. } => assert!(rows.is_empty()),
9626 QueryResult::CommandOk { .. } => panic!("expected Rows"),
9627 }
9628 }
9629
9630 fn make_three_row_users(e: &mut Engine) {
9633 e.execute("CREATE TABLE users (id INT NOT NULL, name TEXT NOT NULL, score INT)")
9634 .unwrap();
9635 e.execute("INSERT INTO users VALUES (1, 'alice', 90)")
9636 .unwrap();
9637 e.execute("INSERT INTO users VALUES (2, 'bob', NULL)")
9638 .unwrap();
9639 e.execute("INSERT INTO users VALUES (3, 'cara', 70)")
9640 .unwrap();
9641 }
9642
9643 fn unwrap_rows(r: QueryResult) -> (Vec<ColumnSchema>, Vec<Row>) {
9644 match r {
9645 QueryResult::Rows { columns, rows } => (columns, rows),
9646 QueryResult::CommandOk { .. } => panic!("expected Rows"),
9647 }
9648 }
9649
9650 #[test]
9651 fn where_filter_passes_only_true_rows() {
9652 let mut e = Engine::new();
9653 make_three_row_users(&mut e);
9654 let r = e.execute("SELECT * FROM users WHERE id > 1").unwrap();
9655 let (_, rows) = unwrap_rows(r);
9656 assert_eq!(rows.len(), 2);
9657 assert_eq!(rows[0].values[0], Value::Int(2));
9658 assert_eq!(rows[1].values[0], Value::Int(3));
9659 }
9660
9661 #[test]
9662 fn where_with_null_result_filters_out_row() {
9663 let mut e = Engine::new();
9664 make_three_row_users(&mut e);
9665 let r = e.execute("SELECT * FROM users WHERE score > 80").unwrap();
9667 let (_, rows) = unwrap_rows(r);
9668 assert_eq!(rows.len(), 1);
9669 assert_eq!(rows[0].values[1], Value::Text("alice".into()));
9670 }
9671
9672 #[test]
9673 fn projection_named_columns() {
9674 let mut e = Engine::new();
9675 make_three_row_users(&mut e);
9676 let r = e.execute("SELECT name, score FROM users").unwrap();
9677 let (cols, rows) = unwrap_rows(r);
9678 assert_eq!(cols.len(), 2);
9679 assert_eq!(cols[0].name, "name");
9680 assert_eq!(cols[1].name, "score");
9681 assert_eq!(rows.len(), 3);
9682 assert_eq!(
9683 rows[0].values,
9684 vec![Value::Text("alice".into()), Value::Int(90)]
9685 );
9686 }
9687
9688 #[test]
9689 fn projection_with_column_alias() {
9690 let mut e = Engine::new();
9691 make_three_row_users(&mut e);
9692 let r = e
9693 .execute("SELECT name AS who FROM users WHERE id = 1")
9694 .unwrap();
9695 let (cols, rows) = unwrap_rows(r);
9696 assert_eq!(cols[0].name, "who");
9697 assert_eq!(rows.len(), 1);
9698 assert_eq!(rows[0].values[0], Value::Text("alice".into()));
9699 }
9700
9701 #[test]
9702 fn qualified_column_with_table_alias_resolves() {
9703 let mut e = Engine::new();
9704 make_three_row_users(&mut e);
9705 let r = e
9706 .execute("SELECT u.id, u.name FROM users AS u WHERE u.id < 3")
9707 .unwrap();
9708 let (cols, rows) = unwrap_rows(r);
9709 assert_eq!(cols.len(), 2);
9710 assert_eq!(rows.len(), 2);
9711 }
9712
9713 #[test]
9714 fn qualified_column_with_wrong_alias_errors() {
9715 let mut e = Engine::new();
9716 make_three_row_users(&mut e);
9717 let err = e.execute("SELECT x.id FROM users AS u").unwrap_err();
9718 assert!(matches!(
9719 err,
9720 EngineError::Eval(EvalError::UnknownQualifier { ref qualifier }) if qualifier == "x"
9721 ));
9722 }
9723
9724 #[test]
9725 fn select_unknown_column_errors_in_projection() {
9726 let mut e = Engine::new();
9727 make_three_row_users(&mut e);
9728 let err = e.execute("SELECT ghost FROM users").unwrap_err();
9729 assert!(matches!(
9730 err,
9731 EngineError::Eval(EvalError::ColumnNotFound { ref name }) if name == "ghost"
9732 ));
9733 }
9734
9735 #[test]
9736 fn where_unknown_column_errors() {
9737 let mut e = Engine::new();
9738 make_three_row_users(&mut e);
9739 let err = e
9740 .execute("SELECT * FROM users WHERE ghost = 1")
9741 .unwrap_err();
9742 assert!(matches!(
9743 err,
9744 EngineError::Eval(EvalError::ColumnNotFound { .. })
9745 ));
9746 }
9747
9748 #[test]
9749 fn expression_projection_evaluates_and_renders() {
9750 let mut e = Engine::new();
9753 e.execute("CREATE TABLE t (a INT NOT NULL)").unwrap();
9754 e.execute("INSERT INTO t VALUES (3)").unwrap();
9755 let (_, rows) = unwrap_rows(e.execute("SELECT 1 + 2 FROM t").unwrap());
9756 assert_eq!(rows.len(), 1);
9757 assert_eq!(rows[0].values[0], Value::Int(3));
9760 }
9761
9762 #[test]
9763 fn select_unknown_table_errors() {
9764 let mut e = Engine::new();
9765 let err = e.execute("SELECT * FROM ghost").unwrap_err();
9766 assert!(matches!(
9767 err,
9768 EngineError::Storage(StorageError::TableNotFound { .. })
9769 ));
9770 }
9771
9772 #[test]
9773 fn invalid_sql_returns_parse_error() {
9774 let mut e = Engine::new();
9777 let err = e.execute("THIS_IS_NOT_A_KEYWORD foo bar baz").unwrap_err();
9778 assert!(matches!(err, EngineError::Parse(_)));
9779 }
9780
9781 #[test]
9784 fn create_index_registers_on_table() {
9785 let mut e = Engine::new();
9786 make_three_row_users(&mut e);
9787 e.execute("CREATE INDEX by_name ON users (name)").unwrap();
9788 let t = e.catalog().get("users").unwrap();
9789 assert_eq!(t.indices().len(), 1);
9790 assert_eq!(t.indices()[0].name, "by_name");
9791 }
9792
9793 #[test]
9794 fn create_index_on_unknown_table_errors() {
9795 let mut e = Engine::new();
9796 let err = e.execute("CREATE INDEX i ON ghost (a)").unwrap_err();
9797 assert!(matches!(
9798 err,
9799 EngineError::Storage(StorageError::TableNotFound { .. })
9800 ));
9801 }
9802
9803 #[test]
9804 fn create_index_on_unknown_column_errors() {
9805 let mut e = Engine::new();
9806 make_three_row_users(&mut e);
9807 let err = e.execute("CREATE INDEX i ON users (ghost)").unwrap_err();
9808 assert!(matches!(
9809 err,
9810 EngineError::Storage(StorageError::ColumnNotFound { .. })
9811 ));
9812 }
9813
9814 #[test]
9815 fn select_eq_uses_index_returns_same_rows_as_scan() {
9816 let mut without = Engine::new();
9820 make_three_row_users(&mut without);
9821 let mut with = Engine::new();
9822 make_three_row_users(&mut with);
9823 with.execute("CREATE INDEX by_id ON users (id)").unwrap();
9824
9825 let q = "SELECT * FROM users WHERE id = 2";
9826 let (_, no_idx_rows) = unwrap_rows(without.execute(q).unwrap());
9827 let (_, idx_rows) = unwrap_rows(with.execute(q).unwrap());
9828 assert_eq!(no_idx_rows, idx_rows);
9829 assert_eq!(idx_rows.len(), 1);
9830 }
9831
9832 #[test]
9833 fn select_eq_with_no_matching_index_value_returns_empty() {
9834 let mut e = Engine::new();
9835 make_three_row_users(&mut e);
9836 e.execute("CREATE INDEX by_id ON users (id)").unwrap();
9837 let (_, rows) = unwrap_rows(e.execute("SELECT * FROM users WHERE id = 999").unwrap());
9838 assert_eq!(rows.len(), 0);
9839 }
9840
9841 #[test]
9844 fn begin_sets_in_transaction_flag() {
9845 let mut e = Engine::new();
9846 assert!(!e.in_transaction());
9847 e.execute("BEGIN").unwrap();
9848 assert!(e.in_transaction());
9849 }
9850
9851 #[test]
9852 fn double_begin_errors() {
9853 let mut e = Engine::new();
9854 e.execute("BEGIN").unwrap();
9855 let err = e.execute("BEGIN").unwrap_err();
9856 assert_eq!(err, EngineError::TransactionAlreadyOpen);
9857 }
9858
9859 #[test]
9860 fn commit_without_begin_errors() {
9861 let mut e = Engine::new();
9862 let err = e.execute("COMMIT").unwrap_err();
9863 assert_eq!(err, EngineError::NoActiveTransaction);
9864 }
9865
9866 #[test]
9867 fn rollback_without_begin_errors() {
9868 let mut e = Engine::new();
9869 let err = e.execute("ROLLBACK").unwrap_err();
9870 assert_eq!(err, EngineError::NoActiveTransaction);
9871 }
9872
9873 #[test]
9874 fn commit_applies_shadow_to_committed_catalog() {
9875 let mut e = Engine::new();
9876 e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
9877 e.execute("BEGIN").unwrap();
9878 e.execute("INSERT INTO t VALUES (1)").unwrap();
9879 e.execute("INSERT INTO t VALUES (2)").unwrap();
9880 e.execute("COMMIT").unwrap();
9881 assert!(!e.in_transaction());
9882 assert_eq!(e.catalog().get("t").unwrap().row_count(), 2);
9883 }
9884
9885 #[test]
9886 fn rollback_discards_shadow() {
9887 let mut e = Engine::new();
9888 e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
9889 e.execute("BEGIN").unwrap();
9890 e.execute("INSERT INTO t VALUES (1)").unwrap();
9891 e.execute("INSERT INTO t VALUES (2)").unwrap();
9892 e.execute("ROLLBACK").unwrap();
9893 assert!(!e.in_transaction());
9894 assert_eq!(e.catalog().get("t").unwrap().row_count(), 0);
9895 }
9896
9897 #[test]
9898 fn select_during_tx_sees_uncommitted_writes_own_session() {
9899 let mut e = Engine::new();
9902 e.execute("CREATE TABLE t (v INT NOT NULL)").unwrap();
9903 e.execute("BEGIN").unwrap();
9904 e.execute("INSERT INTO t VALUES (42)").unwrap();
9905 let (_, rows) = unwrap_rows(e.execute("SELECT * FROM t").unwrap());
9906 assert_eq!(rows.len(), 1);
9907 assert_eq!(rows[0].values[0], Value::Int(42));
9908 }
9909
9910 #[test]
9911 fn snapshot_with_no_users_is_bare_catalog_format() {
9912 let mut e = Engine::new();
9913 e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9914 let bytes = e.snapshot();
9915 assert_eq!(
9916 &bytes[..8],
9917 b"SPGDB001",
9918 "must be the bare v3.x catalog magic"
9919 );
9920 let e2 = Engine::restore_envelope(&bytes).unwrap();
9921 assert!(e2.users().is_empty());
9922 assert_eq!(e2.catalog().table_count(), 1);
9923 }
9924
9925 #[test]
9926 fn snapshot_with_users_round_trips_both_via_envelope() {
9927 let mut e = Engine::new();
9928 e.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
9929 e.create_user("alice", "pw1", Role::Admin, [9; 16]).unwrap();
9930 e.create_user("bob", "pw2", Role::ReadOnly, [5; 16])
9931 .unwrap();
9932 let bytes = e.snapshot();
9933 assert_eq!(&bytes[..8], b"SPGENV01", "must be the v4.1 envelope magic");
9934 let e2 = Engine::restore_envelope(&bytes).unwrap();
9935 assert_eq!(e2.users().len(), 2);
9936 assert_eq!(e2.verify_user("alice", "pw1"), Some(Role::Admin));
9937 assert_eq!(e2.verify_user("bob", "pw2"), Some(Role::ReadOnly));
9938 assert_eq!(e2.verify_user("alice", "wrong"), None);
9939 assert_eq!(e2.catalog().table_count(), 1);
9940 }
9941
9942 #[test]
9943 fn ddl_inside_tx_also_rolled_back() {
9944 let mut e = Engine::new();
9945 e.execute("BEGIN").unwrap();
9946 e.execute("CREATE TABLE t (v INT)").unwrap();
9947 e.execute("SELECT * FROM t").unwrap();
9949 e.execute("ROLLBACK").unwrap();
9950 let err = e.execute("SELECT * FROM t").unwrap_err();
9952 assert!(matches!(
9953 err,
9954 EngineError::Storage(StorageError::TableNotFound { .. })
9955 ));
9956 }
9957
9958 #[test]
9961 fn create_publication_lands_in_catalog() {
9962 let mut e = Engine::new();
9963 assert!(e.publications().is_empty());
9964 e.execute("CREATE PUBLICATION pub_a").unwrap();
9965 assert_eq!(e.publications().len(), 1);
9966 assert!(e.publications().contains("pub_a"));
9967 }
9968
9969 #[test]
9970 fn create_publication_duplicate_errors() {
9971 let mut e = Engine::new();
9972 e.execute("CREATE PUBLICATION pub_a").unwrap();
9973 let err = e.execute("CREATE PUBLICATION pub_a").unwrap_err();
9974 assert!(
9975 alloc::format!("{err:?}").contains("DuplicateName"),
9976 "got {err:?}"
9977 );
9978 }
9979
9980 #[test]
9981 fn drop_publication_silent_when_absent() {
9982 let mut e = Engine::new();
9983 let r = e.execute("DROP PUBLICATION nope").unwrap();
9986 match r {
9987 QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
9988 other => panic!("expected CommandOk, got {other:?}"),
9989 }
9990 }
9991
9992 #[test]
9993 fn drop_publication_present_reports_one_affected() {
9994 let mut e = Engine::new();
9995 e.execute("CREATE PUBLICATION pub_a").unwrap();
9996 let r = e.execute("DROP PUBLICATION pub_a").unwrap();
9997 match r {
9998 QueryResult::CommandOk {
9999 affected,
10000 modified_catalog,
10001 } => {
10002 assert_eq!(affected, 1);
10003 assert!(modified_catalog);
10004 }
10005 other => panic!("expected CommandOk, got {other:?}"),
10006 }
10007 assert!(e.publications().is_empty());
10008 }
10009
10010 #[test]
10011 fn publications_persist_across_snapshot_restore() {
10012 let mut e = Engine::new();
10017 e.execute("CREATE PUBLICATION pub_a").unwrap();
10018 e.execute("CREATE PUBLICATION pub_b FOR ALL TABLES").unwrap();
10019 let snap = e.snapshot();
10020 let e2 = Engine::restore_envelope(&snap).unwrap();
10021 assert_eq!(e2.publications().len(), 2);
10022 assert!(e2.publications().contains("pub_a"));
10023 assert!(e2.publications().contains("pub_b"));
10024 }
10025
10026 #[test]
10027 fn create_publication_allowed_inside_transaction() {
10028 let mut e = Engine::new();
10032 e.execute("BEGIN").unwrap();
10033 e.execute("CREATE PUBLICATION pub_a").unwrap();
10034 e.execute("COMMIT").unwrap();
10035 assert!(e.publications().contains("pub_a"));
10036 }
10037
10038 #[test]
10041 fn create_publication_for_table_list_lands_with_scope() {
10042 let mut e = Engine::new();
10043 e.execute("CREATE TABLE t1 (id INT NOT NULL)").unwrap();
10044 e.execute("CREATE TABLE t2 (id INT NOT NULL)").unwrap();
10045 e.execute("CREATE PUBLICATION pub_a FOR TABLE t1, t2")
10046 .unwrap();
10047 let scope = e.publications().get("pub_a").cloned();
10048 let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = scope else {
10049 panic!("expected ForTables scope, got {scope:?}")
10050 };
10051 assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10052 }
10053
10054 #[test]
10055 fn create_publication_all_tables_except_lands_with_scope() {
10056 let mut e = Engine::new();
10057 e.execute("CREATE PUBLICATION pub_a FOR ALL TABLES EXCEPT t3")
10058 .unwrap();
10059 let scope = e.publications().get("pub_a").cloned();
10060 let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = scope else {
10061 panic!("expected AllTablesExcept scope, got {scope:?}")
10062 };
10063 assert_eq!(ts, alloc::vec!["t3".to_string()]);
10064 }
10065
10066 #[test]
10067 fn show_publications_empty_returns_zero_rows() {
10068 let e = Engine::new();
10069 let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10070 let QueryResult::Rows { rows, columns } = r else {
10071 panic!()
10072 };
10073 assert!(rows.is_empty());
10074 assert_eq!(columns.len(), 3);
10075 assert_eq!(columns[0].name, "name");
10076 assert_eq!(columns[1].name, "scope");
10077 assert_eq!(columns[2].name, "table_count");
10078 }
10079
10080 #[test]
10081 fn show_publications_returns_one_row_per_publication_ordered_by_name() {
10082 let mut e = Engine::new();
10083 e.execute("CREATE PUBLICATION z_pub").unwrap();
10084 e.execute("CREATE PUBLICATION a_pub FOR TABLE t1, t2")
10085 .unwrap();
10086 e.execute("CREATE PUBLICATION m_pub FOR ALL TABLES EXCEPT bad")
10087 .unwrap();
10088 let r = e.execute_readonly("SHOW PUBLICATIONS").unwrap();
10089 let QueryResult::Rows { rows, .. } = r else {
10090 panic!()
10091 };
10092 assert_eq!(rows.len(), 3);
10093 let names: Vec<&str> = rows
10095 .iter()
10096 .map(|r| {
10097 if let Value::Text(s) = &r.values[0] {
10098 s.as_str()
10099 } else {
10100 panic!()
10101 }
10102 })
10103 .collect();
10104 assert_eq!(names, alloc::vec!["a_pub", "m_pub", "z_pub"]);
10105 match &rows[0].values[1] {
10107 Value::Text(s) => assert_eq!(s, "FOR TABLE t1, t2"),
10108 other => panic!("expected Text, got {other:?}"),
10109 }
10110 assert_eq!(rows[0].values[2], Value::Int(2));
10111 match &rows[1].values[1] {
10113 Value::Text(s) => assert_eq!(s, "FOR ALL TABLES EXCEPT bad"),
10114 other => panic!("expected Text, got {other:?}"),
10115 }
10116 assert_eq!(rows[1].values[2], Value::Int(1));
10117 match &rows[2].values[1] {
10119 Value::Text(s) => assert_eq!(s, "FOR ALL TABLES"),
10120 other => panic!("expected Text, got {other:?}"),
10121 }
10122 assert_eq!(rows[2].values[2], Value::Null);
10123 }
10124
10125 #[test]
10126 fn for_list_scopes_persist_across_snapshot() {
10127 let mut e = Engine::new();
10130 e.execute("CREATE PUBLICATION p1 FOR TABLE t1, t2").unwrap();
10131 e.execute("CREATE PUBLICATION p2 FOR ALL TABLES EXCEPT bad, worse")
10132 .unwrap();
10133 let snap = e.snapshot();
10134 let e2 = Engine::restore_envelope(&snap).unwrap();
10135 assert_eq!(e2.publications().len(), 2);
10136 let p1 = e2.publications().get("p1").cloned();
10137 let Some(spg_sql::ast::PublicationScope::ForTables(ts)) = p1 else {
10138 panic!("p1 scope lost: {p1:?}")
10139 };
10140 assert_eq!(ts, alloc::vec!["t1".to_string(), "t2".to_string()]);
10141 let p2 = e2.publications().get("p2").cloned();
10142 let Some(spg_sql::ast::PublicationScope::AllTablesExcept(ts)) = p2 else {
10143 panic!("p2 scope lost: {p2:?}")
10144 };
10145 assert_eq!(ts, alloc::vec!["bad".to_string(), "worse".to_string()]);
10146 }
10147
10148 #[test]
10151 fn create_subscription_lands_in_catalog_with_defaults() {
10152 let mut e = Engine::new();
10153 e.execute(
10154 "CREATE SUBSCRIPTION sub_a CONNECTION 'host=127.0.0.1 port=20002' PUBLICATION pub_a",
10155 )
10156 .unwrap();
10157 let s = e.subscriptions().get("sub_a").cloned().expect("present");
10158 assert_eq!(s.conn_str, "host=127.0.0.1 port=20002");
10159 assert_eq!(s.publications, alloc::vec!["pub_a".to_string()]);
10160 assert!(s.enabled);
10161 assert_eq!(s.last_received_pos, 0);
10162 }
10163
10164 #[test]
10165 fn create_subscription_duplicate_name_errors() {
10166 let mut e = Engine::new();
10167 e.execute("CREATE SUBSCRIPTION s CONNECTION 'host=x' PUBLICATION p")
10168 .unwrap();
10169 let err = e
10170 .execute("CREATE SUBSCRIPTION s CONNECTION 'host=y' PUBLICATION p")
10171 .unwrap_err();
10172 assert!(
10173 alloc::format!("{err:?}").contains("DuplicateName"),
10174 "got {err:?}"
10175 );
10176 }
10177
10178 #[test]
10179 fn drop_subscription_silent_when_absent() {
10180 let mut e = Engine::new();
10181 let r = e.execute("DROP SUBSCRIPTION never").unwrap();
10182 match r {
10183 QueryResult::CommandOk { affected, .. } => assert_eq!(affected, 0),
10184 other => panic!("expected CommandOk, got {other:?}"),
10185 }
10186 }
10187
10188 #[test]
10189 fn subscription_advance_updates_last_pos_monotone() {
10190 let mut e = Engine::new();
10191 e.execute("CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p")
10192 .unwrap();
10193 assert!(e.subscription_advance("s", 100));
10194 assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10195 assert!(e.subscription_advance("s", 50)); assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 100);
10197 assert!(e.subscription_advance("s", 200));
10198 assert_eq!(e.subscriptions().get("s").unwrap().last_received_pos, 200);
10199 assert!(!e.subscription_advance("missing", 1));
10200 }
10201
/// `SHOW SUBSCRIPTIONS` returns one five-column row per subscription, sorted
/// by name regardless of creation order.
///
/// The first column is `name` and the fifth is `last_received_pos`. For the
/// `a_sub` row the test also pins the connection string, the publication
/// list, a `true` boolean in the fourth column and a position of zero.
/// Multiple publications are rendered as a single `", "`-joined text value.
#[test]
fn show_subscriptions_returns_rows_ordered_by_name() {
    let mut e = Engine::new();
    // Created in reverse alphabetical order so the ordering check is meaningful.
    e.execute("CREATE SUBSCRIPTION z_sub CONNECTION 'h=x' PUBLICATION p1, p2")
        .unwrap();
    e.execute("CREATE SUBSCRIPTION a_sub CONNECTION 'h=y' PUBLICATION p3")
        .unwrap();

    // Report the unexpected result on failure instead of a bare panic.
    let (columns, rows) = match e.execute_readonly("SHOW SUBSCRIPTIONS").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        other => panic!("expected Rows, got {other:?}"),
    };
    assert_eq!(rows.len(), 2);
    assert_eq!(columns.len(), 5);
    assert_eq!(columns[0].name, "name");
    assert_eq!(columns[4].name, "last_received_pos");

    let names: Vec<&str> = rows
        .iter()
        .map(|r| match &r.values[0] {
            Value::Text(s) => s.as_str(),
            other => panic!("expected Text in the name column, got {other:?}"),
        })
        .collect();
    assert_eq!(names, alloc::vec!["a_sub", "z_sub"]);

    // Row 0 is `a_sub` after sorting.
    assert_eq!(rows[0].values[1], Value::Text("h=y".to_string()));
    assert_eq!(rows[0].values[2], Value::Text("p3".to_string()));
    assert_eq!(rows[0].values[3], Value::Bool(true));
    assert_eq!(rows[0].values[4], Value::BigInt(0));
    // Row 1 is `z_sub`, which lists two publications.
    assert_eq!(rows[1].values[2], Value::Text("p1, p2".to_string()));
}
10237
/// Subscriptions survive a `snapshot` / `restore_envelope` round trip:
/// connection strings, publication lists and the last received position
/// (0 for the untouched `s1`, 42 for the advanced `s2`) are all preserved.
#[test]
fn subscriptions_persist_across_snapshot_envelope_v4() {
    let mut source = Engine::new();
    source
        .execute("CREATE SUBSCRIPTION s1 CONNECTION 'h=A' PUBLICATION p1, p2")
        .unwrap();
    source
        .execute("CREATE SUBSCRIPTION s2 CONNECTION 'h=B' PUBLICATION p3")
        .unwrap();
    source.subscription_advance("s2", 42);

    let bytes = source.snapshot();
    let restored = Engine::restore_envelope(&bytes).unwrap();
    assert_eq!(restored.subscriptions().len(), 2);

    let first = restored.subscriptions().get("s1").unwrap();
    assert_eq!(first.conn_str, "h=A");
    assert_eq!(
        first.publications,
        alloc::vec!["p1".to_string(), "p2".to_string()]
    );
    assert_eq!(first.last_received_pos, 0);

    let second = restored.subscriptions().get("s2").unwrap();
    assert_eq!(second.last_received_pos, 42);
}
10256
/// An envelope tagged with version byte 3 still restores, yielding an empty
/// subscription set while keeping its publication.
///
/// The envelope is assembled by hand: the `SPGENV01` magic, the version
/// byte, then the catalog, users and publications payloads (each preceded by
/// its length as a little-endian `u32`), and finally a CRC32 of every byte
/// written before it.
#[test]
fn v3_envelope_loads_with_empty_subscriptions() {
    /// Appends `payload` to `out`, preceded by its length as a little-endian `u32`.
    fn push_section(out: &mut Vec<u8>, payload: &[u8]) {
        out.extend_from_slice(&u32::try_from(payload.len()).unwrap().to_le_bytes());
        out.extend_from_slice(payload);
    }

    let mut legacy = Engine::new();
    legacy.execute("CREATE PUBLICATION pub_legacy").unwrap();
    let catalog = legacy.catalog.serialize();
    let users = crate::users::serialize_users(&legacy.users);
    let pubs = legacy.publications.serialize();

    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(3u8);
    push_section(&mut envelope, &catalog);
    push_section(&mut envelope, &users);
    push_section(&mut envelope, &pubs);
    // The checksum covers everything written so far and is appended last.
    let checksum = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&checksum.to_le_bytes());

    let restored =
        Engine::restore_envelope(&envelope).expect("v3 envelope restores under v4 reader");
    assert!(restored.subscriptions().is_empty());
    assert!(restored.publications().contains("pub_legacy"));
}
10283
/// `CREATE SUBSCRIPTION` is accepted between `BEGIN` and `COMMIT`, and the
/// subscription is present once the transaction has committed.
#[test]
fn create_subscription_allowed_inside_transaction() {
    let mut engine = Engine::new();
    let script = [
        "BEGIN",
        "CREATE SUBSCRIPTION s CONNECTION 'h=x' PUBLICATION p",
        "COMMIT",
    ];
    for statement in script {
        engine.execute(statement).unwrap();
    }
    assert!(engine.subscriptions().contains("s"));
}
10293
/// `ANALYZE t` over 50 rows with ids `0..50` fills in the statistics for `id`:
/// at least two histogram bounds running from `"0"` to `"49"`, a null
/// fraction of (approximately) zero and 50 distinct values.
#[test]
fn analyze_populates_histogram_bounds() {
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (id INT NOT NULL, name TEXT)").unwrap();
    for i in 0..50 {
        e.execute(&alloc::format!("INSERT INTO t VALUES ({i}, 'name{i}')"))
            .unwrap();
    }
    e.execute("ANALYZE t").unwrap();

    let stats = e.statistics();
    let id_stats = stats.get("t", "id").unwrap();
    assert!(id_stats.histogram_bounds.len() >= 2);
    assert_eq!(id_stats.histogram_bounds.first().unwrap(), "0");
    assert_eq!(id_stats.histogram_bounds.last().unwrap(), "49");
    // Floating-point comparison with a tolerance rather than exact equality.
    assert!((id_stats.null_frac - 0.0).abs() < 1e-6);
    assert_eq!(id_stats.n_distinct, 50);
}
10316
/// Running `ANALYZE t` a second time replaces the earlier statistics:
/// `n_distinct` for `id` is 10 after the first batch of rows and 30 after
/// twenty more rows are inserted and the table is analyzed again.
#[test]
fn reanalyze_overwrites_prior_stats() {
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();

    // Each phase: insert a batch of ids, re-analyze, check the distinct count.
    for (batch, expected_distinct) in [(0..10, 10), (10..30, 30)] {
        for id in batch {
            engine
                .execute(&alloc::format!("INSERT INTO t VALUES ({id})"))
                .unwrap();
        }
        engine.execute("ANALYZE t").unwrap();
        let observed = engine.statistics().get("t", "id").unwrap().n_distinct;
        assert_eq!(observed, expected_distinct);
    }
}
10334
/// `ANALYZE` on a table that does not exist fails with the storage layer's
/// `TableNotFound` error.
#[test]
fn analyze_unknown_table_errors() {
    let mut engine = Engine::new();
    let failure = engine.execute("ANALYZE nonexistent").unwrap_err();
    let is_table_not_found = matches!(
        failure,
        EngineError::Storage(StorageError::TableNotFound { .. })
    );
    assert!(is_table_not_found);
}
10341
/// `ANALYZE` without a table name processes every table: with two tables
/// present it reports `affected == 2`, sets `modified_catalog`, and leaves
/// statistics behind for a column of each table.
#[test]
fn bare_analyze_covers_all_user_tables() {
    let mut engine = Engine::new();
    let setup = [
        "CREATE TABLE t1 (id INT NOT NULL)",
        "CREATE TABLE t2 (name TEXT NOT NULL)",
        "INSERT INTO t1 VALUES (1)",
        "INSERT INTO t2 VALUES ('alice')",
    ];
    for statement in setup {
        engine.execute(statement).unwrap();
    }

    let (affected, modified_catalog) = match engine.execute("ANALYZE").unwrap() {
        QueryResult::CommandOk {
            affected,
            modified_catalog,
        } => (affected, modified_catalog),
        other => panic!("expected CommandOk, got {other:?}"),
    };
    assert_eq!(affected, 2);
    assert!(modified_catalog);

    for (table, column) in [("t1", "id"), ("t2", "name")] {
        assert!(engine.statistics().get(table, column).is_some());
    }
}
10360
/// `SELECT * FROM spg_statistic` exposes the collected statistics as a
/// six-column relation with one row per analyzed column.
///
/// Column 0 is `table_name`, column 4 is `histogram_bounds` and column 5 is
/// `cold_row_count`. After analyzing the two-column table `t`, two rows come
/// back and the first one describes `t.id`.
#[test]
fn select_from_spg_statistic_returns_rows_per_column() {
    let mut e = Engine::new();
    e.execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    e.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    e.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    e.execute("ANALYZE t").unwrap();

    // Report the unexpected result on failure instead of a bare panic.
    let (columns, rows) = match e.execute_readonly("SELECT * FROM spg_statistic").unwrap() {
        QueryResult::Rows { columns, rows } => (columns, rows),
        other => panic!("expected Rows, got {other:?}"),
    };
    assert_eq!(columns.len(), 6);
    assert_eq!(columns[0].name, "table_name");
    assert_eq!(columns[4].name, "histogram_bounds");
    assert_eq!(columns[5].name, "cold_row_count");
    assert_eq!(rows.len(), 2, "one row per column of t");

    // The first two values of the first row identify the table and the column.
    match (&rows[0].values[0], &rows[0].values[1]) {
        (Value::Text(t), Value::Text(c)) => {
            assert_eq!(t, "t");
            assert_eq!(c, "id");
        }
        other => panic!("expected (Text, Text) table/column names, got {other:?}"),
    }
}
10389
/// `ANALYZE` produces statistics for the scalar `id` column but none for the
/// `VECTOR(3)` column of the same table.
#[test]
fn analyze_skips_vector_columns() {
    let mut engine = Engine::new();
    let script = [
        "CREATE TABLE t (id INT NOT NULL, v VECTOR(3) NOT NULL)",
        "INSERT INTO t VALUES (1, [1, 2, 3])",
        "ANALYZE t",
    ];
    for statement in script {
        engine.execute(statement).unwrap();
    }

    let scalar_stats = engine.statistics().get("t", "id");
    assert!(scalar_stats.is_some());
    let vector_stats = engine.statistics().get("t", "v");
    assert!(vector_stats.is_none());
}
10402
/// Statistics gathered by `ANALYZE` are carried through `snapshot` and
/// `restore_envelope`: the restored engine still reports 20 distinct ids.
#[test]
fn statistics_persist_across_envelope_v5_round_trip() {
    let mut source = Engine::new();
    source.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    (0..20).for_each(|id| {
        source
            .execute(&alloc::format!("INSERT INTO t VALUES ({id})"))
            .unwrap();
    });
    source.execute("ANALYZE").unwrap();

    let bytes = source.snapshot();
    let restored = Engine::restore_envelope(&bytes).unwrap();
    let id_stats = restored.statistics().get("t", "id").unwrap();
    assert_eq!(id_stats.n_distinct, 20);
}
10416
/// On a fresh, never-analyzed table the auto-analyze check stays quiet for
/// the first nine inserts and flags the table on the tenth.
#[test]
fn auto_analyze_threshold_fires_after_10pct_of_min_rows_on_small_table() {
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    (0..9).for_each(|id| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({id})"))
            .unwrap();
    });
    assert!(engine.tables_needing_analyze().is_empty(), "9 < threshold");

    // The tenth modification crosses the threshold.
    engine.execute("INSERT INTO t VALUES (9)").unwrap();
    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
10434
/// For a table analyzed at 1000 rows, 50 further inserts do not flag it for
/// re-analysis, while 200 inserts since the last `ANALYZE` do.
#[test]
fn auto_analyze_threshold_uses_10pct_of_row_count_for_large_tables() {
    /// Inserts one single-column row into `t` for every id yielded by `ids`.
    fn insert_ids(engine: &mut Engine, ids: impl Iterator<Item = i32>) {
        for id in ids {
            engine
                .execute(&alloc::format!("INSERT INTO t VALUES ({id})"))
                .unwrap();
        }
    }

    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    insert_ids(&mut engine, 0..1000);
    engine.execute("ANALYZE t").unwrap();
    assert!(engine.tables_needing_analyze().is_empty(), "fresh ANALYZE");

    // 50 modifications since ANALYZE: still below the threshold.
    insert_ids(&mut engine, 1000..1050);
    assert!(
        engine.tables_needing_analyze().is_empty(),
        "50 inserts < threshold of ~105"
    );

    // 150 more, for 200 modifications in total since ANALYZE.
    insert_ids(&mut engine, 1050..1200);
    assert_eq!(
        engine.tables_needing_analyze(),
        alloc::vec!["t".to_string()],
        "200 inserts > 0.1 × 1200 threshold"
    );
}
10465
/// A table flagged for analysis after 200 inserts is no longer flagged once
/// a bare `ANALYZE` has run.
#[test]
fn auto_analyze_threshold_resets_after_analyze() {
    let mut engine = Engine::new();
    engine.execute("CREATE TABLE t (id INT NOT NULL)").unwrap();
    (0..200).for_each(|id| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({id})"))
            .unwrap();
    });

    let flagged_before = engine.tables_needing_analyze();
    assert!(!flagged_before.is_empty());

    engine.execute("ANALYZE").unwrap();
    assert!(
        engine.tables_needing_analyze().is_empty(),
        "ANALYZE must reset the counter"
    );
}
10480
/// Updates and deletes count towards the auto-analyze check, not only
/// inserts: after analyzing a 50-row table, an `UPDATE` of the rows with
/// `id < 20` followed by a `DELETE` of the rows with `id >= 45` flags the
/// table again.
#[test]
fn auto_analyze_threshold_tracks_updates_and_deletes() {
    let mut engine = Engine::new();
    engine
        .execute("CREATE TABLE t (id INT NOT NULL, label TEXT)")
        .unwrap();
    (0..50).for_each(|id| {
        engine
            .execute(&alloc::format!("INSERT INTO t VALUES ({id}, 'x')"))
            .unwrap();
    });

    // Baseline the statistics, then modify rows without inserting any.
    let script = [
        "ANALYZE t",
        "UPDATE t SET label = 'y' WHERE id < 20",
        "DELETE FROM t WHERE id >= 45",
    ];
    for statement in script {
        engine.execute(statement).unwrap();
    }

    let flagged = engine.tables_needing_analyze();
    assert_eq!(flagged, alloc::vec!["t".to_string()]);
}
10499
/// An envelope tagged with version byte 4 restores successfully and yields
/// an engine whose statistics are empty.
///
/// The envelope is assembled by hand: the `SPGENV01` magic, the version
/// byte, then the catalog, users, publications and subscriptions payloads
/// (each preceded by its length as a little-endian `u32`), and finally a
/// CRC32 of every byte written before it.
#[test]
fn v4_envelope_loads_with_empty_statistics() {
    let mut legacy = Engine::new();
    legacy
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();
    let catalog = legacy.catalog.serialize();
    let users = crate::users::serialize_users(&legacy.users);
    let pubs = legacy.publications.serialize();
    let subs = legacy.subscriptions.serialize();

    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(4u8);
    {
        // Writes one length-prefixed section into the envelope.
        let mut push_section = |payload: &[u8]| {
            envelope.extend_from_slice(&u32::try_from(payload.len()).unwrap().to_le_bytes());
            envelope.extend_from_slice(payload);
        };
        push_section(&catalog);
        push_section(&users);
        push_section(&pubs);
        push_section(&subs);
    }
    // The checksum covers everything written so far and is appended last.
    let checksum = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&checksum.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v4 envelope restores");
    assert!(restored.statistics().is_empty());
}
10528
/// An envelope tagged with version byte 2 restores successfully and yields
/// an engine with no publications.
///
/// Only the version-2 layout is built here: the `SPGENV01` magic, the
/// version byte, the catalog and users payloads (each preceded by its length
/// as a little-endian `u32`), and a trailing CRC32 of every preceding byte.
#[test]
fn v1_v2_envelope_loads_with_empty_publications() {
    let mut legacy = Engine::new();
    legacy
        .create_user("alice", "secret", crate::users::Role::ReadOnly, [0u8; 16])
        .unwrap();

    let catalog = legacy.catalog.serialize();
    let users = crate::users::serialize_users(&legacy.users);

    let mut envelope = Vec::new();
    envelope.extend_from_slice(b"SPGENV01");
    envelope.push(2u8);
    // Sections are written in order, each as <u32 LE length><payload>.
    for section in [&catalog[..], &users[..]] {
        let length = u32::try_from(section.len()).unwrap();
        envelope.extend_from_slice(&length.to_le_bytes());
        envelope.extend_from_slice(section);
    }
    let checksum = spg_crypto::crc32::crc32(&envelope);
    envelope.extend_from_slice(&checksum.to_le_bytes());

    let restored = Engine::restore_envelope(&envelope).expect("v2 envelope restores");
    assert!(restored.publications().is_empty());
}
10568}