1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
thread_local! {
    // Connection id bound to the calling thread. `clear_current_connection_id`
    // writes `0` back, which is also the initial value.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    // `(username, role)` installed by `set_current_auth_identity`; `None` when
    // no identity has been installed on this thread.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    // Snapshot context consulted by the `*_visible_under_current_snapshot`
    // helpers on this thread.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    // Plain-`Cell` mirror of "CURRENT_SNAPSHOT is Some": every writer in this
    // file updates both together so readers can test the flag without
    // borrowing the `RefCell`.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    // Tenant id installed by `set_current_tenant`. `current_tenant` layers
    // scope overrides and the transaction-local tenant on top of this value.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    // Lazily populated config view; installed/restored by `ConfigSnapshotGuard`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    // Lazily populated secret view; installed/restored by `SecretStoreGuard`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
91fn view_records_to_entities(
100 table: &str,
101 records: &[crate::storage::query::unified::UnifiedRecord],
102) -> Vec<crate::storage::UnifiedEntity> {
103 use std::collections::HashMap;
104 let table_arc: std::sync::Arc<str> = std::sync::Arc::from(table);
105 let mut out = Vec::with_capacity(records.len());
106 for record in records {
107 let mut named: HashMap<String, crate::storage::schema::Value> = HashMap::new();
108 for (name, value) in record.iter_fields() {
109 named.insert(name.to_string(), value.clone());
110 }
111 let entity = crate::storage::UnifiedEntity::new(
112 crate::storage::EntityId::new(0),
113 crate::storage::EntityKind::TableRow {
114 table: std::sync::Arc::clone(&table_arc),
115 row_id: 0,
116 },
117 crate::storage::EntityData::Row(crate::storage::RowData {
118 columns: Vec::new(),
119 named: Some(named),
120 schema: None,
121 }),
122 );
123 out.push(entity);
124 }
125 out
126}
127
128fn system_keyed_collection_contract(
129 name: &str,
130 model: crate::catalog::CollectionModel,
131) -> crate::physical::CollectionContract {
132 let now = crate::utils::now_unix_millis() as u128;
133 crate::physical::CollectionContract {
134 name: name.to_string(),
135 declared_model: model,
136 schema_mode: crate::catalog::SchemaMode::Dynamic,
137 origin: crate::physical::ContractOrigin::Implicit,
138 version: 1,
139 created_at_unix_ms: now,
140 updated_at_unix_ms: now,
141 default_ttl_ms: None,
142 vector_dimension: None,
143 vector_metric: None,
144 context_index_fields: Vec::new(),
145 declared_columns: Vec::new(),
146 table_def: None,
147 timestamps_enabled: false,
148 context_index_enabled: false,
149 metrics_raw_retention_ms: None,
150 metrics_rollup_policies: Vec::new(),
151 metrics_tenant_identity: None,
152 metrics_namespace: None,
153 append_only: false,
154 subscriptions: Vec::new(),
155 session_key: None,
156 session_gap_ms: None,
157 retention_duration_ms: None,
158 }
159}
160
/// Everything `visibility_check` needs to decide whether a tuple version
/// (`xmin`/`xmax`) is visible to the current reader.
#[derive(Clone)]
pub struct SnapshotContext {
    /// Snapshot whose `sees(xmin, xmax)` gives the final visibility verdict.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Manager queried via `is_aborted` to discount aborted writers.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Xids treated as the reader's own: rows created by them are visible,
    /// rows deleted by them are hidden (see `visibility_check`).
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Value reported by `current_snapshot_requires_index_fallback`.
    pub requires_index_fallback: bool,
}
182
183pub fn set_current_connection_id(id: u64) {
192 CURRENT_CONN_ID.with(|c| c.set(id));
193}
194
195pub fn clear_current_connection_id() {
197 CURRENT_CONN_ID.with(|c| c.set(0));
198}
199
200pub fn current_connection_id() -> u64 {
203 CURRENT_CONN_ID.with(|c| c.get())
204}
205
206pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
210 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
211}
212
213pub fn clear_current_auth_identity() {
217 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
218}
219
220pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
223 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
224}
225
226pub fn set_current_tenant(tenant_id: String) {
231 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
232}
233
234pub fn clear_current_tenant() {
237 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
238}
239
240pub fn current_tenant() -> Option<String> {
251 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
252 if let Some(over) = current_scope_override() {
253 if over.tenant.is_active() {
254 return over.tenant.resolve(inherited);
255 }
256 }
257 if let Some(tx_local) = current_tx_local_tenant() {
258 return tx_local;
259 }
260 inherited
261}
262
thread_local! {
    // Transaction-local tenant installed by `TxLocalTenantGuard`.
    // Outer `None`: nothing installed. `Some(None)`: tenant explicitly
    // cleared. `Some(Some(id))`: tenant explicitly set to `id`.
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
275
276fn current_tx_local_tenant() -> Option<Option<String>> {
277 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
278}
279
280fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
286 let mut tokens = query.split_ascii_whitespace();
287 let Some(w1) = tokens.next() else {
288 return Ok(None);
289 };
290 if !w1.eq_ignore_ascii_case("SET") {
291 return Ok(None);
292 }
293 let Some(w2) = tokens.next() else {
294 return Ok(None);
295 };
296 if !w2.eq_ignore_ascii_case("LOCAL") {
297 return Ok(None);
298 }
299 let Some(w3) = tokens.next() else {
300 return Ok(None);
301 };
302 if !w3.eq_ignore_ascii_case("TENANT") {
303 return Ok(None);
304 }
305 let rest: String = tokens.collect::<Vec<_>>().join(" ");
306 let rest = rest.trim().trim_end_matches(';').trim();
307 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
308 if value_str.is_empty() {
309 return Err(RedDBError::Query(
310 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
311 ));
312 }
313 if value_str.eq_ignore_ascii_case("NULL") {
314 return Ok(Some(None));
315 }
316 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
317 let inner = &value_str[1..value_str.len() - 1];
318 return Ok(Some(Some(inner.to_string())));
319 }
320 Err(RedDBError::Query(format!(
321 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
322 )))
323}
324
325pub(crate) struct TxLocalTenantGuard;
326
327impl TxLocalTenantGuard {
328 pub fn install(value: Option<Option<String>>) -> Self {
329 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
330 Self
331 }
332}
333
334impl Drop for TxLocalTenantGuard {
335 fn drop(&mut self) {
336 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
337 }
338}
339
thread_local! {
    // Stack of scope overrides for this thread; the last element is the one
    // returned by `current_scope_override`.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
350
351pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
352 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
353}
354
355pub(crate) fn pop_scope_override() {
356 SCOPE_OVERRIDES.with(|cell| {
357 cell.borrow_mut().pop();
358 });
359}
360
361pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
362 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
363}
364
365pub(crate) fn has_scope_override_active() -> bool {
369 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
370}
371
372pub(crate) struct ScopeOverrideGuard;
376
377impl ScopeOverrideGuard {
378 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
379 push_scope_override(over);
380 Self
381 }
382}
383
384impl Drop for ScopeOverrideGuard {
385 fn drop(&mut self) {
386 pop_scope_override();
387 }
388}
389
390pub(crate) fn current_user_projected() -> Option<String> {
396 let inherited = current_auth_identity().map(|(u, _)| u);
397 if let Some(over) = current_scope_override() {
398 if over.user.is_active() {
399 return over.user.resolve(inherited);
400 }
401 }
402 inherited
403}
404
405pub(crate) fn current_role_projected() -> Option<String> {
406 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
407 if let Some(over) = current_scope_override() {
408 if over.role.is_active() {
409 return over.role.resolve(inherited);
410 }
411 }
412 inherited
413}
414
415pub(crate) fn current_secret_value(path: &str) -> Option<String> {
416 let key = path.to_ascii_lowercase();
417 CURRENT_SECRET_RESOLVER.with(|cell| {
418 let mut resolver = cell.borrow_mut();
419 let resolver = resolver.as_mut()?;
420 if resolver.values.is_none() {
421 resolver.values = resolver
422 .store
423 .as_ref()
424 .map(|store| store.vault_kv_snapshot());
425 }
426 let values = resolver.values.as_ref()?;
427 values.get(&key).cloned().or_else(|| {
428 key.strip_prefix("red.vault/").and_then(|rest| {
429 values
430 .get(rest)
431 .cloned()
432 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
433 })
434 })
435 })
436}
437
/// Per-thread secret lookup state used by `current_secret_value`.
struct SecretResolver {
    /// Source of secrets; when `None`, `values` is never populated and every
    /// lookup yields `None`.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Lazily loaded `vault_kv_snapshot()` result; `None` until the first
    /// lookup. `update_current_secret_value` patches it in place once loaded.
    values: Option<HashMap<String, String>>,
}
442
443pub(super) struct SecretStoreGuard {
444 previous: Option<SecretResolver>,
445}
446
447impl SecretStoreGuard {
448 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
449 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
450 cell.replace(Some(SecretResolver {
451 store,
452 values: None,
453 }))
454 });
455 Self { previous }
456 }
457}
458
459impl Drop for SecretStoreGuard {
460 fn drop(&mut self) {
461 let previous = self.previous.take();
462 CURRENT_SECRET_RESOLVER.with(|cell| {
463 cell.replace(previous);
464 });
465 }
466}
467
468pub(crate) fn current_config_value(path: &str) -> Option<Value> {
469 let key = path.to_ascii_lowercase();
470 CURRENT_CONFIG_RESOLVER.with(|cell| {
471 let mut resolver = cell.borrow_mut();
472 let resolver = resolver.as_mut()?;
473 if resolver.values.is_none() {
474 resolver.values = Some(latest_config_snapshot(&resolver.db));
475 }
476 let values = resolver.values.as_ref()?;
477 values.get(&key).cloned().or_else(|| {
478 key.strip_prefix("red.config/")
479 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
480 })
481 })
482}
483
484fn update_current_config_value(path: &str, value: Value) {
485 let key = path.to_ascii_lowercase();
486 CURRENT_CONFIG_RESOLVER.with(|cell| {
487 if let Some(resolver) = cell.borrow_mut().as_mut() {
488 if let Some(values) = resolver.values.as_mut() {
489 values.insert(key, value);
490 }
491 }
492 });
493}
494
495fn update_current_secret_value(path: &str, value: Option<String>) {
496 let key = path.to_ascii_lowercase();
497 CURRENT_SECRET_RESOLVER.with(|cell| {
498 if let Some(resolver) = cell.borrow_mut().as_mut() {
499 let Some(values) = resolver.values.as_mut() else {
500 return;
501 };
502 match value {
503 Some(value) => {
504 values.insert(key, value);
505 }
506 None => {
507 values.remove(&key);
508 }
509 }
510 }
511 });
512}
513
514fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
515 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
516
517 if let Some(manager) = db.store().get_collection("red_config") {
518 manager.for_each_entity(|entity| {
519 let Some(row) = entity.data.as_row() else {
520 return true;
521 };
522 let Some(Value::Text(key)) = row.get_field("key") else {
523 return true;
524 };
525 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
526 let id = entity.id.raw();
527 let key = key.to_ascii_lowercase();
528 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
529 if let Some(rest) = key.strip_prefix("red.config.") {
530 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
531 }
532 true
533 });
534 }
535
536 if let Some(manager) = db.store().get_collection("red.config") {
537 manager.for_each_entity(|entity| {
538 let Some(row) = entity.data.as_row() else {
539 return true;
540 };
541 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
542 return true;
543 }
544 let Some(Value::Text(key)) = row.get_field("key") else {
545 return true;
546 };
547 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
548 insert_latest_config_value(
549 &mut latest,
550 format!("red.config/{}", key.to_ascii_lowercase()),
551 entity.id.raw(),
552 value,
553 );
554 true
555 });
556 }
557
558 latest
559 .into_iter()
560 .map(|(key, (_, value))| (key, value))
561 .collect()
562}
563
564fn insert_latest_config_value(
565 latest: &mut HashMap<String, (u64, Value)>,
566 key: String,
567 id: u64,
568 value: Value,
569) {
570 match latest.get(&key) {
571 Some((prev_id, _)) if *prev_id > id => {}
572 _ => {
573 latest.insert(key, (id, value));
574 }
575 }
576}
577
/// Per-thread config lookup state used by `current_config_value`.
struct ConfigResolver {
    /// Database the snapshot is built from (via `latest_config_snapshot`).
    db: Arc<RedDB>,
    /// Lazily built snapshot; `None` until the first lookup.
    /// `update_current_config_value` patches it in place once loaded.
    values: Option<HashMap<String, Value>>,
}
582
583pub(super) struct ConfigSnapshotGuard {
584 previous: Option<ConfigResolver>,
585}
586
587impl ConfigSnapshotGuard {
588 pub(super) fn install(db: Arc<RedDB>) -> Self {
589 let previous = CURRENT_CONFIG_RESOLVER
590 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
591 Self { previous }
592 }
593}
594
595impl Drop for ConfigSnapshotGuard {
596 fn drop(&mut self) {
597 let previous = self.previous.take();
598 CURRENT_CONFIG_RESOLVER.with(|cell| {
599 cell.replace(previous);
600 });
601 }
602}
603
604pub fn set_current_snapshot(ctx: SnapshotContext) {
609 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
610 HAS_SNAPSHOT.with(|c| c.set(true));
611}
612
613pub fn clear_current_snapshot() {
614 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
615 HAS_SNAPSHOT.with(|c| c.set(false));
616}
617
618pub(crate) struct CurrentSnapshotGuard {
624 previous: Option<SnapshotContext>,
625}
626
627impl CurrentSnapshotGuard {
628 pub(crate) fn install(ctx: SnapshotContext) -> Self {
629 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
630 set_current_snapshot(ctx);
631 Self { previous }
632 }
633}
634
635impl Drop for CurrentSnapshotGuard {
636 fn drop(&mut self) {
637 let prev = self.previous.take();
638 let has = prev.is_some();
639 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
640 HAS_SNAPSHOT.with(|c| c.set(has));
641 }
642}
643
644#[inline]
655pub fn entity_visible_under_current_snapshot(
656 entity: &crate::storage::unified::entity::UnifiedEntity,
657) -> bool {
658 if !HAS_SNAPSHOT.with(|c| c.get()) {
664 return entity.xmax == 0;
665 }
666 CURRENT_SNAPSHOT.with(|cell| {
667 let guard = cell.borrow();
668 let Some(ctx) = guard.as_ref() else {
669 return true;
670 };
671 visibility_check(ctx, entity.xmin, entity.xmax)
672 })
673}
674
675#[inline]
680pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
681 if !HAS_SNAPSHOT.with(|c| c.get()) {
682 return true;
683 }
684 CURRENT_SNAPSHOT.with(|cell| {
685 let guard = cell.borrow();
686 let Some(ctx) = guard.as_ref() else {
687 return true;
688 };
689 visibility_check(ctx, xmin, xmax)
690 })
691}
692
693pub fn capture_current_snapshot() -> Option<SnapshotContext> {
700 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
701}
702
703pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
708 if !HAS_SNAPSHOT.with(|c| c.get()) {
709 return false;
710 }
711 CURRENT_SNAPSHOT.with(|cell| {
712 cell.borrow()
713 .as_ref()
714 .is_some_and(|ctx| ctx.requires_index_fallback)
715 })
716}
717
/// Copy of a thread's snapshot, auth identity and tenant, produced by
/// `snapshot_bundle` and re-installed by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context captured from the source thread, if any.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)` captured from the source thread, if any.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Tenant id as installed via `set_current_tenant`, read directly from
    /// the thread-local (not routed through `current_tenant`).
    pub tenant: Option<String>,
}
738
739pub fn snapshot_bundle() -> SnapshotBundle {
742 SnapshotBundle {
743 snapshot: capture_current_snapshot(),
744 auth: current_auth_identity(),
745 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
746 }
747}
748
749pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
754 struct Guard {
755 prev_snapshot: Option<SnapshotContext>,
756 prev_auth: Option<(String, crate::auth::Role)>,
757 prev_tenant: Option<String>,
758 }
759 impl Drop for Guard {
760 fn drop(&mut self) {
761 let snap = self.prev_snapshot.take();
762 let has = snap.is_some();
763 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
764 HAS_SNAPSHOT.with(|c| c.set(has));
765 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
766 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
767 }
768 }
769
770 let _guard = {
771 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
772 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
773 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
774
775 match bundle.snapshot.clone() {
776 Some(ctx) => set_current_snapshot(ctx),
777 None => clear_current_snapshot(),
778 }
779 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
780 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
781
782 Guard {
783 prev_snapshot,
784 prev_auth,
785 prev_tenant,
786 }
787 };
788 f()
789}
790
791#[inline]
795pub fn entity_visible_with_context(
796 ctx: Option<&SnapshotContext>,
797 entity: &crate::storage::unified::entity::UnifiedEntity,
798) -> bool {
799 match ctx {
800 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
801 None => true,
802 }
803}
804
805fn table_row_index_fields(
806 entity: &crate::storage::unified::entity::UnifiedEntity,
807) -> Vec<(String, crate::storage::schema::Value)> {
808 let crate::storage::EntityData::Row(row) = &entity.data else {
809 return Vec::new();
810 };
811 if let Some(named) = &row.named {
812 return named
813 .iter()
814 .map(|(name, value)| (name.clone(), value.clone()))
815 .collect();
816 }
817 if let Some(schema) = &row.schema {
818 return schema
819 .iter()
820 .zip(row.columns.iter())
821 .map(|(name, value)| (name.clone(), value.clone()))
822 .collect();
823 }
824 Vec::new()
825}
826
827#[inline]
828fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
829 if xmin != 0 && ctx.manager.is_aborted(xmin) {
833 return false;
834 }
835 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
837 0
838 } else {
839 xmax
840 };
841 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
845 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
846 if own_xmax {
847 return false;
849 }
850 if own_xmin {
851 return true;
852 }
853 ctx.snapshot.sees(xmin, effective_xmax)
854}
855
856fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
857 runtime
858 .inner
859 .pool
860 .lock()
861 .unwrap_or_else(|poisoned| poisoned.into_inner())
862}
863
864fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
865 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
866 return;
867 }
868 scopes.insert(name.to_string());
869}
870
871fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
872 match query.source.as_ref() {
873 Some(crate::storage::query::ast::TableSource::Name(name)) => {
874 cache_scope_insert(scopes, name)
875 }
876 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
877 collect_query_expr_result_cache_scopes(scopes, subquery);
878 }
879 None => cache_scope_insert(scopes, &query.table),
880 }
881}
882
883fn collect_vector_source_scopes(
884 scopes: &mut HashSet<String>,
885 source: &crate::storage::query::ast::VectorSource,
886) {
887 match source {
888 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
889 cache_scope_insert(scopes, collection);
890 }
891 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
892 collect_query_expr_result_cache_scopes(scopes, subquery);
893 }
894 crate::storage::query::ast::VectorSource::Literal(_)
895 | crate::storage::query::ast::VectorSource::Text(_) => {}
896 }
897}
898
899fn collect_path_selector_scopes(
900 scopes: &mut HashSet<String>,
901 selector: &crate::storage::query::ast::NodeSelector,
902) {
903 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
904 cache_scope_insert(scopes, table);
905 }
906}
907
/// Collects into `scopes` the names of every collection, table, queue, etc.
/// that `expr` names, via `cache_scope_insert` (which filters out empty,
/// `__subq_`-prefixed and universal-source names).
///
/// The match is exhaustive over `QueryExpr`: composite expressions recurse
/// into their parts, single-target statements contribute their target name,
/// and statements with no collection target contribute nothing.
fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
    match expr {
        // Composite reads: recurse into every input.
        QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
        QueryExpr::Join(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.left);
            collect_query_expr_result_cache_scopes(scopes, &query.right);
        }
        QueryExpr::Path(query) => {
            collect_path_selector_scopes(scopes, &query.from);
            collect_path_selector_scopes(scopes, &query.to);
        }
        QueryExpr::Vector(query) => {
            cache_scope_insert(scopes, &query.collection);
            collect_vector_source_scopes(scopes, &query.query_vector);
        }
        QueryExpr::Hybrid(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.structured);
            cache_scope_insert(scopes, &query.vector.collection);
            collect_vector_source_scopes(scopes, &query.vector.query_vector);
        }
        // Single-target DML / DDL: the target name is the scope.
        QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
        QueryExpr::QueueCommand(query) => match query {
            QueueCommand::Push { queue, .. }
            | QueueCommand::Pop { queue, .. }
            | QueueCommand::Peek { queue, .. }
            | QueueCommand::Len { queue }
            | QueueCommand::Purge { queue }
            | QueueCommand::GroupCreate { queue, .. }
            | QueueCommand::GroupRead { queue, .. }
            | QueueCommand::Pending { queue, .. }
            | QueueCommand::Claim { queue, .. }
            | QueueCommand::Ack { queue, .. }
            | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
            // A move names two queues; both are recorded.
            QueueCommand::Move {
                source,
                destination,
                ..
            } => {
                cache_scope_insert(scopes, source);
                cache_scope_insert(scopes, destination);
            }
        },
        QueryExpr::EventsBackfill(query) => {
            cache_scope_insert(scopes, &query.collection);
            cache_scope_insert(scopes, &query.target_queue);
        }
        QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::TreeCommand(query) => match query {
            TreeCommand::Insert { collection, .. }
            | TreeCommand::Move { collection, .. }
            | TreeCommand::Delete { collection, .. }
            | TreeCommand::Validate { collection, .. }
            | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
        },
        QueryExpr::SearchCommand(query) => match query {
            SearchCommand::Similar { collection, .. }
            | SearchCommand::Hybrid { collection, .. }
            | SearchCommand::SpatialRadius { collection, .. }
            | SearchCommand::SpatialBbox { collection, .. }
            | SearchCommand::SpatialNearest { collection, .. } => {
                cache_scope_insert(scopes, collection);
            }
            // These variants carry an optional collection; nothing is
            // recorded when it is absent.
            SearchCommand::Text { collection, .. }
            | SearchCommand::Multimodal { collection, .. }
            | SearchCommand::Index { collection, .. }
            | SearchCommand::Context { collection, .. } => {
                if let Some(collection) = collection.as_deref() {
                    cache_scope_insert(scopes, collection);
                }
            }
        },
        QueryExpr::Ask(query) => {
            if let Some(collection) = query.collection.as_deref() {
                cache_scope_insert(scopes, collection);
            }
        }
        QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
        QueryExpr::MaintenanceCommand(cmd) => match cmd {
            crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
            | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
                if let Some(t) = target {
                    cache_scope_insert(scopes, t);
                }
            }
        },
        QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
        // A view contributes its own name plus everything its defining
        // query reads.
        QueryExpr::CreateView(cmd) => {
            cache_scope_insert(scopes, &cmd.name);
            collect_query_expr_result_cache_scopes(scopes, &cmd.query);
        }
        QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
        QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        // Statements that record no scope here.
        QueryExpr::Graph(_)
        | QueryExpr::GraphCommand(_)
        | QueryExpr::ProbabilisticCommand(_)
        | QueryExpr::SetConfig { .. }
        | QueryExpr::ShowConfig { .. }
        | QueryExpr::SetSecret { .. }
        | QueryExpr::DeleteSecret { .. }
        | QueryExpr::ShowSecrets { .. }
        | QueryExpr::SetTenant(_)
        | QueryExpr::ShowTenant
        | QueryExpr::TransactionControl(_)
        | QueryExpr::CreateSchema(_)
        | QueryExpr::DropSchema(_)
        | QueryExpr::CreateSequence(_)
        | QueryExpr::DropSequence(_)
        | QueryExpr::Grant(_)
        | QueryExpr::Revoke(_)
        | QueryExpr::AlterUser(_)
        | QueryExpr::CreateIamPolicy { .. }
        | QueryExpr::DropIamPolicy { .. }
        | QueryExpr::AttachPolicy { .. }
        | QueryExpr::DetachPolicy { .. }
        | QueryExpr::ShowPolicies { .. }
        | QueryExpr::ShowEffectivePermissions { .. }
        | QueryExpr::SimulatePolicy { .. }
        | QueryExpr::CreateMigration(_)
        | QueryExpr::ApplyMigration(_)
        | QueryExpr::RollbackMigration(_)
        | QueryExpr::ExplainMigration(_)
        | QueryExpr::EventsBackfillStatus { .. } => {}
        QueryExpr::KvCommand(cmd) => {
            use crate::storage::query::ast::KvCommand;
            match cmd {
                KvCommand::Put { collection, .. }
                | KvCommand::InvalidateTags { collection, .. }
                | KvCommand::Get { collection, .. }
                | KvCommand::Unseal { collection, .. }
                | KvCommand::Rotate { collection, .. }
                | KvCommand::History { collection, .. }
                | KvCommand::List { collection, .. }
                | KvCommand::Purge { collection, .. }
                | KvCommand::Watch { collection, .. }
                | KvCommand::Delete { collection, .. }
                | KvCommand::Incr { collection, .. }
                | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
            }
        }
        QueryExpr::ConfigCommand(cmd) => {
            use crate::storage::query::ast::ConfigCommand;
            match cmd {
                ConfigCommand::Put { collection, .. }
                | ConfigCommand::Get { collection, .. }
                | ConfigCommand::Resolve { collection, .. }
                | ConfigCommand::Rotate { collection, .. }
                | ConfigCommand::Delete { collection, .. }
                | ConfigCommand::History { collection, .. }
                | ConfigCommand::List { collection, .. }
                | ConfigCommand::Watch { collection, .. }
                | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
                    cache_scope_insert(scopes, collection)
                }
            }
        }
    }
}
1093
1094pub(crate) fn rls_policy_filter(
1102 runtime: &RedDBRuntime,
1103 table: &str,
1104 action: crate::storage::query::ast::PolicyAction,
1105) -> Option<crate::storage::query::ast::Filter> {
1106 rls_policy_filter_for_kind(
1107 runtime,
1108 table,
1109 action,
1110 crate::storage::query::ast::PolicyTargetKind::Table,
1111 )
1112}
1113
1114pub(crate) fn rls_policy_filter_for_kind(
1120 runtime: &RedDBRuntime,
1121 table: &str,
1122 action: crate::storage::query::ast::PolicyAction,
1123 kind: crate::storage::query::ast::PolicyTargetKind,
1124) -> Option<crate::storage::query::ast::Filter> {
1125 use crate::storage::query::ast::Filter;
1126
1127 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1128 return None;
1129 }
1130 let role = current_auth_identity().map(|(_, role)| role);
1131 let role_str = role.map(|r| r.as_str().to_string());
1132 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1133 if policies.is_empty() {
1134 return None;
1135 }
1136 policies
1137 .into_iter()
1138 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1139}
1140
1141pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1145 runtime.inner.rls_enabled_tables.read().contains(table)
1146}
1147
1148fn node_passes_rls(
1155 runtime: &RedDBRuntime,
1156 collection: &str,
1157 role: Option<&str>,
1158 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1159 entity: &crate::storage::unified::entity::UnifiedEntity,
1160) -> bool {
1161 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1162
1163 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1164 return true;
1165 }
1166 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1167 let policies = runtime.matching_rls_policies_for_kind(
1168 collection,
1169 role,
1170 PolicyAction::Select,
1171 PolicyTargetKind::Nodes,
1172 );
1173 if policies.is_empty() {
1174 None
1175 } else {
1176 policies
1177 .into_iter()
1178 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1179 }
1180 });
1181 let Some(filter) = filter else {
1182 return false;
1183 };
1184 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1185 Some(&runtime.inner.db),
1186 entity,
1187 filter,
1188 collection,
1189 collection,
1190 )
1191}
1192
1193fn edge_passes_rls(
1196 runtime: &RedDBRuntime,
1197 collection: &str,
1198 role: Option<&str>,
1199 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1200 entity: &crate::storage::unified::entity::UnifiedEntity,
1201) -> bool {
1202 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1203
1204 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1205 return true;
1206 }
1207 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1208 let policies = runtime.matching_rls_policies_for_kind(
1209 collection,
1210 role,
1211 PolicyAction::Select,
1212 PolicyTargetKind::Edges,
1213 );
1214 if policies.is_empty() {
1215 None
1216 } else {
1217 policies
1218 .into_iter()
1219 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1220 }
1221 });
1222 let Some(filter) = filter else {
1223 return false;
1224 };
1225 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1226 Some(&runtime.inner.db),
1227 entity,
1228 filter,
1229 collection,
1230 collection,
1231 )
1232}
1233
1234fn inject_rls_filters(
1255 runtime: &RedDBRuntime,
1256 frame: &dyn super::statement_frame::ReadFrame,
1257 mut table: crate::storage::query::ast::TableQuery,
1258) -> Option<crate::storage::query::ast::TableQuery> {
1259 use crate::storage::query::ast::{Filter, PolicyAction};
1260
1261 let role = frame.identity().map(|(_, role)| role);
1263 let role_str = role.map(|r| r.as_str().to_string());
1264 let policies =
1265 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1266
1267 if policies.is_empty() {
1268 return None;
1271 }
1272
1273 let combined = policies
1275 .into_iter()
1276 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1277 .expect("policies non-empty");
1278
1279 table.filter = Some(match table.filter.take() {
1281 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1282 None => combined,
1283 });
1284 Some(table)
1285}
1286
1287fn inject_rls_into_join(
1297 runtime: &RedDBRuntime,
1298 frame: &dyn super::statement_frame::ReadFrame,
1299 mut join: crate::storage::query::ast::JoinQuery,
1300) -> Option<crate::storage::query::ast::JoinQuery> {
1301 use crate::storage::query::ast::Filter;
1302
1303 let mut policy_filters: Vec<Filter> = Vec::new();
1304 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1305 return None;
1306 }
1307 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1308 return None;
1309 }
1310
1311 if policy_filters.is_empty() {
1312 return Some(join);
1313 }
1314
1315 let combined = policy_filters
1316 .into_iter()
1317 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1318 .expect("policy_filters non-empty");
1319
1320 join.filter = Some(match join.filter.take() {
1321 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1322 None => combined,
1323 });
1324
1325 Some(join)
1326}
1327
1328fn collect_join_side_policy(
1333 runtime: &RedDBRuntime,
1334 frame: &dyn super::statement_frame::ReadFrame,
1335 expr: &crate::storage::query::ast::QueryExpr,
1336 out: &mut Vec<crate::storage::query::ast::Filter>,
1337) -> bool {
1338 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1339 match expr {
1340 QueryExpr::Table(t) => {
1341 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1342 return true;
1343 }
1344 let role = frame.identity().map(|(_, role)| role);
1345 let role_str = role.map(|r| r.as_str().to_string());
1346 let policies =
1347 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1348 if policies.is_empty() {
1349 return false;
1350 }
1351 let combined = policies
1352 .into_iter()
1353 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1354 .expect("policies non-empty");
1355 out.push(combined);
1356 true
1357 }
1358 QueryExpr::Join(inner) => {
1359 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1360 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1361 }
1362 _ => true,
1363 }
1364}
1365
1366fn apply_foreign_table_filters(
1377 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1378 query: &crate::storage::query::ast::TableQuery,
1379) -> crate::storage::query::unified::UnifiedResult {
1380 use crate::storage::query::sql_lowering::{
1381 effective_table_filter, effective_table_projections,
1382 };
1383 use crate::storage::query::unified::UnifiedResult;
1384
1385 let filter = effective_table_filter(query);
1386 let projections = effective_table_projections(query);
1387
1388 let mut filtered: Vec<_> = records
1391 .into_iter()
1392 .filter(|record| match &filter {
1393 Some(f) => {
1394 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1395 }
1396 None => true,
1397 })
1398 .collect();
1399
1400 if let Some(offset) = query.offset {
1402 let offset = offset as usize;
1403 if offset >= filtered.len() {
1404 filtered.clear();
1405 } else {
1406 filtered.drain(0..offset);
1407 }
1408 }
1409 if let Some(limit) = query.limit {
1410 filtered.truncate(limit as usize);
1411 }
1412
1413 let columns: Vec<String> = if projections.is_empty() {
1416 filtered
1417 .first()
1418 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1419 .unwrap_or_default()
1420 } else {
1421 projections
1422 .iter()
1423 .map(super::join_filter::projection_name)
1424 .collect()
1425 };
1426
1427 let mut result = UnifiedResult::empty();
1428 result.columns = columns;
1429 result.records = filtered;
1430 result
1431}
1432
1433pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1440 let mut scopes: HashSet<String> = HashSet::new();
1441 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1442 scopes.into_iter().collect()
1443}
1444
1445fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1446 let mut scopes = HashSet::new();
1447 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1448 scopes
1449}
1450
// Result-cache tuning constants.
// NOTE(review): only RESULT_CACHE_MAX_ENTRIES and RESULT_CACHE_PAYLOAD_MAGIC
// are used in the nearby helpers; the descriptions of the other constants are
// inferred from their names and values — confirm against their call sites.

/// Config key that presumably selects the result-cache backend.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
/// Backend name presumably used when the config key above is unset.
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
/// Namespace string, presumably for result entries stored in the blob cache.
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
/// Presumably the lifetime of a cached result, in seconds.
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Entry count above which `trim_result_cache` starts evicting.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
/// Eight-byte tag written at the start of every encoded result payload and
/// required by `decode_result_cache_payload`.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
1457
/// Which implementation backs the runtime result cache.
///
/// NOTE(review): the variants are not matched on in this part of the file;
/// the per-variant notes below are inferred from the names and from the
/// `result_cache` / `result_blob_cache` / `result_cache_shadow_divergences`
/// runtime fields — confirm against the selection code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    /// Presumably the in-memory map + eviction queue (`result_cache`).
    Legacy,
    /// Presumably the blob-cache-backed store (`result_blob_cache`).
    BlobCache,
    /// Presumably runs both backends and compares their answers.
    Shadow,
}
1464
1465fn trim_result_cache(
1466 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1467 order: &mut std::collections::VecDeque<String>,
1468) {
1469 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1470 if let Some(oldest) = order.pop_front() {
1471 map.remove(&oldest);
1472 } else {
1473 break;
1474 }
1475 }
1476}
1477
1478fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1479 format!(
1480 "{:?}|{}|{}|{}|{}|{:?}",
1481 result.result,
1482 result.query,
1483 result.statement,
1484 result.engine,
1485 result.affected_rows,
1486 result.statement_type
1487 )
1488}
1489
1490fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1491 match mode {
1492 crate::storage::query::modes::QueryMode::Sql => 0,
1493 crate::storage::query::modes::QueryMode::Gremlin => 1,
1494 crate::storage::query::modes::QueryMode::Cypher => 2,
1495 crate::storage::query::modes::QueryMode::Sparql => 3,
1496 crate::storage::query::modes::QueryMode::Path => 4,
1497 crate::storage::query::modes::QueryMode::Natural => 5,
1498 crate::storage::query::modes::QueryMode::Unknown => 255,
1499 }
1500}
1501
1502fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1503 match byte {
1504 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1505 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1506 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1507 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1508 4 => Some(crate::storage::query::modes::QueryMode::Path),
1509 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1510 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1511 _ => None,
1512 }
1513}
1514
/// Maps `value` onto the identical `'static` string when it is one of the
/// labels the result cache knows how to round-trip.
///
/// Matching is exact and case-sensitive; any other input yields `None`.
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN_LABELS
        .iter()
        .copied()
        .find(|label| *label == value)
}
1537
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in
/// 32 bits.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrow) => {
            out.extend_from_slice(&narrow.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1543
1544fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1545 write_u32(out, value.len())?;
1546 out.extend_from_slice(value.as_bytes());
1547 Some(())
1548}
1549
1550fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1551 write_u32(out, value.len())?;
1552 out.extend_from_slice(value);
1553 Some(())
1554}
1555
/// Pops one byte off the front of `input`.
///
/// Returns `None`, leaving `input` untouched, when it is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let data = *input;
    let first = *data.first()?;
    *input = &data[1..];
    Some(first)
}
1561
/// Reads a little-endian `u32` from the front of `input` and returns it
/// widened to `usize`, advancing `input` by four bytes.
///
/// Returns `None`, leaving `input` untouched, when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let data = *input;
    if data.len() < 4 {
        return None;
    }
    let (word, tail) = data.split_at(4);
    let mut raw = [0u8; 4];
    raw.copy_from_slice(word);
    *input = tail;
    Some(u32::from_le_bytes(raw) as usize)
}
1570
/// Reads a little-endian `u64` from the front of `input`, advancing it by
/// eight bytes.
///
/// Returns `None`, leaving `input` untouched, when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let data = *input;
    if data.len() < 8 {
        return None;
    }
    let (word, tail) = data.split_at(8);
    let mut raw = [0u8; 8];
    raw.copy_from_slice(word);
    *input = tail;
    Some(u64::from_le_bytes(raw))
}
1579
1580fn read_string(input: &mut &[u8]) -> Option<String> {
1581 let len = read_u32(input)?;
1582 if input.len() < len {
1583 return None;
1584 }
1585 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1586 *input = &input[len..];
1587 Some(value)
1588}
1589
1590fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1591 let len = read_u32(input)?;
1592 if input.len() < len {
1593 return None;
1594 }
1595 let value = &input[..len];
1596 *input = &input[len..];
1597 Some(value)
1598}
1599
1600fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
1601 let result = &entry.result;
1602 if result.result.pre_serialized_json.is_some()
1603 || result_cache_static_str(result.statement).is_none()
1604 || result_cache_static_str(result.engine).is_none()
1605 || result_cache_static_str(result.statement_type).is_none()
1606 || result.result.records.iter().any(|record| {
1607 !record.nodes.is_empty()
1608 || !record.edges.is_empty()
1609 || !record.paths.is_empty()
1610 || !record.vector_results.is_empty()
1611 })
1612 {
1613 return None;
1614 }
1615
1616 let mut out = Vec::new();
1617 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
1618 write_string(&mut out, &result.query)?;
1619 out.push(mode_to_byte(result.mode));
1620 write_string(&mut out, result.statement)?;
1621 write_string(&mut out, result.engine)?;
1622 out.extend_from_slice(&result.affected_rows.to_le_bytes());
1623 write_string(&mut out, result.statement_type)?;
1624
1625 write_u32(&mut out, result.result.columns.len())?;
1626 for column in &result.result.columns {
1627 write_string(&mut out, column)?;
1628 }
1629 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
1630 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
1631 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
1632 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
1633
1634 write_u32(&mut out, result.result.records.len())?;
1635 for record in &result.result.records {
1636 let fields = record.iter_fields().collect::<Vec<_>>();
1637 write_u32(&mut out, fields.len())?;
1638 for (name, value) in fields {
1639 write_string(&mut out, name)?;
1640 let mut encoded = Vec::new();
1641 crate::storage::schema::value_codec::encode(value, &mut encoded);
1642 write_bytes(&mut out, &encoded)?;
1643 }
1644 }
1645
1646 write_u32(&mut out, entry.scopes.len())?;
1647 for scope in &entry.scopes {
1648 write_string(&mut out, scope)?;
1649 }
1650 Some(out)
1651}
1652
1653fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
1654 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
1655 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
1656 {
1657 return None;
1658 }
1659 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
1660
1661 let query = read_string(&mut input)?;
1662 let mode = mode_from_byte(read_u8(&mut input)?)?;
1663 let statement = result_cache_static_str(&read_string(&mut input)?)?;
1664 let engine = result_cache_static_str(&read_string(&mut input)?)?;
1665 let affected_rows = read_u64(&mut input)?;
1666 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
1667
1668 let mut columns = Vec::new();
1669 for _ in 0..read_u32(&mut input)? {
1670 columns.push(read_string(&mut input)?);
1671 }
1672 let stats = crate::storage::query::unified::QueryStats {
1673 nodes_scanned: read_u64(&mut input)?,
1674 edges_scanned: read_u64(&mut input)?,
1675 rows_scanned: read_u64(&mut input)?,
1676 exec_time_us: read_u64(&mut input)?,
1677 };
1678
1679 let mut records = Vec::new();
1680 for _ in 0..read_u32(&mut input)? {
1681 let mut record = crate::storage::query::unified::UnifiedRecord::new();
1682 for _ in 0..read_u32(&mut input)? {
1683 let name = read_string(&mut input)?;
1684 let bytes = read_bytes(&mut input)?;
1685 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
1686 if used != bytes.len() {
1687 return None;
1688 }
1689 record.set_owned(name, value);
1690 }
1691 records.push(record);
1692 }
1693
1694 let mut scopes = HashSet::new();
1695 for _ in 0..read_u32(&mut input)? {
1696 scopes.insert(read_string(&mut input)?);
1697 }
1698 if !input.is_empty() {
1699 return None;
1700 }
1701
1702 Some((
1703 RuntimeQueryResult {
1704 query,
1705 mode,
1706 statement,
1707 engine,
1708 result: crate::storage::query::unified::UnifiedResult {
1709 columns,
1710 records,
1711 stats,
1712 pre_serialized_json: None,
1713 },
1714 affected_rows,
1715 statement_type,
1716 },
1717 scopes,
1718 ))
1719}
1720
/// Strips a leading `EXPLAIN` keyword and returns the statement after it.
///
/// The keyword is matched case-insensitively after leading whitespace and
/// must be followed by whitespace. Returns `None` when:
/// * the first word is not `EXPLAIN`,
/// * nothing follows the keyword,
/// * the explained statement starts with `ALTER` or `ASK`
///   (case-insensitive).
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    // No whitespace at all means there is either no keyword or no statement
    // after it; both cases yield `None`.
    let (keyword, remainder) = sql.trim_start().split_once(char::is_whitespace)?;
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let statement = remainder.trim_start();
    // `statement` has no leading whitespace, so a missing first word means it
    // is empty.
    let first_word = statement.split_whitespace().next()?;
    let excluded = ["ALTER", "ASK"]
        .iter()
        .any(|keyword| first_word.eq_ignore_ascii_case(keyword));
    if excluded {
        None
    } else {
        Some(statement)
    }
}
1759
1760pub(super) fn has_with_prefix(sql: &str) -> bool {
1765 let trimmed = sql.trim_start();
1766 let head_end = trimmed
1767 .find(|c: char| c.is_whitespace() || c == '(')
1768 .unwrap_or(trimmed.len());
1769 trimmed[..head_end].eq_ignore_ascii_case("WITH")
1770}
1771
1772fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
1780 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
1781}
1782
1783pub(super) fn peek_top_level_as_of_with_table(
1788 sql: &str,
1789) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
1790 if !sql
1791 .as_bytes()
1792 .windows(5)
1793 .any(|w| w.eq_ignore_ascii_case(b"as of"))
1794 {
1795 return None;
1796 }
1797 let parsed = crate::storage::query::parser::parse(sql).ok()?;
1798 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
1799 return None;
1800 };
1801 let clause = table.as_of?;
1802 let table_name = if table.table.is_empty() || table.table == "any" {
1803 None
1804 } else {
1805 Some(table.table.clone())
1806 };
1807 let spec = match clause {
1808 crate::storage::query::ast::AsOfClause::Commit(h) => {
1809 crate::application::vcs::AsOfSpec::Commit(h)
1810 }
1811 crate::storage::query::ast::AsOfClause::Branch(b) => {
1812 crate::application::vcs::AsOfSpec::Branch(b)
1813 }
1814 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
1815 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
1816 crate::application::vcs::AsOfSpec::TimestampMs(ts)
1817 }
1818 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
1819 crate::application::vcs::AsOfSpec::Snapshot(x)
1820 }
1821 };
1822 Some((spec, table_name))
1823}
1824
1825pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
1826 const VOLATILE_TOKENS: &[&str] = &[
1830 "pg_advisory_lock",
1831 "pg_try_advisory_lock",
1832 "pg_advisory_unlock",
1833 "random()",
1834 ];
1839 let lowered = sql.to_ascii_lowercase();
1840 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
1841}
1842
1843pub(super) fn query_is_ask_statement(sql: &str) -> bool {
1844 let trimmed = sql.trim_start();
1845 let head_end = trimmed
1846 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
1847 .unwrap_or(trimmed.len());
1848 trimmed[..head_end].eq_ignore_ascii_case("ASK")
1849}
1850
1851pub(super) fn intent_lock_modes_for(
1861 expr: &QueryExpr,
1862) -> Option<(
1863 crate::storage::transaction::lock::LockMode,
1864 crate::storage::transaction::lock::LockMode,
1865)> {
1866 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
1867
1868 match expr {
1869 QueryExpr::Table(_)
1871 | QueryExpr::Join(_)
1872 | QueryExpr::Vector(_)
1873 | QueryExpr::Hybrid(_)
1874 | QueryExpr::Graph(_)
1875 | QueryExpr::Path(_)
1876 | QueryExpr::Ask(_)
1877 | QueryExpr::SearchCommand(_)
1878 | QueryExpr::GraphCommand(_)
1879 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
1880
1881 QueryExpr::Insert(_)
1889 | QueryExpr::Update(_)
1890 | QueryExpr::Delete(_)
1891 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
1892 Some((IntentExclusive, IntentExclusive))
1893 }
1894 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
1895
1896 QueryExpr::CreateTable(_)
1900 | QueryExpr::CreateCollection(_)
1901 | QueryExpr::CreateVector(_)
1902 | QueryExpr::DropTable(_)
1903 | QueryExpr::DropGraph(_)
1904 | QueryExpr::DropVector(_)
1905 | QueryExpr::DropDocument(_)
1906 | QueryExpr::DropKv(_)
1907 | QueryExpr::DropCollection(_)
1908 | QueryExpr::Truncate(_)
1909 | QueryExpr::AlterTable(_)
1910 | QueryExpr::CreateIndex(_)
1911 | QueryExpr::DropIndex(_)
1912 | QueryExpr::CreateTimeSeries(_)
1913 | QueryExpr::DropTimeSeries(_)
1914 | QueryExpr::CreateQueue(_)
1915 | QueryExpr::AlterQueue(_)
1916 | QueryExpr::DropQueue(_)
1917 | QueryExpr::CreateTree(_)
1918 | QueryExpr::DropTree(_)
1919 | QueryExpr::CreatePolicy(_)
1920 | QueryExpr::DropPolicy(_)
1921 | QueryExpr::CreateView(_)
1922 | QueryExpr::DropView(_)
1923 | QueryExpr::RefreshMaterializedView(_)
1924 | QueryExpr::CreateSchema(_)
1925 | QueryExpr::DropSchema(_)
1926 | QueryExpr::CreateSequence(_)
1927 | QueryExpr::DropSequence(_)
1928 | QueryExpr::CreateServer(_)
1929 | QueryExpr::DropServer(_)
1930 | QueryExpr::CreateForeignTable(_)
1931 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
1932
1933 _ => None,
1939 }
1940}
1941
1942pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
1947 let mut out = Vec::new();
1948 walk_collections(expr, &mut out);
1949 out.sort();
1950 out.dedup();
1951 out
1952}
1953
1954fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
1955 match expr {
1956 QueryExpr::Table(t) => out.push(t.table.clone()),
1957 QueryExpr::Join(j) => {
1958 walk_collections(&j.left, out);
1959 walk_collections(&j.right, out);
1960 }
1961 QueryExpr::Insert(i) => out.push(i.table.clone()),
1962 QueryExpr::Update(u) => out.push(u.table.clone()),
1963 QueryExpr::Delete(d) => out.push(d.table.clone()),
1964 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
1965
1966 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
1971 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
1972 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
1973 QueryExpr::DropTable(q) => out.push(q.name.clone()),
1974 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
1975 QueryExpr::DropVector(q) => out.push(q.name.clone()),
1976 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
1977 QueryExpr::DropKv(q) => out.push(q.name.clone()),
1978 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
1979 QueryExpr::Truncate(q) => out.push(q.name.clone()),
1980 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
1981 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
1982 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
1983 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
1984 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
1985 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
1986 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
1987 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
1988 QueryExpr::QueueCommand(QueueCommand::Move {
1989 source,
1990 destination,
1991 ..
1992 }) => {
1993 out.push(source.clone());
1994 out.push(destination.clone());
1995 }
1996 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
1997 QueryExpr::CreateView(q) => out.push(q.name.clone()),
1998 QueryExpr::DropView(q) => out.push(q.name.clone()),
1999 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
2000
2001 _ => {}
2007 }
2008}
2009
2010impl RedDBRuntime {
2011 pub fn in_memory() -> RedDBResult<Self> {
2012 Self::with_options(RedDBOptions::in_memory())
2013 }
2014
2015 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
2019 self.inner.lock_manager.clone()
2020 }
2021
2022 #[inline(never)]
2023 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
2024 Self::with_pool(options, ConnectionPoolConfig::default())
2025 }
2026
2027 pub fn with_pool(
2028 options: RedDBOptions,
2029 pool_config: ConnectionPoolConfig,
2030 ) -> RedDBResult<Self> {
2031 let boot_open_start_ms = std::time::SystemTime::now()
2039 .duration_since(std::time::UNIX_EPOCH)
2040 .map(|d| d.as_millis() as u64)
2041 .unwrap_or(0);
2042 let db = Arc::new(
2043 RedDB::open_with_options(&options)
2044 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2045 );
2046 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2047 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2048 options
2049 .resolved_path("data.rdb")
2050 .with_extension("result-cache.l2"),
2051 ),
2052 )
2053 .map_err(|err| {
2054 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2055 })?;
2056 let storage_ready_ms = std::time::SystemTime::now()
2057 .duration_since(std::time::UNIX_EPOCH)
2058 .map(|d| d.as_millis() as u64)
2059 .unwrap_or(0);
2060
2061 let runtime = Self {
2062 inner: Arc::new(RuntimeInner {
2063 db,
2064 layout: PhysicalLayout::from_options(&options),
2065 indices: IndexCatalog::register_default_vector_graph(
2066 options.has_capability(crate::api::Capability::Table),
2067 options.has_capability(crate::api::Capability::Graph),
2068 ),
2069 pool_config,
2070 pool: Mutex::new(PoolState::default()),
2071 started_at_unix_ms: SystemTime::now()
2072 .duration_since(UNIX_EPOCH)
2073 .unwrap_or_default()
2074 .as_millis(),
2075 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2076 index_store: super::index_store::IndexStore::new(),
2077 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2078 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2079 query_cache: parking_lot::RwLock::new(
2080 crate::storage::query::planner::cache::PlanCache::new(1000),
2081 ),
2082 result_cache: parking_lot::RwLock::new((
2083 HashMap::new(),
2084 std::collections::VecDeque::new(),
2085 )),
2086 result_blob_cache,
2087 result_blob_entries: parking_lot::RwLock::new((
2088 HashMap::new(),
2089 std::collections::VecDeque::new(),
2090 )),
2091 ask_answer_cache_entries: parking_lot::RwLock::new((
2092 HashSet::new(),
2093 std::collections::VecDeque::new(),
2094 )),
2095 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2096 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2097 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2098 rmw_locks: RmwLockTable::new(),
2099 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2100 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2101 ec_worker: crate::ec::worker::EcWorker::new(),
2102 auth_store: parking_lot::RwLock::new(None),
2103 oauth_validator: parking_lot::RwLock::new(None),
2104 views: parking_lot::RwLock::new(HashMap::new()),
2105 materialized_views: parking_lot::RwLock::new(
2106 crate::storage::cache::result::MaterializedViewCache::new(),
2107 ),
2108 retention_sweeper: parking_lot::RwLock::new(
2109 crate::runtime::retention_sweeper::RetentionSweeperState::new(),
2110 ),
2111 snapshot_manager: Arc::new(
2112 crate::storage::transaction::snapshot::SnapshotManager::new(),
2113 ),
2114 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2115 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2116 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2117 lock_manager: Arc::new({
2118 let env = crate::runtime::config_overlay::collect_env_overrides();
2123 let timeout_ms = env
2124 .get("concurrency.locking.deadlock_timeout_ms")
2125 .and_then(|raw| raw.parse::<u64>().ok())
2126 .unwrap_or_else(|| {
2127 match crate::runtime::config_matrix::default_for(
2128 "concurrency.locking.deadlock_timeout_ms",
2129 ) {
2130 Some(crate::serde_json::Value::Number(n)) => n as u64,
2131 _ => 5000,
2132 }
2133 });
2134 let cfg = crate::storage::transaction::lock::LockConfig {
2135 default_timeout: std::time::Duration::from_millis(timeout_ms),
2136 ..Default::default()
2137 };
2138 crate::storage::transaction::lock::LockManager::new(cfg)
2139 }),
2140 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2141 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2142 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2143 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2144 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2145 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2146 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2147 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2148 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2149 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2150 &options,
2151 )),
2152 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2153 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2154 audit_log: {
2155 let data_path = options
2165 .data_path
2166 .clone()
2167 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2168 let (audit_dest, _) = crate::api::tier_wiring::current_log_destinations();
2169 Arc::new(crate::runtime::audit_log::AuditLogger::for_destination(
2170 &audit_dest,
2171 &data_path,
2172 ))
2173 },
2174 lease_lifecycle: std::sync::OnceLock::new(),
2175 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2176 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2177 schema_vocabulary: parking_lot::RwLock::new(
2178 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2179 ),
2180 slow_query_logger: {
2181 let fallback_dir = options
2194 .data_path
2195 .as_ref()
2196 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2197 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2198 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2199 .ok()
2200 .and_then(|s| s.parse::<u64>().ok())
2201 .unwrap_or(1000);
2202 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2203 .ok()
2204 .and_then(|s| s.parse::<u8>().ok())
2205 .unwrap_or(100);
2206 let (_, slow_dest) = crate::api::tier_wiring::current_log_destinations();
2207 crate::telemetry::slow_query_logger::SlowQueryLogger::for_destination(
2208 &slow_dest,
2209 &fallback_dir,
2210 threshold_ms,
2211 sample_pct,
2212 )
2213 },
2214 kv_stats: crate::runtime::KvStatsCounters::default(),
2215 metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
2216 metrics_tenant_activity_stats:
2217 crate::runtime::MetricsTenantActivityCounters::default(),
2218 queue_telemetry: Arc::new(
2219 crate::runtime::queue_telemetry::QueueTelemetryCounters::default(),
2220 ),
2221 kv_tag_index: crate::runtime::KvTagIndex::default(),
2222 chain_tip_cache: parking_lot::Mutex::new(HashMap::new()),
2223 chain_integrity_broken: parking_lot::Mutex::new(HashMap::new()),
2224 }),
2225 };
2226
2227 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2233 &runtime.inner.audit_log,
2234 ));
2235
2236 runtime
2244 .inner
2245 .lifecycle
2246 .set_restore_started_at_ms(boot_open_start_ms);
2247 runtime
2248 .inner
2249 .lifecycle
2250 .set_restore_ready_at_ms(storage_ready_ms);
2251 runtime
2252 .inner
2253 .lifecycle
2254 .set_wal_replay_started_at_ms(boot_open_start_ms);
2255 runtime
2256 .inner
2257 .lifecycle
2258 .set_wal_replay_ready_at_ms(storage_ready_ms);
2259
2260 let restored_cdc_lsn = runtime
2261 .inner
2262 .db
2263 .replication
2264 .as_ref()
2265 .map(|repl| {
2266 repl.logical_wal_spool
2267 .as_ref()
2268 .map(|spool| spool.current_lsn())
2269 .unwrap_or(0)
2270 })
2271 .unwrap_or(0)
2272 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2273 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2274 runtime.rehydrate_snapshot_xid_floor();
2275 runtime.bootstrap_system_keyed_collections()?;
2276 runtime.rehydrate_declared_column_schemas();
2277 runtime.load_probabilistic_state()?;
2278
2279 runtime.rehydrate_tenant_tables();
2283 runtime.rehydrate_materialized_view_descriptors();
2288 if let Some(repl) = &runtime.inner.db.replication {
2289 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2290 }
2291
2292 {
2294 let sys = SystemInfo::collect();
2295 runtime.inner.db.store().set_config_tree(
2296 "red.system",
2297 &crate::serde_json::json!({
2298 "pid": sys.pid,
2299 "cpu_cores": sys.cpu_cores,
2300 "total_memory_bytes": sys.total_memory_bytes,
2301 "available_memory_bytes": sys.available_memory_bytes,
2302 "os": sys.os,
2303 "arch": sys.arch,
2304 "hostname": sys.hostname,
2305 "started_at": SystemTime::now()
2306 .duration_since(UNIX_EPOCH)
2307 .unwrap_or_default()
2308 .as_millis() as u64
2309 }),
2310 );
2311
2312 let store = runtime.inner.db.store();
2314 if store
2315 .get_collection("red_config")
2316 .map(|m| m.query_all(|_| true).len())
2317 .unwrap_or(0)
2318 <= 10
2319 {
2320 store.set_config_tree("red.ai", &crate::json!({
2321 "default": crate::json!({
2322 "provider": "openai",
2323 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2324 }),
2325 "max_embedding_inputs": 256,
2326 "max_prompt_batch": 256,
2327 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2328 }));
2329 store.set_config_tree(
2330 "red.server",
2331 &crate::json!({
2332 "max_scan_limit": 1000,
2333 "max_body_size": 1048576,
2334 "read_timeout_ms": 5000,
2335 "write_timeout_ms": 5000
2336 }),
2337 );
2338 store.set_config_tree(
2339 "red.storage",
2340 &crate::json!({
2341 "page_size": 4096,
2342 "page_cache_capacity": 100000,
2343 "auto_checkpoint_pages": 1000,
2344 "snapshot_retention": 16,
2345 "verify_checksums": true,
2346 "segment": crate::json!({
2347 "max_entities": 100000,
2348 "max_bytes": 268435456_u64,
2349 "compression_level": 6
2350 }),
2351 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2352 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2353 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2354 }),
2355 );
2356 store.set_config_tree(
2357 "red.search",
2358 &crate::json!({
2359 "rag": crate::json!({
2360 "max_chunks_per_source": 10,
2361 "max_total_chunks": 25,
2362 "similarity_threshold": 0.8,
2363 "graph_depth": 2,
2364 "min_relevance": 0.3
2365 }),
2366 "fusion": crate::json!({
2367 "vector_weight": 0.5,
2368 "graph_weight": 0.3,
2369 "table_weight": 0.2,
2370 "dedup_threshold": 0.85
2371 })
2372 }),
2373 );
2374 store.set_config_tree(
2375 "red.auth",
2376 &crate::json!({
2377 "enabled": false,
2378 "session_ttl_secs": 3600,
2379 "require_auth": false
2380 }),
2381 );
2382 store.set_config_tree(
2383 "red.query",
2384 &crate::json!({
2385 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2386 "max_recursion_depth": 1000
2387 }),
2388 );
2389 store.set_config_tree(
2390 "red.indexes",
2391 &crate::json!({
2392 "auto_select": true,
2393 "bloom_filter": crate::json!({
2394 "enabled": true,
2395 "false_positive_rate": 0.01,
2396 "prune_on_scan": true
2397 }),
2398 "hash": crate::json!({ "enabled": true }),
2399 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2400 "spatial": crate::json!({ "enabled": true })
2401 }),
2402 );
2403 store.set_config_tree(
2404 "red.memtable",
2405 &crate::json!({
2406 "enabled": true,
2407 "max_bytes": 67108864_u64,
2408 "flush_threshold": 0.75
2409 }),
2410 );
2411 store.set_config_tree(
2412 "red.probabilistic",
2413 &crate::json!({
2414 "hll_registers": 16384,
2415 "sketch_default_width": 1000,
2416 "sketch_default_depth": 5,
2417 "filter_default_capacity": 100000
2418 }),
2419 );
2420 store.set_config_tree(
2421 "red.timeseries",
2422 &crate::json!({
2423 "default_chunk_size": 1024,
2424 "compression": crate::json!({
2425 "timestamps": "delta_of_delta",
2426 "values": "gorilla_xor"
2427 }),
2428 "default_retention_days": 0
2429 }),
2430 );
2431 store.set_config_tree(
2432 "red.queue",
2433 &crate::json!({
2434 "default_max_size": 0,
2435 "default_max_attempts": 3,
2436 "visibility_timeout_ms": 30000,
2437 "consumer_idle_timeout_ms": 60000
2438 }),
2439 );
2440 store.set_config_tree(
2441 "red.backup",
2442 &crate::json!({
2443 "enabled": false,
2444 "interval_secs": 3600,
2445 "retention_count": 24,
2446 "upload": false,
2447 "backend": "local"
2448 }),
2449 );
2450 store.set_config_tree(
2451 "red.wal",
2452 &crate::json!({
2453 "archive": crate::json!({
2454 "enabled": false,
2455 "retention_hours": 168,
2456 "prefix": "wal/"
2457 })
2458 }),
2459 );
2460 store.set_config_tree(
2461 "red.cdc",
2462 &crate::json!({
2463 "enabled": true,
2464 "buffer_size": 100000
2465 }),
2466 );
2467 store.set_config_tree(
2468 "red.config.secret",
2469 &crate::json!({
2470 "auto_encrypt": true,
2471 "auto_decrypt": true
2472 }),
2473 );
2474 }
2475
2476 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2483
2484 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2491 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2492 if lehman_yao {
2493 tracing::info!(
2494 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2495 );
2496 }
2497
2498 let overlay_path = crate::runtime::config_overlay::config_file_path();
2503 let _ =
2504 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2505 }
2506
2507 {
2511 let store = runtime.inner.db.store();
2512 for name in crate::application::vcs_collections::ALL {
2513 let _ = store.get_or_create_collection(*name);
2514 }
2515 store.set_config_tree(
2518 crate::application::vcs_collections::CONFIG_NAMESPACE,
2519 &crate::json!({
2520 "default_branch": "main",
2521 "author": crate::json!({
2522 "name": "reddb",
2523 "email": "reddb@localhost"
2524 }),
2525 "protected_branches": crate::json!(["main"]),
2526 "closure": crate::json!({
2527 "enabled": true,
2528 "lazy": true
2529 }),
2530 "merge": crate::json!({
2531 "default_strategy": "auto",
2532 "fast_forward": true
2533 })
2534 }),
2535 );
2536 }
2537
2538 {
2541 let store = runtime.inner.db.store();
2542 for name in crate::application::migration_collections::ALL {
2543 let _ = store.get_or_create_collection(*name);
2544 }
2545 }
2546
2547 {
2562 let weak = Arc::downgrade(&runtime.inner);
2563 std::thread::Builder::new()
2564 .name("reddb-maintenance".into())
2565 .spawn(move || {
2566 let tick = std::time::Duration::from_millis(200);
2567 let work_interval = std::time::Duration::from_secs(60);
2568 let mut last_work = std::time::Instant::now();
2569 loop {
2570 std::thread::sleep(tick);
2571 let Some(inner) = weak.upgrade() else {
2572 break;
2575 };
2576 if last_work.elapsed() >= work_interval {
2577 let _stats = inner.db.store().context_index().stats();
2578 last_work = std::time::Instant::now();
2579 }
2580 }
2581 })
2582 .ok();
2583 }
2584
2585 {
2587 let store = runtime.inner.db.store();
2588 let mut backup_enabled = false;
2589 let mut backup_interval = 3600u64;
2590
2591 if let Some(manager) = store.get_collection("red_config") {
2592 manager.for_each_entity(|entity| {
2593 if let Some(row) = entity.data.as_row() {
2594 let key = row.get_field("key").and_then(|v| match v {
2595 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2596 _ => None,
2597 });
2598 let val = row.get_field("value");
2599 if key == Some("red.config.backup.enabled") {
2600 backup_enabled = match val {
2601 Some(crate::storage::schema::Value::Boolean(true)) => true,
2602 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2603 _ => false,
2604 };
2605 } else if key == Some("red.config.backup.interval_secs") {
2606 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2607 backup_interval = *n as u64;
2608 }
2609 }
2610 }
2611 true
2612 });
2613 }
2614
2615 if backup_enabled {
2616 runtime.inner.backup_scheduler.set_interval(backup_interval);
2617 let rt = runtime.clone();
2618 runtime
2619 .inner
2620 .backup_scheduler
2621 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2622 }
2623 }
2624
2625 {
2627 runtime
2628 .inner
2629 .ec_registry
2630 .load_from_config_store(runtime.inner.db.store().as_ref());
2631 if !runtime.inner.ec_registry.async_configs().is_empty() {
2632 runtime.inner.ec_worker.start(
2633 Arc::clone(&runtime.inner.ec_registry),
2634 Arc::clone(&runtime.inner.db.store()),
2635 );
2636 }
2637 }
2638
2639 if let crate::replication::ReplicationRole::Replica { primary_addr } =
2640 runtime.inner.db.options().replication.role.clone()
2641 {
2642 let rt = runtime.clone();
2643 std::thread::Builder::new()
2644 .name("reddb-replica".into())
2645 .spawn(move || rt.run_replica_loop(primary_addr))
2646 .ok();
2647 }
2648
2649 runtime.inner.lifecycle.mark_ready();
2654
2655 {
2664 let weak_inner = Arc::downgrade(&runtime.inner);
2665 std::thread::Builder::new()
2666 .name("reddb-mv-scheduler".into())
2667 .spawn(move || loop {
2668 std::thread::sleep(std::time::Duration::from_millis(50));
2669 let Some(inner) = weak_inner.upgrade() else {
2670 break;
2671 };
2672 let rt = RedDBRuntime { inner };
2673 rt.refresh_due_materialized_views();
2674 })
2675 .ok();
2676 }
2677
2678 {
2688 let weak_inner = Arc::downgrade(&runtime.inner);
2689 std::thread::Builder::new()
2690 .name("reddb-retention-sweeper".into())
2691 .spawn(move || loop {
2692 std::thread::sleep(std::time::Duration::from_millis(500));
2693 let Some(inner) = weak_inner.upgrade() else {
2694 break;
2695 };
2696 let rt = RedDBRuntime { inner };
2697 rt.sweep_retention_tick(
2698 crate::runtime::retention_sweeper::DEFAULT_SWEEPER_BATCH,
2699 );
2700 })
2701 .ok();
2702 }
2703
2704 Ok(runtime)
2705 }
2706
2707 fn rehydrate_snapshot_xid_floor(&self) {
2708 let store = self.inner.db.store();
2709 for collection in store.list_collections() {
2710 let Some(manager) = store.get_collection(&collection) else {
2711 continue;
2712 };
2713 for entity in manager.query_all(|_| true) {
2714 self.inner
2715 .snapshot_manager
2716 .observe_committed_xid(entity.xmin);
2717 self.inner
2718 .snapshot_manager
2719 .observe_committed_xid(entity.xmax);
2720 }
2721 }
2722 }
2723
2724 pub(crate) fn ensure_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
2737 let store = self.inner.db.store();
2738 let mut changed = false;
2739 if store.get_collection(name).is_none() {
2740 store.get_or_create_collection(name);
2741 changed = true;
2742 }
2743 if self.inner.db.collection_contract(name).is_none() {
2744 self.inner
2745 .db
2746 .save_collection_contract(system_keyed_collection_contract(
2747 name,
2748 crate::catalog::CollectionModel::Table,
2749 ))
2750 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2751 changed = true;
2752 }
2753 if changed {
2754 self.inner
2755 .db
2756 .persist_metadata()
2757 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2758 }
2759 Ok(())
2760 }
2761
2762 pub(crate) fn drop_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
2767 let store = self.inner.db.store();
2768 if store.get_collection(name).is_none() {
2769 return Ok(());
2770 }
2771 store
2772 .drop_collection(name)
2773 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2774 if self.inner.db.collection_contract(name).is_some() {
2777 self.inner
2778 .db
2779 .remove_collection_contract(name)
2780 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2781 }
2782 self.invalidate_result_cache();
2783 self.inner
2784 .db
2785 .persist_metadata()
2786 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2787 Ok(())
2788 }
2789
2790 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
2791 let mut changed = false;
2792 for (name, model) in [
2793 ("red.config", crate::catalog::CollectionModel::Config),
2794 ("red.vault", crate::catalog::CollectionModel::Vault),
2795 (
2799 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION,
2800 crate::catalog::CollectionModel::Config,
2801 ),
2802 ] {
2803 if self.inner.db.store().get_collection(name).is_none() {
2804 self.inner.db.store().get_or_create_collection(name);
2805 changed = true;
2806 }
2807 if self.inner.db.collection_contract(name).is_none() {
2808 self.inner
2809 .db
2810 .save_collection_contract(system_keyed_collection_contract(name, model))
2811 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2812 changed = true;
2813 }
2814 }
2815 if changed {
2816 self.inner
2817 .db
2818 .persist_metadata()
2819 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2820 }
2821 Ok(())
2822 }
2823
2824 pub fn db(&self) -> Arc<RedDB> {
2825 Arc::clone(&self.inner.db)
2826 }
2827
2828 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
2833 &self.inner.index_store
2834 }
2835
2836 pub(crate) fn schema_vocabulary_apply(
2841 &self,
2842 event: crate::runtime::schema_vocabulary::DdlEvent,
2843 ) {
2844 self.inner.schema_vocabulary.write().on_ddl(event);
2845 }
2846
2847 pub fn schema_vocabulary_lookup(
2852 &self,
2853 token: &str,
2854 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
2855 self.inner.schema_vocabulary.read().lookup(token).to_vec()
2856 }
2857
2858 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
2862 *self.inner.auth_store.write() = Some(store);
2863 }
2864
2865 pub fn auth_store(&self) -> Option<Arc<crate::auth::store::AuthStore>> {
2868 self.inner.auth_store.read().clone()
2869 }
2870
2871 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
2873 self.inner
2874 .auth_store
2875 .read()
2876 .as_ref()
2877 .and_then(|store| store.vault_kv_get(key))
2878 }
2879
2880 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
2883 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
2884 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
2885 })?;
2886 store
2887 .vault_kv_try_set(key, value)
2888 .map_err(|err| RedDBError::Query(err.to_string()))
2889 }
2890
2891 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
2895 *self.inner.oauth_validator.write() = validator;
2896 }
2897
2898 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
2902 self.inner.oauth_validator.read().clone()
2903 }
2904
2905 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
2909 let guard = self.inner.auth_store.read();
2910 guard.as_ref().and_then(|s| s.vault_secret_key())
2911 }
2912
2913 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
2919 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2920 if let Some(crate::storage::schema::Value::Boolean(b)) =
2921 crate::runtime::config_overlay::coerce_env_value(key, raw)
2922 {
2923 return b;
2924 }
2925 }
2926 let store = self.inner.db.store();
2927 let Some(manager) = store.get_collection("red_config") else {
2928 return default;
2929 };
2930 let mut result = default;
2931 let mut latest_id: u64 = 0;
2932 manager.for_each_entity(|entity| {
2933 if let Some(row) = entity.data.as_row() {
2934 let entry_key = row.get_field("key").and_then(|v| match v {
2935 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2936 _ => None,
2937 });
2938 if entry_key == Some(key) {
2939 let id = entity.id.raw();
2940 if id >= latest_id {
2941 latest_id = id;
2942 result = match row.get_field("value") {
2943 Some(crate::storage::schema::Value::Boolean(b)) => *b,
2944 Some(crate::storage::schema::Value::Text(s)) => {
2945 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
2946 }
2947 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
2948 _ => default,
2949 };
2950 }
2951 }
2952 }
2953 true
2954 });
2955 result
2956 }
2957
2958 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
2959 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2960 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
2961 crate::runtime::config_overlay::coerce_env_value(key, raw)
2962 {
2963 return n;
2964 }
2965 }
2966 let store = self.inner.db.store();
2967 let Some(manager) = store.get_collection("red_config") else {
2968 return default;
2969 };
2970 let mut result = default;
2971 let mut latest_id: u64 = 0;
2972 manager.for_each_entity(|entity| {
2973 if let Some(row) = entity.data.as_row() {
2974 let entry_key = row.get_field("key").and_then(|v| match v {
2975 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2976 _ => None,
2977 });
2978 if entry_key == Some(key) {
2979 let id = entity.id.raw();
2980 if id >= latest_id {
2981 latest_id = id;
2982 result = match row.get_field("value") {
2983 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
2984 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
2985 Some(crate::storage::schema::Value::Text(s)) => {
2986 s.parse::<u64>().unwrap_or(default)
2987 }
2988 _ => default,
2989 };
2990 }
2991 }
2992 }
2993 true
2994 });
2995 result
2996 }
2997
2998 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
2999 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3000 if let Ok(n) = raw.parse::<f64>() {
3001 return n;
3002 }
3003 }
3004 let store = self.inner.db.store();
3005 let Some(manager) = store.get_collection("red_config") else {
3006 return default;
3007 };
3008 let mut result = default;
3009 let mut latest_id: u64 = 0;
3010 manager.for_each_entity(|entity| {
3011 if let Some(row) = entity.data.as_row() {
3012 let entry_key = row.get_field("key").and_then(|v| match v {
3013 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3014 _ => None,
3015 });
3016 if entry_key == Some(key) {
3017 let id = entity.id.raw();
3018 if id >= latest_id {
3019 latest_id = id;
3020 result = match row.get_field("value") {
3021 Some(crate::storage::schema::Value::Float(n)) => *n,
3022 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
3023 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
3024 Some(crate::storage::schema::Value::Text(s)) => {
3025 s.parse::<f64>().unwrap_or(default)
3026 }
3027 _ => default,
3028 };
3029 }
3030 }
3031 }
3032 true
3033 });
3034 result
3035 }
3036
3037 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
3038 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3039 return raw.clone();
3040 }
3041 let store = self.inner.db.store();
3042 let Some(manager) = store.get_collection("red_config") else {
3043 return default.to_string();
3044 };
3045 let mut result = default.to_string();
3046 let mut latest_id: u64 = 0;
3047 manager.for_each_entity(|entity| {
3048 if let Some(row) = entity.data.as_row() {
3049 let entry_key = row.get_field("key").and_then(|v| match v {
3050 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3051 _ => None,
3052 });
3053 if entry_key == Some(key) {
3054 let id = entity.id.raw();
3055 if id >= latest_id {
3056 latest_id = id;
3057 if let Some(crate::storage::schema::Value::Text(value)) =
3058 row.get_field("value")
3059 {
3060 result = value.to_string();
3061 }
3062 }
3063 }
3064 }
3065 true
3066 });
3067 result
3068 }
3069
3070 fn latest_metadata_for(
3071 &self,
3072 collection: &str,
3073 entity_id: u64,
3074 ) -> Option<crate::serde_json::Value> {
3075 self.inner
3076 .db
3077 .store()
3078 .get_metadata(collection, EntityId::new(entity_id))
3079 .map(|metadata| metadata_to_json(&metadata))
3080 }
3081
3082 fn persist_replica_lsn(&self, lsn: u64) {
3083 self.inner.db.store().set_config_tree(
3084 "red.replication",
3085 &crate::json!({
3086 "last_applied_lsn": lsn
3087 }),
3088 );
3089 }
3090
3091 fn persist_replication_health(
3092 &self,
3093 state: &str,
3094 last_error: &str,
3095 primary_lsn: Option<u64>,
3096 oldest_available_lsn: Option<u64>,
3097 ) {
3098 self.inner.db.store().set_config_tree(
3099 "red.replication",
3100 &crate::json!({
3101 "state": state,
3102 "last_error": last_error,
3103 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
3104 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
3105 "updated_at_unix_ms": SystemTime::now()
3106 .duration_since(UNIX_EPOCH)
3107 .unwrap_or_default()
3108 .as_millis() as u64
3109 }),
3110 );
3111 }
3112
3113 pub(crate) fn secret_auto_encrypt(&self) -> bool {
3116 self.config_bool("red.config.secret.auto_encrypt", true)
3117 }
3118
3119 pub(crate) fn secret_auto_decrypt(&self) -> bool {
3124 self.config_bool("red.config.secret.auto_decrypt", true)
3125 }
3126
3127 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
3134 if !self.secret_auto_decrypt() {
3135 return;
3136 }
3137 let Some(key) = self.secret_aes_key() else {
3138 return;
3139 };
3140 for record in result.result.records.iter_mut() {
3141 for value in record.values_mut() {
3142 if let Value::Secret(ref bytes) = value {
3143 if let Some(plain) =
3144 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
3145 {
3146 if let Ok(text) = String::from_utf8(plain) {
3147 *value = Value::text(text);
3148 }
3149 }
3150 }
3151 }
3152 }
3153 }
3154
3155 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
3163 crate::runtime::mutation::MutationEngine::new(self)
3164 }
3165
3166 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
3177 self.inner.write_gate.check(kind)
3178 }
3179
3180 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
3184 &self.inner.write_gate
3185 }
3186
3187 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
3191 &self.inner.lifecycle
3192 }
3193
3194 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
3196 &self.inner.resource_limits
3197 }
3198
3199 pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
3201 &self.inner.audit_log
3202 }
3203
3204 pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
3208 Arc::clone(&self.inner.audit_log)
3209 }
3210
3211 pub(crate) fn queue_telemetry(
3215 &self,
3216 ) -> &crate::runtime::queue_telemetry::QueueTelemetryCounters {
3217 &self.inner.queue_telemetry
3218 }
3219
3220 pub fn queue_telemetry_snapshot(
3223 &self,
3224 ) -> crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3225 crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3226 delivered: self.inner.queue_telemetry.delivered_snapshot(),
3227 acked: self.inner.queue_telemetry.acked_snapshot(),
3228 nacked: self.inner.queue_telemetry.nacked_snapshot(),
3229 }
3230 }
3231
3232 pub fn queue_pending_counts(&self) -> Vec<((String, String), u64)> {
3237 let store = self.inner.db.store();
3238 crate::runtime::impl_queue::pending_counts_by_group(store.as_ref())
3239 .into_iter()
3240 .collect()
3241 }
3242
3243 pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
3248 Arc::clone(&self.inner.write_gate)
3249 }
3250
3251 pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3254 self.inner.lease_lifecycle.get()
3255 }
3256
3257 pub fn set_lease_lifecycle(
3260 &self,
3261 lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
3262 ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3263 self.inner.lease_lifecycle.set(lifecycle)
3264 }
3265
3266 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3271 if self.inner.resource_limits.batch_size_exceeded(requested) {
3272 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3273 return Err(RedDBError::QuotaExceeded(format!(
3274 "max_batch_size:{requested}:{max}"
3275 )));
3276 }
3277 Ok(())
3278 }
3279
3280 pub fn check_db_size(&self) -> RedDBResult<()> {
3286 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3287 return Ok(());
3288 };
3289 if limit == 0 {
3290 return Ok(());
3291 }
3292 let Some(path) = self.inner.db.path() else {
3293 return Ok(());
3294 };
3295 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3296 if current > limit {
3297 return Err(RedDBError::QuotaExceeded(format!(
3298 "max_db_size_bytes:{current}:{limit}"
3299 )));
3300 }
3301 Ok(())
3302 }
3303
3304 pub fn graceful_shutdown(
3322 &self,
3323 backup_on_shutdown: bool,
3324 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
3325 if !self.inner.lifecycle.begin_shutdown() {
3326 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
3330 }
3331
3332 let started_ms = std::time::SystemTime::now()
3333 .duration_since(std::time::UNIX_EPOCH)
3334 .map(|d| d.as_millis() as u64)
3335 .unwrap_or(0);
3336 let mut report = crate::runtime::lifecycle::ShutdownReport {
3337 started_at_ms: started_ms,
3338 ..Default::default()
3339 };
3340
3341 let flush_res = self.inner.db.flush_local_only();
3347 report.flushed_wal = flush_res.is_ok();
3348 report.final_checkpoint = flush_res.is_ok();
3349 if let Err(err) = &flush_res {
3350 tracing::error!(
3351 target: "reddb::lifecycle",
3352 error = %err,
3353 "graceful_shutdown: local flush failed"
3354 );
3355 } else if let Err(lease_err) =
3356 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
3357 {
3358 tracing::warn!(
3359 target: "reddb::serverless::lease",
3360 error = %lease_err,
3361 "graceful_shutdown: remote upload skipped — lease not held"
3362 );
3363 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
3364 tracing::error!(
3365 target: "reddb::lifecycle",
3366 error = %err,
3367 "graceful_shutdown: remote upload failed"
3368 );
3369 }
3370
3371 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
3376 match self.trigger_backup() {
3382 Ok(result) => {
3383 report.backup_uploaded = result.uploaded;
3384 }
3385 Err(err) => {
3386 tracing::warn!(
3387 target: "reddb::lifecycle",
3388 error = %err,
3389 "graceful_shutdown: final backup skipped"
3390 );
3391 }
3392 }
3393 }
3394
3395 let completed_ms = std::time::SystemTime::now()
3396 .duration_since(std::time::UNIX_EPOCH)
3397 .map(|d| d.as_millis() as u64)
3398 .unwrap_or(started_ms);
3399 report.completed_at_ms = completed_ms;
3400 report.duration_ms = completed_ms.saturating_sub(started_ms);
3401
3402 self.inner.lifecycle.finish_shutdown(report.clone());
3403 Ok(report)
3404 }
3405
3406 pub(crate) fn cdc_emit_no_cache_invalidate(
3412 &self,
3413 operation: crate::replication::cdc::ChangeOperation,
3414 collection: &str,
3415 entity_id: u64,
3416 entity_kind: &str,
3417 ) -> u64 {
3418 let lsn = self
3419 .inner
3420 .cdc
3421 .emit(operation, collection, entity_id, entity_kind);
3422
3423 if let Some(ref primary) = self.inner.db.replication {
3425 let store = self.inner.db.store();
3426 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3427 None
3428 } else {
3429 store.get(collection, EntityId::new(entity_id))
3430 };
3431 let record = ChangeRecord {
3432 lsn,
3433 timestamp: SystemTime::now()
3434 .duration_since(UNIX_EPOCH)
3435 .unwrap_or_default()
3436 .as_millis() as u64,
3437 operation,
3438 collection: collection.to_string(),
3439 entity_id,
3440 entity_kind: entity_kind.to_string(),
3441 entity_bytes: entity
3442 .as_ref()
3443 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3444 metadata: self.latest_metadata_for(collection, entity_id),
3445 refresh_records: None,
3446 };
3447 let encoded = record.encode();
3448 primary.wal_buffer.append(record.lsn, encoded.clone());
3449 if let Some(spool) = &primary.logical_wal_spool {
3450 let _ = spool.append(record.lsn, &encoded);
3451 }
3452 }
3453 lsn
3454 }
3455
3456 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3457 &self,
3458 collection: &str,
3459 ids: &[EntityId],
3460 entity_kind: &str,
3461 ) -> Vec<u64> {
3462 if ids.is_empty() {
3463 return Vec::new();
3464 }
3465
3466 if self.inner.db.replication.is_none() {
3470 return self.inner.cdc.emit_batch_same_collection(
3471 crate::replication::cdc::ChangeOperation::Insert,
3472 collection,
3473 entity_kind,
3474 ids.iter().map(|id| id.raw()),
3475 );
3476 }
3477
3478 ids.iter()
3481 .map(|id| {
3482 self.cdc_emit_no_cache_invalidate(
3483 crate::replication::cdc::ChangeOperation::Insert,
3484 collection,
3485 id.raw(),
3486 entity_kind,
3487 )
3488 })
3489 .collect()
3490 }
3491
3492 pub fn cdc_emit(
3493 &self,
3494 operation: crate::replication::cdc::ChangeOperation,
3495 collection: &str,
3496 entity_id: u64,
3497 entity_kind: &str,
3498 ) -> u64 {
3499 let lsn = self
3500 .inner
3501 .cdc
3502 .emit(operation, collection, entity_id, entity_kind);
3503 self.invalidate_result_cache_for_table(collection);
3509
3510 if let Some(ref primary) = self.inner.db.replication {
3512 let store = self.inner.db.store();
3513 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3514 None
3515 } else {
3516 store.get(collection, EntityId::new(entity_id))
3517 };
3518 let record = ChangeRecord {
3519 lsn,
3520 timestamp: SystemTime::now()
3521 .duration_since(UNIX_EPOCH)
3522 .unwrap_or_default()
3523 .as_millis() as u64,
3524 operation,
3525 collection: collection.to_string(),
3526 entity_id,
3527 entity_kind: entity_kind.to_string(),
3528 entity_bytes: entity
3529 .as_ref()
3530 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
3531 metadata: self.latest_metadata_for(collection, entity_id),
3532 refresh_records: None,
3533 };
3534 let encoded = record.encode();
3535 primary.wal_buffer.append(record.lsn, encoded.clone());
3536 if let Some(spool) = &primary.logical_wal_spool {
3537 let _ = spool.append(record.lsn, &encoded);
3538 }
3539 }
3540 lsn
3541 }
3542
3543 pub(crate) fn cdc_emit_kv(
3544 &self,
3545 operation: crate::replication::cdc::ChangeOperation,
3546 collection: &str,
3547 key: &str,
3548 entity_id: u64,
3549 before: Option<crate::json::Value>,
3550 after: Option<crate::json::Value>,
3551 ) -> u64 {
3552 let lsn = self
3553 .inner
3554 .cdc
3555 .emit_kv(operation, collection, key, entity_id, before, after);
3556 self.inner.kv_stats.incr_watch_events_emitted();
3557 self.invalidate_result_cache_for_table(collection);
3558 lsn
3559 }
3560
3561 pub(crate) fn record_kv_watch_event(
3562 &self,
3563 operation: crate::replication::cdc::ChangeOperation,
3564 collection: &str,
3565 key: &str,
3566 entity_id: u64,
3567 before: Option<crate::json::Value>,
3568 after: Option<crate::json::Value>,
3569 ) {
3570 if self.current_xid().is_some() {
3571 let conn_id = current_connection_id();
3572 let event = crate::replication::cdc::KvWatchEvent {
3573 collection: collection.to_string(),
3574 key: key.to_string(),
3575 op: operation,
3576 before,
3577 after,
3578 lsn: 0,
3579 committed_at: 0,
3580 dropped_event_count: 0,
3581 };
3582 self.inner
3583 .pending_kv_watch_events
3584 .write()
3585 .entry(conn_id)
3586 .or_default()
3587 .push(event);
3588 return;
3589 }
3590
3591 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
3592 }
3593
3594 pub(crate) fn cdc_emit_prebuilt(
3595 &self,
3596 operation: crate::replication::cdc::ChangeOperation,
3597 collection: &str,
3598 entity: &UnifiedEntity,
3599 entity_kind: &str,
3600 metadata: Option<&crate::storage::Metadata>,
3601 invalidate_cache: bool,
3602 ) -> u64 {
3603 self.cdc_emit_prebuilt_with_columns(
3604 operation,
3605 collection,
3606 entity,
3607 entity_kind,
3608 metadata,
3609 invalidate_cache,
3610 None,
3611 )
3612 }
3613
3614 pub(crate) fn cdc_emit_prebuilt_with_columns(
3621 &self,
3622 operation: crate::replication::cdc::ChangeOperation,
3623 collection: &str,
3624 entity: &UnifiedEntity,
3625 entity_kind: &str,
3626 metadata: Option<&crate::storage::Metadata>,
3627 invalidate_cache: bool,
3628 changed_columns: Option<Vec<String>>,
3629 ) -> u64 {
3630 if invalidate_cache {
3631 self.invalidate_result_cache();
3632 }
3633
3634 let public_id = entity.logical_id().raw();
3635 let lsn = self.inner.cdc.emit_with_columns(
3636 operation,
3637 collection,
3638 public_id,
3639 entity_kind,
3640 changed_columns,
3641 );
3642
3643 if let Some(ref primary) = self.inner.db.replication {
3644 let store = self.inner.db.store();
3645 let record = ChangeRecord {
3646 lsn,
3647 timestamp: SystemTime::now()
3648 .duration_since(UNIX_EPOCH)
3649 .unwrap_or_default()
3650 .as_millis() as u64,
3651 operation,
3652 collection: collection.to_string(),
3653 entity_id: entity.id.raw(),
3654 entity_kind: entity_kind.to_string(),
3655 entity_bytes: Some(UnifiedStore::serialize_entity(
3656 entity,
3657 store.format_version(),
3658 )),
3659 metadata: metadata
3660 .map(metadata_to_json)
3661 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
3662 refresh_records: None,
3663 };
3664 let encoded = record.encode();
3665 primary.wal_buffer.append(record.lsn, encoded.clone());
3666 if let Some(spool) = &primary.logical_wal_spool {
3667 let _ = spool.append(record.lsn, &encoded);
3668 }
3669 }
3670
3671 lsn
3672 }
3673
3674 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
3675 &self,
3676 operation: crate::replication::cdc::ChangeOperation,
3677 entity_kind: &str,
3678 items: I,
3679 invalidate_cache: bool,
3680 ) where
3681 I: IntoIterator<
3682 Item = (
3683 &'a str,
3684 &'a UnifiedEntity,
3685 Option<&'a crate::storage::Metadata>,
3686 ),
3687 >,
3688 {
3689 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
3690 items.into_iter().collect();
3691 if items.is_empty() {
3692 return;
3693 }
3694
3695 if invalidate_cache {
3696 self.invalidate_result_cache();
3697 }
3698
3699 for (collection, entity, metadata) in items {
3700 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
3701 }
3702 }
3703
3704 fn run_replica_loop(&self, primary_addr: String) {
3705 let endpoint = if primary_addr.starts_with("http") {
3706 primary_addr
3707 } else {
3708 format!("http://{primary_addr}")
3709 };
3710 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
3711 let max_count = self.inner.db.options().replication.max_batch_size;
3712 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
3713
3714 let runtime = match tokio::runtime::Builder::new_current_thread()
3715 .enable_all()
3716 .build()
3717 {
3718 Ok(runtime) => runtime,
3719 Err(_) => return,
3720 };
3721
3722 runtime.block_on(async move {
3723 use crate::grpc::proto::red_db_client::RedDbClient;
3724 use crate::grpc::proto::JsonPayloadRequest;
3725
3726 let mut client = loop {
3727 match RedDbClient::connect(endpoint.clone()).await {
3728 Ok(client) => {
3729 self.persist_replication_health("connecting", "", None, None);
3730 break client;
3731 }
3732 Err(_) => {
3733 self.persist_replication_health(
3734 "connecting",
3735 "waiting for primary connection",
3736 None,
3737 None,
3738 );
3739 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
3740 }
3741 }
3742 };
3743
3744 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
3749
3750 loop {
3751 let payload = crate::json!({
3752 "since_lsn": since_lsn,
3753 "max_count": max_count
3754 });
3755 let request = tonic::Request::new(JsonPayloadRequest {
3756 payload_json: crate::json::to_string(&payload)
3757 .unwrap_or_else(|_| "{}".to_string()),
3758 });
3759
3760 if let Ok(response) = client.pull_wal_records(request).await {
3761 if let Ok(value) =
3762 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
3763 {
3764 let current_lsn =
3765 value.get("current_lsn").and_then(crate::json::Value::as_u64);
3766 let oldest_available_lsn = value
3767 .get("oldest_available_lsn")
3768 .and_then(crate::json::Value::as_u64);
3769 if since_lsn > 0
3770 && oldest_available_lsn
3771 .map(|oldest| oldest > since_lsn.saturating_add(1))
3772 .unwrap_or(false)
3773 {
3774 self.persist_replication_health(
3775 "stalled_gap",
3776 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
3777 current_lsn,
3778 oldest_available_lsn,
3779 );
3780 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
3781 continue;
3782 }
3783 if let Some(records) =
3784 value.get("records").and_then(crate::json::Value::as_array)
3785 {
3786 for record in records {
3787 let Some(data_hex) =
3788 record.get("data").and_then(crate::json::Value::as_str)
3789 else {
3790 continue;
3791 };
3792 let Ok(data) = hex::decode(data_hex) else {
3793 self.inner.replica_apply_metrics.record(
3794 crate::replication::logical::ApplyErrorKind::Decode,
3795 );
3796 self.persist_replication_health(
3797 "apply_error",
3798 "failed to decode WAL record hex payload",
3799 current_lsn,
3800 oldest_available_lsn,
3801 );
3802 continue;
3803 };
3804 let Ok(change) = ChangeRecord::decode(&data) else {
3805 self.inner.replica_apply_metrics.record(
3806 crate::replication::logical::ApplyErrorKind::Decode,
3807 );
3808 self.persist_replication_health(
3809 "apply_error",
3810 "failed to decode logical WAL record",
3811 current_lsn,
3812 oldest_available_lsn,
3813 );
3814 continue;
3815 };
3816 match applier.apply(
3817 self.inner.db.as_ref(),
3818 &change,
3819 ApplyMode::Replica,
3820 ) {
3821 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
3822 self.invalidate_result_cache_for_table(&change.collection);
3823 since_lsn = since_lsn.max(change.lsn);
3824 self.persist_replica_lsn(since_lsn);
3825 }
3826 Ok(_) => {
3827 }
3829 Err(err) => {
3830 self.inner.replica_apply_metrics.record(err.kind());
3831 match &err {
3840 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
3841 crate::telemetry::operator_event::OperatorEvent::Divergence {
3842 peer: "primary".to_string(),
3843 leader_lsn: *lsn,
3844 follower_lsn: since_lsn,
3845 }
3846 .emit_global();
3847 }
3848 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
3849 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
3850 peer: "primary".to_string(),
3851 reason: format!("stalled gap last={last} next={next}"),
3852 }
3853 .emit_global();
3854 }
3855 _ => {}
3856 }
3857 let kind = match &err {
3858 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
3859 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
3860 _ => "apply_error",
3861 };
3862 self.persist_replication_health(
3863 kind,
3864 &format!("replica apply rejected: {err}"),
3865 current_lsn,
3866 oldest_available_lsn,
3867 );
3868 break;
3879 }
3880 }
3881 }
3882 }
3883 self.persist_replication_health(
3884 "healthy",
3885 "",
3886 current_lsn,
3887 oldest_available_lsn,
3888 );
3889 } else {
3890 self.persist_replication_health(
3891 "apply_error",
3892 "failed to parse pull_wal_records response",
3893 None,
3894 None,
3895 );
3896 }
3897 } else {
3898 self.persist_replication_health(
3899 "connecting",
3900 "primary pull_wal_records request failed",
3901 None,
3902 None,
3903 );
3904 }
3905
3906 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
3907 }
3908 });
3909 }
3910
3911 pub fn cdc_poll(
3913 &self,
3914 since_lsn: u64,
3915 max_count: usize,
3916 ) -> Vec<crate::replication::cdc::ChangeEvent> {
3917 self.inner.cdc.poll(since_lsn, max_count)
3918 }
3919
3920 pub fn cdc_current_lsn(&self) -> u64 {
3924 self.inner.cdc.current_lsn()
3925 }
3926
3927 pub fn kv_watch_events_since(
3928 &self,
3929 collection: &str,
3930 key: &str,
3931 since_lsn: u64,
3932 max_count: usize,
3933 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3934 self.inner
3935 .cdc
3936 .poll(since_lsn, max_count)
3937 .into_iter()
3938 .filter_map(|event| event.kv)
3939 .filter(|event| event.collection == collection && event.key == key)
3940 .collect()
3941 }
3942
3943 pub fn kv_watch_events_since_prefix(
3944 &self,
3945 collection: &str,
3946 prefix: &str,
3947 since_lsn: u64,
3948 max_count: usize,
3949 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3950 self.inner
3951 .cdc
3952 .poll(since_lsn, max_count)
3953 .into_iter()
3954 .filter_map(|event| event.kv)
3955 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
3956 .collect()
3957 }
3958
3959 pub(crate) fn kv_watch_subscribe<'a>(
3960 &'a self,
3961 collection: impl Into<String>,
3962 key: impl Into<String>,
3963 from_lsn: Option<u64>,
3964 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3965 crate::runtime::kv_watch::KvWatchStream::subscribe(
3966 &self.inner.cdc,
3967 &self.inner.kv_stats,
3968 collection,
3969 key,
3970 from_lsn,
3971 self.kv_watch_idle_timeout_ms(),
3972 )
3973 }
3974
3975 pub(crate) fn kv_watch_subscribe_prefix<'a>(
3976 &'a self,
3977 collection: impl Into<String>,
3978 prefix: impl Into<String>,
3979 from_lsn: Option<u64>,
3980 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3981 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
3982 &self.inner.cdc,
3983 &self.inner.kv_stats,
3984 collection,
3985 prefix,
3986 from_lsn,
3987 self.kv_watch_idle_timeout_ms(),
3988 )
3989 }
3990
3991 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
3992 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
3993 }
3994
3995 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
3997 self.inner.backup_scheduler.status()
3998 }
3999
4000 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
4010 &self.inner.result_blob_cache
4011 }
4012
4013 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
4017 self.inner
4018 .db
4019 .replication
4020 .as_ref()
4021 .map(|repl| repl.replica_snapshots())
4022 .unwrap_or_default()
4023 }
4024
4025 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
4030 crate::replication::CommitPolicy::from_env()
4031 }
4032
4033 pub fn replica_apply_error_counts(
4038 &self,
4039 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
4040 self.inner.replica_apply_metrics.snapshot()
4041 }
4042
4043 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
4046 &self.inner.quota_bucket
4047 }
4048
4049 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
4053 self.inner
4054 .db
4055 .replication
4056 .as_ref()
4057 .map(|repl| repl.commit_waiter.snapshot())
4058 .unwrap_or_default()
4059 }
4060
4061 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
4064 self.inner
4065 .db
4066 .replication
4067 .as_ref()
4068 .map(|repl| repl.commit_waiter.metrics_snapshot())
4069 .unwrap_or((0, 0, 0, 0))
4070 }
4071
4072 pub fn await_replica_acks(
4082 &self,
4083 target_lsn: u64,
4084 count: u32,
4085 timeout: std::time::Duration,
4086 ) -> crate::replication::AwaitOutcome {
4087 match &self.inner.db.replication {
4088 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
4089 None => {
4090 crate::replication::AwaitOutcome::NotRequired
4094 }
4095 }
4096 }
4097
4098 pub fn enforce_commit_policy(
4112 &self,
4113 post_lsn: u64,
4114 ) -> RedDBResult<crate::replication::AwaitOutcome> {
4115 let n = match self.commit_policy() {
4116 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
4117 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
4118 };
4119 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4120 .ok()
4121 .and_then(|v| v.parse::<u64>().ok())
4122 .unwrap_or(5_000);
4123 let outcome =
4124 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
4125 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
4126 tracing::warn!(
4127 target: "reddb::commit",
4128 post_lsn,
4129 observed = *observed,
4130 required = *required,
4131 timeout_ms,
4132 "ack_n: timed out waiting for replicas"
4133 );
4134 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
4135 .ok()
4136 .map(|v| {
4137 let t = v.trim();
4138 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
4139 })
4140 .unwrap_or(false);
4141 if fail {
4142 return Err(RedDBError::ReadOnly(format!(
4143 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
4144 )));
4145 }
4146 }
4147 Ok(outcome)
4148 }
4149
4150 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
4158 match crate::crypto::page_encryption::key_from_env() {
4159 Ok(Some(_)) => ("enabled", None),
4160 Ok(None) => ("disabled", None),
4161 Err(err) => ("error", Some(err)),
4162 }
4163 }
4164
4165 pub fn replica_apply_health(&self) -> Option<String> {
4171 let state = self.config_string("red.replication.state", "");
4172 if state.is_empty() {
4173 None
4174 } else {
4175 Some(state)
4176 }
4177 }
4178
4179 pub fn wal_archive_progress(&self) -> (u64, u64) {
4184 let current_lsn = self
4185 .inner
4186 .db
4187 .replication
4188 .as_ref()
4189 .map(|repl| {
4190 repl.logical_wal_spool
4191 .as_ref()
4192 .map(|spool| spool.current_lsn())
4193 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4194 })
4195 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4196 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4197 (current_lsn, last_archived_lsn)
4198 }
4199
4200 pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
4202 self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
4203 self.assert_remote_write_allowed("admin/backup")?;
4208 let started = std::time::Instant::now();
4209 let snapshot = self.create_snapshot()?;
4210 let mut uploaded = false;
4211
4212 if let (Some(backend), Some(path)) = (&self.inner.db.remote_backend, self.inner.db.path()) {
4213 let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
4214 let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
4215 let default_head_key = self.inner.db.options().default_backup_head_key();
4216 let snapshot_prefix = self.config_string(
4217 "red.config.backup.snapshot_prefix",
4218 &default_snapshot_prefix,
4219 );
4220 let wal_prefix =
4221 self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
4222 let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
4223 let timeline_id = self.config_string("red.config.timeline.id", "main");
4224 let snapshot_key = crate::storage::wal::archive_snapshot(
4225 backend.as_ref(),
4226 path,
4227 snapshot.snapshot_id,
4228 &snapshot_prefix,
4229 )
4230 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4231 let current_lsn = self
4232 .inner
4233 .db
4234 .replication
4235 .as_ref()
4236 .map(|repl| {
4237 repl.logical_wal_spool
4238 .as_ref()
4239 .map(|spool| spool.current_lsn())
4240 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4241 })
4242 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4243 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4244 let snapshot_sha256 =
4250 crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
4251 .map_err(|err| {
4252 tracing::warn!(
4253 target: "reddb::backup",
4254 error = %err,
4255 snapshot_id = snapshot.snapshot_id,
4256 "snapshot hash failed; manifest will lack checksum"
4257 );
4258 })
4259 .ok();
4260 let manifest = crate::storage::wal::SnapshotManifest {
4261 timeline_id: timeline_id.clone(),
4262 snapshot_key: snapshot_key.clone(),
4263 snapshot_id: snapshot.snapshot_id,
4264 snapshot_time: snapshot.created_at_unix_ms as u64,
4265 base_lsn: current_lsn,
4266 schema_version: crate::api::REDDB_FORMAT_VERSION,
4267 format_version: crate::api::REDDB_FORMAT_VERSION,
4268 snapshot_sha256,
4269 };
4270 crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
4271 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4272
4273 let prev_segment_hash = self.config_string("red.config.timeline.last_segment_hash", "");
4280 let prev_hash_arg = if prev_segment_hash.is_empty() {
4281 None
4282 } else {
4283 Some(prev_segment_hash)
4284 };
4285
4286 let archived_lsn = if let Some(primary) = &self.inner.db.replication {
4287 let oldest = primary
4288 .logical_wal_spool
4289 .as_ref()
4290 .and_then(|spool| spool.oldest_lsn().ok().flatten())
4291 .or_else(|| primary.wal_buffer.oldest_lsn())
4292 .unwrap_or(last_archived_lsn);
4293 if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
4294 return Err(RedDBError::Internal(format!(
4295 "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
4296 )));
4297 }
4298 let records = if let Some(spool) = &primary.logical_wal_spool {
4299 spool
4300 .read_since(last_archived_lsn, usize::MAX)
4301 .map_err(|err| RedDBError::Internal(err.to_string()))?
4302 } else {
4303 primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
4304 };
4305 if let Some(meta) = crate::storage::wal::archive_change_records(
4306 backend.as_ref(),
4307 &wal_prefix,
4308 &records,
4309 prev_hash_arg,
4310 )
4311 .map_err(|err| RedDBError::Internal(err.to_string()))?
4312 {
4313 if let Some(spool) = &primary.logical_wal_spool {
4314 let _ = spool.prune_through(meta.lsn_end);
4315 }
4316 if let Some(sha) = &meta.sha256 {
4322 self.inner.db.store().set_config_tree(
4323 "red.config.timeline",
4324 &crate::json!({ "last_segment_hash": sha }),
4325 );
4326 }
4327 meta.lsn_end
4328 } else {
4329 last_archived_lsn
4330 }
4331 } else {
4332 last_archived_lsn
4333 };
4334
4335 let head = crate::storage::wal::BackupHead {
4336 timeline_id,
4337 snapshot_key,
4338 snapshot_id: snapshot.snapshot_id,
4339 snapshot_time: snapshot.created_at_unix_ms as u64,
4340 current_lsn,
4341 last_archived_lsn: archived_lsn,
4342 wal_prefix,
4343 };
4344 crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
4345 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4346 self.inner.db.store().set_config_tree(
4347 "red.config.timeline",
4348 &crate::json!({
4349 "last_archived_lsn": archived_lsn,
4350 "id": head.timeline_id
4351 }),
4352 );
4353
4354 if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
4362 backend.as_ref(),
4363 &snapshot_prefix,
4364 ) {
4365 tracing::warn!(
4366 target: "reddb::backup",
4367 error = %err,
4368 snapshot_prefix = %snapshot_prefix,
4369 "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
4370 );
4371 }
4372
4373 match self.commit_policy() {
4385 crate::replication::CommitPolicy::AckN(n) if n > 0 => {
4386 let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4387 .ok()
4388 .and_then(|v| v.parse::<u64>().ok())
4389 .unwrap_or(5_000);
4390 let outcome = self.await_replica_acks(
4391 archived_lsn,
4392 n,
4393 std::time::Duration::from_millis(timeout),
4394 );
4395 match outcome {
4396 crate::replication::AwaitOutcome::Reached(count) => {
4397 tracing::debug!(
4398 target: "reddb::backup",
4399 archived_lsn,
4400 n,
4401 count,
4402 "ack_n: replicas synced before backup return"
4403 );
4404 }
4405 crate::replication::AwaitOutcome::TimedOut { observed, required } => {
4406 tracing::warn!(
4407 target: "reddb::backup",
4408 archived_lsn,
4409 observed,
4410 required,
4411 timeout_ms = timeout,
4412 "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
4413 );
4414 }
4415 crate::replication::AwaitOutcome::NotRequired => {}
4416 }
4417 }
4418 _ => {} }
4420
4421 if self.config_bool("red.config.backup.include_blob_cache", false) {
4433 let blob_cache_prefix = self.config_string(
4434 "red.config.backup.blob_cache_prefix",
4435 &format!("{snapshot_prefix}blob_cache/"),
4436 );
4437 if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
4438 match crate::storage::cache::archive_blob_cache_l2(
4439 backend.as_ref(),
4440 l2_path,
4441 &blob_cache_prefix,
4442 ) {
4443 Ok(count) => {
4444 tracing::info!(
4445 target: "reddb::backup",
4446 files_uploaded = count,
4447 blob_cache_prefix = %blob_cache_prefix,
4448 "include_blob_cache: archived L2 directory"
4449 );
4450 }
4451 Err(err) => {
4452 tracing::warn!(
4453 target: "reddb::backup",
4454 error = %err,
4455 blob_cache_prefix = %blob_cache_prefix,
4456 "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
4457 );
4458 }
4459 }
4460 } else {
4461 tracing::debug!(
4462 target: "reddb::backup",
4463 "include_blob_cache=true but no L2 path configured; nothing to archive"
4464 );
4465 }
4466 }
4467
4468 uploaded = true;
4469 }
4470
4471 Ok(crate::replication::scheduler::BackupResult {
4472 snapshot_id: snapshot.snapshot_id,
4473 uploaded,
4474 duration_ms: started.elapsed().as_millis() as u64,
4475 timestamp: snapshot.created_at_unix_ms as u64,
4476 })
4477 }
4478
4479 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
4480 let mut pool = self
4481 .inner
4482 .pool
4483 .lock()
4484 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
4485 if pool.active >= self.inner.pool_config.max_connections {
4486 return Err(RedDBError::Internal(
4487 "connection pool exhausted".to_string(),
4488 ));
4489 }
4490
4491 let id = if let Some(id) = pool.idle.pop() {
4492 id
4493 } else {
4494 let id = pool.next_id;
4495 pool.next_id += 1;
4496 id
4497 };
4498 pool.active += 1;
4499 pool.total_checkouts += 1;
4500 drop(pool);
4501
4502 Ok(RuntimeConnection {
4503 id,
4504 inner: Arc::clone(&self.inner),
4505 })
4506 }
4507
4508 pub fn checkpoint(&self) -> RedDBResult<()> {
4509 self.inner.db.flush_local_only().map_err(|err| {
4514 let msg = err.to_string();
4519 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
4520 lsn: 0,
4521 error: msg.clone(),
4522 }
4523 .emit_global();
4524 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
4525 path: "<flush_local_only>".to_string(),
4526 error: msg.clone(),
4527 }
4528 .emit_global();
4529 RedDBError::Engine(msg)
4530 })?;
4531 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
4532 tracing::warn!(
4533 target: "reddb::serverless::lease",
4534 error = %err,
4535 "checkpoint: skipping remote upload — lease not held"
4536 );
4537 return Ok(());
4538 }
4539 self.inner
4540 .db
4541 .upload_to_remote_backend()
4542 .map_err(|err| RedDBError::Engine(err.to_string()))
4543 }
4544
4545 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
4552 if self.inner.db.remote_backend.is_none() {
4553 return Ok(());
4554 }
4555 match self.inner.write_gate.lease_state() {
4556 crate::runtime::write_gate::LeaseGateState::NotHeld => {
4557 self.inner.audit_log.record(
4558 action,
4559 "system",
4560 "remote_backend",
4561 "err: writer lease not held",
4562 crate::json::Value::Null,
4563 );
4564 Err(RedDBError::ReadOnly(format!(
4565 "writer lease not held — {action} blocked (serverless fence)"
4566 )))
4567 }
4568 _ => Ok(()),
4569 }
4570 }
4571
4572 pub fn run_maintenance(&self) -> RedDBResult<()> {
4573 self.inner
4574 .db
4575 .run_maintenance()
4576 .map_err(|err| RedDBError::Internal(err.to_string()))
4577 }
4578
4579 pub fn scan_collection(
4580 &self,
4581 collection: &str,
4582 cursor: Option<ScanCursor>,
4583 limit: usize,
4584 ) -> RedDBResult<ScanPage> {
4585 let store = self.inner.db.store();
4586 let manager = store
4587 .get_collection(collection)
4588 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
4589
4590 let mut entities = manager.query_all(|_| true);
4591 entities.sort_by_key(|entity| entity.id.raw());
4592
4593 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
4594 let total = entities.len();
4595 let end = total.min(offset.saturating_add(limit.max(1)));
4596 let items = if offset >= total {
4597 Vec::new()
4598 } else {
4599 entities[offset..end].to_vec()
4600 };
4601 let next = (end < total).then_some(ScanCursor { offset: end });
4602
4603 Ok(ScanPage {
4604 collection: collection.to_string(),
4605 items,
4606 next,
4607 total,
4608 })
4609 }
4610
4611 pub fn catalog(&self) -> CatalogModelSnapshot {
4612 self.inner.db.catalog_model_snapshot()
4613 }
4614
4615 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
4616 self.inner.db.catalog_consistency_report()
4617 }
4618
4619 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
4620 crate::catalog::attention_summary(&self.catalog())
4621 }
4622
4623 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
4624 crate::catalog::collection_attention(&self.catalog())
4625 }
4626
4627 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
4628 crate::catalog::index_attention(&self.catalog())
4629 }
4630
4631 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
4632 crate::catalog::graph_projection_attention(&self.catalog())
4633 }
4634
4635 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
4636 crate::catalog::analytics_job_attention(&self.catalog())
4637 }
4638
4639 pub fn stats(&self) -> RuntimeStats {
4640 let pool = runtime_pool_lock(self);
4641 RuntimeStats {
4642 active_connections: pool.active,
4643 idle_connections: pool.idle.len(),
4644 total_checkouts: pool.total_checkouts,
4645 paged_mode: self.inner.db.is_paged(),
4646 started_at_unix_ms: self.inner.started_at_unix_ms,
4647 store: self.inner.db.stats(),
4648 system: SystemInfo::collect(),
4649 result_blob_cache: self.inner.result_blob_cache.stats(),
4650 kv: self.inner.kv_stats.snapshot(),
4651 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
4652 }
4653 }
4654
4655 pub(crate) fn record_metrics_ingest(
4656 &self,
4657 accepted_samples: u64,
4658 accepted_series: u64,
4659 rejected_samples: u64,
4660 rejected_series: u64,
4661 ) {
4662 self.inner.metrics_ingest_stats.record(
4663 accepted_samples,
4664 accepted_series,
4665 rejected_samples,
4666 rejected_series,
4667 );
4668 }
4669
4670 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
4671 self.inner
4672 .metrics_ingest_stats
4673 .record_cardinality_budget_rejections(rejected_series);
4674 }
4675
4676 pub(crate) fn record_metrics_tenant_activity(
4677 &self,
4678 tenant: &str,
4679 namespace: &str,
4680 operation: &str,
4681 ) {
4682 self.inner
4683 .metrics_tenant_activity_stats
4684 .record(tenant, namespace, operation);
4685 }
4686
4687 pub(crate) fn metrics_tenant_activity_snapshot(
4688 &self,
4689 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
4690 self.inner.metrics_tenant_activity_stats.snapshot()
4691 }
4692
4693 pub fn execute_query_with_scope(
4707 &self,
4708 query: &str,
4709 scope: crate::runtime::within_clause::ScopeOverride,
4710 ) -> RedDBResult<RuntimeQueryResult> {
4711 if scope.is_empty() {
4712 return self.execute_query(query);
4713 }
4714 let _scope_guard = ScopeOverrideGuard::install(scope);
4715 self.execute_query(query)
4716 }
4717
4718 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4727 let started = std::time::Instant::now();
4728 let result = self.execute_query_inner(query);
4729 let elapsed_ms = started.elapsed().as_millis() as u64;
4730
4731 let scope = self.ai_scope();
4736 let kind = match result
4737 .as_ref()
4738 .map(|r| r.statement_type)
4739 .unwrap_or("select")
4740 {
4741 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
4742 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
4743 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
4744 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
4745 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
4746 };
4747 self.inner
4753 .slow_query_logger
4754 .record(kind, elapsed_ms, query.to_string(), &scope);
4755
4756 result
4757 }
4758
4759 #[inline(never)]
4760 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4761 if !has_scope_override_active()
4772 && !query.trim_start().starts_with("WITHIN")
4773 && !query.trim_start().starts_with("within")
4774 && !self
4775 .inner
4776 .tx_contexts
4777 .read()
4778 .contains_key(¤t_connection_id())
4779 {
4780 if let Some(result) = self.try_fast_entity_lookup(query) {
4781 return result;
4782 }
4783 }
4784
4785 match crate::runtime::within_clause::try_strip_within_prefix(query) {
4792 Ok(Some((scope, inner))) => {
4793 let _scope_guard = ScopeOverrideGuard::install(scope);
4794 return self.execute_query_inner(inner);
4799 }
4800 Ok(None) => {}
4801 Err(msg) => return Err(RedDBError::Query(msg)),
4802 }
4803
4804 if let Some(inner) = strip_explain_prefix(query) {
4811 return self.explain_as_rows(query, inner);
4812 }
4813
4814 if let Some(value) = parse_set_local_tenant(query)? {
4819 let conn_id = current_connection_id();
4820 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
4821 return Err(RedDBError::Query(
4822 "SET LOCAL TENANT requires an active transaction".to_string(),
4823 ));
4824 }
4825 self.inner
4826 .tx_local_tenants
4827 .write()
4828 .insert(conn_id, value.clone());
4829 return Ok(RuntimeQueryResult::ok_message(
4830 query.to_string(),
4831 &match &value {
4832 Some(id) => format!("local tenant set: {id}"),
4833 None => "local tenant cleared".to_string(),
4834 },
4835 "set_local_tenant",
4836 ));
4837 }
4838
4839 if super::red_schema::is_system_schema_write(query) {
4840 return Err(RedDBError::Query(
4841 super::red_schema::READ_ONLY_ERROR.to_string(),
4842 ));
4843 }
4844
4845 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
4846 let execution_query = rewritten_query.as_deref().unwrap_or(query);
4847
4848 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
4849 let _frame_guards = frame.install(self);
4850
4851 let _log_span = crate::telemetry::span::query_span(query).entered();
4858
4859 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
4861 return self.execute_query_expr(rewritten);
4862 }
4863
4864 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
4866 return result;
4867 }
4868
4869 if let Some(result) = frame.read_result_cache(self) {
4871 return Ok(result);
4872 }
4873
4874 let prepared = frame.prepare_statement(self, execution_query)?;
4875 let mode = prepared.mode;
4876 let expr = prepared.expr;
4877
4878 let statement = query_expr_name(&expr);
4879 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
4880
4881 let _lock_guard = frame.prepare_dispatch(self, &expr)?;
4882 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
4883
4884 let query_result = match expr {
4885 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
4886 let (graph, node_properties, edge_properties) =
4894 self.materialize_graph_with_rls()?;
4895 let result =
4896 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
4897 &graph,
4898 &expr,
4899 node_properties,
4900 edge_properties,
4901 )
4902 .map_err(|err| RedDBError::Query(err.to_string()))?;
4903
4904 Ok(RuntimeQueryResult {
4905 query: query.to_string(),
4906 mode,
4907 statement,
4908 engine: "materialized-graph",
4909 result,
4910 affected_rows: 0,
4911 statement_type: "select",
4912 })
4913 }
4914 QueryExpr::Table(table) => {
4915 let table = self.resolve_table_expr_subqueries(
4916 table,
4917 &frame as &dyn super::statement_frame::ReadFrame,
4918 )?;
4919 if super::red_schema::is_virtual_table(&table.table) {
4920 return Ok(RuntimeQueryResult {
4921 query: query.to_string(),
4922 mode,
4923 statement,
4924 engine: "runtime-red-schema",
4925 result: super::red_schema::red_query(
4926 self,
4927 &table.table,
4928 &table,
4929 &frame as &dyn super::statement_frame::ReadFrame,
4930 )?,
4931 affected_rows: 0,
4932 statement_type: "select",
4933 });
4934 }
4935
4936 if let Some(result) = self.execute_probabilistic_select(&table)? {
4937 return Ok(RuntimeQueryResult {
4938 query: query.to_string(),
4939 mode,
4940 statement,
4941 engine: "runtime-probabilistic",
4942 result,
4943 affected_rows: 0,
4944 statement_type: "select",
4945 });
4946 }
4947
4948 if self.inner.foreign_tables.is_foreign_table(&table.table) {
4956 let records = self
4957 .inner
4958 .foreign_tables
4959 .scan(&table.table)
4960 .map_err(|e| RedDBError::Internal(e.to_string()))?;
4961 let result = apply_foreign_table_filters(records, &table);
4962 return Ok(RuntimeQueryResult {
4963 query: query.to_string(),
4964 mode,
4965 statement,
4966 engine: "runtime-fdw",
4967 result,
4968 affected_rows: 0,
4969 statement_type: "select",
4970 });
4971 }
4972
4973 let Some(table_with_rls) = self.authorize_relational_table_select(
4990 table,
4991 &frame as &dyn super::statement_frame::ReadFrame,
4992 )?
4993 else {
4994 let empty = crate::storage::query::unified::UnifiedResult::empty();
4995 return Ok(RuntimeQueryResult {
4996 query: query.to_string(),
4997 mode,
4998 statement,
4999 engine: "runtime-table-rls",
5000 result: empty,
5001 affected_rows: 0,
5002 statement_type: "select",
5003 });
5004 };
5005 Ok(RuntimeQueryResult {
5006 query: query.to_string(),
5007 mode,
5008 statement,
5009 engine: "runtime-table",
5010 result: execute_runtime_table_query(
5011 &self.inner.db,
5012 &table_with_rls,
5013 Some(&self.inner.index_store),
5014 )?,
5015 affected_rows: 0,
5016 statement_type: "select",
5017 })
5018 }
5019 QueryExpr::Join(join) => {
5020 let join_with_rls = match self.authorize_relational_join_select(
5029 join,
5030 &frame as &dyn super::statement_frame::ReadFrame,
5031 )? {
5032 Some(j) => j,
5033 None => {
5034 return Ok(RuntimeQueryResult {
5035 query: query.to_string(),
5036 mode,
5037 statement,
5038 engine: "runtime-join-rls",
5039 result: crate::storage::query::unified::UnifiedResult::empty(),
5040 affected_rows: 0,
5041 statement_type: "select",
5042 });
5043 }
5044 };
5045 Ok(RuntimeQueryResult {
5046 query: query.to_string(),
5047 mode,
5048 statement,
5049 engine: "runtime-join",
5050 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
5051 affected_rows: 0,
5052 statement_type: "select",
5053 })
5054 }
5055 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
5056 query: query.to_string(),
5057 mode,
5058 statement,
5059 engine: "runtime-vector",
5060 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
5061 affected_rows: 0,
5062 statement_type: "select",
5063 }),
5064 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
5065 query: query.to_string(),
5066 mode,
5067 statement,
5068 engine: "runtime-hybrid",
5069 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
5070 affected_rows: 0,
5071 statement_type: "select",
5072 }),
5073 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
5075 Err(RedDBError::Query(
5076 super::red_schema::READ_ONLY_ERROR.to_string(),
5077 ))
5078 }
5079 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
5080 Err(RedDBError::Query(
5081 super::red_schema::READ_ONLY_ERROR.to_string(),
5082 ))
5083 }
5084 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
5085 Err(RedDBError::Query(
5086 super::red_schema::READ_ONLY_ERROR.to_string(),
5087 ))
5088 }
5089 QueryExpr::Insert(ref insert) => self
5090 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
5091 self.execute_insert(query, insert)
5092 }),
5093 QueryExpr::Update(ref update) => self
5094 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
5095 self.execute_update(query, update)
5096 }),
5097 QueryExpr::Delete(ref delete) => self
5098 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
5099 self.execute_delete(query, delete)
5100 }),
5101 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
5103 QueryExpr::CreateCollection(ref create) => {
5104 self.execute_create_collection(query, create)
5105 }
5106 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
5107 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
5108 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
5109 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
5110 QueryExpr::DropDocument(ref drop_document) => {
5111 self.execute_drop_document(query, drop_document)
5112 }
5113 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
5114 QueryExpr::DropCollection(ref drop_collection) => {
5115 self.execute_drop_collection(query, drop_collection)
5116 }
5117 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
5118 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
5119 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
5120 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
5122 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
5124 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
5126 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
5127 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
5128 QueryExpr::ProbabilisticCommand(ref cmd) => {
5129 self.execute_probabilistic_command(query, cmd)
5130 }
5131 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
5133 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
5134 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
5136 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
5137 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
5138 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
5139 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
5140 QueryExpr::EventsBackfill(ref backfill) => {
5141 self.execute_events_backfill(query, backfill)
5142 }
5143 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
5144 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
5145 ))),
5146 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
5147 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
5148 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
5149 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
5150 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
5151 QueryExpr::SetConfig { ref key, ref value } => {
5153 if key.starts_with("red.secret.") {
5154 return Err(RedDBError::Query(
5155 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
5156 ));
5157 }
5158 let store = self.inner.db.store();
5159 let json_val = match value {
5160 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
5161 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
5162 Value::Float(n) => crate::serde_json::Value::Number(*n),
5163 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
5164 _ => crate::serde_json::Value::String(value.to_string()),
5165 };
5166 store.set_config_tree(key, &json_val);
5167 update_current_config_value(key, value.clone());
5168 self.invalidate_result_cache();
5173 Ok(RuntimeQueryResult::ok_message(
5174 query.to_string(),
5175 &format!("config set: {key}"),
5176 "set",
5177 ))
5178 }
5179 QueryExpr::SetSecret { ref key, ref value } => {
5181 if key.starts_with("red.config.") {
5182 return Err(RedDBError::Query(
5183 "red.config.* is reserved for config; use SET CONFIG".to_string(),
5184 ));
5185 }
5186 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5187 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
5188 })?;
5189 if matches!(value, Value::Null) {
5190 auth_store
5191 .vault_kv_try_delete(key)
5192 .map_err(|err| RedDBError::Query(err.to_string()))?;
5193 update_current_secret_value(key, None);
5194 self.invalidate_result_cache();
5195 return Ok(RuntimeQueryResult::ok_message(
5196 query.to_string(),
5197 &format!("secret deleted: {key}"),
5198 "delete_secret",
5199 ));
5200 }
5201 let value = secret_sql_value_to_string(value)?;
5202 auth_store
5203 .vault_kv_try_set(key.clone(), value.clone())
5204 .map_err(|err| RedDBError::Query(err.to_string()))?;
5205 update_current_secret_value(key, Some(value));
5206 self.invalidate_result_cache();
5207 Ok(RuntimeQueryResult::ok_message(
5208 query.to_string(),
5209 &format!("secret set: {key}"),
5210 "set_secret",
5211 ))
5212 }
5213 QueryExpr::DeleteSecret { ref key } => {
5215 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5216 RedDBError::Query(
5217 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
5218 )
5219 })?;
5220 let deleted = auth_store
5221 .vault_kv_try_delete(key)
5222 .map_err(|err| RedDBError::Query(err.to_string()))?;
5223 if deleted {
5224 update_current_secret_value(key, None);
5225 }
5226 self.invalidate_result_cache();
5227 Ok(RuntimeQueryResult::ok_message(
5228 query.to_string(),
5229 &format!("secret deleted: {key}"),
5230 if deleted {
5231 "delete_secret"
5232 } else {
5233 "delete_secret_not_found"
5234 },
5235 ))
5236 }
5237 QueryExpr::ShowSecrets { ref prefix } => {
5239 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5240 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
5241 })?;
5242 if !auth_store.is_vault_backed() {
5243 return Err(RedDBError::Query(
5244 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
5245 ));
5246 }
5247 let mut keys = auth_store.vault_kv_keys();
5248 keys.sort();
5249 let mut result = UnifiedResult::with_columns(vec![
5250 "key".into(),
5251 "value".into(),
5252 "status".into(),
5253 ]);
5254 for key in keys {
5255 if let Some(ref pfx) = prefix {
5256 if !key.starts_with(pfx) {
5257 continue;
5258 }
5259 }
5260 let mut record = UnifiedRecord::new();
5261 record.set("key", Value::text(key));
5262 record.set("value", Value::text("***"));
5263 record.set("status", Value::text("active"));
5264 result.push(record);
5265 }
5266 Ok(RuntimeQueryResult {
5267 query: query.to_string(),
5268 mode,
5269 statement: "show_secrets",
5270 engine: "runtime-secret",
5271 result,
5272 affected_rows: 0,
5273 statement_type: "select",
5274 })
5275 }
5276 QueryExpr::ShowConfig { ref prefix } => {
5278 let store = self.inner.db.store();
5279 let all_collections = store.list_collections();
5280 if !all_collections.contains(&"red_config".to_string()) {
5281 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5282 return Ok(RuntimeQueryResult {
5283 query: query.to_string(),
5284 mode,
5285 statement: "show_config",
5286 engine: "runtime-config",
5287 result,
5288 affected_rows: 0,
5289 statement_type: "select",
5290 });
5291 }
5292 let manager = store
5293 .get_collection("red_config")
5294 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5295 let entities = manager.query_all(|_| true);
5296 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5297 for entity in entities {
5298 if let EntityData::Row(ref row) = entity.data {
5299 if let Some(ref named) = row.named {
5300 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5301 let val = named.get("value").cloned().unwrap_or(Value::Null);
5302 let key_str = match &key_val {
5303 Value::Text(s) => s.as_ref(),
5304 _ => continue,
5305 };
5306 if let Some(ref pfx) = prefix {
5307 if !key_str.starts_with(pfx.as_str()) {
5308 continue;
5309 }
5310 }
5311 let entity_id = entity.id.raw();
5312 match latest.get(key_str) {
5313 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5314 _ => {
5315 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5316 }
5317 }
5318 }
5319 }
5320 }
5321 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5322 for (_, key_val, val) in latest.into_values() {
5323 let mut record = UnifiedRecord::new();
5324 record.set("key", key_val);
5325 record.set("value", val);
5326 result.push(record);
5327 }
5328 Ok(RuntimeQueryResult {
5329 query: query.to_string(),
5330 mode,
5331 statement: "show_config",
5332 engine: "runtime-config",
5333 result,
5334 affected_rows: 0,
5335 statement_type: "select",
5336 })
5337 }
5338 QueryExpr::SetTenant(ref value) => {
5344 match value {
5345 Some(id) => set_current_tenant(id.clone()),
5346 None => clear_current_tenant(),
5347 }
5348 Ok(RuntimeQueryResult::ok_message(
5349 query.to_string(),
5350 &match value {
5351 Some(id) => format!("tenant set: {id}"),
5352 None => "tenant cleared".to_string(),
5353 },
5354 "set_tenant",
5355 ))
5356 }
5357 QueryExpr::ShowTenant => {
5358 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5359 let mut record = UnifiedRecord::new();
5360 record.set(
5361 "tenant",
5362 current_tenant().map(Value::text).unwrap_or(Value::Null),
5363 );
5364 result.push(record);
5365 Ok(RuntimeQueryResult {
5366 query: query.to_string(),
5367 mode,
5368 statement: "show_tenant",
5369 engine: "runtime-tenant",
5370 result,
5371 affected_rows: 0,
5372 statement_type: "select",
5373 })
5374 }
5375 QueryExpr::TransactionControl(ref ctl) => {
5387 use crate::storage::query::ast::TxnControl;
5388 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5389 use crate::storage::transaction::IsolationLevel;
5390
5391 let conn_id = current_connection_id();
5396
5397 let (kind, msg) = match ctl {
5398 TxnControl::Begin => {
5399 let mgr = Arc::clone(&self.inner.snapshot_manager);
5400 let xid = mgr.begin();
5401 let snapshot = mgr.snapshot(xid);
5402 let ctx = TxnContext {
5403 xid,
5404 isolation: IsolationLevel::SnapshotIsolation,
5405 snapshot,
5406 savepoints: Vec::new(),
5407 released_sub_xids: Vec::new(),
5408 };
5409 self.inner.tx_contexts.write().insert(conn_id, ctx);
5410 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
5411 }
5412 TxnControl::Commit => {
5413 self.inner.tx_local_tenants.write().remove(&conn_id);
5415 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5416 match ctx {
5417 Some(ctx) => {
5418 let mut own_xids = std::collections::HashSet::new();
5419 own_xids.insert(ctx.xid);
5420 for (_, sub) in &ctx.savepoints {
5421 own_xids.insert(*sub);
5422 }
5423 for sub in &ctx.released_sub_xids {
5424 own_xids.insert(*sub);
5425 }
5426 if let Err(err) = self.check_table_row_write_conflicts(
5427 conn_id,
5428 &ctx.snapshot,
5429 &own_xids,
5430 ) {
5431 for (_, sub) in &ctx.savepoints {
5432 self.inner.snapshot_manager.rollback(*sub);
5433 }
5434 for sub in &ctx.released_sub_xids {
5435 self.inner.snapshot_manager.rollback(*sub);
5436 }
5437 self.inner.snapshot_manager.rollback(ctx.xid);
5438 self.revive_pending_versioned_updates(conn_id);
5439 self.revive_pending_tombstones(conn_id);
5440 self.discard_pending_kv_watch_events(conn_id);
5441 self.discard_pending_store_wal_actions(conn_id);
5442 return Err(err);
5443 }
5444 self.restore_pending_write_stamps(conn_id);
5445 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
5446 for (_, sub) in &ctx.savepoints {
5447 self.inner.snapshot_manager.rollback(*sub);
5448 }
5449 for sub in &ctx.released_sub_xids {
5450 self.inner.snapshot_manager.rollback(*sub);
5451 }
5452 self.inner.snapshot_manager.rollback(ctx.xid);
5453 self.revive_pending_versioned_updates(conn_id);
5454 self.revive_pending_tombstones(conn_id);
5455 self.discard_pending_kv_watch_events(conn_id);
5456 return Err(err);
5457 }
5458 for (_, sub) in &ctx.savepoints {
5464 self.inner.snapshot_manager.commit(*sub);
5465 }
5466 for sub in &ctx.released_sub_xids {
5467 self.inner.snapshot_manager.commit(*sub);
5468 }
5469 self.inner.snapshot_manager.commit(ctx.xid);
5470 self.finalize_pending_versioned_updates(conn_id);
5471 self.finalize_pending_tombstones(conn_id);
5472 self.finalize_pending_kv_watch_events(conn_id);
5473 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
5474 }
5475 None => (
5476 "commit",
5477 "COMMIT outside transaction — no-op (autocommit)".to_string(),
5478 ),
5479 }
5480 }
5481 TxnControl::Rollback => {
5482 self.inner.tx_local_tenants.write().remove(&conn_id);
5483 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5484 match ctx {
5485 Some(ctx) => {
5486 for (_, sub) in &ctx.savepoints {
5489 self.inner.snapshot_manager.rollback(*sub);
5490 }
5491 for sub in &ctx.released_sub_xids {
5492 self.inner.snapshot_manager.rollback(*sub);
5493 }
5494 self.inner.snapshot_manager.rollback(ctx.xid);
5495 self.revive_pending_versioned_updates(conn_id);
5499 self.revive_pending_tombstones(conn_id);
5500 self.discard_pending_kv_watch_events(conn_id);
5501 self.discard_pending_store_wal_actions(conn_id);
5502 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
5503 }
5504 None => (
5505 "rollback",
5506 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
5507 ),
5508 }
5509 }
5510 TxnControl::Savepoint(name) => {
5517 let mgr = Arc::clone(&self.inner.snapshot_manager);
5518 let mut guard = self.inner.tx_contexts.write();
5519 match guard.get_mut(&conn_id) {
5520 Some(ctx) => {
5521 let sub = mgr.begin();
5522 ctx.savepoints.push((name.clone(), sub));
5523 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
5524 }
5525 None => (
5526 "savepoint",
5527 "SAVEPOINT outside transaction — no-op".to_string(),
5528 ),
5529 }
5530 }
5531 TxnControl::ReleaseSavepoint(name) => {
5532 let mut guard = self.inner.tx_contexts.write();
5533 match guard.get_mut(&conn_id) {
5534 Some(ctx) => {
5535 let pos = ctx
5536 .savepoints
5537 .iter()
5538 .position(|(n, _)| n == name)
5539 .ok_or_else(|| {
5540 RedDBError::Internal(format!(
5541 "savepoint {name} does not exist"
5542 ))
5543 })?;
5544 let released = ctx.savepoints.len() - pos;
5552 let popped: Vec<Xid> = ctx
5553 .savepoints
5554 .split_off(pos)
5555 .into_iter()
5556 .map(|(_, x)| x)
5557 .collect();
5558 ctx.released_sub_xids.extend(popped);
5559 (
5560 "release_savepoint",
5561 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
5562 )
5563 }
5564 None => (
5565 "release_savepoint",
5566 "RELEASE outside transaction — no-op".to_string(),
5567 ),
5568 }
5569 }
5570 TxnControl::RollbackToSavepoint(name) => {
5571 let mgr = Arc::clone(&self.inner.snapshot_manager);
5572 let drop_result: Option<(Xid, Vec<Xid>)> = {
5577 let mut guard = self.inner.tx_contexts.write();
5578 if let Some(ctx) = guard.get_mut(&conn_id) {
5579 let pos = ctx
5580 .savepoints
5581 .iter()
5582 .position(|(n, _)| n == name)
5583 .ok_or_else(|| {
5584 RedDBError::Internal(format!(
5585 "savepoint {name} does not exist"
5586 ))
5587 })?;
5588 let savepoint_xid = ctx.savepoints[pos].1;
5589 let aborted: Vec<Xid> = ctx
5590 .savepoints
5591 .split_off(pos)
5592 .into_iter()
5593 .map(|(_, x)| x)
5594 .collect();
5595 Some((savepoint_xid, aborted))
5596 } else {
5597 None
5598 }
5599 };
5600
5601 match drop_result {
5602 Some((savepoint_xid, aborted)) => {
5603 for x in &aborted {
5604 mgr.rollback(*x);
5605 }
5606 let reverted_updates =
5607 self.revive_versioned_updates_since(conn_id, savepoint_xid);
5608 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
5609 (
5610 "rollback_to_savepoint",
5611 format!(
5612 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
5613 aborted.len(),
5614 ),
5615 )
5616 }
5617 None => (
5618 "rollback_to_savepoint",
5619 "ROLLBACK TO outside transaction — no-op".to_string(),
5620 ),
5621 }
5622 }
5623 };
5624 Ok(RuntimeQueryResult::ok_message(
5625 query.to_string(),
5626 &msg,
5627 kind,
5628 ))
5629 }
5630 QueryExpr::CreateSchema(ref q) => {
5643 let store = self.inner.db.store();
5644 let key = format!("schema.{}", q.name);
5645 if store.get_config(&key).is_some() {
5646 if q.if_not_exists {
5647 return Ok(RuntimeQueryResult::ok_message(
5648 query.to_string(),
5649 &format!("schema {} already exists — skipped", q.name),
5650 "create_schema",
5651 ));
5652 }
5653 return Err(RedDBError::Internal(format!(
5654 "schema {} already exists",
5655 q.name
5656 )));
5657 }
5658 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
5659 Ok(RuntimeQueryResult::ok_message(
5660 query.to_string(),
5661 &format!("schema {} created", q.name),
5662 "create_schema",
5663 ))
5664 }
5665 QueryExpr::DropSchema(ref q) => {
5666 let store = self.inner.db.store();
5667 let key = format!("schema.{}", q.name);
5668 let existed = store.get_config(&key).is_some();
5669 if !existed && !q.if_exists {
5670 return Err(RedDBError::Internal(format!(
5671 "schema {} does not exist",
5672 q.name
5673 )));
5674 }
5675 store.set_config_tree(&key, &crate::serde_json::Value::Null);
5677 let suffix = if q.cascade {
5678 " (CASCADE accepted — tables untouched)"
5679 } else {
5680 ""
5681 };
5682 Ok(RuntimeQueryResult::ok_message(
5683 query.to_string(),
5684 &format!("schema {} dropped{}", q.name, suffix),
5685 "drop_schema",
5686 ))
5687 }
5688 QueryExpr::CreateSequence(ref q) => {
5689 let store = self.inner.db.store();
5690 let base = format!("sequence.{}", q.name);
5691 let start_key = format!("{base}.start");
5692 let incr_key = format!("{base}.increment");
5693 let curr_key = format!("{base}.current");
5694 if store.get_config(&start_key).is_some() {
5695 if q.if_not_exists {
5696 return Ok(RuntimeQueryResult::ok_message(
5697 query.to_string(),
5698 &format!("sequence {} already exists — skipped", q.name),
5699 "create_sequence",
5700 ));
5701 }
5702 return Err(RedDBError::Internal(format!(
5703 "sequence {} already exists",
5704 q.name
5705 )));
5706 }
5707 let initial_current = q.start - q.increment;
5710 store.set_config_tree(
5711 &start_key,
5712 &crate::serde_json::Value::Number(q.start as f64),
5713 );
5714 store.set_config_tree(
5715 &incr_key,
5716 &crate::serde_json::Value::Number(q.increment as f64),
5717 );
5718 store.set_config_tree(
5719 &curr_key,
5720 &crate::serde_json::Value::Number(initial_current as f64),
5721 );
5722 Ok(RuntimeQueryResult::ok_message(
5723 query.to_string(),
5724 &format!(
5725 "sequence {} created (start={}, increment={})",
5726 q.name, q.start, q.increment
5727 ),
5728 "create_sequence",
5729 ))
5730 }
5731 QueryExpr::DropSequence(ref q) => {
5732 let store = self.inner.db.store();
5733 let base = format!("sequence.{}", q.name);
5734 let existed = store.get_config(&format!("{base}.start")).is_some();
5735 if !existed && !q.if_exists {
5736 return Err(RedDBError::Internal(format!(
5737 "sequence {} does not exist",
5738 q.name
5739 )));
5740 }
5741 for k in ["start", "increment", "current"] {
5742 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
5743 }
5744 Ok(RuntimeQueryResult::ok_message(
5745 query.to_string(),
5746 &format!("sequence {} dropped", q.name),
5747 "drop_sequence",
5748 ))
5749 }
5750 QueryExpr::CreateView(ref q) => {
5760 let mut views = self.inner.views.write();
5761 if views.contains_key(&q.name) && !q.or_replace {
5762 if q.if_not_exists {
5763 return Ok(RuntimeQueryResult::ok_message(
5764 query.to_string(),
5765 &format!("view {} already exists — skipped", q.name),
5766 "create_view",
5767 ));
5768 }
5769 return Err(RedDBError::Internal(format!(
5770 "view {} already exists",
5771 q.name
5772 )));
5773 }
5774 views.insert(q.name.clone(), Arc::new(q.clone()));
5775 drop(views);
5776
5777 if q.materialized {
5779 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
5780 let refresh = match q.refresh_every_ms {
5781 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
5782 None => RefreshPolicy::Manual,
5783 };
5784 let dependencies = collect_table_refs(&q.query);
5785 let def = MaterializedViewDef {
5786 name: q.name.clone(),
5787 query: format!("<parsed view {}>", q.name),
5788 dependencies: dependencies.clone(),
5789 refresh,
5790 retention_duration_ms: q.retention_duration_ms,
5791 };
5792 self.inner.materialized_views.write().register(def);
5793
5794 let descriptor =
5800 crate::runtime::continuous_materialized_view::MaterializedViewDescriptor {
5801 name: q.name.clone(),
5802 source_sql: query.to_string(),
5803 source_collections: dependencies,
5804 refresh_every_ms: q.refresh_every_ms,
5805 retention_duration_ms: q.retention_duration_ms,
5806 };
5807 let store = self.inner.db.store();
5808 crate::runtime::continuous_materialized_view::persist_descriptor(
5809 store.as_ref(),
5810 &descriptor,
5811 )?;
5812
5813 self.ensure_materialized_view_backing(&q.name)?;
5820 }
5821 self.invalidate_plan_cache();
5826 self.invalidate_result_cache();
5827
5828 Ok(RuntimeQueryResult::ok_message(
5829 query.to_string(),
5830 &format!(
5831 "{}view {} created",
5832 if q.materialized { "materialized " } else { "" },
5833 q.name
5834 ),
5835 "create_view",
5836 ))
5837 }
5838 QueryExpr::DropView(ref q) => {
5839 let mut views = self.inner.views.write();
5840 let removed = views.remove(&q.name);
5841 let existed = removed.is_some();
5842 let removed_materialized =
5843 removed.as_ref().map(|v| v.materialized).unwrap_or(false);
5844 drop(views);
5845 if q.materialized || existed {
5846 self.inner.materialized_views.write().remove(&q.name);
5848 let store = self.inner.db.store();
5852 crate::runtime::continuous_materialized_view::remove_by_name(
5853 store.as_ref(),
5854 &q.name,
5855 )?;
5856 }
5857 if removed_materialized || q.materialized {
5861 self.drop_materialized_view_backing(&q.name)?;
5862 }
5863 self.invalidate_plan_cache();
5866 self.invalidate_result_cache();
5867 if !existed && !q.if_exists {
5868 return Err(RedDBError::Internal(format!(
5869 "view {} does not exist",
5870 q.name
5871 )));
5872 }
5873 self.invalidate_plan_cache();
5874 Ok(RuntimeQueryResult::ok_message(
5875 query.to_string(),
5876 &format!("view {} dropped", q.name),
5877 "drop_view",
5878 ))
5879 }
5880 QueryExpr::RefreshMaterializedView(ref q) => {
5881 let view = {
5884 let views = self.inner.views.read();
5885 views.get(&q.name).cloned()
5886 };
5887 let view = match view {
5888 Some(v) => v,
5889 None => {
5890 return Err(RedDBError::Internal(format!(
5891 "view {} does not exist",
5892 q.name
5893 )))
5894 }
5895 };
5896 if !view.materialized {
5897 return Err(RedDBError::Internal(format!(
5898 "view {} is not materialized — REFRESH requires \
5899 CREATE MATERIALIZED VIEW",
5900 q.name
5901 )));
5902 }
5903 let started = std::time::Instant::now();
5905 let now_ms = std::time::SystemTime::now()
5906 .duration_since(std::time::UNIX_EPOCH)
5907 .map(|d| d.as_millis() as u64)
5908 .unwrap_or(0);
5909 match self.execute_query_expr((*view.query).clone()) {
5910 Ok(inner_result) => {
5911 let entities =
5918 view_records_to_entities(&q.name, &inner_result.result.records);
5919 let row_count = entities.len() as u64;
5920 let store = self.inner.db.store();
5921 let serialized_records = match store.refresh_collection(&q.name, entities) {
5922 Ok(records) => records,
5923 Err(err) => {
5924 let duration_ms = started.elapsed().as_millis() as u64;
5925 let msg = err.to_string();
5926 self.inner
5927 .materialized_views
5928 .write()
5929 .record_refresh_failure(
5930 &q.name,
5931 msg.clone(),
5932 duration_ms,
5933 now_ms,
5934 );
5935 return Err(RedDBError::Internal(format!(
5936 "REFRESH MATERIALIZED VIEW {}: {msg}",
5937 q.name
5938 )));
5939 }
5940 };
5941
5942 if let Some(ref primary) = self.inner.db.replication {
5948 let lsn = self.inner.cdc.emit(
5949 crate::replication::cdc::ChangeOperation::Refresh,
5950 &q.name,
5951 0,
5952 "refresh",
5953 );
5954 self.invalidate_result_cache_for_table(&q.name);
5955 let timestamp = std::time::SystemTime::now()
5956 .duration_since(std::time::UNIX_EPOCH)
5957 .unwrap_or_default()
5958 .as_millis() as u64;
5959 let record = ChangeRecord::for_refresh(
5960 lsn,
5961 timestamp,
5962 q.name.clone(),
5963 serialized_records,
5964 );
5965 let encoded = record.encode();
5966 primary.wal_buffer.append(record.lsn, encoded.clone());
5967 if let Some(spool) = &primary.logical_wal_spool {
5968 let _ = spool.append(record.lsn, &encoded);
5969 }
5970 }
5971
5972 let duration_ms = started.elapsed().as_millis() as u64;
5973 let serialized = format!("{:?}", inner_result.result);
5974 self.inner
5975 .materialized_views
5976 .write()
5977 .record_refresh_success(
5978 &q.name,
5979 serialized.into_bytes(),
5980 row_count,
5981 duration_ms,
5982 now_ms,
5983 );
5984 self.invalidate_result_cache();
5989 Ok(RuntimeQueryResult::ok_message(
5990 query.to_string(),
5991 &format!("materialized view {} refreshed", q.name),
5992 "refresh_materialized_view",
5993 ))
5994 }
5995 Err(err) => {
5996 let duration_ms = started.elapsed().as_millis() as u64;
5997 let msg = err.to_string();
5998 self.inner
5999 .materialized_views
6000 .write()
6001 .record_refresh_failure(&q.name, msg.clone(), duration_ms, now_ms);
6002 Err(err)
6003 }
6004 }
6005 }
6006 QueryExpr::CreatePolicy(ref q) => {
6013 let key = (q.table.clone(), q.name.clone());
6014 self.inner
6015 .rls_policies
6016 .write()
6017 .insert(key, Arc::new(q.clone()));
6018 self.invalidate_plan_cache();
6019 self.schema_vocabulary_apply(
6023 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
6024 collection: q.table.clone(),
6025 policy: q.name.clone(),
6026 },
6027 );
6028 Ok(RuntimeQueryResult::ok_message(
6029 query.to_string(),
6030 &format!("policy {} on {} created", q.name, q.table),
6031 "create_policy",
6032 ))
6033 }
6034 QueryExpr::DropPolicy(ref q) => {
6035 let removed = self
6036 .inner
6037 .rls_policies
6038 .write()
6039 .remove(&(q.table.clone(), q.name.clone()))
6040 .is_some();
6041 if !removed && !q.if_exists {
6042 return Err(RedDBError::Internal(format!(
6043 "policy {} on {} does not exist",
6044 q.name, q.table
6045 )));
6046 }
6047 self.invalidate_plan_cache();
6048 self.schema_vocabulary_apply(
6051 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
6052 collection: q.table.clone(),
6053 policy: q.name.clone(),
6054 },
6055 );
6056 Ok(RuntimeQueryResult::ok_message(
6057 query.to_string(),
6058 &format!("policy {} on {} dropped", q.name, q.table),
6059 "drop_policy",
6060 ))
6061 }
6062 QueryExpr::CreateServer(ref q) => {
6073 use crate::storage::fdw::FdwOptions;
6074 let registry = Arc::clone(&self.inner.foreign_tables);
6075 if registry.server(&q.name).is_some() {
6076 if q.if_not_exists {
6077 return Ok(RuntimeQueryResult::ok_message(
6078 query.to_string(),
6079 &format!("server {} already exists — skipped", q.name),
6080 "create_server",
6081 ));
6082 }
6083 return Err(RedDBError::Internal(format!(
6084 "server {} already exists",
6085 q.name
6086 )));
6087 }
6088 let mut opts = FdwOptions::new();
6089 for (k, v) in &q.options {
6090 opts.values.insert(k.clone(), v.clone());
6091 }
6092 registry
6093 .create_server(&q.name, &q.wrapper, opts)
6094 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6095 Ok(RuntimeQueryResult::ok_message(
6096 query.to_string(),
6097 &format!("server {} created (wrapper {})", q.name, q.wrapper),
6098 "create_server",
6099 ))
6100 }
6101 QueryExpr::DropServer(ref q) => {
6102 let existed = self.inner.foreign_tables.drop_server(&q.name);
6103 if !existed && !q.if_exists {
6104 return Err(RedDBError::Internal(format!(
6105 "server {} does not exist",
6106 q.name
6107 )));
6108 }
6109 Ok(RuntimeQueryResult::ok_message(
6110 query.to_string(),
6111 &format!(
6112 "server {} dropped{}",
6113 q.name,
6114 if q.cascade { " (cascade)" } else { "" }
6115 ),
6116 "drop_server",
6117 ))
6118 }
6119 QueryExpr::CreateForeignTable(ref q) => {
6120 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
6121 let registry = Arc::clone(&self.inner.foreign_tables);
6122 if registry.foreign_table(&q.name).is_some() {
6123 if q.if_not_exists {
6124 return Ok(RuntimeQueryResult::ok_message(
6125 query.to_string(),
6126 &format!("foreign table {} already exists — skipped", q.name),
6127 "create_foreign_table",
6128 ));
6129 }
6130 return Err(RedDBError::Internal(format!(
6131 "foreign table {} already exists",
6132 q.name
6133 )));
6134 }
6135 let mut opts = FdwOptions::new();
6136 for (k, v) in &q.options {
6137 opts.values.insert(k.clone(), v.clone());
6138 }
6139 let columns: Vec<ForeignColumn> = q
6140 .columns
6141 .iter()
6142 .map(|c| ForeignColumn {
6143 name: c.name.clone(),
6144 data_type: c.data_type.clone(),
6145 not_null: c.not_null,
6146 })
6147 .collect();
6148 registry
6149 .create_foreign_table(ForeignTable {
6150 name: q.name.clone(),
6151 server_name: q.server.clone(),
6152 columns,
6153 options: opts,
6154 })
6155 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6156 self.invalidate_plan_cache();
6157 Ok(RuntimeQueryResult::ok_message(
6158 query.to_string(),
6159 &format!("foreign table {} created (server {})", q.name, q.server),
6160 "create_foreign_table",
6161 ))
6162 }
6163 QueryExpr::DropForeignTable(ref q) => {
6164 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
6165 if !existed && !q.if_exists {
6166 return Err(RedDBError::Internal(format!(
6167 "foreign table {} does not exist",
6168 q.name
6169 )));
6170 }
6171 self.invalidate_plan_cache();
6172 Ok(RuntimeQueryResult::ok_message(
6173 query.to_string(),
6174 &format!("foreign table {} dropped", q.name),
6175 "drop_foreign_table",
6176 ))
6177 }
6178 QueryExpr::CopyFrom(ref q) => {
6184 use crate::storage::import::{CsvConfig, CsvImporter};
6185 let store = self.inner.db.store();
6186 let cfg = CsvConfig {
6187 collection: q.table.clone(),
6188 has_header: q.has_header,
6189 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
6190 ..CsvConfig::default()
6191 };
6192 let importer = CsvImporter::new(cfg);
6193 let stats = importer
6194 .import_file(&q.path, store.as_ref())
6195 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
6196 self.note_table_write(&q.table);
6198 Ok(RuntimeQueryResult::ok_message(
6199 query.to_string(),
6200 &format!(
6201 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
6202 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
6203 ),
6204 "copy_from",
6205 ))
6206 }
6207 QueryExpr::MaintenanceCommand(ref cmd) => {
6223 use crate::storage::query::ast::MaintenanceCommand as Mc;
6224 let store = self.inner.db.store();
6225 let (kind, msg) = match cmd {
6226 Mc::Analyze { target } => {
6227 let targets: Vec<String> = match target {
6228 Some(t) => vec![t.clone()],
6229 None => store.list_collections(),
6230 };
6231 for t in &targets {
6232 self.refresh_table_planner_stats(t);
6233 }
6234 (
6235 "analyze",
6236 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
6237 )
6238 }
6239 Mc::Vacuum { target, full } => {
6240 let targets: Vec<String> = match target {
6241 Some(t) => vec![t.clone()],
6242 None => store.list_collections(),
6243 };
6244 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
6245 let mut vacuum_stats =
6246 crate::storage::unified::store::MvccVacuumStats::default();
6247 for t in &targets {
6248 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
6249 RedDBError::Internal(format!(
6250 "VACUUM MVCC history failed for {t}: {e}"
6251 ))
6252 })?;
6253 if stats.reclaimed_versions > 0 {
6254 self.rebuild_runtime_indexes_for_table(t)?;
6255 }
6256 vacuum_stats.add(&stats);
6257 }
6258 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
6259 for t in &targets {
6261 self.refresh_table_planner_stats(t);
6262 }
6263 let persisted = if *full {
6267 match store.persist() {
6268 Ok(()) => true,
6269 Err(e) => {
6270 return Err(RedDBError::Internal(format!(
6271 "VACUUM FULL persist failed: {e:?}"
6272 )));
6273 }
6274 }
6275 } else {
6276 false
6277 };
6278 self.invalidate_result_cache();
6280 (
6281 "vacuum",
6282 format!(
6283 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
6284 if *full { " FULL" } else { "" },
6285 targets.len(),
6286 vacuum_stats.scanned_versions,
6287 vacuum_stats.retained_versions,
6288 vacuum_stats.reclaimed_versions,
6289 vacuum_stats.retained_history_versions,
6290 vacuum_stats.reclaimed_history_versions,
6291 vacuum_stats.retained_tombstones,
6292 vacuum_stats.reclaimed_tombstones,
6293 if persisted {
6294 " (pages flushed to disk)"
6295 } else {
6296 ""
6297 }
6298 ),
6299 )
6300 }
6301 };
6302 Ok(RuntimeQueryResult::ok_message(
6303 query.to_string(),
6304 &msg,
6305 kind,
6306 ))
6307 }
6308 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
6315 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
6316 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
6317 QueryExpr::CreateIamPolicy { ref id, ref json } => {
6318 self.execute_create_iam_policy(query, id, json)
6319 }
6320 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
6321 QueryExpr::AttachPolicy {
6322 ref policy_id,
6323 ref principal,
6324 } => self.execute_attach_policy(query, policy_id, principal),
6325 QueryExpr::DetachPolicy {
6326 ref policy_id,
6327 ref principal,
6328 } => self.execute_detach_policy(query, policy_id, principal),
6329 QueryExpr::ShowPolicies { ref filter } => {
6330 self.execute_show_policies(query, filter.as_ref())
6331 }
6332 QueryExpr::ShowEffectivePermissions {
6333 ref user,
6334 ref resource,
6335 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
6336 QueryExpr::SimulatePolicy {
6337 ref user,
6338 ref action,
6339 ref resource,
6340 } => self.execute_simulate_policy(query, user, action, resource),
6341 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
6342 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
6343 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
6344 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
6345 };
6346
6347 let mut query_result = query_result;
6351 if let Ok(ref mut result) = query_result {
6352 if result.statement_type == "select" {
6353 self.apply_secret_decryption(result);
6354 }
6355 }
6356
6357 if let Ok(ref result) = query_result {
6364 frame.write_result_cache(self, result, result_cache_scopes);
6365 }
6366
6367 query_result
6368 }
6369
6370 pub fn materialized_view_metadata(
6374 &self,
6375 ) -> Vec<crate::storage::cache::result::MaterializedViewMetadata> {
6376 let store = self.inner.db.store();
6383 let mut entries = self.inner.materialized_views.read().metadata();
6384 for entry in &mut entries {
6385 if let Some(manager) = store.get_collection(&entry.name) {
6386 entry.current_row_count = manager.count() as u64;
6387 }
6388 }
6389 entries
6390 }
6391
6392 pub(crate) fn retention_sweeper_snapshot(
6403 &self,
6404 ) -> Vec<(String, crate::runtime::retention_sweeper::SweeperState)> {
6405 self.inner.retention_sweeper.read().snapshot()
6406 }
6407
6408 pub fn sweep_retention_tick(&self, batch_size: usize) {
6430 if batch_size == 0 {
6431 return;
6432 }
6433 let now_ms = std::time::SystemTime::now()
6434 .duration_since(std::time::UNIX_EPOCH)
6435 .map(|d| d.as_millis() as u64)
6436 .unwrap_or(0);
6437
6438 let store = self.inner.db.store();
6439 let collections = store.list_collections();
6440 for name in collections {
6441 let Some(contract) = self.inner.db.collection_contract(&name) else {
6442 continue;
6443 };
6444 let Some(retention_ms) = contract.retention_duration_ms else {
6445 continue;
6446 };
6447 let Some(ts_column) =
6448 crate::runtime::retention_filter::resolve_timestamp_column(&contract)
6449 else {
6450 continue;
6451 };
6452 let Some(manager) = store.get_collection(&name) else {
6453 continue;
6454 };
6455 let cutoff = (now_ms as i64).saturating_sub(retention_ms as i64);
6456
6457 let mut expired_ts: Vec<i64> = Vec::new();
6465 manager.for_each_entity(|entity| {
6466 let ts = match ts_column.as_str() {
6467 "created_at" => Some(entity.created_at as i64),
6468 "updated_at" => Some(entity.updated_at as i64),
6469 other => entity
6470 .data
6471 .as_row()
6472 .and_then(|row| row.get_field(other))
6473 .and_then(|v| match v {
6474 crate::storage::schema::Value::TimestampMs(t) => Some(*t),
6475 crate::storage::schema::Value::Timestamp(t) => {
6476 Some(t.saturating_mul(1_000))
6477 }
6478 crate::storage::schema::Value::BigInt(t) => Some(*t),
6479 crate::storage::schema::Value::UnsignedInteger(t) => {
6480 i64::try_from(*t).ok()
6481 }
6482 crate::storage::schema::Value::Integer(t) => Some(*t),
6483 _ => None,
6484 }),
6485 };
6486 if let Some(t) = ts {
6487 if t < cutoff {
6488 expired_ts.push(t);
6489 }
6490 }
6491 true
6492 });
6493
6494 let total_expired = expired_ts.len() as u64;
6495 if total_expired == 0 {
6496 self.inner
6497 .retention_sweeper
6498 .write()
6499 .record_tick(&name, 0, 0, now_ms);
6500 continue;
6501 }
6502
6503 let (effective_cutoff, pending) = if (total_expired as usize) <= batch_size {
6504 (cutoff, 0u64)
6505 } else {
6506 expired_ts.sort_unstable();
6510 let nth = expired_ts[batch_size - 1];
6511 (
6512 nth.saturating_add(1),
6513 total_expired.saturating_sub(batch_size as u64),
6514 )
6515 };
6516
6517 let stmt = format!(
6518 "DELETE FROM {} WHERE {} < {}",
6519 name, ts_column, effective_cutoff
6520 );
6521 let deleted = match self.execute_query(&stmt) {
6522 Ok(r) => r.affected_rows,
6523 Err(_) => 0,
6524 };
6525
6526 self.inner
6527 .retention_sweeper
6528 .write()
6529 .record_tick(&name, deleted, pending, now_ms);
6530 }
6531 }
6532
6533 pub fn refresh_due_materialized_views(&self) {
6534 let due = {
6535 let mut cache = self.inner.materialized_views.write();
6536 cache.claim_due_at(std::time::Instant::now())
6537 };
6538 for name in due {
6539 let stmt = format!("REFRESH MATERIALIZED VIEW {}", name);
6546 let _ = self.execute_query(&stmt);
6547 }
6548 }
6549
6550 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
6556 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
6557 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
6558 let expr = self.rewrite_view_refs(expr);
6562
6563 self.validate_model_operations_before_auth(&expr)?;
6564 if let Err(err) = self.check_query_privilege(&expr) {
6568 return Err(RedDBError::Query(format!("permission denied: {err}")));
6569 }
6570
6571 let statement = query_expr_name(&expr);
6572 let mode = detect_mode(statement);
6573 let query_str = statement;
6574
6575 let result = self.dispatch_expr(expr, query_str, mode)?;
6576 let mut r = result;
6577 if r.statement_type == "select" {
6578 self.apply_secret_decryption(&mut r);
6579 }
6580 Ok(r)
6581 }
6582
6583 pub(super) fn validate_model_operations_before_auth(
6584 &self,
6585 expr: &QueryExpr,
6586 ) -> RedDBResult<()> {
6587 use crate::catalog::CollectionModel;
6588 use crate::runtime::ddl::polymorphic_resolver;
6589 use crate::storage::query::ast::KvCommand;
6590
6591 let system_schema_target = match expr {
6592 QueryExpr::DropTable(q) => Some(q.name.as_str()),
6593 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
6594 QueryExpr::DropVector(q) => Some(q.name.as_str()),
6595 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
6596 QueryExpr::DropKv(q) => Some(q.name.as_str()),
6597 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
6598 QueryExpr::Truncate(q) => Some(q.name.as_str()),
6599 _ => None,
6600 };
6601 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
6602 return Err(RedDBError::Query("system schema is read-only".to_string()));
6603 }
6604
6605 let expected = match expr {
6606 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
6607 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
6608 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
6609 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
6610 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
6611 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
6612 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
6613 QueryExpr::KvCommand(cmd) => {
6614 let (collection, model) = match cmd {
6615 KvCommand::Put {
6616 collection, model, ..
6617 }
6618 | KvCommand::Get {
6619 collection, model, ..
6620 }
6621 | KvCommand::Incr {
6622 collection, model, ..
6623 }
6624 | KvCommand::Cas {
6625 collection, model, ..
6626 }
6627 | KvCommand::Delete {
6628 collection, model, ..
6629 } => (collection.as_str(), *model),
6630 KvCommand::Rotate { collection, .. }
6631 | KvCommand::History { collection, .. }
6632 | KvCommand::List { collection, .. }
6633 | KvCommand::Purge { collection, .. } => {
6634 (collection.as_str(), CollectionModel::Vault)
6635 }
6636 KvCommand::InvalidateTags { collection, .. } => {
6637 (collection.as_str(), CollectionModel::Kv)
6638 }
6639 KvCommand::Watch {
6640 collection, model, ..
6641 } => (collection.as_str(), *model),
6642 KvCommand::Unseal { collection, .. } => {
6643 (collection.as_str(), CollectionModel::Vault)
6644 }
6645 };
6646 Some((collection, model))
6647 }
6648 QueryExpr::ConfigCommand(cmd) => {
6649 self.validate_config_command_before_auth(cmd)?;
6650 None
6651 }
6652 _ => None,
6653 };
6654
6655 let Some((name, expected_model)) = expected else {
6656 return Ok(());
6657 };
6658 let snapshot = self.inner.db.catalog_model_snapshot();
6659 let Some(actual_model) = snapshot
6660 .collections
6661 .iter()
6662 .find(|collection| collection.name == name)
6663 .map(|collection| collection.declared_model.unwrap_or(collection.model))
6664 else {
6665 return Ok(());
6666 };
6667 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
6668 }
6669
6670 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
6675 if self.inner.views.read().is_empty() {
6677 return expr;
6678 }
6679 self.rewrite_view_refs_inner(expr)
6680 }
6681
6682 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
6683 use crate::storage::query::ast::{Filter, TableSource};
6684 match expr {
6685 QueryExpr::Table(mut tq) => {
6686 if let Some(TableSource::Subquery(body)) = tq.source.take() {
6692 tq.source = Some(TableSource::Subquery(Box::new(
6693 self.rewrite_view_refs_inner(*body),
6694 )));
6695 return QueryExpr::Table(tq);
6696 }
6697
6698 let maybe_view = {
6702 let views = self.inner.views.read();
6703 views.get(&tq.table).cloned()
6704 };
6705 let Some(view) = maybe_view else {
6706 return QueryExpr::Table(tq);
6707 };
6708
6709 if view.materialized {
6715 return QueryExpr::Table(tq);
6716 }
6717
6718 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
6722
6723 match inner_expr {
6731 QueryExpr::Table(mut inner_tq) => {
6732 if let Some(outer_filter) = tq.filter.take() {
6733 inner_tq.filter = Some(match inner_tq.filter.take() {
6734 Some(existing) => {
6735 Filter::And(Box::new(existing), Box::new(outer_filter))
6736 }
6737 None => outer_filter,
6738 });
6739 inner_tq.where_expr = inner_tq
6747 .filter
6748 .as_ref()
6749 .map(crate::storage::query::sql_lowering::filter_to_expr);
6750 }
6751 if let Some(outer_limit) = tq.limit {
6752 inner_tq.limit = Some(match inner_tq.limit {
6753 Some(existing) => existing.min(outer_limit),
6754 None => outer_limit,
6755 });
6756 }
6757 if let Some(outer_offset) = tq.offset {
6758 inner_tq.offset = Some(match inner_tq.offset {
6759 Some(existing) => existing + outer_offset,
6760 None => outer_offset,
6761 });
6762 }
6763 QueryExpr::Table(inner_tq)
6764 }
6765 other => other,
6766 }
6767 }
6768 QueryExpr::Join(mut jq) => {
6769 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
6770 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
6771 QueryExpr::Join(jq)
6772 }
6773 other => other,
6776 }
6777 }
6778
6779 fn authorize_relational_table_select(
6783 &self,
6784 mut table: TableQuery,
6785 frame: &dyn super::statement_frame::ReadFrame,
6786 ) -> RedDBResult<Option<TableQuery>> {
6787 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6788 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6789 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6790 return Ok(Some(table));
6791 }
6792
6793 self.check_table_column_projection_authz(&table, frame)?;
6794
6795 if self.inner.rls_enabled_tables.read().contains(&table.table) {
6796 return Ok(inject_rls_filters(self, frame, table));
6797 }
6798
6799 Ok(Some(table))
6800 }
6801
6802 fn authorize_relational_join_select(
6803 &self,
6804 mut join: JoinQuery,
6805 frame: &dyn super::statement_frame::ReadFrame,
6806 ) -> RedDBResult<Option<JoinQuery>> {
6807 self.check_join_column_projection_authz(&join, frame)?;
6808 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
6809 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
6810 Ok(inject_rls_into_join(self, frame, join))
6811 }
6812
6813 fn authorize_relational_join_child(
6814 &self,
6815 expr: QueryExpr,
6816 frame: &dyn super::statement_frame::ReadFrame,
6817 ) -> RedDBResult<QueryExpr> {
6818 match expr {
6819 QueryExpr::Table(mut table) => {
6820 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6821 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6822 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6823 }
6824 Ok(QueryExpr::Table(table))
6825 }
6826 QueryExpr::Join(join) => self
6827 .authorize_relational_join_select(join, frame)?
6828 .map(QueryExpr::Join)
6829 .ok_or_else(|| {
6830 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6831 }),
6832 other => Ok(other),
6833 }
6834 }
6835
6836 fn authorize_relational_select_expr(
6837 &self,
6838 expr: QueryExpr,
6839 frame: &dyn super::statement_frame::ReadFrame,
6840 ) -> RedDBResult<QueryExpr> {
6841 match expr {
6842 QueryExpr::Table(table) => self
6843 .authorize_relational_table_select(table, frame)?
6844 .map(QueryExpr::Table)
6845 .ok_or_else(|| {
6846 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6847 }),
6848 QueryExpr::Join(join) => self
6849 .authorize_relational_join_select(join, frame)?
6850 .map(QueryExpr::Join)
6851 .ok_or_else(|| {
6852 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6853 }),
6854 other => Ok(other),
6855 }
6856 }
6857
6858 fn check_table_column_projection_authz(
6859 &self,
6860 table: &TableQuery,
6861 frame: &dyn super::statement_frame::ReadFrame,
6862 ) -> RedDBResult<()> {
6863 let Some((username, role)) = frame.identity() else {
6864 return Ok(());
6865 };
6866 let Some(auth_store) = self.inner.auth_store.read().clone() else {
6867 return Ok(());
6868 };
6869
6870 let columns = self.resolved_table_projection_columns(table)?;
6871 let request = ColumnAccessRequest::select(table.table.clone(), columns);
6872 let principal = UserId::from_parts(frame.effective_scope(), username);
6873 let ctx = runtime_iam_context(role, frame.effective_scope());
6874 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
6875 if outcome.allowed() {
6876 return Ok(());
6877 }
6878
6879 if let Some(denied) = outcome.first_denied_column() {
6880 return Err(RedDBError::Query(format!(
6881 "permission denied: principal=`{username}` cannot select column `{}`",
6882 denied.resource.name
6883 )));
6884 }
6885 Err(RedDBError::Query(format!(
6886 "permission denied: principal=`{username}` cannot select table `{}`",
6887 table.table
6888 )))
6889 }
6890
6891 fn check_join_column_projection_authz(
6892 &self,
6893 join: &JoinQuery,
6894 frame: &dyn super::statement_frame::ReadFrame,
6895 ) -> RedDBResult<()> {
6896 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
6897 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
6898 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
6899
6900 for (table, columns) in by_table {
6901 let query = TableQuery {
6902 table,
6903 source: None,
6904 alias: None,
6905 select_items: Vec::new(),
6906 columns: columns.into_iter().map(Projection::Column).collect(),
6907 where_expr: None,
6908 filter: None,
6909 group_by_exprs: Vec::new(),
6910 group_by: Vec::new(),
6911 having_expr: None,
6912 having: None,
6913 order_by: Vec::new(),
6914 limit: None,
6915 limit_param: None,
6916 offset: None,
6917 offset_param: None,
6918 expand: None,
6919 as_of: None,
6920 sessionize: None,
6921 };
6922 self.check_table_column_projection_authz(&query, frame)?;
6923 }
6924 Ok(())
6925 }
6926
6927 fn collect_join_projection_columns(
6928 &self,
6929 join: &JoinQuery,
6930 projections: &[Projection],
6931 out: &mut HashMap<String, BTreeSet<String>>,
6932 ) -> RedDBResult<()> {
6933 let left = table_side_context(join.left.as_ref());
6934 let right = table_side_context(join.right.as_ref());
6935
6936 if projections
6937 .iter()
6938 .any(|projection| matches!(projection, Projection::All))
6939 {
6940 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
6941 out.entry(side.table.clone())
6942 .or_default()
6943 .extend(self.table_all_projection_columns(&side.table)?);
6944 }
6945 return Ok(());
6946 }
6947
6948 for projection in projections {
6949 collect_projection_columns_for_join_side(
6950 projection,
6951 left.as_ref(),
6952 right.as_ref(),
6953 out,
6954 )?;
6955 }
6956 Ok(())
6957 }
6958
6959 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
6960 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
6961 if projections
6962 .iter()
6963 .any(|projection| matches!(projection, Projection::All))
6964 {
6965 return self.table_all_projection_columns(&table.table);
6966 }
6967
6968 let mut columns = BTreeSet::new();
6969 for projection in &projections {
6970 collect_projection_columns_for_table(
6971 projection,
6972 &table.table,
6973 table.alias.as_deref(),
6974 &mut columns,
6975 );
6976 }
6977 Ok(columns.into_iter().collect())
6978 }
6979
6980 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
6981 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
6982 let columns: Vec<String> = contract
6983 .declared_columns
6984 .iter()
6985 .map(|column| column.name.clone())
6986 .collect();
6987 if !columns.is_empty() {
6988 return Ok(columns);
6989 }
6990 }
6991
6992 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
6993 Ok(records
6994 .first()
6995 .map(|record| {
6996 record
6997 .column_names()
6998 .into_iter()
6999 .map(|column| column.to_string())
7000 .collect()
7001 })
7002 .unwrap_or_default())
7003 }
7004
7005 fn resolve_table_expr_subqueries(
7006 &self,
7007 mut table: TableQuery,
7008 frame: &dyn super::statement_frame::ReadFrame,
7009 ) -> RedDBResult<TableQuery> {
7010 if let Some(TableSource::Subquery(inner)) = table.source.take() {
7011 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
7012 table.source = Some(TableSource::Subquery(Box::new(inner)));
7013 }
7014
7015 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
7016 for item in &mut table.select_items {
7017 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
7018 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7019 }
7020 }
7021 if let Some(where_expr) = table.where_expr.take() {
7022 table.where_expr =
7023 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
7024 table.filter = None;
7025 }
7026 if let Some(having_expr) = table.having_expr.take() {
7027 table.having_expr =
7028 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
7029 table.having = None;
7030 }
7031 for expr in &mut table.group_by_exprs {
7032 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7033 }
7034 for clause in &mut table.order_by {
7035 if let Some(expr) = clause.expr.take() {
7036 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
7037 }
7038 }
7039 Ok(table)
7040 }
7041
7042 fn resolve_select_expr_subqueries(
7043 &self,
7044 expr: QueryExpr,
7045 frame: &dyn super::statement_frame::ReadFrame,
7046 ) -> RedDBResult<QueryExpr> {
7047 match expr {
7048 QueryExpr::Table(table) => self
7049 .resolve_table_expr_subqueries(table, frame)
7050 .map(QueryExpr::Table),
7051 QueryExpr::Join(mut join) => {
7052 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
7053 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
7054 Ok(QueryExpr::Join(join))
7055 }
7056 other => Ok(other),
7057 }
7058 }
7059
7060 fn resolve_expr_subqueries(
7061 &self,
7062 expr: crate::storage::query::ast::Expr,
7063 outer_scopes: &[String],
7064 frame: &dyn super::statement_frame::ReadFrame,
7065 ) -> RedDBResult<crate::storage::query::ast::Expr> {
7066 use crate::storage::query::ast::Expr;
7067
7068 match expr {
7069 Expr::Subquery { query, span } => {
7070 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
7071 if values.len() > 1 {
7072 return Err(RedDBError::Query(
7073 "scalar subquery returned more than one row".to_string(),
7074 ));
7075 }
7076 Ok(Expr::Literal {
7077 value: values.into_iter().next().unwrap_or(Value::Null),
7078 span,
7079 })
7080 }
7081 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
7082 op,
7083 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
7084 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
7085 span,
7086 }),
7087 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
7088 op,
7089 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7090 span,
7091 }),
7092 Expr::Cast {
7093 inner,
7094 target,
7095 span,
7096 } => Ok(Expr::Cast {
7097 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
7098 target,
7099 span,
7100 }),
7101 Expr::FunctionCall { name, args, span } => {
7102 let args = args
7103 .into_iter()
7104 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
7105 .collect::<RedDBResult<Vec<_>>>()?;
7106 Ok(Expr::FunctionCall { name, args, span })
7107 }
7108 Expr::Case {
7109 branches,
7110 else_,
7111 span,
7112 } => {
7113 let branches = branches
7114 .into_iter()
7115 .map(|(cond, value)| {
7116 Ok((
7117 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
7118 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
7119 ))
7120 })
7121 .collect::<RedDBResult<Vec<_>>>()?;
7122 let else_ = else_
7123 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
7124 .transpose()?
7125 .map(Box::new);
7126 Ok(Expr::Case {
7127 branches,
7128 else_,
7129 span,
7130 })
7131 }
7132 Expr::IsNull {
7133 operand,
7134 negated,
7135 span,
7136 } => Ok(Expr::IsNull {
7137 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7138 negated,
7139 span,
7140 }),
7141 Expr::InList {
7142 target,
7143 values,
7144 negated,
7145 span,
7146 } => {
7147 let target =
7148 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
7149 let mut resolved = Vec::new();
7150 for value in values {
7151 if let Expr::Subquery { query, .. } = value {
7152 resolved.extend(
7153 self.execute_expr_subquery_values(query, outer_scopes, frame)?
7154 .into_iter()
7155 .map(Expr::lit),
7156 );
7157 } else {
7158 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
7159 }
7160 }
7161 Ok(Expr::InList {
7162 target,
7163 values: resolved,
7164 negated,
7165 span,
7166 })
7167 }
7168 Expr::Between {
7169 target,
7170 low,
7171 high,
7172 negated,
7173 span,
7174 } => Ok(Expr::Between {
7175 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
7176 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
7177 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
7178 negated,
7179 span,
7180 }),
7181 other => Ok(other),
7182 }
7183 }
7184
7185 fn execute_expr_subquery_values(
7186 &self,
7187 subquery: crate::storage::query::ast::ExprSubquery,
7188 outer_scopes: &[String],
7189 frame: &dyn super::statement_frame::ReadFrame,
7190 ) -> RedDBResult<Vec<Value>> {
7191 let query = *subquery.query;
7192 if query_references_outer_scope(&query, outer_scopes) {
7193 return Err(RedDBError::Query(
7194 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
7195 ));
7196 }
7197 let query = self.rewrite_view_refs(query);
7198 let query = self.resolve_select_expr_subqueries(query, frame)?;
7199 let query = self.authorize_relational_select_expr(query, frame)?;
7200 let result = match query {
7201 QueryExpr::Table(table) => {
7202 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
7203 }
7204 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
7205 other => {
7206 return Err(RedDBError::Query(format!(
7207 "expression subquery must be a SELECT query, got {}",
7208 query_expr_name(&other)
7209 )))
7210 }
7211 };
7212 first_column_values(result)
7213 }
7214
7215 fn dispatch_expr(
7216 &self,
7217 expr: QueryExpr,
7218 query_str: &str,
7219 mode: QueryMode,
7220 ) -> RedDBResult<RuntimeQueryResult> {
7221 let statement = query_expr_name(&expr);
7222 match expr {
7223 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
7224 Err(RedDBError::Query(
7226 "graph queries cannot be used as prepared statements".to_string(),
7227 ))
7228 }
7229 QueryExpr::Table(table) => {
7230 let scope = self.ai_scope();
7231 let table = self.resolve_table_expr_subqueries(
7232 table,
7233 &scope as &dyn super::statement_frame::ReadFrame,
7234 )?;
7235 if super::red_schema::is_virtual_table(&table.table) {
7236 return Ok(RuntimeQueryResult {
7237 query: query_str.to_string(),
7238 mode,
7239 statement,
7240 engine: "runtime-red-schema",
7241 result: super::red_schema::red_query(
7242 self,
7243 &table.table,
7244 &table,
7245 &scope as &dyn super::statement_frame::ReadFrame,
7246 )?,
7247 affected_rows: 0,
7248 statement_type: "select",
7249 });
7250 }
7251 let Some(table_with_rls) = self.authorize_relational_table_select(
7252 table,
7253 &scope as &dyn super::statement_frame::ReadFrame,
7254 )?
7255 else {
7256 return Ok(RuntimeQueryResult {
7257 query: query_str.to_string(),
7258 mode,
7259 statement,
7260 engine: "runtime-table-rls",
7261 result: crate::storage::query::unified::UnifiedResult::empty(),
7262 affected_rows: 0,
7263 statement_type: "select",
7264 });
7265 };
7266 Ok(RuntimeQueryResult {
7267 query: query_str.to_string(),
7268 mode,
7269 statement,
7270 engine: "runtime-table",
7271 result: execute_runtime_table_query(
7272 &self.inner.db,
7273 &table_with_rls,
7274 Some(&self.inner.index_store),
7275 )?,
7276 affected_rows: 0,
7277 statement_type: "select",
7278 })
7279 }
7280 QueryExpr::Join(join) => {
7281 let scope = self.ai_scope();
7282 let Some(join_with_rls) = self.authorize_relational_join_select(
7283 join,
7284 &scope as &dyn super::statement_frame::ReadFrame,
7285 )?
7286 else {
7287 return Ok(RuntimeQueryResult {
7288 query: query_str.to_string(),
7289 mode,
7290 statement,
7291 engine: "runtime-join-rls",
7292 result: crate::storage::query::unified::UnifiedResult::empty(),
7293 affected_rows: 0,
7294 statement_type: "select",
7295 });
7296 };
7297 Ok(RuntimeQueryResult {
7298 query: query_str.to_string(),
7299 mode,
7300 statement,
7301 engine: "runtime-join",
7302 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
7303 affected_rows: 0,
7304 statement_type: "select",
7305 })
7306 }
7307 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
7308 query: query_str.to_string(),
7309 mode,
7310 statement,
7311 engine: "runtime-vector",
7312 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
7313 affected_rows: 0,
7314 statement_type: "select",
7315 }),
7316 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
7317 query: query_str.to_string(),
7318 mode,
7319 statement,
7320 engine: "runtime-hybrid",
7321 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
7322 affected_rows: 0,
7323 statement_type: "select",
7324 }),
7325 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
7326 Err(RedDBError::Query(
7327 super::red_schema::READ_ONLY_ERROR.to_string(),
7328 ))
7329 }
7330 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
7331 Err(RedDBError::Query(
7332 super::red_schema::READ_ONLY_ERROR.to_string(),
7333 ))
7334 }
7335 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
7336 Err(RedDBError::Query(
7337 super::red_schema::READ_ONLY_ERROR.to_string(),
7338 ))
7339 }
7340 QueryExpr::Insert(ref insert) => self
7341 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
7342 self.execute_insert(query_str, insert)
7343 }),
7344 QueryExpr::Update(ref update) => self
7345 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
7346 self.execute_update(query_str, update)
7347 }),
7348 QueryExpr::Delete(ref delete) => self
7349 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
7350 self.execute_delete(query_str, delete)
7351 }),
7352 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
7353 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
7354 _ => Err(RedDBError::Query(format!(
7355 "prepared-statement execution does not support {statement} statements"
7356 ))),
7357 }
7358 }
7359
7360 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
7363 let q = query.trim();
7366 if !q.starts_with("SELECT") && !q.starts_with("select") {
7367 return None;
7368 }
7369
7370 let where_pos = q
7372 .find("WHERE _entity_id")
7373 .or_else(|| q.find("where _entity_id"))?;
7374 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
7376
7377 let id_str = after_eq.trim();
7379 let entity_id: u64 = id_str.parse().ok()?;
7380
7381 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
7383 let table = q[from_pos..where_pos].trim();
7384 if table.is_empty()
7385 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
7386 {
7387 return None; }
7389 let table_name = table.split_whitespace().next()?;
7390
7391 let store = self.inner.db.store();
7397 let entity = store
7398 .get(
7399 table_name,
7400 crate::storage::unified::EntityId::new(entity_id),
7401 )
7402 .filter(entity_visible_under_current_snapshot);
7403
7404 let count = if entity.is_some() { 1u64 } else { 0 };
7405
7406 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
7412 .as_ref()
7413 .and_then(|e| runtime_table_record_from_entity(e.clone()))
7414 .into_iter()
7415 .collect();
7416
7417 let json = match entity {
7418 Some(ref e) => execute_runtime_serialize_single_entity(e),
7419 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
7420 .to_string(),
7421 };
7422
7423 Some(Ok(RuntimeQueryResult {
7424 query: query.to_string(),
7425 mode: crate::storage::query::modes::QueryMode::Sql,
7426 statement: "select",
7427 engine: "fast-entity-lookup",
7428 result: crate::storage::query::unified::UnifiedResult {
7429 columns: Vec::new(),
7430 records,
7431 stats: crate::storage::query::unified::QueryStats {
7432 rows_scanned: count,
7433 ..Default::default()
7434 },
7435 pre_serialized_json: Some(json),
7436 },
7437 affected_rows: 0,
7438 statement_type: "select",
7439 }))
7440 }
7441
7442 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
7443 match self
7444 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
7445 .as_str()
7446 {
7447 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
7448 "shadow" => RuntimeResultCacheBackend::Shadow,
7449 _ => RuntimeResultCacheBackend::Legacy,
7450 }
7451 }
7452
7453 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7454 match self.result_cache_backend() {
7455 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
7456 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
7457 RuntimeResultCacheBackend::Shadow => {
7458 let legacy = self.get_legacy_result_cache_entry(key);
7459 let blob = self.get_blob_result_cache_entry(key);
7460 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
7461 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
7462 self.inner
7463 .result_cache_shadow_divergences
7464 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
7465 tracing::warn!(
7466 key,
7467 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
7468 "result cache shadow backend diverged from legacy"
7469 );
7470 }
7471 }
7472 legacy
7473 }
7474 }
7475 }
7476
7477 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7478 let cache = self.inner.result_cache.read();
7479 cache.0.get(key).and_then(|entry| {
7480 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
7481 Some(entry.result.clone())
7482 } else {
7483 None
7484 }
7485 })
7486 }
7487
7488 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7489 let hit = self
7490 .inner
7491 .result_blob_cache
7492 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
7493 {
7494 let cache = self.inner.result_blob_entries.read();
7495 if let Some(entry) = cache.0.get(key) {
7496 return Some(entry.result.clone());
7497 }
7498 }
7499
7500 let (result, scopes) = decode_result_cache_payload(hit.value())?;
7501 let mut cache = self.inner.result_blob_entries.write();
7502 let (ref mut map, ref mut order) = *cache;
7503 if !map.contains_key(key) {
7504 order.push_back(key.to_string());
7505 }
7506 map.insert(
7507 key.to_string(),
7508 RuntimeResultCacheEntry {
7509 result: result.clone(),
7510 cached_at: std::time::Instant::now(),
7511 scopes,
7512 },
7513 );
7514 trim_result_cache(map, order);
7515 Some(result)
7516 }
7517
7518 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7519 match self.result_cache_backend() {
7520 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
7521 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
7522 RuntimeResultCacheBackend::Shadow => {
7523 self.put_legacy_result_cache_entry(key, entry.clone());
7524 self.put_blob_result_cache_entry(key, entry);
7525 }
7526 }
7527 }
7528
7529 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7530 let mut cache = self.inner.result_cache.write();
7531 let (ref mut map, ref mut order) = *cache;
7532 if !map.contains_key(key) {
7533 order.push_back(key.to_string());
7534 }
7535 map.insert(key.to_string(), entry);
7536 trim_result_cache(map, order);
7537 }
7538
7539 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7540 let policy = crate::storage::cache::BlobCachePolicy::default()
7541 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
7542 .priority(200);
7543 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
7544 let bytes = encode_result_cache_payload(&entry)
7545 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
7546 let put = crate::storage::cache::BlobCachePut::new(bytes)
7547 .with_dependencies(dependencies)
7548 .with_policy(policy);
7549 if self
7550 .inner
7551 .result_blob_cache
7552 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
7553 .is_err()
7554 {
7555 return;
7556 }
7557
7558 let mut cache = self.inner.result_blob_entries.write();
7559 let (ref mut map, ref mut order) = *cache;
7560 if !map.contains_key(key) {
7561 order.push_back(key.to_string());
7562 }
7563 map.insert(key.to_string(), entry);
7564 trim_result_cache(map, order);
7565 }
7566
7567 pub fn result_cache_shadow_divergences(&self) -> u64 {
7568 self.inner
7569 .result_cache_shadow_divergences
7570 .load(std::sync::atomic::Ordering::Relaxed)
7571 }
7572
7573 pub fn invalidate_result_cache(&self) {
7576 let mut cache = self.inner.result_cache.write();
7577 cache.0.clear();
7578 cache.1.clear();
7579 let mut blob_entries = self.inner.result_blob_entries.write();
7580 blob_entries.0.clear();
7581 blob_entries.1.clear();
7582 self.inner
7583 .result_blob_cache
7584 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7585 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7586 ask_entries.0.clear();
7587 ask_entries.1.clear();
7588 self.inner
7589 .result_blob_cache
7590 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7591 }
7592
7593 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
7596 let legacy_has_match = {
7599 let cache = self.inner.result_cache.read();
7600 let (ref map, _) = *cache;
7601 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7602 };
7603 let blob_has_match = {
7604 let cache = self.inner.result_blob_entries.read();
7605 let (ref map, _) = *cache;
7606 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7607 };
7608 if legacy_has_match {
7609 let mut cache = self.inner.result_cache.write();
7610 let (ref mut map, ref mut order) = *cache;
7611 map.retain(|_, entry| !entry.scopes.contains(table));
7612 order.retain(|key| map.contains_key(key));
7613 }
7614
7615 if matches!(
7616 self.result_cache_backend(),
7617 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
7618 ) {
7619 let mut blob_entries = self.inner.result_blob_entries.write();
7620 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7621 blob_map.clear();
7622 blob_order.clear();
7623 self.inner
7624 .result_blob_cache
7625 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7626 } else if blob_has_match {
7627 let mut blob_entries = self.inner.result_blob_entries.write();
7628 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7629 blob_map.retain(|_, entry| !entry.scopes.contains(table));
7630 blob_order.retain(|key| blob_map.contains_key(key));
7631 }
7632 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7633 ask_entries.0.clear();
7634 ask_entries.1.clear();
7635 self.inner
7636 .result_blob_cache
7637 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7638 }
7639
7640 pub(crate) fn invalidate_plan_cache(&self) {
7641 self.inner.query_cache.write().clear();
7642 self.inner
7643 .ddl_epoch
7644 .fetch_add(1, std::sync::atomic::Ordering::Release);
7645 }
7646
7647 pub fn ddl_epoch(&self) -> u64 {
7651 self.inner
7652 .ddl_epoch
7653 .load(std::sync::atomic::Ordering::Acquire)
7654 }
7655
7656 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
7657 let store = self.inner.db.store();
7658 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
7659 self.invalidate_plan_cache();
7660 }
7661
7662 pub(crate) fn rehydrate_tenant_tables(&self) {
7671 let store = self.inner.db.store();
7672 let Some(manager) = store.get_collection("red_config") else {
7673 return;
7674 };
7675 for entity in manager.query_all(|_| true) {
7680 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
7681 continue;
7682 };
7683 let Some(named) = &row.named else { continue };
7684 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
7685 continue;
7686 };
7687 let Some(rest) = key.strip_prefix("tenant_tables.") else {
7689 continue;
7690 };
7691 let Some((table, suffix)) = rest.rsplit_once('.') else {
7692 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7698 collection: "red_config".to_string(),
7699 detail: format!("malformed tenant_tables key: {key}"),
7700 }
7701 .emit_global();
7702 continue;
7703 };
7704 if suffix != "column" {
7705 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7706 collection: "red_config".to_string(),
7707 detail: format!("unexpected tenant_tables suffix: {key}"),
7708 }
7709 .emit_global();
7710 continue;
7711 }
7712 match named.get("value") {
7713 Some(crate::storage::schema::Value::Text(column)) => {
7714 self.register_tenant_table(table, column);
7715 }
7716 Some(crate::storage::schema::Value::Null) | None => {
7718 self.unregister_tenant_table(table);
7719 }
7720 _ => {}
7721 }
7722 }
7723 }
7724
7725 pub(crate) fn rehydrate_materialized_view_descriptors(&self) {
7737 let store = self.inner.db.store();
7738 let descriptors = crate::runtime::continuous_materialized_view::load_all(store.as_ref());
7739 for descriptor in descriptors {
7740 let parsed = match crate::storage::query::parser::parse(&descriptor.source_sql) {
7741 Ok(qc) => qc,
7742 Err(err) => {
7743 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7744 collection:
7745 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
7746 .to_string(),
7747 detail: format!(
7748 "failed to re-parse materialized-view source for {}: {err}",
7749 descriptor.name
7750 ),
7751 }
7752 .emit_global();
7753 continue;
7754 }
7755 };
7756 let crate::storage::query::ast::QueryExpr::CreateView(create) = parsed.query else {
7757 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7758 collection: crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
7759 .to_string(),
7760 detail: format!(
7761 "materialized-view source for {} did not re-parse as CREATE VIEW",
7762 descriptor.name
7763 ),
7764 }
7765 .emit_global();
7766 continue;
7767 };
7768 let view_name = create.name.clone();
7770 self.inner
7771 .views
7772 .write()
7773 .insert(view_name.clone(), Arc::new(create));
7774 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
7776 let refresh = match descriptor.refresh_every_ms {
7777 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
7778 None => RefreshPolicy::Manual,
7779 };
7780 let def = MaterializedViewDef {
7781 name: view_name.clone(),
7782 query: format!("<parsed view {}>", view_name),
7783 dependencies: descriptor.source_collections.clone(),
7784 refresh,
7785 retention_duration_ms: descriptor.retention_duration_ms,
7786 };
7787 self.inner.materialized_views.write().register(def);
7788 }
7789 self.invalidate_plan_cache();
7792 }
7793
7794 pub(crate) fn rehydrate_declared_column_schemas(&self) {
7795 let store = self.inner.db.store();
7796 for contract in self.inner.db.collection_contracts() {
7797 let columns: Vec<String> = contract
7798 .declared_columns
7799 .iter()
7800 .map(|column| column.name.clone())
7801 .collect();
7802 let Some(manager) = store.get_collection(&contract.name) else {
7803 continue;
7804 };
7805 manager.set_column_schema_if_empty(columns);
7806 }
7807 }
7808
7809 pub fn register_tenant_table(&self, table: &str, column: &str) {
7814 use crate::storage::query::ast::{
7815 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
7816 };
7817 self.inner
7818 .tenant_tables
7819 .write()
7820 .insert(table.to_string(), column.to_string());
7821
7822 let lhs = Expr::Column {
7828 field: FieldRef::TableColumn {
7829 table: table.to_string(),
7830 column: column.to_string(),
7831 },
7832 span: Span::synthetic(),
7833 };
7834 let rhs = Expr::FunctionCall {
7835 name: "CURRENT_TENANT".to_string(),
7836 args: Vec::new(),
7837 span: Span::synthetic(),
7838 };
7839 let policy_filter = Filter::CompareExpr {
7840 lhs,
7841 op: CompareOp::Eq,
7842 rhs,
7843 };
7844
7845 let policy = CreatePolicyQuery {
7846 name: "__tenant_iso".to_string(),
7847 table: table.to_string(),
7848 action: None, role: None, using: Box::new(policy_filter),
7851 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
7858 };
7859
7860 self.inner.rls_policies.write().insert(
7862 (table.to_string(), "__tenant_iso".to_string()),
7863 Arc::new(policy),
7864 );
7865 self.inner
7866 .rls_enabled_tables
7867 .write()
7868 .insert(table.to_string());
7869
7870 self.ensure_tenant_index(table, column);
7876 }
7877
7878 fn ensure_tenant_index(&self, table: &str, column: &str) {
7886 if column.contains('.') {
7887 return;
7888 }
7889 let index_name = format!("__tenant_idx_{table}");
7890 let registry = self.inner.index_store.list_indices(table);
7891 if registry.iter().any(|idx| idx.name == index_name) {
7892 return;
7893 }
7894 if registry
7895 .iter()
7896 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
7897 {
7898 return;
7899 }
7900
7901 let store = self.inner.db.store();
7902 let Some(manager) = store.get_collection(table) else {
7903 return;
7904 };
7905 let entities = manager.query_all(|_| true);
7906 let entity_fields: Vec<(
7907 crate::storage::unified::EntityId,
7908 Vec<(String, crate::storage::schema::Value)>,
7909 )> = entities
7910 .iter()
7911 .map(|e| {
7912 let fields = match &e.data {
7913 crate::storage::EntityData::Row(row) => {
7914 if let Some(ref named) = row.named {
7915 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
7916 } else if let Some(ref schema) = row.schema {
7917 schema
7918 .iter()
7919 .zip(row.columns.iter())
7920 .map(|(k, v)| (k.clone(), v.clone()))
7921 .collect()
7922 } else {
7923 Vec::new()
7924 }
7925 }
7926 crate::storage::EntityData::Node(node) => node
7927 .properties
7928 .iter()
7929 .map(|(k, v)| (k.clone(), v.clone()))
7930 .collect(),
7931 _ => Vec::new(),
7932 };
7933 (e.id, fields)
7934 })
7935 .collect();
7936
7937 let columns = vec![column.to_string()];
7938 if self
7939 .inner
7940 .index_store
7941 .create_index(
7942 &index_name,
7943 table,
7944 &columns,
7945 super::index_store::IndexMethodKind::Hash,
7946 false,
7947 &entity_fields,
7948 )
7949 .is_err()
7950 {
7951 return;
7952 }
7953 self.inner
7954 .index_store
7955 .register(super::index_store::RegisteredIndex {
7956 name: index_name,
7957 collection: table.to_string(),
7958 columns,
7959 method: super::index_store::IndexMethodKind::Hash,
7960 unique: false,
7961 });
7962 self.invalidate_plan_cache();
7963 }
7964
7965 fn drop_tenant_index(&self, table: &str) {
7968 let index_name = format!("__tenant_idx_{table}");
7969 self.inner.index_store.drop_index(&index_name, table);
7970 }
7971
7972 pub fn tenant_column(&self, table: &str) -> Option<String> {
7976 self.inner.tenant_tables.read().get(table).cloned()
7977 }
7978
7979 pub fn unregister_tenant_table(&self, table: &str) {
7983 self.inner.tenant_tables.write().remove(table);
7984 self.inner
7985 .rls_policies
7986 .write()
7987 .remove(&(table.to_string(), "__tenant_iso".to_string()));
7988 self.drop_tenant_index(table);
7989 let has_other_policies = self
7991 .inner
7992 .rls_policies
7993 .read()
7994 .keys()
7995 .any(|(t, _)| t == table);
7996 if !has_other_policies {
7997 self.inner.rls_enabled_tables.write().remove(table);
7998 }
7999 }
8000
8001 pub(crate) fn record_pending_tombstone(
8007 &self,
8008 conn_id: u64,
8009 collection: &str,
8010 id: crate::storage::unified::entity::EntityId,
8011 stamper_xid: crate::storage::transaction::snapshot::Xid,
8012 previous_xmax: crate::storage::transaction::snapshot::Xid,
8013 ) {
8014 self.inner
8015 .pending_tombstones
8016 .write()
8017 .entry(conn_id)
8018 .or_default()
8019 .push((collection.to_string(), id, stamper_xid, previous_xmax));
8020 }
8021
8022 pub(crate) fn record_pending_versioned_update(
8023 &self,
8024 conn_id: u64,
8025 collection: &str,
8026 old_id: crate::storage::unified::entity::EntityId,
8027 new_id: crate::storage::unified::entity::EntityId,
8028 stamper_xid: crate::storage::transaction::snapshot::Xid,
8029 previous_xmax: crate::storage::transaction::snapshot::Xid,
8030 ) {
8031 self.inner
8032 .pending_versioned_updates
8033 .write()
8034 .entry(conn_id)
8035 .or_default()
8036 .push((
8037 collection.to_string(),
8038 old_id,
8039 new_id,
8040 stamper_xid,
8041 previous_xmax,
8042 ));
8043 }
8044
8045 fn with_deferred_store_wal_if_transaction<T>(
8046 &self,
8047 f: impl FnOnce() -> RedDBResult<T>,
8048 ) -> RedDBResult<T> {
8049 let conn_id = current_connection_id();
8050 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
8051 return f();
8052 }
8053
8054 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8055 let result = f();
8056 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8057 match result {
8058 Ok(value) => {
8059 self.record_pending_store_wal_actions(conn_id, captured);
8060 Ok(value)
8061 }
8062 Err(err) => Err(err),
8063 }
8064 }
8065
8066 fn with_deferred_store_wal_for_dml<T>(
8067 &self,
8068 capture_autocommit_events: bool,
8069 f: impl FnOnce() -> RedDBResult<T>,
8070 ) -> RedDBResult<T> {
8071 let conn_id = current_connection_id();
8072 if self.inner.tx_contexts.read().contains_key(&conn_id) {
8073 return self.with_deferred_store_wal_if_transaction(f);
8074 }
8075 if !capture_autocommit_events {
8076 return f();
8077 }
8078
8079 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8080 let result = f();
8081 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8082 self.inner
8083 .db
8084 .store()
8085 .append_deferred_store_wal_actions(captured)
8086 .map_err(|err| RedDBError::Internal(err.to_string()))?;
8087 result
8088 }
8089
8090 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
8091 !query.suppress_events
8092 && self.collection_has_event_subscriptions_for_operation(
8093 &query.table,
8094 crate::catalog::SubscriptionOperation::Insert,
8095 )
8096 }
8097
8098 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
8099 !query.suppress_events
8100 && self.collection_has_event_subscriptions_for_operation(
8101 &query.table,
8102 crate::catalog::SubscriptionOperation::Update,
8103 )
8104 }
8105
8106 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
8107 !query.suppress_events
8108 && self.collection_has_event_subscriptions_for_operation(
8109 &query.table,
8110 crate::catalog::SubscriptionOperation::Delete,
8111 )
8112 }
8113
8114 fn collection_has_event_subscriptions_for_operation(
8115 &self,
8116 collection: &str,
8117 operation: crate::catalog::SubscriptionOperation,
8118 ) -> bool {
8119 let Some(contract) = self.db().collection_contract_arc(collection) else {
8120 return false;
8121 };
8122 contract.subscriptions.iter().any(|subscription| {
8123 subscription.enabled
8124 && (subscription.ops_filter.is_empty()
8125 || subscription.ops_filter.contains(&operation))
8126 })
8127 }
8128
8129 fn record_pending_store_wal_actions(
8130 &self,
8131 conn_id: u64,
8132 actions: crate::storage::unified::DeferredStoreWalActions,
8133 ) {
8134 if actions.is_empty() {
8135 return;
8136 }
8137 let mut guard = self.inner.pending_store_wal_actions.write();
8138 guard.entry(conn_id).or_default().extend(actions);
8139 }
8140
8141 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
8142 let Some(actions) = self
8143 .inner
8144 .pending_store_wal_actions
8145 .write()
8146 .remove(&conn_id)
8147 else {
8148 return Ok(());
8149 };
8150 self.inner
8151 .db
8152 .store()
8153 .append_deferred_store_wal_actions(actions)
8154 .map_err(|err| RedDBError::Internal(err.to_string()))
8155 }
8156
8157 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
8158 self.inner
8159 .pending_store_wal_actions
8160 .write()
8161 .remove(&conn_id);
8162 }
8163
8164 fn xid_conflicts_with_snapshot(
8165 &self,
8166 xid: crate::storage::transaction::snapshot::Xid,
8167 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8168 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8169 ) -> bool {
8170 xid != 0
8171 && !own_xids.contains(&xid)
8172 && !self.inner.snapshot_manager.is_aborted(xid)
8173 && !self.inner.snapshot_manager.is_active(xid)
8174 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
8175 }
8176
8177 fn conflict_error(
8178 collection: &str,
8179 logical_id: crate::storage::unified::entity::EntityId,
8180 xid: crate::storage::transaction::snapshot::Xid,
8181 ) -> RedDBError {
8182 RedDBError::Query(format!(
8183 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
8184 logical_id.raw()
8185 ))
8186 }
8187
8188 fn check_logical_row_conflict(
8189 &self,
8190 collection: &str,
8191 logical_id: crate::storage::unified::entity::EntityId,
8192 excluded_ids: &[crate::storage::unified::entity::EntityId],
8193 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8194 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8195 ) -> RedDBResult<()> {
8196 let store = self.inner.db.store();
8197 let Some(manager) = store.get_collection(collection) else {
8198 return Ok(());
8199 };
8200
8201 for candidate in manager.query_all(|_| true) {
8202 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
8203 continue;
8204 }
8205 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
8206 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
8207 }
8208 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
8209 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
8210 }
8211 }
8212 Ok(())
8213 }
8214
/// Validates the pending writes of connection `conn_id` against `snapshot`.
///
/// Pending versioned updates are checked first, then pending tombstones.
/// For each record the checks run in this order and the first conflict is
/// returned as a serialization-conflict error:
///
/// 1. the `previous_xmax` remembered when the row was stamped;
/// 2. the row's current `xmax`, unless it equals this record's own stamp;
/// 3. every other stored version of the same logical row
///    (`check_logical_row_conflict`).
///
/// Records whose collection or entity can no longer be found are skipped.
pub(crate) fn check_table_row_write_conflicts(
    &self,
    conn_id: u64,
    snapshot: &crate::storage::transaction::snapshot::Snapshot,
    own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
) -> RedDBResult<()> {
    // Work on cloned lists so neither lock is held during the checks below.
    let versioned_updates = self
        .inner
        .pending_versioned_updates
        .read()
        .get(&conn_id)
        .cloned()
        .unwrap_or_default();
    let tombstones = self
        .inner
        .pending_tombstones
        .read()
        .get(&conn_id)
        .cloned()
        .unwrap_or_default();

    let store = self.inner.db.store();
    for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
        let Some(manager) = store.get_collection(&collection) else {
            continue;
        };
        let Some(old) = manager.get(old_id) else {
            continue;
        };
        let logical_id = old.logical_id();
        if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
            return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
        }
        // An xmax equal to our own stamp is this update itself, not a conflict.
        if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
            return Err(Self::conflict_error(&collection, logical_id, old.xmax));
        }
        // Both the superseded and the newly written version are excluded.
        self.check_logical_row_conflict(
            &collection,
            logical_id,
            &[old_id, new_id],
            snapshot,
            own_xids,
        )?;
    }

    for (collection, id, xid, previous_xmax) in tombstones {
        let Some(manager) = store.get_collection(&collection) else {
            continue;
        };
        let Some(entity) = manager.get(id) else {
            continue;
        };
        let logical_id = entity.logical_id();
        if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
            return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
        }
        // An xmax equal to our own stamp is this delete itself, not a conflict.
        if entity.xmax != xid
            && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
        {
            return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
        }
        self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
    }

    Ok(())
}
8281
8282 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
8283 let versioned_updates = self
8284 .inner
8285 .pending_versioned_updates
8286 .read()
8287 .get(&conn_id)
8288 .cloned()
8289 .unwrap_or_default();
8290 let tombstones = self
8291 .inner
8292 .pending_tombstones
8293 .read()
8294 .get(&conn_id)
8295 .cloned()
8296 .unwrap_or_default();
8297
8298 let store = self.inner.db.store();
8299 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
8300 if let Some(manager) = store.get_collection(&collection) {
8301 if let Some(mut entity) = manager.get(old_id) {
8302 entity.set_xmax(xid);
8303 let _ = manager.update(entity);
8304 }
8305 }
8306 }
8307 for (collection, id, xid, _previous_xmax) in tombstones {
8308 if let Some(manager) = store.get_collection(&collection) {
8309 if let Some(mut entity) = manager.get(id) {
8310 entity.set_xmax(xid);
8311 let _ = manager.update(entity);
8312 }
8313 }
8314 }
8315 }
8316
8317 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
8318 self.inner
8319 .pending_versioned_updates
8320 .write()
8321 .remove(&conn_id);
8322 }
8323
8324 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
8325 let Some(pending) = self
8326 .inner
8327 .pending_versioned_updates
8328 .write()
8329 .remove(&conn_id)
8330 else {
8331 return;
8332 };
8333
8334 let store = self.inner.db.store();
8335 for (collection, old_id, new_id, xid, previous_xmax) in pending {
8336 if let Some(manager) = store.get_collection(&collection) {
8337 if let Some(mut old) = manager.get(old_id) {
8338 if old.xmax == xid {
8339 old.set_xmax(previous_xmax);
8340 let _ = manager.update(old);
8341 }
8342 }
8343 }
8344 let _ = store.delete_batch(&collection, &[new_id]);
8345 }
8346 }
8347
8348 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8349 let mut guard = self.inner.pending_versioned_updates.write();
8350 let Some(pending) = guard.get_mut(&conn_id) else {
8351 return 0;
8352 };
8353
8354 let store = self.inner.db.store();
8355 let mut reverted = 0usize;
8356 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
8357 if *xid < stamper_xid {
8358 return true;
8359 }
8360 if let Some(manager) = store.get_collection(collection) {
8361 if let Some(mut old) = manager.get(*old_id) {
8362 if old.xmax == *xid {
8363 old.set_xmax(*previous_xmax);
8364 let _ = manager.update(old);
8365 }
8366 }
8367 }
8368 let _ = store.delete_batch(collection, &[*new_id]);
8369 reverted += 1;
8370 false
8371 });
8372 if pending.is_empty() {
8373 guard.remove(&conn_id);
8374 }
8375 reverted
8376 }
8377
8378 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
8383 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8384 return;
8385 };
8386 if pending.is_empty() {
8387 return;
8388 }
8389
8390 let store = self.inner.db.store();
8391 for (collection, id, _xid, _previous_xmax) in pending {
8392 store.context_index().remove_entity(id);
8393 self.cdc_emit(
8394 crate::replication::cdc::ChangeOperation::Delete,
8395 &collection,
8396 id.raw(),
8397 "entity",
8398 );
8399 }
8400 }
8401
8402 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
8409 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8410 return;
8411 };
8412
8413 let store = self.inner.db.store();
8414 for (collection, id, xid, previous_xmax) in pending {
8415 let Some(manager) = store.get_collection(&collection) else {
8416 continue;
8417 };
8418 if let Some(mut entity) = manager.get(id) {
8419 if entity.xmax == xid {
8420 entity.set_xmax(previous_xmax);
8421 let _ = manager.update(entity);
8422 }
8423 }
8424 }
8425 }
8426
8427 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
8428 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
8429 return;
8430 };
8431 for event in pending {
8432 self.cdc_emit_kv(
8433 event.op,
8434 &event.collection,
8435 &event.key,
8436 0,
8437 event.before,
8438 event.after,
8439 );
8440 }
8441 }
8442
8443 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
8444 self.inner.pending_kv_watch_events.write().remove(&conn_id);
8445 }
8446
/// Builds an in-memory graph from every collection in the store, keeping
/// only entities that are visible under the current snapshot and that pass
/// the node/edge RLS helpers for the current role.
///
/// Returns the graph together with the node property maps (keyed by the
/// entity id rendered as a string) and the edge property maps (keyed by
/// `(from_node, edge_label, to_node)`).
///
/// Works in two passes over all collections: nodes first, then edges. An
/// edge is only added when both of its endpoints were admitted in the first
/// pass. Errors from the graph store are surfaced as `RedDBError::Query`.
fn materialize_graph_with_rls(
    &self,
) -> RedDBResult<(
    crate::storage::engine::GraphStore,
    std::collections::HashMap<
        String,
        std::collections::HashMap<String, crate::storage::schema::Value>,
    >,
    crate::storage::query::unified::EdgeProperties,
)> {
    use crate::storage::engine::GraphStore;
    use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
    use crate::storage::unified::entity::{EntityData, EntityKind};
    use std::collections::{HashMap, HashSet};

    let store = self.inner.db.store();
    let snap_ctx = capture_current_snapshot();
    // Role name of the current identity, if one is set.
    let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());

    let graph = GraphStore::new();
    let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
        HashMap::new();
    let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
    // Ids of nodes admitted in pass 1; edges must connect two of these.
    let mut allowed_nodes: HashSet<String> = HashSet::new();

    // Per-collection state handed to the RLS helpers.
    // NOTE(review): presumably a cache of the resolved policy filter per
    // collection — confirm against `node_passes_rls` / `edge_passes_rls`.
    let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
        HashMap::new();
    let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
        HashMap::new();

    let collections = store.list_collections();

    // Pass 1: nodes.
    for collection in &collections {
        let Some(manager) = store.get_collection(collection) else {
            continue;
        };
        let entities = manager.query_all(|_| true);
        for entity in entities {
            if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
                continue;
            }
            let EntityKind::GraphNode(ref node) = entity.kind else {
                continue;
            };
            if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
                continue;
            }
            let id_str = entity.id.raw().to_string();
            graph
                .add_node_with_label(
                    &id_str,
                    &node.label,
                    &super::graph_node_label(&node.node_type),
                )
                .map_err(|err| RedDBError::Query(err.to_string()))?;
            allowed_nodes.insert(id_str.clone());
            if let EntityData::Node(node_data) = &entity.data {
                node_properties.insert(id_str, node_data.properties.clone());
            }
        }
    }

    // Pass 2: edges between admitted nodes.
    for collection in &collections {
        let Some(manager) = store.get_collection(collection) else {
            continue;
        };
        let entities = manager.query_all(|_| true);
        for entity in entities {
            if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
                continue;
            }
            let EntityKind::GraphEdge(ref edge) = entity.kind else {
                continue;
            };
            // Skip edges that touch a node not admitted in pass 1.
            if !allowed_nodes.contains(&edge.from_node)
                || !allowed_nodes.contains(&edge.to_node)
            {
                continue;
            }
            if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
                continue;
            }
            // Prefer the weight stored in the edge data; otherwise derive it
            // from the kind's weight divided by 1000.
            let weight = match &entity.data {
                EntityData::Edge(e) => e.weight,
                _ => edge.weight as f32 / 1000.0,
            };
            let edge_label = super::graph_edge_label(&edge.label);
            graph
                .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
                .map_err(|err| RedDBError::Query(err.to_string()))?;
            if let EntityData::Edge(edge_data) = &entity.data {
                edge_properties.insert(
                    (edge.from_node.clone(), edge_label, edge.to_node.clone()),
                    edge_data.properties.clone(),
                );
            }
        }
    }

    // NOTE(review): this expression has no effect at runtime; it appears to
    // exist only so the `PolicyAction` / `PolicyTargetKind` imports above
    // count as used — confirm before removing.
    let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);

    Ok((graph, node_properties, edge_properties))
}
8568
8569 pub(crate) fn stamp_xmin_if_in_txn(
8584 &self,
8585 collection: &str,
8586 id: crate::storage::unified::entity::EntityId,
8587 ) {
8588 let Some(xid) = self.current_xid() else {
8589 return;
8590 };
8591 let store = self.inner.db.store();
8592 let Some(manager) = store.get_collection(collection) else {
8593 return;
8594 };
8595 if let Some(mut entity) = manager.get(id) {
8596 entity.set_xmin(xid);
8597 let _ = manager.update(entity);
8598 }
8599 }
8600
8601 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8609 let mut guard = self.inner.pending_tombstones.write();
8610 let Some(pending) = guard.get_mut(&conn_id) else {
8611 return 0;
8612 };
8613
8614 let store = self.inner.db.store();
8615 let mut revived = 0usize;
8616 pending.retain(|(collection, id, xid, previous_xmax)| {
8617 if *xid < stamper_xid {
8618 return true;
8620 }
8621 if let Some(manager) = store.get_collection(collection) {
8622 if let Some(mut entity) = manager.get(*id) {
8623 if entity.xmax == *xid {
8624 entity.set_xmax(*previous_xmax);
8625 let _ = manager.update(entity);
8626 revived += 1;
8627 }
8628 }
8629 }
8630 false
8631 });
8632 if pending.is_empty() {
8633 guard.remove(&conn_id);
8634 }
8635 revived
8636 }
8637
8638 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
8647 let conn_id = current_connection_id();
8648 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
8649 return ctx.snapshot;
8650 }
8651 let high_water = self.inner.snapshot_manager.peek_next_xid();
8657 self.inner.snapshot_manager.snapshot(high_water)
8658 }
8659
8660 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
8670 let conn_id = current_connection_id();
8671 self.inner
8672 .tx_contexts
8673 .read()
8674 .get(&conn_id)
8675 .map(|ctx| ctx.writer_xid())
8676 }
8677
8678 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
8681 Arc::clone(&self.inner.snapshot_manager)
8682 }
8683
8684 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
8685 let manager = &self.inner.snapshot_manager;
8686 let next_xid = manager.peek_next_xid();
8687 let mut cutoff = next_xid;
8688 if let Some(oldest_active) = manager.oldest_active_xid() {
8689 cutoff = cutoff.min(oldest_active);
8690 }
8691 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
8692 cutoff = cutoff.min(oldest_pinned);
8693 }
8694 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
8695 if retention_xids > 0 {
8696 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
8697 }
8698 cutoff
8699 }
8700
8701 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
8702 let registered = self.inner.index_store.list_indices(table);
8703 if registered.is_empty() {
8704 return Ok(());
8705 }
8706 let store = self.inner.db.store();
8707 let Some(manager) = store.get_collection(table) else {
8708 return Ok(());
8709 };
8710 let entity_fields = manager
8711 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
8712 .into_iter()
8713 .map(|entity| (entity.id, table_row_index_fields(&entity)))
8714 .collect::<Vec<_>>();
8715
8716 for index in registered {
8717 self.inner.index_store.drop_index(&index.name, table);
8718 self.inner
8719 .index_store
8720 .create_index(
8721 &index.name,
8722 table,
8723 &index.columns,
8724 index.method,
8725 index.unique,
8726 &entity_fields,
8727 )
8728 .map_err(RedDBError::Internal)?;
8729 self.inner.index_store.register(index);
8730 }
8731 self.invalidate_plan_cache();
8732 Ok(())
8733 }
8734
8735 pub fn current_txn_own_xids(
8740 &self,
8741 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
8742 let mut set = std::collections::HashSet::new();
8743 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
8744 set.insert(ctx.xid);
8745 for (_, sub) in &ctx.savepoints {
8746 set.insert(*sub);
8747 }
8748 for sub in &ctx.released_sub_xids {
8749 set.insert(*sub);
8750 }
8751 }
8752 set
8753 }
8754
8755 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
8762 Arc::clone(&self.inner.foreign_tables)
8763 }
8764
8765 pub fn is_rls_enabled(&self, table: &str) -> bool {
8767 self.inner.rls_enabled_tables.read().contains(table)
8768 }
8769
8770 pub fn matching_rls_policies(
8777 &self,
8778 table: &str,
8779 role: Option<&str>,
8780 action: crate::storage::query::ast::PolicyAction,
8781 ) -> Vec<crate::storage::query::ast::Filter> {
8782 self.matching_rls_policies_for_kind(
8787 table,
8788 role,
8789 action,
8790 crate::storage::query::ast::PolicyTargetKind::Table,
8791 )
8792 }
8793
8794 pub fn matching_rls_policies_for_kind(
8802 &self,
8803 table: &str,
8804 role: Option<&str>,
8805 action: crate::storage::query::ast::PolicyAction,
8806 kind: crate::storage::query::ast::PolicyTargetKind,
8807 ) -> Vec<crate::storage::query::ast::Filter> {
8808 if !self.is_rls_enabled(table) {
8809 return Vec::new();
8810 }
8811 let policies = self.inner.rls_policies.read();
8812 policies
8813 .iter()
8814 .filter_map(|((t, _), p)| {
8815 if t != table {
8816 return None;
8817 }
8818 if p.target_kind != kind
8827 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
8828 {
8829 return None;
8830 }
8831 if let Some(a) = p.action {
8833 if a != action {
8834 return None;
8835 }
8836 }
8837 if let Some(p_role) = p.role.as_deref() {
8839 match role {
8840 Some(r) if r == p_role => {}
8841 _ => return None,
8842 }
8843 }
8844 Some((*p.using).clone())
8845 })
8846 .collect()
8847 }
8848
8849 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
8850 let store = self.inner.db.store();
8851 if let Some(stats) =
8852 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
8853 {
8854 crate::storage::query::planner::stats_catalog::persist_table_stats(
8855 store.as_ref(),
8856 &stats,
8857 );
8858 } else {
8859 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8860 }
8861 self.invalidate_plan_cache();
8862 }
8863
8864 pub(crate) fn note_table_write(&self, table: &str) {
8865 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
8870 if !already_dirty {
8871 self.inner
8872 .planner_dirty_tables
8873 .write()
8874 .insert(table.to_string());
8875 }
8876 self.invalidate_result_cache_for_table(table);
8877 }
8878
8879 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
8887 let explain = self.explain_query(inner_sql)?;
8888
8889 let columns = vec![
8890 "op".to_string(),
8891 "source".to_string(),
8892 "est_rows".to_string(),
8893 "est_cost".to_string(),
8894 "depth".to_string(),
8895 ];
8896
8897 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
8898
8899 for name in &explain.cte_materializations {
8905 use std::sync::Arc;
8906 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
8907 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
8908 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
8909 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
8910 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
8911 rec.set_arc(Arc::from("depth"), Value::Integer(0));
8912 records.push(rec);
8913 }
8914
8915 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
8916
8917 let result = crate::storage::query::unified::UnifiedResult {
8918 columns,
8919 records,
8920 stats: Default::default(),
8921 pre_serialized_json: None,
8922 };
8923
8924 Ok(RuntimeQueryResult {
8925 query: raw_query.to_string(),
8926 mode: explain.mode,
8927 statement: "explain",
8928 engine: "runtime-explain",
8929 result,
8930 affected_rows: 0,
8931 statement_type: "select",
8932 })
8933 }
8934
    /// Authorizes `expr` for the current connection's identity.
    ///
    /// Returns `Ok(())` immediately when no auth store is configured or when
    /// no identity is set for the current connection. Otherwise the statement
    /// kind selects the check:
    ///
    /// - `Table`, `QueueSelect`, `Insert`, `Update`, `Delete` and `Join` are
    ///   mapped to an `(Action, Resource)` pair and checked either against
    ///   IAM policies (when IAM authorization is enabled) or against legacy
    ///   grants via `check_grant`.
    /// - `Graph` and `Vector` run only a column/property projection check,
    ///   and only when IAM authorization is enabled.
    /// - `Grant`, `Revoke`, `AlterUser`, `ApplyMigration` and
    ///   `RollbackMigration` require the `Admin` role.
    /// - IAM policy management statements go through
    ///   `check_policy_management_privilege`.
    /// - `Drop*` collection statements and `Truncate` go through
    ///   `check_ddl_collection_privilege`.
    /// - The remaining DDL statements and `CreateMigration` require a role of
    ///   at least `Write`.
    /// - Every other statement kind is allowed.
    ///
    /// # Errors
    /// Returns a human-readable denial message naming the principal and the
    /// denied action/resource (or role).
    pub(super) fn check_query_privilege(
        &self,
        expr: &crate::storage::query::ast::QueryExpr,
    ) -> Result<(), String> {
        use crate::auth::privileges::{Action, AuthzContext, Resource};
        use crate::auth::UserId;
        use crate::storage::query::ast::QueryExpr;

        // No auth store configured: nothing to enforce.
        let auth_store = match self.inner.auth_store.read().clone() {
            Some(s) => s,
            None => return Ok(()),
        };

        // No identity bound to this connection: the check is skipped.
        let (username, role) = match current_auth_identity() {
            Some(p) => p,
            None => return Ok(()),
        };
        let tenant = current_tenant();

        let ctx = AuthzContext {
            principal: &username,
            effective_role: role,
            tenant: tenant.as_deref(),
        };
        let principal_id = UserId::from_parts(tenant.as_deref(), &username);

        // Arms either yield an (action, resource) pair for the generic check
        // below, or return directly with their own decision.
        let (action, resource) = match expr {
            QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
            QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
            QueryExpr::Graph(g) => {
                // Graph queries only get a property-projection check, and
                // only under IAM authorization.
                if auth_store.iam_authorization_enabled() {
                    self.check_graph_property_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        g,
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::Vector(v) => {
                // Vector queries are checked as a projection of the
                // `content` column of the collection.
                if auth_store.iam_authorization_enabled() {
                    self.check_table_like_column_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        &v.collection,
                        &["content".to_string()],
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
            QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
            QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
            // Joins are checked as a SELECT on the database resource.
            QueryExpr::Join(_) => (Action::Select, Resource::Database),
            // ACL/auth DDL is restricted to the Admin role.
            QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
                        username, role
                    ))
                };
            }
            QueryExpr::CreateIamPolicy { id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:put",
                    "policy",
                    id,
                );
            }
            QueryExpr::DropIamPolicy { id } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:drop",
                    "policy",
                    id,
                );
            }
            QueryExpr::AttachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:attach",
                    "policy",
                    policy_id,
                );
            }
            QueryExpr::DetachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:detach",
                    "policy",
                    policy_id,
                );
            }
            // Introspection statements are not gated here.
            QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
                return Ok(());
            }
            QueryExpr::SimulatePolicy { .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:simulate",
                    "policy",
                    "*",
                );
            }
            // Destructive collection DDL: role floor plus IAM check, with an
            // audit record written by the helper.
            QueryExpr::DropTable(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropGraph(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropVector(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropDocument(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropKv(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropCollection(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::Truncate(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "truncate",
                    &q.name,
                );
            }
            // Remaining DDL only needs a role of at least Write.
            QueryExpr::CreateTable(_)
            | QueryExpr::CreateCollection(_)
            | QueryExpr::CreateVector(_)
            | QueryExpr::AlterTable(_)
            | QueryExpr::CreateIndex(_)
            | QueryExpr::DropIndex(_)
            | QueryExpr::CreateSchema(_)
            | QueryExpr::DropSchema(_)
            | QueryExpr::CreateSequence(_)
            | QueryExpr::DropSequence(_)
            | QueryExpr::CreateView(_)
            | QueryExpr::DropView(_)
            | QueryExpr::RefreshMaterializedView(_)
            | QueryExpr::CreatePolicy(_)
            | QueryExpr::DropPolicy(_)
            | QueryExpr::CreateServer(_)
            | QueryExpr::DropServer(_)
            | QueryExpr::CreateForeignTable(_)
            | QueryExpr::DropForeignTable(_)
            | QueryExpr::CreateTimeSeries(_)
            | QueryExpr::DropTimeSeries(_)
            | QueryExpr::CreateQueue(_)
            | QueryExpr::AlterQueue(_)
            | QueryExpr::DropQueue(_)
            | QueryExpr::CreateTree(_)
            | QueryExpr::DropTree(_) => {
                return if role >= crate::auth::Role::Write {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue DDL",
                        username, role
                    ))
                };
            }
            QueryExpr::CreateMigration(_) => {
                return if role >= crate::auth::Role::Write {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
                        username, role
                    ))
                };
            }
            // Applying or rolling back a migration is Admin-only.
            QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
                        username, role
                    ))
                };
            }
            QueryExpr::ExplainMigration(_) => return Ok(()),
            // Statement kinds not listed above are not gated by this check.
            _ => return Ok(()),
        };

        if auth_store.iam_authorization_enabled() {
            // IAM path: translate the legacy action/resource and evaluate
            // policies for the principal.
            let iam_action = legacy_action_to_iam(action);
            let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
            let iam_ctx = runtime_iam_context(role, tenant.as_deref());
            if !auth_store.check_policy_authz(&principal_id, iam_action, &iam_resource, &iam_ctx) {
                return Err(format!(
                    "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                    username, iam_action, iam_resource.kind, iam_resource.name
                ));
            }

            // Table reads additionally get a per-column projection check.
            if let QueryExpr::Table(table) = expr {
                self.check_table_column_projection_privilege(
                    &auth_store,
                    &principal_id,
                    &iam_ctx,
                    table,
                )?;
            }

            if let QueryExpr::Update(update) = expr {
                // Columns assigned by SET are checked as an update access.
                let columns = update_set_target_columns(update);
                if !columns.is_empty() {
                    let request = column_access_request_for_table_update(&update.table, columns);
                    let outcome =
                        auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
                    if let Some(denied) = outcome.first_denied_column() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
                            username, iam_action, denied.resource.kind, denied.resource.name
                        ));
                    }
                    if !outcome.allowed() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                            username,
                            iam_action,
                            outcome.table_resource.kind,
                            outcome.table_resource.name
                        ));
                    }
                }

                // Columns reported by `update_returning_columns_for_policy`
                // are checked as a select access.
                if let Some(columns) = update_returning_columns_for_policy(self, update) {
                    let request = column_access_request_for_table_select(&update.table, columns);
                    let outcome =
                        auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
                    if let Some(denied) = outcome.first_denied_column() {
                        return Err(format!(
                            "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
                            username, denied.resource.kind, denied.resource.name
                        ));
                    }
                    if !outcome.allowed() {
                        return Err(format!(
                            "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
                            username, outcome.table_resource.kind, outcome.table_resource.name
                        ));
                    }
                }
            }

            Ok(())
        } else {
            // Legacy path: consult the grant table.
            auth_store
                .check_grant(&ctx, action, &resource)
                .map_err(|e| e.to_string())
        }
    }
9299
9300 fn check_table_column_projection_privilege(
9301 &self,
9302 auth_store: &Arc<crate::auth::store::AuthStore>,
9303 principal: &crate::auth::UserId,
9304 ctx: &crate::auth::policies::EvalContext,
9305 table: &crate::storage::query::ast::TableQuery,
9306 ) -> Result<(), String> {
9307 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
9308
9309 let columns = requested_table_columns_for_policy(table);
9310 if columns.is_empty() {
9311 return Ok(());
9312 }
9313
9314 let request = ColumnAccessRequest::select(table.table.clone(), columns);
9315 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
9316 if outcome.allowed() {
9317 return Ok(());
9318 }
9319
9320 if !matches!(
9321 outcome.table_decision,
9322 crate::auth::policies::Decision::Allow { .. }
9323 | crate::auth::policies::Decision::AdminBypass
9324 ) {
9325 return Err(format!(
9326 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9327 principal, outcome.table_resource.kind, outcome.table_resource.name
9328 ));
9329 }
9330
9331 let denied = outcome
9332 .first_denied_column()
9333 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
9334 match denied {
9335 Some(decision) => Err(format!(
9336 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9337 principal, decision.resource.kind, decision.resource.name
9338 )),
9339 None => Ok(()),
9340 }
9341 }
9342
9343 fn check_graph_property_projection_privilege(
9344 &self,
9345 auth_store: &Arc<crate::auth::store::AuthStore>,
9346 principal: &crate::auth::UserId,
9347 role: crate::auth::Role,
9348 tenant: Option<&str>,
9349 query: &crate::storage::query::ast::GraphQuery,
9350 ) -> Result<(), String> {
9351 let columns = explicit_graph_projection_properties(query);
9352 if columns.is_empty() {
9353 return Ok(());
9354 }
9355 self.check_table_like_column_projection_privilege(
9356 auth_store, principal, role, tenant, "graph", &columns,
9357 )
9358 }
9359
9360 fn check_table_like_column_projection_privilege(
9361 &self,
9362 auth_store: &Arc<crate::auth::store::AuthStore>,
9363 principal: &crate::auth::UserId,
9364 role: crate::auth::Role,
9365 tenant: Option<&str>,
9366 table: &str,
9367 columns: &[String],
9368 ) -> Result<(), String> {
9369 let iam_ctx = runtime_iam_context(role, tenant);
9370 let request =
9371 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
9372 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
9373 if outcome.allowed() {
9374 return Ok(());
9375 }
9376 let denied = outcome
9377 .first_denied_column()
9378 .map(|d| d.resource.name.clone())
9379 .unwrap_or_else(|| format!("{table}.<unknown>"));
9380 Err(format!(
9381 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
9382 principal, denied
9383 ))
9384 }
9385
9386 fn check_policy_management_privilege(
9387 &self,
9388 auth_store: &Arc<crate::auth::store::AuthStore>,
9389 principal: &crate::auth::UserId,
9390 role: crate::auth::Role,
9391 tenant: Option<&str>,
9392 action: &str,
9393 resource_kind: &str,
9394 resource_name: &str,
9395 ) -> Result<(), String> {
9396 if !auth_store.iam_authorization_enabled() {
9397 return if role == crate::auth::Role::Admin {
9398 Ok(())
9399 } else {
9400 Err(format!(
9401 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
9402 principal, role
9403 ))
9404 };
9405 }
9406
9407 let mut resource = crate::auth::policies::ResourceRef::new(
9408 resource_kind.to_string(),
9409 resource_name.to_string(),
9410 );
9411 if let Some(t) = tenant {
9412 resource = resource.with_tenant(t.to_string());
9413 }
9414 let ctx = runtime_iam_context(role, tenant);
9415 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9416 Ok(())
9417 } else {
9418 Err(format!(
9419 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9420 principal, action, resource.kind, resource.name
9421 ))
9422 }
9423 }
9424
9425 fn check_ddl_collection_privilege(
9432 &self,
9433 auth_store: &Arc<crate::auth::store::AuthStore>,
9434 principal: &crate::auth::UserId,
9435 role: crate::auth::Role,
9436 tenant: Option<&str>,
9437 username: &str,
9438 action: &str,
9439 collection: &str,
9440 ) -> Result<(), String> {
9441 if role < crate::auth::Role::Write {
9442 let msg = format!(
9443 "principal=`{}` role=`{:?}` cannot issue DDL",
9444 username, role
9445 );
9446 self.inner.audit_log.record(
9447 action,
9448 username,
9449 collection,
9450 "denied",
9451 crate::json::Value::Null,
9452 );
9453 return Err(msg);
9454 }
9455
9456 if !auth_store.iam_authorization_enabled() {
9457 self.inner.audit_log.record(
9458 action,
9459 username,
9460 collection,
9461 "ok",
9462 crate::json::Value::Null,
9463 );
9464 return Ok(());
9465 }
9466
9467 let resource_name = collection.to_string();
9468 let mut resource = crate::auth::policies::ResourceRef::new(
9469 "collection".to_string(),
9470 resource_name.clone(),
9471 );
9472 if let Some(t) = tenant {
9473 resource = resource.with_tenant(t.to_string());
9474 }
9475 let ctx = runtime_iam_context(role, tenant);
9476 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9477 self.inner.audit_log.record(
9478 action,
9479 username,
9480 &resource_name,
9481 "ok",
9482 crate::json::Value::Null,
9483 );
9484 Ok(())
9485 } else {
9486 self.inner.audit_log.record(
9487 action,
9488 username,
9489 &resource_name,
9490 "denied",
9491 crate::json::Value::Null,
9492 );
9493 Err(format!(
9494 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
9495 username, action, resource_name
9496 ))
9497 }
9498 }
9499
9500 fn execute_grant_statement(
9502 &self,
9503 query: &str,
9504 stmt: &crate::storage::query::ast::GrantStmt,
9505 ) -> RedDBResult<RuntimeQueryResult> {
9506 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9507 use crate::auth::UserId;
9508 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9509
9510 let auth_store = self
9511 .inner
9512 .auth_store
9513 .read()
9514 .clone()
9515 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9516
9517 let (gname, grole) = current_auth_identity().ok_or_else(|| {
9519 RedDBError::Query("GRANT requires an authenticated principal".to_string())
9520 })?;
9521 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
9522 let granter_role = grole;
9523
9524 let mut actions: Vec<Action> = Vec::new();
9526 if stmt.all {
9527 actions.push(Action::All);
9528 } else {
9529 for kw in &stmt.actions {
9530 let a = Action::from_keyword(kw).ok_or_else(|| {
9531 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
9532 })?;
9533 actions.push(a);
9534 }
9535 }
9536
9537 let mut applied = 0usize;
9539 for obj in &stmt.objects {
9540 let resource = match stmt.object_kind {
9541 GrantObjectKind::Table => Resource::Table {
9542 schema: obj.schema.clone(),
9543 table: obj.name.clone(),
9544 },
9545 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9546 GrantObjectKind::Database => Resource::Database,
9547 GrantObjectKind::Function => Resource::Function {
9548 schema: obj.schema.clone(),
9549 name: obj.name.clone(),
9550 },
9551 };
9552 for principal in &stmt.principals {
9553 let p = match principal {
9554 GrantPrincipalRef::Public => GrantPrincipal::Public,
9555 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9556 GrantPrincipalRef::User { tenant, name } => {
9557 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9558 }
9559 };
9560 let tenant = granter.tenant.clone();
9563 auth_store
9564 .grant(
9565 &granter,
9566 granter_role,
9567 p.clone(),
9568 resource.clone(),
9569 actions.clone(),
9570 stmt.with_grant_option,
9571 tenant.clone(),
9572 )
9573 .map_err(|e| RedDBError::Query(e.to_string()))?;
9574
9575 if let Some(policy) =
9579 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
9580 {
9581 let pid = policy.id.clone();
9582 auth_store
9583 .put_policy_internal(policy)
9584 .map_err(|e| RedDBError::Query(e.to_string()))?;
9585 let attachment = match &p {
9586 GrantPrincipal::User(uid) => {
9587 crate::auth::store::PrincipalRef::User(uid.clone())
9588 }
9589 GrantPrincipal::Group(group) => {
9590 crate::auth::store::PrincipalRef::Group(group.clone())
9591 }
9592 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
9593 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
9594 ),
9595 };
9596 auth_store
9597 .attach_policy(attachment, &pid)
9598 .map_err(|e| RedDBError::Query(e.to_string()))?;
9599 }
9600 applied += 1;
9601 tracing::info!(
9602 target: "audit",
9603 principal = %granter,
9604 action = "grant",
9605 "GRANT applied"
9606 );
9607 }
9608 }
9609
9610 self.invalidate_result_cache();
9611 Ok(RuntimeQueryResult::ok_message(
9612 query.to_string(),
9613 &format!("GRANT applied to {} target(s)", applied),
9614 "grant",
9615 ))
9616 }
9617
9618 fn execute_revoke_statement(
9620 &self,
9621 query: &str,
9622 stmt: &crate::storage::query::ast::RevokeStmt,
9623 ) -> RedDBResult<RuntimeQueryResult> {
9624 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9625 use crate::auth::UserId;
9626 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9627
9628 let auth_store = self
9629 .inner
9630 .auth_store
9631 .read()
9632 .clone()
9633 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9634
9635 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9636 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
9637 })?;
9638 let granter_role = grole;
9639
9640 let actions: Vec<Action> = if stmt.all {
9641 vec![Action::All]
9642 } else {
9643 stmt.actions
9644 .iter()
9645 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
9646 .collect()
9647 };
9648
9649 let mut total_removed = 0usize;
9650 for obj in &stmt.objects {
9651 let resource = match stmt.object_kind {
9652 GrantObjectKind::Table => Resource::Table {
9653 schema: obj.schema.clone(),
9654 table: obj.name.clone(),
9655 },
9656 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9657 GrantObjectKind::Database => Resource::Database,
9658 GrantObjectKind::Function => Resource::Function {
9659 schema: obj.schema.clone(),
9660 name: obj.name.clone(),
9661 },
9662 };
9663 for principal in &stmt.principals {
9664 let p = match principal {
9665 GrantPrincipalRef::Public => GrantPrincipal::Public,
9666 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9667 GrantPrincipalRef::User { tenant, name } => {
9668 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9669 }
9670 };
9671 let removed = auth_store
9672 .revoke(granter_role, &p, &resource, &actions)
9673 .map_err(|e| RedDBError::Query(e.to_string()))?;
9674 let _removed_policies =
9675 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
9676 total_removed += removed;
9677 }
9678 }
9679
9680 self.invalidate_result_cache();
9681 Ok(RuntimeQueryResult::ok_message(
9682 query.to_string(),
9683 &format!("REVOKE removed {} grant(s)", total_removed),
9684 "revoke",
9685 ))
9686 }
9687
9688 fn execute_alter_user_statement(
9690 &self,
9691 query: &str,
9692 stmt: &crate::storage::query::ast::AlterUserStmt,
9693 ) -> RedDBResult<RuntimeQueryResult> {
9694 use crate::auth::privileges::UserAttributes;
9695 use crate::auth::UserId;
9696 use crate::storage::query::ast::AlterUserAttribute;
9697
9698 let auth_store = self
9699 .inner
9700 .auth_store
9701 .read()
9702 .clone()
9703 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9704
9705 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9706 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
9707 })?;
9708 if grole != crate::auth::Role::Admin {
9709 return Err(RedDBError::Query(
9710 "ALTER USER requires Admin role".to_string(),
9711 ));
9712 }
9713
9714 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
9715
9716 let mut attrs = auth_store.user_attributes(&target);
9719 let mut enable_change: Option<bool> = None;
9720
9721 for a in &stmt.attributes {
9722 match a {
9723 AlterUserAttribute::ValidUntil(ts) => {
9724 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
9728 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
9729 })?;
9730 attrs.valid_until = Some(ms);
9731 }
9732 AlterUserAttribute::ConnectionLimit(n) => {
9733 if *n < 0 {
9734 return Err(RedDBError::Query(
9735 "CONNECTION LIMIT must be non-negative".to_string(),
9736 ));
9737 }
9738 attrs.connection_limit = Some(*n as u32);
9739 }
9740 AlterUserAttribute::SetSearchPath(p) => {
9741 attrs.search_path = Some(p.clone());
9742 }
9743 AlterUserAttribute::AddGroup(g) => {
9744 if !attrs.groups.iter().any(|existing| existing == g) {
9745 attrs.groups.push(g.clone());
9746 attrs.groups.sort();
9747 }
9748 }
9749 AlterUserAttribute::DropGroup(g) => {
9750 attrs.groups.retain(|existing| existing != g);
9751 }
9752 AlterUserAttribute::Enable => enable_change = Some(true),
9753 AlterUserAttribute::Disable => enable_change = Some(false),
9754 AlterUserAttribute::Password(_) => {
9755 }
9759 }
9760 }
9761
9762 auth_store
9763 .set_user_attributes(&target, attrs)
9764 .map_err(|e| RedDBError::Query(e.to_string()))?;
9765 if let Some(en) = enable_change {
9766 auth_store
9767 .set_user_enabled(&target, en)
9768 .map_err(|e| RedDBError::Query(e.to_string()))?;
9769 }
9770 self.invalidate_result_cache();
9771 tracing::info!(
9772 target: "audit",
9773 principal = %target,
9774 action = "alter_user",
9775 "ALTER USER applied"
9776 );
9777
9778 Ok(RuntimeQueryResult::ok_message(
9779 query.to_string(),
9780 &format!("ALTER USER {} applied", target),
9781 "alter_user",
9782 ))
9783 }
9784
9785 fn execute_create_iam_policy(
9790 &self,
9791 query: &str,
9792 id: &str,
9793 json: &str,
9794 ) -> RedDBResult<RuntimeQueryResult> {
9795 use crate::auth::policies::Policy;
9796
9797 let auth_store = self
9798 .inner
9799 .auth_store
9800 .read()
9801 .clone()
9802 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9803
9804 let mut policy = Policy::from_json_str(json)
9809 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
9810 if policy.id != id {
9811 policy.id = id.to_string();
9812 }
9813 let pid = policy.id.clone();
9814 auth_store
9815 .put_policy(policy)
9816 .map_err(|e| RedDBError::Query(e.to_string()))?;
9817
9818 let principal = current_auth_identity()
9819 .map(|(u, _)| u)
9820 .unwrap_or_else(|| "anonymous".into());
9821 tracing::info!(
9822 target: "audit",
9823 principal = %principal,
9824 action = "iam:policy.put",
9825 matched_policy_id = %pid,
9826 "CREATE POLICY applied"
9827 );
9828 self.inner.audit_log.record(
9829 "iam/policy.put",
9830 &principal,
9831 &pid,
9832 "ok",
9833 crate::json::Value::Null,
9834 );
9835
9836 self.invalidate_result_cache();
9837 Ok(RuntimeQueryResult::ok_message(
9838 query.to_string(),
9839 &format!("policy `{pid}` stored"),
9840 "create_iam_policy",
9841 ))
9842 }
9843
9844 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
9845 let auth_store = self
9846 .inner
9847 .auth_store
9848 .read()
9849 .clone()
9850 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9851 auth_store
9852 .delete_policy(id)
9853 .map_err(|e| RedDBError::Query(e.to_string()))?;
9854
9855 let principal = current_auth_identity()
9856 .map(|(u, _)| u)
9857 .unwrap_or_else(|| "anonymous".into());
9858 tracing::info!(
9859 target: "audit",
9860 principal = %principal,
9861 action = "iam:policy.drop",
9862 matched_policy_id = %id,
9863 "DROP POLICY applied"
9864 );
9865 self.inner.audit_log.record(
9866 "iam/policy.drop",
9867 &principal,
9868 id,
9869 "ok",
9870 crate::json::Value::Null,
9871 );
9872
9873 self.invalidate_result_cache();
9874 Ok(RuntimeQueryResult::ok_message(
9875 query.to_string(),
9876 &format!("policy `{id}` dropped"),
9877 "drop_iam_policy",
9878 ))
9879 }
9880
9881 fn execute_attach_policy(
9882 &self,
9883 query: &str,
9884 policy_id: &str,
9885 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9886 ) -> RedDBResult<RuntimeQueryResult> {
9887 use crate::auth::store::PrincipalRef;
9888 use crate::auth::UserId;
9889 use crate::storage::query::ast::PolicyPrincipalRef;
9890
9891 let auth_store = self
9892 .inner
9893 .auth_store
9894 .read()
9895 .clone()
9896 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9897 let p = match principal {
9898 PolicyPrincipalRef::User(u) => {
9899 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9900 }
9901 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9902 };
9903 let pretty_target = principal_label(principal);
9904 auth_store
9905 .attach_policy(p, policy_id)
9906 .map_err(|e| RedDBError::Query(e.to_string()))?;
9907
9908 let principal_str = current_auth_identity()
9909 .map(|(u, _)| u)
9910 .unwrap_or_else(|| "anonymous".into());
9911 tracing::info!(
9912 target: "audit",
9913 principal = %principal_str,
9914 action = "iam:policy.attach",
9915 matched_policy_id = %policy_id,
9916 target = %pretty_target,
9917 "ATTACH POLICY applied"
9918 );
9919 self.inner.audit_log.record(
9920 "iam/policy.attach",
9921 &principal_str,
9922 &pretty_target,
9923 "ok",
9924 crate::json::Value::Null,
9925 );
9926
9927 self.invalidate_result_cache();
9928 Ok(RuntimeQueryResult::ok_message(
9929 query.to_string(),
9930 &format!("policy `{policy_id}` attached to {pretty_target}"),
9931 "attach_policy",
9932 ))
9933 }
9934
9935 fn execute_detach_policy(
9936 &self,
9937 query: &str,
9938 policy_id: &str,
9939 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9940 ) -> RedDBResult<RuntimeQueryResult> {
9941 use crate::auth::store::PrincipalRef;
9942 use crate::auth::UserId;
9943 use crate::storage::query::ast::PolicyPrincipalRef;
9944
9945 let auth_store = self
9946 .inner
9947 .auth_store
9948 .read()
9949 .clone()
9950 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9951 let p = match principal {
9952 PolicyPrincipalRef::User(u) => {
9953 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9954 }
9955 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9956 };
9957 let pretty_target = principal_label(principal);
9958 auth_store
9959 .detach_policy(p, policy_id)
9960 .map_err(|e| RedDBError::Query(e.to_string()))?;
9961
9962 let principal_str = current_auth_identity()
9963 .map(|(u, _)| u)
9964 .unwrap_or_else(|| "anonymous".into());
9965 tracing::info!(
9966 target: "audit",
9967 principal = %principal_str,
9968 action = "iam:policy.detach",
9969 matched_policy_id = %policy_id,
9970 target = %pretty_target,
9971 "DETACH POLICY applied"
9972 );
9973 self.inner.audit_log.record(
9974 "iam/policy.detach",
9975 &principal_str,
9976 &pretty_target,
9977 "ok",
9978 crate::json::Value::Null,
9979 );
9980
9981 self.invalidate_result_cache();
9982 Ok(RuntimeQueryResult::ok_message(
9983 query.to_string(),
9984 &format!("policy `{policy_id}` detached from {pretty_target}"),
9985 "detach_policy",
9986 ))
9987 }
9988
9989 fn execute_show_policies(
9990 &self,
9991 query: &str,
9992 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
9993 ) -> RedDBResult<RuntimeQueryResult> {
9994 use crate::auth::UserId;
9995 use crate::storage::query::ast::PolicyPrincipalRef;
9996 use crate::storage::query::unified::UnifiedRecord;
9997 use crate::storage::schema::Value as SchemaValue;
9998 use std::sync::Arc;
9999
10000 let auth_store = self
10001 .inner
10002 .auth_store
10003 .read()
10004 .clone()
10005 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10006
10007 let pols = match filter {
10008 None => auth_store.list_policies(),
10009 Some(PolicyPrincipalRef::User(u)) => {
10010 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
10011 auth_store.effective_policies(&id)
10012 }
10013 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
10014 };
10015
10016 let mut records = Vec::with_capacity(pols.len());
10017 for p in pols.iter() {
10018 let mut rec = UnifiedRecord::default();
10019 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
10020 rec.set_arc(
10021 Arc::from("statements"),
10022 SchemaValue::Integer(p.statements.len() as i64),
10023 );
10024 rec.set_arc(
10025 Arc::from("tenant"),
10026 p.tenant
10027 .as_deref()
10028 .map(|t| SchemaValue::text(t.to_string()))
10029 .unwrap_or(SchemaValue::Null),
10030 );
10031 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
10032 records.push(rec);
10033 }
10034 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10035 result.records = records;
10036 Ok(RuntimeQueryResult {
10037 query: query.to_string(),
10038 mode: crate::storage::query::modes::QueryMode::Sql,
10039 statement: "show_policies",
10040 engine: "iam-policies",
10041 result,
10042 affected_rows: 0,
10043 statement_type: "select",
10044 })
10045 }
10046
10047 fn execute_show_effective_permissions(
10048 &self,
10049 query: &str,
10050 user: &crate::storage::query::ast::PolicyUserRef,
10051 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
10052 ) -> RedDBResult<RuntimeQueryResult> {
10053 use crate::auth::UserId;
10054 use crate::storage::query::unified::UnifiedRecord;
10055 use crate::storage::schema::Value as SchemaValue;
10056 use std::sync::Arc;
10057
10058 let auth_store = self
10059 .inner
10060 .auth_store
10061 .read()
10062 .clone()
10063 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10064 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10065 let pols = auth_store.effective_policies(&id);
10066
10067 let mut records = Vec::new();
10070 for p in pols.iter() {
10071 for (idx, st) in p.statements.iter().enumerate() {
10072 if let Some(_r) = resource {
10073 }
10077 let mut rec = UnifiedRecord::default();
10078 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
10079 rec.set_arc(
10080 Arc::from("statement_index"),
10081 SchemaValue::Integer(idx as i64),
10082 );
10083 rec.set_arc(
10084 Arc::from("sid"),
10085 st.sid
10086 .as_deref()
10087 .map(|s| SchemaValue::text(s.to_string()))
10088 .unwrap_or(SchemaValue::Null),
10089 );
10090 rec.set_arc(
10091 Arc::from("effect"),
10092 SchemaValue::text(match st.effect {
10093 crate::auth::policies::Effect::Allow => "allow",
10094 crate::auth::policies::Effect::Deny => "deny",
10095 }),
10096 );
10097 rec.set_arc(
10098 Arc::from("actions"),
10099 SchemaValue::Integer(st.actions.len() as i64),
10100 );
10101 rec.set_arc(
10102 Arc::from("resources"),
10103 SchemaValue::Integer(st.resources.len() as i64),
10104 );
10105 records.push(rec);
10106 }
10107 }
10108 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10109 result.records = records;
10110 Ok(RuntimeQueryResult {
10111 query: query.to_string(),
10112 mode: crate::storage::query::modes::QueryMode::Sql,
10113 statement: "show_effective_permissions",
10114 engine: "iam-policies",
10115 result,
10116 affected_rows: 0,
10117 statement_type: "select",
10118 })
10119 }
10120
10121 fn execute_simulate_policy(
10122 &self,
10123 query: &str,
10124 user: &crate::storage::query::ast::PolicyUserRef,
10125 action: &str,
10126 resource: &crate::storage::query::ast::PolicyResourceRef,
10127 ) -> RedDBResult<RuntimeQueryResult> {
10128 use crate::auth::policies::ResourceRef;
10129 use crate::auth::store::SimCtx;
10130 use crate::auth::UserId;
10131 use crate::storage::query::unified::UnifiedRecord;
10132 use crate::storage::schema::Value as SchemaValue;
10133 use std::sync::Arc;
10134
10135 let auth_store = self
10136 .inner
10137 .auth_store
10138 .read()
10139 .clone()
10140 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10141 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10142 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
10143 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
10144
10145 let principal_str = current_auth_identity()
10146 .map(|(u, _)| u)
10147 .unwrap_or_else(|| "anonymous".into());
10148 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
10149 tracing::info!(
10150 target: "audit",
10151 principal = %principal_str,
10152 action = "iam:policy.simulate",
10153 decision = %decision_str,
10154 matched_policy_id = ?matched_pid,
10155 matched_sid = ?matched_sid,
10156 "SIMULATE issued"
10157 );
10158 self.inner.audit_log.record(
10159 "iam/policy.simulate",
10160 &principal_str,
10161 &id.to_string(),
10162 "ok",
10163 crate::json::Value::Null,
10164 );
10165
10166 let mut rec = UnifiedRecord::default();
10167 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
10168 rec.set_arc(
10169 Arc::from("matched_policy_id"),
10170 matched_pid
10171 .map(SchemaValue::text)
10172 .unwrap_or(SchemaValue::Null),
10173 );
10174 rec.set_arc(
10175 Arc::from("matched_sid"),
10176 matched_sid
10177 .map(SchemaValue::text)
10178 .unwrap_or(SchemaValue::Null),
10179 );
10180 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
10181 rec.set_arc(
10182 Arc::from("trail_len"),
10183 SchemaValue::Integer(outcome.trail.len() as i64),
10184 );
10185 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10186 result.records = vec![rec];
10187 Ok(RuntimeQueryResult {
10188 query: query.to_string(),
10189 mode: crate::storage::query::modes::QueryMode::Sql,
10190 statement: "simulate_policy",
10191 engine: "iam-policies",
10192 result,
10193 affected_rows: 0,
10194 statement_type: "select",
10195 })
10196 }
10197}
10198
10199fn grant_to_iam_policy(
10204 principal: &crate::auth::privileges::GrantPrincipal,
10205 resource: &crate::auth::privileges::Resource,
10206 actions: &[crate::auth::privileges::Action],
10207 tenant: Option<&str>,
10208) -> Option<crate::auth::policies::Policy> {
10209 use crate::auth::policies::{
10210 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
10211 };
10212 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
10213
10214 if matches!(principal, GrantPrincipal::Group(_)) {
10215 return None;
10216 }
10217
10218 let now = crate::auth::now_ms();
10219 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
10220
10221 let resource_str = match resource {
10222 Resource::Database => "table:*".to_string(),
10223 Resource::Schema(s) => format!("table:{s}.*"),
10224 Resource::Table { schema, table } => match schema {
10225 Some(s) => format!("table:{s}.{table}"),
10226 None => format!("table:{table}"),
10227 },
10228 Resource::Function { schema, name } => match schema {
10229 Some(s) => format!("function:{s}.{name}"),
10230 None => format!("function:{name}"),
10231 },
10232 };
10233
10234 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
10238 vec![ActionPattern::Wildcard]
10239 } else {
10240 actions
10241 .iter()
10242 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
10243 .collect()
10244 };
10245 if action_patterns.is_empty() {
10246 return None;
10247 }
10248
10249 let resource_patterns = if resource_str == "*" {
10254 vec![ResourcePattern::Wildcard]
10255 } else if resource_str.contains('*') {
10256 vec![ResourcePattern::Glob(resource_str.clone())]
10257 } else if let Some((kind, name)) = resource_str.split_once(':') {
10258 vec![ResourcePattern::Exact {
10259 kind: kind.to_string(),
10260 name: name.to_string(),
10261 }]
10262 } else {
10263 vec![ResourcePattern::Wildcard]
10264 };
10265
10266 let policy = Policy {
10267 id,
10268 version: 1,
10269 tenant: tenant.map(|t| t.to_string()),
10270 created_at: now,
10271 updated_at: now,
10272 statements: vec![Statement {
10273 sid: None,
10274 effect: Effect::Allow,
10275 actions: action_patterns,
10276 resources: resource_patterns,
10277 condition: None,
10278 }],
10279 };
10280 if policy.validate().is_err() {
10281 return None;
10282 }
10283 Some(policy)
10284}
10285
10286fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
10287 use crate::auth::privileges::Action;
10288 match action {
10289 Action::Select => "select",
10290 Action::Insert => "insert",
10291 Action::Update => "update",
10292 Action::Delete => "delete",
10293 Action::Truncate => "truncate",
10294 Action::References => "references",
10295 Action::Execute => "execute",
10296 Action::Usage => "usage",
10297 Action::All => "*",
10298 }
10299}
10300
10301fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
10302 let mut columns = Vec::new();
10303 for (column, _) in &query.assignment_exprs {
10304 if !columns.iter().any(|seen| seen == column) {
10305 columns.push(column.clone());
10306 }
10307 }
10308 columns
10309}
10310
10311fn column_access_request_for_table_update(
10312 table_name: &str,
10313 columns: Vec<String>,
10314) -> crate::auth::ColumnAccessRequest {
10315 match table_name.split_once('.') {
10316 Some((schema, table)) => {
10317 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
10318 .with_schema(schema.to_string())
10319 }
10320 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
10321 }
10322}
10323
10324fn column_access_request_for_table_select(
10325 table_name: &str,
10326 columns: Vec<String>,
10327) -> crate::auth::ColumnAccessRequest {
10328 match table_name.split_once('.') {
10329 Some((schema, table)) => {
10330 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
10331 .with_schema(schema.to_string())
10332 }
10333 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
10334 }
10335}
10336
10337fn update_returning_columns_for_policy(
10338 runtime: &RedDBRuntime,
10339 query: &crate::storage::query::ast::UpdateQuery,
10340) -> Option<Vec<String>> {
10341 let items = query.returning.as_ref()?;
10342 let mut columns = Vec::new();
10343 let project_all = items
10344 .iter()
10345 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
10346 if project_all {
10347 collect_returning_star_columns(runtime, query, &mut columns);
10348 } else {
10349 for item in items {
10350 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
10351 continue;
10352 };
10353 push_returning_policy_column(&mut columns, column);
10354 }
10355 }
10356 (!columns.is_empty()).then_some(columns)
10357}
10358
10359fn collect_returning_star_columns(
10360 runtime: &RedDBRuntime,
10361 query: &crate::storage::query::ast::UpdateQuery,
10362 columns: &mut Vec<String>,
10363) {
10364 let store = runtime.db().store();
10365 let Some(manager) = store.get_collection(&query.table) else {
10366 return;
10367 };
10368 if let Some(schema) = manager.column_schema() {
10369 for column in schema.iter() {
10370 push_returning_policy_column(columns, column);
10371 }
10372 }
10373 for entity in manager.query_all(|_| true) {
10374 if !returning_entity_matches_update_target(&entity, query.target) {
10375 continue;
10376 }
10377 match &entity.data {
10378 crate::storage::EntityData::Row(row) => {
10379 for (column, _) in row.iter_fields() {
10380 push_returning_policy_column(columns, column);
10381 }
10382 }
10383 crate::storage::EntityData::Node(node) => {
10384 push_returning_policy_column(columns, "label");
10385 push_returning_policy_column(columns, "node_type");
10386 for column in node.properties.keys() {
10387 push_returning_policy_column(columns, column);
10388 }
10389 }
10390 crate::storage::EntityData::Edge(edge) => {
10391 push_returning_policy_column(columns, "label");
10392 push_returning_policy_column(columns, "from_rid");
10393 push_returning_policy_column(columns, "to_rid");
10394 push_returning_policy_column(columns, "weight");
10395 for column in edge.properties.keys() {
10396 push_returning_policy_column(columns, column);
10397 }
10398 }
10399 _ => {}
10400 }
10401 }
10402}
10403
10404fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
10405 if returning_public_envelope_column(column) {
10406 return;
10407 }
10408 if !columns.iter().any(|seen| seen == column) {
10409 columns.push(column.to_string());
10410 }
10411}
10412
/// Reports whether `column` names one of the public envelope columns
/// (`rid`, `collection`, `kind`, `tenant`, `created_at`, `updated_at`,
/// `red_entity_id`), compared ASCII case-insensitively.
///
/// The comparison is done in place with `eq_ignore_ascii_case`, so no
/// lowercased copy of `column` is allocated per call.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|name| column.eq_ignore_ascii_case(name))
}
10419
10420fn returning_entity_matches_update_target(
10421 entity: &crate::storage::UnifiedEntity,
10422 target: crate::storage::query::ast::UpdateTarget,
10423) -> bool {
10424 use crate::storage::query::ast::UpdateTarget;
10425 match target {
10426 UpdateTarget::Rows => {
10427 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
10428 }
10429 UpdateTarget::Documents => {
10430 matches!(
10431 returning_row_item_kind(entity),
10432 Some(ReturningRowKind::Document)
10433 )
10434 }
10435 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
10436 UpdateTarget::Nodes => matches!(
10437 (&entity.kind, &entity.data),
10438 (
10439 crate::storage::EntityKind::GraphNode(_),
10440 crate::storage::EntityData::Node(_)
10441 )
10442 ),
10443 UpdateTarget::Edges => matches!(
10444 (&entity.kind, &entity.data),
10445 (
10446 crate::storage::EntityKind::GraphEdge(_),
10447 crate::storage::EntityData::Edge(_)
10448 )
10449 ),
10450 }
10451}
10452
/// Classification of a row-shaped entity, as derived by
/// `returning_row_item_kind` from the fields the row carries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReturningRowKind {
    /// Row that is neither key/value-shaped nor carries a JSON value.
    Row,
    /// Row with at least one JSON-valued field.
    Document,
    /// Row whose fields are all named `key` or `value` (ASCII
    /// case-insensitive).
    Kv,
}
10459
10460fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
10461 let row = entity.data.as_row()?;
10462 let is_kv = row.iter_fields().all(|(column, _)| {
10463 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
10464 });
10465 if is_kv {
10466 return Some(ReturningRowKind::Kv);
10467 }
10468 let is_document = row
10469 .iter_fields()
10470 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
10471 if is_document {
10472 Some(ReturningRowKind::Document)
10473 } else {
10474 Some(ReturningRowKind::Row)
10475 }
10476}
10477
10478fn requested_table_columns_for_policy(
10479 table: &crate::storage::query::ast::TableQuery,
10480) -> Vec<String> {
10481 use crate::storage::query::sql_lowering::{
10482 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
10483 effective_table_projections,
10484 };
10485
10486 let table_name = table.table.as_str();
10487 let table_alias = table.alias.as_deref();
10488 let mut columns = std::collections::BTreeSet::new();
10489
10490 for projection in effective_table_projections(table) {
10491 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
10492 }
10493 if let Some(filter) = effective_table_filter(table) {
10494 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10495 }
10496 for expr in effective_table_group_by_exprs(table) {
10497 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
10498 }
10499 if let Some(filter) = effective_table_having_filter(table) {
10500 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10501 }
10502 for order in &table.order_by {
10503 if let Some(expr) = order.expr.as_ref() {
10504 collect_expr_columns(expr, table_name, table_alias, &mut columns);
10505 } else {
10506 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
10507 }
10508 }
10509
10510 columns.into_iter().collect()
10511}
10512
10513fn collect_projection_columns(
10514 projection: &crate::storage::query::ast::Projection,
10515 table_name: &str,
10516 table_alias: Option<&str>,
10517 columns: &mut std::collections::BTreeSet<String>,
10518) {
10519 use crate::storage::query::ast::Projection;
10520 match projection {
10521 Projection::All => {
10522 columns.insert("*".to_string());
10523 }
10524 Projection::Column(column) | Projection::Alias(column, _) => {
10525 if column != "*" {
10526 columns.insert(column.clone());
10527 }
10528 }
10529 Projection::Function(_, args) => {
10530 for arg in args {
10531 collect_projection_columns(arg, table_name, table_alias, columns);
10532 }
10533 }
10534 Projection::Expression(filter, _) => {
10535 collect_filter_columns(filter, table_name, table_alias, columns);
10536 }
10537 Projection::Field(field, _) => {
10538 collect_field_ref_column(field, table_name, table_alias, columns);
10539 }
10540 Projection::Window { args, .. } => {
10544 for arg in args {
10545 collect_projection_columns(arg, table_name, table_alias, columns);
10546 }
10547 }
10548 }
10549}
10550
10551fn collect_filter_columns(
10552 filter: &crate::storage::query::ast::Filter,
10553 table_name: &str,
10554 table_alias: Option<&str>,
10555 columns: &mut std::collections::BTreeSet<String>,
10556) {
10557 use crate::storage::query::ast::Filter;
10558 match filter {
10559 Filter::Compare { field, .. }
10560 | Filter::IsNull(field)
10561 | Filter::IsNotNull(field)
10562 | Filter::In { field, .. }
10563 | Filter::Between { field, .. }
10564 | Filter::Like { field, .. }
10565 | Filter::StartsWith { field, .. }
10566 | Filter::EndsWith { field, .. }
10567 | Filter::Contains { field, .. } => {
10568 collect_field_ref_column(field, table_name, table_alias, columns);
10569 }
10570 Filter::CompareFields { left, right, .. } => {
10571 collect_field_ref_column(left, table_name, table_alias, columns);
10572 collect_field_ref_column(right, table_name, table_alias, columns);
10573 }
10574 Filter::CompareExpr { lhs, rhs, .. } => {
10575 collect_expr_columns(lhs, table_name, table_alias, columns);
10576 collect_expr_columns(rhs, table_name, table_alias, columns);
10577 }
10578 Filter::And(left, right) | Filter::Or(left, right) => {
10579 collect_filter_columns(left, table_name, table_alias, columns);
10580 collect_filter_columns(right, table_name, table_alias, columns);
10581 }
10582 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
10583 }
10584}
10585
10586fn collect_expr_columns(
10587 expr: &crate::storage::query::ast::Expr,
10588 table_name: &str,
10589 table_alias: Option<&str>,
10590 columns: &mut std::collections::BTreeSet<String>,
10591) {
10592 use crate::storage::query::ast::Expr;
10593 match expr {
10594 Expr::Column { field, .. } => {
10595 collect_field_ref_column(field, table_name, table_alias, columns);
10596 }
10597 Expr::Literal { .. } | Expr::Parameter { .. } => {}
10598 Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
10599 collect_expr_columns(operand, table_name, table_alias, columns);
10600 }
10601 Expr::BinaryOp { lhs, rhs, .. } => {
10602 collect_expr_columns(lhs, table_name, table_alias, columns);
10603 collect_expr_columns(rhs, table_name, table_alias, columns);
10604 }
10605 Expr::FunctionCall { args, .. } => {
10606 for arg in args {
10607 collect_expr_columns(arg, table_name, table_alias, columns);
10608 }
10609 }
10610 Expr::Case {
10611 branches, else_, ..
10612 } => {
10613 for (condition, value) in branches {
10614 collect_expr_columns(condition, table_name, table_alias, columns);
10615 collect_expr_columns(value, table_name, table_alias, columns);
10616 }
10617 if let Some(value) = else_ {
10618 collect_expr_columns(value, table_name, table_alias, columns);
10619 }
10620 }
10621 Expr::IsNull { operand, .. } => {
10622 collect_expr_columns(operand, table_name, table_alias, columns);
10623 }
10624 Expr::InList { target, values, .. } => {
10625 collect_expr_columns(target, table_name, table_alias, columns);
10626 for value in values {
10627 collect_expr_columns(value, table_name, table_alias, columns);
10628 }
10629 }
10630 Expr::Between {
10631 target, low, high, ..
10632 } => {
10633 collect_expr_columns(target, table_name, table_alias, columns);
10634 collect_expr_columns(low, table_name, table_alias, columns);
10635 collect_expr_columns(high, table_name, table_alias, columns);
10636 }
10637 Expr::Subquery { .. } => {}
10638 Expr::WindowFunctionCall { args, window, .. } => {
10639 for arg in args {
10640 collect_expr_columns(arg, table_name, table_alias, columns);
10641 }
10642 for e in &window.partition_by {
10643 collect_expr_columns(e, table_name, table_alias, columns);
10644 }
10645 for o in &window.order_by {
10646 collect_expr_columns(&o.expr, table_name, table_alias, columns);
10647 }
10648 }
10649 }
10650}
10651
10652fn collect_field_ref_column(
10653 field: &crate::storage::query::ast::FieldRef,
10654 table_name: &str,
10655 table_alias: Option<&str>,
10656 columns: &mut std::collections::BTreeSet<String>,
10657) {
10658 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
10659 if column != "*" {
10660 columns.insert(column);
10661 }
10662 }
10663}
10664
10665fn policy_column_name_from_field_ref(
10666 field: &crate::storage::query::ast::FieldRef,
10667 table_name: &str,
10668 table_alias: Option<&str>,
10669) -> Option<String> {
10670 match field {
10671 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
10672 if column == "*" {
10673 return Some("*".to_string());
10674 }
10675 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
10676 Some(column.clone())
10677 } else {
10678 Some(format!("{table}.{column}"))
10679 }
10680 }
10681 _ => None,
10682 }
10683}
10684
10685fn legacy_resource_to_iam(
10686 resource: &crate::auth::privileges::Resource,
10687 tenant: Option<&str>,
10688) -> crate::auth::policies::ResourceRef {
10689 use crate::auth::privileges::Resource;
10690
10691 let (kind, name) = match resource {
10692 Resource::Database => ("database".to_string(), "*".to_string()),
10693 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
10694 Resource::Table { schema, table } => (
10695 "table".to_string(),
10696 match schema {
10697 Some(s) => format!("{s}.{table}"),
10698 None => table.clone(),
10699 },
10700 ),
10701 Resource::Function { schema, name } => (
10702 "function".to_string(),
10703 match schema {
10704 Some(s) => format!("{s}.{name}"),
10705 None => name.clone(),
10706 },
10707 ),
10708 };
10709
10710 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
10711 if let Some(t) = tenant {
10712 out = out.with_tenant(t.to_string());
10713 }
10714 out
10715}
10716
/// One table operand of a join: the table's name together with the name it
/// can be referred to by in qualified column references.
#[derive(Debug)]
struct JoinTableSide {
    /// Name of the table.
    table: String,
    /// Alias of the table; `table_side_context` falls back to the table name
    /// when the query gives no alias.
    alias: String,
}
10722
10723fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
10724 match expr {
10725 QueryExpr::Table(table) => Some(JoinTableSide {
10726 table: table.table.clone(),
10727 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
10728 }),
10729 _ => None,
10730 }
10731}
10732
10733fn collect_projection_columns_for_table(
10734 projection: &Projection,
10735 table: &str,
10736 alias: Option<&str>,
10737 out: &mut BTreeSet<String>,
10738) {
10739 match projection {
10740 Projection::Column(column) | Projection::Alias(column, _) => {
10741 match split_qualified_column(column) {
10742 Some((qualifier, column))
10743 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
10744 {
10745 push_policy_column(column, out);
10746 }
10747 Some(_) => {}
10748 None => push_policy_column(column, out),
10749 }
10750 }
10751 Projection::Field(
10752 FieldRef::TableColumn {
10753 table: qualifier,
10754 column,
10755 },
10756 _,
10757 ) => {
10758 if qualifier.is_empty()
10759 || qualifier == table
10760 || alias.is_some_and(|alias| qualifier == alias)
10761 {
10762 push_policy_column(column, out);
10763 }
10764 }
10765 Projection::Field(
10766 FieldRef::NodeProperty {
10767 alias: qualifier,
10768 property,
10769 },
10770 _,
10771 )
10772 | Projection::Field(
10773 FieldRef::EdgeProperty {
10774 alias: qualifier,
10775 property,
10776 },
10777 _,
10778 ) => {
10779 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
10780 push_policy_column(property, out);
10781 }
10782 }
10783 Projection::Function(_, args) => {
10784 for arg in args {
10785 collect_projection_columns_for_table(arg, table, alias, out);
10786 }
10787 }
10788 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10789 Projection::Window { args, .. } => {
10790 for arg in args {
10791 collect_projection_columns_for_table(arg, table, alias, out);
10792 }
10793 }
10794 }
10795}
10796
10797fn collect_projection_columns_for_join_side(
10798 projection: &Projection,
10799 left: Option<&JoinTableSide>,
10800 right: Option<&JoinTableSide>,
10801 out: &mut HashMap<String, BTreeSet<String>>,
10802) -> RedDBResult<()> {
10803 match projection {
10804 Projection::Column(column) | Projection::Alias(column, _) => {
10805 if let Some((qualifier, column)) = split_qualified_column(column) {
10806 push_qualified_join_column(qualifier, column, left, right, out);
10807 } else {
10808 push_unqualified_join_column(column, left, right, out);
10809 }
10810 }
10811 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
10812 if table.is_empty() {
10813 push_unqualified_join_column(column, left, right, out);
10814 } else if let Some(side) = [left, right]
10815 .into_iter()
10816 .flatten()
10817 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
10818 {
10819 push_join_column(&side.table, column, out);
10820 }
10821 }
10822 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
10823 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
10824 push_qualified_join_column(alias, property, left, right, out);
10825 }
10826 Projection::Function(_, args) => {
10827 for arg in args {
10828 collect_projection_columns_for_join_side(arg, left, right, out)?;
10829 }
10830 }
10831 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10832 Projection::Window { args, .. } => {
10833 for arg in args {
10834 collect_projection_columns_for_join_side(arg, left, right, out)?;
10835 }
10836 }
10837 }
10838 Ok(())
10839}
10840
/// Splits a `qualifier.column` reference into its two halves.
///
/// Returns `None` when there is no dot, when either half is empty, or when
/// the part after the first dot contains another dot.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    match column.split_once('.') {
        Some((qualifier, name))
            if !qualifier.is_empty() && !name.is_empty() && !name.contains('.') =>
        {
            Some((qualifier, name))
        }
        _ => None,
    }
}
10848
10849fn push_qualified_join_column(
10850 qualifier: &str,
10851 column: &str,
10852 left: Option<&JoinTableSide>,
10853 right: Option<&JoinTableSide>,
10854 out: &mut HashMap<String, BTreeSet<String>>,
10855) {
10856 if let Some(side) = [left, right]
10857 .into_iter()
10858 .flatten()
10859 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
10860 {
10861 push_join_column(&side.table, column, out);
10862 }
10863}
10864
10865fn push_unqualified_join_column(
10866 column: &str,
10867 left: Option<&JoinTableSide>,
10868 right: Option<&JoinTableSide>,
10869 out: &mut HashMap<String, BTreeSet<String>>,
10870) {
10871 for side in [left, right].into_iter().flatten() {
10872 push_join_column(&side.table, column, out);
10873 }
10874}
10875
10876fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
10877 if is_policy_column_name(column) {
10878 out.entry(table.to_string())
10879 .or_default()
10880 .insert(column.to_string());
10881 }
10882}
10883
10884fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
10885 if is_policy_column_name(column) {
10886 out.insert(column.to_string());
10887 }
10888}
10889
/// Reports whether `column` names something that should be tracked as a
/// policy column.
///
/// Rejected: the empty string, the `*` wildcard, and names starting with
/// `LIT:` or `TYPE:` (presumably synthetic projection markers rather than
/// stored columns — confirm against the lowering code).
fn is_policy_column_name(column: &str) -> bool {
    const EXCLUDED_PREFIXES: [&str; 2] = ["LIT:", "TYPE:"];

    if column.is_empty() || column == "*" {
        return false;
    }
    !EXCLUDED_PREFIXES
        .iter()
        .any(|prefix| column.starts_with(*prefix))
}
10896
10897fn runtime_iam_context(
10898 role: crate::auth::Role,
10899 tenant: Option<&str>,
10900) -> crate::auth::policies::EvalContext {
10901 crate::auth::policies::EvalContext {
10902 principal_tenant: tenant.map(|t| t.to_string()),
10903 current_tenant: tenant.map(|t| t.to_string()),
10904 peer_ip: None,
10905 mfa_present: false,
10906 now_ms: crate::auth::now_ms(),
10907 principal_is_admin_role: role == crate::auth::Role::Admin,
10908 }
10909}
10910
10911fn explicit_table_projection_columns(
10912 query: &crate::storage::query::ast::TableQuery,
10913) -> Vec<String> {
10914 use crate::storage::query::ast::{FieldRef, Projection};
10915
10916 let mut columns = Vec::new();
10917 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
10918 match projection {
10919 Projection::Column(column) | Projection::Alias(column, _) => {
10920 push_unique(&mut columns, column)
10921 }
10922 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
10923 push_unique(&mut columns, column)
10924 }
10925 _ => {}
10929 }
10930 }
10931 columns
10932}
10933
10934fn explicit_graph_projection_properties(
10935 query: &crate::storage::query::ast::GraphQuery,
10936) -> Vec<String> {
10937 use crate::storage::query::ast::{FieldRef, Projection};
10938
10939 let mut columns = Vec::new();
10940 for projection in &query.return_ {
10941 match projection {
10942 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
10943 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
10944 push_unique(&mut columns, property.clone())
10945 }
10946 _ => {}
10947 }
10948 }
10949 columns
10950}
10951
/// Appends `column` to `columns` unless an equal entry is already present,
/// preserving insertion order.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
10957
10958fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
10959 use crate::storage::query::ast::PolicyPrincipalRef;
10960 match p {
10961 PolicyPrincipalRef::User(u) => match &u.tenant {
10962 Some(t) => format!("user:{t}/{}", u.username),
10963 None => format!("user:{}", u.username),
10964 },
10965 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
10966 }
10967}
10968
10969pub(crate) fn decision_to_strings(
10972 d: &crate::auth::policies::Decision,
10973) -> (String, Option<String>, Option<String>) {
10974 use crate::auth::policies::Decision;
10975 match d {
10976 Decision::Allow {
10977 matched_policy_id,
10978 matched_sid,
10979 } => (
10980 "allow".into(),
10981 Some(matched_policy_id.clone()),
10982 matched_sid.clone(),
10983 ),
10984 Decision::Deny {
10985 matched_policy_id,
10986 matched_sid,
10987 } => (
10988 "deny".into(),
10989 Some(matched_policy_id.clone()),
10990 matched_sid.clone(),
10991 ),
10992 Decision::DefaultDeny => ("default_deny".into(), None, None),
10993 Decision::AdminBypass => ("admin_bypass".into(), None, None),
10994 }
10995}
10996
10997fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
10998 let mut scopes = Vec::new();
10999 collect_relation_scopes(query, &mut scopes);
11000 scopes.sort();
11001 scopes.dedup();
11002 scopes
11003}
11004
11005fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
11006 match query {
11007 QueryExpr::Table(table) => {
11008 if !table.table.is_empty() {
11009 scopes.push(table.table.clone());
11010 }
11011 if let Some(alias) = &table.alias {
11012 scopes.push(alias.clone());
11013 }
11014 }
11015 QueryExpr::Join(join) => {
11016 collect_relation_scopes(&join.left, scopes);
11017 collect_relation_scopes(&join.right, scopes);
11018 }
11019 _ => {}
11020 }
11021}
11022
11023fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
11024 let inner_scopes = relation_scopes_for_query(query);
11025 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
11026}
11027
11028fn query_expr_references_outer_scope(
11029 query: &QueryExpr,
11030 outer_scopes: &[String],
11031 inner_scopes: &[String],
11032) -> bool {
11033 match query {
11034 QueryExpr::Table(table) => {
11035 table.select_items.iter().any(|item| match item {
11036 crate::storage::query::ast::SelectItem::Wildcard => false,
11037 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11038 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11039 }
11040 }) || table
11041 .where_expr
11042 .as_ref()
11043 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11044 || table.filter.as_ref().is_some_and(|filter| {
11045 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11046 })
11047 || table.having_expr.as_ref().is_some_and(|expr| {
11048 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11049 })
11050 || table.having.as_ref().is_some_and(|filter| {
11051 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11052 })
11053 || table
11054 .group_by_exprs
11055 .iter()
11056 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11057 || table.order_by.iter().any(|clause| {
11058 clause.expr.as_ref().is_some_and(|expr| {
11059 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11060 })
11061 })
11062 }
11063 QueryExpr::Join(join) => {
11064 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
11065 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
11066 || join.filter.as_ref().is_some_and(|filter| {
11067 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11068 })
11069 || join.return_items.iter().any(|item| match item {
11070 crate::storage::query::ast::SelectItem::Wildcard => false,
11071 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11072 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11073 }
11074 })
11075 }
11076 _ => false,
11077 }
11078}
11079
11080fn filter_references_outer_scope(
11081 filter: &crate::storage::query::ast::Filter,
11082 outer_scopes: &[String],
11083 inner_scopes: &[String],
11084) -> bool {
11085 use crate::storage::query::ast::Filter;
11086 match filter {
11087 Filter::Compare { field, .. }
11088 | Filter::IsNull(field)
11089 | Filter::IsNotNull(field)
11090 | Filter::In { field, .. }
11091 | Filter::Between { field, .. }
11092 | Filter::Like { field, .. }
11093 | Filter::StartsWith { field, .. }
11094 | Filter::EndsWith { field, .. }
11095 | Filter::Contains { field, .. } => {
11096 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11097 }
11098 Filter::CompareFields { left, right, .. } => {
11099 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
11100 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
11101 }
11102 Filter::CompareExpr { lhs, rhs, .. } => {
11103 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11104 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11105 }
11106 Filter::And(left, right) | Filter::Or(left, right) => {
11107 filter_references_outer_scope(left, outer_scopes, inner_scopes)
11108 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
11109 }
11110 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
11111 }
11112}
11113
11114fn expr_references_outer_scope(
11115 expr: &crate::storage::query::ast::Expr,
11116 outer_scopes: &[String],
11117 inner_scopes: &[String],
11118) -> bool {
11119 use crate::storage::query::ast::Expr;
11120 match expr {
11121 Expr::Column { field, .. } => {
11122 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11123 }
11124 Expr::BinaryOp { lhs, rhs, .. } => {
11125 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11126 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11127 }
11128 Expr::UnaryOp { operand, .. }
11129 | Expr::Cast { inner: operand, .. }
11130 | Expr::IsNull { operand, .. } => {
11131 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
11132 }
11133 Expr::FunctionCall { args, .. } => args
11134 .iter()
11135 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
11136 Expr::Case {
11137 branches, else_, ..
11138 } => {
11139 branches.iter().any(|(cond, value)| {
11140 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
11141 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
11142 }) || else_
11143 .as_ref()
11144 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11145 }
11146 Expr::InList { target, values, .. } => {
11147 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11148 || values
11149 .iter()
11150 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
11151 }
11152 Expr::Between {
11153 target, low, high, ..
11154 } => {
11155 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11156 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
11157 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
11158 }
11159 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
11160 Expr::Literal { .. } | Expr::Parameter { .. } => false,
11161 Expr::WindowFunctionCall { args, window, .. } => {
11162 args.iter()
11163 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes))
11164 || window
11165 .partition_by
11166 .iter()
11167 .any(|e| expr_references_outer_scope(e, outer_scopes, inner_scopes))
11168 || window
11169 .order_by
11170 .iter()
11171 .any(|o| expr_references_outer_scope(&o.expr, outer_scopes, inner_scopes))
11172 }
11173 }
11174}
11175
11176fn field_ref_references_outer_scope(
11177 field: &crate::storage::query::ast::FieldRef,
11178 outer_scopes: &[String],
11179 inner_scopes: &[String],
11180) -> bool {
11181 match field {
11182 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
11183 outer_scopes.iter().any(|scope| scope == table)
11184 && !inner_scopes.iter().any(|scope| scope == table)
11185 }
11186 _ => false,
11187 }
11188}
11189
11190fn first_column_values(
11191 result: crate::storage::query::unified::UnifiedResult,
11192) -> RedDBResult<Vec<Value>> {
11193 if result.columns.len() > 1 {
11194 return Err(RedDBError::Query(
11195 "expression subquery must return exactly one column".to_string(),
11196 ));
11197 }
11198 let fallback_column = result
11199 .records
11200 .first()
11201 .and_then(|record| record.column_names().into_iter().next())
11202 .map(|name| name.to_string());
11203 let column = result.columns.first().cloned().or(fallback_column);
11204 let Some(column) = column else {
11205 return Ok(Vec::new());
11206 };
11207 Ok(result
11208 .records
11209 .iter()
11210 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
11211 .collect())
11212}
11213
/// Parses a user-supplied timestamp into milliseconds since the Unix epoch.
///
/// Two forms are accepted:
///
/// * a bare unsigned integer, returned unchanged (it is taken to already be a
///   millisecond count);
/// * text whose first whitespace-delimited token is `YYYY-MM-DD`. Anything
///   after that token (for example a time of day) is ignored, so the result
///   is midnight at the start of that day.
///
/// Returns `None` for anything else, including:
///
/// * impossible calendar dates (`2024-13-01`, `2024-02-30`, `2024-03-00`);
/// * dates before 1970-01-01, which have no unsigned representation;
/// * years so large that the day count would overflow `i64`.
///
/// The input is user-controlled, so every one of these cases must yield
/// `None` rather than panic or wrap.
fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
    const MS_PER_DAY: u128 = 86_400_000;
    // `days_from_civil` multiplies `year / 400` by 146_097; capping the year
    // here keeps that product (and everything added to it) inside `i64`.
    const MAX_YEAR: i64 = i64::MAX / 400;

    if let Ok(ms) = s.parse::<u128>() {
        return Some(ms);
    }

    let date = s.split_whitespace().next()?;
    let mut fields = date.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return None;
    };
    let year = year.parse::<i64>().ok()?;
    let month = month.parse::<u32>().ok()?;
    let day = day.parse::<u32>().ok()?;

    if !(1970..=MAX_YEAR).contains(&year) {
        return None;
    }
    let is_leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
    let month_len = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        _ => return None,
    };
    if !(1..=month_len).contains(&day) {
        return None;
    }

    let days = days_from_civil(year, month, day);
    if days < 0 {
        // Unreachable after the year check above; kept so a negative count can
        // never be reinterpreted as a huge unsigned one.
        return None;
    }
    Some(u128::from(days.unsigned_abs()) * MS_PER_DAY)
}

/// Converts a proleptic-Gregorian calendar date into the number of days since
/// 1970-01-01 (negative for earlier dates), using Howard Hinnant's
/// `days_from_civil` algorithm.
///
/// `m` is expected to be in `1..=12` and `d` to be a valid day of that month;
/// out-of-range values are not rejected and simply extrapolate. Years within
/// roughly ±`i64::MAX / 400` are safe from overflow.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Treat the year as starting on 1 March so the leap day falls at its end.
    let y = if m <= 2 { y - 1 } else { y };
    // Floor division into 400-year eras, also for negative years.
    let era = (if y >= 0 { y } else { y - 399 }) / 400;
    let yoe = y - era * 400; // year of era, 0..=399
    let shifted_month = i64::from(if m > 2 { m - 3 } else { m + 9 }); // March = 0 … February = 11
    // Signed arithmetic: a day of 0 must not underflow.
    let doy = (153 * shifted_month + 2) / 5 + i64::from(d) - 1; // day of (shifted) year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era
    era * 146_097 + doe - 719_468
}
11248
11249fn walk_plan_node(
11250 node: &crate::storage::query::planner::CanonicalLogicalNode,
11251 depth: usize,
11252 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
11253) {
11254 use std::sync::Arc;
11255 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
11256 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
11257 rec.set_arc(
11258 Arc::from("source"),
11259 node.source.clone().map(Value::text).unwrap_or(Value::Null),
11260 );
11261 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
11262 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
11263 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
11264 out.push(rec);
11265 for child in &node.children {
11266 walk_plan_node(child, depth + 1, out);
11267 }
11268}