1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
// Per-thread execution context. Each slot is written by the setter or guard
// defined for it in this file and read back through the matching accessor.
thread_local! {
    /// Connection id recorded for this thread. `0` is both the initial value
    /// and the value written by `clear_current_connection_id`.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    /// `(username, role)` installed by `set_current_auth_identity`, or `None`.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    /// Snapshot context consulted by the `*_visible_under_current_snapshot`
    /// checks; `None` when no snapshot is installed.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    /// Mirror flag for `CURRENT_SNAPSHOT`: the snapshot setters and guards in
    /// this file write it together with the slot so readers can test a plain
    /// `Cell` before borrowing the `RefCell`.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    /// Tenant installed by `set_current_tenant`; `current_tenant` layers scope
    /// overrides and the transaction-local tenant on top of this value.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    /// Resolver installed by `ConfigSnapshotGuard` and read by
    /// `current_config_value`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    /// Resolver installed by `SecretStoreGuard` and read by
    /// `current_secret_value`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
91fn view_records_to_entities(
100 table: &str,
101 records: &[crate::storage::query::unified::UnifiedRecord],
102) -> Vec<crate::storage::UnifiedEntity> {
103 use std::collections::HashMap;
104 let table_arc: std::sync::Arc<str> = std::sync::Arc::from(table);
105 let mut out = Vec::with_capacity(records.len());
106 for record in records {
107 let mut named: HashMap<String, crate::storage::schema::Value> = HashMap::new();
108 for (name, value) in record.iter_fields() {
109 named.insert(name.to_string(), value.clone());
110 }
111 let entity = crate::storage::UnifiedEntity::new(
112 crate::storage::EntityId::new(0),
113 crate::storage::EntityKind::TableRow {
114 table: std::sync::Arc::clone(&table_arc),
115 row_id: 0,
116 },
117 crate::storage::EntityData::Row(crate::storage::RowData {
118 columns: Vec::new(),
119 named: Some(named),
120 schema: None,
121 }),
122 );
123 out.push(entity);
124 }
125 out
126}
127
128fn system_keyed_collection_contract(
129 name: &str,
130 model: crate::catalog::CollectionModel,
131) -> crate::physical::CollectionContract {
132 let now = crate::utils::now_unix_millis() as u128;
133 crate::physical::CollectionContract {
134 name: name.to_string(),
135 declared_model: model,
136 schema_mode: crate::catalog::SchemaMode::Dynamic,
137 origin: crate::physical::ContractOrigin::Implicit,
138 version: 1,
139 created_at_unix_ms: now,
140 updated_at_unix_ms: now,
141 default_ttl_ms: None,
142 vector_dimension: None,
143 vector_metric: None,
144 context_index_fields: Vec::new(),
145 declared_columns: Vec::new(),
146 table_def: None,
147 timestamps_enabled: false,
148 context_index_enabled: false,
149 metrics_raw_retention_ms: None,
150 metrics_rollup_policies: Vec::new(),
151 metrics_tenant_identity: None,
152 metrics_namespace: None,
153 append_only: false,
154 subscriptions: Vec::new(),
155 session_key: None,
156 session_gap_ms: None,
157 retention_duration_ms: None,
158 }
159}
160
/// Everything the visibility checks in this file need to decide whether a
/// tuple version (`xmin` / `xmax`) is visible to the current reader.
#[derive(Clone)]
pub struct SnapshotContext {
    /// Snapshot whose `sees(xmin, xmax)` gives the final visibility verdict.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Manager queried via `is_aborted` for the creating and deleting xids.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Xids treated as the reader's own: a row created by one is visible, a
    /// row deleted by one is hidden, before the snapshot is consulted.
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Reported by `current_snapshot_requires_index_fallback`.
    pub requires_index_fallback: bool,
}
182
183pub fn set_current_connection_id(id: u64) {
192 CURRENT_CONN_ID.with(|c| c.set(id));
193}
194
195pub fn clear_current_connection_id() {
197 CURRENT_CONN_ID.with(|c| c.set(0));
198}
199
200pub fn current_connection_id() -> u64 {
203 CURRENT_CONN_ID.with(|c| c.get())
204}
205
206pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
210 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
211}
212
213pub fn clear_current_auth_identity() {
217 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
218}
219
220pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
223 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
224}
225
226pub fn set_current_tenant(tenant_id: String) {
231 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
232}
233
234pub fn clear_current_tenant() {
237 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
238}
239
240pub fn current_tenant() -> Option<String> {
251 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
252 if let Some(over) = current_scope_override() {
253 if over.tenant.is_active() {
254 return over.tenant.resolve(inherited);
255 }
256 }
257 if let Some(tx_local) = current_tx_local_tenant() {
258 return tx_local;
259 }
260 inherited
261}
262
thread_local! {
    /// Transaction-local tenant. The outer `Option` says whether a value is
    /// installed at all; the inner one is the tenant itself, where `None` is
    /// an explicit "no tenant".
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}

/// Returns a copy of the transaction-local tenant slot for this thread.
fn current_tx_local_tenant() -> Option<Option<String>> {
    TX_LOCAL_TENANT.with(|slot| {
        let installed = slot.borrow();
        installed.as_ref().cloned()
    })
}
279
280fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
286 let mut tokens = query.split_ascii_whitespace();
287 let Some(w1) = tokens.next() else {
288 return Ok(None);
289 };
290 if !w1.eq_ignore_ascii_case("SET") {
291 return Ok(None);
292 }
293 let Some(w2) = tokens.next() else {
294 return Ok(None);
295 };
296 if !w2.eq_ignore_ascii_case("LOCAL") {
297 return Ok(None);
298 }
299 let Some(w3) = tokens.next() else {
300 return Ok(None);
301 };
302 if !w3.eq_ignore_ascii_case("TENANT") {
303 return Ok(None);
304 }
305 let rest: String = tokens.collect::<Vec<_>>().join(" ");
306 let rest = rest.trim().trim_end_matches(';').trim();
307 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
308 if value_str.is_empty() {
309 return Err(RedDBError::Query(
310 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
311 ));
312 }
313 if value_str.eq_ignore_ascii_case("NULL") {
314 return Ok(Some(None));
315 }
316 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
317 let inner = &value_str[1..value_str.len() - 1];
318 return Ok(Some(Some(inner.to_string())));
319 }
320 Err(RedDBError::Query(format!(
321 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
322 )))
323}
324
325pub(crate) struct TxLocalTenantGuard;
326
327impl TxLocalTenantGuard {
328 pub fn install(value: Option<Option<String>>) -> Self {
329 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
330 Self
331 }
332}
333
334impl Drop for TxLocalTenantGuard {
335 fn drop(&mut self) {
336 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
337 }
338}
339
thread_local! {
    /// Per-thread stack of scope overrides. `push_scope_override` appends,
    /// `pop_scope_override` removes the last entry, and the last entry is the
    /// one `current_scope_override` reports.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
350
351pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
352 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
353}
354
355pub(crate) fn pop_scope_override() {
356 SCOPE_OVERRIDES.with(|cell| {
357 cell.borrow_mut().pop();
358 });
359}
360
361pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
362 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
363}
364
365pub(crate) fn has_scope_override_active() -> bool {
369 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
370}
371
372pub(crate) struct ScopeOverrideGuard;
376
377impl ScopeOverrideGuard {
378 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
379 push_scope_override(over);
380 Self
381 }
382}
383
384impl Drop for ScopeOverrideGuard {
385 fn drop(&mut self) {
386 pop_scope_override();
387 }
388}
389
390pub(crate) fn current_user_projected() -> Option<String> {
396 let inherited = current_auth_identity().map(|(u, _)| u);
397 if let Some(over) = current_scope_override() {
398 if over.user.is_active() {
399 return over.user.resolve(inherited);
400 }
401 }
402 inherited
403}
404
405pub(crate) fn current_role_projected() -> Option<String> {
406 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
407 if let Some(over) = current_scope_override() {
408 if over.role.is_active() {
409 return over.role.resolve(inherited);
410 }
411 }
412 inherited
413}
414
415pub(crate) fn current_secret_value(path: &str) -> Option<String> {
416 let key = path.to_ascii_lowercase();
417 CURRENT_SECRET_RESOLVER.with(|cell| {
418 let mut resolver = cell.borrow_mut();
419 let resolver = resolver.as_mut()?;
420 if resolver.values.is_none() {
421 resolver.values = resolver
422 .store
423 .as_ref()
424 .map(|store| store.vault_kv_snapshot());
425 }
426 let values = resolver.values.as_ref()?;
427 values.get(&key).cloned().or_else(|| {
428 key.strip_prefix("red.vault/").and_then(|rest| {
429 values
430 .get(rest)
431 .cloned()
432 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
433 })
434 })
435 })
436}
437
/// Per-thread secret lookup state held in `CURRENT_SECRET_RESOLVER`.
struct SecretResolver {
    /// Store the secret map is loaded from; with `None` the map is never filled.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Secret map; `None` until `current_secret_value` loads it from `store`.
    values: Option<HashMap<String, String>>,
}
442
443pub(super) struct SecretStoreGuard {
444 previous: Option<SecretResolver>,
445}
446
447impl SecretStoreGuard {
448 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
449 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
450 cell.replace(Some(SecretResolver {
451 store,
452 values: None,
453 }))
454 });
455 Self { previous }
456 }
457}
458
459impl Drop for SecretStoreGuard {
460 fn drop(&mut self) {
461 let previous = self.previous.take();
462 CURRENT_SECRET_RESOLVER.with(|cell| {
463 cell.replace(previous);
464 });
465 }
466}
467
468pub(crate) fn current_config_value(path: &str) -> Option<Value> {
469 let key = path.to_ascii_lowercase();
470 CURRENT_CONFIG_RESOLVER.with(|cell| {
471 let mut resolver = cell.borrow_mut();
472 let resolver = resolver.as_mut()?;
473 if resolver.values.is_none() {
474 resolver.values = Some(latest_config_snapshot(&resolver.db));
475 }
476 let values = resolver.values.as_ref()?;
477 values.get(&key).cloned().or_else(|| {
478 key.strip_prefix("red.config/")
479 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
480 })
481 })
482}
483
484fn update_current_config_value(path: &str, value: Value) {
485 let key = path.to_ascii_lowercase();
486 CURRENT_CONFIG_RESOLVER.with(|cell| {
487 if let Some(resolver) = cell.borrow_mut().as_mut() {
488 if let Some(values) = resolver.values.as_mut() {
489 values.insert(key, value);
490 }
491 }
492 });
493}
494
495fn update_current_secret_value(path: &str, value: Option<String>) {
496 let key = path.to_ascii_lowercase();
497 CURRENT_SECRET_RESOLVER.with(|cell| {
498 if let Some(resolver) = cell.borrow_mut().as_mut() {
499 let Some(values) = resolver.values.as_mut() else {
500 return;
501 };
502 match value {
503 Some(value) => {
504 values.insert(key, value);
505 }
506 None => {
507 values.remove(&key);
508 }
509 }
510 }
511 });
512}
513
514fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
515 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
516
517 if let Some(manager) = db.store().get_collection("red_config") {
518 manager.for_each_entity(|entity| {
519 let Some(row) = entity.data.as_row() else {
520 return true;
521 };
522 let Some(Value::Text(key)) = row.get_field("key") else {
523 return true;
524 };
525 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
526 let id = entity.id.raw();
527 let key = key.to_ascii_lowercase();
528 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
529 if let Some(rest) = key.strip_prefix("red.config.") {
530 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
531 }
532 true
533 });
534 }
535
536 if let Some(manager) = db.store().get_collection("red.config") {
537 manager.for_each_entity(|entity| {
538 let Some(row) = entity.data.as_row() else {
539 return true;
540 };
541 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
542 return true;
543 }
544 let Some(Value::Text(key)) = row.get_field("key") else {
545 return true;
546 };
547 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
548 insert_latest_config_value(
549 &mut latest,
550 format!("red.config/{}", key.to_ascii_lowercase()),
551 entity.id.raw(),
552 value,
553 );
554 true
555 });
556 }
557
558 latest
559 .into_iter()
560 .map(|(key, (_, value))| (key, value))
561 .collect()
562}
563
564fn insert_latest_config_value(
565 latest: &mut HashMap<String, (u64, Value)>,
566 key: String,
567 id: u64,
568 value: Value,
569) {
570 match latest.get(&key) {
571 Some((prev_id, _)) if *prev_id > id => {}
572 _ => {
573 latest.insert(key, (id, value));
574 }
575 }
576}
577
/// Per-thread config lookup state held in `CURRENT_CONFIG_RESOLVER`.
struct ConfigResolver {
    /// Database the config map is built from.
    db: Arc<RedDB>,
    /// Config map; `None` until `current_config_value` builds it with
    /// `latest_config_snapshot`.
    values: Option<HashMap<String, Value>>,
}
582
583pub(super) struct ConfigSnapshotGuard {
584 previous: Option<ConfigResolver>,
585}
586
587impl ConfigSnapshotGuard {
588 pub(super) fn install(db: Arc<RedDB>) -> Self {
589 let previous = CURRENT_CONFIG_RESOLVER
590 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
591 Self { previous }
592 }
593}
594
595impl Drop for ConfigSnapshotGuard {
596 fn drop(&mut self) {
597 let previous = self.previous.take();
598 CURRENT_CONFIG_RESOLVER.with(|cell| {
599 cell.replace(previous);
600 });
601 }
602}
603
604pub fn set_current_snapshot(ctx: SnapshotContext) {
609 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
610 HAS_SNAPSHOT.with(|c| c.set(true));
611}
612
613pub fn clear_current_snapshot() {
614 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
615 HAS_SNAPSHOT.with(|c| c.set(false));
616}
617
618pub(crate) struct CurrentSnapshotGuard {
624 previous: Option<SnapshotContext>,
625}
626
627impl CurrentSnapshotGuard {
628 pub(crate) fn install(ctx: SnapshotContext) -> Self {
629 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
630 set_current_snapshot(ctx);
631 Self { previous }
632 }
633}
634
635impl Drop for CurrentSnapshotGuard {
636 fn drop(&mut self) {
637 let prev = self.previous.take();
638 let has = prev.is_some();
639 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
640 HAS_SNAPSHOT.with(|c| c.set(has));
641 }
642}
643
644#[inline]
655pub fn entity_visible_under_current_snapshot(
656 entity: &crate::storage::unified::entity::UnifiedEntity,
657) -> bool {
658 if !HAS_SNAPSHOT.with(|c| c.get()) {
664 return entity.xmax == 0;
665 }
666 CURRENT_SNAPSHOT.with(|cell| {
667 let guard = cell.borrow();
668 let Some(ctx) = guard.as_ref() else {
669 return true;
670 };
671 visibility_check(ctx, entity.xmin, entity.xmax)
672 })
673}
674
675#[inline]
680pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
681 if !HAS_SNAPSHOT.with(|c| c.get()) {
682 return true;
683 }
684 CURRENT_SNAPSHOT.with(|cell| {
685 let guard = cell.borrow();
686 let Some(ctx) = guard.as_ref() else {
687 return true;
688 };
689 visibility_check(ctx, xmin, xmax)
690 })
691}
692
693pub fn capture_current_snapshot() -> Option<SnapshotContext> {
700 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
701}
702
703pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
708 if !HAS_SNAPSHOT.with(|c| c.get()) {
709 return false;
710 }
711 CURRENT_SNAPSHOT.with(|cell| {
712 cell.borrow()
713 .as_ref()
714 .is_some_and(|ctx| ctx.requires_index_fallback)
715 })
716}
717
/// Copy of the per-thread context taken by `snapshot_bundle` and re-installed
/// by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context that was installed at capture time, if any.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)` that was installed at capture time, if any.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Raw thread tenant at capture time (scope overrides and the
    /// transaction-local tenant are not applied).
    pub tenant: Option<String>,
}
738
739pub fn snapshot_bundle() -> SnapshotBundle {
742 SnapshotBundle {
743 snapshot: capture_current_snapshot(),
744 auth: current_auth_identity(),
745 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
746 }
747}
748
749pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
754 struct Guard {
755 prev_snapshot: Option<SnapshotContext>,
756 prev_auth: Option<(String, crate::auth::Role)>,
757 prev_tenant: Option<String>,
758 }
759 impl Drop for Guard {
760 fn drop(&mut self) {
761 let snap = self.prev_snapshot.take();
762 let has = snap.is_some();
763 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
764 HAS_SNAPSHOT.with(|c| c.set(has));
765 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
766 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
767 }
768 }
769
770 let _guard = {
771 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
772 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
773 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
774
775 match bundle.snapshot.clone() {
776 Some(ctx) => set_current_snapshot(ctx),
777 None => clear_current_snapshot(),
778 }
779 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
780 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
781
782 Guard {
783 prev_snapshot,
784 prev_auth,
785 prev_tenant,
786 }
787 };
788 f()
789}
790
791#[inline]
795pub fn entity_visible_with_context(
796 ctx: Option<&SnapshotContext>,
797 entity: &crate::storage::unified::entity::UnifiedEntity,
798) -> bool {
799 match ctx {
800 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
801 None => true,
802 }
803}
804
805fn table_row_index_fields(
806 entity: &crate::storage::unified::entity::UnifiedEntity,
807) -> Vec<(String, crate::storage::schema::Value)> {
808 let crate::storage::EntityData::Row(row) = &entity.data else {
809 return Vec::new();
810 };
811 if let Some(named) = &row.named {
812 return named
813 .iter()
814 .map(|(name, value)| (name.clone(), value.clone()))
815 .collect();
816 }
817 if let Some(schema) = &row.schema {
818 return schema
819 .iter()
820 .zip(row.columns.iter())
821 .map(|(name, value)| (name.clone(), value.clone()))
822 .collect();
823 }
824 Vec::new()
825}
826
827#[inline]
828fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
829 if xmin != 0 && ctx.manager.is_aborted(xmin) {
833 return false;
834 }
835 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
837 0
838 } else {
839 xmax
840 };
841 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
845 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
846 if own_xmax {
847 return false;
849 }
850 if own_xmin {
851 return true;
852 }
853 ctx.snapshot.sees(xmin, effective_xmax)
854}
855
856fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
857 runtime
858 .inner
859 .pool
860 .lock()
861 .unwrap_or_else(|poisoned| poisoned.into_inner())
862}
863
864fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
865 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
866 return;
867 }
868 scopes.insert(name.to_string());
869}
870
871fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
872 match query.source.as_ref() {
873 Some(crate::storage::query::ast::TableSource::Name(name)) => {
874 cache_scope_insert(scopes, name)
875 }
876 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
877 collect_query_expr_result_cache_scopes(scopes, subquery);
878 }
879 None => cache_scope_insert(scopes, &query.table),
880 }
881}
882
883fn collect_vector_source_scopes(
884 scopes: &mut HashSet<String>,
885 source: &crate::storage::query::ast::VectorSource,
886) {
887 match source {
888 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
889 cache_scope_insert(scopes, collection);
890 }
891 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
892 collect_query_expr_result_cache_scopes(scopes, subquery);
893 }
894 crate::storage::query::ast::VectorSource::Literal(_)
895 | crate::storage::query::ast::VectorSource::Text(_) => {}
896 }
897}
898
899fn collect_path_selector_scopes(
900 scopes: &mut HashSet<String>,
901 selector: &crate::storage::query::ast::NodeSelector,
902) {
903 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
904 cache_scope_insert(scopes, table);
905 }
906}
907
/// Walks `expr` and adds to `scopes` the name of every table, collection or
/// queue the statement refers to, recursing into joins, subqueries and view
/// definitions.
///
/// Names are added through `cache_scope_insert`, so empty names, `__subq_*`
/// names and universal sources are filtered out. Statements that name no
/// collection (session, auth, schema, migration and similar) add nothing.
///
/// NOTE(review): presumably the collected set keys result-cache entries or
/// their invalidation — confirm against the callers.
fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
    match expr {
        // ---- Reads: recurse into every source the query can pull from. ----
        QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
        QueryExpr::Join(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.left);
            collect_query_expr_result_cache_scopes(scopes, &query.right);
        }
        QueryExpr::Path(query) => {
            collect_path_selector_scopes(scopes, &query.from);
            collect_path_selector_scopes(scopes, &query.to);
        }
        QueryExpr::Vector(query) => {
            cache_scope_insert(scopes, &query.collection);
            collect_vector_source_scopes(scopes, &query.query_vector);
        }
        QueryExpr::Hybrid(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.structured);
            cache_scope_insert(scopes, &query.vector.collection);
            collect_vector_source_scopes(scopes, &query.vector.query_vector);
        }
        // ---- DML and DDL: the single target named by the statement. ----
        QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        // ---- Queues. ----
        QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
        QueryExpr::QueueCommand(query) => match query {
            QueueCommand::Push { queue, .. }
            | QueueCommand::Pop { queue, .. }
            | QueueCommand::Peek { queue, .. }
            | QueueCommand::Len { queue }
            | QueueCommand::Purge { queue }
            | QueueCommand::GroupCreate { queue, .. }
            | QueueCommand::GroupRead { queue, .. }
            | QueueCommand::Pending { queue, .. }
            | QueueCommand::Claim { queue, .. }
            | QueueCommand::Ack { queue, .. }
            | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
            // A move touches both ends.
            QueueCommand::Move {
                source,
                destination,
                ..
            } => {
                cache_scope_insert(scopes, source);
                cache_scope_insert(scopes, destination);
            }
        },
        // A backfill reads the collection and writes the target queue.
        QueryExpr::EventsBackfill(query) => {
            cache_scope_insert(scopes, &query.collection);
            cache_scope_insert(scopes, &query.target_queue);
        }
        // ---- Trees. ----
        QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::TreeCommand(query) => match query {
            TreeCommand::Insert { collection, .. }
            | TreeCommand::Move { collection, .. }
            | TreeCommand::Delete { collection, .. }
            | TreeCommand::Validate { collection, .. }
            | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
        },
        // ---- Search: some forms always name a collection, others optionally. ----
        QueryExpr::SearchCommand(query) => match query {
            SearchCommand::Similar { collection, .. }
            | SearchCommand::Hybrid { collection, .. }
            | SearchCommand::SpatialRadius { collection, .. }
            | SearchCommand::SpatialBbox { collection, .. }
            | SearchCommand::SpatialNearest { collection, .. } => {
                cache_scope_insert(scopes, collection);
            }
            SearchCommand::Text { collection, .. }
            | SearchCommand::Multimodal { collection, .. }
            | SearchCommand::Index { collection, .. }
            | SearchCommand::Context { collection, .. } => {
                if let Some(collection) = collection.as_deref() {
                    cache_scope_insert(scopes, collection);
                }
            }
        },
        QueryExpr::Ask(query) => {
            if let Some(collection) = query.collection.as_deref() {
                cache_scope_insert(scopes, collection);
            }
        }
        QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
        // ---- Maintenance: the target is optional. ----
        QueryExpr::MaintenanceCommand(cmd) => match cmd {
            crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
            | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
                if let Some(t) = target {
                    cache_scope_insert(scopes, t);
                }
            }
        },
        QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
        // ---- Views: the view itself plus everything its query reads. ----
        QueryExpr::CreateView(cmd) => {
            cache_scope_insert(scopes, &cmd.name);
            collect_query_expr_result_cache_scopes(scopes, &cmd.query);
        }
        QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
        // ---- Row-level policies and foreign data. ----
        QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
        QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        // ---- Statements that contribute no scope. ----
        QueryExpr::Graph(_)
        | QueryExpr::GraphCommand(_)
        | QueryExpr::ProbabilisticCommand(_)
        | QueryExpr::SetConfig { .. }
        | QueryExpr::ShowConfig { .. }
        | QueryExpr::SetSecret { .. }
        | QueryExpr::DeleteSecret { .. }
        | QueryExpr::ShowSecrets { .. }
        | QueryExpr::SetTenant(_)
        | QueryExpr::ShowTenant
        | QueryExpr::TransactionControl(_)
        | QueryExpr::CreateSchema(_)
        | QueryExpr::DropSchema(_)
        | QueryExpr::CreateSequence(_)
        | QueryExpr::DropSequence(_)
        | QueryExpr::Grant(_)
        | QueryExpr::Revoke(_)
        | QueryExpr::AlterUser(_)
        | QueryExpr::CreateIamPolicy { .. }
        | QueryExpr::DropIamPolicy { .. }
        | QueryExpr::AttachPolicy { .. }
        | QueryExpr::DetachPolicy { .. }
        | QueryExpr::ShowPolicies { .. }
        | QueryExpr::ShowEffectivePermissions { .. }
        | QueryExpr::SimulatePolicy { .. }
        | QueryExpr::CreateMigration(_)
        | QueryExpr::ApplyMigration(_)
        | QueryExpr::RollbackMigration(_)
        | QueryExpr::ExplainMigration(_)
        | QueryExpr::EventsBackfillStatus { .. } => {}
        // ---- Key/value and config commands: always scoped to one collection. ----
        QueryExpr::KvCommand(cmd) => {
            use crate::storage::query::ast::KvCommand;
            match cmd {
                KvCommand::Put { collection, .. }
                | KvCommand::InvalidateTags { collection, .. }
                | KvCommand::Get { collection, .. }
                | KvCommand::Unseal { collection, .. }
                | KvCommand::Rotate { collection, .. }
                | KvCommand::History { collection, .. }
                | KvCommand::List { collection, .. }
                | KvCommand::Purge { collection, .. }
                | KvCommand::Watch { collection, .. }
                | KvCommand::Delete { collection, .. }
                | KvCommand::Incr { collection, .. }
                | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
            }
        }
        QueryExpr::ConfigCommand(cmd) => {
            use crate::storage::query::ast::ConfigCommand;
            match cmd {
                ConfigCommand::Put { collection, .. }
                | ConfigCommand::Get { collection, .. }
                | ConfigCommand::Resolve { collection, .. }
                | ConfigCommand::Rotate { collection, .. }
                | ConfigCommand::Delete { collection, .. }
                | ConfigCommand::History { collection, .. }
                | ConfigCommand::List { collection, .. }
                | ConfigCommand::Watch { collection, .. }
                | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
                    cache_scope_insert(scopes, collection)
                }
            }
        }
    }
}
1093
1094pub(crate) fn rls_policy_filter(
1102 runtime: &RedDBRuntime,
1103 table: &str,
1104 action: crate::storage::query::ast::PolicyAction,
1105) -> Option<crate::storage::query::ast::Filter> {
1106 rls_policy_filter_for_kind(
1107 runtime,
1108 table,
1109 action,
1110 crate::storage::query::ast::PolicyTargetKind::Table,
1111 )
1112}
1113
1114pub(crate) fn rls_policy_filter_for_kind(
1120 runtime: &RedDBRuntime,
1121 table: &str,
1122 action: crate::storage::query::ast::PolicyAction,
1123 kind: crate::storage::query::ast::PolicyTargetKind,
1124) -> Option<crate::storage::query::ast::Filter> {
1125 use crate::storage::query::ast::Filter;
1126
1127 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1128 return None;
1129 }
1130 let role = current_auth_identity().map(|(_, role)| role);
1131 let role_str = role.map(|r| r.as_str().to_string());
1132 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1133 if policies.is_empty() {
1134 return None;
1135 }
1136 policies
1137 .into_iter()
1138 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1139}
1140
1141pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1145 runtime.inner.rls_enabled_tables.read().contains(table)
1146}
1147
1148fn node_passes_rls(
1155 runtime: &RedDBRuntime,
1156 collection: &str,
1157 role: Option<&str>,
1158 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1159 entity: &crate::storage::unified::entity::UnifiedEntity,
1160) -> bool {
1161 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1162
1163 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1164 return true;
1165 }
1166 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1167 let policies = runtime.matching_rls_policies_for_kind(
1168 collection,
1169 role,
1170 PolicyAction::Select,
1171 PolicyTargetKind::Nodes,
1172 );
1173 if policies.is_empty() {
1174 None
1175 } else {
1176 policies
1177 .into_iter()
1178 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1179 }
1180 });
1181 let Some(filter) = filter else {
1182 return false;
1183 };
1184 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1185 Some(&runtime.inner.db),
1186 entity,
1187 filter,
1188 collection,
1189 collection,
1190 )
1191}
1192
1193fn edge_passes_rls(
1196 runtime: &RedDBRuntime,
1197 collection: &str,
1198 role: Option<&str>,
1199 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1200 entity: &crate::storage::unified::entity::UnifiedEntity,
1201) -> bool {
1202 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1203
1204 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1205 return true;
1206 }
1207 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1208 let policies = runtime.matching_rls_policies_for_kind(
1209 collection,
1210 role,
1211 PolicyAction::Select,
1212 PolicyTargetKind::Edges,
1213 );
1214 if policies.is_empty() {
1215 None
1216 } else {
1217 policies
1218 .into_iter()
1219 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1220 }
1221 });
1222 let Some(filter) = filter else {
1223 return false;
1224 };
1225 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1226 Some(&runtime.inner.db),
1227 entity,
1228 filter,
1229 collection,
1230 collection,
1231 )
1232}
1233
1234fn inject_rls_filters(
1255 runtime: &RedDBRuntime,
1256 frame: &dyn super::statement_frame::ReadFrame,
1257 mut table: crate::storage::query::ast::TableQuery,
1258) -> Option<crate::storage::query::ast::TableQuery> {
1259 use crate::storage::query::ast::{Filter, PolicyAction};
1260
1261 let role = frame.identity().map(|(_, role)| role);
1263 let role_str = role.map(|r| r.as_str().to_string());
1264 let policies =
1265 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1266
1267 if policies.is_empty() {
1268 return None;
1271 }
1272
1273 let combined = policies
1275 .into_iter()
1276 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1277 .expect("policies non-empty");
1278
1279 use crate::storage::query::sql_lowering::{expr_to_filter, filter_to_expr};
1288 let had_where_expr = table.where_expr.is_some();
1289 let existing = table
1290 .filter
1291 .take()
1292 .or_else(|| table.where_expr.as_ref().map(expr_to_filter));
1293 let new_filter = match existing {
1294 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1295 None => combined,
1296 };
1297 if had_where_expr {
1300 table.where_expr = Some(filter_to_expr(&new_filter));
1301 }
1302 table.filter = Some(new_filter);
1303 Some(table)
1304}
1305
1306fn inject_rls_into_join(
1316 runtime: &RedDBRuntime,
1317 frame: &dyn super::statement_frame::ReadFrame,
1318 mut join: crate::storage::query::ast::JoinQuery,
1319) -> Option<crate::storage::query::ast::JoinQuery> {
1320 use crate::storage::query::ast::Filter;
1321
1322 let mut policy_filters: Vec<Filter> = Vec::new();
1323 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1324 return None;
1325 }
1326 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1327 return None;
1328 }
1329
1330 if policy_filters.is_empty() {
1331 return Some(join);
1332 }
1333
1334 let combined = policy_filters
1335 .into_iter()
1336 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1337 .expect("policy_filters non-empty");
1338
1339 join.filter = Some(match join.filter.take() {
1340 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1341 None => combined,
1342 });
1343
1344 Some(join)
1345}
1346
1347fn collect_join_side_policy(
1352 runtime: &RedDBRuntime,
1353 frame: &dyn super::statement_frame::ReadFrame,
1354 expr: &crate::storage::query::ast::QueryExpr,
1355 out: &mut Vec<crate::storage::query::ast::Filter>,
1356) -> bool {
1357 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1358 match expr {
1359 QueryExpr::Table(t) => {
1360 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1361 return true;
1362 }
1363 let role = frame.identity().map(|(_, role)| role);
1364 let role_str = role.map(|r| r.as_str().to_string());
1365 let policies =
1366 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1367 if policies.is_empty() {
1368 return false;
1369 }
1370 let combined = policies
1371 .into_iter()
1372 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1373 .expect("policies non-empty");
1374 out.push(combined);
1375 true
1376 }
1377 QueryExpr::Join(inner) => {
1378 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1379 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1380 }
1381 _ => true,
1382 }
1383}
1384
1385fn apply_foreign_table_filters(
1396 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1397 query: &crate::storage::query::ast::TableQuery,
1398) -> crate::storage::query::unified::UnifiedResult {
1399 use crate::storage::query::sql_lowering::{
1400 effective_table_filter, effective_table_projections,
1401 };
1402 use crate::storage::query::unified::UnifiedResult;
1403
1404 let filter = effective_table_filter(query);
1405 let projections = effective_table_projections(query);
1406
1407 let mut filtered: Vec<_> = records
1410 .into_iter()
1411 .filter(|record| match &filter {
1412 Some(f) => {
1413 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1414 }
1415 None => true,
1416 })
1417 .collect();
1418
1419 if let Some(offset) = query.offset {
1421 let offset = offset as usize;
1422 if offset >= filtered.len() {
1423 filtered.clear();
1424 } else {
1425 filtered.drain(0..offset);
1426 }
1427 }
1428 if let Some(limit) = query.limit {
1429 filtered.truncate(limit as usize);
1430 }
1431
1432 let columns: Vec<String> = if projections.is_empty() {
1435 filtered
1436 .first()
1437 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1438 .unwrap_or_default()
1439 } else {
1440 projections
1441 .iter()
1442 .map(super::join_filter::projection_name)
1443 .collect()
1444 };
1445
1446 let mut result = UnifiedResult::empty();
1447 result.columns = columns;
1448 result.records = filtered;
1449 result
1450}
1451
1452pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1459 let mut scopes: HashSet<String> = HashSet::new();
1460 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1461 scopes.into_iter().collect()
1462}
1463
1464fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1465 let mut scopes = HashSet::new();
1466 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1467 scopes
1468}
1469
/// Configuration key for the result-cache backend setting.
/// NOTE(review): the lookup site is outside this section — confirm usage.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
/// Backend name used as the default; presumably corresponds to
/// `RuntimeResultCacheBackend::Legacy` — TODO confirm against the parser of
/// this setting.
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
/// Namespace string for result-cache entries in the blob cache
/// (presumably passed to the blob-cache API; not used in this section).
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
/// Result-cache time-to-live in seconds (per the name; the expiry check is
/// outside this section).
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Entry-count cap enforced by `trim_result_cache`.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
/// 8-byte tag written first by `encode_result_cache_payload` and required as
/// the prefix by `decode_result_cache_payload`.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
1476
/// Selects which result-cache implementation the runtime uses.
///
/// NOTE(review): the variant semantics below are inferred from names and
/// from the neighbouring fields on the runtime (`result_cache`,
/// `result_blob_cache`, `result_cache_shadow_divergences`); confirm against
/// the code that dispatches on this enum.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    /// Presumably the in-memory map-based cache.
    Legacy,
    /// Presumably the blob-cache-backed store.
    BlobCache,
    /// Presumably runs both backends and compares them — TODO confirm.
    Shadow,
}
1483
1484fn trim_result_cache(
1485 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1486 order: &mut std::collections::VecDeque<String>,
1487) {
1488 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1489 if let Some(oldest) = order.pop_front() {
1490 map.remove(&oldest);
1491 } else {
1492 break;
1493 }
1494 }
1495}
1496
1497fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1498 format!(
1499 "{:?}|{}|{}|{}|{}|{:?}",
1500 result.result,
1501 result.query,
1502 result.statement,
1503 result.engine,
1504 result.affected_rows,
1505 result.statement_type
1506 )
1507}
1508
1509fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1510 match mode {
1511 crate::storage::query::modes::QueryMode::Sql => 0,
1512 crate::storage::query::modes::QueryMode::Gremlin => 1,
1513 crate::storage::query::modes::QueryMode::Cypher => 2,
1514 crate::storage::query::modes::QueryMode::Sparql => 3,
1515 crate::storage::query::modes::QueryMode::Path => 4,
1516 crate::storage::query::modes::QueryMode::Natural => 5,
1517 crate::storage::query::modes::QueryMode::Unknown => 255,
1518 }
1519}
1520
1521fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1522 match byte {
1523 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1524 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1525 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1526 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1527 4 => Some(crate::storage::query::modes::QueryMode::Path),
1528 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1529 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1530 _ => None,
1531 }
1532}
1533
/// Maps `value` onto the matching `'static` label, if it is one of the
/// labels the result-cache codec knows about.
///
/// The comparison is exact (case-sensitive). Returns `None` for anything
/// outside the table, which callers use to refuse encoding or decoding.
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN_LABELS.iter().copied().find(|label| *label == value)
}
1556
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in
/// 32 bits.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrowed) => {
            out.extend_from_slice(&narrowed.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1562
1563fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1564 write_u32(out, value.len())?;
1565 out.extend_from_slice(value.as_bytes());
1566 Some(())
1567}
1568
1569fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1570 write_u32(out, value.len())?;
1571 out.extend_from_slice(value);
1572 Some(())
1573}
1574
/// Pops one byte off the front of `input`.
///
/// Returns `None` and leaves `input` unchanged when it is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let first = *input.first()?;
    *input = &input[1..];
    Some(first)
}
1580
/// Reads a little-endian `u32` from the front of `input`, widened to
/// `usize`, and advances `input` past it.
///
/// Returns `None` and leaves `input` unchanged when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let prefix: [u8; 4] = input.get(..4)?.try_into().ok()?;
    *input = &input[4..];
    Some(u32::from_le_bytes(prefix) as usize)
}
1589
/// Reads a little-endian `u64` from the front of `input` and advances
/// `input` past it.
///
/// Returns `None` and leaves `input` unchanged when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let prefix: [u8; 8] = input.get(..8)?.try_into().ok()?;
    *input = &input[8..];
    Some(u64::from_le_bytes(prefix))
}
1598
1599fn read_string(input: &mut &[u8]) -> Option<String> {
1600 let len = read_u32(input)?;
1601 if input.len() < len {
1602 return None;
1603 }
1604 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1605 *input = &input[len..];
1606 Some(value)
1607}
1608
1609fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1610 let len = read_u32(input)?;
1611 if input.len() < len {
1612 return None;
1613 }
1614 let value = &input[..len];
1615 *input = &input[len..];
1616 Some(value)
1617}
1618
1619fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
1620 let result = &entry.result;
1621 if result.result.pre_serialized_json.is_some()
1622 || result_cache_static_str(result.statement).is_none()
1623 || result_cache_static_str(result.engine).is_none()
1624 || result_cache_static_str(result.statement_type).is_none()
1625 || result.result.records.iter().any(|record| {
1626 !record.nodes.is_empty()
1627 || !record.edges.is_empty()
1628 || !record.paths.is_empty()
1629 || !record.vector_results.is_empty()
1630 })
1631 {
1632 return None;
1633 }
1634
1635 let mut out = Vec::new();
1636 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
1637 write_string(&mut out, &result.query)?;
1638 out.push(mode_to_byte(result.mode));
1639 write_string(&mut out, result.statement)?;
1640 write_string(&mut out, result.engine)?;
1641 out.extend_from_slice(&result.affected_rows.to_le_bytes());
1642 write_string(&mut out, result.statement_type)?;
1643
1644 write_u32(&mut out, result.result.columns.len())?;
1645 for column in &result.result.columns {
1646 write_string(&mut out, column)?;
1647 }
1648 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
1649 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
1650 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
1651 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
1652
1653 write_u32(&mut out, result.result.records.len())?;
1654 for record in &result.result.records {
1655 let fields = record.iter_fields().collect::<Vec<_>>();
1656 write_u32(&mut out, fields.len())?;
1657 for (name, value) in fields {
1658 write_string(&mut out, name)?;
1659 let mut encoded = Vec::new();
1660 crate::storage::schema::value_codec::encode(value, &mut encoded);
1661 write_bytes(&mut out, &encoded)?;
1662 }
1663 }
1664
1665 write_u32(&mut out, entry.scopes.len())?;
1666 for scope in &entry.scopes {
1667 write_string(&mut out, scope)?;
1668 }
1669 Some(out)
1670}
1671
1672fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
1673 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
1674 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
1675 {
1676 return None;
1677 }
1678 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
1679
1680 let query = read_string(&mut input)?;
1681 let mode = mode_from_byte(read_u8(&mut input)?)?;
1682 let statement = result_cache_static_str(&read_string(&mut input)?)?;
1683 let engine = result_cache_static_str(&read_string(&mut input)?)?;
1684 let affected_rows = read_u64(&mut input)?;
1685 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
1686
1687 let mut columns = Vec::new();
1688 for _ in 0..read_u32(&mut input)? {
1689 columns.push(read_string(&mut input)?);
1690 }
1691 let stats = crate::storage::query::unified::QueryStats {
1692 nodes_scanned: read_u64(&mut input)?,
1693 edges_scanned: read_u64(&mut input)?,
1694 rows_scanned: read_u64(&mut input)?,
1695 exec_time_us: read_u64(&mut input)?,
1696 };
1697
1698 let mut records = Vec::new();
1699 for _ in 0..read_u32(&mut input)? {
1700 let mut record = crate::storage::query::unified::UnifiedRecord::new();
1701 for _ in 0..read_u32(&mut input)? {
1702 let name = read_string(&mut input)?;
1703 let bytes = read_bytes(&mut input)?;
1704 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
1705 if used != bytes.len() {
1706 return None;
1707 }
1708 record.set_owned(name, value);
1709 }
1710 records.push(record);
1711 }
1712
1713 let mut scopes = HashSet::new();
1714 for _ in 0..read_u32(&mut input)? {
1715 scopes.insert(read_string(&mut input)?);
1716 }
1717 if !input.is_empty() {
1718 return None;
1719 }
1720
1721 Some((
1722 RuntimeQueryResult {
1723 query,
1724 mode,
1725 statement,
1726 engine,
1727 result: crate::storage::query::unified::UnifiedResult {
1728 columns,
1729 records,
1730 stats,
1731 pre_serialized_json: None,
1732 },
1733 affected_rows,
1734 statement_type,
1735 },
1736 scopes,
1737 ))
1738}
1739
/// Strips a leading `EXPLAIN` keyword and returns the statement that
/// follows it.
///
/// Matching is ASCII case-insensitive and requires whitespace after the
/// keyword. Returns `None` when the input does not start with `EXPLAIN`,
/// when nothing follows it, or when the next word is `ALTER` or `ASK`
/// (those forms are left for the caller to handle as-is).
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    // No whitespace at all means there is no statement after the keyword.
    let (keyword, tail) = sql.trim_start().split_once(char::is_whitespace)?;
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let statement = tail.trim_start();
    // `None` here means the remainder was empty.
    let leading_word = statement.split_whitespace().next()?;
    let is_excluded = ["ALTER", "ASK"]
        .iter()
        .any(|excluded| leading_word.eq_ignore_ascii_case(excluded));

    if is_excluded {
        None
    } else {
        Some(statement)
    }
}
1778
1779pub(super) fn has_with_prefix(sql: &str) -> bool {
1784 let trimmed = sql.trim_start();
1785 let head_end = trimmed
1786 .find(|c: char| c.is_whitespace() || c == '(')
1787 .unwrap_or(trimmed.len());
1788 trimmed[..head_end].eq_ignore_ascii_case("WITH")
1789}
1790
1791fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
1799 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
1800}
1801
1802pub(super) fn peek_top_level_as_of_with_table(
1807 sql: &str,
1808) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
1809 if !sql
1810 .as_bytes()
1811 .windows(5)
1812 .any(|w| w.eq_ignore_ascii_case(b"as of"))
1813 {
1814 return None;
1815 }
1816 let parsed = crate::storage::query::parser::parse(sql).ok()?;
1817 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
1818 return None;
1819 };
1820 let clause = table.as_of?;
1821 let table_name = if table.table.is_empty() || table.table == "any" {
1822 None
1823 } else {
1824 Some(table.table.clone())
1825 };
1826 let spec = match clause {
1827 crate::storage::query::ast::AsOfClause::Commit(h) => {
1828 crate::application::vcs::AsOfSpec::Commit(h)
1829 }
1830 crate::storage::query::ast::AsOfClause::Branch(b) => {
1831 crate::application::vcs::AsOfSpec::Branch(b)
1832 }
1833 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
1834 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
1835 crate::application::vcs::AsOfSpec::TimestampMs(ts)
1836 }
1837 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
1838 crate::application::vcs::AsOfSpec::Snapshot(x)
1839 }
1840 };
1841 Some((spec, table_name))
1842}
1843
1844pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
1845 const VOLATILE_TOKENS: &[&str] = &[
1849 "pg_advisory_lock",
1850 "pg_try_advisory_lock",
1851 "pg_advisory_unlock",
1852 "random()",
1853 ];
1858 let lowered = sql.to_ascii_lowercase();
1859 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
1860}
1861
1862pub(super) fn query_is_ask_statement(sql: &str) -> bool {
1863 let trimmed = sql.trim_start();
1864 let head_end = trimmed
1865 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
1866 .unwrap_or(trimmed.len());
1867 trimmed[..head_end].eq_ignore_ascii_case("ASK")
1868}
1869
1870pub(super) fn intent_lock_modes_for(
1880 expr: &QueryExpr,
1881) -> Option<(
1882 crate::storage::transaction::lock::LockMode,
1883 crate::storage::transaction::lock::LockMode,
1884)> {
1885 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
1886
1887 match expr {
1888 QueryExpr::Table(_)
1890 | QueryExpr::Join(_)
1891 | QueryExpr::Vector(_)
1892 | QueryExpr::Hybrid(_)
1893 | QueryExpr::Graph(_)
1894 | QueryExpr::Path(_)
1895 | QueryExpr::Ask(_)
1896 | QueryExpr::SearchCommand(_)
1897 | QueryExpr::GraphCommand(_)
1898 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
1899
1900 QueryExpr::Insert(_)
1908 | QueryExpr::Update(_)
1909 | QueryExpr::Delete(_)
1910 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
1911 Some((IntentExclusive, IntentExclusive))
1912 }
1913 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
1914
1915 QueryExpr::CreateTable(_)
1919 | QueryExpr::CreateCollection(_)
1920 | QueryExpr::CreateVector(_)
1921 | QueryExpr::DropTable(_)
1922 | QueryExpr::DropGraph(_)
1923 | QueryExpr::DropVector(_)
1924 | QueryExpr::DropDocument(_)
1925 | QueryExpr::DropKv(_)
1926 | QueryExpr::DropCollection(_)
1927 | QueryExpr::Truncate(_)
1928 | QueryExpr::AlterTable(_)
1929 | QueryExpr::CreateIndex(_)
1930 | QueryExpr::DropIndex(_)
1931 | QueryExpr::CreateTimeSeries(_)
1932 | QueryExpr::DropTimeSeries(_)
1933 | QueryExpr::CreateQueue(_)
1934 | QueryExpr::AlterQueue(_)
1935 | QueryExpr::DropQueue(_)
1936 | QueryExpr::CreateTree(_)
1937 | QueryExpr::DropTree(_)
1938 | QueryExpr::CreatePolicy(_)
1939 | QueryExpr::DropPolicy(_)
1940 | QueryExpr::CreateView(_)
1941 | QueryExpr::DropView(_)
1942 | QueryExpr::RefreshMaterializedView(_)
1943 | QueryExpr::CreateSchema(_)
1944 | QueryExpr::DropSchema(_)
1945 | QueryExpr::CreateSequence(_)
1946 | QueryExpr::DropSequence(_)
1947 | QueryExpr::CreateServer(_)
1948 | QueryExpr::DropServer(_)
1949 | QueryExpr::CreateForeignTable(_)
1950 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
1951
1952 _ => None,
1958 }
1959}
1960
1961pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
1966 let mut out = Vec::new();
1967 walk_collections(expr, &mut out);
1968 out.sort();
1969 out.dedup();
1970 out
1971}
1972
1973fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
1974 match expr {
1975 QueryExpr::Table(t) => out.push(t.table.clone()),
1976 QueryExpr::Join(j) => {
1977 walk_collections(&j.left, out);
1978 walk_collections(&j.right, out);
1979 }
1980 QueryExpr::Insert(i) => out.push(i.table.clone()),
1981 QueryExpr::Update(u) => out.push(u.table.clone()),
1982 QueryExpr::Delete(d) => out.push(d.table.clone()),
1983 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
1984
1985 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
1990 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
1991 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
1992 QueryExpr::DropTable(q) => out.push(q.name.clone()),
1993 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
1994 QueryExpr::DropVector(q) => out.push(q.name.clone()),
1995 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
1996 QueryExpr::DropKv(q) => out.push(q.name.clone()),
1997 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
1998 QueryExpr::Truncate(q) => out.push(q.name.clone()),
1999 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
2000 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
2001 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
2002 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
2003 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
2004 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
2005 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
2006 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
2007 QueryExpr::QueueCommand(QueueCommand::Move {
2008 source,
2009 destination,
2010 ..
2011 }) => {
2012 out.push(source.clone());
2013 out.push(destination.clone());
2014 }
2015 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
2016 QueryExpr::CreateView(q) => out.push(q.name.clone()),
2017 QueryExpr::DropView(q) => out.push(q.name.clone()),
2018 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
2019
2020 _ => {}
2026 }
2027}
2028
2029impl RedDBRuntime {
2030 pub fn in_memory() -> RedDBResult<Self> {
2031 Self::with_options(RedDBOptions::in_memory())
2032 }
2033
2034 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
2038 self.inner.lock_manager.clone()
2039 }
2040
2041 #[inline(never)]
2042 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
2043 Self::with_pool(options, ConnectionPoolConfig::default())
2044 }
2045
2046 pub fn with_pool(
2047 options: RedDBOptions,
2048 pool_config: ConnectionPoolConfig,
2049 ) -> RedDBResult<Self> {
2050 let boot_open_start_ms = std::time::SystemTime::now()
2058 .duration_since(std::time::UNIX_EPOCH)
2059 .map(|d| d.as_millis() as u64)
2060 .unwrap_or(0);
2061 let db = Arc::new(
2062 RedDB::open_with_options(&options)
2063 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2064 );
2065 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2066 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2067 options
2068 .resolved_path("data.rdb")
2069 .with_extension("result-cache.l2"),
2070 ),
2071 )
2072 .map_err(|err| {
2073 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2074 })?;
2075 let storage_ready_ms = std::time::SystemTime::now()
2076 .duration_since(std::time::UNIX_EPOCH)
2077 .map(|d| d.as_millis() as u64)
2078 .unwrap_or(0);
2079
2080 let runtime = Self {
2081 inner: Arc::new(RuntimeInner {
2082 db,
2083 layout: PhysicalLayout::from_options(&options),
2084 indices: IndexCatalog::register_default_vector_graph(
2085 options.has_capability(crate::api::Capability::Table),
2086 options.has_capability(crate::api::Capability::Graph),
2087 ),
2088 pool_config,
2089 pool: Mutex::new(PoolState::default()),
2090 started_at_unix_ms: SystemTime::now()
2091 .duration_since(UNIX_EPOCH)
2092 .unwrap_or_default()
2093 .as_millis(),
2094 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2095 index_store: super::index_store::IndexStore::new(),
2096 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2097 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2098 query_cache: parking_lot::RwLock::new(
2099 crate::storage::query::planner::cache::PlanCache::new(1000),
2100 ),
2101 result_cache: parking_lot::RwLock::new((
2102 HashMap::new(),
2103 std::collections::VecDeque::new(),
2104 )),
2105 result_blob_cache,
2106 result_blob_entries: parking_lot::RwLock::new((
2107 HashMap::new(),
2108 std::collections::VecDeque::new(),
2109 )),
2110 ask_answer_cache_entries: parking_lot::RwLock::new((
2111 HashSet::new(),
2112 std::collections::VecDeque::new(),
2113 )),
2114 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2115 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2116 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2117 rmw_locks: RmwLockTable::new(),
2118 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2119 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2120 ec_worker: crate::ec::worker::EcWorker::new(),
2121 auth_store: parking_lot::RwLock::new(None),
2122 oauth_validator: parking_lot::RwLock::new(None),
2123 views: parking_lot::RwLock::new(HashMap::new()),
2124 materialized_views: parking_lot::RwLock::new(
2125 crate::storage::cache::result::MaterializedViewCache::new(),
2126 ),
2127 retention_sweeper: parking_lot::RwLock::new(
2128 crate::runtime::retention_sweeper::RetentionSweeperState::new(),
2129 ),
2130 snapshot_manager: Arc::new(
2131 crate::storage::transaction::snapshot::SnapshotManager::new(),
2132 ),
2133 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2134 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2135 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2136 lock_manager: Arc::new({
2137 let env = crate::runtime::config_overlay::collect_env_overrides();
2142 let timeout_ms = env
2143 .get("concurrency.locking.deadlock_timeout_ms")
2144 .and_then(|raw| raw.parse::<u64>().ok())
2145 .unwrap_or_else(|| {
2146 match crate::runtime::config_matrix::default_for(
2147 "concurrency.locking.deadlock_timeout_ms",
2148 ) {
2149 Some(crate::serde_json::Value::Number(n)) => n as u64,
2150 _ => 5000,
2151 }
2152 });
2153 let cfg = crate::storage::transaction::lock::LockConfig {
2154 default_timeout: std::time::Duration::from_millis(timeout_ms),
2155 ..Default::default()
2156 };
2157 crate::storage::transaction::lock::LockManager::new(cfg)
2158 }),
2159 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2160 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2161 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2162 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2163 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2164 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2165 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2166 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2167 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2168 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2169 &options,
2170 )),
2171 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2172 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2173 audit_log: {
2174 let data_path = options
2184 .data_path
2185 .clone()
2186 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2187 let (audit_dest, _) = crate::api::tier_wiring::current_log_destinations();
2188 Arc::new(crate::runtime::audit_log::AuditLogger::for_destination(
2189 &audit_dest,
2190 &data_path,
2191 ))
2192 },
2193 lease_lifecycle: std::sync::OnceLock::new(),
2194 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2195 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2196 schema_vocabulary: parking_lot::RwLock::new(
2197 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2198 ),
2199 slow_query_logger: {
2200 let fallback_dir = options
2213 .data_path
2214 .as_ref()
2215 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2216 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2217 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2218 .ok()
2219 .and_then(|s| s.parse::<u64>().ok())
2220 .unwrap_or(1000);
2221 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2222 .ok()
2223 .and_then(|s| s.parse::<u8>().ok())
2224 .unwrap_or(100);
2225 let (_, slow_dest) = crate::api::tier_wiring::current_log_destinations();
2226 crate::telemetry::slow_query_logger::SlowQueryLogger::for_destination(
2227 &slow_dest,
2228 &fallback_dir,
2229 threshold_ms,
2230 sample_pct,
2231 )
2232 },
2233 kv_stats: crate::runtime::KvStatsCounters::default(),
2234 metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
2235 metrics_tenant_activity_stats:
2236 crate::runtime::MetricsTenantActivityCounters::default(),
2237 queue_telemetry: Arc::new(
2238 crate::runtime::queue_telemetry::QueueTelemetryCounters::default(),
2239 ),
2240 kv_tag_index: crate::runtime::KvTagIndex::default(),
2241 chain_tip_cache: parking_lot::Mutex::new(HashMap::new()),
2242 chain_integrity_broken: parking_lot::Mutex::new(HashMap::new()),
2243 }),
2244 };
2245
2246 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2252 &runtime.inner.audit_log,
2253 ));
2254
2255 runtime
2263 .inner
2264 .lifecycle
2265 .set_restore_started_at_ms(boot_open_start_ms);
2266 runtime
2267 .inner
2268 .lifecycle
2269 .set_restore_ready_at_ms(storage_ready_ms);
2270 runtime
2271 .inner
2272 .lifecycle
2273 .set_wal_replay_started_at_ms(boot_open_start_ms);
2274 runtime
2275 .inner
2276 .lifecycle
2277 .set_wal_replay_ready_at_ms(storage_ready_ms);
2278
2279 let restored_cdc_lsn = runtime
2280 .inner
2281 .db
2282 .replication
2283 .as_ref()
2284 .map(|repl| {
2285 repl.logical_wal_spool
2286 .as_ref()
2287 .map(|spool| spool.current_lsn())
2288 .unwrap_or(0)
2289 })
2290 .unwrap_or(0)
2291 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2292 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2293 runtime.rehydrate_snapshot_xid_floor();
2294 runtime.bootstrap_system_keyed_collections()?;
2295 runtime.rehydrate_declared_column_schemas();
2296 runtime.load_probabilistic_state()?;
2297
2298 runtime.rehydrate_tenant_tables();
2302 runtime.rehydrate_materialized_view_descriptors();
2307 if let Some(repl) = &runtime.inner.db.replication {
2308 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2309 }
2310
2311 {
2313 let sys = SystemInfo::collect();
2314 runtime.inner.db.store().set_config_tree(
2315 "red.system",
2316 &crate::serde_json::json!({
2317 "pid": sys.pid,
2318 "cpu_cores": sys.cpu_cores,
2319 "total_memory_bytes": sys.total_memory_bytes,
2320 "available_memory_bytes": sys.available_memory_bytes,
2321 "os": sys.os,
2322 "arch": sys.arch,
2323 "hostname": sys.hostname,
2324 "started_at": SystemTime::now()
2325 .duration_since(UNIX_EPOCH)
2326 .unwrap_or_default()
2327 .as_millis() as u64
2328 }),
2329 );
2330
2331 let store = runtime.inner.db.store();
2333 if store
2334 .get_collection("red_config")
2335 .map(|m| m.query_all(|_| true).len())
2336 .unwrap_or(0)
2337 <= 10
2338 {
2339 store.set_config_tree("red.ai", &crate::json!({
2340 "default": crate::json!({
2341 "provider": "openai",
2342 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2343 }),
2344 "max_embedding_inputs": 256,
2345 "max_prompt_batch": 256,
2346 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2347 }));
2348 store.set_config_tree(
2349 "red.server",
2350 &crate::json!({
2351 "max_scan_limit": 1000,
2352 "max_body_size": 1048576,
2353 "read_timeout_ms": 5000,
2354 "write_timeout_ms": 5000
2355 }),
2356 );
2357 store.set_config_tree(
2358 "red.storage",
2359 &crate::json!({
2360 "page_size": 4096,
2361 "page_cache_capacity": 100000,
2362 "auto_checkpoint_pages": 1000,
2363 "snapshot_retention": 16,
2364 "verify_checksums": true,
2365 "segment": crate::json!({
2366 "max_entities": 100000,
2367 "max_bytes": 268435456_u64,
2368 "compression_level": 6
2369 }),
2370 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2371 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2372 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2373 }),
2374 );
2375 store.set_config_tree(
2376 "red.search",
2377 &crate::json!({
2378 "rag": crate::json!({
2379 "max_chunks_per_source": 10,
2380 "max_total_chunks": 25,
2381 "similarity_threshold": 0.8,
2382 "graph_depth": 2,
2383 "min_relevance": 0.3
2384 }),
2385 "fusion": crate::json!({
2386 "vector_weight": 0.5,
2387 "graph_weight": 0.3,
2388 "table_weight": 0.2,
2389 "dedup_threshold": 0.85
2390 })
2391 }),
2392 );
2393 store.set_config_tree(
2394 "red.auth",
2395 &crate::json!({
2396 "enabled": false,
2397 "session_ttl_secs": 3600,
2398 "require_auth": false
2399 }),
2400 );
2401 store.set_config_tree(
2402 "red.query",
2403 &crate::json!({
2404 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2405 "max_recursion_depth": 1000
2406 }),
2407 );
2408 store.set_config_tree(
2409 "red.indexes",
2410 &crate::json!({
2411 "auto_select": true,
2412 "bloom_filter": crate::json!({
2413 "enabled": true,
2414 "false_positive_rate": 0.01,
2415 "prune_on_scan": true
2416 }),
2417 "hash": crate::json!({ "enabled": true }),
2418 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2419 "spatial": crate::json!({ "enabled": true })
2420 }),
2421 );
2422 store.set_config_tree(
2423 "red.memtable",
2424 &crate::json!({
2425 "enabled": true,
2426 "max_bytes": 67108864_u64,
2427 "flush_threshold": 0.75
2428 }),
2429 );
2430 store.set_config_tree(
2431 "red.probabilistic",
2432 &crate::json!({
2433 "hll_registers": 16384,
2434 "sketch_default_width": 1000,
2435 "sketch_default_depth": 5,
2436 "filter_default_capacity": 100000
2437 }),
2438 );
2439 store.set_config_tree(
2440 "red.timeseries",
2441 &crate::json!({
2442 "default_chunk_size": 1024,
2443 "compression": crate::json!({
2444 "timestamps": "delta_of_delta",
2445 "values": "gorilla_xor"
2446 }),
2447 "default_retention_days": 0
2448 }),
2449 );
2450 store.set_config_tree(
2451 "red.queue",
2452 &crate::json!({
2453 "default_max_size": 0,
2454 "default_max_attempts": 3,
2455 "visibility_timeout_ms": 30000,
2456 "consumer_idle_timeout_ms": 60000
2457 }),
2458 );
2459 store.set_config_tree(
2460 "red.backup",
2461 &crate::json!({
2462 "enabled": false,
2463 "interval_secs": 3600,
2464 "retention_count": 24,
2465 "upload": false,
2466 "backend": "local"
2467 }),
2468 );
2469 store.set_config_tree(
2470 "red.wal",
2471 &crate::json!({
2472 "archive": crate::json!({
2473 "enabled": false,
2474 "retention_hours": 168,
2475 "prefix": "wal/"
2476 })
2477 }),
2478 );
2479 store.set_config_tree(
2480 "red.cdc",
2481 &crate::json!({
2482 "enabled": true,
2483 "buffer_size": 100000
2484 }),
2485 );
2486 store.set_config_tree(
2487 "red.config.secret",
2488 &crate::json!({
2489 "auto_encrypt": true,
2490 "auto_decrypt": true
2491 }),
2492 );
2493 }
2494
2495 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2502
2503 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2510 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2511 if lehman_yao {
2512 tracing::info!(
2513 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2514 );
2515 }
2516
2517 let overlay_path = crate::runtime::config_overlay::config_file_path();
2522 let _ =
2523 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2524 }
2525
2526 {
2530 let store = runtime.inner.db.store();
2531 for name in crate::application::vcs_collections::ALL {
2532 let _ = store.get_or_create_collection(*name);
2533 }
2534 store.set_config_tree(
2537 crate::application::vcs_collections::CONFIG_NAMESPACE,
2538 &crate::json!({
2539 "default_branch": "main",
2540 "author": crate::json!({
2541 "name": "reddb",
2542 "email": "reddb@localhost"
2543 }),
2544 "protected_branches": crate::json!(["main"]),
2545 "closure": crate::json!({
2546 "enabled": true,
2547 "lazy": true
2548 }),
2549 "merge": crate::json!({
2550 "default_strategy": "auto",
2551 "fast_forward": true
2552 })
2553 }),
2554 );
2555 }
2556
2557 {
2560 let store = runtime.inner.db.store();
2561 for name in crate::application::migration_collections::ALL {
2562 let _ = store.get_or_create_collection(*name);
2563 }
2564 }
2565
2566 {
2581 let weak = Arc::downgrade(&runtime.inner);
2582 std::thread::Builder::new()
2583 .name("reddb-maintenance".into())
2584 .spawn(move || {
2585 let tick = std::time::Duration::from_millis(200);
2586 let work_interval = std::time::Duration::from_secs(60);
2587 let mut last_work = std::time::Instant::now();
2588 loop {
2589 std::thread::sleep(tick);
2590 let Some(inner) = weak.upgrade() else {
2591 break;
2594 };
2595 if last_work.elapsed() >= work_interval {
2596 let _stats = inner.db.store().context_index().stats();
2597 last_work = std::time::Instant::now();
2598 }
2599 }
2600 })
2601 .ok();
2602 }
2603
2604 {
2606 let store = runtime.inner.db.store();
2607 let mut backup_enabled = false;
2608 let mut backup_interval = 3600u64;
2609
2610 if let Some(manager) = store.get_collection("red_config") {
2611 manager.for_each_entity(|entity| {
2612 if let Some(row) = entity.data.as_row() {
2613 let key = row.get_field("key").and_then(|v| match v {
2614 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2615 _ => None,
2616 });
2617 let val = row.get_field("value");
2618 if key == Some("red.config.backup.enabled") {
2619 backup_enabled = match val {
2620 Some(crate::storage::schema::Value::Boolean(true)) => true,
2621 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2622 _ => false,
2623 };
2624 } else if key == Some("red.config.backup.interval_secs") {
2625 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2626 backup_interval = *n as u64;
2627 }
2628 }
2629 }
2630 true
2631 });
2632 }
2633
2634 if backup_enabled {
2635 runtime.inner.backup_scheduler.set_interval(backup_interval);
2636 let rt = runtime.clone();
2637 runtime
2638 .inner
2639 .backup_scheduler
2640 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2641 }
2642 }
2643
2644 {
2646 runtime
2647 .inner
2648 .ec_registry
2649 .load_from_config_store(runtime.inner.db.store().as_ref());
2650 if !runtime.inner.ec_registry.async_configs().is_empty() {
2651 runtime.inner.ec_worker.start(
2652 Arc::clone(&runtime.inner.ec_registry),
2653 Arc::clone(&runtime.inner.db.store()),
2654 );
2655 }
2656 }
2657
2658 if let crate::replication::ReplicationRole::Replica { primary_addr } =
2659 runtime.inner.db.options().replication.role.clone()
2660 {
2661 let rt = runtime.clone();
2662 std::thread::Builder::new()
2663 .name("reddb-replica".into())
2664 .spawn(move || rt.run_replica_loop(primary_addr))
2665 .ok();
2666 }
2667
2668 runtime.inner.lifecycle.mark_ready();
2673
2674 {
2683 let weak_inner = Arc::downgrade(&runtime.inner);
2684 std::thread::Builder::new()
2685 .name("reddb-mv-scheduler".into())
2686 .spawn(move || loop {
2687 std::thread::sleep(std::time::Duration::from_millis(50));
2688 let Some(inner) = weak_inner.upgrade() else {
2689 break;
2690 };
2691 let rt = RedDBRuntime { inner };
2692 rt.refresh_due_materialized_views();
2693 })
2694 .ok();
2695 }
2696
2697 {
2707 let weak_inner = Arc::downgrade(&runtime.inner);
2708 std::thread::Builder::new()
2709 .name("reddb-retention-sweeper".into())
2710 .spawn(move || loop {
2711 std::thread::sleep(std::time::Duration::from_millis(500));
2712 let Some(inner) = weak_inner.upgrade() else {
2713 break;
2714 };
2715 let rt = RedDBRuntime { inner };
2716 rt.sweep_retention_tick(
2717 crate::runtime::retention_sweeper::DEFAULT_SWEEPER_BATCH,
2718 );
2719 })
2720 .ok();
2721 }
2722
2723 Ok(runtime)
2724 }
2725
2726 fn rehydrate_snapshot_xid_floor(&self) {
2727 let store = self.inner.db.store();
2728 for collection in store.list_collections() {
2729 let Some(manager) = store.get_collection(&collection) else {
2730 continue;
2731 };
2732 for entity in manager.query_all(|_| true) {
2733 self.inner
2734 .snapshot_manager
2735 .observe_committed_xid(entity.xmin);
2736 self.inner
2737 .snapshot_manager
2738 .observe_committed_xid(entity.xmax);
2739 }
2740 }
2741 }
2742
2743 pub(crate) fn ensure_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
2756 let store = self.inner.db.store();
2757 let mut changed = false;
2758 if store.get_collection(name).is_none() {
2759 store.get_or_create_collection(name);
2760 changed = true;
2761 }
2762 if self.inner.db.collection_contract(name).is_none() {
2763 self.inner
2764 .db
2765 .save_collection_contract(system_keyed_collection_contract(
2766 name,
2767 crate::catalog::CollectionModel::Table,
2768 ))
2769 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2770 changed = true;
2771 }
2772 if changed {
2773 self.inner
2774 .db
2775 .persist_metadata()
2776 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2777 }
2778 Ok(())
2779 }
2780
2781 pub(crate) fn drop_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
2786 let store = self.inner.db.store();
2787 if store.get_collection(name).is_none() {
2788 return Ok(());
2789 }
2790 store
2791 .drop_collection(name)
2792 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2793 if self.inner.db.collection_contract(name).is_some() {
2796 self.inner
2797 .db
2798 .remove_collection_contract(name)
2799 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2800 }
2801 self.invalidate_result_cache();
2802 self.inner
2803 .db
2804 .persist_metadata()
2805 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2806 Ok(())
2807 }
2808
2809 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
2810 let mut changed = false;
2811 for (name, model) in [
2812 ("red.config", crate::catalog::CollectionModel::Config),
2813 ("red.vault", crate::catalog::CollectionModel::Vault),
2814 (
2818 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION,
2819 crate::catalog::CollectionModel::Config,
2820 ),
2821 ] {
2822 if self.inner.db.store().get_collection(name).is_none() {
2823 self.inner.db.store().get_or_create_collection(name);
2824 changed = true;
2825 }
2826 if self.inner.db.collection_contract(name).is_none() {
2827 self.inner
2828 .db
2829 .save_collection_contract(system_keyed_collection_contract(name, model))
2830 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2831 changed = true;
2832 }
2833 }
2834 if changed {
2835 self.inner
2836 .db
2837 .persist_metadata()
2838 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2839 }
2840 Ok(())
2841 }
2842
2843 pub fn db(&self) -> Arc<RedDB> {
2844 Arc::clone(&self.inner.db)
2845 }
2846
2847 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
2852 &self.inner.index_store
2853 }
2854
2855 pub(crate) fn schema_vocabulary_apply(
2860 &self,
2861 event: crate::runtime::schema_vocabulary::DdlEvent,
2862 ) {
2863 self.inner.schema_vocabulary.write().on_ddl(event);
2864 }
2865
2866 pub fn schema_vocabulary_lookup(
2871 &self,
2872 token: &str,
2873 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
2874 self.inner.schema_vocabulary.read().lookup(token).to_vec()
2875 }
2876
2877 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
2881 *self.inner.auth_store.write() = Some(store);
2882 }
2883
2884 pub fn auth_store(&self) -> Option<Arc<crate::auth::store::AuthStore>> {
2887 self.inner.auth_store.read().clone()
2888 }
2889
2890 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
2892 self.inner
2893 .auth_store
2894 .read()
2895 .as_ref()
2896 .and_then(|store| store.vault_kv_get(key))
2897 }
2898
2899 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
2902 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
2903 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
2904 })?;
2905 store
2906 .vault_kv_try_set(key, value)
2907 .map_err(|err| RedDBError::Query(err.to_string()))
2908 }
2909
2910 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
2914 *self.inner.oauth_validator.write() = validator;
2915 }
2916
2917 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
2921 self.inner.oauth_validator.read().clone()
2922 }
2923
2924 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
2928 let guard = self.inner.auth_store.read();
2929 guard.as_ref().and_then(|s| s.vault_secret_key())
2930 }
2931
2932 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
2938 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2939 if let Some(crate::storage::schema::Value::Boolean(b)) =
2940 crate::runtime::config_overlay::coerce_env_value(key, raw)
2941 {
2942 return b;
2943 }
2944 }
2945 let store = self.inner.db.store();
2946 let Some(manager) = store.get_collection("red_config") else {
2947 return default;
2948 };
2949 let mut result = default;
2950 let mut latest_id: u64 = 0;
2951 manager.for_each_entity(|entity| {
2952 if let Some(row) = entity.data.as_row() {
2953 let entry_key = row.get_field("key").and_then(|v| match v {
2954 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2955 _ => None,
2956 });
2957 if entry_key == Some(key) {
2958 let id = entity.id.raw();
2959 if id >= latest_id {
2960 latest_id = id;
2961 result = match row.get_field("value") {
2962 Some(crate::storage::schema::Value::Boolean(b)) => *b,
2963 Some(crate::storage::schema::Value::Text(s)) => {
2964 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
2965 }
2966 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
2967 _ => default,
2968 };
2969 }
2970 }
2971 }
2972 true
2973 });
2974 result
2975 }
2976
2977 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
2978 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2979 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
2980 crate::runtime::config_overlay::coerce_env_value(key, raw)
2981 {
2982 return n;
2983 }
2984 }
2985 let store = self.inner.db.store();
2986 let Some(manager) = store.get_collection("red_config") else {
2987 return default;
2988 };
2989 let mut result = default;
2990 let mut latest_id: u64 = 0;
2991 manager.for_each_entity(|entity| {
2992 if let Some(row) = entity.data.as_row() {
2993 let entry_key = row.get_field("key").and_then(|v| match v {
2994 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2995 _ => None,
2996 });
2997 if entry_key == Some(key) {
2998 let id = entity.id.raw();
2999 if id >= latest_id {
3000 latest_id = id;
3001 result = match row.get_field("value") {
3002 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
3003 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
3004 Some(crate::storage::schema::Value::Text(s)) => {
3005 s.parse::<u64>().unwrap_or(default)
3006 }
3007 _ => default,
3008 };
3009 }
3010 }
3011 }
3012 true
3013 });
3014 result
3015 }
3016
3017 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
3018 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3019 if let Ok(n) = raw.parse::<f64>() {
3020 return n;
3021 }
3022 }
3023 let store = self.inner.db.store();
3024 let Some(manager) = store.get_collection("red_config") else {
3025 return default;
3026 };
3027 let mut result = default;
3028 let mut latest_id: u64 = 0;
3029 manager.for_each_entity(|entity| {
3030 if let Some(row) = entity.data.as_row() {
3031 let entry_key = row.get_field("key").and_then(|v| match v {
3032 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3033 _ => None,
3034 });
3035 if entry_key == Some(key) {
3036 let id = entity.id.raw();
3037 if id >= latest_id {
3038 latest_id = id;
3039 result = match row.get_field("value") {
3040 Some(crate::storage::schema::Value::Float(n)) => *n,
3041 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
3042 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
3043 Some(crate::storage::schema::Value::Text(s)) => {
3044 s.parse::<f64>().unwrap_or(default)
3045 }
3046 _ => default,
3047 };
3048 }
3049 }
3050 }
3051 true
3052 });
3053 result
3054 }
3055
3056 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
3057 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3058 return raw.clone();
3059 }
3060 let store = self.inner.db.store();
3061 let Some(manager) = store.get_collection("red_config") else {
3062 return default.to_string();
3063 };
3064 let mut result = default.to_string();
3065 let mut latest_id: u64 = 0;
3066 manager.for_each_entity(|entity| {
3067 if let Some(row) = entity.data.as_row() {
3068 let entry_key = row.get_field("key").and_then(|v| match v {
3069 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3070 _ => None,
3071 });
3072 if entry_key == Some(key) {
3073 let id = entity.id.raw();
3074 if id >= latest_id {
3075 latest_id = id;
3076 if let Some(crate::storage::schema::Value::Text(value)) =
3077 row.get_field("value")
3078 {
3079 result = value.to_string();
3080 }
3081 }
3082 }
3083 }
3084 true
3085 });
3086 result
3087 }
3088
3089 fn latest_metadata_for(
3090 &self,
3091 collection: &str,
3092 entity_id: u64,
3093 ) -> Option<crate::serde_json::Value> {
3094 self.inner
3095 .db
3096 .store()
3097 .get_metadata(collection, EntityId::new(entity_id))
3098 .map(|metadata| metadata_to_json(&metadata))
3099 }
3100
3101 fn persist_replica_lsn(&self, lsn: u64) {
3102 self.inner.db.store().set_config_tree(
3103 "red.replication",
3104 &crate::json!({
3105 "last_applied_lsn": lsn
3106 }),
3107 );
3108 }
3109
3110 fn persist_replication_health(
3111 &self,
3112 state: &str,
3113 last_error: &str,
3114 primary_lsn: Option<u64>,
3115 oldest_available_lsn: Option<u64>,
3116 ) {
3117 self.inner.db.store().set_config_tree(
3118 "red.replication",
3119 &crate::json!({
3120 "state": state,
3121 "last_error": last_error,
3122 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
3123 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
3124 "updated_at_unix_ms": SystemTime::now()
3125 .duration_since(UNIX_EPOCH)
3126 .unwrap_or_default()
3127 .as_millis() as u64
3128 }),
3129 );
3130 }
3131
3132 pub(crate) fn secret_auto_encrypt(&self) -> bool {
3135 self.config_bool("red.config.secret.auto_encrypt", true)
3136 }
3137
3138 pub(crate) fn secret_auto_decrypt(&self) -> bool {
3143 self.config_bool("red.config.secret.auto_decrypt", true)
3144 }
3145
3146 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
3153 if !self.secret_auto_decrypt() {
3154 return;
3155 }
3156 let Some(key) = self.secret_aes_key() else {
3157 return;
3158 };
3159 for record in result.result.records.iter_mut() {
3160 for value in record.values_mut() {
3161 if let Value::Secret(ref bytes) = value {
3162 if let Some(plain) =
3163 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
3164 {
3165 if let Ok(text) = String::from_utf8(plain) {
3166 *value = Value::text(text);
3167 }
3168 }
3169 }
3170 }
3171 }
3172 }
3173
3174 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
3182 crate::runtime::mutation::MutationEngine::new(self)
3183 }
3184
3185 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
3196 self.inner.write_gate.check(kind)
3197 }
3198
3199 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
3203 &self.inner.write_gate
3204 }
3205
3206 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
3210 &self.inner.lifecycle
3211 }
3212
3213 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
3215 &self.inner.resource_limits
3216 }
3217
3218 pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
3220 &self.inner.audit_log
3221 }
3222
3223 pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
3227 Arc::clone(&self.inner.audit_log)
3228 }
3229
3230 pub(crate) fn queue_telemetry(
3234 &self,
3235 ) -> &crate::runtime::queue_telemetry::QueueTelemetryCounters {
3236 &self.inner.queue_telemetry
3237 }
3238
3239 pub fn queue_telemetry_snapshot(
3242 &self,
3243 ) -> crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3244 crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3245 delivered: self.inner.queue_telemetry.delivered_snapshot(),
3246 acked: self.inner.queue_telemetry.acked_snapshot(),
3247 nacked: self.inner.queue_telemetry.nacked_snapshot(),
3248 }
3249 }
3250
3251 pub fn queue_pending_counts(&self) -> Vec<((String, String), u64)> {
3256 let store = self.inner.db.store();
3257 crate::runtime::impl_queue::pending_counts_by_group(store.as_ref())
3258 .into_iter()
3259 .collect()
3260 }
3261
3262 pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
3267 Arc::clone(&self.inner.write_gate)
3268 }
3269
3270 pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3273 self.inner.lease_lifecycle.get()
3274 }
3275
3276 pub fn set_lease_lifecycle(
3279 &self,
3280 lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
3281 ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3282 self.inner.lease_lifecycle.set(lifecycle)
3283 }
3284
3285 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3290 if self.inner.resource_limits.batch_size_exceeded(requested) {
3291 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3292 return Err(RedDBError::QuotaExceeded(format!(
3293 "max_batch_size:{requested}:{max}"
3294 )));
3295 }
3296 Ok(())
3297 }
3298
3299 pub fn check_db_size(&self) -> RedDBResult<()> {
3305 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3306 return Ok(());
3307 };
3308 if limit == 0 {
3309 return Ok(());
3310 }
3311 let Some(path) = self.inner.db.path() else {
3312 return Ok(());
3313 };
3314 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3315 if current > limit {
3316 return Err(RedDBError::QuotaExceeded(format!(
3317 "max_db_size_bytes:{current}:{limit}"
3318 )));
3319 }
3320 Ok(())
3321 }
3322
3323 pub fn graceful_shutdown(
3341 &self,
3342 backup_on_shutdown: bool,
3343 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
3344 if !self.inner.lifecycle.begin_shutdown() {
3345 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
3349 }
3350
3351 let started_ms = std::time::SystemTime::now()
3352 .duration_since(std::time::UNIX_EPOCH)
3353 .map(|d| d.as_millis() as u64)
3354 .unwrap_or(0);
3355 let mut report = crate::runtime::lifecycle::ShutdownReport {
3356 started_at_ms: started_ms,
3357 ..Default::default()
3358 };
3359
3360 let flush_res = self.inner.db.flush_local_only();
3366 report.flushed_wal = flush_res.is_ok();
3367 report.final_checkpoint = flush_res.is_ok();
3368 if let Err(err) = &flush_res {
3369 tracing::error!(
3370 target: "reddb::lifecycle",
3371 error = %err,
3372 "graceful_shutdown: local flush failed"
3373 );
3374 } else if let Err(lease_err) =
3375 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
3376 {
3377 tracing::warn!(
3378 target: "reddb::serverless::lease",
3379 error = %lease_err,
3380 "graceful_shutdown: remote upload skipped — lease not held"
3381 );
3382 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
3383 tracing::error!(
3384 target: "reddb::lifecycle",
3385 error = %err,
3386 "graceful_shutdown: remote upload failed"
3387 );
3388 }
3389
3390 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
3395 match self.trigger_backup() {
3401 Ok(result) => {
3402 report.backup_uploaded = result.uploaded;
3403 }
3404 Err(err) => {
3405 tracing::warn!(
3406 target: "reddb::lifecycle",
3407 error = %err,
3408 "graceful_shutdown: final backup skipped"
3409 );
3410 }
3411 }
3412 }
3413
3414 let completed_ms = std::time::SystemTime::now()
3415 .duration_since(std::time::UNIX_EPOCH)
3416 .map(|d| d.as_millis() as u64)
3417 .unwrap_or(started_ms);
3418 report.completed_at_ms = completed_ms;
3419 report.duration_ms = completed_ms.saturating_sub(started_ms);
3420
3421 self.inner.lifecycle.finish_shutdown(report.clone());
3422 Ok(report)
3423 }
3424
3425 pub(crate) fn cdc_emit_no_cache_invalidate(
3431 &self,
3432 operation: crate::replication::cdc::ChangeOperation,
3433 collection: &str,
3434 entity_id: u64,
3435 entity_kind: &str,
3436 ) -> u64 {
3437 let lsn = self
3438 .inner
3439 .cdc
3440 .emit(operation, collection, entity_id, entity_kind);
3441
3442 if let Some(ref primary) = self.inner.db.replication {
3444 let store = self.inner.db.store();
3445 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3446 None
3447 } else {
3448 store.get(collection, EntityId::new(entity_id))
3449 };
3450 let record = ChangeRecord {
3451 lsn,
3452 timestamp: SystemTime::now()
3453 .duration_since(UNIX_EPOCH)
3454 .unwrap_or_default()
3455 .as_millis() as u64,
3456 operation,
3457 collection: collection.to_string(),
3458 entity_id,
3459 entity_kind: entity_kind.to_string(),
3460 entity_bytes: entity
3461 .as_ref()
3462 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3463 metadata: self.latest_metadata_for(collection, entity_id),
3464 refresh_records: None,
3465 };
3466 let encoded = record.encode();
3467 primary.wal_buffer.append(record.lsn, encoded.clone());
3468 if let Some(spool) = &primary.logical_wal_spool {
3469 let _ = spool.append(record.lsn, &encoded);
3470 }
3471 }
3472 lsn
3473 }
3474
3475 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3476 &self,
3477 collection: &str,
3478 ids: &[EntityId],
3479 entity_kind: &str,
3480 ) -> Vec<u64> {
3481 if ids.is_empty() {
3482 return Vec::new();
3483 }
3484
3485 if self.inner.db.replication.is_none() {
3489 return self.inner.cdc.emit_batch_same_collection(
3490 crate::replication::cdc::ChangeOperation::Insert,
3491 collection,
3492 entity_kind,
3493 ids.iter().map(|id| id.raw()),
3494 );
3495 }
3496
3497 ids.iter()
3500 .map(|id| {
3501 self.cdc_emit_no_cache_invalidate(
3502 crate::replication::cdc::ChangeOperation::Insert,
3503 collection,
3504 id.raw(),
3505 entity_kind,
3506 )
3507 })
3508 .collect()
3509 }
3510
3511 pub fn cdc_emit(
3512 &self,
3513 operation: crate::replication::cdc::ChangeOperation,
3514 collection: &str,
3515 entity_id: u64,
3516 entity_kind: &str,
3517 ) -> u64 {
3518 let lsn = self
3519 .inner
3520 .cdc
3521 .emit(operation, collection, entity_id, entity_kind);
3522 self.invalidate_result_cache_for_table(collection);
3528
3529 if let Some(ref primary) = self.inner.db.replication {
3531 let store = self.inner.db.store();
3532 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3533 None
3534 } else {
3535 store.get(collection, EntityId::new(entity_id))
3536 };
3537 let record = ChangeRecord {
3538 lsn,
3539 timestamp: SystemTime::now()
3540 .duration_since(UNIX_EPOCH)
3541 .unwrap_or_default()
3542 .as_millis() as u64,
3543 operation,
3544 collection: collection.to_string(),
3545 entity_id,
3546 entity_kind: entity_kind.to_string(),
3547 entity_bytes: entity
3548 .as_ref()
3549 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
3550 metadata: self.latest_metadata_for(collection, entity_id),
3551 refresh_records: None,
3552 };
3553 let encoded = record.encode();
3554 primary.wal_buffer.append(record.lsn, encoded.clone());
3555 if let Some(spool) = &primary.logical_wal_spool {
3556 let _ = spool.append(record.lsn, &encoded);
3557 }
3558 }
3559 lsn
3560 }
3561
3562 pub(crate) fn cdc_emit_kv(
3563 &self,
3564 operation: crate::replication::cdc::ChangeOperation,
3565 collection: &str,
3566 key: &str,
3567 entity_id: u64,
3568 before: Option<crate::json::Value>,
3569 after: Option<crate::json::Value>,
3570 ) -> u64 {
3571 let lsn = self
3572 .inner
3573 .cdc
3574 .emit_kv(operation, collection, key, entity_id, before, after);
3575 self.inner.kv_stats.incr_watch_events_emitted();
3576 self.invalidate_result_cache_for_table(collection);
3577 lsn
3578 }
3579
3580 pub(crate) fn record_kv_watch_event(
3581 &self,
3582 operation: crate::replication::cdc::ChangeOperation,
3583 collection: &str,
3584 key: &str,
3585 entity_id: u64,
3586 before: Option<crate::json::Value>,
3587 after: Option<crate::json::Value>,
3588 ) {
3589 if self.current_xid().is_some() {
3590 let conn_id = current_connection_id();
3591 let event = crate::replication::cdc::KvWatchEvent {
3592 collection: collection.to_string(),
3593 key: key.to_string(),
3594 op: operation,
3595 before,
3596 after,
3597 lsn: 0,
3598 committed_at: 0,
3599 dropped_event_count: 0,
3600 };
3601 self.inner
3602 .pending_kv_watch_events
3603 .write()
3604 .entry(conn_id)
3605 .or_default()
3606 .push(event);
3607 return;
3608 }
3609
3610 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
3611 }
3612
3613 pub(crate) fn cdc_emit_prebuilt(
3614 &self,
3615 operation: crate::replication::cdc::ChangeOperation,
3616 collection: &str,
3617 entity: &UnifiedEntity,
3618 entity_kind: &str,
3619 metadata: Option<&crate::storage::Metadata>,
3620 invalidate_cache: bool,
3621 ) -> u64 {
3622 self.cdc_emit_prebuilt_with_columns(
3623 operation,
3624 collection,
3625 entity,
3626 entity_kind,
3627 metadata,
3628 invalidate_cache,
3629 None,
3630 )
3631 }
3632
3633 pub(crate) fn cdc_emit_prebuilt_with_columns(
3640 &self,
3641 operation: crate::replication::cdc::ChangeOperation,
3642 collection: &str,
3643 entity: &UnifiedEntity,
3644 entity_kind: &str,
3645 metadata: Option<&crate::storage::Metadata>,
3646 invalidate_cache: bool,
3647 changed_columns: Option<Vec<String>>,
3648 ) -> u64 {
3649 if invalidate_cache {
3650 self.invalidate_result_cache();
3651 }
3652
3653 let public_id = entity.logical_id().raw();
3654 let lsn = self.inner.cdc.emit_with_columns(
3655 operation,
3656 collection,
3657 public_id,
3658 entity_kind,
3659 changed_columns,
3660 );
3661
3662 if let Some(ref primary) = self.inner.db.replication {
3663 let store = self.inner.db.store();
3664 let record = ChangeRecord {
3665 lsn,
3666 timestamp: SystemTime::now()
3667 .duration_since(UNIX_EPOCH)
3668 .unwrap_or_default()
3669 .as_millis() as u64,
3670 operation,
3671 collection: collection.to_string(),
3672 entity_id: entity.id.raw(),
3673 entity_kind: entity_kind.to_string(),
3674 entity_bytes: Some(UnifiedStore::serialize_entity(
3675 entity,
3676 store.format_version(),
3677 )),
3678 metadata: metadata
3679 .map(metadata_to_json)
3680 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
3681 refresh_records: None,
3682 };
3683 let encoded = record.encode();
3684 primary.wal_buffer.append(record.lsn, encoded.clone());
3685 if let Some(spool) = &primary.logical_wal_spool {
3686 let _ = spool.append(record.lsn, &encoded);
3687 }
3688 }
3689
3690 lsn
3691 }
3692
3693 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
3694 &self,
3695 operation: crate::replication::cdc::ChangeOperation,
3696 entity_kind: &str,
3697 items: I,
3698 invalidate_cache: bool,
3699 ) where
3700 I: IntoIterator<
3701 Item = (
3702 &'a str,
3703 &'a UnifiedEntity,
3704 Option<&'a crate::storage::Metadata>,
3705 ),
3706 >,
3707 {
3708 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
3709 items.into_iter().collect();
3710 if items.is_empty() {
3711 return;
3712 }
3713
3714 if invalidate_cache {
3715 self.invalidate_result_cache();
3716 }
3717
3718 for (collection, entity, metadata) in items {
3719 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
3720 }
3721 }
3722
3723 fn run_replica_loop(&self, primary_addr: String) {
3724 let endpoint = if primary_addr.starts_with("http") {
3725 primary_addr
3726 } else {
3727 format!("http://{primary_addr}")
3728 };
3729 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
3730 let max_count = self.inner.db.options().replication.max_batch_size;
3731 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
3732
3733 let runtime = match tokio::runtime::Builder::new_current_thread()
3734 .enable_all()
3735 .build()
3736 {
3737 Ok(runtime) => runtime,
3738 Err(_) => return,
3739 };
3740
3741 runtime.block_on(async move {
3742 use crate::grpc::proto::red_db_client::RedDbClient;
3743 use crate::grpc::proto::JsonPayloadRequest;
3744
3745 let mut client = loop {
3746 match RedDbClient::connect(endpoint.clone()).await {
3747 Ok(client) => {
3748 self.persist_replication_health("connecting", "", None, None);
3749 break client;
3750 }
3751 Err(_) => {
3752 self.persist_replication_health(
3753 "connecting",
3754 "waiting for primary connection",
3755 None,
3756 None,
3757 );
3758 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
3759 }
3760 }
3761 };
3762
3763 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
3768
3769 loop {
3770 let payload = crate::json!({
3771 "since_lsn": since_lsn,
3772 "max_count": max_count
3773 });
3774 let request = tonic::Request::new(JsonPayloadRequest {
3775 payload_json: crate::json::to_string(&payload)
3776 .unwrap_or_else(|_| "{}".to_string()),
3777 });
3778
3779 if let Ok(response) = client.pull_wal_records(request).await {
3780 if let Ok(value) =
3781 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
3782 {
3783 let current_lsn =
3784 value.get("current_lsn").and_then(crate::json::Value::as_u64);
3785 let oldest_available_lsn = value
3786 .get("oldest_available_lsn")
3787 .and_then(crate::json::Value::as_u64);
3788 if since_lsn > 0
3789 && oldest_available_lsn
3790 .map(|oldest| oldest > since_lsn.saturating_add(1))
3791 .unwrap_or(false)
3792 {
3793 self.persist_replication_health(
3794 "stalled_gap",
3795 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
3796 current_lsn,
3797 oldest_available_lsn,
3798 );
3799 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
3800 continue;
3801 }
3802 if let Some(records) =
3803 value.get("records").and_then(crate::json::Value::as_array)
3804 {
3805 for record in records {
3806 let Some(data_hex) =
3807 record.get("data").and_then(crate::json::Value::as_str)
3808 else {
3809 continue;
3810 };
3811 let Ok(data) = hex::decode(data_hex) else {
3812 self.inner.replica_apply_metrics.record(
3813 crate::replication::logical::ApplyErrorKind::Decode,
3814 );
3815 self.persist_replication_health(
3816 "apply_error",
3817 "failed to decode WAL record hex payload",
3818 current_lsn,
3819 oldest_available_lsn,
3820 );
3821 continue;
3822 };
3823 let Ok(change) = ChangeRecord::decode(&data) else {
3824 self.inner.replica_apply_metrics.record(
3825 crate::replication::logical::ApplyErrorKind::Decode,
3826 );
3827 self.persist_replication_health(
3828 "apply_error",
3829 "failed to decode logical WAL record",
3830 current_lsn,
3831 oldest_available_lsn,
3832 );
3833 continue;
3834 };
3835 match applier.apply(
3836 self.inner.db.as_ref(),
3837 &change,
3838 ApplyMode::Replica,
3839 ) {
3840 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
3841 self.invalidate_result_cache_for_table(&change.collection);
3842 since_lsn = since_lsn.max(change.lsn);
3843 self.persist_replica_lsn(since_lsn);
3844 }
3845 Ok(_) => {
3846 }
3848 Err(err) => {
3849 self.inner.replica_apply_metrics.record(err.kind());
3850 match &err {
3859 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
3860 crate::telemetry::operator_event::OperatorEvent::Divergence {
3861 peer: "primary".to_string(),
3862 leader_lsn: *lsn,
3863 follower_lsn: since_lsn,
3864 }
3865 .emit_global();
3866 }
3867 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
3868 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
3869 peer: "primary".to_string(),
3870 reason: format!("stalled gap last={last} next={next}"),
3871 }
3872 .emit_global();
3873 }
3874 _ => {}
3875 }
3876 let kind = match &err {
3877 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
3878 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
3879 _ => "apply_error",
3880 };
3881 self.persist_replication_health(
3882 kind,
3883 &format!("replica apply rejected: {err}"),
3884 current_lsn,
3885 oldest_available_lsn,
3886 );
3887 break;
3898 }
3899 }
3900 }
3901 }
3902 self.persist_replication_health(
3903 "healthy",
3904 "",
3905 current_lsn,
3906 oldest_available_lsn,
3907 );
3908 } else {
3909 self.persist_replication_health(
3910 "apply_error",
3911 "failed to parse pull_wal_records response",
3912 None,
3913 None,
3914 );
3915 }
3916 } else {
3917 self.persist_replication_health(
3918 "connecting",
3919 "primary pull_wal_records request failed",
3920 None,
3921 None,
3922 );
3923 }
3924
3925 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
3926 }
3927 });
3928 }
3929
3930 pub fn cdc_poll(
3932 &self,
3933 since_lsn: u64,
3934 max_count: usize,
3935 ) -> Vec<crate::replication::cdc::ChangeEvent> {
3936 self.inner.cdc.poll(since_lsn, max_count)
3937 }
3938
3939 pub fn cdc_current_lsn(&self) -> u64 {
3943 self.inner.cdc.current_lsn()
3944 }
3945
3946 pub fn kv_watch_events_since(
3947 &self,
3948 collection: &str,
3949 key: &str,
3950 since_lsn: u64,
3951 max_count: usize,
3952 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3953 self.inner
3954 .cdc
3955 .poll(since_lsn, max_count)
3956 .into_iter()
3957 .filter_map(|event| event.kv)
3958 .filter(|event| event.collection == collection && event.key == key)
3959 .collect()
3960 }
3961
3962 pub fn kv_watch_events_since_prefix(
3963 &self,
3964 collection: &str,
3965 prefix: &str,
3966 since_lsn: u64,
3967 max_count: usize,
3968 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3969 self.inner
3970 .cdc
3971 .poll(since_lsn, max_count)
3972 .into_iter()
3973 .filter_map(|event| event.kv)
3974 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
3975 .collect()
3976 }
3977
3978 pub(crate) fn kv_watch_subscribe<'a>(
3979 &'a self,
3980 collection: impl Into<String>,
3981 key: impl Into<String>,
3982 from_lsn: Option<u64>,
3983 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3984 crate::runtime::kv_watch::KvWatchStream::subscribe(
3985 &self.inner.cdc,
3986 &self.inner.kv_stats,
3987 collection,
3988 key,
3989 from_lsn,
3990 self.kv_watch_idle_timeout_ms(),
3991 )
3992 }
3993
3994 pub(crate) fn kv_watch_subscribe_prefix<'a>(
3995 &'a self,
3996 collection: impl Into<String>,
3997 prefix: impl Into<String>,
3998 from_lsn: Option<u64>,
3999 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
4000 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
4001 &self.inner.cdc,
4002 &self.inner.kv_stats,
4003 collection,
4004 prefix,
4005 from_lsn,
4006 self.kv_watch_idle_timeout_ms(),
4007 )
4008 }
4009
4010 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
4011 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
4012 }
4013
4014 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
4016 self.inner.backup_scheduler.status()
4017 }
4018
4019 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
4029 &self.inner.result_blob_cache
4030 }
4031
4032 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
4036 self.inner
4037 .db
4038 .replication
4039 .as_ref()
4040 .map(|repl| repl.replica_snapshots())
4041 .unwrap_or_default()
4042 }
4043
4044 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
4049 crate::replication::CommitPolicy::from_env()
4050 }
4051
4052 pub fn replica_apply_error_counts(
4057 &self,
4058 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
4059 self.inner.replica_apply_metrics.snapshot()
4060 }
4061
4062 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
4065 &self.inner.quota_bucket
4066 }
4067
4068 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
4072 self.inner
4073 .db
4074 .replication
4075 .as_ref()
4076 .map(|repl| repl.commit_waiter.snapshot())
4077 .unwrap_or_default()
4078 }
4079
4080 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
4083 self.inner
4084 .db
4085 .replication
4086 .as_ref()
4087 .map(|repl| repl.commit_waiter.metrics_snapshot())
4088 .unwrap_or((0, 0, 0, 0))
4089 }
4090
4091 pub fn await_replica_acks(
4101 &self,
4102 target_lsn: u64,
4103 count: u32,
4104 timeout: std::time::Duration,
4105 ) -> crate::replication::AwaitOutcome {
4106 match &self.inner.db.replication {
4107 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
4108 None => {
4109 crate::replication::AwaitOutcome::NotRequired
4113 }
4114 }
4115 }
4116
4117 pub fn enforce_commit_policy(
4131 &self,
4132 post_lsn: u64,
4133 ) -> RedDBResult<crate::replication::AwaitOutcome> {
4134 let n = match self.commit_policy() {
4135 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
4136 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
4137 };
4138 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4139 .ok()
4140 .and_then(|v| v.parse::<u64>().ok())
4141 .unwrap_or(5_000);
4142 let outcome =
4143 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
4144 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
4145 tracing::warn!(
4146 target: "reddb::commit",
4147 post_lsn,
4148 observed = *observed,
4149 required = *required,
4150 timeout_ms,
4151 "ack_n: timed out waiting for replicas"
4152 );
4153 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
4154 .ok()
4155 .map(|v| {
4156 let t = v.trim();
4157 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
4158 })
4159 .unwrap_or(false);
4160 if fail {
4161 return Err(RedDBError::ReadOnly(format!(
4162 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
4163 )));
4164 }
4165 }
4166 Ok(outcome)
4167 }
4168
4169 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
4177 match crate::crypto::page_encryption::key_from_env() {
4178 Ok(Some(_)) => ("enabled", None),
4179 Ok(None) => ("disabled", None),
4180 Err(err) => ("error", Some(err)),
4181 }
4182 }
4183
4184 pub fn replica_apply_health(&self) -> Option<String> {
4190 let state = self.config_string("red.replication.state", "");
4191 if state.is_empty() {
4192 None
4193 } else {
4194 Some(state)
4195 }
4196 }
4197
4198 pub fn wal_archive_progress(&self) -> (u64, u64) {
4203 let current_lsn = self
4204 .inner
4205 .db
4206 .replication
4207 .as_ref()
4208 .map(|repl| {
4209 repl.logical_wal_spool
4210 .as_ref()
4211 .map(|spool| spool.current_lsn())
4212 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4213 })
4214 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4215 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4216 (current_lsn, last_archived_lsn)
4217 }
4218
4219 pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
4221 self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
4222 self.assert_remote_write_allowed("admin/backup")?;
4227 let started = std::time::Instant::now();
4228 let snapshot = self.create_snapshot()?;
4229 let mut uploaded = false;
4230
4231 if let (Some(backend), Some(path)) = (&self.inner.db.remote_backend, self.inner.db.path()) {
4232 let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
4233 let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
4234 let default_head_key = self.inner.db.options().default_backup_head_key();
4235 let snapshot_prefix = self.config_string(
4236 "red.config.backup.snapshot_prefix",
4237 &default_snapshot_prefix,
4238 );
4239 let wal_prefix =
4240 self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
4241 let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
4242 let timeline_id = self.config_string("red.config.timeline.id", "main");
4243 let snapshot_key = crate::storage::wal::archive_snapshot(
4244 backend.as_ref(),
4245 path,
4246 snapshot.snapshot_id,
4247 &snapshot_prefix,
4248 )
4249 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4250 let current_lsn = self
4251 .inner
4252 .db
4253 .replication
4254 .as_ref()
4255 .map(|repl| {
4256 repl.logical_wal_spool
4257 .as_ref()
4258 .map(|spool| spool.current_lsn())
4259 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4260 })
4261 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4262 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4263 let snapshot_sha256 =
4269 crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
4270 .map_err(|err| {
4271 tracing::warn!(
4272 target: "reddb::backup",
4273 error = %err,
4274 snapshot_id = snapshot.snapshot_id,
4275 "snapshot hash failed; manifest will lack checksum"
4276 );
4277 })
4278 .ok();
4279 let manifest = crate::storage::wal::SnapshotManifest {
4280 timeline_id: timeline_id.clone(),
4281 snapshot_key: snapshot_key.clone(),
4282 snapshot_id: snapshot.snapshot_id,
4283 snapshot_time: snapshot.created_at_unix_ms as u64,
4284 base_lsn: current_lsn,
4285 schema_version: crate::api::REDDB_FORMAT_VERSION,
4286 format_version: crate::api::REDDB_FORMAT_VERSION,
4287 snapshot_sha256,
4288 };
4289 crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
4290 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4291
4292 let prev_segment_hash = self.config_string("red.config.timeline.last_segment_hash", "");
4299 let prev_hash_arg = if prev_segment_hash.is_empty() {
4300 None
4301 } else {
4302 Some(prev_segment_hash)
4303 };
4304
4305 let archived_lsn = if let Some(primary) = &self.inner.db.replication {
4306 let oldest = primary
4307 .logical_wal_spool
4308 .as_ref()
4309 .and_then(|spool| spool.oldest_lsn().ok().flatten())
4310 .or_else(|| primary.wal_buffer.oldest_lsn())
4311 .unwrap_or(last_archived_lsn);
4312 if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
4313 return Err(RedDBError::Internal(format!(
4314 "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
4315 )));
4316 }
4317 let records = if let Some(spool) = &primary.logical_wal_spool {
4318 spool
4319 .read_since(last_archived_lsn, usize::MAX)
4320 .map_err(|err| RedDBError::Internal(err.to_string()))?
4321 } else {
4322 primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
4323 };
4324 if let Some(meta) = crate::storage::wal::archive_change_records(
4325 backend.as_ref(),
4326 &wal_prefix,
4327 &records,
4328 prev_hash_arg,
4329 )
4330 .map_err(|err| RedDBError::Internal(err.to_string()))?
4331 {
4332 if let Some(spool) = &primary.logical_wal_spool {
4333 let _ = spool.prune_through(meta.lsn_end);
4334 }
4335 if let Some(sha) = &meta.sha256 {
4341 self.inner.db.store().set_config_tree(
4342 "red.config.timeline",
4343 &crate::json!({ "last_segment_hash": sha }),
4344 );
4345 }
4346 meta.lsn_end
4347 } else {
4348 last_archived_lsn
4349 }
4350 } else {
4351 last_archived_lsn
4352 };
4353
4354 let head = crate::storage::wal::BackupHead {
4355 timeline_id,
4356 snapshot_key,
4357 snapshot_id: snapshot.snapshot_id,
4358 snapshot_time: snapshot.created_at_unix_ms as u64,
4359 current_lsn,
4360 last_archived_lsn: archived_lsn,
4361 wal_prefix,
4362 };
4363 crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
4364 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4365 self.inner.db.store().set_config_tree(
4366 "red.config.timeline",
4367 &crate::json!({
4368 "last_archived_lsn": archived_lsn,
4369 "id": head.timeline_id
4370 }),
4371 );
4372
4373 if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
4381 backend.as_ref(),
4382 &snapshot_prefix,
4383 ) {
4384 tracing::warn!(
4385 target: "reddb::backup",
4386 error = %err,
4387 snapshot_prefix = %snapshot_prefix,
4388 "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
4389 );
4390 }
4391
4392 match self.commit_policy() {
4404 crate::replication::CommitPolicy::AckN(n) if n > 0 => {
4405 let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4406 .ok()
4407 .and_then(|v| v.parse::<u64>().ok())
4408 .unwrap_or(5_000);
4409 let outcome = self.await_replica_acks(
4410 archived_lsn,
4411 n,
4412 std::time::Duration::from_millis(timeout),
4413 );
4414 match outcome {
4415 crate::replication::AwaitOutcome::Reached(count) => {
4416 tracing::debug!(
4417 target: "reddb::backup",
4418 archived_lsn,
4419 n,
4420 count,
4421 "ack_n: replicas synced before backup return"
4422 );
4423 }
4424 crate::replication::AwaitOutcome::TimedOut { observed, required } => {
4425 tracing::warn!(
4426 target: "reddb::backup",
4427 archived_lsn,
4428 observed,
4429 required,
4430 timeout_ms = timeout,
4431 "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
4432 );
4433 }
4434 crate::replication::AwaitOutcome::NotRequired => {}
4435 }
4436 }
4437 _ => {} }
4439
4440 if self.config_bool("red.config.backup.include_blob_cache", false) {
4452 let blob_cache_prefix = self.config_string(
4453 "red.config.backup.blob_cache_prefix",
4454 &format!("{snapshot_prefix}blob_cache/"),
4455 );
4456 if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
4457 match crate::storage::cache::archive_blob_cache_l2(
4458 backend.as_ref(),
4459 l2_path,
4460 &blob_cache_prefix,
4461 ) {
4462 Ok(count) => {
4463 tracing::info!(
4464 target: "reddb::backup",
4465 files_uploaded = count,
4466 blob_cache_prefix = %blob_cache_prefix,
4467 "include_blob_cache: archived L2 directory"
4468 );
4469 }
4470 Err(err) => {
4471 tracing::warn!(
4472 target: "reddb::backup",
4473 error = %err,
4474 blob_cache_prefix = %blob_cache_prefix,
4475 "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
4476 );
4477 }
4478 }
4479 } else {
4480 tracing::debug!(
4481 target: "reddb::backup",
4482 "include_blob_cache=true but no L2 path configured; nothing to archive"
4483 );
4484 }
4485 }
4486
4487 uploaded = true;
4488 }
4489
4490 Ok(crate::replication::scheduler::BackupResult {
4491 snapshot_id: snapshot.snapshot_id,
4492 uploaded,
4493 duration_ms: started.elapsed().as_millis() as u64,
4494 timestamp: snapshot.created_at_unix_ms as u64,
4495 })
4496 }
4497
4498 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
4499 let mut pool = self
4500 .inner
4501 .pool
4502 .lock()
4503 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
4504 if pool.active >= self.inner.pool_config.max_connections {
4505 return Err(RedDBError::Internal(
4506 "connection pool exhausted".to_string(),
4507 ));
4508 }
4509
4510 let id = if let Some(id) = pool.idle.pop() {
4511 id
4512 } else {
4513 let id = pool.next_id;
4514 pool.next_id += 1;
4515 id
4516 };
4517 pool.active += 1;
4518 pool.total_checkouts += 1;
4519 drop(pool);
4520
4521 Ok(RuntimeConnection {
4522 id,
4523 inner: Arc::clone(&self.inner),
4524 })
4525 }
4526
4527 pub fn checkpoint(&self) -> RedDBResult<()> {
4528 self.inner.db.flush_local_only().map_err(|err| {
4533 let msg = err.to_string();
4538 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
4539 lsn: 0,
4540 error: msg.clone(),
4541 }
4542 .emit_global();
4543 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
4544 path: "<flush_local_only>".to_string(),
4545 error: msg.clone(),
4546 }
4547 .emit_global();
4548 RedDBError::Engine(msg)
4549 })?;
4550 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
4551 tracing::warn!(
4552 target: "reddb::serverless::lease",
4553 error = %err,
4554 "checkpoint: skipping remote upload — lease not held"
4555 );
4556 return Ok(());
4557 }
4558 self.inner
4559 .db
4560 .upload_to_remote_backend()
4561 .map_err(|err| RedDBError::Engine(err.to_string()))
4562 }
4563
4564 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
4571 if self.inner.db.remote_backend.is_none() {
4572 return Ok(());
4573 }
4574 match self.inner.write_gate.lease_state() {
4575 crate::runtime::write_gate::LeaseGateState::NotHeld => {
4576 self.inner.audit_log.record(
4577 action,
4578 "system",
4579 "remote_backend",
4580 "err: writer lease not held",
4581 crate::json::Value::Null,
4582 );
4583 Err(RedDBError::ReadOnly(format!(
4584 "writer lease not held — {action} blocked (serverless fence)"
4585 )))
4586 }
4587 _ => Ok(()),
4588 }
4589 }
4590
4591 pub fn run_maintenance(&self) -> RedDBResult<()> {
4592 self.inner
4593 .db
4594 .run_maintenance()
4595 .map_err(|err| RedDBError::Internal(err.to_string()))
4596 }
4597
4598 pub fn scan_collection(
4599 &self,
4600 collection: &str,
4601 cursor: Option<ScanCursor>,
4602 limit: usize,
4603 ) -> RedDBResult<ScanPage> {
4604 let store = self.inner.db.store();
4605 let manager = store
4606 .get_collection(collection)
4607 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
4608
4609 let mut entities = manager.query_all(|_| true);
4610 entities.sort_by_key(|entity| entity.id.raw());
4611
4612 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
4613 let total = entities.len();
4614 let end = total.min(offset.saturating_add(limit.max(1)));
4615 let items = if offset >= total {
4616 Vec::new()
4617 } else {
4618 entities[offset..end].to_vec()
4619 };
4620 let next = (end < total).then_some(ScanCursor { offset: end });
4621
4622 Ok(ScanPage {
4623 collection: collection.to_string(),
4624 items,
4625 next,
4626 total,
4627 })
4628 }
4629
4630 pub fn catalog(&self) -> CatalogModelSnapshot {
4631 self.inner.db.catalog_model_snapshot()
4632 }
4633
4634 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
4635 self.inner.db.catalog_consistency_report()
4636 }
4637
4638 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
4639 crate::catalog::attention_summary(&self.catalog())
4640 }
4641
4642 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
4643 crate::catalog::collection_attention(&self.catalog())
4644 }
4645
4646 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
4647 crate::catalog::index_attention(&self.catalog())
4648 }
4649
4650 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
4651 crate::catalog::graph_projection_attention(&self.catalog())
4652 }
4653
4654 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
4655 crate::catalog::analytics_job_attention(&self.catalog())
4656 }
4657
4658 pub fn stats(&self) -> RuntimeStats {
4659 let pool = runtime_pool_lock(self);
4660 RuntimeStats {
4661 active_connections: pool.active,
4662 idle_connections: pool.idle.len(),
4663 total_checkouts: pool.total_checkouts,
4664 paged_mode: self.inner.db.is_paged(),
4665 started_at_unix_ms: self.inner.started_at_unix_ms,
4666 store: self.inner.db.stats(),
4667 system: SystemInfo::collect(),
4668 result_blob_cache: self.inner.result_blob_cache.stats(),
4669 kv: self.inner.kv_stats.snapshot(),
4670 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
4671 }
4672 }
4673
4674 pub(crate) fn record_metrics_ingest(
4675 &self,
4676 accepted_samples: u64,
4677 accepted_series: u64,
4678 rejected_samples: u64,
4679 rejected_series: u64,
4680 ) {
4681 self.inner.metrics_ingest_stats.record(
4682 accepted_samples,
4683 accepted_series,
4684 rejected_samples,
4685 rejected_series,
4686 );
4687 }
4688
4689 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
4690 self.inner
4691 .metrics_ingest_stats
4692 .record_cardinality_budget_rejections(rejected_series);
4693 }
4694
4695 pub(crate) fn record_metrics_tenant_activity(
4696 &self,
4697 tenant: &str,
4698 namespace: &str,
4699 operation: &str,
4700 ) {
4701 self.inner
4702 .metrics_tenant_activity_stats
4703 .record(tenant, namespace, operation);
4704 }
4705
4706 pub(crate) fn metrics_tenant_activity_snapshot(
4707 &self,
4708 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
4709 self.inner.metrics_tenant_activity_stats.snapshot()
4710 }
4711
4712 pub fn execute_query_with_scope(
4726 &self,
4727 query: &str,
4728 scope: crate::runtime::within_clause::ScopeOverride,
4729 ) -> RedDBResult<RuntimeQueryResult> {
4730 if scope.is_empty() {
4731 return self.execute_query(query);
4732 }
4733 let _scope_guard = ScopeOverrideGuard::install(scope);
4734 self.execute_query(query)
4735 }
4736
4737 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4746 let started = std::time::Instant::now();
4747 let result = self.execute_query_inner(query);
4748 let elapsed_ms = started.elapsed().as_millis() as u64;
4749
4750 let scope = self.ai_scope();
4755 let kind = match result
4756 .as_ref()
4757 .map(|r| r.statement_type)
4758 .unwrap_or("select")
4759 {
4760 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
4761 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
4762 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
4763 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
4764 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
4765 };
4766 self.inner
4772 .slow_query_logger
4773 .record(kind, elapsed_ms, query.to_string(), &scope);
4774
4775 result
4776 }
4777
4778 #[inline(never)]
4779 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4780 if !has_scope_override_active()
4791 && !query.trim_start().starts_with("WITHIN")
4792 && !query.trim_start().starts_with("within")
4793 && !self
4794 .inner
4795 .tx_contexts
4796 .read()
4797 .contains_key(¤t_connection_id())
4798 {
4799 if let Some(result) = self.try_fast_entity_lookup(query) {
4800 return result;
4801 }
4802 }
4803
4804 match crate::runtime::within_clause::try_strip_within_prefix(query) {
4811 Ok(Some((scope, inner))) => {
4812 let _scope_guard = ScopeOverrideGuard::install(scope);
4813 return self.execute_query_inner(inner);
4818 }
4819 Ok(None) => {}
4820 Err(msg) => return Err(RedDBError::Query(msg)),
4821 }
4822
4823 if let Some(inner) = strip_explain_prefix(query) {
4830 return self.explain_as_rows(query, inner);
4831 }
4832
4833 if let Some(value) = parse_set_local_tenant(query)? {
4838 let conn_id = current_connection_id();
4839 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
4840 return Err(RedDBError::Query(
4841 "SET LOCAL TENANT requires an active transaction".to_string(),
4842 ));
4843 }
4844 self.inner
4845 .tx_local_tenants
4846 .write()
4847 .insert(conn_id, value.clone());
4848 return Ok(RuntimeQueryResult::ok_message(
4849 query.to_string(),
4850 &match &value {
4851 Some(id) => format!("local tenant set: {id}"),
4852 None => "local tenant cleared".to_string(),
4853 },
4854 "set_local_tenant",
4855 ));
4856 }
4857
4858 if super::red_schema::is_system_schema_write(query) {
4859 return Err(RedDBError::Query(
4860 super::red_schema::READ_ONLY_ERROR.to_string(),
4861 ));
4862 }
4863
4864 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
4865 let execution_query = rewritten_query.as_deref().unwrap_or(query);
4866
4867 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
4868 let _frame_guards = frame.install(self);
4869
4870 let _log_span = crate::telemetry::span::query_span(query).entered();
4877
4878 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
4880 return self.execute_query_expr(rewritten);
4881 }
4882
4883 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
4885 return result;
4886 }
4887
4888 if let Some(result) = frame.read_result_cache(self) {
4890 return Ok(result);
4891 }
4892
4893 let prepared = frame.prepare_statement(self, execution_query)?;
4894 let mode = prepared.mode;
4895 let expr = prepared.expr;
4896
4897 let statement = query_expr_name(&expr);
4898 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
4899
4900 let _lock_guard = frame.prepare_dispatch(self, &expr)?;
4901 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
4902
4903 let query_result = match expr {
4904 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
4905 let (graph, node_properties, edge_properties) =
4913 self.materialize_graph_with_rls()?;
4914 let result =
4915 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
4916 &graph,
4917 &expr,
4918 node_properties,
4919 edge_properties,
4920 )
4921 .map_err(|err| RedDBError::Query(err.to_string()))?;
4922
4923 Ok(RuntimeQueryResult {
4924 query: query.to_string(),
4925 mode,
4926 statement,
4927 engine: "materialized-graph",
4928 result,
4929 affected_rows: 0,
4930 statement_type: "select",
4931 })
4932 }
4933 QueryExpr::Table(table) => {
4934 let table = self.resolve_table_expr_subqueries(
4935 table,
4936 &frame as &dyn super::statement_frame::ReadFrame,
4937 )?;
4938 if super::red_schema::is_virtual_table(&table.table) {
4939 return Ok(RuntimeQueryResult {
4940 query: query.to_string(),
4941 mode,
4942 statement,
4943 engine: "runtime-red-schema",
4944 result: super::red_schema::red_query(
4945 self,
4946 &table.table,
4947 &table,
4948 &frame as &dyn super::statement_frame::ReadFrame,
4949 )?,
4950 affected_rows: 0,
4951 statement_type: "select",
4952 });
4953 }
4954
4955 if let Some(result) = self.execute_probabilistic_select(&table)? {
4956 return Ok(RuntimeQueryResult {
4957 query: query.to_string(),
4958 mode,
4959 statement,
4960 engine: "runtime-probabilistic",
4961 result,
4962 affected_rows: 0,
4963 statement_type: "select",
4964 });
4965 }
4966
4967 if self.inner.foreign_tables.is_foreign_table(&table.table) {
4975 let records = self
4976 .inner
4977 .foreign_tables
4978 .scan(&table.table)
4979 .map_err(|e| RedDBError::Internal(e.to_string()))?;
4980 let result = apply_foreign_table_filters(records, &table);
4981 return Ok(RuntimeQueryResult {
4982 query: query.to_string(),
4983 mode,
4984 statement,
4985 engine: "runtime-fdw",
4986 result,
4987 affected_rows: 0,
4988 statement_type: "select",
4989 });
4990 }
4991
4992 let Some(table_with_rls) = self.authorize_relational_table_select(
5009 table,
5010 &frame as &dyn super::statement_frame::ReadFrame,
5011 )?
5012 else {
5013 let empty = crate::storage::query::unified::UnifiedResult::empty();
5014 return Ok(RuntimeQueryResult {
5015 query: query.to_string(),
5016 mode,
5017 statement,
5018 engine: "runtime-table-rls",
5019 result: empty,
5020 affected_rows: 0,
5021 statement_type: "select",
5022 });
5023 };
5024 Ok(RuntimeQueryResult {
5025 query: query.to_string(),
5026 mode,
5027 statement,
5028 engine: "runtime-table",
5029 result: execute_runtime_table_query(
5030 &self.inner.db,
5031 &table_with_rls,
5032 Some(&self.inner.index_store),
5033 )?,
5034 affected_rows: 0,
5035 statement_type: "select",
5036 })
5037 }
5038 QueryExpr::Join(join) => {
5039 let join_with_rls = match self.authorize_relational_join_select(
5048 join,
5049 &frame as &dyn super::statement_frame::ReadFrame,
5050 )? {
5051 Some(j) => j,
5052 None => {
5053 return Ok(RuntimeQueryResult {
5054 query: query.to_string(),
5055 mode,
5056 statement,
5057 engine: "runtime-join-rls",
5058 result: crate::storage::query::unified::UnifiedResult::empty(),
5059 affected_rows: 0,
5060 statement_type: "select",
5061 });
5062 }
5063 };
5064 Ok(RuntimeQueryResult {
5065 query: query.to_string(),
5066 mode,
5067 statement,
5068 engine: "runtime-join",
5069 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
5070 affected_rows: 0,
5071 statement_type: "select",
5072 })
5073 }
5074 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
5075 query: query.to_string(),
5076 mode,
5077 statement,
5078 engine: "runtime-vector",
5079 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
5080 affected_rows: 0,
5081 statement_type: "select",
5082 }),
5083 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
5084 query: query.to_string(),
5085 mode,
5086 statement,
5087 engine: "runtime-hybrid",
5088 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
5089 affected_rows: 0,
5090 statement_type: "select",
5091 }),
            // Write guards: INSERT / UPDATE / DELETE whose target is classified
            // as a virtual table by `red_schema::is_virtual_table` are rejected
            // with `red_schema::READ_ONLY_ERROR`. Match arms are tried in order,
            // so these guarded arms must stay ahead of the unguarded DML arms
            // for the same variants.
            QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
                Err(RedDBError::Query(
                    super::red_schema::READ_ONLY_ERROR.to_string(),
                ))
            }
            QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
                Err(RedDBError::Query(
                    super::red_schema::READ_ONLY_ERROR.to_string(),
                ))
            }
            QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
                Err(RedDBError::Query(
                    super::red_schema::READ_ONLY_ERROR.to_string(),
                ))
            }
            // DML dispatch: each statement runs inside
            // `with_deferred_store_wal_for_dml`. The first argument is the
            // result of the matching `*_may_emit_events` probe; the closure
            // performs the actual insert/update/delete.
            QueryExpr::Insert(ref insert) => self
                .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
                    self.execute_insert(query, insert)
                }),
            QueryExpr::Update(ref update) => self
                .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
                    self.execute_update(query, update)
                }),
            QueryExpr::Delete(ref delete) => self
                .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
                    self.execute_delete(query, delete)
                }),
            // Straight delegation: each of the following variants is forwarded,
            // together with the original query text, to its dedicated
            // `execute_*` method.
            QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
            QueryExpr::CreateCollection(ref create) => {
                self.execute_create_collection(query, create)
            }
            QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
            QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
            QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
            QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
            QueryExpr::DropDocument(ref drop_document) => {
                self.execute_drop_document(query, drop_document)
            }
            QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
            QueryExpr::DropCollection(ref drop_collection) => {
                self.execute_drop_collection(query, drop_collection)
            }
            QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
            QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
            QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
            QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
            QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
            QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
            QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
            QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
            QueryExpr::ProbabilisticCommand(ref cmd) => {
                self.execute_probabilistic_command(query, cmd)
            }
            QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
            QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
            QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
            QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
            QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
            QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
            QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
            QueryExpr::EventsBackfill(ref backfill) => {
                self.execute_events_backfill(query, backfill)
            }
            // Parsed but not executable yet: always answers with a query error.
            QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
                "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
            ))),
            QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
            QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
            QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
            QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
            QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
5170 QueryExpr::SetConfig { ref key, ref value } => {
5172 if key.starts_with("red.secret.") {
5173 return Err(RedDBError::Query(
5174 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
5175 ));
5176 }
5177 let store = self.inner.db.store();
5178 let json_val = match value {
5179 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
5180 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
5181 Value::Float(n) => crate::serde_json::Value::Number(*n),
5182 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
5183 _ => crate::serde_json::Value::String(value.to_string()),
5184 };
5185 store.set_config_tree(key, &json_val);
5186 update_current_config_value(key, value.clone());
5187 self.invalidate_result_cache();
5192 Ok(RuntimeQueryResult::ok_message(
5193 query.to_string(),
5194 &format!("config set: {key}"),
5195 "set",
5196 ))
5197 }
5198 QueryExpr::SetSecret { ref key, ref value } => {
5200 if key.starts_with("red.config.") {
5201 return Err(RedDBError::Query(
5202 "red.config.* is reserved for config; use SET CONFIG".to_string(),
5203 ));
5204 }
5205 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5206 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
5207 })?;
5208 if matches!(value, Value::Null) {
5209 auth_store
5210 .vault_kv_try_delete(key)
5211 .map_err(|err| RedDBError::Query(err.to_string()))?;
5212 update_current_secret_value(key, None);
5213 self.invalidate_result_cache();
5214 return Ok(RuntimeQueryResult::ok_message(
5215 query.to_string(),
5216 &format!("secret deleted: {key}"),
5217 "delete_secret",
5218 ));
5219 }
5220 let value = secret_sql_value_to_string(value)?;
5221 auth_store
5222 .vault_kv_try_set(key.clone(), value.clone())
5223 .map_err(|err| RedDBError::Query(err.to_string()))?;
5224 update_current_secret_value(key, Some(value));
5225 self.invalidate_result_cache();
5226 Ok(RuntimeQueryResult::ok_message(
5227 query.to_string(),
5228 &format!("secret set: {key}"),
5229 "set_secret",
5230 ))
5231 }
5232 QueryExpr::DeleteSecret { ref key } => {
5234 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5235 RedDBError::Query(
5236 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
5237 )
5238 })?;
5239 let deleted = auth_store
5240 .vault_kv_try_delete(key)
5241 .map_err(|err| RedDBError::Query(err.to_string()))?;
5242 if deleted {
5243 update_current_secret_value(key, None);
5244 }
5245 self.invalidate_result_cache();
5246 Ok(RuntimeQueryResult::ok_message(
5247 query.to_string(),
5248 &format!("secret deleted: {key}"),
5249 if deleted {
5250 "delete_secret"
5251 } else {
5252 "delete_secret_not_found"
5253 },
5254 ))
5255 }
5256 QueryExpr::ShowSecrets { ref prefix } => {
5258 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5259 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
5260 })?;
5261 if !auth_store.is_vault_backed() {
5262 return Err(RedDBError::Query(
5263 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
5264 ));
5265 }
5266 let mut keys = auth_store.vault_kv_keys();
5267 keys.sort();
5268 let mut result = UnifiedResult::with_columns(vec![
5269 "key".into(),
5270 "value".into(),
5271 "status".into(),
5272 ]);
5273 for key in keys {
5274 if let Some(ref pfx) = prefix {
5275 if !key.starts_with(pfx) {
5276 continue;
5277 }
5278 }
5279 let mut record = UnifiedRecord::new();
5280 record.set("key", Value::text(key));
5281 record.set("value", Value::text("***"));
5282 record.set("status", Value::text("active"));
5283 result.push(record);
5284 }
5285 Ok(RuntimeQueryResult {
5286 query: query.to_string(),
5287 mode,
5288 statement: "show_secrets",
5289 engine: "runtime-secret",
5290 result,
5291 affected_rows: 0,
5292 statement_type: "select",
5293 })
5294 }
5295 QueryExpr::ShowConfig { ref prefix } => {
5297 let store = self.inner.db.store();
5298 let all_collections = store.list_collections();
5299 if !all_collections.contains(&"red_config".to_string()) {
5300 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5301 return Ok(RuntimeQueryResult {
5302 query: query.to_string(),
5303 mode,
5304 statement: "show_config",
5305 engine: "runtime-config",
5306 result,
5307 affected_rows: 0,
5308 statement_type: "select",
5309 });
5310 }
5311 let manager = store
5312 .get_collection("red_config")
5313 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5314 let entities = manager.query_all(|_| true);
5315 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5316 for entity in entities {
5317 if let EntityData::Row(ref row) = entity.data {
5318 if let Some(ref named) = row.named {
5319 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5320 let val = named.get("value").cloned().unwrap_or(Value::Null);
5321 let key_str = match &key_val {
5322 Value::Text(s) => s.as_ref(),
5323 _ => continue,
5324 };
5325 if let Some(ref pfx) = prefix {
5326 if !key_str.starts_with(pfx.as_str()) {
5327 continue;
5328 }
5329 }
5330 let entity_id = entity.id.raw();
5331 match latest.get(key_str) {
5332 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5333 _ => {
5334 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5335 }
5336 }
5337 }
5338 }
5339 }
5340 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5341 for (_, key_val, val) in latest.into_values() {
5342 let mut record = UnifiedRecord::new();
5343 record.set("key", key_val);
5344 record.set("value", val);
5345 result.push(record);
5346 }
5347 Ok(RuntimeQueryResult {
5348 query: query.to_string(),
5349 mode,
5350 statement: "show_config",
5351 engine: "runtime-config",
5352 result,
5353 affected_rows: 0,
5354 statement_type: "select",
5355 })
5356 }
5357 QueryExpr::SetTenant(ref value) => {
5363 match value {
5364 Some(id) => set_current_tenant(id.clone()),
5365 None => clear_current_tenant(),
5366 }
5367 Ok(RuntimeQueryResult::ok_message(
5368 query.to_string(),
5369 &match value {
5370 Some(id) => format!("tenant set: {id}"),
5371 None => "tenant cleared".to_string(),
5372 },
5373 "set_tenant",
5374 ))
5375 }
5376 QueryExpr::ShowTenant => {
5377 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5378 let mut record = UnifiedRecord::new();
5379 record.set(
5380 "tenant",
5381 current_tenant().map(Value::text).unwrap_or(Value::Null),
5382 );
5383 result.push(record);
5384 Ok(RuntimeQueryResult {
5385 query: query.to_string(),
5386 mode,
5387 statement: "show_tenant",
5388 engine: "runtime-tenant",
5389 result,
5390 affected_rows: 0,
5391 statement_type: "select",
5392 })
5393 }
5394 QueryExpr::TransactionControl(ref ctl) => {
5406 use crate::storage::query::ast::TxnControl;
5407 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5408 use crate::storage::transaction::IsolationLevel;
5409
5410 let conn_id = current_connection_id();
5415
5416 let (kind, msg) = match ctl {
5417 TxnControl::Begin => {
5418 let mgr = Arc::clone(&self.inner.snapshot_manager);
5419 let xid = mgr.begin();
5420 let snapshot = mgr.snapshot(xid);
5421 let ctx = TxnContext {
5422 xid,
5423 isolation: IsolationLevel::SnapshotIsolation,
5424 snapshot,
5425 savepoints: Vec::new(),
5426 released_sub_xids: Vec::new(),
5427 };
5428 self.inner.tx_contexts.write().insert(conn_id, ctx);
5429 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
5430 }
5431 TxnControl::Commit => {
5432 self.inner.tx_local_tenants.write().remove(&conn_id);
5434 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5435 match ctx {
5436 Some(ctx) => {
5437 let mut own_xids = std::collections::HashSet::new();
5438 own_xids.insert(ctx.xid);
5439 for (_, sub) in &ctx.savepoints {
5440 own_xids.insert(*sub);
5441 }
5442 for sub in &ctx.released_sub_xids {
5443 own_xids.insert(*sub);
5444 }
5445 if let Err(err) = self.check_table_row_write_conflicts(
5446 conn_id,
5447 &ctx.snapshot,
5448 &own_xids,
5449 ) {
5450 for (_, sub) in &ctx.savepoints {
5451 self.inner.snapshot_manager.rollback(*sub);
5452 }
5453 for sub in &ctx.released_sub_xids {
5454 self.inner.snapshot_manager.rollback(*sub);
5455 }
5456 self.inner.snapshot_manager.rollback(ctx.xid);
5457 self.revive_pending_versioned_updates(conn_id);
5458 self.revive_pending_tombstones(conn_id);
5459 self.discard_pending_kv_watch_events(conn_id);
5460 self.discard_pending_store_wal_actions(conn_id);
5461 return Err(err);
5462 }
5463 self.restore_pending_write_stamps(conn_id);
5464 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
5465 for (_, sub) in &ctx.savepoints {
5466 self.inner.snapshot_manager.rollback(*sub);
5467 }
5468 for sub in &ctx.released_sub_xids {
5469 self.inner.snapshot_manager.rollback(*sub);
5470 }
5471 self.inner.snapshot_manager.rollback(ctx.xid);
5472 self.revive_pending_versioned_updates(conn_id);
5473 self.revive_pending_tombstones(conn_id);
5474 self.discard_pending_kv_watch_events(conn_id);
5475 return Err(err);
5476 }
5477 for (_, sub) in &ctx.savepoints {
5483 self.inner.snapshot_manager.commit(*sub);
5484 }
5485 for sub in &ctx.released_sub_xids {
5486 self.inner.snapshot_manager.commit(*sub);
5487 }
5488 self.inner.snapshot_manager.commit(ctx.xid);
5489 self.finalize_pending_versioned_updates(conn_id);
5490 self.finalize_pending_tombstones(conn_id);
5491 self.finalize_pending_kv_watch_events(conn_id);
5492 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
5493 }
5494 None => (
5495 "commit",
5496 "COMMIT outside transaction — no-op (autocommit)".to_string(),
5497 ),
5498 }
5499 }
5500 TxnControl::Rollback => {
5501 self.inner.tx_local_tenants.write().remove(&conn_id);
5502 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5503 match ctx {
5504 Some(ctx) => {
5505 for (_, sub) in &ctx.savepoints {
5508 self.inner.snapshot_manager.rollback(*sub);
5509 }
5510 for sub in &ctx.released_sub_xids {
5511 self.inner.snapshot_manager.rollback(*sub);
5512 }
5513 self.inner.snapshot_manager.rollback(ctx.xid);
5514 self.revive_pending_versioned_updates(conn_id);
5518 self.revive_pending_tombstones(conn_id);
5519 self.discard_pending_kv_watch_events(conn_id);
5520 self.discard_pending_store_wal_actions(conn_id);
5521 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
5522 }
5523 None => (
5524 "rollback",
5525 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
5526 ),
5527 }
5528 }
5529 TxnControl::Savepoint(name) => {
5536 let mgr = Arc::clone(&self.inner.snapshot_manager);
5537 let mut guard = self.inner.tx_contexts.write();
5538 match guard.get_mut(&conn_id) {
5539 Some(ctx) => {
5540 let sub = mgr.begin();
5541 ctx.savepoints.push((name.clone(), sub));
5542 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
5543 }
5544 None => (
5545 "savepoint",
5546 "SAVEPOINT outside transaction — no-op".to_string(),
5547 ),
5548 }
5549 }
5550 TxnControl::ReleaseSavepoint(name) => {
5551 let mut guard = self.inner.tx_contexts.write();
5552 match guard.get_mut(&conn_id) {
5553 Some(ctx) => {
5554 let pos = ctx
5555 .savepoints
5556 .iter()
5557 .position(|(n, _)| n == name)
5558 .ok_or_else(|| {
5559 RedDBError::Internal(format!(
5560 "savepoint {name} does not exist"
5561 ))
5562 })?;
5563 let released = ctx.savepoints.len() - pos;
5571 let popped: Vec<Xid> = ctx
5572 .savepoints
5573 .split_off(pos)
5574 .into_iter()
5575 .map(|(_, x)| x)
5576 .collect();
5577 ctx.released_sub_xids.extend(popped);
5578 (
5579 "release_savepoint",
5580 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
5581 )
5582 }
5583 None => (
5584 "release_savepoint",
5585 "RELEASE outside transaction — no-op".to_string(),
5586 ),
5587 }
5588 }
5589 TxnControl::RollbackToSavepoint(name) => {
5590 let mgr = Arc::clone(&self.inner.snapshot_manager);
5591 let drop_result: Option<(Xid, Vec<Xid>)> = {
5596 let mut guard = self.inner.tx_contexts.write();
5597 if let Some(ctx) = guard.get_mut(&conn_id) {
5598 let pos = ctx
5599 .savepoints
5600 .iter()
5601 .position(|(n, _)| n == name)
5602 .ok_or_else(|| {
5603 RedDBError::Internal(format!(
5604 "savepoint {name} does not exist"
5605 ))
5606 })?;
5607 let savepoint_xid = ctx.savepoints[pos].1;
5608 let aborted: Vec<Xid> = ctx
5609 .savepoints
5610 .split_off(pos)
5611 .into_iter()
5612 .map(|(_, x)| x)
5613 .collect();
5614 Some((savepoint_xid, aborted))
5615 } else {
5616 None
5617 }
5618 };
5619
5620 match drop_result {
5621 Some((savepoint_xid, aborted)) => {
5622 for x in &aborted {
5623 mgr.rollback(*x);
5624 }
5625 let reverted_updates =
5626 self.revive_versioned_updates_since(conn_id, savepoint_xid);
5627 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
5628 (
5629 "rollback_to_savepoint",
5630 format!(
5631 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
5632 aborted.len(),
5633 ),
5634 )
5635 }
5636 None => (
5637 "rollback_to_savepoint",
5638 "ROLLBACK TO outside transaction — no-op".to_string(),
5639 ),
5640 }
5641 }
5642 };
5643 Ok(RuntimeQueryResult::ok_message(
5644 query.to_string(),
5645 &msg,
5646 kind,
5647 ))
5648 }
5649 QueryExpr::CreateSchema(ref q) => {
5662 let store = self.inner.db.store();
5663 let key = format!("schema.{}", q.name);
5664 if store.get_config(&key).is_some() {
5665 if q.if_not_exists {
5666 return Ok(RuntimeQueryResult::ok_message(
5667 query.to_string(),
5668 &format!("schema {} already exists — skipped", q.name),
5669 "create_schema",
5670 ));
5671 }
5672 return Err(RedDBError::Internal(format!(
5673 "schema {} already exists",
5674 q.name
5675 )));
5676 }
5677 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
5678 Ok(RuntimeQueryResult::ok_message(
5679 query.to_string(),
5680 &format!("schema {} created", q.name),
5681 "create_schema",
5682 ))
5683 }
5684 QueryExpr::DropSchema(ref q) => {
5685 let store = self.inner.db.store();
5686 let key = format!("schema.{}", q.name);
5687 let existed = store.get_config(&key).is_some();
5688 if !existed && !q.if_exists {
5689 return Err(RedDBError::Internal(format!(
5690 "schema {} does not exist",
5691 q.name
5692 )));
5693 }
5694 store.set_config_tree(&key, &crate::serde_json::Value::Null);
5696 let suffix = if q.cascade {
5697 " (CASCADE accepted — tables untouched)"
5698 } else {
5699 ""
5700 };
5701 Ok(RuntimeQueryResult::ok_message(
5702 query.to_string(),
5703 &format!("schema {} dropped{}", q.name, suffix),
5704 "drop_schema",
5705 ))
5706 }
5707 QueryExpr::CreateSequence(ref q) => {
5708 let store = self.inner.db.store();
5709 let base = format!("sequence.{}", q.name);
5710 let start_key = format!("{base}.start");
5711 let incr_key = format!("{base}.increment");
5712 let curr_key = format!("{base}.current");
5713 if store.get_config(&start_key).is_some() {
5714 if q.if_not_exists {
5715 return Ok(RuntimeQueryResult::ok_message(
5716 query.to_string(),
5717 &format!("sequence {} already exists — skipped", q.name),
5718 "create_sequence",
5719 ));
5720 }
5721 return Err(RedDBError::Internal(format!(
5722 "sequence {} already exists",
5723 q.name
5724 )));
5725 }
5726 let initial_current = q.start - q.increment;
5729 store.set_config_tree(
5730 &start_key,
5731 &crate::serde_json::Value::Number(q.start as f64),
5732 );
5733 store.set_config_tree(
5734 &incr_key,
5735 &crate::serde_json::Value::Number(q.increment as f64),
5736 );
5737 store.set_config_tree(
5738 &curr_key,
5739 &crate::serde_json::Value::Number(initial_current as f64),
5740 );
5741 Ok(RuntimeQueryResult::ok_message(
5742 query.to_string(),
5743 &format!(
5744 "sequence {} created (start={}, increment={})",
5745 q.name, q.start, q.increment
5746 ),
5747 "create_sequence",
5748 ))
5749 }
5750 QueryExpr::DropSequence(ref q) => {
5751 let store = self.inner.db.store();
5752 let base = format!("sequence.{}", q.name);
5753 let existed = store.get_config(&format!("{base}.start")).is_some();
5754 if !existed && !q.if_exists {
5755 return Err(RedDBError::Internal(format!(
5756 "sequence {} does not exist",
5757 q.name
5758 )));
5759 }
5760 for k in ["start", "increment", "current"] {
5761 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
5762 }
5763 Ok(RuntimeQueryResult::ok_message(
5764 query.to_string(),
5765 &format!("sequence {} dropped", q.name),
5766 "drop_sequence",
5767 ))
5768 }
            // CREATE [MATERIALIZED] VIEW: register the view definition in the
            // in-memory `views` map. For materialized views it also registers
            // a definition in the materialized-view registry, persists a
            // descriptor and ensures the backing exists. Plan and result
            // caches are invalidated at the end.
            QueryExpr::CreateView(ref q) => {
                let mut views = self.inner.views.write();
                // An existing name is only an error without OR REPLACE;
                // IF NOT EXISTS downgrades that error to a skip.
                if views.contains_key(&q.name) && !q.or_replace {
                    if q.if_not_exists {
                        return Ok(RuntimeQueryResult::ok_message(
                            query.to_string(),
                            &format!("view {} already exists — skipped", q.name),
                            "create_view",
                        ));
                    }
                    return Err(RedDBError::Internal(format!(
                        "view {} already exists",
                        q.name
                    )));
                }
                views.insert(q.name.clone(), Arc::new(q.clone()));
                // Release the write lock before the materialized-view work.
                drop(views);

                if q.materialized {
                    use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
                    // A refresh interval selects periodic refresh; otherwise
                    // the view is refreshed manually.
                    let refresh = match q.refresh_every_ms {
                        Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
                        None => RefreshPolicy::Manual,
                    };
                    let dependencies = collect_table_refs(&q.query);
                    let def = MaterializedViewDef {
                        name: q.name.clone(),
                        query: format!("<parsed view {}>", q.name),
                        dependencies: dependencies.clone(),
                        refresh,
                        retention_duration_ms: q.retention_duration_ms,
                    };
                    self.inner.materialized_views.write().register(def);

                    // The persisted descriptor keeps the original SQL text.
                    let descriptor =
                        crate::runtime::continuous_materialized_view::MaterializedViewDescriptor {
                            name: q.name.clone(),
                            source_sql: query.to_string(),
                            source_collections: dependencies,
                            refresh_every_ms: q.refresh_every_ms,
                            retention_duration_ms: q.retention_duration_ms,
                        };
                    let store = self.inner.db.store();
                    // NOTE(review): if this or the next call fails, the `?`
                    // returns after the view was already inserted above and
                    // before the caches are invalidated — confirm that is
                    // acceptable.
                    crate::runtime::continuous_materialized_view::persist_descriptor(
                        store.as_ref(),
                        &descriptor,
                    )?;

                    self.ensure_materialized_view_backing(&q.name)?;
                }
                self.invalidate_plan_cache();
                self.invalidate_result_cache();

                Ok(RuntimeQueryResult::ok_message(
                    query.to_string(),
                    &format!(
                        "{}view {} created",
                        if q.materialized { "materialized " } else { "" },
                        q.name
                    ),
                    "create_view",
                ))
            }
5857 QueryExpr::DropView(ref q) => {
5858 let mut views = self.inner.views.write();
5859 let removed = views.remove(&q.name);
5860 let existed = removed.is_some();
5861 let removed_materialized =
5862 removed.as_ref().map(|v| v.materialized).unwrap_or(false);
5863 drop(views);
5864 if q.materialized || existed {
5865 self.inner.materialized_views.write().remove(&q.name);
5867 let store = self.inner.db.store();
5871 crate::runtime::continuous_materialized_view::remove_by_name(
5872 store.as_ref(),
5873 &q.name,
5874 )?;
5875 }
5876 if removed_materialized || q.materialized {
5880 self.drop_materialized_view_backing(&q.name)?;
5881 }
5882 self.invalidate_plan_cache();
5885 self.invalidate_result_cache();
5886 if !existed && !q.if_exists {
5887 return Err(RedDBError::Internal(format!(
5888 "view {} does not exist",
5889 q.name
5890 )));
5891 }
5892 self.invalidate_plan_cache();
5893 Ok(RuntimeQueryResult::ok_message(
5894 query.to_string(),
5895 &format!("view {} dropped", q.name),
5896 "drop_view",
5897 ))
5898 }
            // REFRESH MATERIALIZED VIEW: re-run the view's query, replace the
            // contents of the collection named after the view, emit a
            // replication record when replication is configured, and record
            // the outcome in the materialized-view registry.
            QueryExpr::RefreshMaterializedView(ref q) => {
                // Clone the definition out so the read lock is released at the
                // end of this block, before the query runs.
                let view = {
                    let views = self.inner.views.read();
                    views.get(&q.name).cloned()
                };
                let view = match view {
                    Some(v) => v,
                    None => {
                        return Err(RedDBError::Internal(format!(
                            "view {} does not exist",
                            q.name
                        )))
                    }
                };
                if !view.materialized {
                    return Err(RedDBError::Internal(format!(
                        "view {} is not materialized — REFRESH requires \
                         CREATE MATERIALIZED VIEW",
                        q.name
                    )));
                }
                // `started` measures the refresh duration; `now_ms` is the
                // wall-clock time in milliseconds since the Unix epoch, falling
                // back to 0 when the clock reads before the epoch.
                let started = std::time::Instant::now();
                let now_ms = std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .map(|d| d.as_millis() as u64)
                    .unwrap_or(0);
                match self.execute_query_expr((*view.query).clone()) {
                    Ok(inner_result) => {
                        let entities =
                            view_records_to_entities(&q.name, &inner_result.result.records);
                        let row_count = entities.len() as u64;
                        let store = self.inner.db.store();
                        // A failed collection refresh is recorded in the
                        // registry and reported as an internal error.
                        let serialized_records = match store.refresh_collection(&q.name, entities) {
                            Ok(records) => records,
                            Err(err) => {
                                let duration_ms = started.elapsed().as_millis() as u64;
                                let msg = err.to_string();
                                self.inner
                                    .materialized_views
                                    .write()
                                    .record_refresh_failure(
                                        &q.name,
                                        msg.clone(),
                                        duration_ms,
                                        now_ms,
                                    );
                                return Err(RedDBError::Internal(format!(
                                    "REFRESH MATERIALIZED VIEW {}: {msg}",
                                    q.name
                                )));
                            }
                        };

                        // Only when replication is configured: emit a CDC
                        // refresh event and append the encoded change record
                        // to the WAL buffer and, when present, the logical
                        // WAL spool.
                        if let Some(ref primary) = self.inner.db.replication {
                            let lsn = self.inner.cdc.emit(
                                crate::replication::cdc::ChangeOperation::Refresh,
                                &q.name,
                                0,
                                "refresh",
                            );
                            self.invalidate_result_cache_for_table(&q.name);
                            let timestamp = std::time::SystemTime::now()
                                .duration_since(std::time::UNIX_EPOCH)
                                .unwrap_or_default()
                                .as_millis() as u64;
                            let record = ChangeRecord::for_refresh(
                                lsn,
                                timestamp,
                                q.name.clone(),
                                serialized_records,
                            );
                            let encoded = record.encode();
                            primary.wal_buffer.append(record.lsn, encoded.clone());
                            if let Some(spool) = &primary.logical_wal_spool {
                                // The result of the spool append is discarded.
                                let _ = spool.append(record.lsn, &encoded);
                            }
                        }

                        let duration_ms = started.elapsed().as_millis() as u64;
                        // The registry stores the Debug rendering of the
                        // result as bytes.
                        let serialized = format!("{:?}", inner_result.result);
                        self.inner
                            .materialized_views
                            .write()
                            .record_refresh_success(
                                &q.name,
                                serialized.into_bytes(),
                                row_count,
                                duration_ms,
                                now_ms,
                            );
                        self.invalidate_result_cache();
                        Ok(RuntimeQueryResult::ok_message(
                            query.to_string(),
                            &format!("materialized view {} refreshed", q.name),
                            "refresh_materialized_view",
                        ))
                    }
                    // The view query itself failed: record the failure and
                    // propagate the original error unchanged.
                    Err(err) => {
                        let duration_ms = started.elapsed().as_millis() as u64;
                        let msg = err.to_string();
                        self.inner
                            .materialized_views
                            .write()
                            .record_refresh_failure(&q.name, msg.clone(), duration_ms, now_ms);
                        Err(err)
                    }
                }
            }
6025 QueryExpr::CreatePolicy(ref q) => {
6032 let key = (q.table.clone(), q.name.clone());
6033 self.inner
6034 .rls_policies
6035 .write()
6036 .insert(key, Arc::new(q.clone()));
6037 self.invalidate_plan_cache();
6038 self.schema_vocabulary_apply(
6042 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
6043 collection: q.table.clone(),
6044 policy: q.name.clone(),
6045 },
6046 );
6047 Ok(RuntimeQueryResult::ok_message(
6048 query.to_string(),
6049 &format!("policy {} on {} created", q.name, q.table),
6050 "create_policy",
6051 ))
6052 }
6053 QueryExpr::DropPolicy(ref q) => {
6054 let removed = self
6055 .inner
6056 .rls_policies
6057 .write()
6058 .remove(&(q.table.clone(), q.name.clone()))
6059 .is_some();
6060 if !removed && !q.if_exists {
6061 return Err(RedDBError::Internal(format!(
6062 "policy {} on {} does not exist",
6063 q.name, q.table
6064 )));
6065 }
6066 self.invalidate_plan_cache();
6067 self.schema_vocabulary_apply(
6070 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
6071 collection: q.table.clone(),
6072 policy: q.name.clone(),
6073 },
6074 );
6075 Ok(RuntimeQueryResult::ok_message(
6076 query.to_string(),
6077 &format!("policy {} on {} dropped", q.name, q.table),
6078 "drop_policy",
6079 ))
6080 }
6081 QueryExpr::CreateServer(ref q) => {
6092 use crate::storage::fdw::FdwOptions;
6093 let registry = Arc::clone(&self.inner.foreign_tables);
6094 if registry.server(&q.name).is_some() {
6095 if q.if_not_exists {
6096 return Ok(RuntimeQueryResult::ok_message(
6097 query.to_string(),
6098 &format!("server {} already exists — skipped", q.name),
6099 "create_server",
6100 ));
6101 }
6102 return Err(RedDBError::Internal(format!(
6103 "server {} already exists",
6104 q.name
6105 )));
6106 }
6107 let mut opts = FdwOptions::new();
6108 for (k, v) in &q.options {
6109 opts.values.insert(k.clone(), v.clone());
6110 }
6111 registry
6112 .create_server(&q.name, &q.wrapper, opts)
6113 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6114 Ok(RuntimeQueryResult::ok_message(
6115 query.to_string(),
6116 &format!("server {} created (wrapper {})", q.name, q.wrapper),
6117 "create_server",
6118 ))
6119 }
6120 QueryExpr::DropServer(ref q) => {
6121 let existed = self.inner.foreign_tables.drop_server(&q.name);
6122 if !existed && !q.if_exists {
6123 return Err(RedDBError::Internal(format!(
6124 "server {} does not exist",
6125 q.name
6126 )));
6127 }
6128 Ok(RuntimeQueryResult::ok_message(
6129 query.to_string(),
6130 &format!(
6131 "server {} dropped{}",
6132 q.name,
6133 if q.cascade { " (cascade)" } else { "" }
6134 ),
6135 "drop_server",
6136 ))
6137 }
6138 QueryExpr::CreateForeignTable(ref q) => {
6139 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
6140 let registry = Arc::clone(&self.inner.foreign_tables);
6141 if registry.foreign_table(&q.name).is_some() {
6142 if q.if_not_exists {
6143 return Ok(RuntimeQueryResult::ok_message(
6144 query.to_string(),
6145 &format!("foreign table {} already exists — skipped", q.name),
6146 "create_foreign_table",
6147 ));
6148 }
6149 return Err(RedDBError::Internal(format!(
6150 "foreign table {} already exists",
6151 q.name
6152 )));
6153 }
6154 let mut opts = FdwOptions::new();
6155 for (k, v) in &q.options {
6156 opts.values.insert(k.clone(), v.clone());
6157 }
6158 let columns: Vec<ForeignColumn> = q
6159 .columns
6160 .iter()
6161 .map(|c| ForeignColumn {
6162 name: c.name.clone(),
6163 data_type: c.data_type.clone(),
6164 not_null: c.not_null,
6165 })
6166 .collect();
6167 registry
6168 .create_foreign_table(ForeignTable {
6169 name: q.name.clone(),
6170 server_name: q.server.clone(),
6171 columns,
6172 options: opts,
6173 })
6174 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6175 self.invalidate_plan_cache();
6176 Ok(RuntimeQueryResult::ok_message(
6177 query.to_string(),
6178 &format!("foreign table {} created (server {})", q.name, q.server),
6179 "create_foreign_table",
6180 ))
6181 }
6182 QueryExpr::DropForeignTable(ref q) => {
6183 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
6184 if !existed && !q.if_exists {
6185 return Err(RedDBError::Internal(format!(
6186 "foreign table {} does not exist",
6187 q.name
6188 )));
6189 }
6190 self.invalidate_plan_cache();
6191 Ok(RuntimeQueryResult::ok_message(
6192 query.to_string(),
6193 &format!("foreign table {} dropped", q.name),
6194 "drop_foreign_table",
6195 ))
6196 }
6197 QueryExpr::CopyFrom(ref q) => {
6203 use crate::storage::import::{CsvConfig, CsvImporter};
6204 let store = self.inner.db.store();
6205 let cfg = CsvConfig {
6206 collection: q.table.clone(),
6207 has_header: q.has_header,
6208 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
6209 ..CsvConfig::default()
6210 };
6211 let importer = CsvImporter::new(cfg);
6212 let stats = importer
6213 .import_file(&q.path, store.as_ref())
6214 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
6215 self.note_table_write(&q.table);
6217 Ok(RuntimeQueryResult::ok_message(
6218 query.to_string(),
6219 &format!(
6220 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
6221 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
6222 ),
6223 "copy_from",
6224 ))
6225 }
6226 QueryExpr::MaintenanceCommand(ref cmd) => {
6242 use crate::storage::query::ast::MaintenanceCommand as Mc;
6243 let store = self.inner.db.store();
6244 let (kind, msg) = match cmd {
6245 Mc::Analyze { target } => {
6246 let targets: Vec<String> = match target {
6247 Some(t) => vec![t.clone()],
6248 None => store.list_collections(),
6249 };
6250 for t in &targets {
6251 self.refresh_table_planner_stats(t);
6252 }
6253 (
6254 "analyze",
6255 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
6256 )
6257 }
6258 Mc::Vacuum { target, full } => {
6259 let targets: Vec<String> = match target {
6260 Some(t) => vec![t.clone()],
6261 None => store.list_collections(),
6262 };
6263 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
6264 let mut vacuum_stats =
6265 crate::storage::unified::store::MvccVacuumStats::default();
6266 for t in &targets {
6267 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
6268 RedDBError::Internal(format!(
6269 "VACUUM MVCC history failed for {t}: {e}"
6270 ))
6271 })?;
6272 if stats.reclaimed_versions > 0 {
6273 self.rebuild_runtime_indexes_for_table(t)?;
6274 }
6275 vacuum_stats.add(&stats);
6276 }
6277 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
6278 for t in &targets {
6280 self.refresh_table_planner_stats(t);
6281 }
6282 let persisted = if *full {
6286 match store.persist() {
6287 Ok(()) => true,
6288 Err(e) => {
6289 return Err(RedDBError::Internal(format!(
6290 "VACUUM FULL persist failed: {e:?}"
6291 )));
6292 }
6293 }
6294 } else {
6295 false
6296 };
6297 self.invalidate_result_cache();
6299 (
6300 "vacuum",
6301 format!(
6302 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
6303 if *full { " FULL" } else { "" },
6304 targets.len(),
6305 vacuum_stats.scanned_versions,
6306 vacuum_stats.retained_versions,
6307 vacuum_stats.reclaimed_versions,
6308 vacuum_stats.retained_history_versions,
6309 vacuum_stats.reclaimed_history_versions,
6310 vacuum_stats.retained_tombstones,
6311 vacuum_stats.reclaimed_tombstones,
6312 if persisted {
6313 " (pages flushed to disk)"
6314 } else {
6315 ""
6316 }
6317 ),
6318 )
6319 }
6320 };
6321 Ok(RuntimeQueryResult::ok_message(
6322 query.to_string(),
6323 &msg,
6324 kind,
6325 ))
6326 }
6327 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
6334 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
6335 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
6336 QueryExpr::CreateIamPolicy { ref id, ref json } => {
6337 self.execute_create_iam_policy(query, id, json)
6338 }
6339 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
6340 QueryExpr::AttachPolicy {
6341 ref policy_id,
6342 ref principal,
6343 } => self.execute_attach_policy(query, policy_id, principal),
6344 QueryExpr::DetachPolicy {
6345 ref policy_id,
6346 ref principal,
6347 } => self.execute_detach_policy(query, policy_id, principal),
6348 QueryExpr::ShowPolicies { ref filter } => {
6349 self.execute_show_policies(query, filter.as_ref())
6350 }
6351 QueryExpr::ShowEffectivePermissions {
6352 ref user,
6353 ref resource,
6354 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
6355 QueryExpr::SimulatePolicy {
6356 ref user,
6357 ref action,
6358 ref resource,
6359 } => self.execute_simulate_policy(query, user, action, resource),
6360 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
6361 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
6362 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
6363 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
6364 };
6365
6366 let mut query_result = query_result;
6370 if let Ok(ref mut result) = query_result {
6371 if result.statement_type == "select" {
6372 self.apply_secret_decryption(result);
6373 }
6374 }
6375
6376 if let Ok(ref result) = query_result {
6383 frame.write_result_cache(self, result, result_cache_scopes);
6384 }
6385
6386 query_result
6387 }
6388
6389 pub fn materialized_view_metadata(
6393 &self,
6394 ) -> Vec<crate::storage::cache::result::MaterializedViewMetadata> {
6395 let store = self.inner.db.store();
6402 let mut entries = self.inner.materialized_views.read().metadata();
6403 for entry in &mut entries {
6404 if let Some(manager) = store.get_collection(&entry.name) {
6405 entry.current_row_count = manager.count() as u64;
6406 }
6407 }
6408 entries
6409 }
6410
6411 pub(crate) fn retention_sweeper_snapshot(
6422 &self,
6423 ) -> Vec<(String, crate::runtime::retention_sweeper::SweeperState)> {
6424 self.inner.retention_sweeper.read().snapshot()
6425 }
6426
6427 pub fn sweep_retention_tick(&self, batch_size: usize) {
6449 if batch_size == 0 {
6450 return;
6451 }
6452 let now_ms = std::time::SystemTime::now()
6453 .duration_since(std::time::UNIX_EPOCH)
6454 .map(|d| d.as_millis() as u64)
6455 .unwrap_or(0);
6456
6457 let store = self.inner.db.store();
6458 let collections = store.list_collections();
6459 for name in collections {
6460 let Some(contract) = self.inner.db.collection_contract(&name) else {
6461 continue;
6462 };
6463 let Some(retention_ms) = contract.retention_duration_ms else {
6464 continue;
6465 };
6466 let Some(ts_column) =
6467 crate::runtime::retention_filter::resolve_timestamp_column(&contract)
6468 else {
6469 continue;
6470 };
6471 let Some(manager) = store.get_collection(&name) else {
6472 continue;
6473 };
6474 let cutoff = (now_ms as i64).saturating_sub(retention_ms as i64);
6475
6476 let mut expired_ts: Vec<i64> = Vec::new();
6484 manager.for_each_entity(|entity| {
6485 let ts = match ts_column.as_str() {
6486 "created_at" => Some(entity.created_at as i64),
6487 "updated_at" => Some(entity.updated_at as i64),
6488 other => entity
6489 .data
6490 .as_row()
6491 .and_then(|row| row.get_field(other))
6492 .and_then(|v| match v {
6493 crate::storage::schema::Value::TimestampMs(t) => Some(*t),
6494 crate::storage::schema::Value::Timestamp(t) => {
6495 Some(t.saturating_mul(1_000))
6496 }
6497 crate::storage::schema::Value::BigInt(t) => Some(*t),
6498 crate::storage::schema::Value::UnsignedInteger(t) => {
6499 i64::try_from(*t).ok()
6500 }
6501 crate::storage::schema::Value::Integer(t) => Some(*t),
6502 _ => None,
6503 }),
6504 };
6505 if let Some(t) = ts {
6506 if t < cutoff {
6507 expired_ts.push(t);
6508 }
6509 }
6510 true
6511 });
6512
6513 let total_expired = expired_ts.len() as u64;
6514 if total_expired == 0 {
6515 self.inner
6516 .retention_sweeper
6517 .write()
6518 .record_tick(&name, 0, 0, now_ms);
6519 continue;
6520 }
6521
6522 let (effective_cutoff, pending) = if (total_expired as usize) <= batch_size {
6523 (cutoff, 0u64)
6524 } else {
6525 expired_ts.sort_unstable();
6529 let nth = expired_ts[batch_size - 1];
6530 (
6531 nth.saturating_add(1),
6532 total_expired.saturating_sub(batch_size as u64),
6533 )
6534 };
6535
6536 let stmt = format!(
6537 "DELETE FROM {} WHERE {} < {}",
6538 name, ts_column, effective_cutoff
6539 );
6540 let deleted = match self.execute_query(&stmt) {
6541 Ok(r) => r.affected_rows,
6542 Err(_) => 0,
6543 };
6544
6545 self.inner
6546 .retention_sweeper
6547 .write()
6548 .record_tick(&name, deleted, pending, now_ms);
6549 }
6550 }
6551
6552 pub fn refresh_due_materialized_views(&self) {
6553 let due = {
6554 let mut cache = self.inner.materialized_views.write();
6555 cache.claim_due_at(std::time::Instant::now())
6556 };
6557 for name in due {
6558 let stmt = format!("REFRESH MATERIALIZED VIEW {}", name);
6565 let _ = self.execute_query(&stmt);
6566 }
6567 }
6568
6569 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
6575 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
6576 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
6577 let expr = self.rewrite_view_refs(expr);
6581
6582 self.validate_model_operations_before_auth(&expr)?;
6583 if let Err(err) = self.check_query_privilege(&expr) {
6587 return Err(RedDBError::Query(format!("permission denied: {err}")));
6588 }
6589
6590 let statement = query_expr_name(&expr);
6591 let mode = detect_mode(statement);
6592 let query_str = statement;
6593
6594 let result = self.dispatch_expr(expr, query_str, mode)?;
6595 let mut r = result;
6596 if r.statement_type == "select" {
6597 self.apply_secret_decryption(&mut r);
6598 }
6599 Ok(r)
6600 }
6601
6602 pub(super) fn validate_model_operations_before_auth(
6603 &self,
6604 expr: &QueryExpr,
6605 ) -> RedDBResult<()> {
6606 use crate::catalog::CollectionModel;
6607 use crate::runtime::ddl::polymorphic_resolver;
6608 use crate::storage::query::ast::KvCommand;
6609
6610 let system_schema_target = match expr {
6611 QueryExpr::DropTable(q) => Some(q.name.as_str()),
6612 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
6613 QueryExpr::DropVector(q) => Some(q.name.as_str()),
6614 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
6615 QueryExpr::DropKv(q) => Some(q.name.as_str()),
6616 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
6617 QueryExpr::Truncate(q) => Some(q.name.as_str()),
6618 _ => None,
6619 };
6620 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
6621 return Err(RedDBError::Query("system schema is read-only".to_string()));
6622 }
6623
6624 let expected = match expr {
6625 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
6626 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
6627 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
6628 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
6629 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
6630 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
6631 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
6632 QueryExpr::KvCommand(cmd) => {
6633 let (collection, model) = match cmd {
6634 KvCommand::Put {
6635 collection, model, ..
6636 }
6637 | KvCommand::Get {
6638 collection, model, ..
6639 }
6640 | KvCommand::Incr {
6641 collection, model, ..
6642 }
6643 | KvCommand::Cas {
6644 collection, model, ..
6645 }
6646 | KvCommand::Delete {
6647 collection, model, ..
6648 } => (collection.as_str(), *model),
6649 KvCommand::Rotate { collection, .. }
6650 | KvCommand::History { collection, .. }
6651 | KvCommand::List { collection, .. }
6652 | KvCommand::Purge { collection, .. } => {
6653 (collection.as_str(), CollectionModel::Vault)
6654 }
6655 KvCommand::InvalidateTags { collection, .. } => {
6656 (collection.as_str(), CollectionModel::Kv)
6657 }
6658 KvCommand::Watch {
6659 collection, model, ..
6660 } => (collection.as_str(), *model),
6661 KvCommand::Unseal { collection, .. } => {
6662 (collection.as_str(), CollectionModel::Vault)
6663 }
6664 };
6665 Some((collection, model))
6666 }
6667 QueryExpr::ConfigCommand(cmd) => {
6668 self.validate_config_command_before_auth(cmd)?;
6669 None
6670 }
6671 _ => None,
6672 };
6673
6674 let Some((name, expected_model)) = expected else {
6675 return Ok(());
6676 };
6677 let snapshot = self.inner.db.catalog_model_snapshot();
6678 let Some(actual_model) = snapshot
6679 .collections
6680 .iter()
6681 .find(|collection| collection.name == name)
6682 .map(|collection| collection.declared_model.unwrap_or(collection.model))
6683 else {
6684 return Ok(());
6685 };
6686 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
6687 }
6688
6689 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
6694 if self.inner.views.read().is_empty() {
6696 return expr;
6697 }
6698 self.rewrite_view_refs_inner(expr)
6699 }
6700
6701 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
6702 use crate::storage::query::ast::{Filter, TableSource};
6703 match expr {
6704 QueryExpr::Table(mut tq) => {
6705 if let Some(TableSource::Subquery(body)) = tq.source.take() {
6711 tq.source = Some(TableSource::Subquery(Box::new(
6712 self.rewrite_view_refs_inner(*body),
6713 )));
6714 return QueryExpr::Table(tq);
6715 }
6716
6717 let maybe_view = {
6721 let views = self.inner.views.read();
6722 views.get(&tq.table).cloned()
6723 };
6724 let Some(view) = maybe_view else {
6725 return QueryExpr::Table(tq);
6726 };
6727
6728 if view.materialized {
6734 return QueryExpr::Table(tq);
6735 }
6736
6737 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
6741
6742 match inner_expr {
6750 QueryExpr::Table(mut inner_tq) => {
6751 if let Some(outer_filter) = tq.filter.take() {
6752 inner_tq.filter = Some(match inner_tq.filter.take() {
6753 Some(existing) => {
6754 Filter::And(Box::new(existing), Box::new(outer_filter))
6755 }
6756 None => outer_filter,
6757 });
6758 inner_tq.where_expr = inner_tq
6766 .filter
6767 .as_ref()
6768 .map(crate::storage::query::sql_lowering::filter_to_expr);
6769 }
6770 if let Some(outer_limit) = tq.limit {
6771 inner_tq.limit = Some(match inner_tq.limit {
6772 Some(existing) => existing.min(outer_limit),
6773 None => outer_limit,
6774 });
6775 }
6776 if let Some(outer_offset) = tq.offset {
6777 inner_tq.offset = Some(match inner_tq.offset {
6778 Some(existing) => existing + outer_offset,
6779 None => outer_offset,
6780 });
6781 }
6782 QueryExpr::Table(inner_tq)
6783 }
6784 other => other,
6785 }
6786 }
6787 QueryExpr::Join(mut jq) => {
6788 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
6789 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
6790 QueryExpr::Join(jq)
6791 }
6792 other => other,
6795 }
6796 }
6797
6798 fn authorize_relational_table_select(
6802 &self,
6803 mut table: TableQuery,
6804 frame: &dyn super::statement_frame::ReadFrame,
6805 ) -> RedDBResult<Option<TableQuery>> {
6806 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6807 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6808 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6809 return Ok(Some(table));
6810 }
6811
6812 self.check_table_column_projection_authz(&table, frame)?;
6813
6814 if self.inner.rls_enabled_tables.read().contains(&table.table) {
6815 return Ok(inject_rls_filters(self, frame, table));
6816 }
6817
6818 Ok(Some(table))
6819 }
6820
6821 fn authorize_relational_join_select(
6822 &self,
6823 mut join: JoinQuery,
6824 frame: &dyn super::statement_frame::ReadFrame,
6825 ) -> RedDBResult<Option<JoinQuery>> {
6826 self.check_join_column_projection_authz(&join, frame)?;
6827 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
6828 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
6829 Ok(inject_rls_into_join(self, frame, join))
6830 }
6831
6832 fn authorize_relational_join_child(
6833 &self,
6834 expr: QueryExpr,
6835 frame: &dyn super::statement_frame::ReadFrame,
6836 ) -> RedDBResult<QueryExpr> {
6837 match expr {
6838 QueryExpr::Table(mut table) => {
6839 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6840 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6841 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6842 }
6843 Ok(QueryExpr::Table(table))
6844 }
6845 QueryExpr::Join(join) => self
6846 .authorize_relational_join_select(join, frame)?
6847 .map(QueryExpr::Join)
6848 .ok_or_else(|| {
6849 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6850 }),
6851 other => Ok(other),
6852 }
6853 }
6854
6855 fn authorize_relational_select_expr(
6856 &self,
6857 expr: QueryExpr,
6858 frame: &dyn super::statement_frame::ReadFrame,
6859 ) -> RedDBResult<QueryExpr> {
6860 match expr {
6861 QueryExpr::Table(table) => self
6862 .authorize_relational_table_select(table, frame)?
6863 .map(QueryExpr::Table)
6864 .ok_or_else(|| {
6865 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6866 }),
6867 QueryExpr::Join(join) => self
6868 .authorize_relational_join_select(join, frame)?
6869 .map(QueryExpr::Join)
6870 .ok_or_else(|| {
6871 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6872 }),
6873 other => Ok(other),
6874 }
6875 }
6876
6877 fn check_table_column_projection_authz(
6878 &self,
6879 table: &TableQuery,
6880 frame: &dyn super::statement_frame::ReadFrame,
6881 ) -> RedDBResult<()> {
6882 let Some((username, role)) = frame.identity() else {
6883 return Ok(());
6884 };
6885 let Some(auth_store) = self.inner.auth_store.read().clone() else {
6886 return Ok(());
6887 };
6888
6889 let columns = self.resolved_table_projection_columns(table)?;
6890 let request = ColumnAccessRequest::select(table.table.clone(), columns);
6891 let principal = UserId::from_parts(frame.effective_scope(), username);
6892 let ctx = runtime_iam_context(
6893 role,
6894 frame.effective_scope(),
6895 auth_store.principal_is_system_owned(&principal),
6896 );
6897 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
6898 if outcome.allowed() {
6899 return Ok(());
6900 }
6901
6902 if let Some(denied) = outcome.first_denied_column() {
6903 return Err(RedDBError::Query(format!(
6904 "permission denied: principal=`{username}` cannot select column `{}`",
6905 denied.resource.name
6906 )));
6907 }
6908 Err(RedDBError::Query(format!(
6909 "permission denied: principal=`{username}` cannot select table `{}`",
6910 table.table
6911 )))
6912 }
6913
6914 fn check_join_column_projection_authz(
6915 &self,
6916 join: &JoinQuery,
6917 frame: &dyn super::statement_frame::ReadFrame,
6918 ) -> RedDBResult<()> {
6919 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
6920 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
6921 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
6922
6923 for (table, columns) in by_table {
6924 let query = TableQuery {
6925 table,
6926 source: None,
6927 alias: None,
6928 select_items: Vec::new(),
6929 columns: columns.into_iter().map(Projection::Column).collect(),
6930 where_expr: None,
6931 filter: None,
6932 group_by_exprs: Vec::new(),
6933 group_by: Vec::new(),
6934 having_expr: None,
6935 having: None,
6936 order_by: Vec::new(),
6937 limit: None,
6938 limit_param: None,
6939 offset: None,
6940 offset_param: None,
6941 expand: None,
6942 as_of: None,
6943 sessionize: None,
6944 };
6945 self.check_table_column_projection_authz(&query, frame)?;
6946 }
6947 Ok(())
6948 }
6949
6950 fn collect_join_projection_columns(
6951 &self,
6952 join: &JoinQuery,
6953 projections: &[Projection],
6954 out: &mut HashMap<String, BTreeSet<String>>,
6955 ) -> RedDBResult<()> {
6956 let left = table_side_context(join.left.as_ref());
6957 let right = table_side_context(join.right.as_ref());
6958
6959 if projections
6960 .iter()
6961 .any(|projection| matches!(projection, Projection::All))
6962 {
6963 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
6964 out.entry(side.table.clone())
6965 .or_default()
6966 .extend(self.table_all_projection_columns(&side.table)?);
6967 }
6968 return Ok(());
6969 }
6970
6971 for projection in projections {
6972 collect_projection_columns_for_join_side(
6973 projection,
6974 left.as_ref(),
6975 right.as_ref(),
6976 out,
6977 )?;
6978 }
6979 Ok(())
6980 }
6981
6982 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
6983 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
6984 if projections
6985 .iter()
6986 .any(|projection| matches!(projection, Projection::All))
6987 {
6988 return self.table_all_projection_columns(&table.table);
6989 }
6990
6991 let mut columns = BTreeSet::new();
6992 for projection in &projections {
6993 collect_projection_columns_for_table(
6994 projection,
6995 &table.table,
6996 table.alias.as_deref(),
6997 &mut columns,
6998 );
6999 }
7000 Ok(columns.into_iter().collect())
7001 }
7002
7003 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
7004 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
7005 let columns: Vec<String> = contract
7006 .declared_columns
7007 .iter()
7008 .map(|column| column.name.clone())
7009 .collect();
7010 if !columns.is_empty() {
7011 return Ok(columns);
7012 }
7013 }
7014
7015 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
7016 Ok(records
7017 .first()
7018 .map(|record| {
7019 record
7020 .column_names()
7021 .into_iter()
7022 .map(|column| column.to_string())
7023 .collect()
7024 })
7025 .unwrap_or_default())
7026 }
7027
7028 fn resolve_table_expr_subqueries(
7029 &self,
7030 mut table: TableQuery,
7031 frame: &dyn super::statement_frame::ReadFrame,
7032 ) -> RedDBResult<TableQuery> {
7033 if let Some(TableSource::Subquery(inner)) = table.source.take() {
7034 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
7035 table.source = Some(TableSource::Subquery(Box::new(inner)));
7036 }
7037
7038 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
7039 for item in &mut table.select_items {
7040 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
7041 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7042 }
7043 }
7044 if let Some(where_expr) = table.where_expr.take() {
7045 table.where_expr =
7046 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
7047 table.filter = None;
7048 }
7049 if let Some(having_expr) = table.having_expr.take() {
7050 table.having_expr =
7051 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
7052 table.having = None;
7053 }
7054 for expr in &mut table.group_by_exprs {
7055 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7056 }
7057 for clause in &mut table.order_by {
7058 if let Some(expr) = clause.expr.take() {
7059 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
7060 }
7061 }
7062 Ok(table)
7063 }
7064
7065 fn resolve_select_expr_subqueries(
7066 &self,
7067 expr: QueryExpr,
7068 frame: &dyn super::statement_frame::ReadFrame,
7069 ) -> RedDBResult<QueryExpr> {
7070 match expr {
7071 QueryExpr::Table(table) => self
7072 .resolve_table_expr_subqueries(table, frame)
7073 .map(QueryExpr::Table),
7074 QueryExpr::Join(mut join) => {
7075 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
7076 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
7077 Ok(QueryExpr::Join(join))
7078 }
7079 other => Ok(other),
7080 }
7081 }
7082
7083 fn resolve_expr_subqueries(
7084 &self,
7085 expr: crate::storage::query::ast::Expr,
7086 outer_scopes: &[String],
7087 frame: &dyn super::statement_frame::ReadFrame,
7088 ) -> RedDBResult<crate::storage::query::ast::Expr> {
7089 use crate::storage::query::ast::Expr;
7090
7091 match expr {
7092 Expr::Subquery { query, span } => {
7093 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
7094 if values.len() > 1 {
7095 return Err(RedDBError::Query(
7096 "scalar subquery returned more than one row".to_string(),
7097 ));
7098 }
7099 Ok(Expr::Literal {
7100 value: values.into_iter().next().unwrap_or(Value::Null),
7101 span,
7102 })
7103 }
7104 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
7105 op,
7106 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
7107 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
7108 span,
7109 }),
7110 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
7111 op,
7112 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7113 span,
7114 }),
7115 Expr::Cast {
7116 inner,
7117 target,
7118 span,
7119 } => Ok(Expr::Cast {
7120 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
7121 target,
7122 span,
7123 }),
7124 Expr::FunctionCall { name, args, span } => {
7125 let args = args
7126 .into_iter()
7127 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
7128 .collect::<RedDBResult<Vec<_>>>()?;
7129 Ok(Expr::FunctionCall { name, args, span })
7130 }
7131 Expr::Case {
7132 branches,
7133 else_,
7134 span,
7135 } => {
7136 let branches = branches
7137 .into_iter()
7138 .map(|(cond, value)| {
7139 Ok((
7140 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
7141 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
7142 ))
7143 })
7144 .collect::<RedDBResult<Vec<_>>>()?;
7145 let else_ = else_
7146 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
7147 .transpose()?
7148 .map(Box::new);
7149 Ok(Expr::Case {
7150 branches,
7151 else_,
7152 span,
7153 })
7154 }
7155 Expr::IsNull {
7156 operand,
7157 negated,
7158 span,
7159 } => Ok(Expr::IsNull {
7160 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7161 negated,
7162 span,
7163 }),
7164 Expr::InList {
7165 target,
7166 values,
7167 negated,
7168 span,
7169 } => {
7170 let target =
7171 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
7172 let mut resolved = Vec::new();
7173 for value in values {
7174 if let Expr::Subquery { query, .. } = value {
7175 resolved.extend(
7176 self.execute_expr_subquery_values(query, outer_scopes, frame)?
7177 .into_iter()
7178 .map(Expr::lit),
7179 );
7180 } else {
7181 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
7182 }
7183 }
7184 Ok(Expr::InList {
7185 target,
7186 values: resolved,
7187 negated,
7188 span,
7189 })
7190 }
7191 Expr::Between {
7192 target,
7193 low,
7194 high,
7195 negated,
7196 span,
7197 } => Ok(Expr::Between {
7198 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
7199 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
7200 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
7201 negated,
7202 span,
7203 }),
7204 other => Ok(other),
7205 }
7206 }
7207
7208 fn execute_expr_subquery_values(
7209 &self,
7210 subquery: crate::storage::query::ast::ExprSubquery,
7211 outer_scopes: &[String],
7212 frame: &dyn super::statement_frame::ReadFrame,
7213 ) -> RedDBResult<Vec<Value>> {
7214 let query = *subquery.query;
7215 if query_references_outer_scope(&query, outer_scopes) {
7216 return Err(RedDBError::Query(
7217 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
7218 ));
7219 }
7220 let query = self.rewrite_view_refs(query);
7221 let query = self.resolve_select_expr_subqueries(query, frame)?;
7222 let query = self.authorize_relational_select_expr(query, frame)?;
7223 let result = match query {
7224 QueryExpr::Table(table) => {
7225 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
7226 }
7227 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
7228 other => {
7229 return Err(RedDBError::Query(format!(
7230 "expression subquery must be a SELECT query, got {}",
7231 query_expr_name(&other)
7232 )))
7233 }
7234 };
7235 first_column_values(result)
7236 }
7237
7238 fn dispatch_expr(
7239 &self,
7240 expr: QueryExpr,
7241 query_str: &str,
7242 mode: QueryMode,
7243 ) -> RedDBResult<RuntimeQueryResult> {
7244 let statement = query_expr_name(&expr);
7245 match expr {
7246 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
7247 Err(RedDBError::Query(
7249 "graph queries cannot be used as prepared statements".to_string(),
7250 ))
7251 }
7252 QueryExpr::Table(table) => {
7253 let scope = self.ai_scope();
7254 let table = self.resolve_table_expr_subqueries(
7255 table,
7256 &scope as &dyn super::statement_frame::ReadFrame,
7257 )?;
7258 if super::red_schema::is_virtual_table(&table.table) {
7259 return Ok(RuntimeQueryResult {
7260 query: query_str.to_string(),
7261 mode,
7262 statement,
7263 engine: "runtime-red-schema",
7264 result: super::red_schema::red_query(
7265 self,
7266 &table.table,
7267 &table,
7268 &scope as &dyn super::statement_frame::ReadFrame,
7269 )?,
7270 affected_rows: 0,
7271 statement_type: "select",
7272 });
7273 }
7274 let Some(table_with_rls) = self.authorize_relational_table_select(
7275 table,
7276 &scope as &dyn super::statement_frame::ReadFrame,
7277 )?
7278 else {
7279 return Ok(RuntimeQueryResult {
7280 query: query_str.to_string(),
7281 mode,
7282 statement,
7283 engine: "runtime-table-rls",
7284 result: crate::storage::query::unified::UnifiedResult::empty(),
7285 affected_rows: 0,
7286 statement_type: "select",
7287 });
7288 };
7289 Ok(RuntimeQueryResult {
7290 query: query_str.to_string(),
7291 mode,
7292 statement,
7293 engine: "runtime-table",
7294 result: execute_runtime_table_query(
7295 &self.inner.db,
7296 &table_with_rls,
7297 Some(&self.inner.index_store),
7298 )?,
7299 affected_rows: 0,
7300 statement_type: "select",
7301 })
7302 }
7303 QueryExpr::Join(join) => {
7304 let scope = self.ai_scope();
7305 let Some(join_with_rls) = self.authorize_relational_join_select(
7306 join,
7307 &scope as &dyn super::statement_frame::ReadFrame,
7308 )?
7309 else {
7310 return Ok(RuntimeQueryResult {
7311 query: query_str.to_string(),
7312 mode,
7313 statement,
7314 engine: "runtime-join-rls",
7315 result: crate::storage::query::unified::UnifiedResult::empty(),
7316 affected_rows: 0,
7317 statement_type: "select",
7318 });
7319 };
7320 Ok(RuntimeQueryResult {
7321 query: query_str.to_string(),
7322 mode,
7323 statement,
7324 engine: "runtime-join",
7325 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
7326 affected_rows: 0,
7327 statement_type: "select",
7328 })
7329 }
7330 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
7331 query: query_str.to_string(),
7332 mode,
7333 statement,
7334 engine: "runtime-vector",
7335 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
7336 affected_rows: 0,
7337 statement_type: "select",
7338 }),
7339 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
7340 query: query_str.to_string(),
7341 mode,
7342 statement,
7343 engine: "runtime-hybrid",
7344 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
7345 affected_rows: 0,
7346 statement_type: "select",
7347 }),
7348 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
7349 Err(RedDBError::Query(
7350 super::red_schema::READ_ONLY_ERROR.to_string(),
7351 ))
7352 }
7353 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
7354 Err(RedDBError::Query(
7355 super::red_schema::READ_ONLY_ERROR.to_string(),
7356 ))
7357 }
7358 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
7359 Err(RedDBError::Query(
7360 super::red_schema::READ_ONLY_ERROR.to_string(),
7361 ))
7362 }
7363 QueryExpr::Insert(ref insert) => self
7364 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
7365 self.execute_insert(query_str, insert)
7366 }),
7367 QueryExpr::Update(ref update) => self
7368 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
7369 self.execute_update(query_str, update)
7370 }),
7371 QueryExpr::Delete(ref delete) => self
7372 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
7373 self.execute_delete(query_str, delete)
7374 }),
7375 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
7376 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
7377 _ => Err(RedDBError::Query(format!(
7378 "prepared-statement execution does not support {statement} statements"
7379 ))),
7380 }
7381 }
7382
7383 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
7386 let q = query.trim();
7389 if !q.starts_with("SELECT") && !q.starts_with("select") {
7390 return None;
7391 }
7392
7393 let where_pos = q
7395 .find("WHERE _entity_id")
7396 .or_else(|| q.find("where _entity_id"))?;
7397 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
7399
7400 let id_str = after_eq.trim();
7402 let entity_id: u64 = id_str.parse().ok()?;
7403
7404 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
7406 let table = q[from_pos..where_pos].trim();
7407 if table.is_empty()
7408 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
7409 {
7410 return None; }
7412 let table_name = table.split_whitespace().next()?;
7413
7414 let store = self.inner.db.store();
7420 let entity = store
7421 .get(
7422 table_name,
7423 crate::storage::unified::EntityId::new(entity_id),
7424 )
7425 .filter(entity_visible_under_current_snapshot);
7426
7427 let count = if entity.is_some() { 1u64 } else { 0 };
7428
7429 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
7435 .as_ref()
7436 .and_then(|e| runtime_table_record_from_entity(e.clone()))
7437 .into_iter()
7438 .collect();
7439
7440 let json = match entity {
7441 Some(ref e) => execute_runtime_serialize_single_entity(e),
7442 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
7443 .to_string(),
7444 };
7445
7446 Some(Ok(RuntimeQueryResult {
7447 query: query.to_string(),
7448 mode: crate::storage::query::modes::QueryMode::Sql,
7449 statement: "select",
7450 engine: "fast-entity-lookup",
7451 result: crate::storage::query::unified::UnifiedResult {
7452 columns: Vec::new(),
7453 records,
7454 stats: crate::storage::query::unified::QueryStats {
7455 rows_scanned: count,
7456 ..Default::default()
7457 },
7458 pre_serialized_json: Some(json),
7459 },
7460 affected_rows: 0,
7461 statement_type: "select",
7462 }))
7463 }
7464
7465 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
7466 match self
7467 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
7468 .as_str()
7469 {
7470 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
7471 "shadow" => RuntimeResultCacheBackend::Shadow,
7472 _ => RuntimeResultCacheBackend::Legacy,
7473 }
7474 }
7475
7476 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7477 match self.result_cache_backend() {
7478 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
7479 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
7480 RuntimeResultCacheBackend::Shadow => {
7481 let legacy = self.get_legacy_result_cache_entry(key);
7482 let blob = self.get_blob_result_cache_entry(key);
7483 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
7484 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
7485 self.inner
7486 .result_cache_shadow_divergences
7487 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
7488 tracing::warn!(
7489 key,
7490 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
7491 "result cache shadow backend diverged from legacy"
7492 );
7493 }
7494 }
7495 legacy
7496 }
7497 }
7498 }
7499
7500 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7501 let cache = self.inner.result_cache.read();
7502 cache.0.get(key).and_then(|entry| {
7503 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
7504 Some(entry.result.clone())
7505 } else {
7506 None
7507 }
7508 })
7509 }
7510
7511 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7512 let hit = self
7513 .inner
7514 .result_blob_cache
7515 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
7516 {
7517 let cache = self.inner.result_blob_entries.read();
7518 if let Some(entry) = cache.0.get(key) {
7519 return Some(entry.result.clone());
7520 }
7521 }
7522
7523 let (result, scopes) = decode_result_cache_payload(hit.value())?;
7524 let mut cache = self.inner.result_blob_entries.write();
7525 let (ref mut map, ref mut order) = *cache;
7526 if !map.contains_key(key) {
7527 order.push_back(key.to_string());
7528 }
7529 map.insert(
7530 key.to_string(),
7531 RuntimeResultCacheEntry {
7532 result: result.clone(),
7533 cached_at: std::time::Instant::now(),
7534 scopes,
7535 },
7536 );
7537 trim_result_cache(map, order);
7538 Some(result)
7539 }
7540
7541 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7542 match self.result_cache_backend() {
7543 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
7544 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
7545 RuntimeResultCacheBackend::Shadow => {
7546 self.put_legacy_result_cache_entry(key, entry.clone());
7547 self.put_blob_result_cache_entry(key, entry);
7548 }
7549 }
7550 }
7551
7552 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7553 let mut cache = self.inner.result_cache.write();
7554 let (ref mut map, ref mut order) = *cache;
7555 if !map.contains_key(key) {
7556 order.push_back(key.to_string());
7557 }
7558 map.insert(key.to_string(), entry);
7559 trim_result_cache(map, order);
7560 }
7561
7562 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7563 let policy = crate::storage::cache::BlobCachePolicy::default()
7564 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
7565 .priority(200);
7566 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
7567 let bytes = encode_result_cache_payload(&entry)
7568 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
7569 let put = crate::storage::cache::BlobCachePut::new(bytes)
7570 .with_dependencies(dependencies)
7571 .with_policy(policy);
7572 if self
7573 .inner
7574 .result_blob_cache
7575 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
7576 .is_err()
7577 {
7578 return;
7579 }
7580
7581 let mut cache = self.inner.result_blob_entries.write();
7582 let (ref mut map, ref mut order) = *cache;
7583 if !map.contains_key(key) {
7584 order.push_back(key.to_string());
7585 }
7586 map.insert(key.to_string(), entry);
7587 trim_result_cache(map, order);
7588 }
7589
7590 pub fn result_cache_shadow_divergences(&self) -> u64 {
7591 self.inner
7592 .result_cache_shadow_divergences
7593 .load(std::sync::atomic::Ordering::Relaxed)
7594 }
7595
7596 pub fn invalidate_result_cache(&self) {
7599 let mut cache = self.inner.result_cache.write();
7600 cache.0.clear();
7601 cache.1.clear();
7602 let mut blob_entries = self.inner.result_blob_entries.write();
7603 blob_entries.0.clear();
7604 blob_entries.1.clear();
7605 self.inner
7606 .result_blob_cache
7607 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7608 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7609 ask_entries.0.clear();
7610 ask_entries.1.clear();
7611 self.inner
7612 .result_blob_cache
7613 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7614 }
7615
7616 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
7619 let legacy_has_match = {
7622 let cache = self.inner.result_cache.read();
7623 let (ref map, _) = *cache;
7624 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7625 };
7626 let blob_has_match = {
7627 let cache = self.inner.result_blob_entries.read();
7628 let (ref map, _) = *cache;
7629 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7630 };
7631 if legacy_has_match {
7632 let mut cache = self.inner.result_cache.write();
7633 let (ref mut map, ref mut order) = *cache;
7634 map.retain(|_, entry| !entry.scopes.contains(table));
7635 order.retain(|key| map.contains_key(key));
7636 }
7637
7638 if matches!(
7639 self.result_cache_backend(),
7640 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
7641 ) {
7642 let mut blob_entries = self.inner.result_blob_entries.write();
7643 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7644 blob_map.clear();
7645 blob_order.clear();
7646 self.inner
7647 .result_blob_cache
7648 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7649 } else if blob_has_match {
7650 let mut blob_entries = self.inner.result_blob_entries.write();
7651 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7652 blob_map.retain(|_, entry| !entry.scopes.contains(table));
7653 blob_order.retain(|key| blob_map.contains_key(key));
7654 }
7655 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7656 ask_entries.0.clear();
7657 ask_entries.1.clear();
7658 self.inner
7659 .result_blob_cache
7660 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7661 }
7662
7663 pub(crate) fn invalidate_plan_cache(&self) {
7664 self.inner.query_cache.write().clear();
7665 self.inner
7666 .ddl_epoch
7667 .fetch_add(1, std::sync::atomic::Ordering::Release);
7668 }
7669
7670 pub fn ddl_epoch(&self) -> u64 {
7674 self.inner
7675 .ddl_epoch
7676 .load(std::sync::atomic::Ordering::Acquire)
7677 }
7678
7679 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
7680 let store = self.inner.db.store();
7681 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
7682 self.invalidate_plan_cache();
7683 }
7684
7685 pub(crate) fn rehydrate_tenant_tables(&self) {
7694 let store = self.inner.db.store();
7695 let Some(manager) = store.get_collection("red_config") else {
7696 return;
7697 };
7698 for entity in manager.query_all(|_| true) {
7703 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
7704 continue;
7705 };
7706 let Some(named) = &row.named else { continue };
7707 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
7708 continue;
7709 };
7710 let Some(rest) = key.strip_prefix("tenant_tables.") else {
7712 continue;
7713 };
7714 let Some((table, suffix)) = rest.rsplit_once('.') else {
7715 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7721 collection: "red_config".to_string(),
7722 detail: format!("malformed tenant_tables key: {key}"),
7723 }
7724 .emit_global();
7725 continue;
7726 };
7727 if suffix != "column" {
7728 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7729 collection: "red_config".to_string(),
7730 detail: format!("unexpected tenant_tables suffix: {key}"),
7731 }
7732 .emit_global();
7733 continue;
7734 }
7735 match named.get("value") {
7736 Some(crate::storage::schema::Value::Text(column)) => {
7737 self.register_tenant_table(table, column);
7738 }
7739 Some(crate::storage::schema::Value::Null) | None => {
7741 self.unregister_tenant_table(table);
7742 }
7743 _ => {}
7744 }
7745 }
7746 }
7747
7748 pub(crate) fn rehydrate_materialized_view_descriptors(&self) {
7760 let store = self.inner.db.store();
7761 let descriptors = crate::runtime::continuous_materialized_view::load_all(store.as_ref());
7762 for descriptor in descriptors {
7763 let parsed = match crate::storage::query::parser::parse(&descriptor.source_sql) {
7764 Ok(qc) => qc,
7765 Err(err) => {
7766 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7767 collection:
7768 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
7769 .to_string(),
7770 detail: format!(
7771 "failed to re-parse materialized-view source for {}: {err}",
7772 descriptor.name
7773 ),
7774 }
7775 .emit_global();
7776 continue;
7777 }
7778 };
7779 let crate::storage::query::ast::QueryExpr::CreateView(create) = parsed.query else {
7780 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7781 collection: crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
7782 .to_string(),
7783 detail: format!(
7784 "materialized-view source for {} did not re-parse as CREATE VIEW",
7785 descriptor.name
7786 ),
7787 }
7788 .emit_global();
7789 continue;
7790 };
7791 let view_name = create.name.clone();
7793 self.inner
7794 .views
7795 .write()
7796 .insert(view_name.clone(), Arc::new(create));
7797 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
7799 let refresh = match descriptor.refresh_every_ms {
7800 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
7801 None => RefreshPolicy::Manual,
7802 };
7803 let def = MaterializedViewDef {
7804 name: view_name.clone(),
7805 query: format!("<parsed view {}>", view_name),
7806 dependencies: descriptor.source_collections.clone(),
7807 refresh,
7808 retention_duration_ms: descriptor.retention_duration_ms,
7809 };
7810 self.inner.materialized_views.write().register(def);
7811 }
7812 self.invalidate_plan_cache();
7815 }
7816
7817 pub(crate) fn rehydrate_declared_column_schemas(&self) {
7818 let store = self.inner.db.store();
7819 for contract in self.inner.db.collection_contracts() {
7820 let columns: Vec<String> = contract
7821 .declared_columns
7822 .iter()
7823 .map(|column| column.name.clone())
7824 .collect();
7825 let Some(manager) = store.get_collection(&contract.name) else {
7826 continue;
7827 };
7828 manager.set_column_schema_if_empty(columns);
7829 }
7830 }
7831
7832 pub fn register_tenant_table(&self, table: &str, column: &str) {
7837 use crate::storage::query::ast::{
7838 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
7839 };
7840 self.inner
7841 .tenant_tables
7842 .write()
7843 .insert(table.to_string(), column.to_string());
7844
7845 let lhs = Expr::Column {
7851 field: FieldRef::TableColumn {
7852 table: table.to_string(),
7853 column: column.to_string(),
7854 },
7855 span: Span::synthetic(),
7856 };
7857 let rhs = Expr::FunctionCall {
7858 name: "CURRENT_TENANT".to_string(),
7859 args: Vec::new(),
7860 span: Span::synthetic(),
7861 };
7862 let policy_filter = Filter::CompareExpr {
7863 lhs,
7864 op: CompareOp::Eq,
7865 rhs,
7866 };
7867
7868 let policy = CreatePolicyQuery {
7869 name: "__tenant_iso".to_string(),
7870 table: table.to_string(),
7871 action: None, role: None, using: Box::new(policy_filter),
7874 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
7881 };
7882
7883 self.inner.rls_policies.write().insert(
7885 (table.to_string(), "__tenant_iso".to_string()),
7886 Arc::new(policy),
7887 );
7888 self.inner
7889 .rls_enabled_tables
7890 .write()
7891 .insert(table.to_string());
7892
7893 self.ensure_tenant_index(table, column);
7899 }
7900
7901 fn ensure_tenant_index(&self, table: &str, column: &str) {
7909 if column.contains('.') {
7910 return;
7911 }
7912 let index_name = format!("__tenant_idx_{table}");
7913 let registry = self.inner.index_store.list_indices(table);
7914 if registry.iter().any(|idx| idx.name == index_name) {
7915 return;
7916 }
7917 if registry
7918 .iter()
7919 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
7920 {
7921 return;
7922 }
7923
7924 let store = self.inner.db.store();
7925 let Some(manager) = store.get_collection(table) else {
7926 return;
7927 };
7928 let entities = manager.query_all(|_| true);
7929 let entity_fields: Vec<(
7930 crate::storage::unified::EntityId,
7931 Vec<(String, crate::storage::schema::Value)>,
7932 )> = entities
7933 .iter()
7934 .map(|e| {
7935 let fields = match &e.data {
7936 crate::storage::EntityData::Row(row) => {
7937 if let Some(ref named) = row.named {
7938 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
7939 } else if let Some(ref schema) = row.schema {
7940 schema
7941 .iter()
7942 .zip(row.columns.iter())
7943 .map(|(k, v)| (k.clone(), v.clone()))
7944 .collect()
7945 } else {
7946 Vec::new()
7947 }
7948 }
7949 crate::storage::EntityData::Node(node) => node
7950 .properties
7951 .iter()
7952 .map(|(k, v)| (k.clone(), v.clone()))
7953 .collect(),
7954 _ => Vec::new(),
7955 };
7956 (e.id, fields)
7957 })
7958 .collect();
7959
7960 let columns = vec![column.to_string()];
7961 if self
7962 .inner
7963 .index_store
7964 .create_index(
7965 &index_name,
7966 table,
7967 &columns,
7968 super::index_store::IndexMethodKind::Hash,
7969 false,
7970 &entity_fields,
7971 )
7972 .is_err()
7973 {
7974 return;
7975 }
7976 self.inner
7977 .index_store
7978 .register(super::index_store::RegisteredIndex {
7979 name: index_name,
7980 collection: table.to_string(),
7981 columns,
7982 method: super::index_store::IndexMethodKind::Hash,
7983 unique: false,
7984 });
7985 self.invalidate_plan_cache();
7986 }
7987
7988 fn drop_tenant_index(&self, table: &str) {
7991 let index_name = format!("__tenant_idx_{table}");
7992 self.inner.index_store.drop_index(&index_name, table);
7993 }
7994
7995 pub fn tenant_column(&self, table: &str) -> Option<String> {
7999 self.inner.tenant_tables.read().get(table).cloned()
8000 }
8001
8002 pub fn unregister_tenant_table(&self, table: &str) {
8006 self.inner.tenant_tables.write().remove(table);
8007 self.inner
8008 .rls_policies
8009 .write()
8010 .remove(&(table.to_string(), "__tenant_iso".to_string()));
8011 self.drop_tenant_index(table);
8012 let has_other_policies = self
8014 .inner
8015 .rls_policies
8016 .read()
8017 .keys()
8018 .any(|(t, _)| t == table);
8019 if !has_other_policies {
8020 self.inner.rls_enabled_tables.write().remove(table);
8021 }
8022 }
8023
8024 pub(crate) fn record_pending_tombstone(
8030 &self,
8031 conn_id: u64,
8032 collection: &str,
8033 id: crate::storage::unified::entity::EntityId,
8034 stamper_xid: crate::storage::transaction::snapshot::Xid,
8035 previous_xmax: crate::storage::transaction::snapshot::Xid,
8036 ) {
8037 self.inner
8038 .pending_tombstones
8039 .write()
8040 .entry(conn_id)
8041 .or_default()
8042 .push((collection.to_string(), id, stamper_xid, previous_xmax));
8043 }
8044
8045 pub(crate) fn record_pending_versioned_update(
8046 &self,
8047 conn_id: u64,
8048 collection: &str,
8049 old_id: crate::storage::unified::entity::EntityId,
8050 new_id: crate::storage::unified::entity::EntityId,
8051 stamper_xid: crate::storage::transaction::snapshot::Xid,
8052 previous_xmax: crate::storage::transaction::snapshot::Xid,
8053 ) {
8054 self.inner
8055 .pending_versioned_updates
8056 .write()
8057 .entry(conn_id)
8058 .or_default()
8059 .push((
8060 collection.to_string(),
8061 old_id,
8062 new_id,
8063 stamper_xid,
8064 previous_xmax,
8065 ));
8066 }
8067
8068 fn with_deferred_store_wal_if_transaction<T>(
8069 &self,
8070 f: impl FnOnce() -> RedDBResult<T>,
8071 ) -> RedDBResult<T> {
8072 let conn_id = current_connection_id();
8073 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
8074 return f();
8075 }
8076
8077 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8078 let result = f();
8079 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8080 match result {
8081 Ok(value) => {
8082 self.record_pending_store_wal_actions(conn_id, captured);
8083 Ok(value)
8084 }
8085 Err(err) => Err(err),
8086 }
8087 }
8088
8089 fn with_deferred_store_wal_for_dml<T>(
8090 &self,
8091 capture_autocommit_events: bool,
8092 f: impl FnOnce() -> RedDBResult<T>,
8093 ) -> RedDBResult<T> {
8094 let conn_id = current_connection_id();
8095 if self.inner.tx_contexts.read().contains_key(&conn_id) {
8096 return self.with_deferred_store_wal_if_transaction(f);
8097 }
8098 if !capture_autocommit_events {
8099 return f();
8100 }
8101
8102 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8103 let result = f();
8104 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8105 self.inner
8106 .db
8107 .store()
8108 .append_deferred_store_wal_actions(captured)
8109 .map_err(|err| RedDBError::Internal(err.to_string()))?;
8110 result
8111 }
8112
8113 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
8114 !query.suppress_events
8115 && self.collection_has_event_subscriptions_for_operation(
8116 &query.table,
8117 crate::catalog::SubscriptionOperation::Insert,
8118 )
8119 }
8120
8121 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
8122 !query.suppress_events
8123 && self.collection_has_event_subscriptions_for_operation(
8124 &query.table,
8125 crate::catalog::SubscriptionOperation::Update,
8126 )
8127 }
8128
8129 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
8130 !query.suppress_events
8131 && self.collection_has_event_subscriptions_for_operation(
8132 &query.table,
8133 crate::catalog::SubscriptionOperation::Delete,
8134 )
8135 }
8136
8137 fn collection_has_event_subscriptions_for_operation(
8138 &self,
8139 collection: &str,
8140 operation: crate::catalog::SubscriptionOperation,
8141 ) -> bool {
8142 let Some(contract) = self.db().collection_contract_arc(collection) else {
8143 return false;
8144 };
8145 contract.subscriptions.iter().any(|subscription| {
8146 subscription.enabled
8147 && (subscription.ops_filter.is_empty()
8148 || subscription.ops_filter.contains(&operation))
8149 })
8150 }
8151
8152 fn record_pending_store_wal_actions(
8153 &self,
8154 conn_id: u64,
8155 actions: crate::storage::unified::DeferredStoreWalActions,
8156 ) {
8157 if actions.is_empty() {
8158 return;
8159 }
8160 let mut guard = self.inner.pending_store_wal_actions.write();
8161 guard.entry(conn_id).or_default().extend(actions);
8162 }
8163
8164 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
8165 let Some(actions) = self
8166 .inner
8167 .pending_store_wal_actions
8168 .write()
8169 .remove(&conn_id)
8170 else {
8171 return Ok(());
8172 };
8173 self.inner
8174 .db
8175 .store()
8176 .append_deferred_store_wal_actions(actions)
8177 .map_err(|err| RedDBError::Internal(err.to_string()))
8178 }
8179
8180 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
8181 self.inner
8182 .pending_store_wal_actions
8183 .write()
8184 .remove(&conn_id);
8185 }
8186
8187 fn xid_conflicts_with_snapshot(
8188 &self,
8189 xid: crate::storage::transaction::snapshot::Xid,
8190 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8191 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8192 ) -> bool {
8193 xid != 0
8194 && !own_xids.contains(&xid)
8195 && !self.inner.snapshot_manager.is_aborted(xid)
8196 && !self.inner.snapshot_manager.is_active(xid)
8197 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
8198 }
8199
8200 fn conflict_error(
8201 collection: &str,
8202 logical_id: crate::storage::unified::entity::EntityId,
8203 xid: crate::storage::transaction::snapshot::Xid,
8204 ) -> RedDBError {
8205 RedDBError::Query(format!(
8206 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
8207 logical_id.raw()
8208 ))
8209 }
8210
8211 fn check_logical_row_conflict(
8212 &self,
8213 collection: &str,
8214 logical_id: crate::storage::unified::entity::EntityId,
8215 excluded_ids: &[crate::storage::unified::entity::EntityId],
8216 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8217 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8218 ) -> RedDBResult<()> {
8219 let store = self.inner.db.store();
8220 let Some(manager) = store.get_collection(collection) else {
8221 return Ok(());
8222 };
8223
8224 for candidate in manager.query_all(|_| true) {
8225 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
8226 continue;
8227 }
8228 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
8229 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
8230 }
8231 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
8232 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
8233 }
8234 }
8235 Ok(())
8236 }
8237
8238 pub(crate) fn check_table_row_write_conflicts(
8239 &self,
8240 conn_id: u64,
8241 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8242 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8243 ) -> RedDBResult<()> {
8244 let versioned_updates = self
8245 .inner
8246 .pending_versioned_updates
8247 .read()
8248 .get(&conn_id)
8249 .cloned()
8250 .unwrap_or_default();
8251 let tombstones = self
8252 .inner
8253 .pending_tombstones
8254 .read()
8255 .get(&conn_id)
8256 .cloned()
8257 .unwrap_or_default();
8258
8259 let store = self.inner.db.store();
8260 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
8261 let Some(manager) = store.get_collection(&collection) else {
8262 continue;
8263 };
8264 let Some(old) = manager.get(old_id) else {
8265 continue;
8266 };
8267 let logical_id = old.logical_id();
8268 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
8269 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
8270 }
8271 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
8272 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
8273 }
8274 self.check_logical_row_conflict(
8275 &collection,
8276 logical_id,
8277 &[old_id, new_id],
8278 snapshot,
8279 own_xids,
8280 )?;
8281 }
8282
8283 for (collection, id, xid, previous_xmax) in tombstones {
8284 let Some(manager) = store.get_collection(&collection) else {
8285 continue;
8286 };
8287 let Some(entity) = manager.get(id) else {
8288 continue;
8289 };
8290 let logical_id = entity.logical_id();
8291 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
8292 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
8293 }
8294 if entity.xmax != xid
8295 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
8296 {
8297 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
8298 }
8299 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
8300 }
8301
8302 Ok(())
8303 }
8304
8305 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
8306 let versioned_updates = self
8307 .inner
8308 .pending_versioned_updates
8309 .read()
8310 .get(&conn_id)
8311 .cloned()
8312 .unwrap_or_default();
8313 let tombstones = self
8314 .inner
8315 .pending_tombstones
8316 .read()
8317 .get(&conn_id)
8318 .cloned()
8319 .unwrap_or_default();
8320
8321 let store = self.inner.db.store();
8322 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
8323 if let Some(manager) = store.get_collection(&collection) {
8324 if let Some(mut entity) = manager.get(old_id) {
8325 entity.set_xmax(xid);
8326 let _ = manager.update(entity);
8327 }
8328 }
8329 }
8330 for (collection, id, xid, _previous_xmax) in tombstones {
8331 if let Some(manager) = store.get_collection(&collection) {
8332 if let Some(mut entity) = manager.get(id) {
8333 entity.set_xmax(xid);
8334 let _ = manager.update(entity);
8335 }
8336 }
8337 }
8338 }
8339
8340 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
8341 self.inner
8342 .pending_versioned_updates
8343 .write()
8344 .remove(&conn_id);
8345 }
8346
8347 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
8348 let Some(pending) = self
8349 .inner
8350 .pending_versioned_updates
8351 .write()
8352 .remove(&conn_id)
8353 else {
8354 return;
8355 };
8356
8357 let store = self.inner.db.store();
8358 for (collection, old_id, new_id, xid, previous_xmax) in pending {
8359 if let Some(manager) = store.get_collection(&collection) {
8360 if let Some(mut old) = manager.get(old_id) {
8361 if old.xmax == xid {
8362 old.set_xmax(previous_xmax);
8363 let _ = manager.update(old);
8364 }
8365 }
8366 }
8367 let _ = store.delete_batch(&collection, &[new_id]);
8368 }
8369 }
8370
8371 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8372 let mut guard = self.inner.pending_versioned_updates.write();
8373 let Some(pending) = guard.get_mut(&conn_id) else {
8374 return 0;
8375 };
8376
8377 let store = self.inner.db.store();
8378 let mut reverted = 0usize;
8379 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
8380 if *xid < stamper_xid {
8381 return true;
8382 }
8383 if let Some(manager) = store.get_collection(collection) {
8384 if let Some(mut old) = manager.get(*old_id) {
8385 if old.xmax == *xid {
8386 old.set_xmax(*previous_xmax);
8387 let _ = manager.update(old);
8388 }
8389 }
8390 }
8391 let _ = store.delete_batch(collection, &[*new_id]);
8392 reverted += 1;
8393 false
8394 });
8395 if pending.is_empty() {
8396 guard.remove(&conn_id);
8397 }
8398 reverted
8399 }
8400
8401 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
8406 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8407 return;
8408 };
8409 if pending.is_empty() {
8410 return;
8411 }
8412
8413 let store = self.inner.db.store();
8414 for (collection, id, _xid, _previous_xmax) in pending {
8415 store.context_index().remove_entity(id);
8416 self.cdc_emit(
8417 crate::replication::cdc::ChangeOperation::Delete,
8418 &collection,
8419 id.raw(),
8420 "entity",
8421 );
8422 }
8423 }
8424
8425 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
8432 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8433 return;
8434 };
8435
8436 let store = self.inner.db.store();
8437 for (collection, id, xid, previous_xmax) in pending {
8438 let Some(manager) = store.get_collection(&collection) else {
8439 continue;
8440 };
8441 if let Some(mut entity) = manager.get(id) {
8442 if entity.xmax == xid {
8443 entity.set_xmax(previous_xmax);
8444 let _ = manager.update(entity);
8445 }
8446 }
8447 }
8448 }
8449
8450 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
8451 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
8452 return;
8453 };
8454 for event in pending {
8455 self.cdc_emit_kv(
8456 event.op,
8457 &event.collection,
8458 &event.key,
8459 0,
8460 event.before,
8461 event.after,
8462 );
8463 }
8464 }
8465
8466 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
8467 self.inner.pending_kv_watch_events.write().remove(&conn_id);
8468 }
8469
8470 fn materialize_graph_with_rls(
8479 &self,
8480 ) -> RedDBResult<(
8481 crate::storage::engine::GraphStore,
8482 std::collections::HashMap<
8483 String,
8484 std::collections::HashMap<String, crate::storage::schema::Value>,
8485 >,
8486 crate::storage::query::unified::EdgeProperties,
8487 )> {
8488 use crate::storage::engine::GraphStore;
8489 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
8490 use crate::storage::unified::entity::{EntityData, EntityKind};
8491 use std::collections::{HashMap, HashSet};
8492
8493 let store = self.inner.db.store();
8494 let snap_ctx = capture_current_snapshot();
8495 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
8496
8497 let graph = GraphStore::new();
8498 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
8499 HashMap::new();
8500 let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
8501 let mut allowed_nodes: HashSet<String> = HashSet::new();
8502
8503 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
8507 HashMap::new();
8508 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
8509 HashMap::new();
8510
8511 let collections = store.list_collections();
8512
8513 for collection in &collections {
8515 let Some(manager) = store.get_collection(collection) else {
8516 continue;
8517 };
8518 let entities = manager.query_all(|_| true);
8519 for entity in entities {
8520 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
8521 continue;
8522 }
8523 let EntityKind::GraphNode(ref node) = entity.kind else {
8524 continue;
8525 };
8526 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
8527 continue;
8528 }
8529 let id_str = entity.id.raw().to_string();
8530 graph
8531 .add_node_with_label(
8532 &id_str,
8533 &node.label,
8534 &super::graph_node_label(&node.node_type),
8535 )
8536 .map_err(|err| RedDBError::Query(err.to_string()))?;
8537 allowed_nodes.insert(id_str.clone());
8538 if let EntityData::Node(node_data) = &entity.data {
8539 node_properties.insert(id_str, node_data.properties.clone());
8540 }
8541 }
8542 }
8543
8544 for collection in &collections {
8548 let Some(manager) = store.get_collection(collection) else {
8549 continue;
8550 };
8551 let entities = manager.query_all(|_| true);
8552 for entity in entities {
8553 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
8554 continue;
8555 }
8556 let EntityKind::GraphEdge(ref edge) = entity.kind else {
8557 continue;
8558 };
8559 if !allowed_nodes.contains(&edge.from_node)
8560 || !allowed_nodes.contains(&edge.to_node)
8561 {
8562 continue;
8563 }
8564 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
8565 continue;
8566 }
8567 let weight = match &entity.data {
8568 EntityData::Edge(e) => e.weight,
8569 _ => edge.weight as f32 / 1000.0,
8570 };
8571 let edge_label = super::graph_edge_label(&edge.label);
8572 graph
8573 .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
8574 .map_err(|err| RedDBError::Query(err.to_string()))?;
8575 if let EntityData::Edge(edge_data) = &entity.data {
8576 edge_properties.insert(
8577 (edge.from_node.clone(), edge_label, edge.to_node.clone()),
8578 edge_data.properties.clone(),
8579 );
8580 }
8581 }
8582 }
8583
8584 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
8588
8589 Ok((graph, node_properties, edge_properties))
8590 }
8591
8592 pub(crate) fn stamp_xmin_if_in_txn(
8607 &self,
8608 collection: &str,
8609 id: crate::storage::unified::entity::EntityId,
8610 ) {
8611 let Some(xid) = self.current_xid() else {
8612 return;
8613 };
8614 let store = self.inner.db.store();
8615 let Some(manager) = store.get_collection(collection) else {
8616 return;
8617 };
8618 if let Some(mut entity) = manager.get(id) {
8619 entity.set_xmin(xid);
8620 let _ = manager.update(entity);
8621 }
8622 }
8623
8624 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8632 let mut guard = self.inner.pending_tombstones.write();
8633 let Some(pending) = guard.get_mut(&conn_id) else {
8634 return 0;
8635 };
8636
8637 let store = self.inner.db.store();
8638 let mut revived = 0usize;
8639 pending.retain(|(collection, id, xid, previous_xmax)| {
8640 if *xid < stamper_xid {
8641 return true;
8643 }
8644 if let Some(manager) = store.get_collection(collection) {
8645 if let Some(mut entity) = manager.get(*id) {
8646 if entity.xmax == *xid {
8647 entity.set_xmax(*previous_xmax);
8648 let _ = manager.update(entity);
8649 revived += 1;
8650 }
8651 }
8652 }
8653 false
8654 });
8655 if pending.is_empty() {
8656 guard.remove(&conn_id);
8657 }
8658 revived
8659 }
8660
8661 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
8670 let conn_id = current_connection_id();
8671 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
8672 return ctx.snapshot;
8673 }
8674 let high_water = self.inner.snapshot_manager.peek_next_xid();
8680 self.inner.snapshot_manager.snapshot(high_water)
8681 }
8682
8683 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
8693 let conn_id = current_connection_id();
8694 self.inner
8695 .tx_contexts
8696 .read()
8697 .get(&conn_id)
8698 .map(|ctx| ctx.writer_xid())
8699 }
8700
8701 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
8704 Arc::clone(&self.inner.snapshot_manager)
8705 }
8706
8707 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
8708 let manager = &self.inner.snapshot_manager;
8709 let next_xid = manager.peek_next_xid();
8710 let mut cutoff = next_xid;
8711 if let Some(oldest_active) = manager.oldest_active_xid() {
8712 cutoff = cutoff.min(oldest_active);
8713 }
8714 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
8715 cutoff = cutoff.min(oldest_pinned);
8716 }
8717 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
8718 if retention_xids > 0 {
8719 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
8720 }
8721 cutoff
8722 }
8723
8724 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
8725 let registered = self.inner.index_store.list_indices(table);
8726 if registered.is_empty() {
8727 return Ok(());
8728 }
8729 let store = self.inner.db.store();
8730 let Some(manager) = store.get_collection(table) else {
8731 return Ok(());
8732 };
8733 let entity_fields = manager
8734 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
8735 .into_iter()
8736 .map(|entity| (entity.id, table_row_index_fields(&entity)))
8737 .collect::<Vec<_>>();
8738
8739 for index in registered {
8740 self.inner.index_store.drop_index(&index.name, table);
8741 self.inner
8742 .index_store
8743 .create_index(
8744 &index.name,
8745 table,
8746 &index.columns,
8747 index.method,
8748 index.unique,
8749 &entity_fields,
8750 )
8751 .map_err(RedDBError::Internal)?;
8752 self.inner.index_store.register(index);
8753 }
8754 self.invalidate_plan_cache();
8755 Ok(())
8756 }
8757
8758 pub fn current_txn_own_xids(
8763 &self,
8764 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
8765 let mut set = std::collections::HashSet::new();
8766 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
8767 set.insert(ctx.xid);
8768 for (_, sub) in &ctx.savepoints {
8769 set.insert(*sub);
8770 }
8771 for sub in &ctx.released_sub_xids {
8772 set.insert(*sub);
8773 }
8774 }
8775 set
8776 }
8777
8778 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
8785 Arc::clone(&self.inner.foreign_tables)
8786 }
8787
8788 pub fn is_rls_enabled(&self, table: &str) -> bool {
8790 self.inner.rls_enabled_tables.read().contains(table)
8791 }
8792
8793 pub fn matching_rls_policies(
8800 &self,
8801 table: &str,
8802 role: Option<&str>,
8803 action: crate::storage::query::ast::PolicyAction,
8804 ) -> Vec<crate::storage::query::ast::Filter> {
8805 self.matching_rls_policies_for_kind(
8810 table,
8811 role,
8812 action,
8813 crate::storage::query::ast::PolicyTargetKind::Table,
8814 )
8815 }
8816
8817 pub fn matching_rls_policies_for_kind(
8825 &self,
8826 table: &str,
8827 role: Option<&str>,
8828 action: crate::storage::query::ast::PolicyAction,
8829 kind: crate::storage::query::ast::PolicyTargetKind,
8830 ) -> Vec<crate::storage::query::ast::Filter> {
8831 if !self.is_rls_enabled(table) {
8832 return Vec::new();
8833 }
8834 let policies = self.inner.rls_policies.read();
8835 policies
8836 .iter()
8837 .filter_map(|((t, _), p)| {
8838 if t != table {
8839 return None;
8840 }
8841 if p.target_kind != kind
8850 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
8851 {
8852 return None;
8853 }
8854 if let Some(a) = p.action {
8856 if a != action {
8857 return None;
8858 }
8859 }
8860 if let Some(p_role) = p.role.as_deref() {
8862 match role {
8863 Some(r) if r == p_role => {}
8864 _ => return None,
8865 }
8866 }
8867 Some((*p.using).clone())
8868 })
8869 .collect()
8870 }
8871
8872 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
8873 let store = self.inner.db.store();
8874 if let Some(stats) =
8875 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
8876 {
8877 crate::storage::query::planner::stats_catalog::persist_table_stats(
8878 store.as_ref(),
8879 &stats,
8880 );
8881 } else {
8882 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8883 }
8884 self.invalidate_plan_cache();
8885 }
8886
8887 pub(crate) fn note_table_write(&self, table: &str) {
8888 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
8893 if !already_dirty {
8894 self.inner
8895 .planner_dirty_tables
8896 .write()
8897 .insert(table.to_string());
8898 }
8899 self.invalidate_result_cache_for_table(table);
8900 }
8901
8902 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
8910 let explain = self.explain_query(inner_sql)?;
8911
8912 let columns = vec![
8913 "op".to_string(),
8914 "source".to_string(),
8915 "est_rows".to_string(),
8916 "est_cost".to_string(),
8917 "depth".to_string(),
8918 ];
8919
8920 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
8921
8922 for name in &explain.cte_materializations {
8928 use std::sync::Arc;
8929 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
8930 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
8931 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
8932 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
8933 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
8934 rec.set_arc(Arc::from("depth"), Value::Integer(0));
8935 records.push(rec);
8936 }
8937
8938 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
8939
8940 let result = crate::storage::query::unified::UnifiedResult {
8941 columns,
8942 records,
8943 stats: Default::default(),
8944 pre_serialized_json: None,
8945 };
8946
8947 Ok(RuntimeQueryResult {
8948 query: raw_query.to_string(),
8949 mode: explain.mode,
8950 statement: "explain",
8951 engine: "runtime-explain",
8952 result,
8953 affected_rows: 0,
8954 statement_type: "select",
8955 })
8956 }
8957
8958 pub(super) fn check_query_privilege(
8966 &self,
8967 expr: &crate::storage::query::ast::QueryExpr,
8968 ) -> Result<(), String> {
8969 use crate::auth::privileges::{Action, AuthzContext, Resource};
8970 use crate::auth::UserId;
8971 use crate::storage::query::ast::QueryExpr;
8972
8973 let auth_store = match self.inner.auth_store.read().clone() {
8978 Some(s) => s,
8979 None => return Ok(()),
8980 };
8981
8982 let (username, role) = match current_auth_identity() {
8988 Some(p) => p,
8989 None => return Ok(()),
8990 };
8991 let tenant = current_tenant();
8992
8993 let ctx = AuthzContext {
8994 principal: &username,
8995 effective_role: role,
8996 tenant: tenant.as_deref(),
8997 };
8998 let principal_id = UserId::from_parts(tenant.as_deref(), &username);
8999
9000 let (action, resource) = match expr {
9002 QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
9003 QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
9004 QueryExpr::Graph(g) => {
9005 if auth_store.iam_authorization_enabled() {
9006 self.check_graph_property_projection_privilege(
9007 &auth_store,
9008 &principal_id,
9009 role,
9010 tenant.as_deref(),
9011 g,
9012 )?;
9013 return Ok(());
9014 }
9015 return Ok(());
9016 }
9017 QueryExpr::Vector(v) => {
9018 if auth_store.iam_authorization_enabled() {
9019 self.check_table_like_column_projection_privilege(
9020 &auth_store,
9021 &principal_id,
9022 role,
9023 tenant.as_deref(),
9024 &v.collection,
9025 &["content".to_string()],
9026 )?;
9027 return Ok(());
9028 }
9029 return Ok(());
9030 }
9031 QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
9032 QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
9033 QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
9034 QueryExpr::Join(_) => (Action::Select, Resource::Database),
9038 QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
9041 return if role == crate::auth::Role::Admin {
9042 Ok(())
9043 } else {
9044 Err(format!(
9045 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
9046 username, role
9047 ))
9048 };
9049 }
9050 QueryExpr::CreateIamPolicy { id, .. } => {
9051 return self.check_policy_management_privilege(
9052 &auth_store,
9053 &principal_id,
9054 role,
9055 tenant.as_deref(),
9056 "policy:put",
9057 "policy",
9058 id,
9059 );
9060 }
9061 QueryExpr::DropIamPolicy { id } => {
9062 return self.check_policy_management_privilege(
9063 &auth_store,
9064 &principal_id,
9065 role,
9066 tenant.as_deref(),
9067 "policy:drop",
9068 "policy",
9069 id,
9070 );
9071 }
9072 QueryExpr::AttachPolicy { policy_id, .. } => {
9073 return self.check_policy_management_privilege(
9074 &auth_store,
9075 &principal_id,
9076 role,
9077 tenant.as_deref(),
9078 "policy:attach",
9079 "policy",
9080 policy_id,
9081 );
9082 }
9083 QueryExpr::DetachPolicy { policy_id, .. } => {
9084 return self.check_policy_management_privilege(
9085 &auth_store,
9086 &principal_id,
9087 role,
9088 tenant.as_deref(),
9089 "policy:detach",
9090 "policy",
9091 policy_id,
9092 );
9093 }
9094 QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
9095 return Ok(());
9096 }
9097 QueryExpr::SimulatePolicy { .. } => {
9098 return self.check_policy_management_privilege(
9099 &auth_store,
9100 &principal_id,
9101 role,
9102 tenant.as_deref(),
9103 "policy:simulate",
9104 "policy",
9105 "*",
9106 );
9107 }
9108 QueryExpr::DropTable(q) => {
9111 return self.check_ddl_collection_privilege(
9112 &auth_store,
9113 &principal_id,
9114 role,
9115 tenant.as_deref(),
9116 &username,
9117 "drop",
9118 &q.name,
9119 );
9120 }
9121 QueryExpr::DropGraph(q) => {
9122 return self.check_ddl_collection_privilege(
9123 &auth_store,
9124 &principal_id,
9125 role,
9126 tenant.as_deref(),
9127 &username,
9128 "drop",
9129 &q.name,
9130 );
9131 }
9132 QueryExpr::DropVector(q) => {
9133 return self.check_ddl_collection_privilege(
9134 &auth_store,
9135 &principal_id,
9136 role,
9137 tenant.as_deref(),
9138 &username,
9139 "drop",
9140 &q.name,
9141 );
9142 }
9143 QueryExpr::DropDocument(q) => {
9144 return self.check_ddl_collection_privilege(
9145 &auth_store,
9146 &principal_id,
9147 role,
9148 tenant.as_deref(),
9149 &username,
9150 "drop",
9151 &q.name,
9152 );
9153 }
9154 QueryExpr::DropKv(q) => {
9155 return self.check_ddl_collection_privilege(
9156 &auth_store,
9157 &principal_id,
9158 role,
9159 tenant.as_deref(),
9160 &username,
9161 "drop",
9162 &q.name,
9163 );
9164 }
9165 QueryExpr::DropCollection(q) => {
9166 return self.check_ddl_collection_privilege(
9167 &auth_store,
9168 &principal_id,
9169 role,
9170 tenant.as_deref(),
9171 &username,
9172 "drop",
9173 &q.name,
9174 );
9175 }
9176 QueryExpr::Truncate(q) => {
9177 return self.check_ddl_collection_privilege(
9178 &auth_store,
9179 &principal_id,
9180 role,
9181 tenant.as_deref(),
9182 &username,
9183 "truncate",
9184 &q.name,
9185 );
9186 }
9187 QueryExpr::CreateTable(_)
9189 | QueryExpr::CreateCollection(_)
9190 | QueryExpr::CreateVector(_)
9191 | QueryExpr::AlterTable(_)
9192 | QueryExpr::CreateIndex(_)
9193 | QueryExpr::DropIndex(_)
9194 | QueryExpr::CreateSchema(_)
9195 | QueryExpr::DropSchema(_)
9196 | QueryExpr::CreateSequence(_)
9197 | QueryExpr::DropSequence(_)
9198 | QueryExpr::CreateView(_)
9199 | QueryExpr::DropView(_)
9200 | QueryExpr::RefreshMaterializedView(_)
9201 | QueryExpr::CreatePolicy(_)
9202 | QueryExpr::DropPolicy(_)
9203 | QueryExpr::CreateServer(_)
9204 | QueryExpr::DropServer(_)
9205 | QueryExpr::CreateForeignTable(_)
9206 | QueryExpr::DropForeignTable(_)
9207 | QueryExpr::CreateTimeSeries(_)
9208 | QueryExpr::DropTimeSeries(_)
9209 | QueryExpr::CreateQueue(_)
9210 | QueryExpr::AlterQueue(_)
9211 | QueryExpr::DropQueue(_)
9212 | QueryExpr::CreateTree(_)
9213 | QueryExpr::DropTree(_) => {
9214 return if role >= crate::auth::Role::Write {
9215 Ok(())
9216 } else {
9217 Err(format!(
9218 "principal=`{}` role=`{:?}` cannot issue DDL",
9219 username, role
9220 ))
9221 };
9222 }
9223 QueryExpr::CreateMigration(_) => {
9225 return if role >= crate::auth::Role::Write {
9226 Ok(())
9227 } else {
9228 Err(format!(
9229 "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
9230 username, role
9231 ))
9232 };
9233 }
9234 QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
9236 return if role == crate::auth::Role::Admin {
9237 Ok(())
9238 } else {
9239 Err(format!(
9240 "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
9241 username, role
9242 ))
9243 };
9244 }
9245 QueryExpr::ExplainMigration(_) => return Ok(()),
9247 _ => return Ok(()),
9251 };
9252
9253 if auth_store.iam_authorization_enabled() {
9254 let iam_action = legacy_action_to_iam(action);
9255 let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
9256 let iam_ctx = runtime_iam_context(
9257 role,
9258 tenant.as_deref(),
9259 auth_store.principal_is_system_owned(&principal_id),
9260 );
9261 if !auth_store.check_policy_authz(&principal_id, iam_action, &iam_resource, &iam_ctx) {
9262 return Err(format!(
9263 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9264 username, iam_action, iam_resource.kind, iam_resource.name
9265 ));
9266 }
9267
9268 if let QueryExpr::Table(table) = expr {
9269 self.check_table_column_projection_privilege(
9270 &auth_store,
9271 &principal_id,
9272 &iam_ctx,
9273 table,
9274 )?;
9275 }
9276
9277 if let QueryExpr::Update(update) = expr {
9278 let columns = update_set_target_columns(update);
9279 if !columns.is_empty() {
9280 let request = column_access_request_for_table_update(&update.table, columns);
9281 let outcome =
9282 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
9283 if let Some(denied) = outcome.first_denied_column() {
9284 return Err(format!(
9285 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
9286 username, iam_action, denied.resource.kind, denied.resource.name
9287 ));
9288 }
9289 if !outcome.allowed() {
9290 return Err(format!(
9291 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9292 username,
9293 iam_action,
9294 outcome.table_resource.kind,
9295 outcome.table_resource.name
9296 ));
9297 }
9298 }
9299
9300 if let Some(columns) = update_returning_columns_for_policy(self, update) {
9301 let request = column_access_request_for_table_select(&update.table, columns);
9302 let outcome =
9303 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
9304 if let Some(denied) = outcome.first_denied_column() {
9305 return Err(format!(
9306 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
9307 username, denied.resource.kind, denied.resource.name
9308 ));
9309 }
9310 if !outcome.allowed() {
9311 return Err(format!(
9312 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9313 username, outcome.table_resource.kind, outcome.table_resource.name
9314 ));
9315 }
9316 }
9317 }
9318
9319 Ok(())
9320 } else {
9321 auth_store
9322 .check_grant(&ctx, action, &resource)
9323 .map_err(|e| e.to_string())
9324 }
9325 }
9326
9327 fn check_table_column_projection_privilege(
9328 &self,
9329 auth_store: &Arc<crate::auth::store::AuthStore>,
9330 principal: &crate::auth::UserId,
9331 ctx: &crate::auth::policies::EvalContext,
9332 table: &crate::storage::query::ast::TableQuery,
9333 ) -> Result<(), String> {
9334 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
9335
9336 let columns = requested_table_columns_for_policy(table);
9337 if columns.is_empty() {
9338 return Ok(());
9339 }
9340
9341 let request = ColumnAccessRequest::select(table.table.clone(), columns);
9342 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
9343 if outcome.allowed() {
9344 return Ok(());
9345 }
9346
9347 if !matches!(
9348 outcome.table_decision,
9349 crate::auth::policies::Decision::Allow { .. }
9350 | crate::auth::policies::Decision::AdminBypass
9351 ) {
9352 return Err(format!(
9353 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9354 principal, outcome.table_resource.kind, outcome.table_resource.name
9355 ));
9356 }
9357
9358 let denied = outcome
9359 .first_denied_column()
9360 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
9361 match denied {
9362 Some(decision) => Err(format!(
9363 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9364 principal, decision.resource.kind, decision.resource.name
9365 )),
9366 None => Ok(()),
9367 }
9368 }
9369
9370 fn check_graph_property_projection_privilege(
9371 &self,
9372 auth_store: &Arc<crate::auth::store::AuthStore>,
9373 principal: &crate::auth::UserId,
9374 role: crate::auth::Role,
9375 tenant: Option<&str>,
9376 query: &crate::storage::query::ast::GraphQuery,
9377 ) -> Result<(), String> {
9378 let columns = explicit_graph_projection_properties(query);
9379 if columns.is_empty() {
9380 return Ok(());
9381 }
9382 self.check_table_like_column_projection_privilege(
9383 auth_store, principal, role, tenant, "graph", &columns,
9384 )
9385 }
9386
9387 fn check_table_like_column_projection_privilege(
9388 &self,
9389 auth_store: &Arc<crate::auth::store::AuthStore>,
9390 principal: &crate::auth::UserId,
9391 role: crate::auth::Role,
9392 tenant: Option<&str>,
9393 table: &str,
9394 columns: &[String],
9395 ) -> Result<(), String> {
9396 let iam_ctx = runtime_iam_context(
9397 role,
9398 tenant,
9399 auth_store.principal_is_system_owned(principal),
9400 );
9401 let request =
9402 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
9403 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
9404 if outcome.allowed() {
9405 return Ok(());
9406 }
9407 let denied = outcome
9408 .first_denied_column()
9409 .map(|d| d.resource.name.clone())
9410 .unwrap_or_else(|| format!("{table}.<unknown>"));
9411 Err(format!(
9412 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
9413 principal, denied
9414 ))
9415 }
9416
9417 fn check_policy_management_privilege(
9418 &self,
9419 auth_store: &Arc<crate::auth::store::AuthStore>,
9420 principal: &crate::auth::UserId,
9421 role: crate::auth::Role,
9422 tenant: Option<&str>,
9423 action: &str,
9424 resource_kind: &str,
9425 resource_name: &str,
9426 ) -> Result<(), String> {
9427 if !auth_store.iam_authorization_enabled() {
9428 return if role == crate::auth::Role::Admin {
9429 Ok(())
9430 } else {
9431 Err(format!(
9432 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
9433 principal, role
9434 ))
9435 };
9436 }
9437
9438 let mut resource = crate::auth::policies::ResourceRef::new(
9439 resource_kind.to_string(),
9440 resource_name.to_string(),
9441 );
9442 if let Some(t) = tenant {
9443 resource = resource.with_tenant(t.to_string());
9444 }
9445 let ctx = runtime_iam_context(
9446 role,
9447 tenant,
9448 auth_store.principal_is_system_owned(principal),
9449 );
9450 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9451 Ok(())
9452 } else {
9453 Err(format!(
9454 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9455 principal, action, resource.kind, resource.name
9456 ))
9457 }
9458 }
9459
9460 fn check_ddl_collection_privilege(
9468 &self,
9469 auth_store: &Arc<crate::auth::store::AuthStore>,
9470 principal: &crate::auth::UserId,
9471 role: crate::auth::Role,
9472 tenant: Option<&str>,
9473 username: &str,
9474 action: &str,
9475 collection: &str,
9476 ) -> Result<(), String> {
9477 if role < crate::auth::Role::Write {
9478 let msg = format!(
9479 "principal=`{}` role=`{:?}` cannot issue DDL",
9480 username, role
9481 );
9482 self.inner.audit_log.record(
9483 action,
9484 username,
9485 collection,
9486 "denied",
9487 crate::json::Value::Null,
9488 );
9489 return Err(msg);
9490 }
9491
9492 if !auth_store.iam_authorization_enabled() {
9493 self.inner.audit_log.record(
9494 action,
9495 username,
9496 collection,
9497 "ok",
9498 crate::json::Value::Null,
9499 );
9500 return Ok(());
9501 }
9502
9503 let resource_name = collection.to_string();
9504 let mut resource = crate::auth::policies::ResourceRef::new(
9505 "collection".to_string(),
9506 resource_name.clone(),
9507 );
9508 if let Some(t) = tenant {
9509 resource = resource.with_tenant(t.to_string());
9510 }
9511 let ctx = runtime_iam_context(
9512 role,
9513 tenant,
9514 auth_store.principal_is_system_owned(principal),
9515 );
9516 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9517 self.inner.audit_log.record(
9518 action,
9519 username,
9520 &resource_name,
9521 "ok",
9522 crate::json::Value::Null,
9523 );
9524 Ok(())
9525 } else {
9526 self.inner.audit_log.record(
9527 action,
9528 username,
9529 &resource_name,
9530 "denied",
9531 crate::json::Value::Null,
9532 );
9533 Err(format!(
9534 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
9535 username, action, resource_name
9536 ))
9537 }
9538 }
9539
9540 fn execute_grant_statement(
9542 &self,
9543 query: &str,
9544 stmt: &crate::storage::query::ast::GrantStmt,
9545 ) -> RedDBResult<RuntimeQueryResult> {
9546 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9547 use crate::auth::UserId;
9548 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9549
9550 let auth_store = self
9551 .inner
9552 .auth_store
9553 .read()
9554 .clone()
9555 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9556
9557 let (gname, grole) = current_auth_identity().ok_or_else(|| {
9559 RedDBError::Query("GRANT requires an authenticated principal".to_string())
9560 })?;
9561 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
9562 let granter_role = grole;
9563
9564 let mut actions: Vec<Action> = Vec::new();
9566 if stmt.all {
9567 actions.push(Action::All);
9568 } else {
9569 for kw in &stmt.actions {
9570 let a = Action::from_keyword(kw).ok_or_else(|| {
9571 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
9572 })?;
9573 actions.push(a);
9574 }
9575 }
9576
9577 let mut applied = 0usize;
9579 for obj in &stmt.objects {
9580 let resource = match stmt.object_kind {
9581 GrantObjectKind::Table => Resource::Table {
9582 schema: obj.schema.clone(),
9583 table: obj.name.clone(),
9584 },
9585 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9586 GrantObjectKind::Database => Resource::Database,
9587 GrantObjectKind::Function => Resource::Function {
9588 schema: obj.schema.clone(),
9589 name: obj.name.clone(),
9590 },
9591 };
9592 for principal in &stmt.principals {
9593 let p = match principal {
9594 GrantPrincipalRef::Public => GrantPrincipal::Public,
9595 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9596 GrantPrincipalRef::User { tenant, name } => {
9597 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9598 }
9599 };
9600 let tenant = granter.tenant.clone();
9603 auth_store
9604 .grant(
9605 &granter,
9606 granter_role,
9607 p.clone(),
9608 resource.clone(),
9609 actions.clone(),
9610 stmt.with_grant_option,
9611 tenant.clone(),
9612 )
9613 .map_err(|e| RedDBError::Query(e.to_string()))?;
9614
9615 if let Some(policy) =
9619 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
9620 {
9621 let pid = policy.id.clone();
9622 auth_store
9623 .put_policy_internal(policy)
9624 .map_err(|e| RedDBError::Query(e.to_string()))?;
9625 let attachment = match &p {
9626 GrantPrincipal::User(uid) => {
9627 crate::auth::store::PrincipalRef::User(uid.clone())
9628 }
9629 GrantPrincipal::Group(group) => {
9630 crate::auth::store::PrincipalRef::Group(group.clone())
9631 }
9632 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
9633 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
9634 ),
9635 };
9636 auth_store
9637 .attach_policy(attachment, &pid)
9638 .map_err(|e| RedDBError::Query(e.to_string()))?;
9639 }
9640 applied += 1;
9641 tracing::info!(
9642 target: "audit",
9643 principal = %granter,
9644 action = "grant",
9645 "GRANT applied"
9646 );
9647 }
9648 }
9649
9650 self.invalidate_result_cache();
9651 Ok(RuntimeQueryResult::ok_message(
9652 query.to_string(),
9653 &format!("GRANT applied to {} target(s)", applied),
9654 "grant",
9655 ))
9656 }
9657
9658 fn execute_revoke_statement(
9660 &self,
9661 query: &str,
9662 stmt: &crate::storage::query::ast::RevokeStmt,
9663 ) -> RedDBResult<RuntimeQueryResult> {
9664 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9665 use crate::auth::UserId;
9666 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9667
9668 let auth_store = self
9669 .inner
9670 .auth_store
9671 .read()
9672 .clone()
9673 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9674
9675 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9676 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
9677 })?;
9678 let granter_role = grole;
9679
9680 let actions: Vec<Action> = if stmt.all {
9681 vec![Action::All]
9682 } else {
9683 stmt.actions
9684 .iter()
9685 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
9686 .collect()
9687 };
9688
9689 let mut total_removed = 0usize;
9690 for obj in &stmt.objects {
9691 let resource = match stmt.object_kind {
9692 GrantObjectKind::Table => Resource::Table {
9693 schema: obj.schema.clone(),
9694 table: obj.name.clone(),
9695 },
9696 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9697 GrantObjectKind::Database => Resource::Database,
9698 GrantObjectKind::Function => Resource::Function {
9699 schema: obj.schema.clone(),
9700 name: obj.name.clone(),
9701 },
9702 };
9703 for principal in &stmt.principals {
9704 let p = match principal {
9705 GrantPrincipalRef::Public => GrantPrincipal::Public,
9706 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9707 GrantPrincipalRef::User { tenant, name } => {
9708 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9709 }
9710 };
9711 let removed = auth_store
9712 .revoke(granter_role, &p, &resource, &actions)
9713 .map_err(|e| RedDBError::Query(e.to_string()))?;
9714 let _removed_policies =
9715 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
9716 total_removed += removed;
9717 }
9718 }
9719
9720 self.invalidate_result_cache();
9721 Ok(RuntimeQueryResult::ok_message(
9722 query.to_string(),
9723 &format!("REVOKE removed {} grant(s)", total_removed),
9724 "revoke",
9725 ))
9726 }
9727
9728 fn execute_alter_user_statement(
9730 &self,
9731 query: &str,
9732 stmt: &crate::storage::query::ast::AlterUserStmt,
9733 ) -> RedDBResult<RuntimeQueryResult> {
9734 use crate::auth::privileges::UserAttributes;
9735 use crate::auth::UserId;
9736 use crate::storage::query::ast::AlterUserAttribute;
9737
9738 let auth_store = self
9739 .inner
9740 .auth_store
9741 .read()
9742 .clone()
9743 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9744
9745 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9746 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
9747 })?;
9748 if grole != crate::auth::Role::Admin {
9749 return Err(RedDBError::Query(
9750 "ALTER USER requires Admin role".to_string(),
9751 ));
9752 }
9753
9754 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
9755
9756 let mut attrs = auth_store.user_attributes(&target);
9759 let mut enable_change: Option<bool> = None;
9760
9761 for a in &stmt.attributes {
9762 match a {
9763 AlterUserAttribute::ValidUntil(ts) => {
9764 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
9768 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
9769 })?;
9770 attrs.valid_until = Some(ms);
9771 }
9772 AlterUserAttribute::ConnectionLimit(n) => {
9773 if *n < 0 {
9774 return Err(RedDBError::Query(
9775 "CONNECTION LIMIT must be non-negative".to_string(),
9776 ));
9777 }
9778 attrs.connection_limit = Some(*n as u32);
9779 }
9780 AlterUserAttribute::SetSearchPath(p) => {
9781 attrs.search_path = Some(p.clone());
9782 }
9783 AlterUserAttribute::AddGroup(g) => {
9784 if !attrs.groups.iter().any(|existing| existing == g) {
9785 attrs.groups.push(g.clone());
9786 attrs.groups.sort();
9787 }
9788 }
9789 AlterUserAttribute::DropGroup(g) => {
9790 attrs.groups.retain(|existing| existing != g);
9791 }
9792 AlterUserAttribute::Enable => enable_change = Some(true),
9793 AlterUserAttribute::Disable => enable_change = Some(false),
9794 AlterUserAttribute::Password(_) => {
9795 }
9799 }
9800 }
9801
9802 auth_store
9803 .set_user_attributes(&target, attrs)
9804 .map_err(|e| RedDBError::Query(e.to_string()))?;
9805 if let Some(en) = enable_change {
9806 auth_store
9807 .set_user_enabled(&target, en)
9808 .map_err(|e| RedDBError::Query(e.to_string()))?;
9809 }
9810 self.invalidate_result_cache();
9811 tracing::info!(
9812 target: "audit",
9813 principal = %target,
9814 action = "alter_user",
9815 "ALTER USER applied"
9816 );
9817
9818 Ok(RuntimeQueryResult::ok_message(
9819 query.to_string(),
9820 &format!("ALTER USER {} applied", target),
9821 "alter_user",
9822 ))
9823 }
9824
9825 fn execute_create_iam_policy(
9830 &self,
9831 query: &str,
9832 id: &str,
9833 json: &str,
9834 ) -> RedDBResult<RuntimeQueryResult> {
9835 use crate::auth::policies::Policy;
9836
9837 let auth_store = self
9838 .inner
9839 .auth_store
9840 .read()
9841 .clone()
9842 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9843
9844 let mut policy = Policy::from_json_str(json)
9849 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
9850 if policy.id != id {
9851 policy.id = id.to_string();
9852 }
9853 let pid = policy.id.clone();
9854 auth_store
9855 .put_policy(policy)
9856 .map_err(|e| RedDBError::Query(e.to_string()))?;
9857
9858 let principal = current_auth_identity()
9859 .map(|(u, _)| u)
9860 .unwrap_or_else(|| "anonymous".into());
9861 tracing::info!(
9862 target: "audit",
9863 principal = %principal,
9864 action = "iam:policy.put",
9865 matched_policy_id = %pid,
9866 "CREATE POLICY applied"
9867 );
9868 self.inner.audit_log.record(
9869 "iam/policy.put",
9870 &principal,
9871 &pid,
9872 "ok",
9873 crate::json::Value::Null,
9874 );
9875
9876 self.invalidate_result_cache();
9877 Ok(RuntimeQueryResult::ok_message(
9878 query.to_string(),
9879 &format!("policy `{pid}` stored"),
9880 "create_iam_policy",
9881 ))
9882 }
9883
9884 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
9885 let auth_store = self
9886 .inner
9887 .auth_store
9888 .read()
9889 .clone()
9890 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9891 auth_store
9892 .delete_policy(id)
9893 .map_err(|e| RedDBError::Query(e.to_string()))?;
9894
9895 let principal = current_auth_identity()
9896 .map(|(u, _)| u)
9897 .unwrap_or_else(|| "anonymous".into());
9898 tracing::info!(
9899 target: "audit",
9900 principal = %principal,
9901 action = "iam:policy.drop",
9902 matched_policy_id = %id,
9903 "DROP POLICY applied"
9904 );
9905 self.inner.audit_log.record(
9906 "iam/policy.drop",
9907 &principal,
9908 id,
9909 "ok",
9910 crate::json::Value::Null,
9911 );
9912
9913 self.invalidate_result_cache();
9914 Ok(RuntimeQueryResult::ok_message(
9915 query.to_string(),
9916 &format!("policy `{id}` dropped"),
9917 "drop_iam_policy",
9918 ))
9919 }
9920
9921 fn execute_attach_policy(
9922 &self,
9923 query: &str,
9924 policy_id: &str,
9925 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9926 ) -> RedDBResult<RuntimeQueryResult> {
9927 use crate::auth::store::PrincipalRef;
9928 use crate::auth::UserId;
9929 use crate::storage::query::ast::PolicyPrincipalRef;
9930
9931 let auth_store = self
9932 .inner
9933 .auth_store
9934 .read()
9935 .clone()
9936 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9937 let p = match principal {
9938 PolicyPrincipalRef::User(u) => {
9939 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9940 }
9941 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9942 };
9943 let pretty_target = principal_label(principal);
9944 auth_store
9945 .attach_policy(p, policy_id)
9946 .map_err(|e| RedDBError::Query(e.to_string()))?;
9947
9948 let principal_str = current_auth_identity()
9949 .map(|(u, _)| u)
9950 .unwrap_or_else(|| "anonymous".into());
9951 tracing::info!(
9952 target: "audit",
9953 principal = %principal_str,
9954 action = "iam:policy.attach",
9955 matched_policy_id = %policy_id,
9956 target = %pretty_target,
9957 "ATTACH POLICY applied"
9958 );
9959 self.inner.audit_log.record(
9960 "iam/policy.attach",
9961 &principal_str,
9962 &pretty_target,
9963 "ok",
9964 crate::json::Value::Null,
9965 );
9966
9967 self.invalidate_result_cache();
9968 Ok(RuntimeQueryResult::ok_message(
9969 query.to_string(),
9970 &format!("policy `{policy_id}` attached to {pretty_target}"),
9971 "attach_policy",
9972 ))
9973 }
9974
9975 fn execute_detach_policy(
9976 &self,
9977 query: &str,
9978 policy_id: &str,
9979 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9980 ) -> RedDBResult<RuntimeQueryResult> {
9981 use crate::auth::store::PrincipalRef;
9982 use crate::auth::UserId;
9983 use crate::storage::query::ast::PolicyPrincipalRef;
9984
9985 let auth_store = self
9986 .inner
9987 .auth_store
9988 .read()
9989 .clone()
9990 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9991 let p = match principal {
9992 PolicyPrincipalRef::User(u) => {
9993 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9994 }
9995 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9996 };
9997 let pretty_target = principal_label(principal);
9998 auth_store
9999 .detach_policy(p, policy_id)
10000 .map_err(|e| RedDBError::Query(e.to_string()))?;
10001
10002 let principal_str = current_auth_identity()
10003 .map(|(u, _)| u)
10004 .unwrap_or_else(|| "anonymous".into());
10005 tracing::info!(
10006 target: "audit",
10007 principal = %principal_str,
10008 action = "iam:policy.detach",
10009 matched_policy_id = %policy_id,
10010 target = %pretty_target,
10011 "DETACH POLICY applied"
10012 );
10013 self.inner.audit_log.record(
10014 "iam/policy.detach",
10015 &principal_str,
10016 &pretty_target,
10017 "ok",
10018 crate::json::Value::Null,
10019 );
10020
10021 self.invalidate_result_cache();
10022 Ok(RuntimeQueryResult::ok_message(
10023 query.to_string(),
10024 &format!("policy `{policy_id}` detached from {pretty_target}"),
10025 "detach_policy",
10026 ))
10027 }
10028
10029 fn execute_show_policies(
10030 &self,
10031 query: &str,
10032 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
10033 ) -> RedDBResult<RuntimeQueryResult> {
10034 use crate::auth::UserId;
10035 use crate::storage::query::ast::PolicyPrincipalRef;
10036 use crate::storage::query::unified::UnifiedRecord;
10037 use crate::storage::schema::Value as SchemaValue;
10038 use std::sync::Arc;
10039
10040 let auth_store = self
10041 .inner
10042 .auth_store
10043 .read()
10044 .clone()
10045 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10046
10047 let pols = match filter {
10048 None => auth_store.list_policies(),
10049 Some(PolicyPrincipalRef::User(u)) => {
10050 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
10051 auth_store.effective_policies(&id)
10052 }
10053 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
10054 };
10055
10056 let mut records = Vec::with_capacity(pols.len());
10057 for p in pols.iter() {
10058 let mut rec = UnifiedRecord::default();
10059 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
10060 rec.set_arc(
10061 Arc::from("statements"),
10062 SchemaValue::Integer(p.statements.len() as i64),
10063 );
10064 rec.set_arc(
10065 Arc::from("tenant"),
10066 p.tenant
10067 .as_deref()
10068 .map(|t| SchemaValue::text(t.to_string()))
10069 .unwrap_or(SchemaValue::Null),
10070 );
10071 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
10072 records.push(rec);
10073 }
10074 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10075 result.records = records;
10076 Ok(RuntimeQueryResult {
10077 query: query.to_string(),
10078 mode: crate::storage::query::modes::QueryMode::Sql,
10079 statement: "show_policies",
10080 engine: "iam-policies",
10081 result,
10082 affected_rows: 0,
10083 statement_type: "select",
10084 })
10085 }
10086
10087 fn execute_show_effective_permissions(
10088 &self,
10089 query: &str,
10090 user: &crate::storage::query::ast::PolicyUserRef,
10091 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
10092 ) -> RedDBResult<RuntimeQueryResult> {
10093 use crate::auth::UserId;
10094 use crate::storage::query::unified::UnifiedRecord;
10095 use crate::storage::schema::Value as SchemaValue;
10096 use std::sync::Arc;
10097
10098 let auth_store = self
10099 .inner
10100 .auth_store
10101 .read()
10102 .clone()
10103 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10104 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10105 let pols = auth_store.effective_policies(&id);
10106
10107 let mut records = Vec::new();
10110 for p in pols.iter() {
10111 for (idx, st) in p.statements.iter().enumerate() {
10112 if let Some(_r) = resource {
10113 }
10117 let mut rec = UnifiedRecord::default();
10118 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
10119 rec.set_arc(
10120 Arc::from("statement_index"),
10121 SchemaValue::Integer(idx as i64),
10122 );
10123 rec.set_arc(
10124 Arc::from("sid"),
10125 st.sid
10126 .as_deref()
10127 .map(|s| SchemaValue::text(s.to_string()))
10128 .unwrap_or(SchemaValue::Null),
10129 );
10130 rec.set_arc(
10131 Arc::from("effect"),
10132 SchemaValue::text(match st.effect {
10133 crate::auth::policies::Effect::Allow => "allow",
10134 crate::auth::policies::Effect::Deny => "deny",
10135 }),
10136 );
10137 rec.set_arc(
10138 Arc::from("actions"),
10139 SchemaValue::Integer(st.actions.len() as i64),
10140 );
10141 rec.set_arc(
10142 Arc::from("resources"),
10143 SchemaValue::Integer(st.resources.len() as i64),
10144 );
10145 records.push(rec);
10146 }
10147 }
10148 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10149 result.records = records;
10150 Ok(RuntimeQueryResult {
10151 query: query.to_string(),
10152 mode: crate::storage::query::modes::QueryMode::Sql,
10153 statement: "show_effective_permissions",
10154 engine: "iam-policies",
10155 result,
10156 affected_rows: 0,
10157 statement_type: "select",
10158 })
10159 }
10160
10161 fn execute_simulate_policy(
10162 &self,
10163 query: &str,
10164 user: &crate::storage::query::ast::PolicyUserRef,
10165 action: &str,
10166 resource: &crate::storage::query::ast::PolicyResourceRef,
10167 ) -> RedDBResult<RuntimeQueryResult> {
10168 use crate::auth::policies::ResourceRef;
10169 use crate::auth::store::SimCtx;
10170 use crate::auth::UserId;
10171 use crate::storage::query::unified::UnifiedRecord;
10172 use crate::storage::schema::Value as SchemaValue;
10173 use std::sync::Arc;
10174
10175 let auth_store = self
10176 .inner
10177 .auth_store
10178 .read()
10179 .clone()
10180 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10181 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10182 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
10183 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
10184
10185 let principal_str = current_auth_identity()
10186 .map(|(u, _)| u)
10187 .unwrap_or_else(|| "anonymous".into());
10188 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
10189 tracing::info!(
10190 target: "audit",
10191 principal = %principal_str,
10192 action = "iam:policy.simulate",
10193 decision = %decision_str,
10194 matched_policy_id = ?matched_pid,
10195 matched_sid = ?matched_sid,
10196 "SIMULATE issued"
10197 );
10198 self.inner.audit_log.record(
10199 "iam/policy.simulate",
10200 &principal_str,
10201 &id.to_string(),
10202 "ok",
10203 crate::json::Value::Null,
10204 );
10205
10206 let mut rec = UnifiedRecord::default();
10207 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
10208 rec.set_arc(
10209 Arc::from("matched_policy_id"),
10210 matched_pid
10211 .map(SchemaValue::text)
10212 .unwrap_or(SchemaValue::Null),
10213 );
10214 rec.set_arc(
10215 Arc::from("matched_sid"),
10216 matched_sid
10217 .map(SchemaValue::text)
10218 .unwrap_or(SchemaValue::Null),
10219 );
10220 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
10221 rec.set_arc(
10222 Arc::from("trail_len"),
10223 SchemaValue::Integer(outcome.trail.len() as i64),
10224 );
10225 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10226 result.records = vec![rec];
10227 Ok(RuntimeQueryResult {
10228 query: query.to_string(),
10229 mode: crate::storage::query::modes::QueryMode::Sql,
10230 statement: "simulate_policy",
10231 engine: "iam-policies",
10232 result,
10233 affected_rows: 0,
10234 statement_type: "select",
10235 })
10236 }
10237}
10238
10239fn grant_to_iam_policy(
10244 principal: &crate::auth::privileges::GrantPrincipal,
10245 resource: &crate::auth::privileges::Resource,
10246 actions: &[crate::auth::privileges::Action],
10247 tenant: Option<&str>,
10248) -> Option<crate::auth::policies::Policy> {
10249 use crate::auth::policies::{
10250 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
10251 };
10252 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
10253
10254 if matches!(principal, GrantPrincipal::Group(_)) {
10255 return None;
10256 }
10257
10258 let now = crate::auth::now_ms();
10259 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
10260
10261 let resource_str = match resource {
10262 Resource::Database => "table:*".to_string(),
10263 Resource::Schema(s) => format!("table:{s}.*"),
10264 Resource::Table { schema, table } => match schema {
10265 Some(s) => format!("table:{s}.{table}"),
10266 None => format!("table:{table}"),
10267 },
10268 Resource::Function { schema, name } => match schema {
10269 Some(s) => format!("function:{s}.{name}"),
10270 None => format!("function:{name}"),
10271 },
10272 };
10273
10274 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
10278 vec![ActionPattern::Wildcard]
10279 } else {
10280 actions
10281 .iter()
10282 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
10283 .collect()
10284 };
10285 if action_patterns.is_empty() {
10286 return None;
10287 }
10288
10289 let resource_patterns = if resource_str == "*" {
10294 vec![ResourcePattern::Wildcard]
10295 } else if resource_str.contains('*') {
10296 vec![ResourcePattern::Glob(resource_str.clone())]
10297 } else if let Some((kind, name)) = resource_str.split_once(':') {
10298 vec![ResourcePattern::Exact {
10299 kind: kind.to_string(),
10300 name: name.to_string(),
10301 }]
10302 } else {
10303 vec![ResourcePattern::Wildcard]
10304 };
10305
10306 let policy = Policy {
10307 id,
10308 version: 1,
10309 tenant: tenant.map(|t| t.to_string()),
10310 created_at: now,
10311 updated_at: now,
10312 statements: vec![Statement {
10313 sid: None,
10314 effect: Effect::Allow,
10315 actions: action_patterns,
10316 resources: resource_patterns,
10317 condition: None,
10318 }],
10319 };
10320 if policy.validate().is_err() {
10321 return None;
10322 }
10323 Some(policy)
10324}
10325
10326fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
10327 use crate::auth::privileges::Action;
10328 match action {
10329 Action::Select => "select",
10330 Action::Insert => "insert",
10331 Action::Update => "update",
10332 Action::Delete => "delete",
10333 Action::Truncate => "truncate",
10334 Action::References => "references",
10335 Action::Execute => "execute",
10336 Action::Usage => "usage",
10337 Action::All => "*",
10338 }
10339}
10340
10341fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
10342 let mut columns = Vec::new();
10343 for (column, _) in &query.assignment_exprs {
10344 if !columns.iter().any(|seen| seen == column) {
10345 columns.push(column.clone());
10346 }
10347 }
10348 columns
10349}
10350
10351fn column_access_request_for_table_update(
10352 table_name: &str,
10353 columns: Vec<String>,
10354) -> crate::auth::ColumnAccessRequest {
10355 match table_name.split_once('.') {
10356 Some((schema, table)) => {
10357 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
10358 .with_schema(schema.to_string())
10359 }
10360 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
10361 }
10362}
10363
10364fn column_access_request_for_table_select(
10365 table_name: &str,
10366 columns: Vec<String>,
10367) -> crate::auth::ColumnAccessRequest {
10368 match table_name.split_once('.') {
10369 Some((schema, table)) => {
10370 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
10371 .with_schema(schema.to_string())
10372 }
10373 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
10374 }
10375}
10376
10377fn update_returning_columns_for_policy(
10378 runtime: &RedDBRuntime,
10379 query: &crate::storage::query::ast::UpdateQuery,
10380) -> Option<Vec<String>> {
10381 let items = query.returning.as_ref()?;
10382 let mut columns = Vec::new();
10383 let project_all = items
10384 .iter()
10385 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
10386 if project_all {
10387 collect_returning_star_columns(runtime, query, &mut columns);
10388 } else {
10389 for item in items {
10390 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
10391 continue;
10392 };
10393 push_returning_policy_column(&mut columns, column);
10394 }
10395 }
10396 (!columns.is_empty()).then_some(columns)
10397}
10398
10399fn collect_returning_star_columns(
10400 runtime: &RedDBRuntime,
10401 query: &crate::storage::query::ast::UpdateQuery,
10402 columns: &mut Vec<String>,
10403) {
10404 let store = runtime.db().store();
10405 let Some(manager) = store.get_collection(&query.table) else {
10406 return;
10407 };
10408 if let Some(schema) = manager.column_schema() {
10409 for column in schema.iter() {
10410 push_returning_policy_column(columns, column);
10411 }
10412 }
10413 for entity in manager.query_all(|_| true) {
10414 if !returning_entity_matches_update_target(&entity, query.target) {
10415 continue;
10416 }
10417 match &entity.data {
10418 crate::storage::EntityData::Row(row) => {
10419 for (column, _) in row.iter_fields() {
10420 push_returning_policy_column(columns, column);
10421 }
10422 }
10423 crate::storage::EntityData::Node(node) => {
10424 push_returning_policy_column(columns, "label");
10425 push_returning_policy_column(columns, "node_type");
10426 for column in node.properties.keys() {
10427 push_returning_policy_column(columns, column);
10428 }
10429 }
10430 crate::storage::EntityData::Edge(edge) => {
10431 push_returning_policy_column(columns, "label");
10432 push_returning_policy_column(columns, "from_rid");
10433 push_returning_policy_column(columns, "to_rid");
10434 push_returning_policy_column(columns, "weight");
10435 for column in edge.properties.keys() {
10436 push_returning_policy_column(columns, column);
10437 }
10438 }
10439 _ => {}
10440 }
10441 }
10442}
10443
10444fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
10445 if returning_public_envelope_column(column) {
10446 return;
10447 }
10448 if !columns.iter().any(|seen| seen == column) {
10449 columns.push(column.to_string());
10450 }
10451}
10452
/// Reports whether `column` is one of the public envelope columns, compared
/// ASCII-case-insensitively.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|reserved| column.eq_ignore_ascii_case(reserved))
}
10459
10460fn returning_entity_matches_update_target(
10461 entity: &crate::storage::UnifiedEntity,
10462 target: crate::storage::query::ast::UpdateTarget,
10463) -> bool {
10464 use crate::storage::query::ast::UpdateTarget;
10465 match target {
10466 UpdateTarget::Rows => {
10467 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
10468 }
10469 UpdateTarget::Documents => {
10470 matches!(
10471 returning_row_item_kind(entity),
10472 Some(ReturningRowKind::Document)
10473 )
10474 }
10475 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
10476 UpdateTarget::Nodes => matches!(
10477 (&entity.kind, &entity.data),
10478 (
10479 crate::storage::EntityKind::GraphNode(_),
10480 crate::storage::EntityData::Node(_)
10481 )
10482 ),
10483 UpdateTarget::Edges => matches!(
10484 (&entity.kind, &entity.data),
10485 (
10486 crate::storage::EntityKind::GraphEdge(_),
10487 crate::storage::EntityData::Edge(_)
10488 )
10489 ),
10490 }
10491}
10492
/// Classification of a row-shaped entity, as produced by
/// `returning_row_item_kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReturningRowKind {
    /// A row that is neither key/value-shaped nor carries a JSON value.
    Row,
    /// A row with at least one JSON-typed value.
    Document,
    /// A row whose fields are all named `key` or `value` (case-insensitive).
    Kv,
}
10499
10500fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
10501 let row = entity.data.as_row()?;
10502 let is_kv = row.iter_fields().all(|(column, _)| {
10503 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
10504 });
10505 if is_kv {
10506 return Some(ReturningRowKind::Kv);
10507 }
10508 let is_document = row
10509 .iter_fields()
10510 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
10511 if is_document {
10512 Some(ReturningRowKind::Document)
10513 } else {
10514 Some(ReturningRowKind::Row)
10515 }
10516}
10517
10518fn requested_table_columns_for_policy(
10519 table: &crate::storage::query::ast::TableQuery,
10520) -> Vec<String> {
10521 use crate::storage::query::sql_lowering::{
10522 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
10523 effective_table_projections,
10524 };
10525
10526 let table_name = table.table.as_str();
10527 let table_alias = table.alias.as_deref();
10528 let mut columns = std::collections::BTreeSet::new();
10529
10530 for projection in effective_table_projections(table) {
10531 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
10532 }
10533 if let Some(filter) = effective_table_filter(table) {
10534 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10535 }
10536 for expr in effective_table_group_by_exprs(table) {
10537 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
10538 }
10539 if let Some(filter) = effective_table_having_filter(table) {
10540 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10541 }
10542 for order in &table.order_by {
10543 if let Some(expr) = order.expr.as_ref() {
10544 collect_expr_columns(expr, table_name, table_alias, &mut columns);
10545 } else {
10546 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
10547 }
10548 }
10549
10550 columns.into_iter().collect()
10551}
10552
10553fn collect_projection_columns(
10554 projection: &crate::storage::query::ast::Projection,
10555 table_name: &str,
10556 table_alias: Option<&str>,
10557 columns: &mut std::collections::BTreeSet<String>,
10558) {
10559 use crate::storage::query::ast::Projection;
10560 match projection {
10561 Projection::All => {
10562 columns.insert("*".to_string());
10563 }
10564 Projection::Column(column) | Projection::Alias(column, _) => {
10565 if column != "*" {
10566 columns.insert(column.clone());
10567 }
10568 }
10569 Projection::Function(_, args) => {
10570 for arg in args {
10571 collect_projection_columns(arg, table_name, table_alias, columns);
10572 }
10573 }
10574 Projection::Expression(filter, _) => {
10575 collect_filter_columns(filter, table_name, table_alias, columns);
10576 }
10577 Projection::Field(field, _) => {
10578 collect_field_ref_column(field, table_name, table_alias, columns);
10579 }
10580 Projection::Window { args, .. } => {
10584 for arg in args {
10585 collect_projection_columns(arg, table_name, table_alias, columns);
10586 }
10587 }
10588 }
10589}
10590
/// Adds every column referenced by `filter` to `columns`.
///
/// Single-field predicates contribute their field, field-to-field
/// comparisons contribute both sides, expression comparisons are walked with
/// `collect_expr_columns`, and `And` / `Or` / `Not` recurse into their
/// operands. `table_name` and `table_alias` are forwarded unchanged to the
/// field/expression collectors.
fn collect_filter_columns(
    filter: &crate::storage::query::ast::Filter,
    table_name: &str,
    table_alias: Option<&str>,
    columns: &mut std::collections::BTreeSet<String>,
) {
    use crate::storage::query::ast::Filter;
    match filter {
        // Predicates over one field: only the field itself is recorded.
        Filter::Compare { field, .. }
        | Filter::IsNull(field)
        | Filter::IsNotNull(field)
        | Filter::In { field, .. }
        | Filter::Between { field, .. }
        | Filter::Like { field, .. }
        | Filter::StartsWith { field, .. }
        | Filter::EndsWith { field, .. }
        | Filter::Contains { field, .. } => {
            collect_field_ref_column(field, table_name, table_alias, columns);
        }
        Filter::CompareFields { left, right, .. } => {
            collect_field_ref_column(left, table_name, table_alias, columns);
            collect_field_ref_column(right, table_name, table_alias, columns);
        }
        Filter::CompareExpr { lhs, rhs, .. } => {
            collect_expr_columns(lhs, table_name, table_alias, columns);
            collect_expr_columns(rhs, table_name, table_alias, columns);
        }
        // Boolean combinators: recurse into each operand.
        Filter::And(left, right) | Filter::Or(left, right) => {
            collect_filter_columns(left, table_name, table_alias, columns);
            collect_filter_columns(right, table_name, table_alias, columns);
        }
        Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
    }
}
10625
/// Recursively adds every column referenced by `expr` to `columns`.
///
/// Column expressions are recorded through `collect_field_ref_column`;
/// compound expressions recurse into each operand. Literals, parameters and
/// subqueries contribute nothing here.
fn collect_expr_columns(
    expr: &crate::storage::query::ast::Expr,
    table_name: &str,
    table_alias: Option<&str>,
    columns: &mut std::collections::BTreeSet<String>,
) {
    use crate::storage::query::ast::Expr;
    match expr {
        Expr::Column { field, .. } => {
            collect_field_ref_column(field, table_name, table_alias, columns);
        }
        // Leaves with no column reference.
        Expr::Literal { .. } | Expr::Parameter { .. } => {}
        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
            collect_expr_columns(operand, table_name, table_alias, columns);
        }
        Expr::BinaryOp { lhs, rhs, .. } => {
            collect_expr_columns(lhs, table_name, table_alias, columns);
            collect_expr_columns(rhs, table_name, table_alias, columns);
        }
        Expr::FunctionCall { args, .. } => {
            for arg in args {
                collect_expr_columns(arg, table_name, table_alias, columns);
            }
        }
        Expr::Case {
            branches, else_, ..
        } => {
            // Both the condition and the result of every branch are walked.
            for (condition, value) in branches {
                collect_expr_columns(condition, table_name, table_alias, columns);
                collect_expr_columns(value, table_name, table_alias, columns);
            }
            if let Some(value) = else_ {
                collect_expr_columns(value, table_name, table_alias, columns);
            }
        }
        Expr::IsNull { operand, .. } => {
            collect_expr_columns(operand, table_name, table_alias, columns);
        }
        Expr::InList { target, values, .. } => {
            collect_expr_columns(target, table_name, table_alias, columns);
            for value in values {
                collect_expr_columns(value, table_name, table_alias, columns);
            }
        }
        Expr::Between {
            target, low, high, ..
        } => {
            collect_expr_columns(target, table_name, table_alias, columns);
            collect_expr_columns(low, table_name, table_alias, columns);
            collect_expr_columns(high, table_name, table_alias, columns);
        }
        // Subqueries are not descended into by this collector.
        Expr::Subquery { .. } => {}
        Expr::WindowFunctionCall { args, window, .. } => {
            // Arguments, PARTITION BY and ORDER BY expressions all count.
            for arg in args {
                collect_expr_columns(arg, table_name, table_alias, columns);
            }
            for e in &window.partition_by {
                collect_expr_columns(e, table_name, table_alias, columns);
            }
            for o in &window.order_by {
                collect_expr_columns(&o.expr, table_name, table_alias, columns);
            }
        }
    }
}
10691
10692fn collect_field_ref_column(
10693 field: &crate::storage::query::ast::FieldRef,
10694 table_name: &str,
10695 table_alias: Option<&str>,
10696 columns: &mut std::collections::BTreeSet<String>,
10697) {
10698 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
10699 if column != "*" {
10700 columns.insert(column);
10701 }
10702 }
10703}
10704
10705fn policy_column_name_from_field_ref(
10706 field: &crate::storage::query::ast::FieldRef,
10707 table_name: &str,
10708 table_alias: Option<&str>,
10709) -> Option<String> {
10710 match field {
10711 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
10712 if column == "*" {
10713 return Some("*".to_string());
10714 }
10715 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
10716 Some(column.clone())
10717 } else {
10718 Some(format!("{table}.{column}"))
10719 }
10720 }
10721 _ => None,
10722 }
10723}
10724
10725fn legacy_resource_to_iam(
10726 resource: &crate::auth::privileges::Resource,
10727 tenant: Option<&str>,
10728) -> crate::auth::policies::ResourceRef {
10729 use crate::auth::privileges::Resource;
10730
10731 let (kind, name) = match resource {
10732 Resource::Database => ("database".to_string(), "*".to_string()),
10733 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
10734 Resource::Table { schema, table } => (
10735 "table".to_string(),
10736 match schema {
10737 Some(s) => format!("{s}.{table}"),
10738 None => table.clone(),
10739 },
10740 ),
10741 Resource::Function { schema, name } => (
10742 "function".to_string(),
10743 match schema {
10744 Some(s) => format!("{s}.{name}"),
10745 None => name.clone(),
10746 },
10747 ),
10748 };
10749
10750 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
10751 if let Some(t) = tenant {
10752 out = out.with_tenant(t.to_string());
10753 }
10754 out
10755}
10756
/// One table operand of a join, as extracted by `table_side_context`.
#[derive(Debug)]
struct JoinTableSide {
    /// The table's name.
    table: String,
    /// The alias used in the query; the table name itself when no alias
    /// was given.
    alias: String,
}
10762
10763fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
10764 match expr {
10765 QueryExpr::Table(table) => Some(JoinTableSide {
10766 table: table.table.clone(),
10767 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
10768 }),
10769 _ => None,
10770 }
10771}
10772
/// Adds to `out` the columns of `projection` that belong to `table`.
///
/// A column belongs to the table when it is unqualified, or when its
/// qualifier equals `table` or the optional `alias`. Columns qualified with
/// any other name are skipped. Function and window projections are walked
/// through their arguments; expression projections, `*`, and any remaining
/// field kinds contribute nothing here.
fn collect_projection_columns_for_table(
    projection: &Projection,
    table: &str,
    alias: Option<&str>,
    out: &mut BTreeSet<String>,
) {
    match projection {
        Projection::Column(column) | Projection::Alias(column, _) => {
            match split_qualified_column(column) {
                Some((qualifier, column))
                    if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
                {
                    push_policy_column(column, out);
                }
                // Qualified with some other table: not ours.
                Some(_) => {}
                // Unqualified: attributed to this table.
                None => push_policy_column(column, out),
            }
        }
        Projection::Field(
            FieldRef::TableColumn {
                table: qualifier,
                column,
            },
            _,
        ) => {
            if qualifier.is_empty()
                || qualifier == table
                || alias.is_some_and(|alias| qualifier == alias)
            {
                push_policy_column(column, out);
            }
        }
        // Graph properties always carry a qualifier, so there is no
        // "unqualified" case for them.
        Projection::Field(
            FieldRef::NodeProperty {
                alias: qualifier,
                property,
            },
            _,
        )
        | Projection::Field(
            FieldRef::EdgeProperty {
                alias: qualifier,
                property,
            },
            _,
        ) => {
            if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
                push_policy_column(property, out);
            }
        }
        Projection::Function(_, args) => {
            for arg in args {
                collect_projection_columns_for_table(arg, table, alias, out);
            }
        }
        // Must stay after the specific `Field` arms above: this catches the
        // remaining field kinds.
        Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
        Projection::Window { args, .. } => {
            for arg in args {
                collect_projection_columns_for_table(arg, table, alias, out);
            }
        }
    }
}
10836
10837fn collect_projection_columns_for_join_side(
10838 projection: &Projection,
10839 left: Option<&JoinTableSide>,
10840 right: Option<&JoinTableSide>,
10841 out: &mut HashMap<String, BTreeSet<String>>,
10842) -> RedDBResult<()> {
10843 match projection {
10844 Projection::Column(column) | Projection::Alias(column, _) => {
10845 if let Some((qualifier, column)) = split_qualified_column(column) {
10846 push_qualified_join_column(qualifier, column, left, right, out);
10847 } else {
10848 push_unqualified_join_column(column, left, right, out);
10849 }
10850 }
10851 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
10852 if table.is_empty() {
10853 push_unqualified_join_column(column, left, right, out);
10854 } else if let Some(side) = [left, right]
10855 .into_iter()
10856 .flatten()
10857 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
10858 {
10859 push_join_column(&side.table, column, out);
10860 }
10861 }
10862 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
10863 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
10864 push_qualified_join_column(alias, property, left, right, out);
10865 }
10866 Projection::Function(_, args) => {
10867 for arg in args {
10868 collect_projection_columns_for_join_side(arg, left, right, out)?;
10869 }
10870 }
10871 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10872 Projection::Window { args, .. } => {
10873 for arg in args {
10874 collect_projection_columns_for_join_side(arg, left, right, out)?;
10875 }
10876 }
10877 }
10878 Ok(())
10879}
10880
/// Splits a `qualifier.column` name at its single dot.
///
/// Returns `None` when the name has no dot, when either half is empty, or
/// when the part after the first dot contains another dot.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    column.split_once('.').filter(|(qualifier, name)| {
        !qualifier.is_empty() && !name.is_empty() && !name.contains('.')
    })
}
10888
10889fn push_qualified_join_column(
10890 qualifier: &str,
10891 column: &str,
10892 left: Option<&JoinTableSide>,
10893 right: Option<&JoinTableSide>,
10894 out: &mut HashMap<String, BTreeSet<String>>,
10895) {
10896 if let Some(side) = [left, right]
10897 .into_iter()
10898 .flatten()
10899 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
10900 {
10901 push_join_column(&side.table, column, out);
10902 }
10903}
10904
10905fn push_unqualified_join_column(
10906 column: &str,
10907 left: Option<&JoinTableSide>,
10908 right: Option<&JoinTableSide>,
10909 out: &mut HashMap<String, BTreeSet<String>>,
10910) {
10911 for side in [left, right].into_iter().flatten() {
10912 push_join_column(&side.table, column, out);
10913 }
10914}
10915
10916fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
10917 if is_policy_column_name(column) {
10918 out.entry(table.to_string())
10919 .or_default()
10920 .insert(column.to_string());
10921 }
10922}
10923
10924fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
10925 if is_policy_column_name(column) {
10926 out.insert(column.to_string());
10927 }
10928}
10929
/// Tells whether `column` is a name that should be recorded for policy checks.
///
/// Rejected: the empty string, the `*` wildcard, and any name starting with
/// the `LIT:` or `TYPE:` prefix (prefix match is case-sensitive).
fn is_policy_column_name(column: &str) -> bool {
    if column.is_empty() || column == "*" {
        return false;
    }
    !["LIT:", "TYPE:"]
        .iter()
        .any(|prefix| column.starts_with(*prefix))
}
10936
10937fn runtime_iam_context(
10938 role: crate::auth::Role,
10939 tenant: Option<&str>,
10940 principal_is_system_owned: bool,
10941) -> crate::auth::policies::EvalContext {
10942 crate::auth::policies::EvalContext {
10943 principal_tenant: tenant.map(|t| t.to_string()),
10944 current_tenant: tenant.map(|t| t.to_string()),
10945 peer_ip: None,
10946 mfa_present: false,
10947 now_ms: crate::auth::now_ms(),
10948 principal_is_admin_role: role == crate::auth::Role::Admin,
10949 principal_is_system_owned,
10950 principal_is_platform_scoped: tenant.is_none(),
10951 }
10952}
10953
10954fn explicit_table_projection_columns(
10955 query: &crate::storage::query::ast::TableQuery,
10956) -> Vec<String> {
10957 use crate::storage::query::ast::{FieldRef, Projection};
10958
10959 let mut columns = Vec::new();
10960 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
10961 match projection {
10962 Projection::Column(column) | Projection::Alias(column, _) => {
10963 push_unique(&mut columns, column)
10964 }
10965 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
10966 push_unique(&mut columns, column)
10967 }
10968 _ => {}
10972 }
10973 }
10974 columns
10975}
10976
10977fn explicit_graph_projection_properties(
10978 query: &crate::storage::query::ast::GraphQuery,
10979) -> Vec<String> {
10980 use crate::storage::query::ast::{FieldRef, Projection};
10981
10982 let mut columns = Vec::new();
10983 for projection in &query.return_ {
10984 match projection {
10985 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
10986 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
10987 push_unique(&mut columns, property.clone())
10988 }
10989 _ => {}
10990 }
10991 }
10992 columns
10993}
10994
/// Appends `column` to `columns` unless an equal string is already present,
/// so insertion order is kept and duplicates are dropped.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
11000
11001fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
11002 use crate::storage::query::ast::PolicyPrincipalRef;
11003 match p {
11004 PolicyPrincipalRef::User(u) => match &u.tenant {
11005 Some(t) => format!("user:{t}/{}", u.username),
11006 None => format!("user:{}", u.username),
11007 },
11008 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
11009 }
11010}
11011
11012pub(crate) fn decision_to_strings(
11015 d: &crate::auth::policies::Decision,
11016) -> (String, Option<String>, Option<String>) {
11017 use crate::auth::policies::Decision;
11018 match d {
11019 Decision::Allow {
11020 matched_policy_id,
11021 matched_sid,
11022 } => (
11023 "allow".into(),
11024 Some(matched_policy_id.clone()),
11025 matched_sid.clone(),
11026 ),
11027 Decision::Deny {
11028 matched_policy_id,
11029 matched_sid,
11030 } => (
11031 "deny".into(),
11032 Some(matched_policy_id.clone()),
11033 matched_sid.clone(),
11034 ),
11035 Decision::DefaultDeny => ("default_deny".into(), None, None),
11036 Decision::AdminBypass => ("admin_bypass".into(), None, None),
11037 }
11038}
11039
11040fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
11041 let mut scopes = Vec::new();
11042 collect_relation_scopes(query, &mut scopes);
11043 scopes.sort();
11044 scopes.dedup();
11045 scopes
11046}
11047
11048fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
11049 match query {
11050 QueryExpr::Table(table) => {
11051 if !table.table.is_empty() {
11052 scopes.push(table.table.clone());
11053 }
11054 if let Some(alias) = &table.alias {
11055 scopes.push(alias.clone());
11056 }
11057 }
11058 QueryExpr::Join(join) => {
11059 collect_relation_scopes(&join.left, scopes);
11060 collect_relation_scopes(&join.right, scopes);
11061 }
11062 _ => {}
11063 }
11064}
11065
11066fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
11067 let inner_scopes = relation_scopes_for_query(query);
11068 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
11069}
11070
11071fn query_expr_references_outer_scope(
11072 query: &QueryExpr,
11073 outer_scopes: &[String],
11074 inner_scopes: &[String],
11075) -> bool {
11076 match query {
11077 QueryExpr::Table(table) => {
11078 table.select_items.iter().any(|item| match item {
11079 crate::storage::query::ast::SelectItem::Wildcard => false,
11080 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11081 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11082 }
11083 }) || table
11084 .where_expr
11085 .as_ref()
11086 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11087 || table.filter.as_ref().is_some_and(|filter| {
11088 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11089 })
11090 || table.having_expr.as_ref().is_some_and(|expr| {
11091 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11092 })
11093 || table.having.as_ref().is_some_and(|filter| {
11094 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11095 })
11096 || table
11097 .group_by_exprs
11098 .iter()
11099 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11100 || table.order_by.iter().any(|clause| {
11101 clause.expr.as_ref().is_some_and(|expr| {
11102 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11103 })
11104 })
11105 }
11106 QueryExpr::Join(join) => {
11107 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
11108 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
11109 || join.filter.as_ref().is_some_and(|filter| {
11110 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11111 })
11112 || join.return_items.iter().any(|item| match item {
11113 crate::storage::query::ast::SelectItem::Wildcard => false,
11114 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11115 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11116 }
11117 })
11118 }
11119 _ => false,
11120 }
11121}
11122
11123fn filter_references_outer_scope(
11124 filter: &crate::storage::query::ast::Filter,
11125 outer_scopes: &[String],
11126 inner_scopes: &[String],
11127) -> bool {
11128 use crate::storage::query::ast::Filter;
11129 match filter {
11130 Filter::Compare { field, .. }
11131 | Filter::IsNull(field)
11132 | Filter::IsNotNull(field)
11133 | Filter::In { field, .. }
11134 | Filter::Between { field, .. }
11135 | Filter::Like { field, .. }
11136 | Filter::StartsWith { field, .. }
11137 | Filter::EndsWith { field, .. }
11138 | Filter::Contains { field, .. } => {
11139 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11140 }
11141 Filter::CompareFields { left, right, .. } => {
11142 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
11143 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
11144 }
11145 Filter::CompareExpr { lhs, rhs, .. } => {
11146 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11147 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11148 }
11149 Filter::And(left, right) | Filter::Or(left, right) => {
11150 filter_references_outer_scope(left, outer_scopes, inner_scopes)
11151 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
11152 }
11153 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
11154 }
11155}
11156
11157fn expr_references_outer_scope(
11158 expr: &crate::storage::query::ast::Expr,
11159 outer_scopes: &[String],
11160 inner_scopes: &[String],
11161) -> bool {
11162 use crate::storage::query::ast::Expr;
11163 match expr {
11164 Expr::Column { field, .. } => {
11165 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11166 }
11167 Expr::BinaryOp { lhs, rhs, .. } => {
11168 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11169 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11170 }
11171 Expr::UnaryOp { operand, .. }
11172 | Expr::Cast { inner: operand, .. }
11173 | Expr::IsNull { operand, .. } => {
11174 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
11175 }
11176 Expr::FunctionCall { args, .. } => args
11177 .iter()
11178 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
11179 Expr::Case {
11180 branches, else_, ..
11181 } => {
11182 branches.iter().any(|(cond, value)| {
11183 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
11184 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
11185 }) || else_
11186 .as_ref()
11187 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11188 }
11189 Expr::InList { target, values, .. } => {
11190 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11191 || values
11192 .iter()
11193 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
11194 }
11195 Expr::Between {
11196 target, low, high, ..
11197 } => {
11198 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11199 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
11200 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
11201 }
11202 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
11203 Expr::Literal { .. } | Expr::Parameter { .. } => false,
11204 Expr::WindowFunctionCall { args, window, .. } => {
11205 args.iter()
11206 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes))
11207 || window
11208 .partition_by
11209 .iter()
11210 .any(|e| expr_references_outer_scope(e, outer_scopes, inner_scopes))
11211 || window
11212 .order_by
11213 .iter()
11214 .any(|o| expr_references_outer_scope(&o.expr, outer_scopes, inner_scopes))
11215 }
11216 }
11217}
11218
11219fn field_ref_references_outer_scope(
11220 field: &crate::storage::query::ast::FieldRef,
11221 outer_scopes: &[String],
11222 inner_scopes: &[String],
11223) -> bool {
11224 match field {
11225 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
11226 outer_scopes.iter().any(|scope| scope == table)
11227 && !inner_scopes.iter().any(|scope| scope == table)
11228 }
11229 _ => false,
11230 }
11231}
11232
11233fn first_column_values(
11234 result: crate::storage::query::unified::UnifiedResult,
11235) -> RedDBResult<Vec<Value>> {
11236 if result.columns.len() > 1 {
11237 return Err(RedDBError::Query(
11238 "expression subquery must return exactly one column".to_string(),
11239 ));
11240 }
11241 let fallback_column = result
11242 .records
11243 .first()
11244 .and_then(|record| record.column_names().into_iter().next())
11245 .map(|name| name.to_string());
11246 let column = result.columns.first().cloned().or(fallback_column);
11247 let Some(column) = column else {
11248 return Ok(Vec::new());
11249 };
11250 Ok(result
11251 .records
11252 .iter()
11253 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
11254 .collect())
11255}
11256
/// Parses a timestamp string into milliseconds since the Unix epoch.
///
/// Two forms are accepted:
/// * a bare unsigned integer, which is returned unchanged;
/// * a `YYYY-MM-DD` calendar date, optionally followed by whitespace and
///   further text (which is ignored); it resolves to midnight at the start
///   of that date.
///
/// Returns `None` when the input matches neither form, when the month or
/// day does not exist in the proleptic Gregorian calendar (for example
/// `2024-03-00` or `2023-02-29`), when the date is before 1970-01-01 (the
/// unsigned result cannot represent it), or when the year is beyond
/// `MAX_YEAR`.
fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
    const MS_PER_DAY: u128 = 86_400_000;
    // Generous upper bound that keeps the day arithmetic far from i64 overflow.
    const MAX_YEAR: i64 = 1_000_000_000;

    if let Ok(raw) = s.parse::<u128>() {
        return Some(raw);
    }

    let date = s.split_whitespace().next()?;
    let mut fields = date.split('-');
    let (year, month, day) = (fields.next()?, fields.next()?, fields.next()?);
    if fields.next().is_some() {
        return None;
    }
    let year = year.parse::<i64>().ok()?;
    let month = month.parse::<u32>().ok()?;
    let day = day.parse::<u32>().ok()?;

    if !(1970..=MAX_YEAR).contains(&year) {
        return None;
    }
    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return None,
    };
    if !(1..=days_in_month).contains(&day) {
        return None;
    }

    // A checked conversion instead of an `as` cast, so a negative day count
    // can never turn into a huge unsigned value.
    let days = u128::try_from(days_from_civil(year, month, day)).ok()?;
    Some(days * MS_PER_DAY)
}

/// Converts a proleptic Gregorian civil date to the number of days since
/// 1970-01-01 (negative for earlier dates).
///
/// The year is shifted so that it starts in March, which puts the leap day
/// at the end of the shifted year. All arithmetic is signed, so an
/// out-of-range day such as `0` yields the neighbouring day instead of
/// underflowing. `m` and `d` are not validated here.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    let y = if m <= 2 { y - 1 } else { y };
    // Floor division and its remainder split the year into a 400-year era
    // and the year within that era, for negative years too.
    let era = y.div_euclid(400);
    let year_of_era = y.rem_euclid(400);
    // Month index counted from March (March = 0, ..., February = 11).
    let shifted_month = i64::from(if m > 2 { m - 3 } else { m + 9 });
    let day_of_year = (153 * shifted_month + 2) / 5 + i64::from(d) - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    era * 146_097 + day_of_era - 719_468
}
11291
11292fn walk_plan_node(
11293 node: &crate::storage::query::planner::CanonicalLogicalNode,
11294 depth: usize,
11295 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
11296) {
11297 use std::sync::Arc;
11298 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
11299 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
11300 rec.set_arc(
11301 Arc::from("source"),
11302 node.source.clone().map(Value::text).unwrap_or(Value::Null),
11303 );
11304 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
11305 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
11306 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
11307 out.push(rec);
11308 for child in &node.children {
11309 walk_plan_node(child, depth + 1, out);
11310 }
11311}