1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
9thread_local! {
10 static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };
14
15 static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
23 const { std::cell::RefCell::new(None) };
24
25 static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
35 const { std::cell::RefCell::new(None) };
36
37 static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
43
44 static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
54 const { std::cell::RefCell::new(None) };
55
56 static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
60 const { std::cell::RefCell::new(None) };
61
62 static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
66 const { std::cell::RefCell::new(None) };
67}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
91fn system_keyed_collection_contract(
92 name: &str,
93 model: crate::catalog::CollectionModel,
94) -> crate::physical::CollectionContract {
95 let now = crate::utils::now_unix_millis() as u128;
96 crate::physical::CollectionContract {
97 name: name.to_string(),
98 declared_model: model,
99 schema_mode: crate::catalog::SchemaMode::Dynamic,
100 origin: crate::physical::ContractOrigin::Implicit,
101 version: 1,
102 created_at_unix_ms: now,
103 updated_at_unix_ms: now,
104 default_ttl_ms: None,
105 vector_dimension: None,
106 vector_metric: None,
107 context_index_fields: Vec::new(),
108 declared_columns: Vec::new(),
109 table_def: None,
110 timestamps_enabled: false,
111 context_index_enabled: false,
112 append_only: false,
113 subscriptions: Vec::new(),
114 }
115}
116
117#[derive(Clone)]
132pub struct SnapshotContext {
133 pub snapshot: crate::storage::transaction::snapshot::Snapshot,
134 pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
135 pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
136 pub requires_index_fallback: bool,
137}
138
139pub fn set_current_connection_id(id: u64) {
148 CURRENT_CONN_ID.with(|c| c.set(id));
149}
150
151pub fn clear_current_connection_id() {
153 CURRENT_CONN_ID.with(|c| c.set(0));
154}
155
156pub fn current_connection_id() -> u64 {
159 CURRENT_CONN_ID.with(|c| c.get())
160}
161
162pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
166 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
167}
168
169pub fn clear_current_auth_identity() {
173 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
174}
175
176pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
179 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
180}
181
182pub fn set_current_tenant(tenant_id: String) {
187 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
188}
189
190pub fn clear_current_tenant() {
193 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
194}
195
196pub fn current_tenant() -> Option<String> {
207 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
208 if let Some(over) = current_scope_override() {
209 if over.tenant.is_active() {
210 return over.tenant.resolve(inherited);
211 }
212 }
213 if let Some(tx_local) = current_tx_local_tenant() {
214 return tx_local;
215 }
216 inherited
217}
218
thread_local! {
    /// Transaction-local tenant override managed by `TxLocalTenantGuard`.
    ///
    /// * `None` — no override installed;
    /// * `Some(None)` — override installed and the tenant is explicitly unset;
    /// * `Some(Some(id))` — override installed with tenant `id`.
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
231
232fn current_tx_local_tenant() -> Option<Option<String>> {
233 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
234}
235
236fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
242 let mut tokens = query.split_ascii_whitespace();
243 let Some(w1) = tokens.next() else {
244 return Ok(None);
245 };
246 if !w1.eq_ignore_ascii_case("SET") {
247 return Ok(None);
248 }
249 let Some(w2) = tokens.next() else {
250 return Ok(None);
251 };
252 if !w2.eq_ignore_ascii_case("LOCAL") {
253 return Ok(None);
254 }
255 let Some(w3) = tokens.next() else {
256 return Ok(None);
257 };
258 if !w3.eq_ignore_ascii_case("TENANT") {
259 return Ok(None);
260 }
261 let rest: String = tokens.collect::<Vec<_>>().join(" ");
262 let rest = rest.trim().trim_end_matches(';').trim();
263 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
264 if value_str.is_empty() {
265 return Err(RedDBError::Query(
266 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
267 ));
268 }
269 if value_str.eq_ignore_ascii_case("NULL") {
270 return Ok(Some(None));
271 }
272 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
273 let inner = &value_str[1..value_str.len() - 1];
274 return Ok(Some(Some(inner.to_string())));
275 }
276 Err(RedDBError::Query(format!(
277 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
278 )))
279}
280
281pub(crate) struct TxLocalTenantGuard;
282
283impl TxLocalTenantGuard {
284 pub fn install(value: Option<Option<String>>) -> Self {
285 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
286 Self
287 }
288}
289
290impl Drop for TxLocalTenantGuard {
291 fn drop(&mut self) {
292 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
293 }
294}
295
296thread_local! {
297 static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
304 const { std::cell::RefCell::new(Vec::new()) };
305}
306
307pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
308 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
309}
310
311pub(crate) fn pop_scope_override() {
312 SCOPE_OVERRIDES.with(|cell| {
313 cell.borrow_mut().pop();
314 });
315}
316
317pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
318 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
319}
320
321pub(crate) fn has_scope_override_active() -> bool {
325 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
326}
327
328pub(crate) struct ScopeOverrideGuard;
332
333impl ScopeOverrideGuard {
334 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
335 push_scope_override(over);
336 Self
337 }
338}
339
340impl Drop for ScopeOverrideGuard {
341 fn drop(&mut self) {
342 pop_scope_override();
343 }
344}
345
346pub(crate) fn current_user_projected() -> Option<String> {
352 let inherited = current_auth_identity().map(|(u, _)| u);
353 if let Some(over) = current_scope_override() {
354 if over.user.is_active() {
355 return over.user.resolve(inherited);
356 }
357 }
358 inherited
359}
360
361pub(crate) fn current_role_projected() -> Option<String> {
362 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
363 if let Some(over) = current_scope_override() {
364 if over.role.is_active() {
365 return over.role.resolve(inherited);
366 }
367 }
368 inherited
369}
370
371pub(crate) fn current_secret_value(path: &str) -> Option<String> {
372 let key = path.to_ascii_lowercase();
373 CURRENT_SECRET_RESOLVER.with(|cell| {
374 let mut resolver = cell.borrow_mut();
375 let resolver = resolver.as_mut()?;
376 if resolver.values.is_none() {
377 resolver.values = resolver
378 .store
379 .as_ref()
380 .map(|store| store.vault_kv_snapshot());
381 }
382 let values = resolver.values.as_ref()?;
383 values.get(&key).cloned().or_else(|| {
384 key.strip_prefix("red.vault/").and_then(|rest| {
385 values
386 .get(rest)
387 .cloned()
388 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
389 })
390 })
391 })
392}
393
394struct SecretResolver {
395 store: Option<Arc<crate::auth::store::AuthStore>>,
396 values: Option<HashMap<String, String>>,
397}
398
399pub(super) struct SecretStoreGuard {
400 previous: Option<SecretResolver>,
401}
402
403impl SecretStoreGuard {
404 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
405 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
406 cell.replace(Some(SecretResolver {
407 store,
408 values: None,
409 }))
410 });
411 Self { previous }
412 }
413}
414
415impl Drop for SecretStoreGuard {
416 fn drop(&mut self) {
417 let previous = self.previous.take();
418 CURRENT_SECRET_RESOLVER.with(|cell| {
419 cell.replace(previous);
420 });
421 }
422}
423
424pub(crate) fn current_config_value(path: &str) -> Option<Value> {
425 let key = path.to_ascii_lowercase();
426 CURRENT_CONFIG_RESOLVER.with(|cell| {
427 let mut resolver = cell.borrow_mut();
428 let resolver = resolver.as_mut()?;
429 if resolver.values.is_none() {
430 resolver.values = Some(latest_config_snapshot(&resolver.db));
431 }
432 let values = resolver.values.as_ref()?;
433 values.get(&key).cloned().or_else(|| {
434 key.strip_prefix("red.config/")
435 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
436 })
437 })
438}
439
440fn update_current_config_value(path: &str, value: Value) {
441 let key = path.to_ascii_lowercase();
442 CURRENT_CONFIG_RESOLVER.with(|cell| {
443 if let Some(resolver) = cell.borrow_mut().as_mut() {
444 if let Some(values) = resolver.values.as_mut() {
445 values.insert(key, value);
446 }
447 }
448 });
449}
450
451fn update_current_secret_value(path: &str, value: Option<String>) {
452 let key = path.to_ascii_lowercase();
453 CURRENT_SECRET_RESOLVER.with(|cell| {
454 if let Some(resolver) = cell.borrow_mut().as_mut() {
455 let Some(values) = resolver.values.as_mut() else {
456 return;
457 };
458 match value {
459 Some(value) => {
460 values.insert(key, value);
461 }
462 None => {
463 values.remove(&key);
464 }
465 }
466 }
467 });
468}
469
470fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
471 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
472
473 if let Some(manager) = db.store().get_collection("red_config") {
474 manager.for_each_entity(|entity| {
475 let Some(row) = entity.data.as_row() else {
476 return true;
477 };
478 let Some(Value::Text(key)) = row.get_field("key") else {
479 return true;
480 };
481 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
482 let id = entity.id.raw();
483 let key = key.to_ascii_lowercase();
484 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
485 if let Some(rest) = key.strip_prefix("red.config.") {
486 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
487 }
488 true
489 });
490 }
491
492 if let Some(manager) = db.store().get_collection("red.config") {
493 manager.for_each_entity(|entity| {
494 let Some(row) = entity.data.as_row() else {
495 return true;
496 };
497 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
498 return true;
499 }
500 let Some(Value::Text(key)) = row.get_field("key") else {
501 return true;
502 };
503 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
504 insert_latest_config_value(
505 &mut latest,
506 format!("red.config/{}", key.to_ascii_lowercase()),
507 entity.id.raw(),
508 value,
509 );
510 true
511 });
512 }
513
514 latest
515 .into_iter()
516 .map(|(key, (_, value))| (key, value))
517 .collect()
518}
519
520fn insert_latest_config_value(
521 latest: &mut HashMap<String, (u64, Value)>,
522 key: String,
523 id: u64,
524 value: Value,
525) {
526 match latest.get(&key) {
527 Some((prev_id, _)) if *prev_id > id => {}
528 _ => {
529 latest.insert(key, (id, value));
530 }
531 }
532}
533
534struct ConfigResolver {
535 db: Arc<RedDB>,
536 values: Option<HashMap<String, Value>>,
537}
538
539pub(super) struct ConfigSnapshotGuard {
540 previous: Option<ConfigResolver>,
541}
542
543impl ConfigSnapshotGuard {
544 pub(super) fn install(db: Arc<RedDB>) -> Self {
545 let previous = CURRENT_CONFIG_RESOLVER
546 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
547 Self { previous }
548 }
549}
550
551impl Drop for ConfigSnapshotGuard {
552 fn drop(&mut self) {
553 let previous = self.previous.take();
554 CURRENT_CONFIG_RESOLVER.with(|cell| {
555 cell.replace(previous);
556 });
557 }
558}
559
560pub fn set_current_snapshot(ctx: SnapshotContext) {
565 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
566 HAS_SNAPSHOT.with(|c| c.set(true));
567}
568
569pub fn clear_current_snapshot() {
570 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
571 HAS_SNAPSHOT.with(|c| c.set(false));
572}
573
574pub(crate) struct CurrentSnapshotGuard {
580 previous: Option<SnapshotContext>,
581}
582
583impl CurrentSnapshotGuard {
584 pub(crate) fn install(ctx: SnapshotContext) -> Self {
585 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
586 set_current_snapshot(ctx);
587 Self { previous }
588 }
589}
590
591impl Drop for CurrentSnapshotGuard {
592 fn drop(&mut self) {
593 let prev = self.previous.take();
594 let has = prev.is_some();
595 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
596 HAS_SNAPSHOT.with(|c| c.set(has));
597 }
598}
599
600#[inline]
611pub fn entity_visible_under_current_snapshot(
612 entity: &crate::storage::unified::entity::UnifiedEntity,
613) -> bool {
614 if !HAS_SNAPSHOT.with(|c| c.get()) {
620 return entity.xmax == 0;
621 }
622 CURRENT_SNAPSHOT.with(|cell| {
623 let guard = cell.borrow();
624 let Some(ctx) = guard.as_ref() else {
625 return true;
626 };
627 visibility_check(ctx, entity.xmin, entity.xmax)
628 })
629}
630
631#[inline]
636pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
637 if !HAS_SNAPSHOT.with(|c| c.get()) {
638 return true;
639 }
640 CURRENT_SNAPSHOT.with(|cell| {
641 let guard = cell.borrow();
642 let Some(ctx) = guard.as_ref() else {
643 return true;
644 };
645 visibility_check(ctx, xmin, xmax)
646 })
647}
648
649pub fn capture_current_snapshot() -> Option<SnapshotContext> {
656 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
657}
658
659pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
664 if !HAS_SNAPSHOT.with(|c| c.get()) {
665 return false;
666 }
667 CURRENT_SNAPSHOT.with(|cell| {
668 cell.borrow()
669 .as_ref()
670 .is_some_and(|ctx| ctx.requires_index_fallback)
671 })
672}
673
674#[derive(Clone, Default)]
689pub struct SnapshotBundle {
690 pub snapshot: Option<SnapshotContext>,
691 pub auth: Option<(String, crate::auth::Role)>,
692 pub tenant: Option<String>,
693}
694
695pub fn snapshot_bundle() -> SnapshotBundle {
698 SnapshotBundle {
699 snapshot: capture_current_snapshot(),
700 auth: current_auth_identity(),
701 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
702 }
703}
704
705pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
710 struct Guard {
711 prev_snapshot: Option<SnapshotContext>,
712 prev_auth: Option<(String, crate::auth::Role)>,
713 prev_tenant: Option<String>,
714 }
715 impl Drop for Guard {
716 fn drop(&mut self) {
717 let snap = self.prev_snapshot.take();
718 let has = snap.is_some();
719 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
720 HAS_SNAPSHOT.with(|c| c.set(has));
721 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
722 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
723 }
724 }
725
726 let _guard = {
727 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
728 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
729 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
730
731 match bundle.snapshot.clone() {
732 Some(ctx) => set_current_snapshot(ctx),
733 None => clear_current_snapshot(),
734 }
735 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
736 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
737
738 Guard {
739 prev_snapshot,
740 prev_auth,
741 prev_tenant,
742 }
743 };
744 f()
745}
746
747#[inline]
751pub fn entity_visible_with_context(
752 ctx: Option<&SnapshotContext>,
753 entity: &crate::storage::unified::entity::UnifiedEntity,
754) -> bool {
755 match ctx {
756 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
757 None => true,
758 }
759}
760
761fn table_row_index_fields(
762 entity: &crate::storage::unified::entity::UnifiedEntity,
763) -> Vec<(String, crate::storage::schema::Value)> {
764 let crate::storage::EntityData::Row(row) = &entity.data else {
765 return Vec::new();
766 };
767 if let Some(named) = &row.named {
768 return named
769 .iter()
770 .map(|(name, value)| (name.clone(), value.clone()))
771 .collect();
772 }
773 if let Some(schema) = &row.schema {
774 return schema
775 .iter()
776 .zip(row.columns.iter())
777 .map(|(name, value)| (name.clone(), value.clone()))
778 .collect();
779 }
780 Vec::new()
781}
782
783#[inline]
784fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
785 if xmin != 0 && ctx.manager.is_aborted(xmin) {
789 return false;
790 }
791 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
793 0
794 } else {
795 xmax
796 };
797 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
801 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
802 if own_xmax {
803 return false;
805 }
806 if own_xmin {
807 return true;
808 }
809 ctx.snapshot.sees(xmin, effective_xmax)
810}
811
812fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
813 runtime
814 .inner
815 .pool
816 .lock()
817 .unwrap_or_else(|poisoned| poisoned.into_inner())
818}
819
820fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
821 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
822 return;
823 }
824 scopes.insert(name.to_string());
825}
826
827fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
828 match query.source.as_ref() {
829 Some(crate::storage::query::ast::TableSource::Name(name)) => {
830 cache_scope_insert(scopes, name)
831 }
832 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
833 collect_query_expr_result_cache_scopes(scopes, subquery);
834 }
835 None => cache_scope_insert(scopes, &query.table),
836 }
837}
838
839fn collect_vector_source_scopes(
840 scopes: &mut HashSet<String>,
841 source: &crate::storage::query::ast::VectorSource,
842) {
843 match source {
844 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
845 cache_scope_insert(scopes, collection);
846 }
847 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
848 collect_query_expr_result_cache_scopes(scopes, subquery);
849 }
850 crate::storage::query::ast::VectorSource::Literal(_)
851 | crate::storage::query::ast::VectorSource::Text(_) => {}
852 }
853}
854
855fn collect_path_selector_scopes(
856 scopes: &mut HashSet<String>,
857 selector: &crate::storage::query::ast::NodeSelector,
858) {
859 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
860 cache_scope_insert(scopes, table);
861 }
862}
863
864fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
865 match expr {
866 QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
867 QueryExpr::Join(query) => {
868 collect_query_expr_result_cache_scopes(scopes, &query.left);
869 collect_query_expr_result_cache_scopes(scopes, &query.right);
870 }
871 QueryExpr::Path(query) => {
872 collect_path_selector_scopes(scopes, &query.from);
873 collect_path_selector_scopes(scopes, &query.to);
874 }
875 QueryExpr::Vector(query) => {
876 cache_scope_insert(scopes, &query.collection);
877 collect_vector_source_scopes(scopes, &query.query_vector);
878 }
879 QueryExpr::Hybrid(query) => {
880 collect_query_expr_result_cache_scopes(scopes, &query.structured);
881 cache_scope_insert(scopes, &query.vector.collection);
882 collect_vector_source_scopes(scopes, &query.vector.query_vector);
883 }
884 QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
885 QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
886 QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
887 QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
888 QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
889 QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
890 QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
891 QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
892 QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
893 QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
894 QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
895 QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
896 QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
897 QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
898 QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
899 QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
900 QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
901 QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
902 QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
903 QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
904 QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
905 QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
906 QueryExpr::QueueCommand(query) => match query {
907 QueueCommand::Push { queue, .. }
908 | QueueCommand::Pop { queue, .. }
909 | QueueCommand::Peek { queue, .. }
910 | QueueCommand::Len { queue }
911 | QueueCommand::Purge { queue }
912 | QueueCommand::GroupCreate { queue, .. }
913 | QueueCommand::GroupRead { queue, .. }
914 | QueueCommand::Pending { queue, .. }
915 | QueueCommand::Claim { queue, .. }
916 | QueueCommand::Ack { queue, .. }
917 | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
918 QueueCommand::Move {
919 source,
920 destination,
921 ..
922 } => {
923 cache_scope_insert(scopes, source);
924 cache_scope_insert(scopes, destination);
925 }
926 },
927 QueryExpr::EventsBackfill(query) => {
928 cache_scope_insert(scopes, &query.collection);
929 cache_scope_insert(scopes, &query.target_queue);
930 }
931 QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
932 QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
933 QueryExpr::TreeCommand(query) => match query {
934 TreeCommand::Insert { collection, .. }
935 | TreeCommand::Move { collection, .. }
936 | TreeCommand::Delete { collection, .. }
937 | TreeCommand::Validate { collection, .. }
938 | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
939 },
940 QueryExpr::SearchCommand(query) => match query {
941 SearchCommand::Similar { collection, .. }
942 | SearchCommand::Hybrid { collection, .. }
943 | SearchCommand::SpatialRadius { collection, .. }
944 | SearchCommand::SpatialBbox { collection, .. }
945 | SearchCommand::SpatialNearest { collection, .. } => {
946 cache_scope_insert(scopes, collection);
947 }
948 SearchCommand::Text { collection, .. }
949 | SearchCommand::Multimodal { collection, .. }
950 | SearchCommand::Index { collection, .. }
951 | SearchCommand::Context { collection, .. } => {
952 if let Some(collection) = collection.as_deref() {
953 cache_scope_insert(scopes, collection);
954 }
955 }
956 },
957 QueryExpr::Ask(query) => {
958 if let Some(collection) = query.collection.as_deref() {
959 cache_scope_insert(scopes, collection);
960 }
961 }
962 QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
963 QueryExpr::MaintenanceCommand(cmd) => match cmd {
964 crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
965 | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
966 if let Some(t) = target {
967 cache_scope_insert(scopes, t);
968 }
969 }
970 },
971 QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
972 QueryExpr::CreateView(cmd) => {
973 cache_scope_insert(scopes, &cmd.name);
974 collect_query_expr_result_cache_scopes(scopes, &cmd.query);
976 }
977 QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
978 QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
979 QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
980 QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
981 QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
982 QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
983 QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
984 QueryExpr::Graph(_)
985 | QueryExpr::GraphCommand(_)
986 | QueryExpr::ProbabilisticCommand(_)
987 | QueryExpr::SetConfig { .. }
988 | QueryExpr::ShowConfig { .. }
989 | QueryExpr::SetSecret { .. }
990 | QueryExpr::DeleteSecret { .. }
991 | QueryExpr::ShowSecrets { .. }
992 | QueryExpr::SetTenant(_)
993 | QueryExpr::ShowTenant
994 | QueryExpr::TransactionControl(_)
995 | QueryExpr::CreateSchema(_)
996 | QueryExpr::DropSchema(_)
997 | QueryExpr::CreateSequence(_)
998 | QueryExpr::DropSequence(_)
999 | QueryExpr::Grant(_)
1000 | QueryExpr::Revoke(_)
1001 | QueryExpr::AlterUser(_)
1002 | QueryExpr::CreateIamPolicy { .. }
1003 | QueryExpr::DropIamPolicy { .. }
1004 | QueryExpr::AttachPolicy { .. }
1005 | QueryExpr::DetachPolicy { .. }
1006 | QueryExpr::ShowPolicies { .. }
1007 | QueryExpr::ShowEffectivePermissions { .. }
1008 | QueryExpr::SimulatePolicy { .. }
1009 | QueryExpr::CreateMigration(_)
1010 | QueryExpr::ApplyMigration(_)
1011 | QueryExpr::RollbackMigration(_)
1012 | QueryExpr::ExplainMigration(_)
1013 | QueryExpr::EventsBackfillStatus { .. } => {}
1014 QueryExpr::KvCommand(cmd) => {
1015 use crate::storage::query::ast::KvCommand;
1016 match cmd {
1017 KvCommand::Put { collection, .. }
1018 | KvCommand::InvalidateTags { collection, .. }
1019 | KvCommand::Get { collection, .. }
1020 | KvCommand::Unseal { collection, .. }
1021 | KvCommand::Rotate { collection, .. }
1022 | KvCommand::History { collection, .. }
1023 | KvCommand::List { collection, .. }
1024 | KvCommand::Purge { collection, .. }
1025 | KvCommand::Watch { collection, .. }
1026 | KvCommand::Delete { collection, .. }
1027 | KvCommand::Incr { collection, .. }
1028 | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
1029 }
1030 }
1031 QueryExpr::ConfigCommand(cmd) => {
1032 use crate::storage::query::ast::ConfigCommand;
1033 match cmd {
1034 ConfigCommand::Put { collection, .. }
1035 | ConfigCommand::Get { collection, .. }
1036 | ConfigCommand::Resolve { collection, .. }
1037 | ConfigCommand::Rotate { collection, .. }
1038 | ConfigCommand::Delete { collection, .. }
1039 | ConfigCommand::History { collection, .. }
1040 | ConfigCommand::List { collection, .. }
1041 | ConfigCommand::Watch { collection, .. }
1042 | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
1043 cache_scope_insert(scopes, collection)
1044 }
1045 }
1046 }
1047 }
1048}
1049
1050pub(crate) fn rls_policy_filter(
1058 runtime: &RedDBRuntime,
1059 table: &str,
1060 action: crate::storage::query::ast::PolicyAction,
1061) -> Option<crate::storage::query::ast::Filter> {
1062 rls_policy_filter_for_kind(
1063 runtime,
1064 table,
1065 action,
1066 crate::storage::query::ast::PolicyTargetKind::Table,
1067 )
1068}
1069
1070pub(crate) fn rls_policy_filter_for_kind(
1076 runtime: &RedDBRuntime,
1077 table: &str,
1078 action: crate::storage::query::ast::PolicyAction,
1079 kind: crate::storage::query::ast::PolicyTargetKind,
1080) -> Option<crate::storage::query::ast::Filter> {
1081 use crate::storage::query::ast::Filter;
1082
1083 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1084 return None;
1085 }
1086 let role = current_auth_identity().map(|(_, role)| role);
1087 let role_str = role.map(|r| r.as_str().to_string());
1088 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1089 if policies.is_empty() {
1090 return None;
1091 }
1092 policies
1093 .into_iter()
1094 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1095}
1096
1097pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1101 runtime.inner.rls_enabled_tables.read().contains(table)
1102}
1103
1104fn node_passes_rls(
1111 runtime: &RedDBRuntime,
1112 collection: &str,
1113 role: Option<&str>,
1114 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1115 entity: &crate::storage::unified::entity::UnifiedEntity,
1116) -> bool {
1117 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1118
1119 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1120 return true;
1121 }
1122 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1123 let policies = runtime.matching_rls_policies_for_kind(
1124 collection,
1125 role,
1126 PolicyAction::Select,
1127 PolicyTargetKind::Nodes,
1128 );
1129 if policies.is_empty() {
1130 None
1131 } else {
1132 policies
1133 .into_iter()
1134 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1135 }
1136 });
1137 let Some(filter) = filter else {
1138 return false;
1139 };
1140 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1141 Some(&runtime.inner.db),
1142 entity,
1143 filter,
1144 collection,
1145 collection,
1146 )
1147}
1148
1149fn edge_passes_rls(
1152 runtime: &RedDBRuntime,
1153 collection: &str,
1154 role: Option<&str>,
1155 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1156 entity: &crate::storage::unified::entity::UnifiedEntity,
1157) -> bool {
1158 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1159
1160 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1161 return true;
1162 }
1163 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1164 let policies = runtime.matching_rls_policies_for_kind(
1165 collection,
1166 role,
1167 PolicyAction::Select,
1168 PolicyTargetKind::Edges,
1169 );
1170 if policies.is_empty() {
1171 None
1172 } else {
1173 policies
1174 .into_iter()
1175 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1176 }
1177 });
1178 let Some(filter) = filter else {
1179 return false;
1180 };
1181 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1182 Some(&runtime.inner.db),
1183 entity,
1184 filter,
1185 collection,
1186 collection,
1187 )
1188}
1189
1190fn inject_rls_filters(
1211 runtime: &RedDBRuntime,
1212 frame: &dyn super::statement_frame::ReadFrame,
1213 mut table: crate::storage::query::ast::TableQuery,
1214) -> Option<crate::storage::query::ast::TableQuery> {
1215 use crate::storage::query::ast::{Filter, PolicyAction};
1216
1217 let role = frame.identity().map(|(_, role)| role);
1219 let role_str = role.map(|r| r.as_str().to_string());
1220 let policies =
1221 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1222
1223 if policies.is_empty() {
1224 return None;
1227 }
1228
1229 let combined = policies
1231 .into_iter()
1232 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1233 .expect("policies non-empty");
1234
1235 table.filter = Some(match table.filter.take() {
1237 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1238 None => combined,
1239 });
1240 Some(table)
1241}
1242
1243fn inject_rls_into_join(
1253 runtime: &RedDBRuntime,
1254 frame: &dyn super::statement_frame::ReadFrame,
1255 mut join: crate::storage::query::ast::JoinQuery,
1256) -> Option<crate::storage::query::ast::JoinQuery> {
1257 use crate::storage::query::ast::Filter;
1258
1259 let mut policy_filters: Vec<Filter> = Vec::new();
1260 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1261 return None;
1262 }
1263 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1264 return None;
1265 }
1266
1267 if policy_filters.is_empty() {
1268 return Some(join);
1269 }
1270
1271 let combined = policy_filters
1272 .into_iter()
1273 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1274 .expect("policy_filters non-empty");
1275
1276 join.filter = Some(match join.filter.take() {
1277 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1278 None => combined,
1279 });
1280
1281 Some(join)
1282}
1283
1284fn collect_join_side_policy(
1289 runtime: &RedDBRuntime,
1290 frame: &dyn super::statement_frame::ReadFrame,
1291 expr: &crate::storage::query::ast::QueryExpr,
1292 out: &mut Vec<crate::storage::query::ast::Filter>,
1293) -> bool {
1294 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1295 match expr {
1296 QueryExpr::Table(t) => {
1297 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1298 return true;
1299 }
1300 let role = frame.identity().map(|(_, role)| role);
1301 let role_str = role.map(|r| r.as_str().to_string());
1302 let policies =
1303 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1304 if policies.is_empty() {
1305 return false;
1306 }
1307 let combined = policies
1308 .into_iter()
1309 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1310 .expect("policies non-empty");
1311 out.push(combined);
1312 true
1313 }
1314 QueryExpr::Join(inner) => {
1315 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1316 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1317 }
1318 _ => true,
1319 }
1320}
1321
1322fn apply_foreign_table_filters(
1333 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1334 query: &crate::storage::query::ast::TableQuery,
1335) -> crate::storage::query::unified::UnifiedResult {
1336 use crate::storage::query::sql_lowering::{
1337 effective_table_filter, effective_table_projections,
1338 };
1339 use crate::storage::query::unified::UnifiedResult;
1340
1341 let filter = effective_table_filter(query);
1342 let projections = effective_table_projections(query);
1343
1344 let mut filtered: Vec<_> = records
1347 .into_iter()
1348 .filter(|record| match &filter {
1349 Some(f) => {
1350 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1351 }
1352 None => true,
1353 })
1354 .collect();
1355
1356 if let Some(offset) = query.offset {
1358 let offset = offset as usize;
1359 if offset >= filtered.len() {
1360 filtered.clear();
1361 } else {
1362 filtered.drain(0..offset);
1363 }
1364 }
1365 if let Some(limit) = query.limit {
1366 filtered.truncate(limit as usize);
1367 }
1368
1369 let columns: Vec<String> = if projections.is_empty() {
1372 filtered
1373 .first()
1374 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1375 .unwrap_or_default()
1376 } else {
1377 projections
1378 .iter()
1379 .map(super::join_filter::projection_name)
1380 .collect()
1381 };
1382
1383 let mut result = UnifiedResult::empty();
1384 result.columns = columns;
1385 result.records = filtered;
1386 result
1387}
1388
1389pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1396 let mut scopes: HashSet<String> = HashSet::new();
1397 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1398 scopes.into_iter().collect()
1399}
1400
1401fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1402 let mut scopes = HashSet::new();
1403 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1404 scopes
1405}
1406
/// Key naming the result-cache backend setting.
// NOTE(review): presumably resolved to a `RuntimeResultCacheBackend`; the
// lookup is not in this section — confirm.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
/// Backend name used when the setting above is absent (presumably — the
/// fallback site is not in this section).
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
/// Namespace string for result-cache entries kept in the blob cache
/// (usage site not in this section — confirm).
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
/// Result-cache time-to-live; the name indicates seconds.
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Entry count above which `trim_result_cache` starts evicting.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
/// Eight-byte header written first by `encode_result_cache_payload` and
/// required by `decode_result_cache_payload`.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
1413
/// Which implementation backs the runtime result cache.
// NOTE(review): the code that selects and interprets these variants is not in
// this section; the per-variant notes below are inferred from names — confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    /// Presumably the in-process map-based cache.
    Legacy,
    /// Presumably the blob-cache-backed store.
    BlobCache,
    /// Presumably runs both backends side by side for comparison.
    Shadow,
}
1420
1421fn trim_result_cache(
1422 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1423 order: &mut std::collections::VecDeque<String>,
1424) {
1425 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1426 if let Some(oldest) = order.pop_front() {
1427 map.remove(&oldest);
1428 } else {
1429 break;
1430 }
1431 }
1432}
1433
1434fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1435 format!(
1436 "{:?}|{}|{}|{}|{}|{:?}",
1437 result.result,
1438 result.query,
1439 result.statement,
1440 result.engine,
1441 result.affected_rows,
1442 result.statement_type
1443 )
1444}
1445
1446fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1447 match mode {
1448 crate::storage::query::modes::QueryMode::Sql => 0,
1449 crate::storage::query::modes::QueryMode::Gremlin => 1,
1450 crate::storage::query::modes::QueryMode::Cypher => 2,
1451 crate::storage::query::modes::QueryMode::Sparql => 3,
1452 crate::storage::query::modes::QueryMode::Path => 4,
1453 crate::storage::query::modes::QueryMode::Natural => 5,
1454 crate::storage::query::modes::QueryMode::Unknown => 255,
1455 }
1456}
1457
1458fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1459 match byte {
1460 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1461 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1462 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1463 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1464 4 => Some(crate::storage::query::modes::QueryMode::Path),
1465 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1466 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1467 _ => None,
1468 }
1469}
1470
/// Maps a label to its `'static` counterpart.
///
/// Only the labels listed below are recognised; anything else yields `None`.
/// The encoder uses this as an allow-list and the decoder uses it to turn
/// deserialised strings back into `&'static str` values.
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN_LABELS.iter().copied().find(|label| *label == value)
}
1493
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in
/// 32 bits.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrow) => {
            out.extend_from_slice(&narrow.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1499
1500fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1501 write_u32(out, value.len())?;
1502 out.extend_from_slice(value.as_bytes());
1503 Some(())
1504}
1505
1506fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1507 write_u32(out, value.len())?;
1508 out.extend_from_slice(value);
1509 Some(())
1510}
1511
/// Pops one byte off the front of `input`.
///
/// Returns `None`, leaving the cursor unchanged, when `input` is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let buf: &[u8] = *input;
    let first = *buf.first()?;
    *input = &buf[1..];
    Some(first)
}
1517
/// Reads a little-endian `u32` from the front of `input` and returns it as a
/// `usize`.
///
/// Returns `None`, leaving the cursor unchanged, when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let buf: &[u8] = *input;
    let head = buf.get(..4)?;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(head);
    *input = &buf[4..];
    Some(u32::from_le_bytes(raw) as usize)
}
1526
/// Reads a little-endian `u64` from the front of `input`.
///
/// Returns `None`, leaving the cursor unchanged, when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let buf: &[u8] = *input;
    let head = buf.get(..8)?;
    let mut raw = [0u8; 8];
    raw.copy_from_slice(head);
    *input = &buf[8..];
    Some(u64::from_le_bytes(raw))
}
1535
1536fn read_string(input: &mut &[u8]) -> Option<String> {
1537 let len = read_u32(input)?;
1538 if input.len() < len {
1539 return None;
1540 }
1541 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1542 *input = &input[len..];
1543 Some(value)
1544}
1545
1546fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1547 let len = read_u32(input)?;
1548 if input.len() < len {
1549 return None;
1550 }
1551 let value = &input[..len];
1552 *input = &input[len..];
1553 Some(value)
1554}
1555
1556fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
1557 let result = &entry.result;
1558 if result.result.pre_serialized_json.is_some()
1559 || result_cache_static_str(result.statement).is_none()
1560 || result_cache_static_str(result.engine).is_none()
1561 || result_cache_static_str(result.statement_type).is_none()
1562 || result.result.records.iter().any(|record| {
1563 !record.nodes.is_empty()
1564 || !record.edges.is_empty()
1565 || !record.paths.is_empty()
1566 || !record.vector_results.is_empty()
1567 })
1568 {
1569 return None;
1570 }
1571
1572 let mut out = Vec::new();
1573 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
1574 write_string(&mut out, &result.query)?;
1575 out.push(mode_to_byte(result.mode));
1576 write_string(&mut out, result.statement)?;
1577 write_string(&mut out, result.engine)?;
1578 out.extend_from_slice(&result.affected_rows.to_le_bytes());
1579 write_string(&mut out, result.statement_type)?;
1580
1581 write_u32(&mut out, result.result.columns.len())?;
1582 for column in &result.result.columns {
1583 write_string(&mut out, column)?;
1584 }
1585 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
1586 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
1587 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
1588 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
1589
1590 write_u32(&mut out, result.result.records.len())?;
1591 for record in &result.result.records {
1592 let fields = record.iter_fields().collect::<Vec<_>>();
1593 write_u32(&mut out, fields.len())?;
1594 for (name, value) in fields {
1595 write_string(&mut out, name)?;
1596 let mut encoded = Vec::new();
1597 crate::storage::schema::value_codec::encode(value, &mut encoded);
1598 write_bytes(&mut out, &encoded)?;
1599 }
1600 }
1601
1602 write_u32(&mut out, entry.scopes.len())?;
1603 for scope in &entry.scopes {
1604 write_string(&mut out, scope)?;
1605 }
1606 Some(out)
1607}
1608
1609fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
1610 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
1611 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
1612 {
1613 return None;
1614 }
1615 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
1616
1617 let query = read_string(&mut input)?;
1618 let mode = mode_from_byte(read_u8(&mut input)?)?;
1619 let statement = result_cache_static_str(&read_string(&mut input)?)?;
1620 let engine = result_cache_static_str(&read_string(&mut input)?)?;
1621 let affected_rows = read_u64(&mut input)?;
1622 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
1623
1624 let mut columns = Vec::new();
1625 for _ in 0..read_u32(&mut input)? {
1626 columns.push(read_string(&mut input)?);
1627 }
1628 let stats = crate::storage::query::unified::QueryStats {
1629 nodes_scanned: read_u64(&mut input)?,
1630 edges_scanned: read_u64(&mut input)?,
1631 rows_scanned: read_u64(&mut input)?,
1632 exec_time_us: read_u64(&mut input)?,
1633 };
1634
1635 let mut records = Vec::new();
1636 for _ in 0..read_u32(&mut input)? {
1637 let mut record = crate::storage::query::unified::UnifiedRecord::new();
1638 for _ in 0..read_u32(&mut input)? {
1639 let name = read_string(&mut input)?;
1640 let bytes = read_bytes(&mut input)?;
1641 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
1642 if used != bytes.len() {
1643 return None;
1644 }
1645 record.set_owned(name, value);
1646 }
1647 records.push(record);
1648 }
1649
1650 let mut scopes = HashSet::new();
1651 for _ in 0..read_u32(&mut input)? {
1652 scopes.insert(read_string(&mut input)?);
1653 }
1654 if !input.is_empty() {
1655 return None;
1656 }
1657
1658 Some((
1659 RuntimeQueryResult {
1660 query,
1661 mode,
1662 statement,
1663 engine,
1664 result: crate::storage::query::unified::UnifiedResult {
1665 columns,
1666 records,
1667 stats,
1668 pre_serialized_json: None,
1669 },
1670 affected_rows,
1671 statement_type,
1672 },
1673 scopes,
1674 ))
1675}
1676
/// Strips a leading `EXPLAIN` keyword (case-insensitive) and returns the
/// statement that follows it.
///
/// Returns `None` when:
/// * the first word is not `EXPLAIN`,
/// * nothing but whitespace follows `EXPLAIN`, or
/// * the word after `EXPLAIN` is `ALTER` or `ASK` (case-insensitive).
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    let (keyword, tail) = sql.trim_start().split_once(char::is_whitespace)?;
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let statement = tail.trim_start();
    if statement.is_empty() {
        return None;
    }

    let next_word = statement
        .split(char::is_whitespace)
        .next()
        .unwrap_or(statement);
    let excluded = ["ALTER", "ASK"]
        .iter()
        .any(|word| next_word.eq_ignore_ascii_case(word));
    if excluded {
        None
    } else {
        Some(statement)
    }
}
1715
1716pub(super) fn has_with_prefix(sql: &str) -> bool {
1721 let trimmed = sql.trim_start();
1722 let head_end = trimmed
1723 .find(|c: char| c.is_whitespace() || c == '(')
1724 .unwrap_or(trimmed.len());
1725 trimmed[..head_end].eq_ignore_ascii_case("WITH")
1726}
1727
1728fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
1736 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
1737}
1738
1739pub(super) fn peek_top_level_as_of_with_table(
1744 sql: &str,
1745) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
1746 if !sql
1747 .as_bytes()
1748 .windows(5)
1749 .any(|w| w.eq_ignore_ascii_case(b"as of"))
1750 {
1751 return None;
1752 }
1753 let parsed = crate::storage::query::parser::parse(sql).ok()?;
1754 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
1755 return None;
1756 };
1757 let clause = table.as_of?;
1758 let table_name = if table.table.is_empty() || table.table == "any" {
1759 None
1760 } else {
1761 Some(table.table.clone())
1762 };
1763 let spec = match clause {
1764 crate::storage::query::ast::AsOfClause::Commit(h) => {
1765 crate::application::vcs::AsOfSpec::Commit(h)
1766 }
1767 crate::storage::query::ast::AsOfClause::Branch(b) => {
1768 crate::application::vcs::AsOfSpec::Branch(b)
1769 }
1770 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
1771 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
1772 crate::application::vcs::AsOfSpec::TimestampMs(ts)
1773 }
1774 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
1775 crate::application::vcs::AsOfSpec::Snapshot(x)
1776 }
1777 };
1778 Some((spec, table_name))
1779}
1780
1781pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
1782 const VOLATILE_TOKENS: &[&str] = &[
1786 "pg_advisory_lock",
1787 "pg_try_advisory_lock",
1788 "pg_advisory_unlock",
1789 "random()",
1790 ];
1795 let lowered = sql.to_ascii_lowercase();
1796 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
1797}
1798
1799pub(super) fn query_is_ask_statement(sql: &str) -> bool {
1800 let trimmed = sql.trim_start();
1801 let head_end = trimmed
1802 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
1803 .unwrap_or(trimmed.len());
1804 trimmed[..head_end].eq_ignore_ascii_case("ASK")
1805}
1806
1807pub(super) fn intent_lock_modes_for(
1817 expr: &QueryExpr,
1818) -> Option<(
1819 crate::storage::transaction::lock::LockMode,
1820 crate::storage::transaction::lock::LockMode,
1821)> {
1822 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
1823
1824 match expr {
1825 QueryExpr::Table(_)
1827 | QueryExpr::Join(_)
1828 | QueryExpr::Vector(_)
1829 | QueryExpr::Hybrid(_)
1830 | QueryExpr::Graph(_)
1831 | QueryExpr::Path(_)
1832 | QueryExpr::Ask(_)
1833 | QueryExpr::SearchCommand(_)
1834 | QueryExpr::GraphCommand(_)
1835 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
1836
1837 QueryExpr::Insert(_)
1845 | QueryExpr::Update(_)
1846 | QueryExpr::Delete(_)
1847 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
1848 Some((IntentExclusive, IntentExclusive))
1849 }
1850 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
1851
1852 QueryExpr::CreateTable(_)
1856 | QueryExpr::CreateCollection(_)
1857 | QueryExpr::CreateVector(_)
1858 | QueryExpr::DropTable(_)
1859 | QueryExpr::DropGraph(_)
1860 | QueryExpr::DropVector(_)
1861 | QueryExpr::DropDocument(_)
1862 | QueryExpr::DropKv(_)
1863 | QueryExpr::DropCollection(_)
1864 | QueryExpr::Truncate(_)
1865 | QueryExpr::AlterTable(_)
1866 | QueryExpr::CreateIndex(_)
1867 | QueryExpr::DropIndex(_)
1868 | QueryExpr::CreateTimeSeries(_)
1869 | QueryExpr::DropTimeSeries(_)
1870 | QueryExpr::CreateQueue(_)
1871 | QueryExpr::AlterQueue(_)
1872 | QueryExpr::DropQueue(_)
1873 | QueryExpr::CreateTree(_)
1874 | QueryExpr::DropTree(_)
1875 | QueryExpr::CreatePolicy(_)
1876 | QueryExpr::DropPolicy(_)
1877 | QueryExpr::CreateView(_)
1878 | QueryExpr::DropView(_)
1879 | QueryExpr::RefreshMaterializedView(_)
1880 | QueryExpr::CreateSchema(_)
1881 | QueryExpr::DropSchema(_)
1882 | QueryExpr::CreateSequence(_)
1883 | QueryExpr::DropSequence(_)
1884 | QueryExpr::CreateServer(_)
1885 | QueryExpr::DropServer(_)
1886 | QueryExpr::CreateForeignTable(_)
1887 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
1888
1889 _ => None,
1895 }
1896}
1897
1898pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
1903 let mut out = Vec::new();
1904 walk_collections(expr, &mut out);
1905 out.sort();
1906 out.dedup();
1907 out
1908}
1909
1910fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
1911 match expr {
1912 QueryExpr::Table(t) => out.push(t.table.clone()),
1913 QueryExpr::Join(j) => {
1914 walk_collections(&j.left, out);
1915 walk_collections(&j.right, out);
1916 }
1917 QueryExpr::Insert(i) => out.push(i.table.clone()),
1918 QueryExpr::Update(u) => out.push(u.table.clone()),
1919 QueryExpr::Delete(d) => out.push(d.table.clone()),
1920 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
1921
1922 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
1927 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
1928 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
1929 QueryExpr::DropTable(q) => out.push(q.name.clone()),
1930 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
1931 QueryExpr::DropVector(q) => out.push(q.name.clone()),
1932 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
1933 QueryExpr::DropKv(q) => out.push(q.name.clone()),
1934 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
1935 QueryExpr::Truncate(q) => out.push(q.name.clone()),
1936 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
1937 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
1938 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
1939 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
1940 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
1941 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
1942 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
1943 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
1944 QueryExpr::QueueCommand(QueueCommand::Move {
1945 source,
1946 destination,
1947 ..
1948 }) => {
1949 out.push(source.clone());
1950 out.push(destination.clone());
1951 }
1952 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
1953 QueryExpr::CreateView(q) => out.push(q.name.clone()),
1954 QueryExpr::DropView(q) => out.push(q.name.clone()),
1955 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
1956
1957 _ => {}
1963 }
1964}
1965
1966impl RedDBRuntime {
1967 pub fn in_memory() -> RedDBResult<Self> {
1968 Self::with_options(RedDBOptions::in_memory())
1969 }
1970
1971 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
1975 self.inner.lock_manager.clone()
1976 }
1977
1978 #[inline(never)]
1979 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
1980 Self::with_pool(options, ConnectionPoolConfig::default())
1981 }
1982
1983 pub fn with_pool(
1984 options: RedDBOptions,
1985 pool_config: ConnectionPoolConfig,
1986 ) -> RedDBResult<Self> {
1987 let boot_open_start_ms = std::time::SystemTime::now()
1995 .duration_since(std::time::UNIX_EPOCH)
1996 .map(|d| d.as_millis() as u64)
1997 .unwrap_or(0);
1998 let db = Arc::new(
1999 RedDB::open_with_options(&options)
2000 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2001 );
2002 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2003 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2004 options
2005 .resolved_path("data.rdb")
2006 .with_extension("result-cache.l2"),
2007 ),
2008 )
2009 .map_err(|err| {
2010 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2011 })?;
2012 let storage_ready_ms = std::time::SystemTime::now()
2013 .duration_since(std::time::UNIX_EPOCH)
2014 .map(|d| d.as_millis() as u64)
2015 .unwrap_or(0);
2016
2017 let runtime = Self {
2018 inner: Arc::new(RuntimeInner {
2019 db,
2020 layout: PhysicalLayout::from_options(&options),
2021 indices: IndexCatalog::register_default_vector_graph(
2022 options.has_capability(crate::api::Capability::Table),
2023 options.has_capability(crate::api::Capability::Graph),
2024 ),
2025 pool_config,
2026 pool: Mutex::new(PoolState::default()),
2027 started_at_unix_ms: SystemTime::now()
2028 .duration_since(UNIX_EPOCH)
2029 .unwrap_or_default()
2030 .as_millis(),
2031 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2032 index_store: super::index_store::IndexStore::new(),
2033 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2034 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2035 query_cache: parking_lot::RwLock::new(
2036 crate::storage::query::planner::cache::PlanCache::new(1000),
2037 ),
2038 result_cache: parking_lot::RwLock::new((
2039 HashMap::new(),
2040 std::collections::VecDeque::new(),
2041 )),
2042 result_blob_cache,
2043 result_blob_entries: parking_lot::RwLock::new((
2044 HashMap::new(),
2045 std::collections::VecDeque::new(),
2046 )),
2047 ask_answer_cache_entries: parking_lot::RwLock::new((
2048 HashSet::new(),
2049 std::collections::VecDeque::new(),
2050 )),
2051 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2052 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2053 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2054 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2055 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2056 ec_worker: crate::ec::worker::EcWorker::new(),
2057 auth_store: parking_lot::RwLock::new(None),
2058 oauth_validator: parking_lot::RwLock::new(None),
2059 views: parking_lot::RwLock::new(HashMap::new()),
2060 materialized_views: parking_lot::RwLock::new(
2061 crate::storage::cache::result::MaterializedViewCache::new(),
2062 ),
2063 snapshot_manager: Arc::new(
2064 crate::storage::transaction::snapshot::SnapshotManager::new(),
2065 ),
2066 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2067 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2068 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2069 lock_manager: Arc::new({
2070 let env = crate::runtime::config_overlay::collect_env_overrides();
2075 let timeout_ms = env
2076 .get("concurrency.locking.deadlock_timeout_ms")
2077 .and_then(|raw| raw.parse::<u64>().ok())
2078 .unwrap_or_else(|| {
2079 match crate::runtime::config_matrix::default_for(
2080 "concurrency.locking.deadlock_timeout_ms",
2081 ) {
2082 Some(crate::serde_json::Value::Number(n)) => n as u64,
2083 _ => 5000,
2084 }
2085 });
2086 let cfg = crate::storage::transaction::lock::LockConfig {
2087 default_timeout: std::time::Duration::from_millis(timeout_ms),
2088 ..Default::default()
2089 };
2090 crate::storage::transaction::lock::LockManager::new(cfg)
2091 }),
2092 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2093 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2094 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2095 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2096 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2097 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2098 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2099 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2100 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2101 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2102 &options,
2103 )),
2104 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2105 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2106 audit_log: {
2107 let data_path = options
2111 .data_path
2112 .clone()
2113 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2114 Arc::new(crate::runtime::audit_log::AuditLogger::for_data_path(
2115 &data_path,
2116 ))
2117 },
2118 lease_lifecycle: std::sync::OnceLock::new(),
2119 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2120 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2121 schema_vocabulary: parking_lot::RwLock::new(
2122 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2123 ),
2124 slow_query_logger: {
2125 let log_dir = options
2136 .data_path
2137 .as_ref()
2138 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2139 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2140 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2141 .ok()
2142 .and_then(|s| s.parse::<u64>().ok())
2143 .unwrap_or(1000);
2144 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2145 .ok()
2146 .and_then(|s| s.parse::<u8>().ok())
2147 .unwrap_or(100);
2148 crate::telemetry::slow_query_logger::SlowQueryLogger::new(
2149 crate::telemetry::slow_query_logger::SlowQueryOpts {
2150 log_dir,
2151 threshold_ms,
2152 sample_pct,
2153 },
2154 )
2155 },
2156 kv_stats: crate::runtime::KvStatsCounters::default(),
2157 kv_tag_index: crate::runtime::KvTagIndex::default(),
2158 }),
2159 };
2160
2161 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2167 &runtime.inner.audit_log,
2168 ));
2169
2170 runtime
2178 .inner
2179 .lifecycle
2180 .set_restore_started_at_ms(boot_open_start_ms);
2181 runtime
2182 .inner
2183 .lifecycle
2184 .set_restore_ready_at_ms(storage_ready_ms);
2185 runtime
2186 .inner
2187 .lifecycle
2188 .set_wal_replay_started_at_ms(boot_open_start_ms);
2189 runtime
2190 .inner
2191 .lifecycle
2192 .set_wal_replay_ready_at_ms(storage_ready_ms);
2193
2194 let restored_cdc_lsn = runtime
2195 .inner
2196 .db
2197 .replication
2198 .as_ref()
2199 .map(|repl| {
2200 repl.logical_wal_spool
2201 .as_ref()
2202 .map(|spool| spool.current_lsn())
2203 .unwrap_or(0)
2204 })
2205 .unwrap_or(0)
2206 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2207 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2208 runtime.rehydrate_snapshot_xid_floor();
2209 runtime.bootstrap_system_keyed_collections()?;
2210
2211 runtime.rehydrate_tenant_tables();
2215 if let Some(repl) = &runtime.inner.db.replication {
2216 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2217 }
2218
2219 {
2221 let sys = SystemInfo::collect();
2222 runtime.inner.db.store().set_config_tree(
2223 "red.system",
2224 &crate::serde_json::json!({
2225 "pid": sys.pid,
2226 "cpu_cores": sys.cpu_cores,
2227 "total_memory_bytes": sys.total_memory_bytes,
2228 "available_memory_bytes": sys.available_memory_bytes,
2229 "os": sys.os,
2230 "arch": sys.arch,
2231 "hostname": sys.hostname,
2232 "started_at": SystemTime::now()
2233 .duration_since(UNIX_EPOCH)
2234 .unwrap_or_default()
2235 .as_millis() as u64
2236 }),
2237 );
2238
2239 let store = runtime.inner.db.store();
2241 if store
2242 .get_collection("red_config")
2243 .map(|m| m.query_all(|_| true).len())
2244 .unwrap_or(0)
2245 <= 10
2246 {
2247 store.set_config_tree("red.ai", &crate::json!({
2248 "default": crate::json!({
2249 "provider": "openai",
2250 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2251 }),
2252 "max_embedding_inputs": 256,
2253 "max_prompt_batch": 256,
2254 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2255 }));
2256 store.set_config_tree(
2257 "red.server",
2258 &crate::json!({
2259 "max_scan_limit": 1000,
2260 "max_body_size": 1048576,
2261 "read_timeout_ms": 5000,
2262 "write_timeout_ms": 5000
2263 }),
2264 );
2265 store.set_config_tree(
2266 "red.storage",
2267 &crate::json!({
2268 "page_size": 4096,
2269 "page_cache_capacity": 100000,
2270 "auto_checkpoint_pages": 1000,
2271 "snapshot_retention": 16,
2272 "verify_checksums": true,
2273 "segment": crate::json!({
2274 "max_entities": 100000,
2275 "max_bytes": 268435456_u64,
2276 "compression_level": 6
2277 }),
2278 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2279 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2280 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2281 }),
2282 );
2283 store.set_config_tree(
2284 "red.search",
2285 &crate::json!({
2286 "rag": crate::json!({
2287 "max_chunks_per_source": 10,
2288 "max_total_chunks": 25,
2289 "similarity_threshold": 0.8,
2290 "graph_depth": 2,
2291 "min_relevance": 0.3
2292 }),
2293 "fusion": crate::json!({
2294 "vector_weight": 0.5,
2295 "graph_weight": 0.3,
2296 "table_weight": 0.2,
2297 "dedup_threshold": 0.85
2298 })
2299 }),
2300 );
2301 store.set_config_tree(
2302 "red.auth",
2303 &crate::json!({
2304 "enabled": false,
2305 "session_ttl_secs": 3600,
2306 "require_auth": false
2307 }),
2308 );
2309 store.set_config_tree(
2310 "red.query",
2311 &crate::json!({
2312 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2313 "max_recursion_depth": 1000
2314 }),
2315 );
2316 store.set_config_tree(
2317 "red.indexes",
2318 &crate::json!({
2319 "auto_select": true,
2320 "bloom_filter": crate::json!({
2321 "enabled": true,
2322 "false_positive_rate": 0.01,
2323 "prune_on_scan": true
2324 }),
2325 "hash": crate::json!({ "enabled": true }),
2326 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2327 "spatial": crate::json!({ "enabled": true })
2328 }),
2329 );
2330 store.set_config_tree(
2331 "red.memtable",
2332 &crate::json!({
2333 "enabled": true,
2334 "max_bytes": 67108864_u64,
2335 "flush_threshold": 0.75
2336 }),
2337 );
2338 store.set_config_tree(
2339 "red.probabilistic",
2340 &crate::json!({
2341 "hll_registers": 16384,
2342 "sketch_default_width": 1000,
2343 "sketch_default_depth": 5,
2344 "filter_default_capacity": 100000
2345 }),
2346 );
2347 store.set_config_tree(
2348 "red.timeseries",
2349 &crate::json!({
2350 "default_chunk_size": 1024,
2351 "compression": crate::json!({
2352 "timestamps": "delta_of_delta",
2353 "values": "gorilla_xor"
2354 }),
2355 "default_retention_days": 0
2356 }),
2357 );
2358 store.set_config_tree(
2359 "red.queue",
2360 &crate::json!({
2361 "default_max_size": 0,
2362 "default_max_attempts": 3,
2363 "visibility_timeout_ms": 30000,
2364 "consumer_idle_timeout_ms": 60000
2365 }),
2366 );
2367 store.set_config_tree(
2368 "red.backup",
2369 &crate::json!({
2370 "enabled": false,
2371 "interval_secs": 3600,
2372 "retention_count": 24,
2373 "upload": false,
2374 "backend": "local"
2375 }),
2376 );
2377 store.set_config_tree(
2378 "red.wal",
2379 &crate::json!({
2380 "archive": crate::json!({
2381 "enabled": false,
2382 "retention_hours": 168,
2383 "prefix": "wal/"
2384 })
2385 }),
2386 );
2387 store.set_config_tree(
2388 "red.cdc",
2389 &crate::json!({
2390 "enabled": true,
2391 "buffer_size": 100000
2392 }),
2393 );
2394 store.set_config_tree(
2395 "red.config.secret",
2396 &crate::json!({
2397 "auto_encrypt": true,
2398 "auto_decrypt": true
2399 }),
2400 );
2401 }
2402
2403 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2410
2411 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2418 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2419 if lehman_yao {
2420 tracing::info!(
2421 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2422 );
2423 }
2424
2425 let overlay_path = crate::runtime::config_overlay::config_file_path();
2430 let _ =
2431 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2432 }
2433
2434 {
2438 let store = runtime.inner.db.store();
2439 for name in crate::application::vcs_collections::ALL {
2440 let _ = store.get_or_create_collection(*name);
2441 }
2442 store.set_config_tree(
2445 crate::application::vcs_collections::CONFIG_NAMESPACE,
2446 &crate::json!({
2447 "default_branch": "main",
2448 "author": crate::json!({
2449 "name": "reddb",
2450 "email": "reddb@localhost"
2451 }),
2452 "protected_branches": crate::json!(["main"]),
2453 "closure": crate::json!({
2454 "enabled": true,
2455 "lazy": true
2456 }),
2457 "merge": crate::json!({
2458 "default_strategy": "auto",
2459 "fast_forward": true
2460 })
2461 }),
2462 );
2463 }
2464
2465 {
2468 let store = runtime.inner.db.store();
2469 for name in crate::application::migration_collections::ALL {
2470 let _ = store.get_or_create_collection(*name);
2471 }
2472 }
2473
2474 {
2489 let weak = Arc::downgrade(&runtime.inner);
2490 std::thread::Builder::new()
2491 .name("reddb-maintenance".into())
2492 .spawn(move || {
2493 let tick = std::time::Duration::from_millis(200);
2494 let work_interval = std::time::Duration::from_secs(60);
2495 let mut last_work = std::time::Instant::now();
2496 loop {
2497 std::thread::sleep(tick);
2498 let Some(inner) = weak.upgrade() else {
2499 break;
2502 };
2503 if last_work.elapsed() >= work_interval {
2504 let _stats = inner.db.store().context_index().stats();
2505 last_work = std::time::Instant::now();
2506 }
2507 }
2508 })
2509 .ok();
2510 }
2511
2512 {
2514 let store = runtime.inner.db.store();
2515 let mut backup_enabled = false;
2516 let mut backup_interval = 3600u64;
2517
2518 if let Some(manager) = store.get_collection("red_config") {
2519 manager.for_each_entity(|entity| {
2520 if let Some(row) = entity.data.as_row() {
2521 let key = row.get_field("key").and_then(|v| match v {
2522 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2523 _ => None,
2524 });
2525 let val = row.get_field("value");
2526 if key == Some("red.config.backup.enabled") {
2527 backup_enabled = match val {
2528 Some(crate::storage::schema::Value::Boolean(true)) => true,
2529 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2530 _ => false,
2531 };
2532 } else if key == Some("red.config.backup.interval_secs") {
2533 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2534 backup_interval = *n as u64;
2535 }
2536 }
2537 }
2538 true
2539 });
2540 }
2541
2542 if backup_enabled {
2543 runtime.inner.backup_scheduler.set_interval(backup_interval);
2544 let rt = runtime.clone();
2545 runtime
2546 .inner
2547 .backup_scheduler
2548 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2549 }
2550 }
2551
2552 {
2554 runtime
2555 .inner
2556 .ec_registry
2557 .load_from_config_store(runtime.inner.db.store().as_ref());
2558 if !runtime.inner.ec_registry.async_configs().is_empty() {
2559 runtime.inner.ec_worker.start(
2560 Arc::clone(&runtime.inner.ec_registry),
2561 Arc::clone(&runtime.inner.db.store()),
2562 );
2563 }
2564 }
2565
2566 if let crate::replication::ReplicationRole::Replica { primary_addr } =
2567 runtime.inner.db.options().replication.role.clone()
2568 {
2569 let rt = runtime.clone();
2570 std::thread::Builder::new()
2571 .name("reddb-replica".into())
2572 .spawn(move || rt.run_replica_loop(primary_addr))
2573 .ok();
2574 }
2575
2576 runtime.inner.lifecycle.mark_ready();
2581
2582 Ok(runtime)
2583 }
2584
2585 fn rehydrate_snapshot_xid_floor(&self) {
2586 let store = self.inner.db.store();
2587 for collection in store.list_collections() {
2588 let Some(manager) = store.get_collection(&collection) else {
2589 continue;
2590 };
2591 for entity in manager.query_all(|_| true) {
2592 self.inner
2593 .snapshot_manager
2594 .observe_committed_xid(entity.xmin);
2595 self.inner
2596 .snapshot_manager
2597 .observe_committed_xid(entity.xmax);
2598 }
2599 }
2600 }
2601
2602 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
2603 let mut changed = false;
2604 for (name, model) in [
2605 ("red.config", crate::catalog::CollectionModel::Config),
2606 ("red.vault", crate::catalog::CollectionModel::Vault),
2607 ] {
2608 if self.inner.db.store().get_collection(name).is_none() {
2609 self.inner.db.store().get_or_create_collection(name);
2610 changed = true;
2611 }
2612 if self.inner.db.collection_contract(name).is_none() {
2613 self.inner
2614 .db
2615 .save_collection_contract(system_keyed_collection_contract(name, model))
2616 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2617 changed = true;
2618 }
2619 }
2620 if changed {
2621 self.inner
2622 .db
2623 .persist_metadata()
2624 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2625 }
2626 Ok(())
2627 }
2628
2629 pub fn db(&self) -> Arc<RedDB> {
2630 Arc::clone(&self.inner.db)
2631 }
2632
2633 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
2638 &self.inner.index_store
2639 }
2640
2641 pub(crate) fn schema_vocabulary_apply(
2646 &self,
2647 event: crate::runtime::schema_vocabulary::DdlEvent,
2648 ) {
2649 self.inner.schema_vocabulary.write().on_ddl(event);
2650 }
2651
2652 pub fn schema_vocabulary_lookup(
2657 &self,
2658 token: &str,
2659 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
2660 self.inner.schema_vocabulary.read().lookup(token).to_vec()
2661 }
2662
2663 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
2667 *self.inner.auth_store.write() = Some(store);
2668 }
2669
2670 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
2672 self.inner
2673 .auth_store
2674 .read()
2675 .as_ref()
2676 .and_then(|store| store.vault_kv_get(key))
2677 }
2678
2679 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
2682 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
2683 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
2684 })?;
2685 store
2686 .vault_kv_try_set(key, value)
2687 .map_err(|err| RedDBError::Query(err.to_string()))
2688 }
2689
2690 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
2694 *self.inner.oauth_validator.write() = validator;
2695 }
2696
2697 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
2701 self.inner.oauth_validator.read().clone()
2702 }
2703
2704 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
2708 let guard = self.inner.auth_store.read();
2709 guard.as_ref().and_then(|s| s.vault_secret_key())
2710 }
2711
2712 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
2718 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2719 if let Some(crate::storage::schema::Value::Boolean(b)) =
2720 crate::runtime::config_overlay::coerce_env_value(key, raw)
2721 {
2722 return b;
2723 }
2724 }
2725 let store = self.inner.db.store();
2726 let Some(manager) = store.get_collection("red_config") else {
2727 return default;
2728 };
2729 let mut result = default;
2730 let mut latest_id: u64 = 0;
2731 manager.for_each_entity(|entity| {
2732 if let Some(row) = entity.data.as_row() {
2733 let entry_key = row.get_field("key").and_then(|v| match v {
2734 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2735 _ => None,
2736 });
2737 if entry_key == Some(key) {
2738 let id = entity.id.raw();
2739 if id >= latest_id {
2740 latest_id = id;
2741 result = match row.get_field("value") {
2742 Some(crate::storage::schema::Value::Boolean(b)) => *b,
2743 Some(crate::storage::schema::Value::Text(s)) => {
2744 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
2745 }
2746 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
2747 _ => default,
2748 };
2749 }
2750 }
2751 }
2752 true
2753 });
2754 result
2755 }
2756
2757 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
2758 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2759 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
2760 crate::runtime::config_overlay::coerce_env_value(key, raw)
2761 {
2762 return n;
2763 }
2764 }
2765 let store = self.inner.db.store();
2766 let Some(manager) = store.get_collection("red_config") else {
2767 return default;
2768 };
2769 let mut result = default;
2770 let mut latest_id: u64 = 0;
2771 manager.for_each_entity(|entity| {
2772 if let Some(row) = entity.data.as_row() {
2773 let entry_key = row.get_field("key").and_then(|v| match v {
2774 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2775 _ => None,
2776 });
2777 if entry_key == Some(key) {
2778 let id = entity.id.raw();
2779 if id >= latest_id {
2780 latest_id = id;
2781 result = match row.get_field("value") {
2782 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
2783 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
2784 Some(crate::storage::schema::Value::Text(s)) => {
2785 s.parse::<u64>().unwrap_or(default)
2786 }
2787 _ => default,
2788 };
2789 }
2790 }
2791 }
2792 true
2793 });
2794 result
2795 }
2796
2797 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
2798 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2799 if let Ok(n) = raw.parse::<f64>() {
2800 return n;
2801 }
2802 }
2803 let store = self.inner.db.store();
2804 let Some(manager) = store.get_collection("red_config") else {
2805 return default;
2806 };
2807 let mut result = default;
2808 let mut latest_id: u64 = 0;
2809 manager.for_each_entity(|entity| {
2810 if let Some(row) = entity.data.as_row() {
2811 let entry_key = row.get_field("key").and_then(|v| match v {
2812 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2813 _ => None,
2814 });
2815 if entry_key == Some(key) {
2816 let id = entity.id.raw();
2817 if id >= latest_id {
2818 latest_id = id;
2819 result = match row.get_field("value") {
2820 Some(crate::storage::schema::Value::Float(n)) => *n,
2821 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
2822 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
2823 Some(crate::storage::schema::Value::Text(s)) => {
2824 s.parse::<f64>().unwrap_or(default)
2825 }
2826 _ => default,
2827 };
2828 }
2829 }
2830 }
2831 true
2832 });
2833 result
2834 }
2835
2836 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
2837 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2838 return raw.clone();
2839 }
2840 let store = self.inner.db.store();
2841 let Some(manager) = store.get_collection("red_config") else {
2842 return default.to_string();
2843 };
2844 let mut result = default.to_string();
2845 let mut latest_id: u64 = 0;
2846 manager.for_each_entity(|entity| {
2847 if let Some(row) = entity.data.as_row() {
2848 let entry_key = row.get_field("key").and_then(|v| match v {
2849 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2850 _ => None,
2851 });
2852 if entry_key == Some(key) {
2853 let id = entity.id.raw();
2854 if id >= latest_id {
2855 latest_id = id;
2856 if let Some(crate::storage::schema::Value::Text(value)) =
2857 row.get_field("value")
2858 {
2859 result = value.to_string();
2860 }
2861 }
2862 }
2863 }
2864 true
2865 });
2866 result
2867 }
2868
2869 fn latest_metadata_for(
2870 &self,
2871 collection: &str,
2872 entity_id: u64,
2873 ) -> Option<crate::serde_json::Value> {
2874 self.inner
2875 .db
2876 .store()
2877 .get_metadata(collection, EntityId::new(entity_id))
2878 .map(|metadata| metadata_to_json(&metadata))
2879 }
2880
2881 fn persist_replica_lsn(&self, lsn: u64) {
2882 self.inner.db.store().set_config_tree(
2883 "red.replication",
2884 &crate::json!({
2885 "last_applied_lsn": lsn
2886 }),
2887 );
2888 }
2889
2890 fn persist_replication_health(
2891 &self,
2892 state: &str,
2893 last_error: &str,
2894 primary_lsn: Option<u64>,
2895 oldest_available_lsn: Option<u64>,
2896 ) {
2897 self.inner.db.store().set_config_tree(
2898 "red.replication",
2899 &crate::json!({
2900 "state": state,
2901 "last_error": last_error,
2902 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
2903 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
2904 "updated_at_unix_ms": SystemTime::now()
2905 .duration_since(UNIX_EPOCH)
2906 .unwrap_or_default()
2907 .as_millis() as u64
2908 }),
2909 );
2910 }
2911
2912 pub(crate) fn secret_auto_encrypt(&self) -> bool {
2915 self.config_bool("red.config.secret.auto_encrypt", true)
2916 }
2917
2918 pub(crate) fn secret_auto_decrypt(&self) -> bool {
2923 self.config_bool("red.config.secret.auto_decrypt", true)
2924 }
2925
2926 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
2933 if !self.secret_auto_decrypt() {
2934 return;
2935 }
2936 let Some(key) = self.secret_aes_key() else {
2937 return;
2938 };
2939 for record in result.result.records.iter_mut() {
2940 for value in record.values_mut() {
2941 if let Value::Secret(ref bytes) = value {
2942 if let Some(plain) =
2943 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
2944 {
2945 if let Ok(text) = String::from_utf8(plain) {
2946 *value = Value::text(text);
2947 }
2948 }
2949 }
2950 }
2951 }
2952 }
2953
2954 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
2962 crate::runtime::mutation::MutationEngine::new(self)
2963 }
2964
2965 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
2976 self.inner.write_gate.check(kind)
2977 }
2978
2979 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
2983 &self.inner.write_gate
2984 }
2985
2986 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
2990 &self.inner.lifecycle
2991 }
2992
2993 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
2995 &self.inner.resource_limits
2996 }
2997
2998 pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
3000 &self.inner.audit_log
3001 }
3002
3003 pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
3007 Arc::clone(&self.inner.audit_log)
3008 }
3009
3010 pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
3015 Arc::clone(&self.inner.write_gate)
3016 }
3017
3018 pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3021 self.inner.lease_lifecycle.get()
3022 }
3023
3024 pub fn set_lease_lifecycle(
3027 &self,
3028 lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
3029 ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3030 self.inner.lease_lifecycle.set(lifecycle)
3031 }
3032
3033 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3038 if self.inner.resource_limits.batch_size_exceeded(requested) {
3039 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3040 return Err(RedDBError::QuotaExceeded(format!(
3041 "max_batch_size:{requested}:{max}"
3042 )));
3043 }
3044 Ok(())
3045 }
3046
3047 pub fn check_db_size(&self) -> RedDBResult<()> {
3053 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3054 return Ok(());
3055 };
3056 if limit == 0 {
3057 return Ok(());
3058 }
3059 let Some(path) = self.inner.db.path() else {
3060 return Ok(());
3061 };
3062 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3063 if current > limit {
3064 return Err(RedDBError::QuotaExceeded(format!(
3065 "max_db_size_bytes:{current}:{limit}"
3066 )));
3067 }
3068 Ok(())
3069 }
3070
3071 pub fn graceful_shutdown(
3089 &self,
3090 backup_on_shutdown: bool,
3091 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
3092 if !self.inner.lifecycle.begin_shutdown() {
3093 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
3097 }
3098
3099 let started_ms = std::time::SystemTime::now()
3100 .duration_since(std::time::UNIX_EPOCH)
3101 .map(|d| d.as_millis() as u64)
3102 .unwrap_or(0);
3103 let mut report = crate::runtime::lifecycle::ShutdownReport {
3104 started_at_ms: started_ms,
3105 ..Default::default()
3106 };
3107
3108 let flush_res = self.inner.db.flush_local_only();
3114 report.flushed_wal = flush_res.is_ok();
3115 report.final_checkpoint = flush_res.is_ok();
3116 if let Err(err) = &flush_res {
3117 tracing::error!(
3118 target: "reddb::lifecycle",
3119 error = %err,
3120 "graceful_shutdown: local flush failed"
3121 );
3122 } else if let Err(lease_err) =
3123 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
3124 {
3125 tracing::warn!(
3126 target: "reddb::serverless::lease",
3127 error = %lease_err,
3128 "graceful_shutdown: remote upload skipped — lease not held"
3129 );
3130 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
3131 tracing::error!(
3132 target: "reddb::lifecycle",
3133 error = %err,
3134 "graceful_shutdown: remote upload failed"
3135 );
3136 }
3137
3138 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
3143 match self.trigger_backup() {
3149 Ok(result) => {
3150 report.backup_uploaded = result.uploaded;
3151 }
3152 Err(err) => {
3153 tracing::warn!(
3154 target: "reddb::lifecycle",
3155 error = %err,
3156 "graceful_shutdown: final backup skipped"
3157 );
3158 }
3159 }
3160 }
3161
3162 let completed_ms = std::time::SystemTime::now()
3163 .duration_since(std::time::UNIX_EPOCH)
3164 .map(|d| d.as_millis() as u64)
3165 .unwrap_or(started_ms);
3166 report.completed_at_ms = completed_ms;
3167 report.duration_ms = completed_ms.saturating_sub(started_ms);
3168
3169 self.inner.lifecycle.finish_shutdown(report.clone());
3170 Ok(report)
3171 }
3172
3173 pub(crate) fn cdc_emit_no_cache_invalidate(
3179 &self,
3180 operation: crate::replication::cdc::ChangeOperation,
3181 collection: &str,
3182 entity_id: u64,
3183 entity_kind: &str,
3184 ) -> u64 {
3185 let lsn = self
3186 .inner
3187 .cdc
3188 .emit(operation, collection, entity_id, entity_kind);
3189
3190 if let Some(ref primary) = self.inner.db.replication {
3192 let store = self.inner.db.store();
3193 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3194 None
3195 } else {
3196 store.get(collection, EntityId::new(entity_id))
3197 };
3198 let record = ChangeRecord {
3199 lsn,
3200 timestamp: SystemTime::now()
3201 .duration_since(UNIX_EPOCH)
3202 .unwrap_or_default()
3203 .as_millis() as u64,
3204 operation,
3205 collection: collection.to_string(),
3206 entity_id,
3207 entity_kind: entity_kind.to_string(),
3208 entity_bytes: entity
3209 .as_ref()
3210 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3211 metadata: self.latest_metadata_for(collection, entity_id),
3212 };
3213 let encoded = record.encode();
3214 primary.wal_buffer.append(record.lsn, encoded.clone());
3215 if let Some(spool) = &primary.logical_wal_spool {
3216 let _ = spool.append(record.lsn, &encoded);
3217 }
3218 }
3219 lsn
3220 }
3221
3222 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3223 &self,
3224 collection: &str,
3225 ids: &[EntityId],
3226 entity_kind: &str,
3227 ) -> Vec<u64> {
3228 if ids.is_empty() {
3229 return Vec::new();
3230 }
3231
3232 if self.inner.db.replication.is_none() {
3236 return self.inner.cdc.emit_batch_same_collection(
3237 crate::replication::cdc::ChangeOperation::Insert,
3238 collection,
3239 entity_kind,
3240 ids.iter().map(|id| id.raw()),
3241 );
3242 }
3243
3244 ids.iter()
3247 .map(|id| {
3248 self.cdc_emit_no_cache_invalidate(
3249 crate::replication::cdc::ChangeOperation::Insert,
3250 collection,
3251 id.raw(),
3252 entity_kind,
3253 )
3254 })
3255 .collect()
3256 }
3257
3258 pub fn cdc_emit(
3259 &self,
3260 operation: crate::replication::cdc::ChangeOperation,
3261 collection: &str,
3262 entity_id: u64,
3263 entity_kind: &str,
3264 ) -> u64 {
3265 let lsn = self
3266 .inner
3267 .cdc
3268 .emit(operation, collection, entity_id, entity_kind);
3269 self.invalidate_result_cache_for_table(collection);
3275
3276 if let Some(ref primary) = self.inner.db.replication {
3278 let store = self.inner.db.store();
3279 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3280 None
3281 } else {
3282 store.get(collection, EntityId::new(entity_id))
3283 };
3284 let record = ChangeRecord {
3285 lsn,
3286 timestamp: SystemTime::now()
3287 .duration_since(UNIX_EPOCH)
3288 .unwrap_or_default()
3289 .as_millis() as u64,
3290 operation,
3291 collection: collection.to_string(),
3292 entity_id,
3293 entity_kind: entity_kind.to_string(),
3294 entity_bytes: entity
3295 .as_ref()
3296 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
3297 metadata: self.latest_metadata_for(collection, entity_id),
3298 };
3299 let encoded = record.encode();
3300 primary.wal_buffer.append(record.lsn, encoded.clone());
3301 if let Some(spool) = &primary.logical_wal_spool {
3302 let _ = spool.append(record.lsn, &encoded);
3303 }
3304 }
3305 lsn
3306 }
3307
3308 pub(crate) fn cdc_emit_kv(
3309 &self,
3310 operation: crate::replication::cdc::ChangeOperation,
3311 collection: &str,
3312 key: &str,
3313 entity_id: u64,
3314 before: Option<crate::json::Value>,
3315 after: Option<crate::json::Value>,
3316 ) -> u64 {
3317 let lsn = self
3318 .inner
3319 .cdc
3320 .emit_kv(operation, collection, key, entity_id, before, after);
3321 self.inner.kv_stats.incr_watch_events_emitted();
3322 self.invalidate_result_cache_for_table(collection);
3323 lsn
3324 }
3325
3326 pub(crate) fn record_kv_watch_event(
3327 &self,
3328 operation: crate::replication::cdc::ChangeOperation,
3329 collection: &str,
3330 key: &str,
3331 entity_id: u64,
3332 before: Option<crate::json::Value>,
3333 after: Option<crate::json::Value>,
3334 ) {
3335 if self.current_xid().is_some() {
3336 let conn_id = current_connection_id();
3337 let event = crate::replication::cdc::KvWatchEvent {
3338 collection: collection.to_string(),
3339 key: key.to_string(),
3340 op: operation,
3341 before,
3342 after,
3343 lsn: 0,
3344 committed_at: 0,
3345 dropped_event_count: 0,
3346 };
3347 self.inner
3348 .pending_kv_watch_events
3349 .write()
3350 .entry(conn_id)
3351 .or_default()
3352 .push(event);
3353 return;
3354 }
3355
3356 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
3357 }
3358
3359 pub(crate) fn cdc_emit_prebuilt(
3360 &self,
3361 operation: crate::replication::cdc::ChangeOperation,
3362 collection: &str,
3363 entity: &UnifiedEntity,
3364 entity_kind: &str,
3365 metadata: Option<&crate::storage::Metadata>,
3366 invalidate_cache: bool,
3367 ) -> u64 {
3368 self.cdc_emit_prebuilt_with_columns(
3369 operation,
3370 collection,
3371 entity,
3372 entity_kind,
3373 metadata,
3374 invalidate_cache,
3375 None,
3376 )
3377 }
3378
3379 pub(crate) fn cdc_emit_prebuilt_with_columns(
3386 &self,
3387 operation: crate::replication::cdc::ChangeOperation,
3388 collection: &str,
3389 entity: &UnifiedEntity,
3390 entity_kind: &str,
3391 metadata: Option<&crate::storage::Metadata>,
3392 invalidate_cache: bool,
3393 changed_columns: Option<Vec<String>>,
3394 ) -> u64 {
3395 if invalidate_cache {
3396 self.invalidate_result_cache();
3397 }
3398
3399 let lsn = self.inner.cdc.emit_with_columns(
3400 operation,
3401 collection,
3402 entity.id.raw(),
3403 entity_kind,
3404 changed_columns,
3405 );
3406
3407 if let Some(ref primary) = self.inner.db.replication {
3408 let store = self.inner.db.store();
3409 let record = ChangeRecord {
3410 lsn,
3411 timestamp: SystemTime::now()
3412 .duration_since(UNIX_EPOCH)
3413 .unwrap_or_default()
3414 .as_millis() as u64,
3415 operation,
3416 collection: collection.to_string(),
3417 entity_id: entity.id.raw(),
3418 entity_kind: entity_kind.to_string(),
3419 entity_bytes: Some(UnifiedStore::serialize_entity(
3420 entity,
3421 store.format_version(),
3422 )),
3423 metadata: metadata
3424 .map(metadata_to_json)
3425 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
3426 };
3427 let encoded = record.encode();
3428 primary.wal_buffer.append(record.lsn, encoded.clone());
3429 if let Some(spool) = &primary.logical_wal_spool {
3430 let _ = spool.append(record.lsn, &encoded);
3431 }
3432 }
3433
3434 lsn
3435 }
3436
3437 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
3438 &self,
3439 operation: crate::replication::cdc::ChangeOperation,
3440 entity_kind: &str,
3441 items: I,
3442 invalidate_cache: bool,
3443 ) where
3444 I: IntoIterator<
3445 Item = (
3446 &'a str,
3447 &'a UnifiedEntity,
3448 Option<&'a crate::storage::Metadata>,
3449 ),
3450 >,
3451 {
3452 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
3453 items.into_iter().collect();
3454 if items.is_empty() {
3455 return;
3456 }
3457
3458 if invalidate_cache {
3459 self.invalidate_result_cache();
3460 }
3461
3462 for (collection, entity, metadata) in items {
3463 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
3464 }
3465 }
3466
3467 fn run_replica_loop(&self, primary_addr: String) {
3468 let endpoint = if primary_addr.starts_with("http") {
3469 primary_addr
3470 } else {
3471 format!("http://{primary_addr}")
3472 };
3473 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
3474 let max_count = self.inner.db.options().replication.max_batch_size;
3475 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
3476
3477 let runtime = match tokio::runtime::Builder::new_current_thread()
3478 .enable_all()
3479 .build()
3480 {
3481 Ok(runtime) => runtime,
3482 Err(_) => return,
3483 };
3484
3485 runtime.block_on(async move {
3486 use crate::grpc::proto::red_db_client::RedDbClient;
3487 use crate::grpc::proto::JsonPayloadRequest;
3488
3489 let mut client = loop {
3490 match RedDbClient::connect(endpoint.clone()).await {
3491 Ok(client) => {
3492 self.persist_replication_health("connecting", "", None, None);
3493 break client;
3494 }
3495 Err(_) => {
3496 self.persist_replication_health(
3497 "connecting",
3498 "waiting for primary connection",
3499 None,
3500 None,
3501 );
3502 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
3503 }
3504 }
3505 };
3506
3507 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
3512
3513 loop {
3514 let payload = crate::json!({
3515 "since_lsn": since_lsn,
3516 "max_count": max_count
3517 });
3518 let request = tonic::Request::new(JsonPayloadRequest {
3519 payload_json: crate::json::to_string(&payload)
3520 .unwrap_or_else(|_| "{}".to_string()),
3521 });
3522
3523 if let Ok(response) = client.pull_wal_records(request).await {
3524 if let Ok(value) =
3525 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
3526 {
3527 let current_lsn =
3528 value.get("current_lsn").and_then(crate::json::Value::as_u64);
3529 let oldest_available_lsn = value
3530 .get("oldest_available_lsn")
3531 .and_then(crate::json::Value::as_u64);
3532 if since_lsn > 0
3533 && oldest_available_lsn
3534 .map(|oldest| oldest > since_lsn.saturating_add(1))
3535 .unwrap_or(false)
3536 {
3537 self.persist_replication_health(
3538 "stalled_gap",
3539 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
3540 current_lsn,
3541 oldest_available_lsn,
3542 );
3543 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
3544 continue;
3545 }
3546 if let Some(records) =
3547 value.get("records").and_then(crate::json::Value::as_array)
3548 {
3549 for record in records {
3550 let Some(data_hex) =
3551 record.get("data").and_then(crate::json::Value::as_str)
3552 else {
3553 continue;
3554 };
3555 let Ok(data) = hex::decode(data_hex) else {
3556 self.inner.replica_apply_metrics.record(
3557 crate::replication::logical::ApplyErrorKind::Decode,
3558 );
3559 self.persist_replication_health(
3560 "apply_error",
3561 "failed to decode WAL record hex payload",
3562 current_lsn,
3563 oldest_available_lsn,
3564 );
3565 continue;
3566 };
3567 let Ok(change) = ChangeRecord::decode(&data) else {
3568 self.inner.replica_apply_metrics.record(
3569 crate::replication::logical::ApplyErrorKind::Decode,
3570 );
3571 self.persist_replication_health(
3572 "apply_error",
3573 "failed to decode logical WAL record",
3574 current_lsn,
3575 oldest_available_lsn,
3576 );
3577 continue;
3578 };
3579 match applier.apply(
3580 self.inner.db.as_ref(),
3581 &change,
3582 ApplyMode::Replica,
3583 ) {
3584 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
3585 self.invalidate_result_cache_for_table(&change.collection);
3586 since_lsn = since_lsn.max(change.lsn);
3587 self.persist_replica_lsn(since_lsn);
3588 }
3589 Ok(_) => {
3590 }
3592 Err(err) => {
3593 self.inner.replica_apply_metrics.record(err.kind());
3594 match &err {
3603 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
3604 crate::telemetry::operator_event::OperatorEvent::Divergence {
3605 peer: "primary".to_string(),
3606 leader_lsn: *lsn,
3607 follower_lsn: since_lsn,
3608 }
3609 .emit_global();
3610 }
3611 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
3612 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
3613 peer: "primary".to_string(),
3614 reason: format!("stalled gap last={last} next={next}"),
3615 }
3616 .emit_global();
3617 }
3618 _ => {}
3619 }
3620 let kind = match &err {
3621 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
3622 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
3623 _ => "apply_error",
3624 };
3625 self.persist_replication_health(
3626 kind,
3627 &format!("replica apply rejected: {err}"),
3628 current_lsn,
3629 oldest_available_lsn,
3630 );
3631 break;
3642 }
3643 }
3644 }
3645 }
3646 self.persist_replication_health(
3647 "healthy",
3648 "",
3649 current_lsn,
3650 oldest_available_lsn,
3651 );
3652 } else {
3653 self.persist_replication_health(
3654 "apply_error",
3655 "failed to parse pull_wal_records response",
3656 None,
3657 None,
3658 );
3659 }
3660 } else {
3661 self.persist_replication_health(
3662 "connecting",
3663 "primary pull_wal_records request failed",
3664 None,
3665 None,
3666 );
3667 }
3668
3669 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
3670 }
3671 });
3672 }
3673
3674 pub fn cdc_poll(
3676 &self,
3677 since_lsn: u64,
3678 max_count: usize,
3679 ) -> Vec<crate::replication::cdc::ChangeEvent> {
3680 self.inner.cdc.poll(since_lsn, max_count)
3681 }
3682
3683 pub fn cdc_current_lsn(&self) -> u64 {
3687 self.inner.cdc.current_lsn()
3688 }
3689
3690 pub fn kv_watch_events_since(
3691 &self,
3692 collection: &str,
3693 key: &str,
3694 since_lsn: u64,
3695 max_count: usize,
3696 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3697 self.inner
3698 .cdc
3699 .poll(since_lsn, max_count)
3700 .into_iter()
3701 .filter_map(|event| event.kv)
3702 .filter(|event| event.collection == collection && event.key == key)
3703 .collect()
3704 }
3705
3706 pub fn kv_watch_events_since_prefix(
3707 &self,
3708 collection: &str,
3709 prefix: &str,
3710 since_lsn: u64,
3711 max_count: usize,
3712 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3713 self.inner
3714 .cdc
3715 .poll(since_lsn, max_count)
3716 .into_iter()
3717 .filter_map(|event| event.kv)
3718 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
3719 .collect()
3720 }
3721
3722 pub(crate) fn kv_watch_subscribe<'a>(
3723 &'a self,
3724 collection: impl Into<String>,
3725 key: impl Into<String>,
3726 from_lsn: Option<u64>,
3727 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3728 crate::runtime::kv_watch::KvWatchStream::subscribe(
3729 &self.inner.cdc,
3730 &self.inner.kv_stats,
3731 collection,
3732 key,
3733 from_lsn,
3734 self.kv_watch_idle_timeout_ms(),
3735 )
3736 }
3737
3738 pub(crate) fn kv_watch_subscribe_prefix<'a>(
3739 &'a self,
3740 collection: impl Into<String>,
3741 prefix: impl Into<String>,
3742 from_lsn: Option<u64>,
3743 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3744 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
3745 &self.inner.cdc,
3746 &self.inner.kv_stats,
3747 collection,
3748 prefix,
3749 from_lsn,
3750 self.kv_watch_idle_timeout_ms(),
3751 )
3752 }
3753
3754 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
3755 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
3756 }
3757
3758 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
3760 self.inner.backup_scheduler.status()
3761 }
3762
3763 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
3773 &self.inner.result_blob_cache
3774 }
3775
3776 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
3780 self.inner
3781 .db
3782 .replication
3783 .as_ref()
3784 .map(|repl| repl.replica_snapshots())
3785 .unwrap_or_default()
3786 }
3787
3788 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
3793 crate::replication::CommitPolicy::from_env()
3794 }
3795
3796 pub fn replica_apply_error_counts(
3801 &self,
3802 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
3803 self.inner.replica_apply_metrics.snapshot()
3804 }
3805
3806 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
3809 &self.inner.quota_bucket
3810 }
3811
3812 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
3816 self.inner
3817 .db
3818 .replication
3819 .as_ref()
3820 .map(|repl| repl.commit_waiter.snapshot())
3821 .unwrap_or_default()
3822 }
3823
3824 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
3827 self.inner
3828 .db
3829 .replication
3830 .as_ref()
3831 .map(|repl| repl.commit_waiter.metrics_snapshot())
3832 .unwrap_or((0, 0, 0, 0))
3833 }
3834
3835 pub fn await_replica_acks(
3845 &self,
3846 target_lsn: u64,
3847 count: u32,
3848 timeout: std::time::Duration,
3849 ) -> crate::replication::AwaitOutcome {
3850 match &self.inner.db.replication {
3851 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
3852 None => {
3853 crate::replication::AwaitOutcome::NotRequired
3857 }
3858 }
3859 }
3860
3861 pub fn enforce_commit_policy(
3875 &self,
3876 post_lsn: u64,
3877 ) -> RedDBResult<crate::replication::AwaitOutcome> {
3878 let n = match self.commit_policy() {
3879 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
3880 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
3881 };
3882 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
3883 .ok()
3884 .and_then(|v| v.parse::<u64>().ok())
3885 .unwrap_or(5_000);
3886 let outcome =
3887 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
3888 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
3889 tracing::warn!(
3890 target: "reddb::commit",
3891 post_lsn,
3892 observed = *observed,
3893 required = *required,
3894 timeout_ms,
3895 "ack_n: timed out waiting for replicas"
3896 );
3897 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
3898 .ok()
3899 .map(|v| {
3900 let t = v.trim();
3901 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
3902 })
3903 .unwrap_or(false);
3904 if fail {
3905 return Err(RedDBError::ReadOnly(format!(
3906 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
3907 )));
3908 }
3909 }
3910 Ok(outcome)
3911 }
3912
3913 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
3921 match crate::crypto::page_encryption::key_from_env() {
3922 Ok(Some(_)) => ("enabled", None),
3923 Ok(None) => ("disabled", None),
3924 Err(err) => ("error", Some(err)),
3925 }
3926 }
3927
3928 pub fn replica_apply_health(&self) -> Option<String> {
3934 let state = self.config_string("red.replication.state", "");
3935 if state.is_empty() {
3936 None
3937 } else {
3938 Some(state)
3939 }
3940 }
3941
3942 pub fn wal_archive_progress(&self) -> (u64, u64) {
3947 let current_lsn = self
3948 .inner
3949 .db
3950 .replication
3951 .as_ref()
3952 .map(|repl| {
3953 repl.logical_wal_spool
3954 .as_ref()
3955 .map(|spool| spool.current_lsn())
3956 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
3957 })
3958 .unwrap_or_else(|| self.inner.cdc.current_lsn());
3959 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
3960 (current_lsn, last_archived_lsn)
3961 }
3962
3963 pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
3965 self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
3966 self.assert_remote_write_allowed("admin/backup")?;
3971 let started = std::time::Instant::now();
3972 let snapshot = self.create_snapshot()?;
3973 let mut uploaded = false;
3974
3975 if let (Some(backend), Some(path)) = (&self.inner.db.remote_backend, self.inner.db.path()) {
3976 let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
3977 let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
3978 let default_head_key = self.inner.db.options().default_backup_head_key();
3979 let snapshot_prefix = self.config_string(
3980 "red.config.backup.snapshot_prefix",
3981 &default_snapshot_prefix,
3982 );
3983 let wal_prefix =
3984 self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
3985 let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
3986 let timeline_id = self.config_string("red.config.timeline.id", "main");
3987 let snapshot_key = crate::storage::wal::archive_snapshot(
3988 backend.as_ref(),
3989 path,
3990 snapshot.snapshot_id,
3991 &snapshot_prefix,
3992 )
3993 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3994 let current_lsn = self
3995 .inner
3996 .db
3997 .replication
3998 .as_ref()
3999 .map(|repl| {
4000 repl.logical_wal_spool
4001 .as_ref()
4002 .map(|spool| spool.current_lsn())
4003 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4004 })
4005 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4006 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4007 let snapshot_sha256 =
4013 crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
4014 .map_err(|err| {
4015 tracing::warn!(
4016 target: "reddb::backup",
4017 error = %err,
4018 snapshot_id = snapshot.snapshot_id,
4019 "snapshot hash failed; manifest will lack checksum"
4020 );
4021 })
4022 .ok();
4023 let manifest = crate::storage::wal::SnapshotManifest {
4024 timeline_id: timeline_id.clone(),
4025 snapshot_key: snapshot_key.clone(),
4026 snapshot_id: snapshot.snapshot_id,
4027 snapshot_time: snapshot.created_at_unix_ms as u64,
4028 base_lsn: current_lsn,
4029 schema_version: crate::api::REDDB_FORMAT_VERSION,
4030 format_version: crate::api::REDDB_FORMAT_VERSION,
4031 snapshot_sha256,
4032 };
4033 crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
4034 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4035
4036 let prev_segment_hash = self.config_string("red.config.timeline.last_segment_hash", "");
4043 let prev_hash_arg = if prev_segment_hash.is_empty() {
4044 None
4045 } else {
4046 Some(prev_segment_hash)
4047 };
4048
4049 let archived_lsn = if let Some(primary) = &self.inner.db.replication {
4050 let oldest = primary
4051 .logical_wal_spool
4052 .as_ref()
4053 .and_then(|spool| spool.oldest_lsn().ok().flatten())
4054 .or_else(|| primary.wal_buffer.oldest_lsn())
4055 .unwrap_or(last_archived_lsn);
4056 if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
4057 return Err(RedDBError::Internal(format!(
4058 "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
4059 )));
4060 }
4061 let records = if let Some(spool) = &primary.logical_wal_spool {
4062 spool
4063 .read_since(last_archived_lsn, usize::MAX)
4064 .map_err(|err| RedDBError::Internal(err.to_string()))?
4065 } else {
4066 primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
4067 };
4068 if let Some(meta) = crate::storage::wal::archive_change_records(
4069 backend.as_ref(),
4070 &wal_prefix,
4071 &records,
4072 prev_hash_arg,
4073 )
4074 .map_err(|err| RedDBError::Internal(err.to_string()))?
4075 {
4076 if let Some(spool) = &primary.logical_wal_spool {
4077 let _ = spool.prune_through(meta.lsn_end);
4078 }
4079 if let Some(sha) = &meta.sha256 {
4085 self.inner.db.store().set_config_tree(
4086 "red.config.timeline",
4087 &crate::json!({ "last_segment_hash": sha }),
4088 );
4089 }
4090 meta.lsn_end
4091 } else {
4092 last_archived_lsn
4093 }
4094 } else {
4095 last_archived_lsn
4096 };
4097
4098 let head = crate::storage::wal::BackupHead {
4099 timeline_id,
4100 snapshot_key,
4101 snapshot_id: snapshot.snapshot_id,
4102 snapshot_time: snapshot.created_at_unix_ms as u64,
4103 current_lsn,
4104 last_archived_lsn: archived_lsn,
4105 wal_prefix,
4106 };
4107 crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
4108 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4109 self.inner.db.store().set_config_tree(
4110 "red.config.timeline",
4111 &crate::json!({
4112 "last_archived_lsn": archived_lsn,
4113 "id": head.timeline_id
4114 }),
4115 );
4116
4117 if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
4125 backend.as_ref(),
4126 &snapshot_prefix,
4127 ) {
4128 tracing::warn!(
4129 target: "reddb::backup",
4130 error = %err,
4131 snapshot_prefix = %snapshot_prefix,
4132 "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
4133 );
4134 }
4135
4136 match self.commit_policy() {
4148 crate::replication::CommitPolicy::AckN(n) if n > 0 => {
4149 let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4150 .ok()
4151 .and_then(|v| v.parse::<u64>().ok())
4152 .unwrap_or(5_000);
4153 let outcome = self.await_replica_acks(
4154 archived_lsn,
4155 n,
4156 std::time::Duration::from_millis(timeout),
4157 );
4158 match outcome {
4159 crate::replication::AwaitOutcome::Reached(count) => {
4160 tracing::debug!(
4161 target: "reddb::backup",
4162 archived_lsn,
4163 n,
4164 count,
4165 "ack_n: replicas synced before backup return"
4166 );
4167 }
4168 crate::replication::AwaitOutcome::TimedOut { observed, required } => {
4169 tracing::warn!(
4170 target: "reddb::backup",
4171 archived_lsn,
4172 observed,
4173 required,
4174 timeout_ms = timeout,
4175 "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
4176 );
4177 }
4178 crate::replication::AwaitOutcome::NotRequired => {}
4179 }
4180 }
4181 _ => {} }
4183
4184 if self.config_bool("red.config.backup.include_blob_cache", false) {
4196 let blob_cache_prefix = self.config_string(
4197 "red.config.backup.blob_cache_prefix",
4198 &format!("{snapshot_prefix}blob_cache/"),
4199 );
4200 if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
4201 match crate::storage::cache::archive_blob_cache_l2(
4202 backend.as_ref(),
4203 l2_path,
4204 &blob_cache_prefix,
4205 ) {
4206 Ok(count) => {
4207 tracing::info!(
4208 target: "reddb::backup",
4209 files_uploaded = count,
4210 blob_cache_prefix = %blob_cache_prefix,
4211 "include_blob_cache: archived L2 directory"
4212 );
4213 }
4214 Err(err) => {
4215 tracing::warn!(
4216 target: "reddb::backup",
4217 error = %err,
4218 blob_cache_prefix = %blob_cache_prefix,
4219 "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
4220 );
4221 }
4222 }
4223 } else {
4224 tracing::debug!(
4225 target: "reddb::backup",
4226 "include_blob_cache=true but no L2 path configured; nothing to archive"
4227 );
4228 }
4229 }
4230
4231 uploaded = true;
4232 }
4233
4234 Ok(crate::replication::scheduler::BackupResult {
4235 snapshot_id: snapshot.snapshot_id,
4236 uploaded,
4237 duration_ms: started.elapsed().as_millis() as u64,
4238 timestamp: snapshot.created_at_unix_ms as u64,
4239 })
4240 }
4241
4242 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
4243 let mut pool = self
4244 .inner
4245 .pool
4246 .lock()
4247 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
4248 if pool.active >= self.inner.pool_config.max_connections {
4249 return Err(RedDBError::Internal(
4250 "connection pool exhausted".to_string(),
4251 ));
4252 }
4253
4254 let id = if let Some(id) = pool.idle.pop() {
4255 id
4256 } else {
4257 let id = pool.next_id;
4258 pool.next_id += 1;
4259 id
4260 };
4261 pool.active += 1;
4262 pool.total_checkouts += 1;
4263 drop(pool);
4264
4265 Ok(RuntimeConnection {
4266 id,
4267 inner: Arc::clone(&self.inner),
4268 })
4269 }
4270
4271 pub fn checkpoint(&self) -> RedDBResult<()> {
4272 self.inner.db.flush_local_only().map_err(|err| {
4277 let msg = err.to_string();
4282 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
4283 lsn: 0,
4284 error: msg.clone(),
4285 }
4286 .emit_global();
4287 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
4288 path: "<flush_local_only>".to_string(),
4289 error: msg.clone(),
4290 }
4291 .emit_global();
4292 RedDBError::Engine(msg)
4293 })?;
4294 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
4295 tracing::warn!(
4296 target: "reddb::serverless::lease",
4297 error = %err,
4298 "checkpoint: skipping remote upload — lease not held"
4299 );
4300 return Ok(());
4301 }
4302 self.inner
4303 .db
4304 .upload_to_remote_backend()
4305 .map_err(|err| RedDBError::Engine(err.to_string()))
4306 }
4307
4308 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
4315 if self.inner.db.remote_backend.is_none() {
4316 return Ok(());
4317 }
4318 match self.inner.write_gate.lease_state() {
4319 crate::runtime::write_gate::LeaseGateState::NotHeld => {
4320 self.inner.audit_log.record(
4321 action,
4322 "system",
4323 "remote_backend",
4324 "err: writer lease not held",
4325 crate::json::Value::Null,
4326 );
4327 Err(RedDBError::ReadOnly(format!(
4328 "writer lease not held — {action} blocked (serverless fence)"
4329 )))
4330 }
4331 _ => Ok(()),
4332 }
4333 }
4334
4335 pub fn run_maintenance(&self) -> RedDBResult<()> {
4336 self.inner
4337 .db
4338 .run_maintenance()
4339 .map_err(|err| RedDBError::Internal(err.to_string()))
4340 }
4341
4342 pub fn scan_collection(
4343 &self,
4344 collection: &str,
4345 cursor: Option<ScanCursor>,
4346 limit: usize,
4347 ) -> RedDBResult<ScanPage> {
4348 let store = self.inner.db.store();
4349 let manager = store
4350 .get_collection(collection)
4351 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
4352
4353 let mut entities = manager.query_all(|_| true);
4354 entities.sort_by_key(|entity| entity.id.raw());
4355
4356 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
4357 let total = entities.len();
4358 let end = total.min(offset.saturating_add(limit.max(1)));
4359 let items = if offset >= total {
4360 Vec::new()
4361 } else {
4362 entities[offset..end].to_vec()
4363 };
4364 let next = (end < total).then_some(ScanCursor { offset: end });
4365
4366 Ok(ScanPage {
4367 collection: collection.to_string(),
4368 items,
4369 next,
4370 total,
4371 })
4372 }
4373
4374 pub fn catalog(&self) -> CatalogModelSnapshot {
4375 self.inner.db.catalog_model_snapshot()
4376 }
4377
4378 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
4379 self.inner.db.catalog_consistency_report()
4380 }
4381
4382 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
4383 crate::catalog::attention_summary(&self.catalog())
4384 }
4385
4386 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
4387 crate::catalog::collection_attention(&self.catalog())
4388 }
4389
4390 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
4391 crate::catalog::index_attention(&self.catalog())
4392 }
4393
4394 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
4395 crate::catalog::graph_projection_attention(&self.catalog())
4396 }
4397
4398 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
4399 crate::catalog::analytics_job_attention(&self.catalog())
4400 }
4401
4402 pub fn stats(&self) -> RuntimeStats {
4403 let pool = runtime_pool_lock(self);
4404 RuntimeStats {
4405 active_connections: pool.active,
4406 idle_connections: pool.idle.len(),
4407 total_checkouts: pool.total_checkouts,
4408 paged_mode: self.inner.db.is_paged(),
4409 started_at_unix_ms: self.inner.started_at_unix_ms,
4410 store: self.inner.db.stats(),
4411 system: SystemInfo::collect(),
4412 result_blob_cache: self.inner.result_blob_cache.stats(),
4413 kv: self.inner.kv_stats.snapshot(),
4414 }
4415 }
4416
4417 pub fn execute_query_with_scope(
4431 &self,
4432 query: &str,
4433 scope: crate::runtime::within_clause::ScopeOverride,
4434 ) -> RedDBResult<RuntimeQueryResult> {
4435 if scope.is_empty() {
4436 return self.execute_query(query);
4437 }
4438 let _scope_guard = ScopeOverrideGuard::install(scope);
4439 self.execute_query(query)
4440 }
4441
4442 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4451 let started = std::time::Instant::now();
4452 let result = self.execute_query_inner(query);
4453 let elapsed_ms = started.elapsed().as_millis() as u64;
4454
4455 let scope = self.ai_scope();
4460 let kind = match result
4461 .as_ref()
4462 .map(|r| r.statement_type)
4463 .unwrap_or("select")
4464 {
4465 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
4466 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
4467 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
4468 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
4469 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
4470 };
4471 self.inner
4477 .slow_query_logger
4478 .record(kind, elapsed_ms, query.to_string(), &scope);
4479
4480 result
4481 }
4482
4483 #[inline(never)]
4484 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4485 if !has_scope_override_active()
4496 && !query.trim_start().starts_with("WITHIN")
4497 && !query.trim_start().starts_with("within")
4498 && !self
4499 .inner
4500 .tx_contexts
4501 .read()
4502 .contains_key(¤t_connection_id())
4503 {
4504 if let Some(result) = self.try_fast_entity_lookup(query) {
4505 return result;
4506 }
4507 }
4508
4509 match crate::runtime::within_clause::try_strip_within_prefix(query) {
4516 Ok(Some((scope, inner))) => {
4517 let _scope_guard = ScopeOverrideGuard::install(scope);
4518 return self.execute_query_inner(inner);
4523 }
4524 Ok(None) => {}
4525 Err(msg) => return Err(RedDBError::Query(msg)),
4526 }
4527
4528 if let Some(inner) = strip_explain_prefix(query) {
4535 return self.explain_as_rows(query, inner);
4536 }
4537
4538 if let Some(value) = parse_set_local_tenant(query)? {
4543 let conn_id = current_connection_id();
4544 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
4545 return Err(RedDBError::Query(
4546 "SET LOCAL TENANT requires an active transaction".to_string(),
4547 ));
4548 }
4549 self.inner
4550 .tx_local_tenants
4551 .write()
4552 .insert(conn_id, value.clone());
4553 return Ok(RuntimeQueryResult::ok_message(
4554 query.to_string(),
4555 &match &value {
4556 Some(id) => format!("local tenant set: {id}"),
4557 None => "local tenant cleared".to_string(),
4558 },
4559 "set_local_tenant",
4560 ));
4561 }
4562
4563 if super::red_schema::is_system_schema_write(query) {
4564 return Err(RedDBError::Query(
4565 super::red_schema::READ_ONLY_ERROR.to_string(),
4566 ));
4567 }
4568
4569 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
4570 let execution_query = rewritten_query.as_deref().unwrap_or(query);
4571
4572 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
4573 let _frame_guards = frame.install(self);
4574
4575 let _log_span = crate::telemetry::span::query_span(query).entered();
4582
4583 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
4585 return self.execute_query_expr(rewritten);
4586 }
4587
4588 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
4590 return result;
4591 }
4592
4593 if let Some(result) = frame.read_result_cache(self) {
4595 return Ok(result);
4596 }
4597
4598 let prepared = frame.prepare_statement(self, execution_query)?;
4599 let mode = prepared.mode;
4600 let expr = prepared.expr;
4601
4602 let statement = query_expr_name(&expr);
4603 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
4604
4605 let _lock_guard = frame.prepare_dispatch(self, &expr)?;
4606 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
4607
4608 let query_result = match expr {
4609 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
4610 let (graph, node_properties) = self.materialize_graph_with_rls()?;
4618 let result =
4619 crate::storage::query::unified::UnifiedExecutor::execute_on_with_node_properties(
4620 &graph,
4621 &expr,
4622 node_properties,
4623 )
4624 .map_err(|err| RedDBError::Query(err.to_string()))?;
4625
4626 Ok(RuntimeQueryResult {
4627 query: query.to_string(),
4628 mode,
4629 statement,
4630 engine: "materialized-graph",
4631 result,
4632 affected_rows: 0,
4633 statement_type: "select",
4634 })
4635 }
4636 QueryExpr::Table(table) => {
4637 let table = self.resolve_table_expr_subqueries(
4638 table,
4639 &frame as &dyn super::statement_frame::ReadFrame,
4640 )?;
4641 if super::red_schema::is_virtual_table(&table.table) {
4642 return Ok(RuntimeQueryResult {
4643 query: query.to_string(),
4644 mode,
4645 statement,
4646 engine: "runtime-red-schema",
4647 result: super::red_schema::red_query(
4648 self,
4649 &table.table,
4650 &table,
4651 &frame as &dyn super::statement_frame::ReadFrame,
4652 )?,
4653 affected_rows: 0,
4654 statement_type: "select",
4655 });
4656 }
4657
4658 if let Some(result) = self.execute_probabilistic_select(&table)? {
4659 return Ok(RuntimeQueryResult {
4660 query: query.to_string(),
4661 mode,
4662 statement,
4663 engine: "runtime-probabilistic",
4664 result,
4665 affected_rows: 0,
4666 statement_type: "select",
4667 });
4668 }
4669
4670 if self.inner.foreign_tables.is_foreign_table(&table.table) {
4678 let records = self
4679 .inner
4680 .foreign_tables
4681 .scan(&table.table)
4682 .map_err(|e| RedDBError::Internal(e.to_string()))?;
4683 let result = apply_foreign_table_filters(records, &table);
4684 return Ok(RuntimeQueryResult {
4685 query: query.to_string(),
4686 mode,
4687 statement,
4688 engine: "runtime-fdw",
4689 result,
4690 affected_rows: 0,
4691 statement_type: "select",
4692 });
4693 }
4694
4695 let Some(table_with_rls) = self.authorize_relational_table_select(
4712 table,
4713 &frame as &dyn super::statement_frame::ReadFrame,
4714 )?
4715 else {
4716 let empty = crate::storage::query::unified::UnifiedResult::empty();
4717 return Ok(RuntimeQueryResult {
4718 query: query.to_string(),
4719 mode,
4720 statement,
4721 engine: "runtime-table-rls",
4722 result: empty,
4723 affected_rows: 0,
4724 statement_type: "select",
4725 });
4726 };
4727 Ok(RuntimeQueryResult {
4728 query: query.to_string(),
4729 mode,
4730 statement,
4731 engine: "runtime-table",
4732 result: execute_runtime_table_query(
4733 &self.inner.db,
4734 &table_with_rls,
4735 Some(&self.inner.index_store),
4736 )?,
4737 affected_rows: 0,
4738 statement_type: "select",
4739 })
4740 }
4741 QueryExpr::Join(join) => {
4742 let join_with_rls = match self.authorize_relational_join_select(
4751 join,
4752 &frame as &dyn super::statement_frame::ReadFrame,
4753 )? {
4754 Some(j) => j,
4755 None => {
4756 return Ok(RuntimeQueryResult {
4757 query: query.to_string(),
4758 mode,
4759 statement,
4760 engine: "runtime-join-rls",
4761 result: crate::storage::query::unified::UnifiedResult::empty(),
4762 affected_rows: 0,
4763 statement_type: "select",
4764 });
4765 }
4766 };
4767 Ok(RuntimeQueryResult {
4768 query: query.to_string(),
4769 mode,
4770 statement,
4771 engine: "runtime-join",
4772 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
4773 affected_rows: 0,
4774 statement_type: "select",
4775 })
4776 }
4777 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
4778 query: query.to_string(),
4779 mode,
4780 statement,
4781 engine: "runtime-vector",
4782 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
4783 affected_rows: 0,
4784 statement_type: "select",
4785 }),
4786 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
4787 query: query.to_string(),
4788 mode,
4789 statement,
4790 engine: "runtime-hybrid",
4791 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
4792 affected_rows: 0,
4793 statement_type: "select",
4794 }),
4795 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
4797 Err(RedDBError::Query(
4798 super::red_schema::READ_ONLY_ERROR.to_string(),
4799 ))
4800 }
4801 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
4802 Err(RedDBError::Query(
4803 super::red_schema::READ_ONLY_ERROR.to_string(),
4804 ))
4805 }
4806 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
4807 Err(RedDBError::Query(
4808 super::red_schema::READ_ONLY_ERROR.to_string(),
4809 ))
4810 }
4811 QueryExpr::Insert(ref insert) => {
4812 self.with_deferred_store_wal_if_transaction(|| self.execute_insert(query, insert))
4813 }
4814 QueryExpr::Update(ref update) => {
4815 self.with_deferred_store_wal_if_transaction(|| self.execute_update(query, update))
4816 }
4817 QueryExpr::Delete(ref delete) => {
4818 self.with_deferred_store_wal_if_transaction(|| self.execute_delete(query, delete))
4819 }
4820 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
4822 QueryExpr::CreateCollection(ref create) => {
4823 self.execute_create_collection(query, create)
4824 }
4825 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
4826 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
4827 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
4828 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
4829 QueryExpr::DropDocument(ref drop_document) => {
4830 self.execute_drop_document(query, drop_document)
4831 }
4832 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
4833 QueryExpr::DropCollection(ref drop_collection) => {
4834 self.execute_drop_collection(query, drop_collection)
4835 }
4836 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
4837 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
4838 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
4839 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
4841 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
4843 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
4845 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
4846 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
4847 QueryExpr::ProbabilisticCommand(ref cmd) => {
4848 self.execute_probabilistic_command(query, cmd)
4849 }
4850 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
4852 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
4853 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
4855 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
4856 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
4857 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
4858 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
4859 QueryExpr::EventsBackfill(ref backfill) => {
4860 self.execute_events_backfill(query, backfill)
4861 }
4862 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
4863 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
4864 ))),
4865 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
4866 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
4867 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
4868 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
4869 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
4870 QueryExpr::SetConfig { ref key, ref value } => {
4872 if key.starts_with("red.secret.") {
4873 return Err(RedDBError::Query(
4874 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
4875 ));
4876 }
4877 let store = self.inner.db.store();
4878 let json_val = match value {
4879 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
4880 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
4881 Value::Float(n) => crate::serde_json::Value::Number(*n),
4882 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
4883 _ => crate::serde_json::Value::String(value.to_string()),
4884 };
4885 store.set_config_tree(key, &json_val);
4886 update_current_config_value(key, value.clone());
4887 self.invalidate_result_cache();
4892 Ok(RuntimeQueryResult::ok_message(
4893 query.to_string(),
4894 &format!("config set: {key}"),
4895 "set",
4896 ))
4897 }
4898 QueryExpr::SetSecret { ref key, ref value } => {
4900 if key.starts_with("red.config.") {
4901 return Err(RedDBError::Query(
4902 "red.config.* is reserved for config; use SET CONFIG".to_string(),
4903 ));
4904 }
4905 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
4906 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
4907 })?;
4908 if matches!(value, Value::Null) {
4909 auth_store
4910 .vault_kv_try_delete(key)
4911 .map_err(|err| RedDBError::Query(err.to_string()))?;
4912 update_current_secret_value(key, None);
4913 self.invalidate_result_cache();
4914 return Ok(RuntimeQueryResult::ok_message(
4915 query.to_string(),
4916 &format!("secret deleted: {key}"),
4917 "delete_secret",
4918 ));
4919 }
4920 let value = secret_sql_value_to_string(value)?;
4921 auth_store
4922 .vault_kv_try_set(key.clone(), value.clone())
4923 .map_err(|err| RedDBError::Query(err.to_string()))?;
4924 update_current_secret_value(key, Some(value));
4925 self.invalidate_result_cache();
4926 Ok(RuntimeQueryResult::ok_message(
4927 query.to_string(),
4928 &format!("secret set: {key}"),
4929 "set_secret",
4930 ))
4931 }
4932 QueryExpr::DeleteSecret { ref key } => {
4934 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
4935 RedDBError::Query(
4936 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
4937 )
4938 })?;
4939 let deleted = auth_store
4940 .vault_kv_try_delete(key)
4941 .map_err(|err| RedDBError::Query(err.to_string()))?;
4942 if deleted {
4943 update_current_secret_value(key, None);
4944 }
4945 self.invalidate_result_cache();
4946 Ok(RuntimeQueryResult::ok_message(
4947 query.to_string(),
4948 &format!("secret deleted: {key}"),
4949 if deleted {
4950 "delete_secret"
4951 } else {
4952 "delete_secret_not_found"
4953 },
4954 ))
4955 }
4956 QueryExpr::ShowSecrets { ref prefix } => {
4958 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
4959 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
4960 })?;
4961 if !auth_store.is_vault_backed() {
4962 return Err(RedDBError::Query(
4963 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
4964 ));
4965 }
4966 let mut keys = auth_store.vault_kv_keys();
4967 keys.sort();
4968 let mut result = UnifiedResult::with_columns(vec![
4969 "key".into(),
4970 "value".into(),
4971 "status".into(),
4972 ]);
4973 for key in keys {
4974 if let Some(ref pfx) = prefix {
4975 if !key.starts_with(pfx) {
4976 continue;
4977 }
4978 }
4979 let mut record = UnifiedRecord::new();
4980 record.set("key", Value::text(key));
4981 record.set("value", Value::text("***"));
4982 record.set("status", Value::text("active"));
4983 result.push(record);
4984 }
4985 Ok(RuntimeQueryResult {
4986 query: query.to_string(),
4987 mode,
4988 statement: "show_secrets",
4989 engine: "runtime-secret",
4990 result,
4991 affected_rows: 0,
4992 statement_type: "select",
4993 })
4994 }
4995 QueryExpr::ShowConfig { ref prefix } => {
4997 let store = self.inner.db.store();
4998 let all_collections = store.list_collections();
4999 if !all_collections.contains(&"red_config".to_string()) {
5000 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5001 return Ok(RuntimeQueryResult {
5002 query: query.to_string(),
5003 mode,
5004 statement: "show_config",
5005 engine: "runtime-config",
5006 result,
5007 affected_rows: 0,
5008 statement_type: "select",
5009 });
5010 }
5011 let manager = store
5012 .get_collection("red_config")
5013 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5014 let entities = manager.query_all(|_| true);
5015 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5016 for entity in entities {
5017 if let EntityData::Row(ref row) = entity.data {
5018 if let Some(ref named) = row.named {
5019 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5020 let val = named.get("value").cloned().unwrap_or(Value::Null);
5021 let key_str = match &key_val {
5022 Value::Text(s) => s.as_ref(),
5023 _ => continue,
5024 };
5025 if let Some(ref pfx) = prefix {
5026 if !key_str.starts_with(pfx.as_str()) {
5027 continue;
5028 }
5029 }
5030 let entity_id = entity.id.raw();
5031 match latest.get(key_str) {
5032 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5033 _ => {
5034 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5035 }
5036 }
5037 }
5038 }
5039 }
5040 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5041 for (_, key_val, val) in latest.into_values() {
5042 let mut record = UnifiedRecord::new();
5043 record.set("key", key_val);
5044 record.set("value", val);
5045 result.push(record);
5046 }
5047 Ok(RuntimeQueryResult {
5048 query: query.to_string(),
5049 mode,
5050 statement: "show_config",
5051 engine: "runtime-config",
5052 result,
5053 affected_rows: 0,
5054 statement_type: "select",
5055 })
5056 }
5057 QueryExpr::SetTenant(ref value) => {
5063 match value {
5064 Some(id) => set_current_tenant(id.clone()),
5065 None => clear_current_tenant(),
5066 }
5067 Ok(RuntimeQueryResult::ok_message(
5068 query.to_string(),
5069 &match value {
5070 Some(id) => format!("tenant set: {id}"),
5071 None => "tenant cleared".to_string(),
5072 },
5073 "set_tenant",
5074 ))
5075 }
5076 QueryExpr::ShowTenant => {
5077 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5078 let mut record = UnifiedRecord::new();
5079 record.set(
5080 "tenant",
5081 current_tenant().map(Value::text).unwrap_or(Value::Null),
5082 );
5083 result.push(record);
5084 Ok(RuntimeQueryResult {
5085 query: query.to_string(),
5086 mode,
5087 statement: "show_tenant",
5088 engine: "runtime-tenant",
5089 result,
5090 affected_rows: 0,
5091 statement_type: "select",
5092 })
5093 }
5094 QueryExpr::TransactionControl(ref ctl) => {
5106 use crate::storage::query::ast::TxnControl;
5107 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5108 use crate::storage::transaction::IsolationLevel;
5109
5110 let conn_id = current_connection_id();
5115
5116 let (kind, msg) = match ctl {
5117 TxnControl::Begin => {
5118 let mgr = Arc::clone(&self.inner.snapshot_manager);
5119 let xid = mgr.begin();
5120 let snapshot = mgr.snapshot(xid);
5121 let ctx = TxnContext {
5122 xid,
5123 isolation: IsolationLevel::SnapshotIsolation,
5124 snapshot,
5125 savepoints: Vec::new(),
5126 released_sub_xids: Vec::new(),
5127 };
5128 self.inner.tx_contexts.write().insert(conn_id, ctx);
5129 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
5130 }
5131 TxnControl::Commit => {
5132 self.inner.tx_local_tenants.write().remove(&conn_id);
5134 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5135 match ctx {
5136 Some(ctx) => {
5137 let mut own_xids = std::collections::HashSet::new();
5138 own_xids.insert(ctx.xid);
5139 for (_, sub) in &ctx.savepoints {
5140 own_xids.insert(*sub);
5141 }
5142 for sub in &ctx.released_sub_xids {
5143 own_xids.insert(*sub);
5144 }
5145 if let Err(err) = self.check_table_row_write_conflicts(
5146 conn_id,
5147 &ctx.snapshot,
5148 &own_xids,
5149 ) {
5150 for (_, sub) in &ctx.savepoints {
5151 self.inner.snapshot_manager.rollback(*sub);
5152 }
5153 for sub in &ctx.released_sub_xids {
5154 self.inner.snapshot_manager.rollback(*sub);
5155 }
5156 self.inner.snapshot_manager.rollback(ctx.xid);
5157 self.revive_pending_versioned_updates(conn_id);
5158 self.revive_pending_tombstones(conn_id);
5159 self.discard_pending_kv_watch_events(conn_id);
5160 self.discard_pending_store_wal_actions(conn_id);
5161 return Err(err);
5162 }
5163 self.restore_pending_write_stamps(conn_id);
5164 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
5165 for (_, sub) in &ctx.savepoints {
5166 self.inner.snapshot_manager.rollback(*sub);
5167 }
5168 for sub in &ctx.released_sub_xids {
5169 self.inner.snapshot_manager.rollback(*sub);
5170 }
5171 self.inner.snapshot_manager.rollback(ctx.xid);
5172 self.revive_pending_versioned_updates(conn_id);
5173 self.revive_pending_tombstones(conn_id);
5174 self.discard_pending_kv_watch_events(conn_id);
5175 return Err(err);
5176 }
5177 for (_, sub) in &ctx.savepoints {
5183 self.inner.snapshot_manager.commit(*sub);
5184 }
5185 for sub in &ctx.released_sub_xids {
5186 self.inner.snapshot_manager.commit(*sub);
5187 }
5188 self.inner.snapshot_manager.commit(ctx.xid);
5189 self.finalize_pending_versioned_updates(conn_id);
5190 self.finalize_pending_tombstones(conn_id);
5191 self.finalize_pending_kv_watch_events(conn_id);
5192 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
5193 }
5194 None => (
5195 "commit",
5196 "COMMIT outside transaction — no-op (autocommit)".to_string(),
5197 ),
5198 }
5199 }
5200 TxnControl::Rollback => {
5201 self.inner.tx_local_tenants.write().remove(&conn_id);
5202 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5203 match ctx {
5204 Some(ctx) => {
5205 for (_, sub) in &ctx.savepoints {
5208 self.inner.snapshot_manager.rollback(*sub);
5209 }
5210 for sub in &ctx.released_sub_xids {
5211 self.inner.snapshot_manager.rollback(*sub);
5212 }
5213 self.inner.snapshot_manager.rollback(ctx.xid);
5214 self.revive_pending_versioned_updates(conn_id);
5218 self.revive_pending_tombstones(conn_id);
5219 self.discard_pending_kv_watch_events(conn_id);
5220 self.discard_pending_store_wal_actions(conn_id);
5221 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
5222 }
5223 None => (
5224 "rollback",
5225 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
5226 ),
5227 }
5228 }
5229 TxnControl::Savepoint(name) => {
5236 let mgr = Arc::clone(&self.inner.snapshot_manager);
5237 let mut guard = self.inner.tx_contexts.write();
5238 match guard.get_mut(&conn_id) {
5239 Some(ctx) => {
5240 let sub = mgr.begin();
5241 ctx.savepoints.push((name.clone(), sub));
5242 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
5243 }
5244 None => (
5245 "savepoint",
5246 "SAVEPOINT outside transaction — no-op".to_string(),
5247 ),
5248 }
5249 }
5250 TxnControl::ReleaseSavepoint(name) => {
5251 let mut guard = self.inner.tx_contexts.write();
5252 match guard.get_mut(&conn_id) {
5253 Some(ctx) => {
5254 let pos = ctx
5255 .savepoints
5256 .iter()
5257 .position(|(n, _)| n == name)
5258 .ok_or_else(|| {
5259 RedDBError::Internal(format!(
5260 "savepoint {name} does not exist"
5261 ))
5262 })?;
5263 let released = ctx.savepoints.len() - pos;
5271 let popped: Vec<Xid> = ctx
5272 .savepoints
5273 .split_off(pos)
5274 .into_iter()
5275 .map(|(_, x)| x)
5276 .collect();
5277 ctx.released_sub_xids.extend(popped);
5278 (
5279 "release_savepoint",
5280 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
5281 )
5282 }
5283 None => (
5284 "release_savepoint",
5285 "RELEASE outside transaction — no-op".to_string(),
5286 ),
5287 }
5288 }
5289 TxnControl::RollbackToSavepoint(name) => {
5290 let mgr = Arc::clone(&self.inner.snapshot_manager);
5291 let drop_result: Option<(Xid, Vec<Xid>)> = {
5296 let mut guard = self.inner.tx_contexts.write();
5297 if let Some(ctx) = guard.get_mut(&conn_id) {
5298 let pos = ctx
5299 .savepoints
5300 .iter()
5301 .position(|(n, _)| n == name)
5302 .ok_or_else(|| {
5303 RedDBError::Internal(format!(
5304 "savepoint {name} does not exist"
5305 ))
5306 })?;
5307 let savepoint_xid = ctx.savepoints[pos].1;
5308 let aborted: Vec<Xid> = ctx
5309 .savepoints
5310 .split_off(pos)
5311 .into_iter()
5312 .map(|(_, x)| x)
5313 .collect();
5314 Some((savepoint_xid, aborted))
5315 } else {
5316 None
5317 }
5318 };
5319
5320 match drop_result {
5321 Some((savepoint_xid, aborted)) => {
5322 for x in &aborted {
5323 mgr.rollback(*x);
5324 }
5325 let reverted_updates =
5326 self.revive_versioned_updates_since(conn_id, savepoint_xid);
5327 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
5328 (
5329 "rollback_to_savepoint",
5330 format!(
5331 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
5332 aborted.len(),
5333 ),
5334 )
5335 }
5336 None => (
5337 "rollback_to_savepoint",
5338 "ROLLBACK TO outside transaction — no-op".to_string(),
5339 ),
5340 }
5341 }
5342 };
5343 Ok(RuntimeQueryResult::ok_message(
5344 query.to_string(),
5345 &msg,
5346 kind,
5347 ))
5348 }
5349 QueryExpr::CreateSchema(ref q) => {
5362 let store = self.inner.db.store();
5363 let key = format!("schema.{}", q.name);
5364 if store.get_config(&key).is_some() {
5365 if q.if_not_exists {
5366 return Ok(RuntimeQueryResult::ok_message(
5367 query.to_string(),
5368 &format!("schema {} already exists — skipped", q.name),
5369 "create_schema",
5370 ));
5371 }
5372 return Err(RedDBError::Internal(format!(
5373 "schema {} already exists",
5374 q.name
5375 )));
5376 }
5377 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
5378 Ok(RuntimeQueryResult::ok_message(
5379 query.to_string(),
5380 &format!("schema {} created", q.name),
5381 "create_schema",
5382 ))
5383 }
5384 QueryExpr::DropSchema(ref q) => {
5385 let store = self.inner.db.store();
5386 let key = format!("schema.{}", q.name);
5387 let existed = store.get_config(&key).is_some();
5388 if !existed && !q.if_exists {
5389 return Err(RedDBError::Internal(format!(
5390 "schema {} does not exist",
5391 q.name
5392 )));
5393 }
5394 store.set_config_tree(&key, &crate::serde_json::Value::Null);
5396 let suffix = if q.cascade {
5397 " (CASCADE accepted — tables untouched)"
5398 } else {
5399 ""
5400 };
5401 Ok(RuntimeQueryResult::ok_message(
5402 query.to_string(),
5403 &format!("schema {} dropped{}", q.name, suffix),
5404 "drop_schema",
5405 ))
5406 }
5407 QueryExpr::CreateSequence(ref q) => {
5408 let store = self.inner.db.store();
5409 let base = format!("sequence.{}", q.name);
5410 let start_key = format!("{base}.start");
5411 let incr_key = format!("{base}.increment");
5412 let curr_key = format!("{base}.current");
5413 if store.get_config(&start_key).is_some() {
5414 if q.if_not_exists {
5415 return Ok(RuntimeQueryResult::ok_message(
5416 query.to_string(),
5417 &format!("sequence {} already exists — skipped", q.name),
5418 "create_sequence",
5419 ));
5420 }
5421 return Err(RedDBError::Internal(format!(
5422 "sequence {} already exists",
5423 q.name
5424 )));
5425 }
5426 let initial_current = q.start - q.increment;
5429 store.set_config_tree(
5430 &start_key,
5431 &crate::serde_json::Value::Number(q.start as f64),
5432 );
5433 store.set_config_tree(
5434 &incr_key,
5435 &crate::serde_json::Value::Number(q.increment as f64),
5436 );
5437 store.set_config_tree(
5438 &curr_key,
5439 &crate::serde_json::Value::Number(initial_current as f64),
5440 );
5441 Ok(RuntimeQueryResult::ok_message(
5442 query.to_string(),
5443 &format!(
5444 "sequence {} created (start={}, increment={})",
5445 q.name, q.start, q.increment
5446 ),
5447 "create_sequence",
5448 ))
5449 }
5450 QueryExpr::DropSequence(ref q) => {
5451 let store = self.inner.db.store();
5452 let base = format!("sequence.{}", q.name);
5453 let existed = store.get_config(&format!("{base}.start")).is_some();
5454 if !existed && !q.if_exists {
5455 return Err(RedDBError::Internal(format!(
5456 "sequence {} does not exist",
5457 q.name
5458 )));
5459 }
5460 for k in ["start", "increment", "current"] {
5461 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
5462 }
5463 Ok(RuntimeQueryResult::ok_message(
5464 query.to_string(),
5465 &format!("sequence {} dropped", q.name),
5466 "drop_sequence",
5467 ))
5468 }
5469 QueryExpr::CreateView(ref q) => {
5479 let mut views = self.inner.views.write();
5480 if views.contains_key(&q.name) && !q.or_replace {
5481 if q.if_not_exists {
5482 return Ok(RuntimeQueryResult::ok_message(
5483 query.to_string(),
5484 &format!("view {} already exists — skipped", q.name),
5485 "create_view",
5486 ));
5487 }
5488 return Err(RedDBError::Internal(format!(
5489 "view {} already exists",
5490 q.name
5491 )));
5492 }
5493 views.insert(q.name.clone(), Arc::new(q.clone()));
5494 drop(views);
5495
5496 if q.materialized {
5498 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
5499 let def = MaterializedViewDef {
5500 name: q.name.clone(),
5501 query: format!("<parsed view {}>", q.name),
5502 dependencies: collect_table_refs(&q.query),
5503 refresh: RefreshPolicy::Manual,
5504 };
5505 self.inner.materialized_views.write().register(def);
5506 }
5507 self.invalidate_plan_cache();
5512 self.invalidate_result_cache();
5513
5514 Ok(RuntimeQueryResult::ok_message(
5515 query.to_string(),
5516 &format!(
5517 "{}view {} created",
5518 if q.materialized { "materialized " } else { "" },
5519 q.name
5520 ),
5521 "create_view",
5522 ))
5523 }
5524 QueryExpr::DropView(ref q) => {
5525 let mut views = self.inner.views.write();
5526 let existed = views.remove(&q.name).is_some();
5527 drop(views);
5528 if q.materialized || existed {
5529 self.inner.materialized_views.write().remove(&q.name);
5531 }
5532 self.invalidate_plan_cache();
5535 self.invalidate_result_cache();
5536 if !existed && !q.if_exists {
5537 return Err(RedDBError::Internal(format!(
5538 "view {} does not exist",
5539 q.name
5540 )));
5541 }
5542 self.invalidate_plan_cache();
5543 Ok(RuntimeQueryResult::ok_message(
5544 query.to_string(),
5545 &format!("view {} dropped", q.name),
5546 "drop_view",
5547 ))
5548 }
5549 QueryExpr::RefreshMaterializedView(ref q) => {
5550 let view = {
5553 let views = self.inner.views.read();
5554 views.get(&q.name).cloned()
5555 };
5556 let view = match view {
5557 Some(v) => v,
5558 None => {
5559 return Err(RedDBError::Internal(format!(
5560 "view {} does not exist",
5561 q.name
5562 )))
5563 }
5564 };
5565 if !view.materialized {
5566 return Err(RedDBError::Internal(format!(
5567 "view {} is not materialized — REFRESH requires \
5568 CREATE MATERIALIZED VIEW",
5569 q.name
5570 )));
5571 }
5572 let inner_result = self.execute_query_expr((*view.query).clone())?;
5574 let serialized = format!("{:?}", inner_result.result);
5577 self.inner
5578 .materialized_views
5579 .write()
5580 .refresh(&q.name, serialized.into_bytes());
5581 Ok(RuntimeQueryResult::ok_message(
5582 query.to_string(),
5583 &format!("materialized view {} refreshed", q.name),
5584 "refresh_materialized_view",
5585 ))
5586 }
5587 QueryExpr::CreatePolicy(ref q) => {
5594 let key = (q.table.clone(), q.name.clone());
5595 self.inner
5596 .rls_policies
5597 .write()
5598 .insert(key, Arc::new(q.clone()));
5599 self.invalidate_plan_cache();
5600 self.schema_vocabulary_apply(
5604 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
5605 collection: q.table.clone(),
5606 policy: q.name.clone(),
5607 },
5608 );
5609 Ok(RuntimeQueryResult::ok_message(
5610 query.to_string(),
5611 &format!("policy {} on {} created", q.name, q.table),
5612 "create_policy",
5613 ))
5614 }
5615 QueryExpr::DropPolicy(ref q) => {
5616 let removed = self
5617 .inner
5618 .rls_policies
5619 .write()
5620 .remove(&(q.table.clone(), q.name.clone()))
5621 .is_some();
5622 if !removed && !q.if_exists {
5623 return Err(RedDBError::Internal(format!(
5624 "policy {} on {} does not exist",
5625 q.name, q.table
5626 )));
5627 }
5628 self.invalidate_plan_cache();
5629 self.schema_vocabulary_apply(
5632 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
5633 collection: q.table.clone(),
5634 policy: q.name.clone(),
5635 },
5636 );
5637 Ok(RuntimeQueryResult::ok_message(
5638 query.to_string(),
5639 &format!("policy {} on {} dropped", q.name, q.table),
5640 "drop_policy",
5641 ))
5642 }
5643 QueryExpr::CreateServer(ref q) => {
5654 use crate::storage::fdw::FdwOptions;
5655 let registry = Arc::clone(&self.inner.foreign_tables);
5656 if registry.server(&q.name).is_some() {
5657 if q.if_not_exists {
5658 return Ok(RuntimeQueryResult::ok_message(
5659 query.to_string(),
5660 &format!("server {} already exists — skipped", q.name),
5661 "create_server",
5662 ));
5663 }
5664 return Err(RedDBError::Internal(format!(
5665 "server {} already exists",
5666 q.name
5667 )));
5668 }
5669 let mut opts = FdwOptions::new();
5670 for (k, v) in &q.options {
5671 opts.values.insert(k.clone(), v.clone());
5672 }
5673 registry
5674 .create_server(&q.name, &q.wrapper, opts)
5675 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5676 Ok(RuntimeQueryResult::ok_message(
5677 query.to_string(),
5678 &format!("server {} created (wrapper {})", q.name, q.wrapper),
5679 "create_server",
5680 ))
5681 }
5682 QueryExpr::DropServer(ref q) => {
5683 let existed = self.inner.foreign_tables.drop_server(&q.name);
5684 if !existed && !q.if_exists {
5685 return Err(RedDBError::Internal(format!(
5686 "server {} does not exist",
5687 q.name
5688 )));
5689 }
5690 Ok(RuntimeQueryResult::ok_message(
5691 query.to_string(),
5692 &format!(
5693 "server {} dropped{}",
5694 q.name,
5695 if q.cascade { " (cascade)" } else { "" }
5696 ),
5697 "drop_server",
5698 ))
5699 }
5700 QueryExpr::CreateForeignTable(ref q) => {
5701 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
5702 let registry = Arc::clone(&self.inner.foreign_tables);
5703 if registry.foreign_table(&q.name).is_some() {
5704 if q.if_not_exists {
5705 return Ok(RuntimeQueryResult::ok_message(
5706 query.to_string(),
5707 &format!("foreign table {} already exists — skipped", q.name),
5708 "create_foreign_table",
5709 ));
5710 }
5711 return Err(RedDBError::Internal(format!(
5712 "foreign table {} already exists",
5713 q.name
5714 )));
5715 }
5716 let mut opts = FdwOptions::new();
5717 for (k, v) in &q.options {
5718 opts.values.insert(k.clone(), v.clone());
5719 }
5720 let columns: Vec<ForeignColumn> = q
5721 .columns
5722 .iter()
5723 .map(|c| ForeignColumn {
5724 name: c.name.clone(),
5725 data_type: c.data_type.clone(),
5726 not_null: c.not_null,
5727 })
5728 .collect();
5729 registry
5730 .create_foreign_table(ForeignTable {
5731 name: q.name.clone(),
5732 server_name: q.server.clone(),
5733 columns,
5734 options: opts,
5735 })
5736 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5737 self.invalidate_plan_cache();
5738 Ok(RuntimeQueryResult::ok_message(
5739 query.to_string(),
5740 &format!("foreign table {} created (server {})", q.name, q.server),
5741 "create_foreign_table",
5742 ))
5743 }
5744 QueryExpr::DropForeignTable(ref q) => {
5745 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
5746 if !existed && !q.if_exists {
5747 return Err(RedDBError::Internal(format!(
5748 "foreign table {} does not exist",
5749 q.name
5750 )));
5751 }
5752 self.invalidate_plan_cache();
5753 Ok(RuntimeQueryResult::ok_message(
5754 query.to_string(),
5755 &format!("foreign table {} dropped", q.name),
5756 "drop_foreign_table",
5757 ))
5758 }
5759 QueryExpr::CopyFrom(ref q) => {
5765 use crate::storage::import::{CsvConfig, CsvImporter};
5766 let store = self.inner.db.store();
5767 let cfg = CsvConfig {
5768 collection: q.table.clone(),
5769 has_header: q.has_header,
5770 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
5771 ..CsvConfig::default()
5772 };
5773 let importer = CsvImporter::new(cfg);
5774 let stats = importer
5775 .import_file(&q.path, store.as_ref())
5776 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
5777 self.note_table_write(&q.table);
5779 Ok(RuntimeQueryResult::ok_message(
5780 query.to_string(),
5781 &format!(
5782 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
5783 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
5784 ),
5785 "copy_from",
5786 ))
5787 }
5788 QueryExpr::MaintenanceCommand(ref cmd) => {
5804 use crate::storage::query::ast::MaintenanceCommand as Mc;
5805 let store = self.inner.db.store();
5806 let (kind, msg) = match cmd {
5807 Mc::Analyze { target } => {
5808 let targets: Vec<String> = match target {
5809 Some(t) => vec![t.clone()],
5810 None => store.list_collections(),
5811 };
5812 for t in &targets {
5813 self.refresh_table_planner_stats(t);
5814 }
5815 (
5816 "analyze",
5817 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
5818 )
5819 }
5820 Mc::Vacuum { target, full } => {
5821 let targets: Vec<String> = match target {
5822 Some(t) => vec![t.clone()],
5823 None => store.list_collections(),
5824 };
5825 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
5826 let mut vacuum_stats =
5827 crate::storage::unified::store::MvccVacuumStats::default();
5828 for t in &targets {
5829 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
5830 RedDBError::Internal(format!(
5831 "VACUUM MVCC history failed for {t}: {e}"
5832 ))
5833 })?;
5834 if stats.reclaimed_versions > 0 {
5835 self.rebuild_runtime_indexes_for_table(t)?;
5836 }
5837 vacuum_stats.add(&stats);
5838 }
5839 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
5840 for t in &targets {
5842 self.refresh_table_planner_stats(t);
5843 }
5844 let persisted = if *full {
5848 match store.persist() {
5849 Ok(()) => true,
5850 Err(e) => {
5851 return Err(RedDBError::Internal(format!(
5852 "VACUUM FULL persist failed: {e:?}"
5853 )));
5854 }
5855 }
5856 } else {
5857 false
5858 };
5859 self.invalidate_result_cache();
5861 (
5862 "vacuum",
5863 format!(
5864 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
5865 if *full { " FULL" } else { "" },
5866 targets.len(),
5867 vacuum_stats.scanned_versions,
5868 vacuum_stats.retained_versions,
5869 vacuum_stats.reclaimed_versions,
5870 vacuum_stats.retained_history_versions,
5871 vacuum_stats.reclaimed_history_versions,
5872 vacuum_stats.retained_tombstones,
5873 vacuum_stats.reclaimed_tombstones,
5874 if persisted {
5875 " (pages flushed to disk)"
5876 } else {
5877 ""
5878 }
5879 ),
5880 )
5881 }
5882 };
5883 Ok(RuntimeQueryResult::ok_message(
5884 query.to_string(),
5885 &msg,
5886 kind,
5887 ))
5888 }
5889 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
5896 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
5897 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
5898 QueryExpr::CreateIamPolicy { ref id, ref json } => {
5899 self.execute_create_iam_policy(query, id, json)
5900 }
5901 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
5902 QueryExpr::AttachPolicy {
5903 ref policy_id,
5904 ref principal,
5905 } => self.execute_attach_policy(query, policy_id, principal),
5906 QueryExpr::DetachPolicy {
5907 ref policy_id,
5908 ref principal,
5909 } => self.execute_detach_policy(query, policy_id, principal),
5910 QueryExpr::ShowPolicies { ref filter } => {
5911 self.execute_show_policies(query, filter.as_ref())
5912 }
5913 QueryExpr::ShowEffectivePermissions {
5914 ref user,
5915 ref resource,
5916 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
5917 QueryExpr::SimulatePolicy {
5918 ref user,
5919 ref action,
5920 ref resource,
5921 } => self.execute_simulate_policy(query, user, action, resource),
5922 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
5923 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
5924 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
5925 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
5926 };
5927
5928 let mut query_result = query_result;
5932 if let Ok(ref mut result) = query_result {
5933 if result.statement_type == "select" {
5934 self.apply_secret_decryption(result);
5935 }
5936 }
5937
5938 if let Ok(ref result) = query_result {
5945 frame.write_result_cache(self, result, result_cache_scopes);
5946 }
5947
5948 query_result
5949 }
5950
5951 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
5957 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
5958 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
5959 let expr = self.rewrite_view_refs(expr);
5963
5964 self.validate_model_operations_before_auth(&expr)?;
5965 if let Err(err) = self.check_query_privilege(&expr) {
5969 return Err(RedDBError::Query(format!("permission denied: {err}")));
5970 }
5971
5972 let statement = query_expr_name(&expr);
5973 let mode = detect_mode(statement);
5974 let query_str = statement;
5975
5976 let result = self.dispatch_expr(expr, query_str, mode)?;
5977 let mut r = result;
5978 if r.statement_type == "select" {
5979 self.apply_secret_decryption(&mut r);
5980 }
5981 Ok(r)
5982 }
5983
5984 pub(super) fn validate_model_operations_before_auth(
5985 &self,
5986 expr: &QueryExpr,
5987 ) -> RedDBResult<()> {
5988 use crate::catalog::CollectionModel;
5989 use crate::runtime::ddl::polymorphic_resolver;
5990 use crate::storage::query::ast::KvCommand;
5991
5992 let system_schema_target = match expr {
5993 QueryExpr::DropTable(q) => Some(q.name.as_str()),
5994 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
5995 QueryExpr::DropVector(q) => Some(q.name.as_str()),
5996 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
5997 QueryExpr::DropKv(q) => Some(q.name.as_str()),
5998 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
5999 QueryExpr::Truncate(q) => Some(q.name.as_str()),
6000 _ => None,
6001 };
6002 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
6003 return Err(RedDBError::Query("system schema is read-only".to_string()));
6004 }
6005
6006 let expected = match expr {
6007 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
6008 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
6009 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
6010 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
6011 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
6012 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
6013 QueryExpr::KvCommand(cmd) => {
6014 let (collection, model) = match cmd {
6015 KvCommand::Put {
6016 collection, model, ..
6017 }
6018 | KvCommand::Get {
6019 collection, model, ..
6020 }
6021 | KvCommand::Incr {
6022 collection, model, ..
6023 }
6024 | KvCommand::Cas {
6025 collection, model, ..
6026 }
6027 | KvCommand::Delete {
6028 collection, model, ..
6029 } => (collection.as_str(), *model),
6030 KvCommand::Rotate { collection, .. }
6031 | KvCommand::History { collection, .. }
6032 | KvCommand::List { collection, .. }
6033 | KvCommand::Purge { collection, .. } => {
6034 (collection.as_str(), CollectionModel::Vault)
6035 }
6036 KvCommand::InvalidateTags { collection, .. } => {
6037 (collection.as_str(), CollectionModel::Kv)
6038 }
6039 KvCommand::Watch {
6040 collection, model, ..
6041 } => (collection.as_str(), *model),
6042 KvCommand::Unseal { collection, .. } => {
6043 (collection.as_str(), CollectionModel::Vault)
6044 }
6045 };
6046 Some((collection, model))
6047 }
6048 QueryExpr::ConfigCommand(cmd) => {
6049 self.validate_config_command_before_auth(cmd)?;
6050 None
6051 }
6052 _ => None,
6053 };
6054
6055 let Some((name, expected_model)) = expected else {
6056 return Ok(());
6057 };
6058 let snapshot = self.inner.db.catalog_model_snapshot();
6059 let Some(actual_model) = snapshot
6060 .collections
6061 .iter()
6062 .find(|collection| collection.name == name)
6063 .map(|collection| collection.declared_model.unwrap_or(collection.model))
6064 else {
6065 return Ok(());
6066 };
6067 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
6068 }
6069
6070 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
6075 if self.inner.views.read().is_empty() {
6077 return expr;
6078 }
6079 self.rewrite_view_refs_inner(expr)
6080 }
6081
6082 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
6083 use crate::storage::query::ast::{Filter, TableSource};
6084 match expr {
6085 QueryExpr::Table(mut tq) => {
6086 if let Some(TableSource::Subquery(body)) = tq.source.take() {
6092 tq.source = Some(TableSource::Subquery(Box::new(
6093 self.rewrite_view_refs_inner(*body),
6094 )));
6095 return QueryExpr::Table(tq);
6096 }
6097
6098 let maybe_view = {
6102 let views = self.inner.views.read();
6103 views.get(&tq.table).cloned()
6104 };
6105 let Some(view) = maybe_view else {
6106 return QueryExpr::Table(tq);
6107 };
6108
6109 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
6113
6114 match inner_expr {
6122 QueryExpr::Table(mut inner_tq) => {
6123 if let Some(outer_filter) = tq.filter.take() {
6124 inner_tq.filter = Some(match inner_tq.filter.take() {
6125 Some(existing) => {
6126 Filter::And(Box::new(existing), Box::new(outer_filter))
6127 }
6128 None => outer_filter,
6129 });
6130 }
6131 if let Some(outer_limit) = tq.limit {
6132 inner_tq.limit = Some(match inner_tq.limit {
6133 Some(existing) => existing.min(outer_limit),
6134 None => outer_limit,
6135 });
6136 }
6137 if let Some(outer_offset) = tq.offset {
6138 inner_tq.offset = Some(match inner_tq.offset {
6139 Some(existing) => existing + outer_offset,
6140 None => outer_offset,
6141 });
6142 }
6143 QueryExpr::Table(inner_tq)
6144 }
6145 other => other,
6146 }
6147 }
6148 QueryExpr::Join(mut jq) => {
6149 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
6150 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
6151 QueryExpr::Join(jq)
6152 }
6153 other => other,
6156 }
6157 }
6158
6159 fn authorize_relational_table_select(
6163 &self,
6164 mut table: TableQuery,
6165 frame: &dyn super::statement_frame::ReadFrame,
6166 ) -> RedDBResult<Option<TableQuery>> {
6167 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6168 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6169 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6170 return Ok(Some(table));
6171 }
6172
6173 self.check_table_column_projection_authz(&table, frame)?;
6174
6175 if self.inner.rls_enabled_tables.read().contains(&table.table) {
6176 return Ok(inject_rls_filters(self, frame, table));
6177 }
6178
6179 Ok(Some(table))
6180 }
6181
6182 fn authorize_relational_join_select(
6183 &self,
6184 mut join: JoinQuery,
6185 frame: &dyn super::statement_frame::ReadFrame,
6186 ) -> RedDBResult<Option<JoinQuery>> {
6187 self.check_join_column_projection_authz(&join, frame)?;
6188 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
6189 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
6190 Ok(inject_rls_into_join(self, frame, join))
6191 }
6192
6193 fn authorize_relational_join_child(
6194 &self,
6195 expr: QueryExpr,
6196 frame: &dyn super::statement_frame::ReadFrame,
6197 ) -> RedDBResult<QueryExpr> {
6198 match expr {
6199 QueryExpr::Table(mut table) => {
6200 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6201 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6202 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6203 }
6204 Ok(QueryExpr::Table(table))
6205 }
6206 QueryExpr::Join(join) => self
6207 .authorize_relational_join_select(join, frame)?
6208 .map(QueryExpr::Join)
6209 .ok_or_else(|| {
6210 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6211 }),
6212 other => Ok(other),
6213 }
6214 }
6215
6216 fn authorize_relational_select_expr(
6217 &self,
6218 expr: QueryExpr,
6219 frame: &dyn super::statement_frame::ReadFrame,
6220 ) -> RedDBResult<QueryExpr> {
6221 match expr {
6222 QueryExpr::Table(table) => self
6223 .authorize_relational_table_select(table, frame)?
6224 .map(QueryExpr::Table)
6225 .ok_or_else(|| {
6226 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6227 }),
6228 QueryExpr::Join(join) => self
6229 .authorize_relational_join_select(join, frame)?
6230 .map(QueryExpr::Join)
6231 .ok_or_else(|| {
6232 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6233 }),
6234 other => Ok(other),
6235 }
6236 }
6237
6238 fn check_table_column_projection_authz(
6239 &self,
6240 table: &TableQuery,
6241 frame: &dyn super::statement_frame::ReadFrame,
6242 ) -> RedDBResult<()> {
6243 let Some((username, role)) = frame.identity() else {
6244 return Ok(());
6245 };
6246 let Some(auth_store) = self.inner.auth_store.read().clone() else {
6247 return Ok(());
6248 };
6249
6250 let columns = self.resolved_table_projection_columns(table)?;
6251 let request = ColumnAccessRequest::select(table.table.clone(), columns);
6252 let principal = UserId::from_parts(frame.effective_scope(), username);
6253 let ctx = runtime_iam_context(role, frame.effective_scope());
6254 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
6255 if outcome.allowed() {
6256 return Ok(());
6257 }
6258
6259 if let Some(denied) = outcome.first_denied_column() {
6260 return Err(RedDBError::Query(format!(
6261 "permission denied: principal=`{username}` cannot select column `{}`",
6262 denied.resource.name
6263 )));
6264 }
6265 Err(RedDBError::Query(format!(
6266 "permission denied: principal=`{username}` cannot select table `{}`",
6267 table.table
6268 )))
6269 }
6270
6271 fn check_join_column_projection_authz(
6272 &self,
6273 join: &JoinQuery,
6274 frame: &dyn super::statement_frame::ReadFrame,
6275 ) -> RedDBResult<()> {
6276 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
6277 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
6278 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
6279
6280 for (table, columns) in by_table {
6281 let query = TableQuery {
6282 table,
6283 source: None,
6284 alias: None,
6285 select_items: Vec::new(),
6286 columns: columns.into_iter().map(Projection::Column).collect(),
6287 where_expr: None,
6288 filter: None,
6289 group_by_exprs: Vec::new(),
6290 group_by: Vec::new(),
6291 having_expr: None,
6292 having: None,
6293 order_by: Vec::new(),
6294 limit: None,
6295 limit_param: None,
6296 offset: None,
6297 offset_param: None,
6298 expand: None,
6299 as_of: None,
6300 };
6301 self.check_table_column_projection_authz(&query, frame)?;
6302 }
6303 Ok(())
6304 }
6305
6306 fn collect_join_projection_columns(
6307 &self,
6308 join: &JoinQuery,
6309 projections: &[Projection],
6310 out: &mut HashMap<String, BTreeSet<String>>,
6311 ) -> RedDBResult<()> {
6312 let left = table_side_context(join.left.as_ref());
6313 let right = table_side_context(join.right.as_ref());
6314
6315 if projections
6316 .iter()
6317 .any(|projection| matches!(projection, Projection::All))
6318 {
6319 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
6320 out.entry(side.table.clone())
6321 .or_default()
6322 .extend(self.table_all_projection_columns(&side.table)?);
6323 }
6324 return Ok(());
6325 }
6326
6327 for projection in projections {
6328 collect_projection_columns_for_join_side(
6329 projection,
6330 left.as_ref(),
6331 right.as_ref(),
6332 out,
6333 )?;
6334 }
6335 Ok(())
6336 }
6337
6338 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
6339 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
6340 if projections
6341 .iter()
6342 .any(|projection| matches!(projection, Projection::All))
6343 {
6344 return self.table_all_projection_columns(&table.table);
6345 }
6346
6347 let mut columns = BTreeSet::new();
6348 for projection in &projections {
6349 collect_projection_columns_for_table(
6350 projection,
6351 &table.table,
6352 table.alias.as_deref(),
6353 &mut columns,
6354 );
6355 }
6356 Ok(columns.into_iter().collect())
6357 }
6358
6359 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
6360 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
6361 let columns: Vec<String> = contract
6362 .declared_columns
6363 .iter()
6364 .map(|column| column.name.clone())
6365 .collect();
6366 if !columns.is_empty() {
6367 return Ok(columns);
6368 }
6369 }
6370
6371 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
6372 Ok(records
6373 .first()
6374 .map(|record| {
6375 record
6376 .column_names()
6377 .into_iter()
6378 .map(|column| column.to_string())
6379 .collect()
6380 })
6381 .unwrap_or_default())
6382 }
6383
6384 fn resolve_table_expr_subqueries(
6385 &self,
6386 mut table: TableQuery,
6387 frame: &dyn super::statement_frame::ReadFrame,
6388 ) -> RedDBResult<TableQuery> {
6389 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6390 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
6391 table.source = Some(TableSource::Subquery(Box::new(inner)));
6392 }
6393
6394 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
6395 for item in &mut table.select_items {
6396 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
6397 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6398 }
6399 }
6400 if let Some(where_expr) = table.where_expr.take() {
6401 table.where_expr =
6402 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
6403 table.filter = None;
6404 }
6405 if let Some(having_expr) = table.having_expr.take() {
6406 table.having_expr =
6407 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
6408 table.having = None;
6409 }
6410 for expr in &mut table.group_by_exprs {
6411 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6412 }
6413 for clause in &mut table.order_by {
6414 if let Some(expr) = clause.expr.take() {
6415 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
6416 }
6417 }
6418 Ok(table)
6419 }
6420
6421 fn resolve_select_expr_subqueries(
6422 &self,
6423 expr: QueryExpr,
6424 frame: &dyn super::statement_frame::ReadFrame,
6425 ) -> RedDBResult<QueryExpr> {
6426 match expr {
6427 QueryExpr::Table(table) => self
6428 .resolve_table_expr_subqueries(table, frame)
6429 .map(QueryExpr::Table),
6430 QueryExpr::Join(mut join) => {
6431 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
6432 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
6433 Ok(QueryExpr::Join(join))
6434 }
6435 other => Ok(other),
6436 }
6437 }
6438
6439 fn resolve_expr_subqueries(
6440 &self,
6441 expr: crate::storage::query::ast::Expr,
6442 outer_scopes: &[String],
6443 frame: &dyn super::statement_frame::ReadFrame,
6444 ) -> RedDBResult<crate::storage::query::ast::Expr> {
6445 use crate::storage::query::ast::Expr;
6446
6447 match expr {
6448 Expr::Subquery { query, span } => {
6449 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
6450 if values.len() > 1 {
6451 return Err(RedDBError::Query(
6452 "scalar subquery returned more than one row".to_string(),
6453 ));
6454 }
6455 Ok(Expr::Literal {
6456 value: values.into_iter().next().unwrap_or(Value::Null),
6457 span,
6458 })
6459 }
6460 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
6461 op,
6462 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
6463 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
6464 span,
6465 }),
6466 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
6467 op,
6468 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6469 span,
6470 }),
6471 Expr::Cast {
6472 inner,
6473 target,
6474 span,
6475 } => Ok(Expr::Cast {
6476 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
6477 target,
6478 span,
6479 }),
6480 Expr::FunctionCall { name, args, span } => {
6481 let args = args
6482 .into_iter()
6483 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
6484 .collect::<RedDBResult<Vec<_>>>()?;
6485 Ok(Expr::FunctionCall { name, args, span })
6486 }
6487 Expr::Case {
6488 branches,
6489 else_,
6490 span,
6491 } => {
6492 let branches = branches
6493 .into_iter()
6494 .map(|(cond, value)| {
6495 Ok((
6496 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
6497 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
6498 ))
6499 })
6500 .collect::<RedDBResult<Vec<_>>>()?;
6501 let else_ = else_
6502 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
6503 .transpose()?
6504 .map(Box::new);
6505 Ok(Expr::Case {
6506 branches,
6507 else_,
6508 span,
6509 })
6510 }
6511 Expr::IsNull {
6512 operand,
6513 negated,
6514 span,
6515 } => Ok(Expr::IsNull {
6516 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6517 negated,
6518 span,
6519 }),
6520 Expr::InList {
6521 target,
6522 values,
6523 negated,
6524 span,
6525 } => {
6526 let target =
6527 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
6528 let mut resolved = Vec::new();
6529 for value in values {
6530 if let Expr::Subquery { query, .. } = value {
6531 resolved.extend(
6532 self.execute_expr_subquery_values(query, outer_scopes, frame)?
6533 .into_iter()
6534 .map(Expr::lit),
6535 );
6536 } else {
6537 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
6538 }
6539 }
6540 Ok(Expr::InList {
6541 target,
6542 values: resolved,
6543 negated,
6544 span,
6545 })
6546 }
6547 Expr::Between {
6548 target,
6549 low,
6550 high,
6551 negated,
6552 span,
6553 } => Ok(Expr::Between {
6554 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
6555 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
6556 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
6557 negated,
6558 span,
6559 }),
6560 other => Ok(other),
6561 }
6562 }
6563
6564 fn execute_expr_subquery_values(
6565 &self,
6566 subquery: crate::storage::query::ast::ExprSubquery,
6567 outer_scopes: &[String],
6568 frame: &dyn super::statement_frame::ReadFrame,
6569 ) -> RedDBResult<Vec<Value>> {
6570 let query = *subquery.query;
6571 if query_references_outer_scope(&query, outer_scopes) {
6572 return Err(RedDBError::Query(
6573 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
6574 ));
6575 }
6576 let query = self.rewrite_view_refs(query);
6577 let query = self.resolve_select_expr_subqueries(query, frame)?;
6578 let query = self.authorize_relational_select_expr(query, frame)?;
6579 let result = match query {
6580 QueryExpr::Table(table) => {
6581 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
6582 }
6583 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
6584 other => {
6585 return Err(RedDBError::Query(format!(
6586 "expression subquery must be a SELECT query, got {}",
6587 query_expr_name(&other)
6588 )))
6589 }
6590 };
6591 first_column_values(result)
6592 }
6593
6594 fn dispatch_expr(
6595 &self,
6596 expr: QueryExpr,
6597 query_str: &str,
6598 mode: QueryMode,
6599 ) -> RedDBResult<RuntimeQueryResult> {
6600 let statement = query_expr_name(&expr);
6601 match expr {
6602 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
6603 Err(RedDBError::Query(
6605 "graph queries cannot be used as prepared statements".to_string(),
6606 ))
6607 }
6608 QueryExpr::Table(table) => {
6609 let scope = self.ai_scope();
6610 let table = self.resolve_table_expr_subqueries(
6611 table,
6612 &scope as &dyn super::statement_frame::ReadFrame,
6613 )?;
6614 if super::red_schema::is_virtual_table(&table.table) {
6615 return Ok(RuntimeQueryResult {
6616 query: query_str.to_string(),
6617 mode,
6618 statement,
6619 engine: "runtime-red-schema",
6620 result: super::red_schema::red_query(
6621 self,
6622 &table.table,
6623 &table,
6624 &scope as &dyn super::statement_frame::ReadFrame,
6625 )?,
6626 affected_rows: 0,
6627 statement_type: "select",
6628 });
6629 }
6630 let Some(table_with_rls) = self.authorize_relational_table_select(
6631 table,
6632 &scope as &dyn super::statement_frame::ReadFrame,
6633 )?
6634 else {
6635 return Ok(RuntimeQueryResult {
6636 query: query_str.to_string(),
6637 mode,
6638 statement,
6639 engine: "runtime-table-rls",
6640 result: crate::storage::query::unified::UnifiedResult::empty(),
6641 affected_rows: 0,
6642 statement_type: "select",
6643 });
6644 };
6645 Ok(RuntimeQueryResult {
6646 query: query_str.to_string(),
6647 mode,
6648 statement,
6649 engine: "runtime-table",
6650 result: execute_runtime_table_query(
6651 &self.inner.db,
6652 &table_with_rls,
6653 Some(&self.inner.index_store),
6654 )?,
6655 affected_rows: 0,
6656 statement_type: "select",
6657 })
6658 }
6659 QueryExpr::Join(join) => {
6660 let scope = self.ai_scope();
6661 let Some(join_with_rls) = self.authorize_relational_join_select(
6662 join,
6663 &scope as &dyn super::statement_frame::ReadFrame,
6664 )?
6665 else {
6666 return Ok(RuntimeQueryResult {
6667 query: query_str.to_string(),
6668 mode,
6669 statement,
6670 engine: "runtime-join-rls",
6671 result: crate::storage::query::unified::UnifiedResult::empty(),
6672 affected_rows: 0,
6673 statement_type: "select",
6674 });
6675 };
6676 Ok(RuntimeQueryResult {
6677 query: query_str.to_string(),
6678 mode,
6679 statement,
6680 engine: "runtime-join",
6681 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
6682 affected_rows: 0,
6683 statement_type: "select",
6684 })
6685 }
6686 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
6687 query: query_str.to_string(),
6688 mode,
6689 statement,
6690 engine: "runtime-vector",
6691 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
6692 affected_rows: 0,
6693 statement_type: "select",
6694 }),
6695 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
6696 query: query_str.to_string(),
6697 mode,
6698 statement,
6699 engine: "runtime-hybrid",
6700 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
6701 affected_rows: 0,
6702 statement_type: "select",
6703 }),
6704 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
6705 Err(RedDBError::Query(
6706 super::red_schema::READ_ONLY_ERROR.to_string(),
6707 ))
6708 }
6709 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
6710 Err(RedDBError::Query(
6711 super::red_schema::READ_ONLY_ERROR.to_string(),
6712 ))
6713 }
6714 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
6715 Err(RedDBError::Query(
6716 super::red_schema::READ_ONLY_ERROR.to_string(),
6717 ))
6718 }
6719 QueryExpr::Insert(ref insert) => self
6720 .with_deferred_store_wal_if_transaction(|| self.execute_insert(query_str, insert)),
6721 QueryExpr::Update(ref update) => self
6722 .with_deferred_store_wal_if_transaction(|| self.execute_update(query_str, update)),
6723 QueryExpr::Delete(ref delete) => self
6724 .with_deferred_store_wal_if_transaction(|| self.execute_delete(query_str, delete)),
6725 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
6726 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
6727 _ => Err(RedDBError::Query(format!(
6728 "prepared-statement execution does not support {statement} statements"
6729 ))),
6730 }
6731 }
6732
6733 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
6736 let q = query.trim();
6739 if !q.starts_with("SELECT") && !q.starts_with("select") {
6740 return None;
6741 }
6742
6743 let where_pos = q
6745 .find("WHERE _entity_id")
6746 .or_else(|| q.find("where _entity_id"))?;
6747 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
6749
6750 let id_str = after_eq.trim();
6752 let entity_id: u64 = id_str.parse().ok()?;
6753
6754 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
6756 let table = q[from_pos..where_pos].trim();
6757 if table.is_empty()
6758 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
6759 {
6760 return None; }
6762 let table_name = table.split_whitespace().next()?;
6763
6764 let store = self.inner.db.store();
6770 let entity = store
6771 .get(
6772 table_name,
6773 crate::storage::unified::EntityId::new(entity_id),
6774 )
6775 .filter(entity_visible_under_current_snapshot);
6776
6777 let count = if entity.is_some() { 1u64 } else { 0 };
6778
6779 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
6785 .as_ref()
6786 .and_then(|e| runtime_table_record_from_entity(e.clone()))
6787 .into_iter()
6788 .collect();
6789
6790 let json = match entity {
6791 Some(ref e) => execute_runtime_serialize_single_entity(e),
6792 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
6793 .to_string(),
6794 };
6795
6796 Some(Ok(RuntimeQueryResult {
6797 query: query.to_string(),
6798 mode: crate::storage::query::modes::QueryMode::Sql,
6799 statement: "select",
6800 engine: "fast-entity-lookup",
6801 result: crate::storage::query::unified::UnifiedResult {
6802 columns: Vec::new(),
6803 records,
6804 stats: crate::storage::query::unified::QueryStats {
6805 rows_scanned: count,
6806 ..Default::default()
6807 },
6808 pre_serialized_json: Some(json),
6809 },
6810 affected_rows: 0,
6811 statement_type: "select",
6812 }))
6813 }
6814
6815 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
6816 match self
6817 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
6818 .as_str()
6819 {
6820 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
6821 "shadow" => RuntimeResultCacheBackend::Shadow,
6822 _ => RuntimeResultCacheBackend::Legacy,
6823 }
6824 }
6825
6826 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6827 match self.result_cache_backend() {
6828 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
6829 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
6830 RuntimeResultCacheBackend::Shadow => {
6831 let legacy = self.get_legacy_result_cache_entry(key);
6832 let blob = self.get_blob_result_cache_entry(key);
6833 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
6834 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
6835 self.inner
6836 .result_cache_shadow_divergences
6837 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
6838 tracing::warn!(
6839 key,
6840 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
6841 "result cache shadow backend diverged from legacy"
6842 );
6843 }
6844 }
6845 legacy
6846 }
6847 }
6848 }
6849
6850 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6851 let cache = self.inner.result_cache.read();
6852 cache.0.get(key).and_then(|entry| {
6853 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
6854 Some(entry.result.clone())
6855 } else {
6856 None
6857 }
6858 })
6859 }
6860
6861 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6862 let hit = self
6863 .inner
6864 .result_blob_cache
6865 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
6866 {
6867 let cache = self.inner.result_blob_entries.read();
6868 if let Some(entry) = cache.0.get(key) {
6869 return Some(entry.result.clone());
6870 }
6871 }
6872
6873 let (result, scopes) = decode_result_cache_payload(hit.value())?;
6874 let mut cache = self.inner.result_blob_entries.write();
6875 let (ref mut map, ref mut order) = *cache;
6876 if !map.contains_key(key) {
6877 order.push_back(key.to_string());
6878 }
6879 map.insert(
6880 key.to_string(),
6881 RuntimeResultCacheEntry {
6882 result: result.clone(),
6883 cached_at: std::time::Instant::now(),
6884 scopes,
6885 },
6886 );
6887 trim_result_cache(map, order);
6888 Some(result)
6889 }
6890
6891 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6892 match self.result_cache_backend() {
6893 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
6894 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
6895 RuntimeResultCacheBackend::Shadow => {
6896 self.put_legacy_result_cache_entry(key, entry.clone());
6897 self.put_blob_result_cache_entry(key, entry);
6898 }
6899 }
6900 }
6901
6902 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6903 let mut cache = self.inner.result_cache.write();
6904 let (ref mut map, ref mut order) = *cache;
6905 if !map.contains_key(key) {
6906 order.push_back(key.to_string());
6907 }
6908 map.insert(key.to_string(), entry);
6909 trim_result_cache(map, order);
6910 }
6911
6912 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6913 let policy = crate::storage::cache::BlobCachePolicy::default()
6914 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
6915 .priority(200);
6916 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
6917 let bytes = encode_result_cache_payload(&entry)
6918 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
6919 let put = crate::storage::cache::BlobCachePut::new(bytes)
6920 .with_dependencies(dependencies)
6921 .with_policy(policy);
6922 if self
6923 .inner
6924 .result_blob_cache
6925 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
6926 .is_err()
6927 {
6928 return;
6929 }
6930
6931 let mut cache = self.inner.result_blob_entries.write();
6932 let (ref mut map, ref mut order) = *cache;
6933 if !map.contains_key(key) {
6934 order.push_back(key.to_string());
6935 }
6936 map.insert(key.to_string(), entry);
6937 trim_result_cache(map, order);
6938 }
6939
6940 pub fn result_cache_shadow_divergences(&self) -> u64 {
6941 self.inner
6942 .result_cache_shadow_divergences
6943 .load(std::sync::atomic::Ordering::Relaxed)
6944 }
6945
6946 pub fn invalidate_result_cache(&self) {
6949 let mut cache = self.inner.result_cache.write();
6950 cache.0.clear();
6951 cache.1.clear();
6952 let mut blob_entries = self.inner.result_blob_entries.write();
6953 blob_entries.0.clear();
6954 blob_entries.1.clear();
6955 self.inner
6956 .result_blob_cache
6957 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
6958 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
6959 ask_entries.0.clear();
6960 ask_entries.1.clear();
6961 self.inner
6962 .result_blob_cache
6963 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
6964 }
6965
6966 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
6969 let legacy_has_match = {
6972 let cache = self.inner.result_cache.read();
6973 let (ref map, _) = *cache;
6974 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
6975 };
6976 let blob_has_match = {
6977 let cache = self.inner.result_blob_entries.read();
6978 let (ref map, _) = *cache;
6979 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
6980 };
6981 if legacy_has_match {
6982 let mut cache = self.inner.result_cache.write();
6983 let (ref mut map, ref mut order) = *cache;
6984 map.retain(|_, entry| !entry.scopes.contains(table));
6985 order.retain(|key| map.contains_key(key));
6986 }
6987
6988 if matches!(
6989 self.result_cache_backend(),
6990 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
6991 ) {
6992 let mut blob_entries = self.inner.result_blob_entries.write();
6993 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
6994 blob_map.clear();
6995 blob_order.clear();
6996 self.inner
6997 .result_blob_cache
6998 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
6999 } else if blob_has_match {
7000 let mut blob_entries = self.inner.result_blob_entries.write();
7001 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7002 blob_map.retain(|_, entry| !entry.scopes.contains(table));
7003 blob_order.retain(|key| blob_map.contains_key(key));
7004 }
7005 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7006 ask_entries.0.clear();
7007 ask_entries.1.clear();
7008 self.inner
7009 .result_blob_cache
7010 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7011 }
7012
7013 pub(crate) fn invalidate_plan_cache(&self) {
7014 self.inner.query_cache.write().clear();
7015 self.inner
7016 .ddl_epoch
7017 .fetch_add(1, std::sync::atomic::Ordering::Release);
7018 }
7019
7020 pub fn ddl_epoch(&self) -> u64 {
7024 self.inner
7025 .ddl_epoch
7026 .load(std::sync::atomic::Ordering::Acquire)
7027 }
7028
7029 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
7030 let store = self.inner.db.store();
7031 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
7032 self.invalidate_plan_cache();
7033 }
7034
7035 pub(crate) fn rehydrate_tenant_tables(&self) {
7044 let store = self.inner.db.store();
7045 let Some(manager) = store.get_collection("red_config") else {
7046 return;
7047 };
7048 for entity in manager.query_all(|_| true) {
7053 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
7054 continue;
7055 };
7056 let Some(named) = &row.named else { continue };
7057 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
7058 continue;
7059 };
7060 let Some(rest) = key.strip_prefix("tenant_tables.") else {
7062 continue;
7063 };
7064 let Some((table, suffix)) = rest.rsplit_once('.') else {
7065 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7071 collection: "red_config".to_string(),
7072 detail: format!("malformed tenant_tables key: {key}"),
7073 }
7074 .emit_global();
7075 continue;
7076 };
7077 if suffix != "column" {
7078 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7079 collection: "red_config".to_string(),
7080 detail: format!("unexpected tenant_tables suffix: {key}"),
7081 }
7082 .emit_global();
7083 continue;
7084 }
7085 match named.get("value") {
7086 Some(crate::storage::schema::Value::Text(column)) => {
7087 self.register_tenant_table(table, column);
7088 }
7089 Some(crate::storage::schema::Value::Null) | None => {
7091 self.unregister_tenant_table(table);
7092 }
7093 _ => {}
7094 }
7095 }
7096 }
7097
7098 pub fn register_tenant_table(&self, table: &str, column: &str) {
7103 use crate::storage::query::ast::{
7104 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
7105 };
7106 self.inner
7107 .tenant_tables
7108 .write()
7109 .insert(table.to_string(), column.to_string());
7110
7111 let lhs = Expr::Column {
7117 field: FieldRef::TableColumn {
7118 table: table.to_string(),
7119 column: column.to_string(),
7120 },
7121 span: Span::synthetic(),
7122 };
7123 let rhs = Expr::FunctionCall {
7124 name: "CURRENT_TENANT".to_string(),
7125 args: Vec::new(),
7126 span: Span::synthetic(),
7127 };
7128 let policy_filter = Filter::CompareExpr {
7129 lhs,
7130 op: CompareOp::Eq,
7131 rhs,
7132 };
7133
7134 let policy = CreatePolicyQuery {
7135 name: "__tenant_iso".to_string(),
7136 table: table.to_string(),
7137 action: None, role: None, using: Box::new(policy_filter),
7140 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
7147 };
7148
7149 self.inner.rls_policies.write().insert(
7151 (table.to_string(), "__tenant_iso".to_string()),
7152 Arc::new(policy),
7153 );
7154 self.inner
7155 .rls_enabled_tables
7156 .write()
7157 .insert(table.to_string());
7158
7159 self.ensure_tenant_index(table, column);
7165 }
7166
7167 fn ensure_tenant_index(&self, table: &str, column: &str) {
7175 if column.contains('.') {
7176 return;
7177 }
7178 let index_name = format!("__tenant_idx_{table}");
7179 let registry = self.inner.index_store.list_indices(table);
7180 if registry.iter().any(|idx| idx.name == index_name) {
7181 return;
7182 }
7183 if registry
7184 .iter()
7185 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
7186 {
7187 return;
7188 }
7189
7190 let store = self.inner.db.store();
7191 let Some(manager) = store.get_collection(table) else {
7192 return;
7193 };
7194 let entities = manager.query_all(|_| true);
7195 let entity_fields: Vec<(
7196 crate::storage::unified::EntityId,
7197 Vec<(String, crate::storage::schema::Value)>,
7198 )> = entities
7199 .iter()
7200 .map(|e| {
7201 let fields = match &e.data {
7202 crate::storage::EntityData::Row(row) => {
7203 if let Some(ref named) = row.named {
7204 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
7205 } else if let Some(ref schema) = row.schema {
7206 schema
7207 .iter()
7208 .zip(row.columns.iter())
7209 .map(|(k, v)| (k.clone(), v.clone()))
7210 .collect()
7211 } else {
7212 Vec::new()
7213 }
7214 }
7215 crate::storage::EntityData::Node(node) => node
7216 .properties
7217 .iter()
7218 .map(|(k, v)| (k.clone(), v.clone()))
7219 .collect(),
7220 _ => Vec::new(),
7221 };
7222 (e.id, fields)
7223 })
7224 .collect();
7225
7226 let columns = vec![column.to_string()];
7227 if self
7228 .inner
7229 .index_store
7230 .create_index(
7231 &index_name,
7232 table,
7233 &columns,
7234 super::index_store::IndexMethodKind::Hash,
7235 false,
7236 &entity_fields,
7237 )
7238 .is_err()
7239 {
7240 return;
7241 }
7242 self.inner
7243 .index_store
7244 .register(super::index_store::RegisteredIndex {
7245 name: index_name,
7246 collection: table.to_string(),
7247 columns,
7248 method: super::index_store::IndexMethodKind::Hash,
7249 unique: false,
7250 });
7251 self.invalidate_plan_cache();
7252 }
7253
7254 fn drop_tenant_index(&self, table: &str) {
7257 let index_name = format!("__tenant_idx_{table}");
7258 self.inner.index_store.drop_index(&index_name, table);
7259 }
7260
7261 pub fn tenant_column(&self, table: &str) -> Option<String> {
7265 self.inner.tenant_tables.read().get(table).cloned()
7266 }
7267
7268 pub fn unregister_tenant_table(&self, table: &str) {
7272 self.inner.tenant_tables.write().remove(table);
7273 self.inner
7274 .rls_policies
7275 .write()
7276 .remove(&(table.to_string(), "__tenant_iso".to_string()));
7277 self.drop_tenant_index(table);
7278 let has_other_policies = self
7280 .inner
7281 .rls_policies
7282 .read()
7283 .keys()
7284 .any(|(t, _)| t == table);
7285 if !has_other_policies {
7286 self.inner.rls_enabled_tables.write().remove(table);
7287 }
7288 }
7289
7290 pub(crate) fn record_pending_tombstone(
7296 &self,
7297 conn_id: u64,
7298 collection: &str,
7299 id: crate::storage::unified::entity::EntityId,
7300 stamper_xid: crate::storage::transaction::snapshot::Xid,
7301 previous_xmax: crate::storage::transaction::snapshot::Xid,
7302 ) {
7303 self.inner
7304 .pending_tombstones
7305 .write()
7306 .entry(conn_id)
7307 .or_default()
7308 .push((collection.to_string(), id, stamper_xid, previous_xmax));
7309 }
7310
7311 pub(crate) fn record_pending_versioned_update(
7312 &self,
7313 conn_id: u64,
7314 collection: &str,
7315 old_id: crate::storage::unified::entity::EntityId,
7316 new_id: crate::storage::unified::entity::EntityId,
7317 stamper_xid: crate::storage::transaction::snapshot::Xid,
7318 previous_xmax: crate::storage::transaction::snapshot::Xid,
7319 ) {
7320 self.inner
7321 .pending_versioned_updates
7322 .write()
7323 .entry(conn_id)
7324 .or_default()
7325 .push((
7326 collection.to_string(),
7327 old_id,
7328 new_id,
7329 stamper_xid,
7330 previous_xmax,
7331 ));
7332 }
7333
7334 fn with_deferred_store_wal_if_transaction<T>(
7335 &self,
7336 f: impl FnOnce() -> RedDBResult<T>,
7337 ) -> RedDBResult<T> {
7338 let conn_id = current_connection_id();
7339 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
7340 return f();
7341 }
7342
7343 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
7344 let result = f();
7345 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
7346 match result {
7347 Ok(value) => {
7348 self.record_pending_store_wal_actions(conn_id, captured);
7349 Ok(value)
7350 }
7351 Err(err) => Err(err),
7352 }
7353 }
7354
7355 fn record_pending_store_wal_actions(
7356 &self,
7357 conn_id: u64,
7358 actions: crate::storage::unified::DeferredStoreWalActions,
7359 ) {
7360 if actions.is_empty() {
7361 return;
7362 }
7363 let mut guard = self.inner.pending_store_wal_actions.write();
7364 guard.entry(conn_id).or_default().extend(actions);
7365 }
7366
7367 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
7368 let Some(actions) = self
7369 .inner
7370 .pending_store_wal_actions
7371 .write()
7372 .remove(&conn_id)
7373 else {
7374 return Ok(());
7375 };
7376 self.inner
7377 .db
7378 .store()
7379 .append_deferred_store_wal_actions(actions)
7380 .map_err(|err| RedDBError::Internal(err.to_string()))
7381 }
7382
7383 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
7384 self.inner
7385 .pending_store_wal_actions
7386 .write()
7387 .remove(&conn_id);
7388 }
7389
7390 fn xid_conflicts_with_snapshot(
7391 &self,
7392 xid: crate::storage::transaction::snapshot::Xid,
7393 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7394 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7395 ) -> bool {
7396 xid != 0
7397 && !own_xids.contains(&xid)
7398 && !self.inner.snapshot_manager.is_aborted(xid)
7399 && !self.inner.snapshot_manager.is_active(xid)
7400 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
7401 }
7402
7403 fn conflict_error(
7404 collection: &str,
7405 logical_id: crate::storage::unified::entity::EntityId,
7406 xid: crate::storage::transaction::snapshot::Xid,
7407 ) -> RedDBError {
7408 RedDBError::Query(format!(
7409 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
7410 logical_id.raw()
7411 ))
7412 }
7413
7414 fn check_logical_row_conflict(
7415 &self,
7416 collection: &str,
7417 logical_id: crate::storage::unified::entity::EntityId,
7418 excluded_ids: &[crate::storage::unified::entity::EntityId],
7419 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7420 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7421 ) -> RedDBResult<()> {
7422 let store = self.inner.db.store();
7423 let Some(manager) = store.get_collection(collection) else {
7424 return Ok(());
7425 };
7426
7427 for candidate in manager.query_all(|_| true) {
7428 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
7429 continue;
7430 }
7431 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
7432 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
7433 }
7434 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
7435 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
7436 }
7437 }
7438 Ok(())
7439 }
7440
7441 pub(crate) fn check_table_row_write_conflicts(
7442 &self,
7443 conn_id: u64,
7444 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7445 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7446 ) -> RedDBResult<()> {
7447 let versioned_updates = self
7448 .inner
7449 .pending_versioned_updates
7450 .read()
7451 .get(&conn_id)
7452 .cloned()
7453 .unwrap_or_default();
7454 let tombstones = self
7455 .inner
7456 .pending_tombstones
7457 .read()
7458 .get(&conn_id)
7459 .cloned()
7460 .unwrap_or_default();
7461
7462 let store = self.inner.db.store();
7463 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
7464 let Some(manager) = store.get_collection(&collection) else {
7465 continue;
7466 };
7467 let Some(old) = manager.get(old_id) else {
7468 continue;
7469 };
7470 let logical_id = old.logical_id();
7471 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7472 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7473 }
7474 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
7475 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
7476 }
7477 self.check_logical_row_conflict(
7478 &collection,
7479 logical_id,
7480 &[old_id, new_id],
7481 snapshot,
7482 own_xids,
7483 )?;
7484 }
7485
7486 for (collection, id, xid, previous_xmax) in tombstones {
7487 let Some(manager) = store.get_collection(&collection) else {
7488 continue;
7489 };
7490 let Some(entity) = manager.get(id) else {
7491 continue;
7492 };
7493 let logical_id = entity.logical_id();
7494 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7495 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7496 }
7497 if entity.xmax != xid
7498 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
7499 {
7500 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
7501 }
7502 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
7503 }
7504
7505 Ok(())
7506 }
7507
7508 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
7509 let versioned_updates = self
7510 .inner
7511 .pending_versioned_updates
7512 .read()
7513 .get(&conn_id)
7514 .cloned()
7515 .unwrap_or_default();
7516 let tombstones = self
7517 .inner
7518 .pending_tombstones
7519 .read()
7520 .get(&conn_id)
7521 .cloned()
7522 .unwrap_or_default();
7523
7524 let store = self.inner.db.store();
7525 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
7526 if let Some(manager) = store.get_collection(&collection) {
7527 if let Some(mut entity) = manager.get(old_id) {
7528 entity.set_xmax(xid);
7529 let _ = manager.update(entity);
7530 }
7531 }
7532 }
7533 for (collection, id, xid, _previous_xmax) in tombstones {
7534 if let Some(manager) = store.get_collection(&collection) {
7535 if let Some(mut entity) = manager.get(id) {
7536 entity.set_xmax(xid);
7537 let _ = manager.update(entity);
7538 }
7539 }
7540 }
7541 }
7542
7543 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
7544 self.inner
7545 .pending_versioned_updates
7546 .write()
7547 .remove(&conn_id);
7548 }
7549
7550 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
7551 let Some(pending) = self
7552 .inner
7553 .pending_versioned_updates
7554 .write()
7555 .remove(&conn_id)
7556 else {
7557 return;
7558 };
7559
7560 let store = self.inner.db.store();
7561 for (collection, old_id, new_id, xid, previous_xmax) in pending {
7562 if let Some(manager) = store.get_collection(&collection) {
7563 if let Some(mut old) = manager.get(old_id) {
7564 if old.xmax == xid {
7565 old.set_xmax(previous_xmax);
7566 let _ = manager.update(old);
7567 }
7568 }
7569 }
7570 let _ = store.delete_batch(&collection, &[new_id]);
7571 }
7572 }
7573
7574 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
7575 let mut guard = self.inner.pending_versioned_updates.write();
7576 let Some(pending) = guard.get_mut(&conn_id) else {
7577 return 0;
7578 };
7579
7580 let store = self.inner.db.store();
7581 let mut reverted = 0usize;
7582 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
7583 if *xid < stamper_xid {
7584 return true;
7585 }
7586 if let Some(manager) = store.get_collection(collection) {
7587 if let Some(mut old) = manager.get(*old_id) {
7588 if old.xmax == *xid {
7589 old.set_xmax(*previous_xmax);
7590 let _ = manager.update(old);
7591 }
7592 }
7593 }
7594 let _ = store.delete_batch(collection, &[*new_id]);
7595 reverted += 1;
7596 false
7597 });
7598 if pending.is_empty() {
7599 guard.remove(&conn_id);
7600 }
7601 reverted
7602 }
7603
7604 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
7609 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
7610 return;
7611 };
7612 if pending.is_empty() {
7613 return;
7614 }
7615
7616 let store = self.inner.db.store();
7617 for (collection, id, _xid, _previous_xmax) in pending {
7618 store.context_index().remove_entity(id);
7619 self.cdc_emit(
7620 crate::replication::cdc::ChangeOperation::Delete,
7621 &collection,
7622 id.raw(),
7623 "entity",
7624 );
7625 }
7626 }
7627
7628 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
7635 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
7636 return;
7637 };
7638
7639 let store = self.inner.db.store();
7640 for (collection, id, xid, previous_xmax) in pending {
7641 let Some(manager) = store.get_collection(&collection) else {
7642 continue;
7643 };
7644 if let Some(mut entity) = manager.get(id) {
7645 if entity.xmax == xid {
7646 entity.set_xmax(previous_xmax);
7647 let _ = manager.update(entity);
7648 }
7649 }
7650 }
7651 }
7652
7653 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
7654 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
7655 return;
7656 };
7657 for event in pending {
7658 self.cdc_emit_kv(
7659 event.op,
7660 &event.collection,
7661 &event.key,
7662 0,
7663 event.before,
7664 event.after,
7665 );
7666 }
7667 }
7668
7669 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
7670 self.inner.pending_kv_watch_events.write().remove(&conn_id);
7671 }
7672
7673 fn materialize_graph_with_rls(
7682 &self,
7683 ) -> RedDBResult<(
7684 crate::storage::engine::GraphStore,
7685 std::collections::HashMap<
7686 String,
7687 std::collections::HashMap<String, crate::storage::schema::Value>,
7688 >,
7689 )> {
7690 use crate::storage::engine::GraphStore;
7691 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
7692 use crate::storage::unified::entity::{EntityData, EntityKind};
7693 use std::collections::{HashMap, HashSet};
7694
7695 let store = self.inner.db.store();
7696 let snap_ctx = capture_current_snapshot();
7697 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
7698
7699 let graph = GraphStore::new();
7700 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
7701 HashMap::new();
7702 let mut allowed_nodes: HashSet<String> = HashSet::new();
7703
7704 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
7708 HashMap::new();
7709 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
7710 HashMap::new();
7711
7712 let collections = store.list_collections();
7713
7714 for collection in &collections {
7716 let Some(manager) = store.get_collection(collection) else {
7717 continue;
7718 };
7719 let entities = manager.query_all(|_| true);
7720 for entity in entities {
7721 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
7722 continue;
7723 }
7724 let EntityKind::GraphNode(ref node) = entity.kind else {
7725 continue;
7726 };
7727 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
7728 continue;
7729 }
7730 let id_str = entity.id.raw().to_string();
7731 graph
7732 .add_node_with_label(
7733 &id_str,
7734 &node.label,
7735 &super::graph_node_label(&node.node_type),
7736 )
7737 .map_err(|err| RedDBError::Query(err.to_string()))?;
7738 allowed_nodes.insert(id_str.clone());
7739 if let EntityData::Node(node_data) = &entity.data {
7740 node_properties.insert(id_str, node_data.properties.clone());
7741 }
7742 }
7743 }
7744
7745 for collection in &collections {
7749 let Some(manager) = store.get_collection(collection) else {
7750 continue;
7751 };
7752 let entities = manager.query_all(|_| true);
7753 for entity in entities {
7754 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
7755 continue;
7756 }
7757 let EntityKind::GraphEdge(ref edge) = entity.kind else {
7758 continue;
7759 };
7760 if !allowed_nodes.contains(&edge.from_node)
7761 || !allowed_nodes.contains(&edge.to_node)
7762 {
7763 continue;
7764 }
7765 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
7766 continue;
7767 }
7768 let weight = match &entity.data {
7769 EntityData::Edge(e) => e.weight,
7770 _ => edge.weight as f32 / 1000.0,
7771 };
7772 graph
7773 .add_edge_with_label(
7774 &edge.from_node,
7775 &edge.to_node,
7776 &super::graph_edge_label(&edge.label),
7777 weight,
7778 )
7779 .map_err(|err| RedDBError::Query(err.to_string()))?;
7780 }
7781 }
7782
7783 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
7787
7788 Ok((graph, node_properties))
7789 }
7790
7791 pub(crate) fn stamp_xmin_if_in_txn(
7806 &self,
7807 collection: &str,
7808 id: crate::storage::unified::entity::EntityId,
7809 ) {
7810 let Some(xid) = self.current_xid() else {
7811 return;
7812 };
7813 let store = self.inner.db.store();
7814 let Some(manager) = store.get_collection(collection) else {
7815 return;
7816 };
7817 if let Some(mut entity) = manager.get(id) {
7818 entity.set_xmin(xid);
7819 let _ = manager.update(entity);
7820 }
7821 }
7822
7823 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
7831 let mut guard = self.inner.pending_tombstones.write();
7832 let Some(pending) = guard.get_mut(&conn_id) else {
7833 return 0;
7834 };
7835
7836 let store = self.inner.db.store();
7837 let mut revived = 0usize;
7838 pending.retain(|(collection, id, xid, previous_xmax)| {
7839 if *xid < stamper_xid {
7840 return true;
7842 }
7843 if let Some(manager) = store.get_collection(collection) {
7844 if let Some(mut entity) = manager.get(*id) {
7845 if entity.xmax == *xid {
7846 entity.set_xmax(*previous_xmax);
7847 let _ = manager.update(entity);
7848 revived += 1;
7849 }
7850 }
7851 }
7852 false
7853 });
7854 if pending.is_empty() {
7855 guard.remove(&conn_id);
7856 }
7857 revived
7858 }
7859
7860 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
7869 let conn_id = current_connection_id();
7870 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
7871 return ctx.snapshot;
7872 }
7873 let high_water = self.inner.snapshot_manager.peek_next_xid();
7879 self.inner.snapshot_manager.snapshot(high_water)
7880 }
7881
7882 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
7892 let conn_id = current_connection_id();
7893 self.inner
7894 .tx_contexts
7895 .read()
7896 .get(&conn_id)
7897 .map(|ctx| ctx.writer_xid())
7898 }
7899
7900 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
7903 Arc::clone(&self.inner.snapshot_manager)
7904 }
7905
7906 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
7907 let manager = &self.inner.snapshot_manager;
7908 let next_xid = manager.peek_next_xid();
7909 let mut cutoff = next_xid;
7910 if let Some(oldest_active) = manager.oldest_active_xid() {
7911 cutoff = cutoff.min(oldest_active);
7912 }
7913 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
7914 cutoff = cutoff.min(oldest_pinned);
7915 }
7916 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
7917 if retention_xids > 0 {
7918 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
7919 }
7920 cutoff
7921 }
7922
7923 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
7924 let registered = self.inner.index_store.list_indices(table);
7925 if registered.is_empty() {
7926 return Ok(());
7927 }
7928 let store = self.inner.db.store();
7929 let Some(manager) = store.get_collection(table) else {
7930 return Ok(());
7931 };
7932 let entity_fields = manager
7933 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
7934 .into_iter()
7935 .map(|entity| (entity.id, table_row_index_fields(&entity)))
7936 .collect::<Vec<_>>();
7937
7938 for index in registered {
7939 self.inner.index_store.drop_index(&index.name, table);
7940 self.inner
7941 .index_store
7942 .create_index(
7943 &index.name,
7944 table,
7945 &index.columns,
7946 index.method,
7947 index.unique,
7948 &entity_fields,
7949 )
7950 .map_err(RedDBError::Internal)?;
7951 self.inner.index_store.register(index);
7952 }
7953 self.invalidate_plan_cache();
7954 Ok(())
7955 }
7956
7957 pub fn current_txn_own_xids(
7962 &self,
7963 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
7964 let mut set = std::collections::HashSet::new();
7965 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
7966 set.insert(ctx.xid);
7967 for (_, sub) in &ctx.savepoints {
7968 set.insert(*sub);
7969 }
7970 for sub in &ctx.released_sub_xids {
7971 set.insert(*sub);
7972 }
7973 }
7974 set
7975 }
7976
7977 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
7984 Arc::clone(&self.inner.foreign_tables)
7985 }
7986
7987 pub fn is_rls_enabled(&self, table: &str) -> bool {
7989 self.inner.rls_enabled_tables.read().contains(table)
7990 }
7991
7992 pub fn matching_rls_policies(
7999 &self,
8000 table: &str,
8001 role: Option<&str>,
8002 action: crate::storage::query::ast::PolicyAction,
8003 ) -> Vec<crate::storage::query::ast::Filter> {
8004 self.matching_rls_policies_for_kind(
8009 table,
8010 role,
8011 action,
8012 crate::storage::query::ast::PolicyTargetKind::Table,
8013 )
8014 }
8015
8016 pub fn matching_rls_policies_for_kind(
8024 &self,
8025 table: &str,
8026 role: Option<&str>,
8027 action: crate::storage::query::ast::PolicyAction,
8028 kind: crate::storage::query::ast::PolicyTargetKind,
8029 ) -> Vec<crate::storage::query::ast::Filter> {
8030 if !self.is_rls_enabled(table) {
8031 return Vec::new();
8032 }
8033 let policies = self.inner.rls_policies.read();
8034 policies
8035 .iter()
8036 .filter_map(|((t, _), p)| {
8037 if t != table {
8038 return None;
8039 }
8040 if p.target_kind != kind
8049 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
8050 {
8051 return None;
8052 }
8053 if let Some(a) = p.action {
8055 if a != action {
8056 return None;
8057 }
8058 }
8059 if let Some(p_role) = p.role.as_deref() {
8061 match role {
8062 Some(r) if r == p_role => {}
8063 _ => return None,
8064 }
8065 }
8066 Some((*p.using).clone())
8067 })
8068 .collect()
8069 }
8070
8071 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
8072 let store = self.inner.db.store();
8073 if let Some(stats) =
8074 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
8075 {
8076 crate::storage::query::planner::stats_catalog::persist_table_stats(
8077 store.as_ref(),
8078 &stats,
8079 );
8080 } else {
8081 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8082 }
8083 self.invalidate_plan_cache();
8084 }
8085
8086 pub(crate) fn note_table_write(&self, table: &str) {
8087 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
8092 if !already_dirty {
8093 self.inner
8094 .planner_dirty_tables
8095 .write()
8096 .insert(table.to_string());
8097 }
8098 self.invalidate_result_cache_for_table(table);
8099 }
8100
8101 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
8109 let explain = self.explain_query(inner_sql)?;
8110
8111 let columns = vec![
8112 "op".to_string(),
8113 "source".to_string(),
8114 "est_rows".to_string(),
8115 "est_cost".to_string(),
8116 "depth".to_string(),
8117 ];
8118
8119 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
8120
8121 for name in &explain.cte_materializations {
8127 use std::sync::Arc;
8128 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
8129 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
8130 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
8131 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
8132 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
8133 rec.set_arc(Arc::from("depth"), Value::Integer(0));
8134 records.push(rec);
8135 }
8136
8137 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
8138
8139 let result = crate::storage::query::unified::UnifiedResult {
8140 columns,
8141 records,
8142 stats: Default::default(),
8143 pre_serialized_json: None,
8144 };
8145
8146 Ok(RuntimeQueryResult {
8147 query: raw_query.to_string(),
8148 mode: explain.mode,
8149 statement: "explain",
8150 engine: "runtime-explain",
8151 result,
8152 affected_rows: 0,
8153 statement_type: "select",
8154 })
8155 }
8156
    /// Authorizes `expr` for the identity bound to the current thread.
    ///
    /// Returns `Ok(())` without further checks when no auth store is
    /// configured or when no auth identity is set. Otherwise the expression
    /// is classified:
    ///
    /// * table/queue reads, inserts, updates, deletes and joins are mapped to
    ///   an `(Action, Resource)` pair and checked after the `match` — through
    ///   IAM policies when IAM authorization is enabled, through
    ///   `check_grant` otherwise;
    /// * every other handled variant returns directly from its arm
    ///   (role comparison, policy-management check, DDL collection check, or
    ///   unconditional allow);
    /// * variants not listed fall through the final wildcard arm and are
    ///   allowed.
    ///
    /// # Errors
    /// Returns a human-readable denial message as `Err(String)`.
    pub(super) fn check_query_privilege(
        &self,
        expr: &crate::storage::query::ast::QueryExpr,
    ) -> Result<(), String> {
        use crate::auth::privileges::{Action, AuthzContext, Resource};
        use crate::auth::UserId;
        use crate::storage::query::ast::QueryExpr;

        // No auth store configured: nothing to enforce.
        let auth_store = match self.inner.auth_store.read().clone() {
            Some(s) => s,
            None => return Ok(()),
        };

        // No identity on this thread: the check is skipped.
        let (username, role) = match current_auth_identity() {
            Some(p) => p,
            None => return Ok(()),
        };
        let tenant = current_tenant();

        let ctx = AuthzContext {
            principal: &username,
            effective_role: role,
            tenant: tenant.as_deref(),
        };
        let principal_id = UserId::from_parts(tenant.as_deref(), &username);

        // Arms that produce a value are checked after the match; all other
        // arms return from the function directly.
        let (action, resource) = match expr {
            QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
            QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
            // Graph queries: only the projected properties are checked, and
            // only when IAM authorization is enabled.
            QueryExpr::Graph(g) => {
                if auth_store.iam_authorization_enabled() {
                    self.check_graph_property_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        g,
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            // Vector queries: the `content` column of the collection is
            // checked, and only when IAM authorization is enabled.
            QueryExpr::Vector(v) => {
                if auth_store.iam_authorization_enabled() {
                    self.check_table_like_column_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        &v.collection,
                        &["content".to_string()],
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
            QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
            QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
            // Joins are checked as a select on the database resource.
            QueryExpr::Join(_) => (Action::Select, Resource::Database),
            // ACL/auth DDL requires the Admin role.
            QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
                        username, role
                    ))
                };
            }
            // IAM policy management statements are delegated to
            // `check_policy_management_privilege` with a per-statement action.
            QueryExpr::CreateIamPolicy { id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:put",
                    "policy",
                    id,
                );
            }
            QueryExpr::DropIamPolicy { id } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:drop",
                    "policy",
                    id,
                );
            }
            QueryExpr::AttachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:attach",
                    "policy",
                    policy_id,
                );
            }
            QueryExpr::DetachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:detach",
                    "policy",
                    policy_id,
                );
            }
            // Listing policies / effective permissions is allowed here
            // without a check.
            QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
                return Ok(());
            }
            QueryExpr::SimulatePolicy { .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:simulate",
                    "policy",
                    "*",
                );
            }
            // DROP of a named collection (any model) and TRUNCATE are
            // delegated to `check_ddl_collection_privilege`.
            QueryExpr::DropTable(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropGraph(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropVector(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropDocument(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropKv(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropCollection(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::Truncate(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "truncate",
                    &q.name,
                );
            }
            // Remaining DDL requires a role of at least Write.
            QueryExpr::CreateTable(_)
            | QueryExpr::CreateCollection(_)
            | QueryExpr::CreateVector(_)
            | QueryExpr::AlterTable(_)
            | QueryExpr::CreateIndex(_)
            | QueryExpr::DropIndex(_)
            | QueryExpr::CreateSchema(_)
            | QueryExpr::DropSchema(_)
            | QueryExpr::CreateSequence(_)
            | QueryExpr::DropSequence(_)
            | QueryExpr::CreateView(_)
            | QueryExpr::DropView(_)
            | QueryExpr::RefreshMaterializedView(_)
            | QueryExpr::CreatePolicy(_)
            | QueryExpr::DropPolicy(_)
            | QueryExpr::CreateServer(_)
            | QueryExpr::DropServer(_)
            | QueryExpr::CreateForeignTable(_)
            | QueryExpr::DropForeignTable(_)
            | QueryExpr::CreateTimeSeries(_)
            | QueryExpr::DropTimeSeries(_)
            | QueryExpr::CreateQueue(_)
            | QueryExpr::AlterQueue(_)
            | QueryExpr::DropQueue(_)
            | QueryExpr::CreateTree(_)
            | QueryExpr::DropTree(_) => {
                return if role >= crate::auth::Role::Write {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue DDL",
                        username, role
                    ))
                };
            }
            // Creating a migration requires at least Write.
            QueryExpr::CreateMigration(_) => {
                return if role >= crate::auth::Role::Write {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
                        username, role
                    ))
                };
            }
            // Applying or rolling back a migration requires Admin.
            QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
                        username, role
                    ))
                };
            }
            QueryExpr::ExplainMigration(_) => return Ok(()),
            // Any variant not listed above is allowed without a check.
            _ => return Ok(()),
        };

        if auth_store.iam_authorization_enabled() {
            // IAM path: translate the legacy action/resource and evaluate
            // the policy for this principal.
            let iam_action = legacy_action_to_iam(action);
            let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
            let iam_ctx = runtime_iam_context(role, tenant.as_deref());
            if !auth_store.check_policy_authz(&principal_id, iam_action, &iam_resource, &iam_ctx) {
                return Err(format!(
                    "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                    username, iam_action, iam_resource.kind, iam_resource.name
                ));
            }

            // Table reads additionally get a column-projection check.
            if let QueryExpr::Table(table) = expr {
                self.check_table_column_projection_privilege(
                    &auth_store,
                    &principal_id,
                    &iam_ctx,
                    table,
                )?;
            }

            // Updates additionally get a column check on the SET targets;
            // skipped when the update names no target columns.
            if let QueryExpr::Update(update) = expr {
                let columns = update_set_target_columns(update);
                if !columns.is_empty() {
                    let request = column_access_request_for_table_update(&update.table, columns);
                    let outcome =
                        auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
                    if let Some(denied) = outcome.first_denied_column() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
                            username, iam_action, denied.resource.kind, denied.resource.name
                        ));
                    }
                    if !outcome.allowed() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                            username,
                            iam_action,
                            outcome.table_resource.kind,
                            outcome.table_resource.name
                        ));
                    }
                }
            }

            Ok(())
        } else {
            // Legacy path: grant-based check; the error is stringified.
            auth_store
                .check_grant(&ctx, action, &resource)
                .map_err(|e| e.to_string())
        }
    }
8503
8504 fn check_table_column_projection_privilege(
8505 &self,
8506 auth_store: &Arc<crate::auth::store::AuthStore>,
8507 principal: &crate::auth::UserId,
8508 ctx: &crate::auth::policies::EvalContext,
8509 table: &crate::storage::query::ast::TableQuery,
8510 ) -> Result<(), String> {
8511 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
8512
8513 let columns = requested_table_columns_for_policy(table);
8514 if columns.is_empty() {
8515 return Ok(());
8516 }
8517
8518 let request = ColumnAccessRequest::select(table.table.clone(), columns);
8519 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
8520 if outcome.allowed() {
8521 return Ok(());
8522 }
8523
8524 if !matches!(
8525 outcome.table_decision,
8526 crate::auth::policies::Decision::Allow { .. }
8527 | crate::auth::policies::Decision::AdminBypass
8528 ) {
8529 return Err(format!(
8530 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8531 principal, outcome.table_resource.kind, outcome.table_resource.name
8532 ));
8533 }
8534
8535 let denied = outcome
8536 .first_denied_column()
8537 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
8538 match denied {
8539 Some(decision) => Err(format!(
8540 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8541 principal, decision.resource.kind, decision.resource.name
8542 )),
8543 None => Ok(()),
8544 }
8545 }
8546
8547 fn check_graph_property_projection_privilege(
8548 &self,
8549 auth_store: &Arc<crate::auth::store::AuthStore>,
8550 principal: &crate::auth::UserId,
8551 role: crate::auth::Role,
8552 tenant: Option<&str>,
8553 query: &crate::storage::query::ast::GraphQuery,
8554 ) -> Result<(), String> {
8555 let columns = explicit_graph_projection_properties(query);
8556 if columns.is_empty() {
8557 return Ok(());
8558 }
8559 self.check_table_like_column_projection_privilege(
8560 auth_store, principal, role, tenant, "graph", &columns,
8561 )
8562 }
8563
8564 fn check_table_like_column_projection_privilege(
8565 &self,
8566 auth_store: &Arc<crate::auth::store::AuthStore>,
8567 principal: &crate::auth::UserId,
8568 role: crate::auth::Role,
8569 tenant: Option<&str>,
8570 table: &str,
8571 columns: &[String],
8572 ) -> Result<(), String> {
8573 let iam_ctx = runtime_iam_context(role, tenant);
8574 let request =
8575 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
8576 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
8577 if outcome.allowed() {
8578 return Ok(());
8579 }
8580 let denied = outcome
8581 .first_denied_column()
8582 .map(|d| d.resource.name.clone())
8583 .unwrap_or_else(|| format!("{table}.<unknown>"));
8584 Err(format!(
8585 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
8586 principal, denied
8587 ))
8588 }
8589
8590 fn check_policy_management_privilege(
8591 &self,
8592 auth_store: &Arc<crate::auth::store::AuthStore>,
8593 principal: &crate::auth::UserId,
8594 role: crate::auth::Role,
8595 tenant: Option<&str>,
8596 action: &str,
8597 resource_kind: &str,
8598 resource_name: &str,
8599 ) -> Result<(), String> {
8600 if !auth_store.iam_authorization_enabled() {
8601 return if role == crate::auth::Role::Admin {
8602 Ok(())
8603 } else {
8604 Err(format!(
8605 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
8606 principal, role
8607 ))
8608 };
8609 }
8610
8611 let mut resource = crate::auth::policies::ResourceRef::new(
8612 resource_kind.to_string(),
8613 resource_name.to_string(),
8614 );
8615 if let Some(t) = tenant {
8616 resource = resource.with_tenant(t.to_string());
8617 }
8618 let ctx = runtime_iam_context(role, tenant);
8619 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
8620 Ok(())
8621 } else {
8622 Err(format!(
8623 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8624 principal, action, resource.kind, resource.name
8625 ))
8626 }
8627 }
8628
8629 fn check_ddl_collection_privilege(
8636 &self,
8637 auth_store: &Arc<crate::auth::store::AuthStore>,
8638 principal: &crate::auth::UserId,
8639 role: crate::auth::Role,
8640 tenant: Option<&str>,
8641 username: &str,
8642 action: &str,
8643 collection: &str,
8644 ) -> Result<(), String> {
8645 if role < crate::auth::Role::Write {
8646 let msg = format!(
8647 "principal=`{}` role=`{:?}` cannot issue DDL",
8648 username, role
8649 );
8650 self.inner.audit_log.record(
8651 action,
8652 username,
8653 collection,
8654 "denied",
8655 crate::json::Value::Null,
8656 );
8657 return Err(msg);
8658 }
8659
8660 if !auth_store.iam_authorization_enabled() {
8661 self.inner.audit_log.record(
8662 action,
8663 username,
8664 collection,
8665 "ok",
8666 crate::json::Value::Null,
8667 );
8668 return Ok(());
8669 }
8670
8671 let resource_name = collection.to_string();
8672 let mut resource = crate::auth::policies::ResourceRef::new(
8673 "collection".to_string(),
8674 resource_name.clone(),
8675 );
8676 if let Some(t) = tenant {
8677 resource = resource.with_tenant(t.to_string());
8678 }
8679 let ctx = runtime_iam_context(role, tenant);
8680 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
8681 self.inner.audit_log.record(
8682 action,
8683 username,
8684 &resource_name,
8685 "ok",
8686 crate::json::Value::Null,
8687 );
8688 Ok(())
8689 } else {
8690 self.inner.audit_log.record(
8691 action,
8692 username,
8693 &resource_name,
8694 "denied",
8695 crate::json::Value::Null,
8696 );
8697 Err(format!(
8698 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
8699 username, action, resource_name
8700 ))
8701 }
8702 }
8703
8704 fn execute_grant_statement(
8706 &self,
8707 query: &str,
8708 stmt: &crate::storage::query::ast::GrantStmt,
8709 ) -> RedDBResult<RuntimeQueryResult> {
8710 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
8711 use crate::auth::UserId;
8712 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
8713
8714 let auth_store = self
8715 .inner
8716 .auth_store
8717 .read()
8718 .clone()
8719 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
8720
8721 let (gname, grole) = current_auth_identity().ok_or_else(|| {
8723 RedDBError::Query("GRANT requires an authenticated principal".to_string())
8724 })?;
8725 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
8726 let granter_role = grole;
8727
8728 let mut actions: Vec<Action> = Vec::new();
8730 if stmt.all {
8731 actions.push(Action::All);
8732 } else {
8733 for kw in &stmt.actions {
8734 let a = Action::from_keyword(kw).ok_or_else(|| {
8735 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
8736 })?;
8737 actions.push(a);
8738 }
8739 }
8740
8741 let mut applied = 0usize;
8743 for obj in &stmt.objects {
8744 let resource = match stmt.object_kind {
8745 GrantObjectKind::Table => Resource::Table {
8746 schema: obj.schema.clone(),
8747 table: obj.name.clone(),
8748 },
8749 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
8750 GrantObjectKind::Database => Resource::Database,
8751 GrantObjectKind::Function => Resource::Function {
8752 schema: obj.schema.clone(),
8753 name: obj.name.clone(),
8754 },
8755 };
8756 for principal in &stmt.principals {
8757 let p = match principal {
8758 GrantPrincipalRef::Public => GrantPrincipal::Public,
8759 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
8760 GrantPrincipalRef::User { tenant, name } => {
8761 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
8762 }
8763 };
8764 let tenant = granter.tenant.clone();
8767 auth_store
8768 .grant(
8769 &granter,
8770 granter_role,
8771 p.clone(),
8772 resource.clone(),
8773 actions.clone(),
8774 stmt.with_grant_option,
8775 tenant.clone(),
8776 )
8777 .map_err(|e| RedDBError::Query(e.to_string()))?;
8778
8779 if let Some(policy) =
8783 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
8784 {
8785 let pid = policy.id.clone();
8786 auth_store
8787 .put_policy_internal(policy)
8788 .map_err(|e| RedDBError::Query(e.to_string()))?;
8789 let attachment = match &p {
8790 GrantPrincipal::User(uid) => {
8791 crate::auth::store::PrincipalRef::User(uid.clone())
8792 }
8793 GrantPrincipal::Group(group) => {
8794 crate::auth::store::PrincipalRef::Group(group.clone())
8795 }
8796 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
8797 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
8798 ),
8799 };
8800 auth_store
8801 .attach_policy(attachment, &pid)
8802 .map_err(|e| RedDBError::Query(e.to_string()))?;
8803 }
8804 applied += 1;
8805 tracing::info!(
8806 target: "audit",
8807 principal = %granter,
8808 action = "grant",
8809 "GRANT applied"
8810 );
8811 }
8812 }
8813
8814 self.invalidate_result_cache();
8815 Ok(RuntimeQueryResult::ok_message(
8816 query.to_string(),
8817 &format!("GRANT applied to {} target(s)", applied),
8818 "grant",
8819 ))
8820 }
8821
8822 fn execute_revoke_statement(
8824 &self,
8825 query: &str,
8826 stmt: &crate::storage::query::ast::RevokeStmt,
8827 ) -> RedDBResult<RuntimeQueryResult> {
8828 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
8829 use crate::auth::UserId;
8830 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
8831
8832 let auth_store = self
8833 .inner
8834 .auth_store
8835 .read()
8836 .clone()
8837 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
8838
8839 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
8840 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
8841 })?;
8842 let granter_role = grole;
8843
8844 let actions: Vec<Action> = if stmt.all {
8845 vec![Action::All]
8846 } else {
8847 stmt.actions
8848 .iter()
8849 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
8850 .collect()
8851 };
8852
8853 let mut total_removed = 0usize;
8854 for obj in &stmt.objects {
8855 let resource = match stmt.object_kind {
8856 GrantObjectKind::Table => Resource::Table {
8857 schema: obj.schema.clone(),
8858 table: obj.name.clone(),
8859 },
8860 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
8861 GrantObjectKind::Database => Resource::Database,
8862 GrantObjectKind::Function => Resource::Function {
8863 schema: obj.schema.clone(),
8864 name: obj.name.clone(),
8865 },
8866 };
8867 for principal in &stmt.principals {
8868 let p = match principal {
8869 GrantPrincipalRef::Public => GrantPrincipal::Public,
8870 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
8871 GrantPrincipalRef::User { tenant, name } => {
8872 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
8873 }
8874 };
8875 let removed = auth_store
8876 .revoke(granter_role, &p, &resource, &actions)
8877 .map_err(|e| RedDBError::Query(e.to_string()))?;
8878 let _removed_policies =
8879 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
8880 total_removed += removed;
8881 }
8882 }
8883
8884 self.invalidate_result_cache();
8885 Ok(RuntimeQueryResult::ok_message(
8886 query.to_string(),
8887 &format!("REVOKE removed {} grant(s)", total_removed),
8888 "revoke",
8889 ))
8890 }
8891
8892 fn execute_alter_user_statement(
8894 &self,
8895 query: &str,
8896 stmt: &crate::storage::query::ast::AlterUserStmt,
8897 ) -> RedDBResult<RuntimeQueryResult> {
8898 use crate::auth::privileges::UserAttributes;
8899 use crate::auth::UserId;
8900 use crate::storage::query::ast::AlterUserAttribute;
8901
8902 let auth_store = self
8903 .inner
8904 .auth_store
8905 .read()
8906 .clone()
8907 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
8908
8909 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
8910 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
8911 })?;
8912 if grole != crate::auth::Role::Admin {
8913 return Err(RedDBError::Query(
8914 "ALTER USER requires Admin role".to_string(),
8915 ));
8916 }
8917
8918 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
8919
8920 let mut attrs = auth_store.user_attributes(&target);
8923 let mut enable_change: Option<bool> = None;
8924
8925 for a in &stmt.attributes {
8926 match a {
8927 AlterUserAttribute::ValidUntil(ts) => {
8928 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
8932 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
8933 })?;
8934 attrs.valid_until = Some(ms);
8935 }
8936 AlterUserAttribute::ConnectionLimit(n) => {
8937 if *n < 0 {
8938 return Err(RedDBError::Query(
8939 "CONNECTION LIMIT must be non-negative".to_string(),
8940 ));
8941 }
8942 attrs.connection_limit = Some(*n as u32);
8943 }
8944 AlterUserAttribute::SetSearchPath(p) => {
8945 attrs.search_path = Some(p.clone());
8946 }
8947 AlterUserAttribute::AddGroup(g) => {
8948 if !attrs.groups.iter().any(|existing| existing == g) {
8949 attrs.groups.push(g.clone());
8950 attrs.groups.sort();
8951 }
8952 }
8953 AlterUserAttribute::DropGroup(g) => {
8954 attrs.groups.retain(|existing| existing != g);
8955 }
8956 AlterUserAttribute::Enable => enable_change = Some(true),
8957 AlterUserAttribute::Disable => enable_change = Some(false),
8958 AlterUserAttribute::Password(_) => {
8959 }
8963 }
8964 }
8965
8966 auth_store
8967 .set_user_attributes(&target, attrs)
8968 .map_err(|e| RedDBError::Query(e.to_string()))?;
8969 if let Some(en) = enable_change {
8970 auth_store
8971 .set_user_enabled(&target, en)
8972 .map_err(|e| RedDBError::Query(e.to_string()))?;
8973 }
8974 self.invalidate_result_cache();
8975 tracing::info!(
8976 target: "audit",
8977 principal = %target,
8978 action = "alter_user",
8979 "ALTER USER applied"
8980 );
8981
8982 Ok(RuntimeQueryResult::ok_message(
8983 query.to_string(),
8984 &format!("ALTER USER {} applied", target),
8985 "alter_user",
8986 ))
8987 }
8988
8989 fn execute_create_iam_policy(
8994 &self,
8995 query: &str,
8996 id: &str,
8997 json: &str,
8998 ) -> RedDBResult<RuntimeQueryResult> {
8999 use crate::auth::policies::Policy;
9000
9001 let auth_store = self
9002 .inner
9003 .auth_store
9004 .read()
9005 .clone()
9006 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9007
9008 let mut policy = Policy::from_json_str(json)
9013 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
9014 if policy.id != id {
9015 policy.id = id.to_string();
9016 }
9017 let pid = policy.id.clone();
9018 auth_store
9019 .put_policy(policy)
9020 .map_err(|e| RedDBError::Query(e.to_string()))?;
9021
9022 let principal = current_auth_identity()
9023 .map(|(u, _)| u)
9024 .unwrap_or_else(|| "anonymous".into());
9025 tracing::info!(
9026 target: "audit",
9027 principal = %principal,
9028 action = "iam:policy.put",
9029 matched_policy_id = %pid,
9030 "CREATE POLICY applied"
9031 );
9032 self.inner.audit_log.record(
9033 "iam/policy.put",
9034 &principal,
9035 &pid,
9036 "ok",
9037 crate::json::Value::Null,
9038 );
9039
9040 self.invalidate_result_cache();
9041 Ok(RuntimeQueryResult::ok_message(
9042 query.to_string(),
9043 &format!("policy `{pid}` stored"),
9044 "create_iam_policy",
9045 ))
9046 }
9047
9048 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
9049 let auth_store = self
9050 .inner
9051 .auth_store
9052 .read()
9053 .clone()
9054 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9055 auth_store
9056 .delete_policy(id)
9057 .map_err(|e| RedDBError::Query(e.to_string()))?;
9058
9059 let principal = current_auth_identity()
9060 .map(|(u, _)| u)
9061 .unwrap_or_else(|| "anonymous".into());
9062 tracing::info!(
9063 target: "audit",
9064 principal = %principal,
9065 action = "iam:policy.drop",
9066 matched_policy_id = %id,
9067 "DROP POLICY applied"
9068 );
9069 self.inner.audit_log.record(
9070 "iam/policy.drop",
9071 &principal,
9072 id,
9073 "ok",
9074 crate::json::Value::Null,
9075 );
9076
9077 self.invalidate_result_cache();
9078 Ok(RuntimeQueryResult::ok_message(
9079 query.to_string(),
9080 &format!("policy `{id}` dropped"),
9081 "drop_iam_policy",
9082 ))
9083 }
9084
9085 fn execute_attach_policy(
9086 &self,
9087 query: &str,
9088 policy_id: &str,
9089 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9090 ) -> RedDBResult<RuntimeQueryResult> {
9091 use crate::auth::store::PrincipalRef;
9092 use crate::auth::UserId;
9093 use crate::storage::query::ast::PolicyPrincipalRef;
9094
9095 let auth_store = self
9096 .inner
9097 .auth_store
9098 .read()
9099 .clone()
9100 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9101 let p = match principal {
9102 PolicyPrincipalRef::User(u) => {
9103 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9104 }
9105 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9106 };
9107 let pretty_target = principal_label(principal);
9108 auth_store
9109 .attach_policy(p, policy_id)
9110 .map_err(|e| RedDBError::Query(e.to_string()))?;
9111
9112 let principal_str = current_auth_identity()
9113 .map(|(u, _)| u)
9114 .unwrap_or_else(|| "anonymous".into());
9115 tracing::info!(
9116 target: "audit",
9117 principal = %principal_str,
9118 action = "iam:policy.attach",
9119 matched_policy_id = %policy_id,
9120 target = %pretty_target,
9121 "ATTACH POLICY applied"
9122 );
9123 self.inner.audit_log.record(
9124 "iam/policy.attach",
9125 &principal_str,
9126 &pretty_target,
9127 "ok",
9128 crate::json::Value::Null,
9129 );
9130
9131 self.invalidate_result_cache();
9132 Ok(RuntimeQueryResult::ok_message(
9133 query.to_string(),
9134 &format!("policy `{policy_id}` attached to {pretty_target}"),
9135 "attach_policy",
9136 ))
9137 }
9138
9139 fn execute_detach_policy(
9140 &self,
9141 query: &str,
9142 policy_id: &str,
9143 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9144 ) -> RedDBResult<RuntimeQueryResult> {
9145 use crate::auth::store::PrincipalRef;
9146 use crate::auth::UserId;
9147 use crate::storage::query::ast::PolicyPrincipalRef;
9148
9149 let auth_store = self
9150 .inner
9151 .auth_store
9152 .read()
9153 .clone()
9154 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9155 let p = match principal {
9156 PolicyPrincipalRef::User(u) => {
9157 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9158 }
9159 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9160 };
9161 let pretty_target = principal_label(principal);
9162 auth_store
9163 .detach_policy(p, policy_id)
9164 .map_err(|e| RedDBError::Query(e.to_string()))?;
9165
9166 let principal_str = current_auth_identity()
9167 .map(|(u, _)| u)
9168 .unwrap_or_else(|| "anonymous".into());
9169 tracing::info!(
9170 target: "audit",
9171 principal = %principal_str,
9172 action = "iam:policy.detach",
9173 matched_policy_id = %policy_id,
9174 target = %pretty_target,
9175 "DETACH POLICY applied"
9176 );
9177 self.inner.audit_log.record(
9178 "iam/policy.detach",
9179 &principal_str,
9180 &pretty_target,
9181 "ok",
9182 crate::json::Value::Null,
9183 );
9184
9185 self.invalidate_result_cache();
9186 Ok(RuntimeQueryResult::ok_message(
9187 query.to_string(),
9188 &format!("policy `{policy_id}` detached from {pretty_target}"),
9189 "detach_policy",
9190 ))
9191 }
9192
9193 fn execute_show_policies(
9194 &self,
9195 query: &str,
9196 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
9197 ) -> RedDBResult<RuntimeQueryResult> {
9198 use crate::auth::UserId;
9199 use crate::storage::query::ast::PolicyPrincipalRef;
9200 use crate::storage::query::unified::UnifiedRecord;
9201 use crate::storage::schema::Value as SchemaValue;
9202 use std::sync::Arc;
9203
9204 let auth_store = self
9205 .inner
9206 .auth_store
9207 .read()
9208 .clone()
9209 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9210
9211 let pols = match filter {
9212 None => auth_store.list_policies(),
9213 Some(PolicyPrincipalRef::User(u)) => {
9214 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
9215 auth_store.effective_policies(&id)
9216 }
9217 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
9218 };
9219
9220 let mut records = Vec::with_capacity(pols.len());
9221 for p in pols.iter() {
9222 let mut rec = UnifiedRecord::default();
9223 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
9224 rec.set_arc(
9225 Arc::from("statements"),
9226 SchemaValue::Integer(p.statements.len() as i64),
9227 );
9228 rec.set_arc(
9229 Arc::from("tenant"),
9230 p.tenant
9231 .as_deref()
9232 .map(|t| SchemaValue::text(t.to_string()))
9233 .unwrap_or(SchemaValue::Null),
9234 );
9235 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
9236 records.push(rec);
9237 }
9238 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9239 result.records = records;
9240 Ok(RuntimeQueryResult {
9241 query: query.to_string(),
9242 mode: crate::storage::query::modes::QueryMode::Sql,
9243 statement: "show_policies",
9244 engine: "iam-policies",
9245 result,
9246 affected_rows: 0,
9247 statement_type: "select",
9248 })
9249 }
9250
9251 fn execute_show_effective_permissions(
9252 &self,
9253 query: &str,
9254 user: &crate::storage::query::ast::PolicyUserRef,
9255 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
9256 ) -> RedDBResult<RuntimeQueryResult> {
9257 use crate::auth::UserId;
9258 use crate::storage::query::unified::UnifiedRecord;
9259 use crate::storage::schema::Value as SchemaValue;
9260 use std::sync::Arc;
9261
9262 let auth_store = self
9263 .inner
9264 .auth_store
9265 .read()
9266 .clone()
9267 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9268 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9269 let pols = auth_store.effective_policies(&id);
9270
9271 let mut records = Vec::new();
9274 for p in pols.iter() {
9275 for (idx, st) in p.statements.iter().enumerate() {
9276 if let Some(_r) = resource {
9277 }
9281 let mut rec = UnifiedRecord::default();
9282 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
9283 rec.set_arc(
9284 Arc::from("statement_index"),
9285 SchemaValue::Integer(idx as i64),
9286 );
9287 rec.set_arc(
9288 Arc::from("sid"),
9289 st.sid
9290 .as_deref()
9291 .map(|s| SchemaValue::text(s.to_string()))
9292 .unwrap_or(SchemaValue::Null),
9293 );
9294 rec.set_arc(
9295 Arc::from("effect"),
9296 SchemaValue::text(match st.effect {
9297 crate::auth::policies::Effect::Allow => "allow",
9298 crate::auth::policies::Effect::Deny => "deny",
9299 }),
9300 );
9301 rec.set_arc(
9302 Arc::from("actions"),
9303 SchemaValue::Integer(st.actions.len() as i64),
9304 );
9305 rec.set_arc(
9306 Arc::from("resources"),
9307 SchemaValue::Integer(st.resources.len() as i64),
9308 );
9309 records.push(rec);
9310 }
9311 }
9312 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9313 result.records = records;
9314 Ok(RuntimeQueryResult {
9315 query: query.to_string(),
9316 mode: crate::storage::query::modes::QueryMode::Sql,
9317 statement: "show_effective_permissions",
9318 engine: "iam-policies",
9319 result,
9320 affected_rows: 0,
9321 statement_type: "select",
9322 })
9323 }
9324
9325 fn execute_simulate_policy(
9326 &self,
9327 query: &str,
9328 user: &crate::storage::query::ast::PolicyUserRef,
9329 action: &str,
9330 resource: &crate::storage::query::ast::PolicyResourceRef,
9331 ) -> RedDBResult<RuntimeQueryResult> {
9332 use crate::auth::policies::ResourceRef;
9333 use crate::auth::store::SimCtx;
9334 use crate::auth::UserId;
9335 use crate::storage::query::unified::UnifiedRecord;
9336 use crate::storage::schema::Value as SchemaValue;
9337 use std::sync::Arc;
9338
9339 let auth_store = self
9340 .inner
9341 .auth_store
9342 .read()
9343 .clone()
9344 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9345 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9346 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
9347 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
9348
9349 let principal_str = current_auth_identity()
9350 .map(|(u, _)| u)
9351 .unwrap_or_else(|| "anonymous".into());
9352 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
9353 tracing::info!(
9354 target: "audit",
9355 principal = %principal_str,
9356 action = "iam:policy.simulate",
9357 decision = %decision_str,
9358 matched_policy_id = ?matched_pid,
9359 matched_sid = ?matched_sid,
9360 "SIMULATE issued"
9361 );
9362 self.inner.audit_log.record(
9363 "iam/policy.simulate",
9364 &principal_str,
9365 &id.to_string(),
9366 "ok",
9367 crate::json::Value::Null,
9368 );
9369
9370 let mut rec = UnifiedRecord::default();
9371 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
9372 rec.set_arc(
9373 Arc::from("matched_policy_id"),
9374 matched_pid
9375 .map(SchemaValue::text)
9376 .unwrap_or(SchemaValue::Null),
9377 );
9378 rec.set_arc(
9379 Arc::from("matched_sid"),
9380 matched_sid
9381 .map(SchemaValue::text)
9382 .unwrap_or(SchemaValue::Null),
9383 );
9384 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
9385 rec.set_arc(
9386 Arc::from("trail_len"),
9387 SchemaValue::Integer(outcome.trail.len() as i64),
9388 );
9389 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9390 result.records = vec![rec];
9391 Ok(RuntimeQueryResult {
9392 query: query.to_string(),
9393 mode: crate::storage::query::modes::QueryMode::Sql,
9394 statement: "simulate_policy",
9395 engine: "iam-policies",
9396 result,
9397 affected_rows: 0,
9398 statement_type: "select",
9399 })
9400 }
9401}
9402
9403fn grant_to_iam_policy(
9408 principal: &crate::auth::privileges::GrantPrincipal,
9409 resource: &crate::auth::privileges::Resource,
9410 actions: &[crate::auth::privileges::Action],
9411 tenant: Option<&str>,
9412) -> Option<crate::auth::policies::Policy> {
9413 use crate::auth::policies::{
9414 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
9415 };
9416 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9417
9418 if matches!(principal, GrantPrincipal::Group(_)) {
9419 return None;
9420 }
9421
9422 let now = crate::auth::now_ms();
9423 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
9424
9425 let resource_str = match resource {
9426 Resource::Database => "table:*".to_string(),
9427 Resource::Schema(s) => format!("table:{s}.*"),
9428 Resource::Table { schema, table } => match schema {
9429 Some(s) => format!("table:{s}.{table}"),
9430 None => format!("table:{table}"),
9431 },
9432 Resource::Function { schema, name } => match schema {
9433 Some(s) => format!("function:{s}.{name}"),
9434 None => format!("function:{name}"),
9435 },
9436 };
9437
9438 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
9442 vec![ActionPattern::Wildcard]
9443 } else {
9444 actions
9445 .iter()
9446 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
9447 .collect()
9448 };
9449 if action_patterns.is_empty() {
9450 return None;
9451 }
9452
9453 let resource_patterns = if resource_str == "*" {
9458 vec![ResourcePattern::Wildcard]
9459 } else if resource_str.contains('*') {
9460 vec![ResourcePattern::Glob(resource_str.clone())]
9461 } else if let Some((kind, name)) = resource_str.split_once(':') {
9462 vec![ResourcePattern::Exact {
9463 kind: kind.to_string(),
9464 name: name.to_string(),
9465 }]
9466 } else {
9467 vec![ResourcePattern::Wildcard]
9468 };
9469
9470 let policy = Policy {
9471 id,
9472 version: 1,
9473 tenant: tenant.map(|t| t.to_string()),
9474 created_at: now,
9475 updated_at: now,
9476 statements: vec![Statement {
9477 sid: None,
9478 effect: Effect::Allow,
9479 actions: action_patterns,
9480 resources: resource_patterns,
9481 condition: None,
9482 }],
9483 };
9484 if policy.validate().is_err() {
9485 return None;
9486 }
9487 Some(policy)
9488}
9489
9490fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
9491 use crate::auth::privileges::Action;
9492 match action {
9493 Action::Select => "select",
9494 Action::Insert => "insert",
9495 Action::Update => "update",
9496 Action::Delete => "delete",
9497 Action::Truncate => "truncate",
9498 Action::References => "references",
9499 Action::Execute => "execute",
9500 Action::Usage => "usage",
9501 Action::All => "*",
9502 }
9503}
9504
9505fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
9506 let mut columns = Vec::new();
9507 for (column, _) in &query.assignment_exprs {
9508 if !columns.iter().any(|seen| seen == column) {
9509 columns.push(column.clone());
9510 }
9511 }
9512 columns
9513}
9514
9515fn column_access_request_for_table_update(
9516 table_name: &str,
9517 columns: Vec<String>,
9518) -> crate::auth::ColumnAccessRequest {
9519 match table_name.split_once('.') {
9520 Some((schema, table)) => {
9521 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
9522 .with_schema(schema.to_string())
9523 }
9524 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
9525 }
9526}
9527
9528fn requested_table_columns_for_policy(
9529 table: &crate::storage::query::ast::TableQuery,
9530) -> Vec<String> {
9531 use crate::storage::query::sql_lowering::{
9532 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
9533 effective_table_projections,
9534 };
9535
9536 let table_name = table.table.as_str();
9537 let table_alias = table.alias.as_deref();
9538 let mut columns = std::collections::BTreeSet::new();
9539
9540 for projection in effective_table_projections(table) {
9541 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
9542 }
9543 if let Some(filter) = effective_table_filter(table) {
9544 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
9545 }
9546 for expr in effective_table_group_by_exprs(table) {
9547 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
9548 }
9549 if let Some(filter) = effective_table_having_filter(table) {
9550 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
9551 }
9552 for order in &table.order_by {
9553 if let Some(expr) = order.expr.as_ref() {
9554 collect_expr_columns(expr, table_name, table_alias, &mut columns);
9555 } else {
9556 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
9557 }
9558 }
9559
9560 columns.into_iter().collect()
9561}
9562
9563fn collect_projection_columns(
9564 projection: &crate::storage::query::ast::Projection,
9565 table_name: &str,
9566 table_alias: Option<&str>,
9567 columns: &mut std::collections::BTreeSet<String>,
9568) {
9569 use crate::storage::query::ast::Projection;
9570 match projection {
9571 Projection::All => {
9572 columns.insert("*".to_string());
9573 }
9574 Projection::Column(column) | Projection::Alias(column, _) => {
9575 if column != "*" {
9576 columns.insert(column.clone());
9577 }
9578 }
9579 Projection::Function(_, args) => {
9580 for arg in args {
9581 collect_projection_columns(arg, table_name, table_alias, columns);
9582 }
9583 }
9584 Projection::Expression(filter, _) => {
9585 collect_filter_columns(filter, table_name, table_alias, columns);
9586 }
9587 Projection::Field(field, _) => {
9588 collect_field_ref_column(field, table_name, table_alias, columns);
9589 }
9590 }
9591}
9592
9593fn collect_filter_columns(
9594 filter: &crate::storage::query::ast::Filter,
9595 table_name: &str,
9596 table_alias: Option<&str>,
9597 columns: &mut std::collections::BTreeSet<String>,
9598) {
9599 use crate::storage::query::ast::Filter;
9600 match filter {
9601 Filter::Compare { field, .. }
9602 | Filter::IsNull(field)
9603 | Filter::IsNotNull(field)
9604 | Filter::In { field, .. }
9605 | Filter::Between { field, .. }
9606 | Filter::Like { field, .. }
9607 | Filter::StartsWith { field, .. }
9608 | Filter::EndsWith { field, .. }
9609 | Filter::Contains { field, .. } => {
9610 collect_field_ref_column(field, table_name, table_alias, columns);
9611 }
9612 Filter::CompareFields { left, right, .. } => {
9613 collect_field_ref_column(left, table_name, table_alias, columns);
9614 collect_field_ref_column(right, table_name, table_alias, columns);
9615 }
9616 Filter::CompareExpr { lhs, rhs, .. } => {
9617 collect_expr_columns(lhs, table_name, table_alias, columns);
9618 collect_expr_columns(rhs, table_name, table_alias, columns);
9619 }
9620 Filter::And(left, right) | Filter::Or(left, right) => {
9621 collect_filter_columns(left, table_name, table_alias, columns);
9622 collect_filter_columns(right, table_name, table_alias, columns);
9623 }
9624 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
9625 }
9626}
9627
9628fn collect_expr_columns(
9629 expr: &crate::storage::query::ast::Expr,
9630 table_name: &str,
9631 table_alias: Option<&str>,
9632 columns: &mut std::collections::BTreeSet<String>,
9633) {
9634 use crate::storage::query::ast::Expr;
9635 match expr {
9636 Expr::Column { field, .. } => {
9637 collect_field_ref_column(field, table_name, table_alias, columns);
9638 }
9639 Expr::Literal { .. } | Expr::Parameter { .. } => {}
9640 Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
9641 collect_expr_columns(operand, table_name, table_alias, columns);
9642 }
9643 Expr::BinaryOp { lhs, rhs, .. } => {
9644 collect_expr_columns(lhs, table_name, table_alias, columns);
9645 collect_expr_columns(rhs, table_name, table_alias, columns);
9646 }
9647 Expr::FunctionCall { args, .. } => {
9648 for arg in args {
9649 collect_expr_columns(arg, table_name, table_alias, columns);
9650 }
9651 }
9652 Expr::Case {
9653 branches, else_, ..
9654 } => {
9655 for (condition, value) in branches {
9656 collect_expr_columns(condition, table_name, table_alias, columns);
9657 collect_expr_columns(value, table_name, table_alias, columns);
9658 }
9659 if let Some(value) = else_ {
9660 collect_expr_columns(value, table_name, table_alias, columns);
9661 }
9662 }
9663 Expr::IsNull { operand, .. } => {
9664 collect_expr_columns(operand, table_name, table_alias, columns);
9665 }
9666 Expr::InList { target, values, .. } => {
9667 collect_expr_columns(target, table_name, table_alias, columns);
9668 for value in values {
9669 collect_expr_columns(value, table_name, table_alias, columns);
9670 }
9671 }
9672 Expr::Between {
9673 target, low, high, ..
9674 } => {
9675 collect_expr_columns(target, table_name, table_alias, columns);
9676 collect_expr_columns(low, table_name, table_alias, columns);
9677 collect_expr_columns(high, table_name, table_alias, columns);
9678 }
9679 Expr::Subquery { .. } => {}
9680 }
9681}
9682
9683fn collect_field_ref_column(
9684 field: &crate::storage::query::ast::FieldRef,
9685 table_name: &str,
9686 table_alias: Option<&str>,
9687 columns: &mut std::collections::BTreeSet<String>,
9688) {
9689 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
9690 if column != "*" {
9691 columns.insert(column);
9692 }
9693 }
9694}
9695
9696fn policy_column_name_from_field_ref(
9697 field: &crate::storage::query::ast::FieldRef,
9698 table_name: &str,
9699 table_alias: Option<&str>,
9700) -> Option<String> {
9701 match field {
9702 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
9703 if column == "*" {
9704 return Some("*".to_string());
9705 }
9706 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
9707 Some(column.clone())
9708 } else {
9709 Some(format!("{table}.{column}"))
9710 }
9711 }
9712 _ => None,
9713 }
9714}
9715
9716fn legacy_resource_to_iam(
9717 resource: &crate::auth::privileges::Resource,
9718 tenant: Option<&str>,
9719) -> crate::auth::policies::ResourceRef {
9720 use crate::auth::privileges::Resource;
9721
9722 let (kind, name) = match resource {
9723 Resource::Database => ("database".to_string(), "*".to_string()),
9724 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
9725 Resource::Table { schema, table } => (
9726 "table".to_string(),
9727 match schema {
9728 Some(s) => format!("{s}.{table}"),
9729 None => table.clone(),
9730 },
9731 ),
9732 Resource::Function { schema, name } => (
9733 "function".to_string(),
9734 match schema {
9735 Some(s) => format!("{s}.{name}"),
9736 None => name.clone(),
9737 },
9738 ),
9739 };
9740
9741 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
9742 if let Some(t) = tenant {
9743 out = out.with_tenant(t.to_string());
9744 }
9745 out
9746}
9747
/// One table-backed side of a join, as produced by `table_side_context`.
#[derive(Debug)]
struct JoinTableSide {
    /// Name of the underlying table.
    table: String,
    /// Alias the side is referred to by; `table_side_context` falls back to
    /// the table name when the query gives no alias.
    alias: String,
}
9753
9754fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
9755 match expr {
9756 QueryExpr::Table(table) => Some(JoinTableSide {
9757 table: table.table.clone(),
9758 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
9759 }),
9760 _ => None,
9761 }
9762}
9763
9764fn collect_projection_columns_for_table(
9765 projection: &Projection,
9766 table: &str,
9767 alias: Option<&str>,
9768 out: &mut BTreeSet<String>,
9769) {
9770 match projection {
9771 Projection::Column(column) | Projection::Alias(column, _) => {
9772 match split_qualified_column(column) {
9773 Some((qualifier, column))
9774 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
9775 {
9776 push_policy_column(column, out);
9777 }
9778 Some(_) => {}
9779 None => push_policy_column(column, out),
9780 }
9781 }
9782 Projection::Field(
9783 FieldRef::TableColumn {
9784 table: qualifier,
9785 column,
9786 },
9787 _,
9788 ) => {
9789 if qualifier.is_empty()
9790 || qualifier == table
9791 || alias.is_some_and(|alias| qualifier == alias)
9792 {
9793 push_policy_column(column, out);
9794 }
9795 }
9796 Projection::Field(
9797 FieldRef::NodeProperty {
9798 alias: qualifier,
9799 property,
9800 },
9801 _,
9802 )
9803 | Projection::Field(
9804 FieldRef::EdgeProperty {
9805 alias: qualifier,
9806 property,
9807 },
9808 _,
9809 ) => {
9810 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
9811 push_policy_column(property, out);
9812 }
9813 }
9814 Projection::Function(_, args) => {
9815 for arg in args {
9816 collect_projection_columns_for_table(arg, table, alias, out);
9817 }
9818 }
9819 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
9820 }
9821}
9822
9823fn collect_projection_columns_for_join_side(
9824 projection: &Projection,
9825 left: Option<&JoinTableSide>,
9826 right: Option<&JoinTableSide>,
9827 out: &mut HashMap<String, BTreeSet<String>>,
9828) -> RedDBResult<()> {
9829 match projection {
9830 Projection::Column(column) | Projection::Alias(column, _) => {
9831 if let Some((qualifier, column)) = split_qualified_column(column) {
9832 push_qualified_join_column(qualifier, column, left, right, out);
9833 } else {
9834 push_unqualified_join_column(column, left, right, out);
9835 }
9836 }
9837 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
9838 if table.is_empty() {
9839 push_unqualified_join_column(column, left, right, out);
9840 } else if let Some(side) = [left, right]
9841 .into_iter()
9842 .flatten()
9843 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
9844 {
9845 push_join_column(&side.table, column, out);
9846 }
9847 }
9848 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
9849 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
9850 push_qualified_join_column(alias, property, left, right, out);
9851 }
9852 Projection::Function(_, args) => {
9853 for arg in args {
9854 collect_projection_columns_for_join_side(arg, left, right, out)?;
9855 }
9856 }
9857 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
9858 }
9859 Ok(())
9860}
9861
/// Splits `qualifier.column` into its two parts.
///
/// Returns `None` when there is no dot, when either part is empty, or when
/// the part after the first dot contains another dot.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    match column.split_once('.') {
        Some((qualifier, name))
            if !(qualifier.is_empty() || name.is_empty() || name.contains('.')) =>
        {
            Some((qualifier, name))
        }
        _ => None,
    }
}
9869
9870fn push_qualified_join_column(
9871 qualifier: &str,
9872 column: &str,
9873 left: Option<&JoinTableSide>,
9874 right: Option<&JoinTableSide>,
9875 out: &mut HashMap<String, BTreeSet<String>>,
9876) {
9877 if let Some(side) = [left, right]
9878 .into_iter()
9879 .flatten()
9880 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
9881 {
9882 push_join_column(&side.table, column, out);
9883 }
9884}
9885
9886fn push_unqualified_join_column(
9887 column: &str,
9888 left: Option<&JoinTableSide>,
9889 right: Option<&JoinTableSide>,
9890 out: &mut HashMap<String, BTreeSet<String>>,
9891) {
9892 for side in [left, right].into_iter().flatten() {
9893 push_join_column(&side.table, column, out);
9894 }
9895}
9896
9897fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
9898 if is_policy_column_name(column) {
9899 out.entry(table.to_string())
9900 .or_default()
9901 .insert(column.to_string());
9902 }
9903}
9904
9905fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
9906 if is_policy_column_name(column) {
9907 out.insert(column.to_string());
9908 }
9909}
9910
/// Tells whether `column` is a name that should be recorded for policy
/// checks.
///
/// Rejected: the empty string, the wildcard `*`, and names starting with
/// `LIT:` or `TYPE:` (presumably synthetic markers emitted by the parser;
/// confirm against the producer).
fn is_policy_column_name(column: &str) -> bool {
    const EXCLUDED_PREFIXES: [&str; 2] = ["LIT:", "TYPE:"];

    if column.is_empty() || column == "*" {
        return false;
    }
    !EXCLUDED_PREFIXES
        .iter()
        .any(|&prefix| column.starts_with(prefix))
}
9917
9918fn runtime_iam_context(
9919 role: crate::auth::Role,
9920 tenant: Option<&str>,
9921) -> crate::auth::policies::EvalContext {
9922 crate::auth::policies::EvalContext {
9923 principal_tenant: tenant.map(|t| t.to_string()),
9924 current_tenant: tenant.map(|t| t.to_string()),
9925 peer_ip: None,
9926 mfa_present: false,
9927 now_ms: crate::auth::now_ms(),
9928 principal_is_admin_role: role == crate::auth::Role::Admin,
9929 }
9930}
9931
9932fn explicit_table_projection_columns(
9933 query: &crate::storage::query::ast::TableQuery,
9934) -> Vec<String> {
9935 use crate::storage::query::ast::{FieldRef, Projection};
9936
9937 let mut columns = Vec::new();
9938 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
9939 match projection {
9940 Projection::Column(column) | Projection::Alias(column, _) => {
9941 push_unique(&mut columns, column)
9942 }
9943 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
9944 push_unique(&mut columns, column)
9945 }
9946 _ => {}
9950 }
9951 }
9952 columns
9953}
9954
9955fn explicit_graph_projection_properties(
9956 query: &crate::storage::query::ast::GraphQuery,
9957) -> Vec<String> {
9958 use crate::storage::query::ast::{FieldRef, Projection};
9959
9960 let mut columns = Vec::new();
9961 for projection in &query.return_ {
9962 match projection {
9963 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
9964 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
9965 push_unique(&mut columns, property.clone())
9966 }
9967 _ => {}
9968 }
9969 }
9970 columns
9971}
9972
/// Appends `column` to `columns` unless an equal entry is already present.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
9978
9979fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
9980 use crate::storage::query::ast::PolicyPrincipalRef;
9981 match p {
9982 PolicyPrincipalRef::User(u) => match &u.tenant {
9983 Some(t) => format!("user:{t}/{}", u.username),
9984 None => format!("user:{}", u.username),
9985 },
9986 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
9987 }
9988}
9989
9990pub(crate) fn decision_to_strings(
9993 d: &crate::auth::policies::Decision,
9994) -> (String, Option<String>, Option<String>) {
9995 use crate::auth::policies::Decision;
9996 match d {
9997 Decision::Allow {
9998 matched_policy_id,
9999 matched_sid,
10000 } => (
10001 "allow".into(),
10002 Some(matched_policy_id.clone()),
10003 matched_sid.clone(),
10004 ),
10005 Decision::Deny {
10006 matched_policy_id,
10007 matched_sid,
10008 } => (
10009 "deny".into(),
10010 Some(matched_policy_id.clone()),
10011 matched_sid.clone(),
10012 ),
10013 Decision::DefaultDeny => ("default_deny".into(), None, None),
10014 Decision::AdminBypass => ("admin_bypass".into(), None, None),
10015 }
10016}
10017
10018fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
10019 let mut scopes = Vec::new();
10020 collect_relation_scopes(query, &mut scopes);
10021 scopes.sort();
10022 scopes.dedup();
10023 scopes
10024}
10025
10026fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
10027 match query {
10028 QueryExpr::Table(table) => {
10029 if !table.table.is_empty() {
10030 scopes.push(table.table.clone());
10031 }
10032 if let Some(alias) = &table.alias {
10033 scopes.push(alias.clone());
10034 }
10035 }
10036 QueryExpr::Join(join) => {
10037 collect_relation_scopes(&join.left, scopes);
10038 collect_relation_scopes(&join.right, scopes);
10039 }
10040 _ => {}
10041 }
10042}
10043
10044fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
10045 let inner_scopes = relation_scopes_for_query(query);
10046 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
10047}
10048
10049fn query_expr_references_outer_scope(
10050 query: &QueryExpr,
10051 outer_scopes: &[String],
10052 inner_scopes: &[String],
10053) -> bool {
10054 match query {
10055 QueryExpr::Table(table) => {
10056 table.select_items.iter().any(|item| match item {
10057 crate::storage::query::ast::SelectItem::Wildcard => false,
10058 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10059 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10060 }
10061 }) || table
10062 .where_expr
10063 .as_ref()
10064 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10065 || table.filter.as_ref().is_some_and(|filter| {
10066 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10067 })
10068 || table.having_expr.as_ref().is_some_and(|expr| {
10069 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10070 })
10071 || table.having.as_ref().is_some_and(|filter| {
10072 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10073 })
10074 || table
10075 .group_by_exprs
10076 .iter()
10077 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10078 || table.order_by.iter().any(|clause| {
10079 clause.expr.as_ref().is_some_and(|expr| {
10080 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10081 })
10082 })
10083 }
10084 QueryExpr::Join(join) => {
10085 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
10086 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
10087 || join.filter.as_ref().is_some_and(|filter| {
10088 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10089 })
10090 || join.return_items.iter().any(|item| match item {
10091 crate::storage::query::ast::SelectItem::Wildcard => false,
10092 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10093 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10094 }
10095 })
10096 }
10097 _ => false,
10098 }
10099}
10100
10101fn filter_references_outer_scope(
10102 filter: &crate::storage::query::ast::Filter,
10103 outer_scopes: &[String],
10104 inner_scopes: &[String],
10105) -> bool {
10106 use crate::storage::query::ast::Filter;
10107 match filter {
10108 Filter::Compare { field, .. }
10109 | Filter::IsNull(field)
10110 | Filter::IsNotNull(field)
10111 | Filter::In { field, .. }
10112 | Filter::Between { field, .. }
10113 | Filter::Like { field, .. }
10114 | Filter::StartsWith { field, .. }
10115 | Filter::EndsWith { field, .. }
10116 | Filter::Contains { field, .. } => {
10117 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10118 }
10119 Filter::CompareFields { left, right, .. } => {
10120 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
10121 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
10122 }
10123 Filter::CompareExpr { lhs, rhs, .. } => {
10124 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10125 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10126 }
10127 Filter::And(left, right) | Filter::Or(left, right) => {
10128 filter_references_outer_scope(left, outer_scopes, inner_scopes)
10129 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
10130 }
10131 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
10132 }
10133}
10134
10135fn expr_references_outer_scope(
10136 expr: &crate::storage::query::ast::Expr,
10137 outer_scopes: &[String],
10138 inner_scopes: &[String],
10139) -> bool {
10140 use crate::storage::query::ast::Expr;
10141 match expr {
10142 Expr::Column { field, .. } => {
10143 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10144 }
10145 Expr::BinaryOp { lhs, rhs, .. } => {
10146 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10147 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10148 }
10149 Expr::UnaryOp { operand, .. }
10150 | Expr::Cast { inner: operand, .. }
10151 | Expr::IsNull { operand, .. } => {
10152 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
10153 }
10154 Expr::FunctionCall { args, .. } => args
10155 .iter()
10156 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
10157 Expr::Case {
10158 branches, else_, ..
10159 } => {
10160 branches.iter().any(|(cond, value)| {
10161 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
10162 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
10163 }) || else_
10164 .as_ref()
10165 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10166 }
10167 Expr::InList { target, values, .. } => {
10168 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10169 || values
10170 .iter()
10171 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
10172 }
10173 Expr::Between {
10174 target, low, high, ..
10175 } => {
10176 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10177 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
10178 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
10179 }
10180 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
10181 Expr::Literal { .. } | Expr::Parameter { .. } => false,
10182 }
10183}
10184
10185fn field_ref_references_outer_scope(
10186 field: &crate::storage::query::ast::FieldRef,
10187 outer_scopes: &[String],
10188 inner_scopes: &[String],
10189) -> bool {
10190 match field {
10191 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
10192 outer_scopes.iter().any(|scope| scope == table)
10193 && !inner_scopes.iter().any(|scope| scope == table)
10194 }
10195 _ => false,
10196 }
10197}
10198
10199fn first_column_values(
10200 result: crate::storage::query::unified::UnifiedResult,
10201) -> RedDBResult<Vec<Value>> {
10202 if result.columns.len() > 1 {
10203 return Err(RedDBError::Query(
10204 "expression subquery must return exactly one column".to_string(),
10205 ));
10206 }
10207 let fallback_column = result
10208 .records
10209 .first()
10210 .and_then(|record| record.column_names().into_iter().next())
10211 .map(|name| name.to_string());
10212 let column = result.columns.first().cloned().or(fallback_column);
10213 let Some(column) = column else {
10214 return Ok(Vec::new());
10215 };
10216 Ok(result
10217 .records
10218 .iter()
10219 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
10220 .collect())
10221}
10222
10223fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
10224 if let Ok(n) = s.parse::<u128>() {
10226 return Some(n);
10227 }
10228 if let Some(date) = s.split_whitespace().next() {
10232 let parts: Vec<&str> = date.split('-').collect();
10233 if parts.len() == 3 {
10234 let (y, m, d) = (parts[0], parts[1], parts[2]);
10235 if let (Ok(y), Ok(m), Ok(d)) = (y.parse::<i64>(), m.parse::<u32>(), d.parse::<u32>()) {
10236 let days_in = days_from_civil(y, m, d);
10240 return Some((days_in as u128) * 86_400_000u128);
10241 }
10242 }
10243 }
10244 None
10245}
10246
/// Returns the number of days from 1970-01-01 to the proleptic Gregorian
/// date `y`-`m`-`d`; the result is negative for earlier dates.
///
/// Callers are expected to pass `m` in `1..=12` and `d` in `1..=31`. The
/// arithmetic is done entirely in signed integers, so an out-of-range day
/// such as `d == 0` no longer underflows: it denotes the day before the
/// first of the month.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Treat March as the first month so the leap day falls at year end.
    let y = if m <= 2 { y - 1 } else { y };
    // 400-year Gregorian cycle containing `y` (floor division).
    let era = if y >= 0 { y } else { y - 399 } / 400;
    // Year within the cycle, in 0..=399.
    let yoe = y - era * 400;
    // Month counted from March: 0 = March, ..., 11 = February.
    let month_from_march = i64::from(if m > 2 { m - 3 } else { m + 9 });
    // Day within the March-based year.
    let doy = (153 * month_from_march + 2) / 5 + i64::from(d) - 1;
    // Day within the cycle.
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 719468 is the day count from 0000-03-01 to 1970-01-01.
    era * 146097 + doe - 719468
}
10257
10258fn walk_plan_node(
10259 node: &crate::storage::query::planner::CanonicalLogicalNode,
10260 depth: usize,
10261 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
10262) {
10263 use std::sync::Arc;
10264 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
10265 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
10266 rec.set_arc(
10267 Arc::from("source"),
10268 node.source.clone().map(Value::text).unwrap_or(Value::Null),
10269 );
10270 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
10271 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
10272 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
10273 out.push(rec);
10274 for child in &node.children {
10275 walk_plan_node(child, depth + 1, out);
10276 }
10277}