1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
thread_local! {
    /// Connection id associated with the current thread. `0` is both the
    /// initial value and the value written by `clear_current_connection_id`.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    /// `(username, role)` installed by `set_current_auth_identity`; `None`
    /// when no identity is set on this thread.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    /// Snapshot context consulted by the `*_visible_under_current_snapshot`
    /// helpers; `None` when no snapshot is installed.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    /// Mirrors `CURRENT_SNAPSHOT.is_some()`; every writer of `CURRENT_SNAPSHOT`
    /// in this file updates it alongside, so readers can test a plain `Cell`
    /// before borrowing the `RefCell`.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    /// Tenant id installed by `set_current_tenant`; the base value that
    /// `current_tenant` falls back to.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    /// Resolver used by `current_config_value`; installed and restored by
    /// `ConfigSnapshotGuard`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    /// Resolver used by `current_secret_value`; installed and restored by
    /// `SecretStoreGuard`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
91fn system_keyed_collection_contract(
92 name: &str,
93 model: crate::catalog::CollectionModel,
94) -> crate::physical::CollectionContract {
95 let now = crate::utils::now_unix_millis() as u128;
96 crate::physical::CollectionContract {
97 name: name.to_string(),
98 declared_model: model,
99 schema_mode: crate::catalog::SchemaMode::Dynamic,
100 origin: crate::physical::ContractOrigin::Implicit,
101 version: 1,
102 created_at_unix_ms: now,
103 updated_at_unix_ms: now,
104 default_ttl_ms: None,
105 vector_dimension: None,
106 vector_metric: None,
107 context_index_fields: Vec::new(),
108 declared_columns: Vec::new(),
109 table_def: None,
110 timestamps_enabled: false,
111 context_index_enabled: false,
112 metrics_raw_retention_ms: None,
113 metrics_rollup_policies: Vec::new(),
114 metrics_tenant_identity: None,
115 metrics_namespace: None,
116 append_only: false,
117 subscriptions: Vec::new(),
118 }
119}
120
/// Per-thread MVCC context used by the visibility helpers in this file.
#[derive(Clone)]
pub struct SnapshotContext {
    /// Snapshot whose `sees(xmin, xmax)` decides visibility for rows not
    /// covered by the own-transaction rules.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Manager queried with `is_aborted` for both `xmin` and `xmax`.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Transaction ids treated as the caller's own: rows created by one of
    /// them are visible, rows deleted by one of them are hidden.
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Flag reported by `current_snapshot_requires_index_fallback`.
    pub requires_index_fallback: bool,
}
142
143pub fn set_current_connection_id(id: u64) {
152 CURRENT_CONN_ID.with(|c| c.set(id));
153}
154
155pub fn clear_current_connection_id() {
157 CURRENT_CONN_ID.with(|c| c.set(0));
158}
159
160pub fn current_connection_id() -> u64 {
163 CURRENT_CONN_ID.with(|c| c.get())
164}
165
166pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
170 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
171}
172
173pub fn clear_current_auth_identity() {
177 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
178}
179
180pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
183 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
184}
185
186pub fn set_current_tenant(tenant_id: String) {
191 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
192}
193
194pub fn clear_current_tenant() {
197 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
198}
199
200pub fn current_tenant() -> Option<String> {
211 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
212 if let Some(over) = current_scope_override() {
213 if over.tenant.is_active() {
214 return over.tenant.resolve(inherited);
215 }
216 }
217 if let Some(tx_local) = current_tx_local_tenant() {
218 return tx_local;
219 }
220 inherited
221}
222
thread_local! {
    /// Transaction-local tenant written by `TxLocalTenantGuard`.
    ///
    /// The outer `Option` says whether a value is installed at all; the inner
    /// `Option` is the tenant itself, so `Some(None)` is an installed "no
    /// tenant" that `current_tenant` returns in preference to the base tenant.
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
235
236fn current_tx_local_tenant() -> Option<Option<String>> {
237 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
238}
239
240fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
246 let mut tokens = query.split_ascii_whitespace();
247 let Some(w1) = tokens.next() else {
248 return Ok(None);
249 };
250 if !w1.eq_ignore_ascii_case("SET") {
251 return Ok(None);
252 }
253 let Some(w2) = tokens.next() else {
254 return Ok(None);
255 };
256 if !w2.eq_ignore_ascii_case("LOCAL") {
257 return Ok(None);
258 }
259 let Some(w3) = tokens.next() else {
260 return Ok(None);
261 };
262 if !w3.eq_ignore_ascii_case("TENANT") {
263 return Ok(None);
264 }
265 let rest: String = tokens.collect::<Vec<_>>().join(" ");
266 let rest = rest.trim().trim_end_matches(';').trim();
267 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
268 if value_str.is_empty() {
269 return Err(RedDBError::Query(
270 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
271 ));
272 }
273 if value_str.eq_ignore_ascii_case("NULL") {
274 return Ok(Some(None));
275 }
276 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
277 let inner = &value_str[1..value_str.len() - 1];
278 return Ok(Some(Some(inner.to_string())));
279 }
280 Err(RedDBError::Query(format!(
281 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
282 )))
283}
284
285pub(crate) struct TxLocalTenantGuard;
286
287impl TxLocalTenantGuard {
288 pub fn install(value: Option<Option<String>>) -> Self {
289 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
290 Self
291 }
292}
293
294impl Drop for TxLocalTenantGuard {
295 fn drop(&mut self) {
296 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
297 }
298}
299
thread_local! {
    /// Stack of scope overrides for the current thread. Entries are pushed
    /// and popped at the end of the vector; the last element is the one
    /// returned by `current_scope_override`.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
310
311pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
312 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
313}
314
315pub(crate) fn pop_scope_override() {
316 SCOPE_OVERRIDES.with(|cell| {
317 cell.borrow_mut().pop();
318 });
319}
320
321pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
322 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
323}
324
325pub(crate) fn has_scope_override_active() -> bool {
329 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
330}
331
332pub(crate) struct ScopeOverrideGuard;
336
337impl ScopeOverrideGuard {
338 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
339 push_scope_override(over);
340 Self
341 }
342}
343
344impl Drop for ScopeOverrideGuard {
345 fn drop(&mut self) {
346 pop_scope_override();
347 }
348}
349
350pub(crate) fn current_user_projected() -> Option<String> {
356 let inherited = current_auth_identity().map(|(u, _)| u);
357 if let Some(over) = current_scope_override() {
358 if over.user.is_active() {
359 return over.user.resolve(inherited);
360 }
361 }
362 inherited
363}
364
365pub(crate) fn current_role_projected() -> Option<String> {
366 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
367 if let Some(over) = current_scope_override() {
368 if over.role.is_active() {
369 return over.role.resolve(inherited);
370 }
371 }
372 inherited
373}
374
375pub(crate) fn current_secret_value(path: &str) -> Option<String> {
376 let key = path.to_ascii_lowercase();
377 CURRENT_SECRET_RESOLVER.with(|cell| {
378 let mut resolver = cell.borrow_mut();
379 let resolver = resolver.as_mut()?;
380 if resolver.values.is_none() {
381 resolver.values = resolver
382 .store
383 .as_ref()
384 .map(|store| store.vault_kv_snapshot());
385 }
386 let values = resolver.values.as_ref()?;
387 values.get(&key).cloned().or_else(|| {
388 key.strip_prefix("red.vault/").and_then(|rest| {
389 values
390 .get(rest)
391 .cloned()
392 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
393 })
394 })
395 })
396}
397
/// Per-thread secret lookup state used by `current_secret_value`.
struct SecretResolver {
    /// Auth store the values are loaded from; with `None`, `values` is never
    /// populated and every lookup misses.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Lazily-loaded copy of `store.vault_kv_snapshot()`; `None` until the
    /// first lookup. Kept current by `update_current_secret_value` once
    /// loaded.
    values: Option<HashMap<String, String>>,
}
402
403pub(super) struct SecretStoreGuard {
404 previous: Option<SecretResolver>,
405}
406
407impl SecretStoreGuard {
408 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
409 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
410 cell.replace(Some(SecretResolver {
411 store,
412 values: None,
413 }))
414 });
415 Self { previous }
416 }
417}
418
419impl Drop for SecretStoreGuard {
420 fn drop(&mut self) {
421 let previous = self.previous.take();
422 CURRENT_SECRET_RESOLVER.with(|cell| {
423 cell.replace(previous);
424 });
425 }
426}
427
428pub(crate) fn current_config_value(path: &str) -> Option<Value> {
429 let key = path.to_ascii_lowercase();
430 CURRENT_CONFIG_RESOLVER.with(|cell| {
431 let mut resolver = cell.borrow_mut();
432 let resolver = resolver.as_mut()?;
433 if resolver.values.is_none() {
434 resolver.values = Some(latest_config_snapshot(&resolver.db));
435 }
436 let values = resolver.values.as_ref()?;
437 values.get(&key).cloned().or_else(|| {
438 key.strip_prefix("red.config/")
439 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
440 })
441 })
442}
443
444fn update_current_config_value(path: &str, value: Value) {
445 let key = path.to_ascii_lowercase();
446 CURRENT_CONFIG_RESOLVER.with(|cell| {
447 if let Some(resolver) = cell.borrow_mut().as_mut() {
448 if let Some(values) = resolver.values.as_mut() {
449 values.insert(key, value);
450 }
451 }
452 });
453}
454
455fn update_current_secret_value(path: &str, value: Option<String>) {
456 let key = path.to_ascii_lowercase();
457 CURRENT_SECRET_RESOLVER.with(|cell| {
458 if let Some(resolver) = cell.borrow_mut().as_mut() {
459 let Some(values) = resolver.values.as_mut() else {
460 return;
461 };
462 match value {
463 Some(value) => {
464 values.insert(key, value);
465 }
466 None => {
467 values.remove(&key);
468 }
469 }
470 }
471 });
472}
473
474fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
475 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
476
477 if let Some(manager) = db.store().get_collection("red_config") {
478 manager.for_each_entity(|entity| {
479 let Some(row) = entity.data.as_row() else {
480 return true;
481 };
482 let Some(Value::Text(key)) = row.get_field("key") else {
483 return true;
484 };
485 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
486 let id = entity.id.raw();
487 let key = key.to_ascii_lowercase();
488 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
489 if let Some(rest) = key.strip_prefix("red.config.") {
490 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
491 }
492 true
493 });
494 }
495
496 if let Some(manager) = db.store().get_collection("red.config") {
497 manager.for_each_entity(|entity| {
498 let Some(row) = entity.data.as_row() else {
499 return true;
500 };
501 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
502 return true;
503 }
504 let Some(Value::Text(key)) = row.get_field("key") else {
505 return true;
506 };
507 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
508 insert_latest_config_value(
509 &mut latest,
510 format!("red.config/{}", key.to_ascii_lowercase()),
511 entity.id.raw(),
512 value,
513 );
514 true
515 });
516 }
517
518 latest
519 .into_iter()
520 .map(|(key, (_, value))| (key, value))
521 .collect()
522}
523
524fn insert_latest_config_value(
525 latest: &mut HashMap<String, (u64, Value)>,
526 key: String,
527 id: u64,
528 value: Value,
529) {
530 match latest.get(&key) {
531 Some((prev_id, _)) if *prev_id > id => {}
532 _ => {
533 latest.insert(key, (id, value));
534 }
535 }
536}
537
/// Per-thread config lookup state used by `current_config_value`.
struct ConfigResolver {
    /// Database the config snapshot is read from.
    db: Arc<RedDB>,
    /// Lazily-built result of `latest_config_snapshot`; `None` until the
    /// first lookup. Kept current by `update_current_config_value` once
    /// loaded.
    values: Option<HashMap<String, Value>>,
}
542
543pub(super) struct ConfigSnapshotGuard {
544 previous: Option<ConfigResolver>,
545}
546
547impl ConfigSnapshotGuard {
548 pub(super) fn install(db: Arc<RedDB>) -> Self {
549 let previous = CURRENT_CONFIG_RESOLVER
550 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
551 Self { previous }
552 }
553}
554
555impl Drop for ConfigSnapshotGuard {
556 fn drop(&mut self) {
557 let previous = self.previous.take();
558 CURRENT_CONFIG_RESOLVER.with(|cell| {
559 cell.replace(previous);
560 });
561 }
562}
563
564pub fn set_current_snapshot(ctx: SnapshotContext) {
569 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
570 HAS_SNAPSHOT.with(|c| c.set(true));
571}
572
573pub fn clear_current_snapshot() {
574 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
575 HAS_SNAPSHOT.with(|c| c.set(false));
576}
577
578pub(crate) struct CurrentSnapshotGuard {
584 previous: Option<SnapshotContext>,
585}
586
587impl CurrentSnapshotGuard {
588 pub(crate) fn install(ctx: SnapshotContext) -> Self {
589 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
590 set_current_snapshot(ctx);
591 Self { previous }
592 }
593}
594
595impl Drop for CurrentSnapshotGuard {
596 fn drop(&mut self) {
597 let prev = self.previous.take();
598 let has = prev.is_some();
599 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
600 HAS_SNAPSHOT.with(|c| c.set(has));
601 }
602}
603
604#[inline]
615pub fn entity_visible_under_current_snapshot(
616 entity: &crate::storage::unified::entity::UnifiedEntity,
617) -> bool {
618 if !HAS_SNAPSHOT.with(|c| c.get()) {
624 return entity.xmax == 0;
625 }
626 CURRENT_SNAPSHOT.with(|cell| {
627 let guard = cell.borrow();
628 let Some(ctx) = guard.as_ref() else {
629 return true;
630 };
631 visibility_check(ctx, entity.xmin, entity.xmax)
632 })
633}
634
635#[inline]
640pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
641 if !HAS_SNAPSHOT.with(|c| c.get()) {
642 return true;
643 }
644 CURRENT_SNAPSHOT.with(|cell| {
645 let guard = cell.borrow();
646 let Some(ctx) = guard.as_ref() else {
647 return true;
648 };
649 visibility_check(ctx, xmin, xmax)
650 })
651}
652
653pub fn capture_current_snapshot() -> Option<SnapshotContext> {
660 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
661}
662
663pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
668 if !HAS_SNAPSHOT.with(|c| c.get()) {
669 return false;
670 }
671 CURRENT_SNAPSHOT.with(|cell| {
672 cell.borrow()
673 .as_ref()
674 .is_some_and(|ctx| ctx.requires_index_fallback)
675 })
676}
677
/// Copy of the thread-local snapshot, auth identity and base tenant, as
/// captured by `snapshot_bundle` and re-installed by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context at capture time, if any.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)` at capture time, if any.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Base tenant at capture time (the raw `CURRENT_TENANT_ID` value, not
    /// the result of `current_tenant`).
    pub tenant: Option<String>,
}
698
699pub fn snapshot_bundle() -> SnapshotBundle {
702 SnapshotBundle {
703 snapshot: capture_current_snapshot(),
704 auth: current_auth_identity(),
705 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
706 }
707}
708
709pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
714 struct Guard {
715 prev_snapshot: Option<SnapshotContext>,
716 prev_auth: Option<(String, crate::auth::Role)>,
717 prev_tenant: Option<String>,
718 }
719 impl Drop for Guard {
720 fn drop(&mut self) {
721 let snap = self.prev_snapshot.take();
722 let has = snap.is_some();
723 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
724 HAS_SNAPSHOT.with(|c| c.set(has));
725 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
726 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
727 }
728 }
729
730 let _guard = {
731 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
732 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
733 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
734
735 match bundle.snapshot.clone() {
736 Some(ctx) => set_current_snapshot(ctx),
737 None => clear_current_snapshot(),
738 }
739 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
740 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
741
742 Guard {
743 prev_snapshot,
744 prev_auth,
745 prev_tenant,
746 }
747 };
748 f()
749}
750
751#[inline]
755pub fn entity_visible_with_context(
756 ctx: Option<&SnapshotContext>,
757 entity: &crate::storage::unified::entity::UnifiedEntity,
758) -> bool {
759 match ctx {
760 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
761 None => true,
762 }
763}
764
765fn table_row_index_fields(
766 entity: &crate::storage::unified::entity::UnifiedEntity,
767) -> Vec<(String, crate::storage::schema::Value)> {
768 let crate::storage::EntityData::Row(row) = &entity.data else {
769 return Vec::new();
770 };
771 if let Some(named) = &row.named {
772 return named
773 .iter()
774 .map(|(name, value)| (name.clone(), value.clone()))
775 .collect();
776 }
777 if let Some(schema) = &row.schema {
778 return schema
779 .iter()
780 .zip(row.columns.iter())
781 .map(|(name, value)| (name.clone(), value.clone()))
782 .collect();
783 }
784 Vec::new()
785}
786
787#[inline]
788fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
789 if xmin != 0 && ctx.manager.is_aborted(xmin) {
793 return false;
794 }
795 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
797 0
798 } else {
799 xmax
800 };
801 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
805 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
806 if own_xmax {
807 return false;
809 }
810 if own_xmin {
811 return true;
812 }
813 ctx.snapshot.sees(xmin, effective_xmax)
814}
815
816fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
817 runtime
818 .inner
819 .pool
820 .lock()
821 .unwrap_or_else(|poisoned| poisoned.into_inner())
822}
823
824fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
825 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
826 return;
827 }
828 scopes.insert(name.to_string());
829}
830
831fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
832 match query.source.as_ref() {
833 Some(crate::storage::query::ast::TableSource::Name(name)) => {
834 cache_scope_insert(scopes, name)
835 }
836 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
837 collect_query_expr_result_cache_scopes(scopes, subquery);
838 }
839 None => cache_scope_insert(scopes, &query.table),
840 }
841}
842
843fn collect_vector_source_scopes(
844 scopes: &mut HashSet<String>,
845 source: &crate::storage::query::ast::VectorSource,
846) {
847 match source {
848 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
849 cache_scope_insert(scopes, collection);
850 }
851 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
852 collect_query_expr_result_cache_scopes(scopes, subquery);
853 }
854 crate::storage::query::ast::VectorSource::Literal(_)
855 | crate::storage::query::ast::VectorSource::Text(_) => {}
856 }
857}
858
859fn collect_path_selector_scopes(
860 scopes: &mut HashSet<String>,
861 selector: &crate::storage::query::ast::NodeSelector,
862) {
863 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
864 cache_scope_insert(scopes, table);
865 }
866}
867
868fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
869 match expr {
870 QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
871 QueryExpr::Join(query) => {
872 collect_query_expr_result_cache_scopes(scopes, &query.left);
873 collect_query_expr_result_cache_scopes(scopes, &query.right);
874 }
875 QueryExpr::Path(query) => {
876 collect_path_selector_scopes(scopes, &query.from);
877 collect_path_selector_scopes(scopes, &query.to);
878 }
879 QueryExpr::Vector(query) => {
880 cache_scope_insert(scopes, &query.collection);
881 collect_vector_source_scopes(scopes, &query.query_vector);
882 }
883 QueryExpr::Hybrid(query) => {
884 collect_query_expr_result_cache_scopes(scopes, &query.structured);
885 cache_scope_insert(scopes, &query.vector.collection);
886 collect_vector_source_scopes(scopes, &query.vector.query_vector);
887 }
888 QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
889 QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
890 QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
891 QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
892 QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
893 QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
894 QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
895 QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
896 QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
897 QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
898 QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
899 QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
900 QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
901 QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
902 QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
903 QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
904 QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
905 QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
906 QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
907 QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
908 QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
909 QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
910 QueryExpr::QueueCommand(query) => match query {
911 QueueCommand::Push { queue, .. }
912 | QueueCommand::Pop { queue, .. }
913 | QueueCommand::Peek { queue, .. }
914 | QueueCommand::Len { queue }
915 | QueueCommand::Purge { queue }
916 | QueueCommand::GroupCreate { queue, .. }
917 | QueueCommand::GroupRead { queue, .. }
918 | QueueCommand::Pending { queue, .. }
919 | QueueCommand::Claim { queue, .. }
920 | QueueCommand::Ack { queue, .. }
921 | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
922 QueueCommand::Move {
923 source,
924 destination,
925 ..
926 } => {
927 cache_scope_insert(scopes, source);
928 cache_scope_insert(scopes, destination);
929 }
930 },
931 QueryExpr::EventsBackfill(query) => {
932 cache_scope_insert(scopes, &query.collection);
933 cache_scope_insert(scopes, &query.target_queue);
934 }
935 QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
936 QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
937 QueryExpr::TreeCommand(query) => match query {
938 TreeCommand::Insert { collection, .. }
939 | TreeCommand::Move { collection, .. }
940 | TreeCommand::Delete { collection, .. }
941 | TreeCommand::Validate { collection, .. }
942 | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
943 },
944 QueryExpr::SearchCommand(query) => match query {
945 SearchCommand::Similar { collection, .. }
946 | SearchCommand::Hybrid { collection, .. }
947 | SearchCommand::SpatialRadius { collection, .. }
948 | SearchCommand::SpatialBbox { collection, .. }
949 | SearchCommand::SpatialNearest { collection, .. } => {
950 cache_scope_insert(scopes, collection);
951 }
952 SearchCommand::Text { collection, .. }
953 | SearchCommand::Multimodal { collection, .. }
954 | SearchCommand::Index { collection, .. }
955 | SearchCommand::Context { collection, .. } => {
956 if let Some(collection) = collection.as_deref() {
957 cache_scope_insert(scopes, collection);
958 }
959 }
960 },
961 QueryExpr::Ask(query) => {
962 if let Some(collection) = query.collection.as_deref() {
963 cache_scope_insert(scopes, collection);
964 }
965 }
966 QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
967 QueryExpr::MaintenanceCommand(cmd) => match cmd {
968 crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
969 | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
970 if let Some(t) = target {
971 cache_scope_insert(scopes, t);
972 }
973 }
974 },
975 QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
976 QueryExpr::CreateView(cmd) => {
977 cache_scope_insert(scopes, &cmd.name);
978 collect_query_expr_result_cache_scopes(scopes, &cmd.query);
980 }
981 QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
982 QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
983 QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
984 QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
985 QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
986 QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
987 QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
988 QueryExpr::Graph(_)
989 | QueryExpr::GraphCommand(_)
990 | QueryExpr::ProbabilisticCommand(_)
991 | QueryExpr::SetConfig { .. }
992 | QueryExpr::ShowConfig { .. }
993 | QueryExpr::SetSecret { .. }
994 | QueryExpr::DeleteSecret { .. }
995 | QueryExpr::ShowSecrets { .. }
996 | QueryExpr::SetTenant(_)
997 | QueryExpr::ShowTenant
998 | QueryExpr::TransactionControl(_)
999 | QueryExpr::CreateSchema(_)
1000 | QueryExpr::DropSchema(_)
1001 | QueryExpr::CreateSequence(_)
1002 | QueryExpr::DropSequence(_)
1003 | QueryExpr::Grant(_)
1004 | QueryExpr::Revoke(_)
1005 | QueryExpr::AlterUser(_)
1006 | QueryExpr::CreateIamPolicy { .. }
1007 | QueryExpr::DropIamPolicy { .. }
1008 | QueryExpr::AttachPolicy { .. }
1009 | QueryExpr::DetachPolicy { .. }
1010 | QueryExpr::ShowPolicies { .. }
1011 | QueryExpr::ShowEffectivePermissions { .. }
1012 | QueryExpr::SimulatePolicy { .. }
1013 | QueryExpr::CreateMigration(_)
1014 | QueryExpr::ApplyMigration(_)
1015 | QueryExpr::RollbackMigration(_)
1016 | QueryExpr::ExplainMigration(_)
1017 | QueryExpr::EventsBackfillStatus { .. } => {}
1018 QueryExpr::KvCommand(cmd) => {
1019 use crate::storage::query::ast::KvCommand;
1020 match cmd {
1021 KvCommand::Put { collection, .. }
1022 | KvCommand::InvalidateTags { collection, .. }
1023 | KvCommand::Get { collection, .. }
1024 | KvCommand::Unseal { collection, .. }
1025 | KvCommand::Rotate { collection, .. }
1026 | KvCommand::History { collection, .. }
1027 | KvCommand::List { collection, .. }
1028 | KvCommand::Purge { collection, .. }
1029 | KvCommand::Watch { collection, .. }
1030 | KvCommand::Delete { collection, .. }
1031 | KvCommand::Incr { collection, .. }
1032 | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
1033 }
1034 }
1035 QueryExpr::ConfigCommand(cmd) => {
1036 use crate::storage::query::ast::ConfigCommand;
1037 match cmd {
1038 ConfigCommand::Put { collection, .. }
1039 | ConfigCommand::Get { collection, .. }
1040 | ConfigCommand::Resolve { collection, .. }
1041 | ConfigCommand::Rotate { collection, .. }
1042 | ConfigCommand::Delete { collection, .. }
1043 | ConfigCommand::History { collection, .. }
1044 | ConfigCommand::List { collection, .. }
1045 | ConfigCommand::Watch { collection, .. }
1046 | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
1047 cache_scope_insert(scopes, collection)
1048 }
1049 }
1050 }
1051 }
1052}
1053
1054pub(crate) fn rls_policy_filter(
1062 runtime: &RedDBRuntime,
1063 table: &str,
1064 action: crate::storage::query::ast::PolicyAction,
1065) -> Option<crate::storage::query::ast::Filter> {
1066 rls_policy_filter_for_kind(
1067 runtime,
1068 table,
1069 action,
1070 crate::storage::query::ast::PolicyTargetKind::Table,
1071 )
1072}
1073
1074pub(crate) fn rls_policy_filter_for_kind(
1080 runtime: &RedDBRuntime,
1081 table: &str,
1082 action: crate::storage::query::ast::PolicyAction,
1083 kind: crate::storage::query::ast::PolicyTargetKind,
1084) -> Option<crate::storage::query::ast::Filter> {
1085 use crate::storage::query::ast::Filter;
1086
1087 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1088 return None;
1089 }
1090 let role = current_auth_identity().map(|(_, role)| role);
1091 let role_str = role.map(|r| r.as_str().to_string());
1092 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1093 if policies.is_empty() {
1094 return None;
1095 }
1096 policies
1097 .into_iter()
1098 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1099}
1100
1101pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1105 runtime.inner.rls_enabled_tables.read().contains(table)
1106}
1107
1108fn node_passes_rls(
1115 runtime: &RedDBRuntime,
1116 collection: &str,
1117 role: Option<&str>,
1118 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1119 entity: &crate::storage::unified::entity::UnifiedEntity,
1120) -> bool {
1121 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1122
1123 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1124 return true;
1125 }
1126 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1127 let policies = runtime.matching_rls_policies_for_kind(
1128 collection,
1129 role,
1130 PolicyAction::Select,
1131 PolicyTargetKind::Nodes,
1132 );
1133 if policies.is_empty() {
1134 None
1135 } else {
1136 policies
1137 .into_iter()
1138 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1139 }
1140 });
1141 let Some(filter) = filter else {
1142 return false;
1143 };
1144 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1145 Some(&runtime.inner.db),
1146 entity,
1147 filter,
1148 collection,
1149 collection,
1150 )
1151}
1152
1153fn edge_passes_rls(
1156 runtime: &RedDBRuntime,
1157 collection: &str,
1158 role: Option<&str>,
1159 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1160 entity: &crate::storage::unified::entity::UnifiedEntity,
1161) -> bool {
1162 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1163
1164 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1165 return true;
1166 }
1167 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1168 let policies = runtime.matching_rls_policies_for_kind(
1169 collection,
1170 role,
1171 PolicyAction::Select,
1172 PolicyTargetKind::Edges,
1173 );
1174 if policies.is_empty() {
1175 None
1176 } else {
1177 policies
1178 .into_iter()
1179 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1180 }
1181 });
1182 let Some(filter) = filter else {
1183 return false;
1184 };
1185 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1186 Some(&runtime.inner.db),
1187 entity,
1188 filter,
1189 collection,
1190 collection,
1191 )
1192}
1193
1194fn inject_rls_filters(
1215 runtime: &RedDBRuntime,
1216 frame: &dyn super::statement_frame::ReadFrame,
1217 mut table: crate::storage::query::ast::TableQuery,
1218) -> Option<crate::storage::query::ast::TableQuery> {
1219 use crate::storage::query::ast::{Filter, PolicyAction};
1220
1221 let role = frame.identity().map(|(_, role)| role);
1223 let role_str = role.map(|r| r.as_str().to_string());
1224 let policies =
1225 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1226
1227 if policies.is_empty() {
1228 return None;
1231 }
1232
1233 let combined = policies
1235 .into_iter()
1236 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1237 .expect("policies non-empty");
1238
1239 table.filter = Some(match table.filter.take() {
1241 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1242 None => combined,
1243 });
1244 Some(table)
1245}
1246
1247fn inject_rls_into_join(
1257 runtime: &RedDBRuntime,
1258 frame: &dyn super::statement_frame::ReadFrame,
1259 mut join: crate::storage::query::ast::JoinQuery,
1260) -> Option<crate::storage::query::ast::JoinQuery> {
1261 use crate::storage::query::ast::Filter;
1262
1263 let mut policy_filters: Vec<Filter> = Vec::new();
1264 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1265 return None;
1266 }
1267 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1268 return None;
1269 }
1270
1271 if policy_filters.is_empty() {
1272 return Some(join);
1273 }
1274
1275 let combined = policy_filters
1276 .into_iter()
1277 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1278 .expect("policy_filters non-empty");
1279
1280 join.filter = Some(match join.filter.take() {
1281 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1282 None => combined,
1283 });
1284
1285 Some(join)
1286}
1287
1288fn collect_join_side_policy(
1293 runtime: &RedDBRuntime,
1294 frame: &dyn super::statement_frame::ReadFrame,
1295 expr: &crate::storage::query::ast::QueryExpr,
1296 out: &mut Vec<crate::storage::query::ast::Filter>,
1297) -> bool {
1298 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1299 match expr {
1300 QueryExpr::Table(t) => {
1301 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1302 return true;
1303 }
1304 let role = frame.identity().map(|(_, role)| role);
1305 let role_str = role.map(|r| r.as_str().to_string());
1306 let policies =
1307 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1308 if policies.is_empty() {
1309 return false;
1310 }
1311 let combined = policies
1312 .into_iter()
1313 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1314 .expect("policies non-empty");
1315 out.push(combined);
1316 true
1317 }
1318 QueryExpr::Join(inner) => {
1319 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1320 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1321 }
1322 _ => true,
1323 }
1324}
1325
1326fn apply_foreign_table_filters(
1337 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1338 query: &crate::storage::query::ast::TableQuery,
1339) -> crate::storage::query::unified::UnifiedResult {
1340 use crate::storage::query::sql_lowering::{
1341 effective_table_filter, effective_table_projections,
1342 };
1343 use crate::storage::query::unified::UnifiedResult;
1344
1345 let filter = effective_table_filter(query);
1346 let projections = effective_table_projections(query);
1347
1348 let mut filtered: Vec<_> = records
1351 .into_iter()
1352 .filter(|record| match &filter {
1353 Some(f) => {
1354 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1355 }
1356 None => true,
1357 })
1358 .collect();
1359
1360 if let Some(offset) = query.offset {
1362 let offset = offset as usize;
1363 if offset >= filtered.len() {
1364 filtered.clear();
1365 } else {
1366 filtered.drain(0..offset);
1367 }
1368 }
1369 if let Some(limit) = query.limit {
1370 filtered.truncate(limit as usize);
1371 }
1372
1373 let columns: Vec<String> = if projections.is_empty() {
1376 filtered
1377 .first()
1378 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1379 .unwrap_or_default()
1380 } else {
1381 projections
1382 .iter()
1383 .map(super::join_filter::projection_name)
1384 .collect()
1385 };
1386
1387 let mut result = UnifiedResult::empty();
1388 result.columns = columns;
1389 result.records = filtered;
1390 result
1391}
1392
1393pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1400 let mut scopes: HashSet<String> = HashSet::new();
1401 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1402 scopes.into_iter().collect()
1403}
1404
1405fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1406 let mut scopes = HashSet::new();
1407 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1408 scopes
1409}
1410
/// Config key naming the result-cache backend.
/// NOTE(review): presumably read when resolving `RuntimeResultCacheBackend` — confirm.
const RESULT_CACHE_BACKEND_KEY: &'static str = "runtime.result_cache.backend";
/// Backend name used when the key above is unset (presumably — confirm against the reader).
const RESULT_CACHE_DEFAULT_BACKEND: &'static str = "legacy";
/// Namespace string for result-cache blobs (presumably passed to the blob cache — confirm).
const RESULT_CACHE_BLOB_NAMESPACE: &'static str = "runtime.result_cache";
/// Entry lifetime in seconds (presumably applied per cached result — confirm).
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Size bound enforced by `trim_result_cache`.
const RESULT_CACHE_MAX_ENTRIES: usize = 1_000;
/// Eight-byte tag that prefixes every payload written by
/// `encode_result_cache_payload` and is required by
/// `decode_result_cache_payload`.
const RESULT_CACHE_PAYLOAD_MAGIC: &'static [u8; 8] = b"RDRC0001";
1417
/// Which storage backs the runtime result cache.
///
/// NOTE(review): the variant meanings below are inferred from the names and
/// the `"legacy"` default backend string — confirm against the selector code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum RuntimeResultCacheBackend {
    /// Presumably the in-process map trimmed by `trim_result_cache`.
    Legacy,
    /// Presumably the blob-cache backed store.
    BlobCache,
    /// Presumably runs both backends side by side for comparison.
    Shadow,
}
1424
1425fn trim_result_cache(
1426 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1427 order: &mut std::collections::VecDeque<String>,
1428) {
1429 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1430 if let Some(oldest) = order.pop_front() {
1431 map.remove(&oldest);
1432 } else {
1433 break;
1434 }
1435 }
1436}
1437
1438fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1439 format!(
1440 "{:?}|{}|{}|{}|{}|{:?}",
1441 result.result,
1442 result.query,
1443 result.statement,
1444 result.engine,
1445 result.affected_rows,
1446 result.statement_type
1447 )
1448}
1449
1450fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1451 match mode {
1452 crate::storage::query::modes::QueryMode::Sql => 0,
1453 crate::storage::query::modes::QueryMode::Gremlin => 1,
1454 crate::storage::query::modes::QueryMode::Cypher => 2,
1455 crate::storage::query::modes::QueryMode::Sparql => 3,
1456 crate::storage::query::modes::QueryMode::Path => 4,
1457 crate::storage::query::modes::QueryMode::Natural => 5,
1458 crate::storage::query::modes::QueryMode::Unknown => 255,
1459 }
1460}
1461
1462fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1463 match byte {
1464 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1465 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1466 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1467 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1468 4 => Some(crate::storage::query::modes::QueryMode::Path),
1469 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1470 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1471 _ => None,
1472 }
1473}
1474
/// Maps `value` to the matching `'static` label, or `None` when `value` is
/// not one of the known labels. The comparison is exact (case-sensitive).
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN_LABELS
        .iter()
        .copied()
        .find(|label| *label == value)
}
1497
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in a
/// `u32`.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrow) => {
            out.extend_from_slice(&narrow.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1503
1504fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1505 write_u32(out, value.len())?;
1506 out.extend_from_slice(value.as_bytes());
1507 Some(())
1508}
1509
1510fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1511 write_u32(out, value.len())?;
1512 out.extend_from_slice(value);
1513 Some(())
1514}
1515
/// Pops one byte off the front of `input`, advancing the cursor.
/// Returns `None` (cursor unchanged) when `input` is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let byte = *input.first()?;
    *input = &input[1..];
    Some(byte)
}
1521
/// Reads a little-endian `u32` from the front of `input` and widens it to
/// `usize`, advancing the cursor by four bytes.
/// Returns `None` (cursor unchanged) when fewer than four bytes remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let head = input.get(..4)?;
    let mut raw = [0u8; 4];
    raw.copy_from_slice(head);
    *input = &input[4..];
    Some(u32::from_le_bytes(raw) as usize)
}
1530
/// Reads a little-endian `u64` from the front of `input`, advancing the
/// cursor by eight bytes.
/// Returns `None` (cursor unchanged) when fewer than eight bytes remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let head = input.get(..8)?;
    let mut raw = [0u8; 8];
    raw.copy_from_slice(head);
    *input = &input[8..];
    Some(u64::from_le_bytes(raw))
}
1539
1540fn read_string(input: &mut &[u8]) -> Option<String> {
1541 let len = read_u32(input)?;
1542 if input.len() < len {
1543 return None;
1544 }
1545 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1546 *input = &input[len..];
1547 Some(value)
1548}
1549
1550fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1551 let len = read_u32(input)?;
1552 if input.len() < len {
1553 return None;
1554 }
1555 let value = &input[..len];
1556 *input = &input[len..];
1557 Some(value)
1558}
1559
1560fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
1561 let result = &entry.result;
1562 if result.result.pre_serialized_json.is_some()
1563 || result_cache_static_str(result.statement).is_none()
1564 || result_cache_static_str(result.engine).is_none()
1565 || result_cache_static_str(result.statement_type).is_none()
1566 || result.result.records.iter().any(|record| {
1567 !record.nodes.is_empty()
1568 || !record.edges.is_empty()
1569 || !record.paths.is_empty()
1570 || !record.vector_results.is_empty()
1571 })
1572 {
1573 return None;
1574 }
1575
1576 let mut out = Vec::new();
1577 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
1578 write_string(&mut out, &result.query)?;
1579 out.push(mode_to_byte(result.mode));
1580 write_string(&mut out, result.statement)?;
1581 write_string(&mut out, result.engine)?;
1582 out.extend_from_slice(&result.affected_rows.to_le_bytes());
1583 write_string(&mut out, result.statement_type)?;
1584
1585 write_u32(&mut out, result.result.columns.len())?;
1586 for column in &result.result.columns {
1587 write_string(&mut out, column)?;
1588 }
1589 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
1590 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
1591 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
1592 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
1593
1594 write_u32(&mut out, result.result.records.len())?;
1595 for record in &result.result.records {
1596 let fields = record.iter_fields().collect::<Vec<_>>();
1597 write_u32(&mut out, fields.len())?;
1598 for (name, value) in fields {
1599 write_string(&mut out, name)?;
1600 let mut encoded = Vec::new();
1601 crate::storage::schema::value_codec::encode(value, &mut encoded);
1602 write_bytes(&mut out, &encoded)?;
1603 }
1604 }
1605
1606 write_u32(&mut out, entry.scopes.len())?;
1607 for scope in &entry.scopes {
1608 write_string(&mut out, scope)?;
1609 }
1610 Some(out)
1611}
1612
1613fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
1614 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
1615 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
1616 {
1617 return None;
1618 }
1619 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
1620
1621 let query = read_string(&mut input)?;
1622 let mode = mode_from_byte(read_u8(&mut input)?)?;
1623 let statement = result_cache_static_str(&read_string(&mut input)?)?;
1624 let engine = result_cache_static_str(&read_string(&mut input)?)?;
1625 let affected_rows = read_u64(&mut input)?;
1626 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
1627
1628 let mut columns = Vec::new();
1629 for _ in 0..read_u32(&mut input)? {
1630 columns.push(read_string(&mut input)?);
1631 }
1632 let stats = crate::storage::query::unified::QueryStats {
1633 nodes_scanned: read_u64(&mut input)?,
1634 edges_scanned: read_u64(&mut input)?,
1635 rows_scanned: read_u64(&mut input)?,
1636 exec_time_us: read_u64(&mut input)?,
1637 };
1638
1639 let mut records = Vec::new();
1640 for _ in 0..read_u32(&mut input)? {
1641 let mut record = crate::storage::query::unified::UnifiedRecord::new();
1642 for _ in 0..read_u32(&mut input)? {
1643 let name = read_string(&mut input)?;
1644 let bytes = read_bytes(&mut input)?;
1645 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
1646 if used != bytes.len() {
1647 return None;
1648 }
1649 record.set_owned(name, value);
1650 }
1651 records.push(record);
1652 }
1653
1654 let mut scopes = HashSet::new();
1655 for _ in 0..read_u32(&mut input)? {
1656 scopes.insert(read_string(&mut input)?);
1657 }
1658 if !input.is_empty() {
1659 return None;
1660 }
1661
1662 Some((
1663 RuntimeQueryResult {
1664 query,
1665 mode,
1666 statement,
1667 engine,
1668 result: crate::storage::query::unified::UnifiedResult {
1669 columns,
1670 records,
1671 stats,
1672 pre_serialized_json: None,
1673 },
1674 affected_rows,
1675 statement_type,
1676 },
1677 scopes,
1678 ))
1679}
1680
/// Splits a leading `EXPLAIN` keyword off `sql` and returns the statement
/// that follows it, with leading whitespace removed.
///
/// Returns `None` when:
/// * the first whitespace-delimited word is not `EXPLAIN` (ASCII
///   case-insensitive),
/// * nothing follows the keyword, or
/// * the next keyword is `ALTER` or `ASK` (presumably those forms are parsed
///   as statements of their own — confirm against the caller).
///
/// The keyword after `EXPLAIN` ends at whitespace, `(` or `;` — the same
/// delimiter set `query_is_ask_statement` uses — so `EXPLAIN ASK('q')` is
/// recognised as an ASK form just like `EXPLAIN ASK 'q'`.
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    let trimmed = sql.trim_start();
    let head_end = trimmed
        .find(char::is_whitespace)
        .unwrap_or(trimmed.len());
    let (head, tail) = trimmed.split_at(head_end);
    if !head.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let rest = tail.trim_start();
    if rest.is_empty() {
        return None;
    }

    let keyword_end = rest
        .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
        .unwrap_or(rest.len());
    let keyword = &rest[..keyword_end];
    if keyword.eq_ignore_ascii_case("ALTER") || keyword.eq_ignore_ascii_case("ASK") {
        return None;
    }
    Some(rest)
}
1719
1720pub(super) fn has_with_prefix(sql: &str) -> bool {
1725 let trimmed = sql.trim_start();
1726 let head_end = trimmed
1727 .find(|c: char| c.is_whitespace() || c == '(')
1728 .unwrap_or(trimmed.len());
1729 trimmed[..head_end].eq_ignore_ascii_case("WITH")
1730}
1731
1732fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
1740 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
1741}
1742
1743pub(super) fn peek_top_level_as_of_with_table(
1748 sql: &str,
1749) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
1750 if !sql
1751 .as_bytes()
1752 .windows(5)
1753 .any(|w| w.eq_ignore_ascii_case(b"as of"))
1754 {
1755 return None;
1756 }
1757 let parsed = crate::storage::query::parser::parse(sql).ok()?;
1758 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
1759 return None;
1760 };
1761 let clause = table.as_of?;
1762 let table_name = if table.table.is_empty() || table.table == "any" {
1763 None
1764 } else {
1765 Some(table.table.clone())
1766 };
1767 let spec = match clause {
1768 crate::storage::query::ast::AsOfClause::Commit(h) => {
1769 crate::application::vcs::AsOfSpec::Commit(h)
1770 }
1771 crate::storage::query::ast::AsOfClause::Branch(b) => {
1772 crate::application::vcs::AsOfSpec::Branch(b)
1773 }
1774 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
1775 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
1776 crate::application::vcs::AsOfSpec::TimestampMs(ts)
1777 }
1778 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
1779 crate::application::vcs::AsOfSpec::Snapshot(x)
1780 }
1781 };
1782 Some((spec, table_name))
1783}
1784
1785pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
1786 const VOLATILE_TOKENS: &[&str] = &[
1790 "pg_advisory_lock",
1791 "pg_try_advisory_lock",
1792 "pg_advisory_unlock",
1793 "random()",
1794 ];
1799 let lowered = sql.to_ascii_lowercase();
1800 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
1801}
1802
1803pub(super) fn query_is_ask_statement(sql: &str) -> bool {
1804 let trimmed = sql.trim_start();
1805 let head_end = trimmed
1806 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
1807 .unwrap_or(trimmed.len());
1808 trimmed[..head_end].eq_ignore_ascii_case("ASK")
1809}
1810
1811pub(super) fn intent_lock_modes_for(
1821 expr: &QueryExpr,
1822) -> Option<(
1823 crate::storage::transaction::lock::LockMode,
1824 crate::storage::transaction::lock::LockMode,
1825)> {
1826 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
1827
1828 match expr {
1829 QueryExpr::Table(_)
1831 | QueryExpr::Join(_)
1832 | QueryExpr::Vector(_)
1833 | QueryExpr::Hybrid(_)
1834 | QueryExpr::Graph(_)
1835 | QueryExpr::Path(_)
1836 | QueryExpr::Ask(_)
1837 | QueryExpr::SearchCommand(_)
1838 | QueryExpr::GraphCommand(_)
1839 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
1840
1841 QueryExpr::Insert(_)
1849 | QueryExpr::Update(_)
1850 | QueryExpr::Delete(_)
1851 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
1852 Some((IntentExclusive, IntentExclusive))
1853 }
1854 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
1855
1856 QueryExpr::CreateTable(_)
1860 | QueryExpr::CreateCollection(_)
1861 | QueryExpr::CreateVector(_)
1862 | QueryExpr::DropTable(_)
1863 | QueryExpr::DropGraph(_)
1864 | QueryExpr::DropVector(_)
1865 | QueryExpr::DropDocument(_)
1866 | QueryExpr::DropKv(_)
1867 | QueryExpr::DropCollection(_)
1868 | QueryExpr::Truncate(_)
1869 | QueryExpr::AlterTable(_)
1870 | QueryExpr::CreateIndex(_)
1871 | QueryExpr::DropIndex(_)
1872 | QueryExpr::CreateTimeSeries(_)
1873 | QueryExpr::DropTimeSeries(_)
1874 | QueryExpr::CreateQueue(_)
1875 | QueryExpr::AlterQueue(_)
1876 | QueryExpr::DropQueue(_)
1877 | QueryExpr::CreateTree(_)
1878 | QueryExpr::DropTree(_)
1879 | QueryExpr::CreatePolicy(_)
1880 | QueryExpr::DropPolicy(_)
1881 | QueryExpr::CreateView(_)
1882 | QueryExpr::DropView(_)
1883 | QueryExpr::RefreshMaterializedView(_)
1884 | QueryExpr::CreateSchema(_)
1885 | QueryExpr::DropSchema(_)
1886 | QueryExpr::CreateSequence(_)
1887 | QueryExpr::DropSequence(_)
1888 | QueryExpr::CreateServer(_)
1889 | QueryExpr::DropServer(_)
1890 | QueryExpr::CreateForeignTable(_)
1891 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
1892
1893 _ => None,
1899 }
1900}
1901
1902pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
1907 let mut out = Vec::new();
1908 walk_collections(expr, &mut out);
1909 out.sort();
1910 out.dedup();
1911 out
1912}
1913
1914fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
1915 match expr {
1916 QueryExpr::Table(t) => out.push(t.table.clone()),
1917 QueryExpr::Join(j) => {
1918 walk_collections(&j.left, out);
1919 walk_collections(&j.right, out);
1920 }
1921 QueryExpr::Insert(i) => out.push(i.table.clone()),
1922 QueryExpr::Update(u) => out.push(u.table.clone()),
1923 QueryExpr::Delete(d) => out.push(d.table.clone()),
1924 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
1925
1926 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
1931 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
1932 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
1933 QueryExpr::DropTable(q) => out.push(q.name.clone()),
1934 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
1935 QueryExpr::DropVector(q) => out.push(q.name.clone()),
1936 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
1937 QueryExpr::DropKv(q) => out.push(q.name.clone()),
1938 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
1939 QueryExpr::Truncate(q) => out.push(q.name.clone()),
1940 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
1941 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
1942 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
1943 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
1944 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
1945 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
1946 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
1947 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
1948 QueryExpr::QueueCommand(QueueCommand::Move {
1949 source,
1950 destination,
1951 ..
1952 }) => {
1953 out.push(source.clone());
1954 out.push(destination.clone());
1955 }
1956 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
1957 QueryExpr::CreateView(q) => out.push(q.name.clone()),
1958 QueryExpr::DropView(q) => out.push(q.name.clone()),
1959 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
1960
1961 _ => {}
1967 }
1968}
1969
1970impl RedDBRuntime {
1971 pub fn in_memory() -> RedDBResult<Self> {
1972 Self::with_options(RedDBOptions::in_memory())
1973 }
1974
1975 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
1979 self.inner.lock_manager.clone()
1980 }
1981
1982 #[inline(never)]
1983 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
1984 Self::with_pool(options, ConnectionPoolConfig::default())
1985 }
1986
1987 pub fn with_pool(
1988 options: RedDBOptions,
1989 pool_config: ConnectionPoolConfig,
1990 ) -> RedDBResult<Self> {
1991 let boot_open_start_ms = std::time::SystemTime::now()
1999 .duration_since(std::time::UNIX_EPOCH)
2000 .map(|d| d.as_millis() as u64)
2001 .unwrap_or(0);
2002 let db = Arc::new(
2003 RedDB::open_with_options(&options)
2004 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2005 );
2006 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2007 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2008 options
2009 .resolved_path("data.rdb")
2010 .with_extension("result-cache.l2"),
2011 ),
2012 )
2013 .map_err(|err| {
2014 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2015 })?;
2016 let storage_ready_ms = std::time::SystemTime::now()
2017 .duration_since(std::time::UNIX_EPOCH)
2018 .map(|d| d.as_millis() as u64)
2019 .unwrap_or(0);
2020
2021 let runtime = Self {
2022 inner: Arc::new(RuntimeInner {
2023 db,
2024 layout: PhysicalLayout::from_options(&options),
2025 indices: IndexCatalog::register_default_vector_graph(
2026 options.has_capability(crate::api::Capability::Table),
2027 options.has_capability(crate::api::Capability::Graph),
2028 ),
2029 pool_config,
2030 pool: Mutex::new(PoolState::default()),
2031 started_at_unix_ms: SystemTime::now()
2032 .duration_since(UNIX_EPOCH)
2033 .unwrap_or_default()
2034 .as_millis(),
2035 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2036 index_store: super::index_store::IndexStore::new(),
2037 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2038 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2039 query_cache: parking_lot::RwLock::new(
2040 crate::storage::query::planner::cache::PlanCache::new(1000),
2041 ),
2042 result_cache: parking_lot::RwLock::new((
2043 HashMap::new(),
2044 std::collections::VecDeque::new(),
2045 )),
2046 result_blob_cache,
2047 result_blob_entries: parking_lot::RwLock::new((
2048 HashMap::new(),
2049 std::collections::VecDeque::new(),
2050 )),
2051 ask_answer_cache_entries: parking_lot::RwLock::new((
2052 HashSet::new(),
2053 std::collections::VecDeque::new(),
2054 )),
2055 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2056 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2057 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2058 rmw_locks: RmwLockTable::new(),
2059 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2060 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2061 ec_worker: crate::ec::worker::EcWorker::new(),
2062 auth_store: parking_lot::RwLock::new(None),
2063 oauth_validator: parking_lot::RwLock::new(None),
2064 views: parking_lot::RwLock::new(HashMap::new()),
2065 materialized_views: parking_lot::RwLock::new(
2066 crate::storage::cache::result::MaterializedViewCache::new(),
2067 ),
2068 snapshot_manager: Arc::new(
2069 crate::storage::transaction::snapshot::SnapshotManager::new(),
2070 ),
2071 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2072 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2073 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2074 lock_manager: Arc::new({
2075 let env = crate::runtime::config_overlay::collect_env_overrides();
2080 let timeout_ms = env
2081 .get("concurrency.locking.deadlock_timeout_ms")
2082 .and_then(|raw| raw.parse::<u64>().ok())
2083 .unwrap_or_else(|| {
2084 match crate::runtime::config_matrix::default_for(
2085 "concurrency.locking.deadlock_timeout_ms",
2086 ) {
2087 Some(crate::serde_json::Value::Number(n)) => n as u64,
2088 _ => 5000,
2089 }
2090 });
2091 let cfg = crate::storage::transaction::lock::LockConfig {
2092 default_timeout: std::time::Duration::from_millis(timeout_ms),
2093 ..Default::default()
2094 };
2095 crate::storage::transaction::lock::LockManager::new(cfg)
2096 }),
2097 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2098 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2099 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2100 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2101 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2102 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2103 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2104 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2105 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2106 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2107 &options,
2108 )),
2109 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2110 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2111 audit_log: {
2112 let data_path = options
2116 .data_path
2117 .clone()
2118 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2119 Arc::new(crate::runtime::audit_log::AuditLogger::for_data_path(
2120 &data_path,
2121 ))
2122 },
2123 lease_lifecycle: std::sync::OnceLock::new(),
2124 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2125 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2126 schema_vocabulary: parking_lot::RwLock::new(
2127 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2128 ),
2129 slow_query_logger: {
2130 let log_dir = options
2141 .data_path
2142 .as_ref()
2143 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2144 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2145 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2146 .ok()
2147 .and_then(|s| s.parse::<u64>().ok())
2148 .unwrap_or(1000);
2149 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2150 .ok()
2151 .and_then(|s| s.parse::<u8>().ok())
2152 .unwrap_or(100);
2153 crate::telemetry::slow_query_logger::SlowQueryLogger::new(
2154 crate::telemetry::slow_query_logger::SlowQueryOpts {
2155 log_dir,
2156 threshold_ms,
2157 sample_pct,
2158 },
2159 )
2160 },
2161 kv_stats: crate::runtime::KvStatsCounters::default(),
2162 metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
2163 metrics_tenant_activity_stats:
2164 crate::runtime::MetricsTenantActivityCounters::default(),
2165 kv_tag_index: crate::runtime::KvTagIndex::default(),
2166 }),
2167 };
2168
2169 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2175 &runtime.inner.audit_log,
2176 ));
2177
2178 runtime
2186 .inner
2187 .lifecycle
2188 .set_restore_started_at_ms(boot_open_start_ms);
2189 runtime
2190 .inner
2191 .lifecycle
2192 .set_restore_ready_at_ms(storage_ready_ms);
2193 runtime
2194 .inner
2195 .lifecycle
2196 .set_wal_replay_started_at_ms(boot_open_start_ms);
2197 runtime
2198 .inner
2199 .lifecycle
2200 .set_wal_replay_ready_at_ms(storage_ready_ms);
2201
2202 let restored_cdc_lsn = runtime
2203 .inner
2204 .db
2205 .replication
2206 .as_ref()
2207 .map(|repl| {
2208 repl.logical_wal_spool
2209 .as_ref()
2210 .map(|spool| spool.current_lsn())
2211 .unwrap_or(0)
2212 })
2213 .unwrap_or(0)
2214 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2215 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2216 runtime.rehydrate_snapshot_xid_floor();
2217 runtime.bootstrap_system_keyed_collections()?;
2218 runtime.rehydrate_declared_column_schemas();
2219 runtime.load_probabilistic_state()?;
2220
2221 runtime.rehydrate_tenant_tables();
2225 if let Some(repl) = &runtime.inner.db.replication {
2226 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2227 }
2228
2229 {
2231 let sys = SystemInfo::collect();
2232 runtime.inner.db.store().set_config_tree(
2233 "red.system",
2234 &crate::serde_json::json!({
2235 "pid": sys.pid,
2236 "cpu_cores": sys.cpu_cores,
2237 "total_memory_bytes": sys.total_memory_bytes,
2238 "available_memory_bytes": sys.available_memory_bytes,
2239 "os": sys.os,
2240 "arch": sys.arch,
2241 "hostname": sys.hostname,
2242 "started_at": SystemTime::now()
2243 .duration_since(UNIX_EPOCH)
2244 .unwrap_or_default()
2245 .as_millis() as u64
2246 }),
2247 );
2248
2249 let store = runtime.inner.db.store();
2251 if store
2252 .get_collection("red_config")
2253 .map(|m| m.query_all(|_| true).len())
2254 .unwrap_or(0)
2255 <= 10
2256 {
2257 store.set_config_tree("red.ai", &crate::json!({
2258 "default": crate::json!({
2259 "provider": "openai",
2260 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2261 }),
2262 "max_embedding_inputs": 256,
2263 "max_prompt_batch": 256,
2264 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2265 }));
2266 store.set_config_tree(
2267 "red.server",
2268 &crate::json!({
2269 "max_scan_limit": 1000,
2270 "max_body_size": 1048576,
2271 "read_timeout_ms": 5000,
2272 "write_timeout_ms": 5000
2273 }),
2274 );
2275 store.set_config_tree(
2276 "red.storage",
2277 &crate::json!({
2278 "page_size": 4096,
2279 "page_cache_capacity": 100000,
2280 "auto_checkpoint_pages": 1000,
2281 "snapshot_retention": 16,
2282 "verify_checksums": true,
2283 "segment": crate::json!({
2284 "max_entities": 100000,
2285 "max_bytes": 268435456_u64,
2286 "compression_level": 6
2287 }),
2288 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2289 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2290 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2291 }),
2292 );
2293 store.set_config_tree(
2294 "red.search",
2295 &crate::json!({
2296 "rag": crate::json!({
2297 "max_chunks_per_source": 10,
2298 "max_total_chunks": 25,
2299 "similarity_threshold": 0.8,
2300 "graph_depth": 2,
2301 "min_relevance": 0.3
2302 }),
2303 "fusion": crate::json!({
2304 "vector_weight": 0.5,
2305 "graph_weight": 0.3,
2306 "table_weight": 0.2,
2307 "dedup_threshold": 0.85
2308 })
2309 }),
2310 );
2311 store.set_config_tree(
2312 "red.auth",
2313 &crate::json!({
2314 "enabled": false,
2315 "session_ttl_secs": 3600,
2316 "require_auth": false
2317 }),
2318 );
2319 store.set_config_tree(
2320 "red.query",
2321 &crate::json!({
2322 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2323 "max_recursion_depth": 1000
2324 }),
2325 );
2326 store.set_config_tree(
2327 "red.indexes",
2328 &crate::json!({
2329 "auto_select": true,
2330 "bloom_filter": crate::json!({
2331 "enabled": true,
2332 "false_positive_rate": 0.01,
2333 "prune_on_scan": true
2334 }),
2335 "hash": crate::json!({ "enabled": true }),
2336 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2337 "spatial": crate::json!({ "enabled": true })
2338 }),
2339 );
2340 store.set_config_tree(
2341 "red.memtable",
2342 &crate::json!({
2343 "enabled": true,
2344 "max_bytes": 67108864_u64,
2345 "flush_threshold": 0.75
2346 }),
2347 );
2348 store.set_config_tree(
2349 "red.probabilistic",
2350 &crate::json!({
2351 "hll_registers": 16384,
2352 "sketch_default_width": 1000,
2353 "sketch_default_depth": 5,
2354 "filter_default_capacity": 100000
2355 }),
2356 );
2357 store.set_config_tree(
2358 "red.timeseries",
2359 &crate::json!({
2360 "default_chunk_size": 1024,
2361 "compression": crate::json!({
2362 "timestamps": "delta_of_delta",
2363 "values": "gorilla_xor"
2364 }),
2365 "default_retention_days": 0
2366 }),
2367 );
2368 store.set_config_tree(
2369 "red.queue",
2370 &crate::json!({
2371 "default_max_size": 0,
2372 "default_max_attempts": 3,
2373 "visibility_timeout_ms": 30000,
2374 "consumer_idle_timeout_ms": 60000
2375 }),
2376 );
2377 store.set_config_tree(
2378 "red.backup",
2379 &crate::json!({
2380 "enabled": false,
2381 "interval_secs": 3600,
2382 "retention_count": 24,
2383 "upload": false,
2384 "backend": "local"
2385 }),
2386 );
2387 store.set_config_tree(
2388 "red.wal",
2389 &crate::json!({
2390 "archive": crate::json!({
2391 "enabled": false,
2392 "retention_hours": 168,
2393 "prefix": "wal/"
2394 })
2395 }),
2396 );
2397 store.set_config_tree(
2398 "red.cdc",
2399 &crate::json!({
2400 "enabled": true,
2401 "buffer_size": 100000
2402 }),
2403 );
2404 store.set_config_tree(
2405 "red.config.secret",
2406 &crate::json!({
2407 "auto_encrypt": true,
2408 "auto_decrypt": true
2409 }),
2410 );
2411 }
2412
2413 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2420
2421 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2428 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2429 if lehman_yao {
2430 tracing::info!(
2431 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2432 );
2433 }
2434
2435 let overlay_path = crate::runtime::config_overlay::config_file_path();
2440 let _ =
2441 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2442 }
2443
2444 {
2448 let store = runtime.inner.db.store();
2449 for name in crate::application::vcs_collections::ALL {
2450 let _ = store.get_or_create_collection(*name);
2451 }
2452 store.set_config_tree(
2455 crate::application::vcs_collections::CONFIG_NAMESPACE,
2456 &crate::json!({
2457 "default_branch": "main",
2458 "author": crate::json!({
2459 "name": "reddb",
2460 "email": "reddb@localhost"
2461 }),
2462 "protected_branches": crate::json!(["main"]),
2463 "closure": crate::json!({
2464 "enabled": true,
2465 "lazy": true
2466 }),
2467 "merge": crate::json!({
2468 "default_strategy": "auto",
2469 "fast_forward": true
2470 })
2471 }),
2472 );
2473 }
2474
2475 {
2478 let store = runtime.inner.db.store();
2479 for name in crate::application::migration_collections::ALL {
2480 let _ = store.get_or_create_collection(*name);
2481 }
2482 }
2483
2484 {
2499 let weak = Arc::downgrade(&runtime.inner);
2500 std::thread::Builder::new()
2501 .name("reddb-maintenance".into())
2502 .spawn(move || {
2503 let tick = std::time::Duration::from_millis(200);
2504 let work_interval = std::time::Duration::from_secs(60);
2505 let mut last_work = std::time::Instant::now();
2506 loop {
2507 std::thread::sleep(tick);
2508 let Some(inner) = weak.upgrade() else {
2509 break;
2512 };
2513 if last_work.elapsed() >= work_interval {
2514 let _stats = inner.db.store().context_index().stats();
2515 last_work = std::time::Instant::now();
2516 }
2517 }
2518 })
2519 .ok();
2520 }
2521
2522 {
2524 let store = runtime.inner.db.store();
2525 let mut backup_enabled = false;
2526 let mut backup_interval = 3600u64;
2527
2528 if let Some(manager) = store.get_collection("red_config") {
2529 manager.for_each_entity(|entity| {
2530 if let Some(row) = entity.data.as_row() {
2531 let key = row.get_field("key").and_then(|v| match v {
2532 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2533 _ => None,
2534 });
2535 let val = row.get_field("value");
2536 if key == Some("red.config.backup.enabled") {
2537 backup_enabled = match val {
2538 Some(crate::storage::schema::Value::Boolean(true)) => true,
2539 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2540 _ => false,
2541 };
2542 } else if key == Some("red.config.backup.interval_secs") {
2543 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2544 backup_interval = *n as u64;
2545 }
2546 }
2547 }
2548 true
2549 });
2550 }
2551
2552 if backup_enabled {
2553 runtime.inner.backup_scheduler.set_interval(backup_interval);
2554 let rt = runtime.clone();
2555 runtime
2556 .inner
2557 .backup_scheduler
2558 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2559 }
2560 }
2561
2562 {
2564 runtime
2565 .inner
2566 .ec_registry
2567 .load_from_config_store(runtime.inner.db.store().as_ref());
2568 if !runtime.inner.ec_registry.async_configs().is_empty() {
2569 runtime.inner.ec_worker.start(
2570 Arc::clone(&runtime.inner.ec_registry),
2571 Arc::clone(&runtime.inner.db.store()),
2572 );
2573 }
2574 }
2575
2576 if let crate::replication::ReplicationRole::Replica { primary_addr } =
2577 runtime.inner.db.options().replication.role.clone()
2578 {
2579 let rt = runtime.clone();
2580 std::thread::Builder::new()
2581 .name("reddb-replica".into())
2582 .spawn(move || rt.run_replica_loop(primary_addr))
2583 .ok();
2584 }
2585
2586 runtime.inner.lifecycle.mark_ready();
2591
2592 Ok(runtime)
2593 }
2594
2595 fn rehydrate_snapshot_xid_floor(&self) {
2596 let store = self.inner.db.store();
2597 for collection in store.list_collections() {
2598 let Some(manager) = store.get_collection(&collection) else {
2599 continue;
2600 };
2601 for entity in manager.query_all(|_| true) {
2602 self.inner
2603 .snapshot_manager
2604 .observe_committed_xid(entity.xmin);
2605 self.inner
2606 .snapshot_manager
2607 .observe_committed_xid(entity.xmax);
2608 }
2609 }
2610 }
2611
2612 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
2613 let mut changed = false;
2614 for (name, model) in [
2615 ("red.config", crate::catalog::CollectionModel::Config),
2616 ("red.vault", crate::catalog::CollectionModel::Vault),
2617 ] {
2618 if self.inner.db.store().get_collection(name).is_none() {
2619 self.inner.db.store().get_or_create_collection(name);
2620 changed = true;
2621 }
2622 if self.inner.db.collection_contract(name).is_none() {
2623 self.inner
2624 .db
2625 .save_collection_contract(system_keyed_collection_contract(name, model))
2626 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2627 changed = true;
2628 }
2629 }
2630 if changed {
2631 self.inner
2632 .db
2633 .persist_metadata()
2634 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2635 }
2636 Ok(())
2637 }
2638
/// Returns a new `Arc` handle to the `RedDB` instance owned by this runtime.
pub fn db(&self) -> Arc<RedDB> {
    Arc::clone(&self.inner.db)
}
2642
/// Borrows the runtime's index store.
pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
    &self.inner.index_store
}
2650
/// Takes the write lock on the schema vocabulary and forwards `event` to
/// its `on_ddl` handler.
pub(crate) fn schema_vocabulary_apply(
    &self,
    event: crate::runtime::schema_vocabulary::DdlEvent,
) {
    self.inner.schema_vocabulary.write().on_ddl(event);
}
2661
/// Looks up `token` in the schema vocabulary under the read lock and
/// returns an owned copy of the matching hits.
pub fn schema_vocabulary_lookup(
    &self,
    token: &str,
) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
    self.inner.schema_vocabulary.read().lookup(token).to_vec()
}
2672
/// Installs `store` as the runtime's auth store, replacing whatever was
/// there before.
pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
    *self.inner.auth_store.write() = Some(store);
}
2679
/// Reads `key` from the installed auth store's vault key/value storage.
///
/// Returns `None` when no auth store is installed or when the store
/// itself returns no value for `key`.
pub fn vault_kv_get(&self, key: &str) -> Option<String> {
    self.inner
        .auth_store
        .read()
        .as_ref()
        .and_then(|store| store.vault_kv_get(key))
}
2688
2689 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
2692 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
2693 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
2694 })?;
2695 store
2696 .vault_kv_try_set(key, value)
2697 .map_err(|err| RedDBError::Query(err.to_string()))
2698 }
2699
/// Replaces the runtime's OAuth validator; passing `None` clears it.
pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
    *self.inner.oauth_validator.write() = validator;
}
2706
/// Returns a clone of the currently installed OAuth validator handle, if any.
pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
    self.inner.oauth_validator.read().clone()
}
2713
/// Returns the 32-byte key reported by the auth store's `vault_secret_key`.
///
/// Yields `None` when no auth store is installed or the store has no key
/// to offer.
pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
    let guard = self.inner.auth_store.read();
    guard.as_ref().and_then(|s| s.vault_secret_key())
}
2721
2722 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
2728 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2729 if let Some(crate::storage::schema::Value::Boolean(b)) =
2730 crate::runtime::config_overlay::coerce_env_value(key, raw)
2731 {
2732 return b;
2733 }
2734 }
2735 let store = self.inner.db.store();
2736 let Some(manager) = store.get_collection("red_config") else {
2737 return default;
2738 };
2739 let mut result = default;
2740 let mut latest_id: u64 = 0;
2741 manager.for_each_entity(|entity| {
2742 if let Some(row) = entity.data.as_row() {
2743 let entry_key = row.get_field("key").and_then(|v| match v {
2744 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2745 _ => None,
2746 });
2747 if entry_key == Some(key) {
2748 let id = entity.id.raw();
2749 if id >= latest_id {
2750 latest_id = id;
2751 result = match row.get_field("value") {
2752 Some(crate::storage::schema::Value::Boolean(b)) => *b,
2753 Some(crate::storage::schema::Value::Text(s)) => {
2754 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
2755 }
2756 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
2757 _ => default,
2758 };
2759 }
2760 }
2761 }
2762 true
2763 });
2764 result
2765 }
2766
2767 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
2768 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2769 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
2770 crate::runtime::config_overlay::coerce_env_value(key, raw)
2771 {
2772 return n;
2773 }
2774 }
2775 let store = self.inner.db.store();
2776 let Some(manager) = store.get_collection("red_config") else {
2777 return default;
2778 };
2779 let mut result = default;
2780 let mut latest_id: u64 = 0;
2781 manager.for_each_entity(|entity| {
2782 if let Some(row) = entity.data.as_row() {
2783 let entry_key = row.get_field("key").and_then(|v| match v {
2784 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2785 _ => None,
2786 });
2787 if entry_key == Some(key) {
2788 let id = entity.id.raw();
2789 if id >= latest_id {
2790 latest_id = id;
2791 result = match row.get_field("value") {
2792 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
2793 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
2794 Some(crate::storage::schema::Value::Text(s)) => {
2795 s.parse::<u64>().unwrap_or(default)
2796 }
2797 _ => default,
2798 };
2799 }
2800 }
2801 }
2802 true
2803 });
2804 result
2805 }
2806
2807 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
2808 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2809 if let Ok(n) = raw.parse::<f64>() {
2810 return n;
2811 }
2812 }
2813 let store = self.inner.db.store();
2814 let Some(manager) = store.get_collection("red_config") else {
2815 return default;
2816 };
2817 let mut result = default;
2818 let mut latest_id: u64 = 0;
2819 manager.for_each_entity(|entity| {
2820 if let Some(row) = entity.data.as_row() {
2821 let entry_key = row.get_field("key").and_then(|v| match v {
2822 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2823 _ => None,
2824 });
2825 if entry_key == Some(key) {
2826 let id = entity.id.raw();
2827 if id >= latest_id {
2828 latest_id = id;
2829 result = match row.get_field("value") {
2830 Some(crate::storage::schema::Value::Float(n)) => *n,
2831 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
2832 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
2833 Some(crate::storage::schema::Value::Text(s)) => {
2834 s.parse::<f64>().unwrap_or(default)
2835 }
2836 _ => default,
2837 };
2838 }
2839 }
2840 }
2841 true
2842 });
2843 result
2844 }
2845
2846 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
2847 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2848 return raw.clone();
2849 }
2850 let store = self.inner.db.store();
2851 let Some(manager) = store.get_collection("red_config") else {
2852 return default.to_string();
2853 };
2854 let mut result = default.to_string();
2855 let mut latest_id: u64 = 0;
2856 manager.for_each_entity(|entity| {
2857 if let Some(row) = entity.data.as_row() {
2858 let entry_key = row.get_field("key").and_then(|v| match v {
2859 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2860 _ => None,
2861 });
2862 if entry_key == Some(key) {
2863 let id = entity.id.raw();
2864 if id >= latest_id {
2865 latest_id = id;
2866 if let Some(crate::storage::schema::Value::Text(value)) =
2867 row.get_field("value")
2868 {
2869 result = value.to_string();
2870 }
2871 }
2872 }
2873 }
2874 true
2875 });
2876 result
2877 }
2878
/// Fetches the store's metadata for `entity_id` in `collection` and
/// converts it to JSON with `metadata_to_json`.
///
/// Returns `None` when the store has no metadata for that entity.
fn latest_metadata_for(
    &self,
    collection: &str,
    entity_id: u64,
) -> Option<crate::serde_json::Value> {
    self.inner
        .db
        .store()
        .get_metadata(collection, EntityId::new(entity_id))
        .map(|metadata| metadata_to_json(&metadata))
}
2890
/// Writes `lsn` as `last_applied_lsn` under the `red.replication` config
/// namespace.
fn persist_replica_lsn(&self, lsn: u64) {
    self.inner.db.store().set_config_tree(
        "red.replication",
        &crate::json!({
            "last_applied_lsn": lsn
        }),
    );
}
2899
2900 fn persist_replication_health(
2901 &self,
2902 state: &str,
2903 last_error: &str,
2904 primary_lsn: Option<u64>,
2905 oldest_available_lsn: Option<u64>,
2906 ) {
2907 self.inner.db.store().set_config_tree(
2908 "red.replication",
2909 &crate::json!({
2910 "state": state,
2911 "last_error": last_error,
2912 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
2913 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
2914 "updated_at_unix_ms": SystemTime::now()
2915 .duration_since(UNIX_EPOCH)
2916 .unwrap_or_default()
2917 .as_millis() as u64
2918 }),
2919 );
2920 }
2921
/// Reads the `red.config.secret.auto_encrypt` flag, defaulting to `true`.
pub(crate) fn secret_auto_encrypt(&self) -> bool {
    self.config_bool("red.config.secret.auto_encrypt", true)
}
2927
/// Reads the `red.config.secret.auto_decrypt` flag, defaulting to `true`.
pub(crate) fn secret_auto_decrypt(&self) -> bool {
    self.config_bool("red.config.secret.auto_decrypt", true)
}
2935
2936 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
2943 if !self.secret_auto_decrypt() {
2944 return;
2945 }
2946 let Some(key) = self.secret_aes_key() else {
2947 return;
2948 };
2949 for record in result.result.records.iter_mut() {
2950 for value in record.values_mut() {
2951 if let Value::Secret(ref bytes) = value {
2952 if let Some(plain) =
2953 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
2954 {
2955 if let Ok(text) = String::from_utf8(plain) {
2956 *value = Value::text(text);
2957 }
2958 }
2959 }
2960 }
2961 }
2962 }
2963
/// Builds a `MutationEngine` that borrows this runtime.
pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
    crate::runtime::mutation::MutationEngine::new(self)
}
2974
/// Asks the runtime's write gate whether a write of the given `kind` is
/// allowed, returning the gate's verdict unchanged.
pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
    self.inner.write_gate.check(kind)
}
2988
/// Borrows the runtime's write gate.
pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
    &self.inner.write_gate
}
2995
/// Borrows the runtime's lifecycle tracker.
pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
    &self.inner.lifecycle
}
3002
/// Borrows the resource limits configured for this runtime.
pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
    &self.inner.resource_limits
}
3007
/// Borrows the runtime's audit logger.
pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
    &self.inner.audit_log
}
3012
/// Returns a new `Arc` handle to the runtime's audit logger, for callers
/// that need shared ownership rather than a borrow.
pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
    Arc::clone(&self.inner.audit_log)
}
3019
/// Returns a new `Arc` handle to the runtime's write gate, for callers
/// that need shared ownership rather than a borrow.
pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
    Arc::clone(&self.inner.write_gate)
}
3027
/// Returns the lease lifecycle handle if one has been set on this runtime.
pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
    self.inner.lease_lifecycle.get()
}
3033
/// Stores `lifecycle` in the runtime's lease-lifecycle slot and returns the
/// slot's `set` result unchanged.
///
/// NOTE(review): the `Err(Arc<..>)` return shape suggests a set-once cell
/// that hands the value back when a lifecycle is already installed —
/// confirm against the field's type.
pub fn set_lease_lifecycle(
    &self,
    lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
    self.inner.lease_lifecycle.set(lifecycle)
}
3042
3043 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3048 if self.inner.resource_limits.batch_size_exceeded(requested) {
3049 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3050 return Err(RedDBError::QuotaExceeded(format!(
3051 "max_batch_size:{requested}:{max}"
3052 )));
3053 }
3054 Ok(())
3055 }
3056
3057 pub fn check_db_size(&self) -> RedDBResult<()> {
3063 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3064 return Ok(());
3065 };
3066 if limit == 0 {
3067 return Ok(());
3068 }
3069 let Some(path) = self.inner.db.path() else {
3070 return Ok(());
3071 };
3072 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3073 if current > limit {
3074 return Err(RedDBError::QuotaExceeded(format!(
3075 "max_db_size_bytes:{current}:{limit}"
3076 )));
3077 }
3078 Ok(())
3079 }
3080
/// Runs the shutdown sequence and returns a report describing what happened.
///
/// Steps, in order:
/// 1. `begin_shutdown()` on the lifecycle; if it returns `false` the stored
///    shutdown report (or a default one) is returned without further work.
/// 2. A local-only flush; its success is recorded as both `flushed_wal` and
///    `final_checkpoint`.
/// 3. If the flush succeeded and the remote-write check passes, an upload to
///    the remote backend.
/// 4. If `backup_on_shutdown` is set and a remote backend exists, a final
///    backup via `trigger_backup`.
/// 5. The report is timestamped and handed to `finish_shutdown`.
///
/// Failures in steps 2-4 are logged and reflected in the report; every path
/// visible in this function returns `Ok`.
pub fn graceful_shutdown(
    &self,
    backup_on_shutdown: bool,
) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
    // NOTE(review): a `false` here presumably means shutdown was already
    // started by another caller — confirm against `Lifecycle::begin_shutdown`.
    if !self.inner.lifecycle.begin_shutdown() {
        return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
    }

    // Wall-clock start in Unix milliseconds; 0 if the clock is before the epoch.
    let started_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_millis() as u64)
        .unwrap_or(0);
    let mut report = crate::runtime::lifecycle::ShutdownReport {
        started_at_ms: started_ms,
        ..Default::default()
    };

    // One flush result drives both report flags.
    let flush_res = self.inner.db.flush_local_only();
    report.flushed_wal = flush_res.is_ok();
    report.final_checkpoint = flush_res.is_ok();
    // The chain below attempts the remote upload only when the local flush
    // succeeded AND the remote-write check passed; each failure is logged
    // and shutdown continues.
    if let Err(err) = &flush_res {
        tracing::error!(
            target: "reddb::lifecycle",
            error = %err,
            "graceful_shutdown: local flush failed"
        );
    } else if let Err(lease_err) =
        self.assert_remote_write_allowed("shutdown/checkpoint_upload")
    {
        tracing::warn!(
            target: "reddb::serverless::lease",
            error = %lease_err,
            "graceful_shutdown: remote upload skipped — lease not held"
        );
    } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
        tracing::error!(
            target: "reddb::lifecycle",
            error = %err,
            "graceful_shutdown: remote upload failed"
        );
    }

    // Optional final backup; a failure is logged as a warning only.
    if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
        match self.trigger_backup() {
            Ok(result) => {
                report.backup_uploaded = result.uploaded;
            }
            Err(err) => {
                tracing::warn!(
                    target: "reddb::lifecycle",
                    error = %err,
                    "graceful_shutdown: final backup skipped"
                );
            }
        }
    }

    // If the clock cannot be read, reuse the start time so the duration is 0.
    let completed_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_millis() as u64)
        .unwrap_or(started_ms);
    report.completed_at_ms = completed_ms;
    report.duration_ms = completed_ms.saturating_sub(started_ms);

    self.inner.lifecycle.finish_shutdown(report.clone());
    Ok(report)
}
3182
3183 pub(crate) fn cdc_emit_no_cache_invalidate(
3189 &self,
3190 operation: crate::replication::cdc::ChangeOperation,
3191 collection: &str,
3192 entity_id: u64,
3193 entity_kind: &str,
3194 ) -> u64 {
3195 let lsn = self
3196 .inner
3197 .cdc
3198 .emit(operation, collection, entity_id, entity_kind);
3199
3200 if let Some(ref primary) = self.inner.db.replication {
3202 let store = self.inner.db.store();
3203 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3204 None
3205 } else {
3206 store.get(collection, EntityId::new(entity_id))
3207 };
3208 let record = ChangeRecord {
3209 lsn,
3210 timestamp: SystemTime::now()
3211 .duration_since(UNIX_EPOCH)
3212 .unwrap_or_default()
3213 .as_millis() as u64,
3214 operation,
3215 collection: collection.to_string(),
3216 entity_id,
3217 entity_kind: entity_kind.to_string(),
3218 entity_bytes: entity
3219 .as_ref()
3220 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3221 metadata: self.latest_metadata_for(collection, entity_id),
3222 };
3223 let encoded = record.encode();
3224 primary.wal_buffer.append(record.lsn, encoded.clone());
3225 if let Some(spool) = &primary.logical_wal_spool {
3226 let _ = spool.append(record.lsn, &encoded);
3227 }
3228 }
3229 lsn
3230 }
3231
3232 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3233 &self,
3234 collection: &str,
3235 ids: &[EntityId],
3236 entity_kind: &str,
3237 ) -> Vec<u64> {
3238 if ids.is_empty() {
3239 return Vec::new();
3240 }
3241
3242 if self.inner.db.replication.is_none() {
3246 return self.inner.cdc.emit_batch_same_collection(
3247 crate::replication::cdc::ChangeOperation::Insert,
3248 collection,
3249 entity_kind,
3250 ids.iter().map(|id| id.raw()),
3251 );
3252 }
3253
3254 ids.iter()
3257 .map(|id| {
3258 self.cdc_emit_no_cache_invalidate(
3259 crate::replication::cdc::ChangeOperation::Insert,
3260 collection,
3261 id.raw(),
3262 entity_kind,
3263 )
3264 })
3265 .collect()
3266 }
3267
3268 pub fn cdc_emit(
3269 &self,
3270 operation: crate::replication::cdc::ChangeOperation,
3271 collection: &str,
3272 entity_id: u64,
3273 entity_kind: &str,
3274 ) -> u64 {
3275 let lsn = self
3276 .inner
3277 .cdc
3278 .emit(operation, collection, entity_id, entity_kind);
3279 self.invalidate_result_cache_for_table(collection);
3285
3286 if let Some(ref primary) = self.inner.db.replication {
3288 let store = self.inner.db.store();
3289 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3290 None
3291 } else {
3292 store.get(collection, EntityId::new(entity_id))
3293 };
3294 let record = ChangeRecord {
3295 lsn,
3296 timestamp: SystemTime::now()
3297 .duration_since(UNIX_EPOCH)
3298 .unwrap_or_default()
3299 .as_millis() as u64,
3300 operation,
3301 collection: collection.to_string(),
3302 entity_id,
3303 entity_kind: entity_kind.to_string(),
3304 entity_bytes: entity
3305 .as_ref()
3306 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
3307 metadata: self.latest_metadata_for(collection, entity_id),
3308 };
3309 let encoded = record.encode();
3310 primary.wal_buffer.append(record.lsn, encoded.clone());
3311 if let Some(spool) = &primary.logical_wal_spool {
3312 let _ = spool.append(record.lsn, &encoded);
3313 }
3314 }
3315 lsn
3316 }
3317
/// Emits a key/value CDC event carrying the `before`/`after` payloads,
/// increments the emitted-watch-events counter, and invalidates the result
/// cache for `collection`.
///
/// Returns the LSN assigned by the CDC emitter.
pub(crate) fn cdc_emit_kv(
    &self,
    operation: crate::replication::cdc::ChangeOperation,
    collection: &str,
    key: &str,
    entity_id: u64,
    before: Option<crate::json::Value>,
    after: Option<crate::json::Value>,
) -> u64 {
    let lsn = self
        .inner
        .cdc
        .emit_kv(operation, collection, key, entity_id, before, after);
    self.inner.kv_stats.incr_watch_events_emitted();
    self.invalidate_result_cache_for_table(collection);
    lsn
}
3335
3336 pub(crate) fn record_kv_watch_event(
3337 &self,
3338 operation: crate::replication::cdc::ChangeOperation,
3339 collection: &str,
3340 key: &str,
3341 entity_id: u64,
3342 before: Option<crate::json::Value>,
3343 after: Option<crate::json::Value>,
3344 ) {
3345 if self.current_xid().is_some() {
3346 let conn_id = current_connection_id();
3347 let event = crate::replication::cdc::KvWatchEvent {
3348 collection: collection.to_string(),
3349 key: key.to_string(),
3350 op: operation,
3351 before,
3352 after,
3353 lsn: 0,
3354 committed_at: 0,
3355 dropped_event_count: 0,
3356 };
3357 self.inner
3358 .pending_kv_watch_events
3359 .write()
3360 .entry(conn_id)
3361 .or_default()
3362 .push(event);
3363 return;
3364 }
3365
3366 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
3367 }
3368
/// Emits a CDC event for an entity the caller already holds, with no
/// changed-column list.
///
/// Thin wrapper over `cdc_emit_prebuilt_with_columns` that passes `None`
/// for `changed_columns`; returns the LSN it reports.
pub(crate) fn cdc_emit_prebuilt(
    &self,
    operation: crate::replication::cdc::ChangeOperation,
    collection: &str,
    entity: &UnifiedEntity,
    entity_kind: &str,
    metadata: Option<&crate::storage::Metadata>,
    invalidate_cache: bool,
) -> u64 {
    self.cdc_emit_prebuilt_with_columns(
        operation,
        collection,
        entity,
        entity_kind,
        metadata,
        invalidate_cache,
        None,
    )
}
3388
/// Emits a CDC event for an entity the caller already holds, optionally
/// tagging it with the list of changed columns.
///
/// When `invalidate_cache` is set the whole result cache is invalidated
/// first. When replication is configured, a logical change record is built
/// from the supplied `entity` (no store read-back) and appended to the
/// primary's WAL buffer and, if present, the logical WAL spool. Record
/// metadata comes from `metadata` when given, otherwise from the store.
///
/// Returns the LSN assigned by the CDC emitter.
pub(crate) fn cdc_emit_prebuilt_with_columns(
    &self,
    operation: crate::replication::cdc::ChangeOperation,
    collection: &str,
    entity: &UnifiedEntity,
    entity_kind: &str,
    metadata: Option<&crate::storage::Metadata>,
    invalidate_cache: bool,
    changed_columns: Option<Vec<String>>,
) -> u64 {
    if invalidate_cache {
        self.invalidate_result_cache();
    }

    // The CDC event is keyed by the entity's logical id...
    let public_id = entity.logical_id().raw();
    let lsn = self.inner.cdc.emit_with_columns(
        operation,
        collection,
        public_id,
        entity_kind,
        changed_columns,
    );

    if let Some(ref primary) = self.inner.db.replication {
        let store = self.inner.db.store();
        let record = ChangeRecord {
            lsn,
            timestamp: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_millis() as u64,
            operation,
            collection: collection.to_string(),
            // ...while the replication record uses `entity.id`.
            // NOTE(review): confirm the two ids are meant to differ here.
            entity_id: entity.id.raw(),
            entity_kind: entity_kind.to_string(),
            entity_bytes: Some(UnifiedStore::serialize_entity(
                entity,
                store.format_version(),
            )),
            metadata: metadata
                .map(metadata_to_json)
                .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
        };
        let encoded = record.encode();
        primary.wal_buffer.append(record.lsn, encoded.clone());
        if let Some(spool) = &primary.logical_wal_spool {
            // Best-effort: a spool append failure does not fail the emit.
            let _ = spool.append(record.lsn, &encoded);
        }
    }

    lsn
}
3447
3448 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
3449 &self,
3450 operation: crate::replication::cdc::ChangeOperation,
3451 entity_kind: &str,
3452 items: I,
3453 invalidate_cache: bool,
3454 ) where
3455 I: IntoIterator<
3456 Item = (
3457 &'a str,
3458 &'a UnifiedEntity,
3459 Option<&'a crate::storage::Metadata>,
3460 ),
3461 >,
3462 {
3463 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
3464 items.into_iter().collect();
3465 if items.is_empty() {
3466 return;
3467 }
3468
3469 if invalidate_cache {
3470 self.invalidate_result_cache();
3471 }
3472
3473 for (collection, entity, metadata) in items {
3474 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
3475 }
3476 }
3477
3478 fn run_replica_loop(&self, primary_addr: String) {
3479 let endpoint = if primary_addr.starts_with("http") {
3480 primary_addr
3481 } else {
3482 format!("http://{primary_addr}")
3483 };
3484 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
3485 let max_count = self.inner.db.options().replication.max_batch_size;
3486 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
3487
3488 let runtime = match tokio::runtime::Builder::new_current_thread()
3489 .enable_all()
3490 .build()
3491 {
3492 Ok(runtime) => runtime,
3493 Err(_) => return,
3494 };
3495
3496 runtime.block_on(async move {
3497 use crate::grpc::proto::red_db_client::RedDbClient;
3498 use crate::grpc::proto::JsonPayloadRequest;
3499
3500 let mut client = loop {
3501 match RedDbClient::connect(endpoint.clone()).await {
3502 Ok(client) => {
3503 self.persist_replication_health("connecting", "", None, None);
3504 break client;
3505 }
3506 Err(_) => {
3507 self.persist_replication_health(
3508 "connecting",
3509 "waiting for primary connection",
3510 None,
3511 None,
3512 );
3513 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
3514 }
3515 }
3516 };
3517
3518 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
3523
3524 loop {
3525 let payload = crate::json!({
3526 "since_lsn": since_lsn,
3527 "max_count": max_count
3528 });
3529 let request = tonic::Request::new(JsonPayloadRequest {
3530 payload_json: crate::json::to_string(&payload)
3531 .unwrap_or_else(|_| "{}".to_string()),
3532 });
3533
3534 if let Ok(response) = client.pull_wal_records(request).await {
3535 if let Ok(value) =
3536 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
3537 {
3538 let current_lsn =
3539 value.get("current_lsn").and_then(crate::json::Value::as_u64);
3540 let oldest_available_lsn = value
3541 .get("oldest_available_lsn")
3542 .and_then(crate::json::Value::as_u64);
3543 if since_lsn > 0
3544 && oldest_available_lsn
3545 .map(|oldest| oldest > since_lsn.saturating_add(1))
3546 .unwrap_or(false)
3547 {
3548 self.persist_replication_health(
3549 "stalled_gap",
3550 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
3551 current_lsn,
3552 oldest_available_lsn,
3553 );
3554 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
3555 continue;
3556 }
3557 if let Some(records) =
3558 value.get("records").and_then(crate::json::Value::as_array)
3559 {
3560 for record in records {
3561 let Some(data_hex) =
3562 record.get("data").and_then(crate::json::Value::as_str)
3563 else {
3564 continue;
3565 };
3566 let Ok(data) = hex::decode(data_hex) else {
3567 self.inner.replica_apply_metrics.record(
3568 crate::replication::logical::ApplyErrorKind::Decode,
3569 );
3570 self.persist_replication_health(
3571 "apply_error",
3572 "failed to decode WAL record hex payload",
3573 current_lsn,
3574 oldest_available_lsn,
3575 );
3576 continue;
3577 };
3578 let Ok(change) = ChangeRecord::decode(&data) else {
3579 self.inner.replica_apply_metrics.record(
3580 crate::replication::logical::ApplyErrorKind::Decode,
3581 );
3582 self.persist_replication_health(
3583 "apply_error",
3584 "failed to decode logical WAL record",
3585 current_lsn,
3586 oldest_available_lsn,
3587 );
3588 continue;
3589 };
3590 match applier.apply(
3591 self.inner.db.as_ref(),
3592 &change,
3593 ApplyMode::Replica,
3594 ) {
3595 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
3596 self.invalidate_result_cache_for_table(&change.collection);
3597 since_lsn = since_lsn.max(change.lsn);
3598 self.persist_replica_lsn(since_lsn);
3599 }
3600 Ok(_) => {
3601 }
3603 Err(err) => {
3604 self.inner.replica_apply_metrics.record(err.kind());
3605 match &err {
3614 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
3615 crate::telemetry::operator_event::OperatorEvent::Divergence {
3616 peer: "primary".to_string(),
3617 leader_lsn: *lsn,
3618 follower_lsn: since_lsn,
3619 }
3620 .emit_global();
3621 }
3622 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
3623 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
3624 peer: "primary".to_string(),
3625 reason: format!("stalled gap last={last} next={next}"),
3626 }
3627 .emit_global();
3628 }
3629 _ => {}
3630 }
3631 let kind = match &err {
3632 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
3633 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
3634 _ => "apply_error",
3635 };
3636 self.persist_replication_health(
3637 kind,
3638 &format!("replica apply rejected: {err}"),
3639 current_lsn,
3640 oldest_available_lsn,
3641 );
3642 break;
3653 }
3654 }
3655 }
3656 }
3657 self.persist_replication_health(
3658 "healthy",
3659 "",
3660 current_lsn,
3661 oldest_available_lsn,
3662 );
3663 } else {
3664 self.persist_replication_health(
3665 "apply_error",
3666 "failed to parse pull_wal_records response",
3667 None,
3668 None,
3669 );
3670 }
3671 } else {
3672 self.persist_replication_health(
3673 "connecting",
3674 "primary pull_wal_records request failed",
3675 None,
3676 None,
3677 );
3678 }
3679
3680 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
3681 }
3682 });
3683 }
3684
3685 pub fn cdc_poll(
3687 &self,
3688 since_lsn: u64,
3689 max_count: usize,
3690 ) -> Vec<crate::replication::cdc::ChangeEvent> {
3691 self.inner.cdc.poll(since_lsn, max_count)
3692 }
3693
3694 pub fn cdc_current_lsn(&self) -> u64 {
3698 self.inner.cdc.current_lsn()
3699 }
3700
3701 pub fn kv_watch_events_since(
3702 &self,
3703 collection: &str,
3704 key: &str,
3705 since_lsn: u64,
3706 max_count: usize,
3707 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3708 self.inner
3709 .cdc
3710 .poll(since_lsn, max_count)
3711 .into_iter()
3712 .filter_map(|event| event.kv)
3713 .filter(|event| event.collection == collection && event.key == key)
3714 .collect()
3715 }
3716
3717 pub fn kv_watch_events_since_prefix(
3718 &self,
3719 collection: &str,
3720 prefix: &str,
3721 since_lsn: u64,
3722 max_count: usize,
3723 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3724 self.inner
3725 .cdc
3726 .poll(since_lsn, max_count)
3727 .into_iter()
3728 .filter_map(|event| event.kv)
3729 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
3730 .collect()
3731 }
3732
3733 pub(crate) fn kv_watch_subscribe<'a>(
3734 &'a self,
3735 collection: impl Into<String>,
3736 key: impl Into<String>,
3737 from_lsn: Option<u64>,
3738 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3739 crate::runtime::kv_watch::KvWatchStream::subscribe(
3740 &self.inner.cdc,
3741 &self.inner.kv_stats,
3742 collection,
3743 key,
3744 from_lsn,
3745 self.kv_watch_idle_timeout_ms(),
3746 )
3747 }
3748
3749 pub(crate) fn kv_watch_subscribe_prefix<'a>(
3750 &'a self,
3751 collection: impl Into<String>,
3752 prefix: impl Into<String>,
3753 from_lsn: Option<u64>,
3754 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3755 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
3756 &self.inner.cdc,
3757 &self.inner.kv_stats,
3758 collection,
3759 prefix,
3760 from_lsn,
3761 self.kv_watch_idle_timeout_ms(),
3762 )
3763 }
3764
3765 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
3766 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
3767 }
3768
3769 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
3771 self.inner.backup_scheduler.status()
3772 }
3773
3774 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
3784 &self.inner.result_blob_cache
3785 }
3786
3787 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
3791 self.inner
3792 .db
3793 .replication
3794 .as_ref()
3795 .map(|repl| repl.replica_snapshots())
3796 .unwrap_or_default()
3797 }
3798
3799 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
3804 crate::replication::CommitPolicy::from_env()
3805 }
3806
3807 pub fn replica_apply_error_counts(
3812 &self,
3813 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
3814 self.inner.replica_apply_metrics.snapshot()
3815 }
3816
3817 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
3820 &self.inner.quota_bucket
3821 }
3822
3823 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
3827 self.inner
3828 .db
3829 .replication
3830 .as_ref()
3831 .map(|repl| repl.commit_waiter.snapshot())
3832 .unwrap_or_default()
3833 }
3834
3835 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
3838 self.inner
3839 .db
3840 .replication
3841 .as_ref()
3842 .map(|repl| repl.commit_waiter.metrics_snapshot())
3843 .unwrap_or((0, 0, 0, 0))
3844 }
3845
3846 pub fn await_replica_acks(
3856 &self,
3857 target_lsn: u64,
3858 count: u32,
3859 timeout: std::time::Duration,
3860 ) -> crate::replication::AwaitOutcome {
3861 match &self.inner.db.replication {
3862 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
3863 None => {
3864 crate::replication::AwaitOutcome::NotRequired
3868 }
3869 }
3870 }
3871
3872 pub fn enforce_commit_policy(
3886 &self,
3887 post_lsn: u64,
3888 ) -> RedDBResult<crate::replication::AwaitOutcome> {
3889 let n = match self.commit_policy() {
3890 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
3891 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
3892 };
3893 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
3894 .ok()
3895 .and_then(|v| v.parse::<u64>().ok())
3896 .unwrap_or(5_000);
3897 let outcome =
3898 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
3899 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
3900 tracing::warn!(
3901 target: "reddb::commit",
3902 post_lsn,
3903 observed = *observed,
3904 required = *required,
3905 timeout_ms,
3906 "ack_n: timed out waiting for replicas"
3907 );
3908 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
3909 .ok()
3910 .map(|v| {
3911 let t = v.trim();
3912 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
3913 })
3914 .unwrap_or(false);
3915 if fail {
3916 return Err(RedDBError::ReadOnly(format!(
3917 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
3918 )));
3919 }
3920 }
3921 Ok(outcome)
3922 }
3923
3924 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
3932 match crate::crypto::page_encryption::key_from_env() {
3933 Ok(Some(_)) => ("enabled", None),
3934 Ok(None) => ("disabled", None),
3935 Err(err) => ("error", Some(err)),
3936 }
3937 }
3938
3939 pub fn replica_apply_health(&self) -> Option<String> {
3945 let state = self.config_string("red.replication.state", "");
3946 if state.is_empty() {
3947 None
3948 } else {
3949 Some(state)
3950 }
3951 }
3952
3953 pub fn wal_archive_progress(&self) -> (u64, u64) {
3958 let current_lsn = self
3959 .inner
3960 .db
3961 .replication
3962 .as_ref()
3963 .map(|repl| {
3964 repl.logical_wal_spool
3965 .as_ref()
3966 .map(|spool| spool.current_lsn())
3967 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
3968 })
3969 .unwrap_or_else(|| self.inner.cdc.current_lsn());
3970 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
3971 (current_lsn, last_archived_lsn)
3972 }
3973
3974 pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
3976 self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
3977 self.assert_remote_write_allowed("admin/backup")?;
3982 let started = std::time::Instant::now();
3983 let snapshot = self.create_snapshot()?;
3984 let mut uploaded = false;
3985
3986 if let (Some(backend), Some(path)) = (&self.inner.db.remote_backend, self.inner.db.path()) {
3987 let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
3988 let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
3989 let default_head_key = self.inner.db.options().default_backup_head_key();
3990 let snapshot_prefix = self.config_string(
3991 "red.config.backup.snapshot_prefix",
3992 &default_snapshot_prefix,
3993 );
3994 let wal_prefix =
3995 self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
3996 let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
3997 let timeline_id = self.config_string("red.config.timeline.id", "main");
3998 let snapshot_key = crate::storage::wal::archive_snapshot(
3999 backend.as_ref(),
4000 path,
4001 snapshot.snapshot_id,
4002 &snapshot_prefix,
4003 )
4004 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4005 let current_lsn = self
4006 .inner
4007 .db
4008 .replication
4009 .as_ref()
4010 .map(|repl| {
4011 repl.logical_wal_spool
4012 .as_ref()
4013 .map(|spool| spool.current_lsn())
4014 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4015 })
4016 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4017 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4018 let snapshot_sha256 =
4024 crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
4025 .map_err(|err| {
4026 tracing::warn!(
4027 target: "reddb::backup",
4028 error = %err,
4029 snapshot_id = snapshot.snapshot_id,
4030 "snapshot hash failed; manifest will lack checksum"
4031 );
4032 })
4033 .ok();
4034 let manifest = crate::storage::wal::SnapshotManifest {
4035 timeline_id: timeline_id.clone(),
4036 snapshot_key: snapshot_key.clone(),
4037 snapshot_id: snapshot.snapshot_id,
4038 snapshot_time: snapshot.created_at_unix_ms as u64,
4039 base_lsn: current_lsn,
4040 schema_version: crate::api::REDDB_FORMAT_VERSION,
4041 format_version: crate::api::REDDB_FORMAT_VERSION,
4042 snapshot_sha256,
4043 };
4044 crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
4045 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4046
4047 let prev_segment_hash = self.config_string("red.config.timeline.last_segment_hash", "");
4054 let prev_hash_arg = if prev_segment_hash.is_empty() {
4055 None
4056 } else {
4057 Some(prev_segment_hash)
4058 };
4059
4060 let archived_lsn = if let Some(primary) = &self.inner.db.replication {
4061 let oldest = primary
4062 .logical_wal_spool
4063 .as_ref()
4064 .and_then(|spool| spool.oldest_lsn().ok().flatten())
4065 .or_else(|| primary.wal_buffer.oldest_lsn())
4066 .unwrap_or(last_archived_lsn);
4067 if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
4068 return Err(RedDBError::Internal(format!(
4069 "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
4070 )));
4071 }
4072 let records = if let Some(spool) = &primary.logical_wal_spool {
4073 spool
4074 .read_since(last_archived_lsn, usize::MAX)
4075 .map_err(|err| RedDBError::Internal(err.to_string()))?
4076 } else {
4077 primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
4078 };
4079 if let Some(meta) = crate::storage::wal::archive_change_records(
4080 backend.as_ref(),
4081 &wal_prefix,
4082 &records,
4083 prev_hash_arg,
4084 )
4085 .map_err(|err| RedDBError::Internal(err.to_string()))?
4086 {
4087 if let Some(spool) = &primary.logical_wal_spool {
4088 let _ = spool.prune_through(meta.lsn_end);
4089 }
4090 if let Some(sha) = &meta.sha256 {
4096 self.inner.db.store().set_config_tree(
4097 "red.config.timeline",
4098 &crate::json!({ "last_segment_hash": sha }),
4099 );
4100 }
4101 meta.lsn_end
4102 } else {
4103 last_archived_lsn
4104 }
4105 } else {
4106 last_archived_lsn
4107 };
4108
4109 let head = crate::storage::wal::BackupHead {
4110 timeline_id,
4111 snapshot_key,
4112 snapshot_id: snapshot.snapshot_id,
4113 snapshot_time: snapshot.created_at_unix_ms as u64,
4114 current_lsn,
4115 last_archived_lsn: archived_lsn,
4116 wal_prefix,
4117 };
4118 crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
4119 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4120 self.inner.db.store().set_config_tree(
4121 "red.config.timeline",
4122 &crate::json!({
4123 "last_archived_lsn": archived_lsn,
4124 "id": head.timeline_id
4125 }),
4126 );
4127
4128 if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
4136 backend.as_ref(),
4137 &snapshot_prefix,
4138 ) {
4139 tracing::warn!(
4140 target: "reddb::backup",
4141 error = %err,
4142 snapshot_prefix = %snapshot_prefix,
4143 "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
4144 );
4145 }
4146
4147 match self.commit_policy() {
4159 crate::replication::CommitPolicy::AckN(n) if n > 0 => {
4160 let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4161 .ok()
4162 .and_then(|v| v.parse::<u64>().ok())
4163 .unwrap_or(5_000);
4164 let outcome = self.await_replica_acks(
4165 archived_lsn,
4166 n,
4167 std::time::Duration::from_millis(timeout),
4168 );
4169 match outcome {
4170 crate::replication::AwaitOutcome::Reached(count) => {
4171 tracing::debug!(
4172 target: "reddb::backup",
4173 archived_lsn,
4174 n,
4175 count,
4176 "ack_n: replicas synced before backup return"
4177 );
4178 }
4179 crate::replication::AwaitOutcome::TimedOut { observed, required } => {
4180 tracing::warn!(
4181 target: "reddb::backup",
4182 archived_lsn,
4183 observed,
4184 required,
4185 timeout_ms = timeout,
4186 "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
4187 );
4188 }
4189 crate::replication::AwaitOutcome::NotRequired => {}
4190 }
4191 }
4192 _ => {} }
4194
4195 if self.config_bool("red.config.backup.include_blob_cache", false) {
4207 let blob_cache_prefix = self.config_string(
4208 "red.config.backup.blob_cache_prefix",
4209 &format!("{snapshot_prefix}blob_cache/"),
4210 );
4211 if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
4212 match crate::storage::cache::archive_blob_cache_l2(
4213 backend.as_ref(),
4214 l2_path,
4215 &blob_cache_prefix,
4216 ) {
4217 Ok(count) => {
4218 tracing::info!(
4219 target: "reddb::backup",
4220 files_uploaded = count,
4221 blob_cache_prefix = %blob_cache_prefix,
4222 "include_blob_cache: archived L2 directory"
4223 );
4224 }
4225 Err(err) => {
4226 tracing::warn!(
4227 target: "reddb::backup",
4228 error = %err,
4229 blob_cache_prefix = %blob_cache_prefix,
4230 "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
4231 );
4232 }
4233 }
4234 } else {
4235 tracing::debug!(
4236 target: "reddb::backup",
4237 "include_blob_cache=true but no L2 path configured; nothing to archive"
4238 );
4239 }
4240 }
4241
4242 uploaded = true;
4243 }
4244
4245 Ok(crate::replication::scheduler::BackupResult {
4246 snapshot_id: snapshot.snapshot_id,
4247 uploaded,
4248 duration_ms: started.elapsed().as_millis() as u64,
4249 timestamp: snapshot.created_at_unix_ms as u64,
4250 })
4251 }
4252
4253 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
4254 let mut pool = self
4255 .inner
4256 .pool
4257 .lock()
4258 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
4259 if pool.active >= self.inner.pool_config.max_connections {
4260 return Err(RedDBError::Internal(
4261 "connection pool exhausted".to_string(),
4262 ));
4263 }
4264
4265 let id = if let Some(id) = pool.idle.pop() {
4266 id
4267 } else {
4268 let id = pool.next_id;
4269 pool.next_id += 1;
4270 id
4271 };
4272 pool.active += 1;
4273 pool.total_checkouts += 1;
4274 drop(pool);
4275
4276 Ok(RuntimeConnection {
4277 id,
4278 inner: Arc::clone(&self.inner),
4279 })
4280 }
4281
4282 pub fn checkpoint(&self) -> RedDBResult<()> {
4283 self.inner.db.flush_local_only().map_err(|err| {
4288 let msg = err.to_string();
4293 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
4294 lsn: 0,
4295 error: msg.clone(),
4296 }
4297 .emit_global();
4298 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
4299 path: "<flush_local_only>".to_string(),
4300 error: msg.clone(),
4301 }
4302 .emit_global();
4303 RedDBError::Engine(msg)
4304 })?;
4305 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
4306 tracing::warn!(
4307 target: "reddb::serverless::lease",
4308 error = %err,
4309 "checkpoint: skipping remote upload — lease not held"
4310 );
4311 return Ok(());
4312 }
4313 self.inner
4314 .db
4315 .upload_to_remote_backend()
4316 .map_err(|err| RedDBError::Engine(err.to_string()))
4317 }
4318
4319 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
4326 if self.inner.db.remote_backend.is_none() {
4327 return Ok(());
4328 }
4329 match self.inner.write_gate.lease_state() {
4330 crate::runtime::write_gate::LeaseGateState::NotHeld => {
4331 self.inner.audit_log.record(
4332 action,
4333 "system",
4334 "remote_backend",
4335 "err: writer lease not held",
4336 crate::json::Value::Null,
4337 );
4338 Err(RedDBError::ReadOnly(format!(
4339 "writer lease not held — {action} blocked (serverless fence)"
4340 )))
4341 }
4342 _ => Ok(()),
4343 }
4344 }
4345
4346 pub fn run_maintenance(&self) -> RedDBResult<()> {
4347 self.inner
4348 .db
4349 .run_maintenance()
4350 .map_err(|err| RedDBError::Internal(err.to_string()))
4351 }
4352
4353 pub fn scan_collection(
4354 &self,
4355 collection: &str,
4356 cursor: Option<ScanCursor>,
4357 limit: usize,
4358 ) -> RedDBResult<ScanPage> {
4359 let store = self.inner.db.store();
4360 let manager = store
4361 .get_collection(collection)
4362 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
4363
4364 let mut entities = manager.query_all(|_| true);
4365 entities.sort_by_key(|entity| entity.id.raw());
4366
4367 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
4368 let total = entities.len();
4369 let end = total.min(offset.saturating_add(limit.max(1)));
4370 let items = if offset >= total {
4371 Vec::new()
4372 } else {
4373 entities[offset..end].to_vec()
4374 };
4375 let next = (end < total).then_some(ScanCursor { offset: end });
4376
4377 Ok(ScanPage {
4378 collection: collection.to_string(),
4379 items,
4380 next,
4381 total,
4382 })
4383 }
4384
4385 pub fn catalog(&self) -> CatalogModelSnapshot {
4386 self.inner.db.catalog_model_snapshot()
4387 }
4388
4389 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
4390 self.inner.db.catalog_consistency_report()
4391 }
4392
4393 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
4394 crate::catalog::attention_summary(&self.catalog())
4395 }
4396
4397 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
4398 crate::catalog::collection_attention(&self.catalog())
4399 }
4400
4401 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
4402 crate::catalog::index_attention(&self.catalog())
4403 }
4404
4405 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
4406 crate::catalog::graph_projection_attention(&self.catalog())
4407 }
4408
4409 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
4410 crate::catalog::analytics_job_attention(&self.catalog())
4411 }
4412
4413 pub fn stats(&self) -> RuntimeStats {
4414 let pool = runtime_pool_lock(self);
4415 RuntimeStats {
4416 active_connections: pool.active,
4417 idle_connections: pool.idle.len(),
4418 total_checkouts: pool.total_checkouts,
4419 paged_mode: self.inner.db.is_paged(),
4420 started_at_unix_ms: self.inner.started_at_unix_ms,
4421 store: self.inner.db.stats(),
4422 system: SystemInfo::collect(),
4423 result_blob_cache: self.inner.result_blob_cache.stats(),
4424 kv: self.inner.kv_stats.snapshot(),
4425 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
4426 }
4427 }
4428
4429 pub(crate) fn record_metrics_ingest(
4430 &self,
4431 accepted_samples: u64,
4432 accepted_series: u64,
4433 rejected_samples: u64,
4434 rejected_series: u64,
4435 ) {
4436 self.inner.metrics_ingest_stats.record(
4437 accepted_samples,
4438 accepted_series,
4439 rejected_samples,
4440 rejected_series,
4441 );
4442 }
4443
4444 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
4445 self.inner
4446 .metrics_ingest_stats
4447 .record_cardinality_budget_rejections(rejected_series);
4448 }
4449
4450 pub(crate) fn record_metrics_tenant_activity(
4451 &self,
4452 tenant: &str,
4453 namespace: &str,
4454 operation: &str,
4455 ) {
4456 self.inner
4457 .metrics_tenant_activity_stats
4458 .record(tenant, namespace, operation);
4459 }
4460
4461 pub(crate) fn metrics_tenant_activity_snapshot(
4462 &self,
4463 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
4464 self.inner.metrics_tenant_activity_stats.snapshot()
4465 }
4466
4467 pub fn execute_query_with_scope(
4481 &self,
4482 query: &str,
4483 scope: crate::runtime::within_clause::ScopeOverride,
4484 ) -> RedDBResult<RuntimeQueryResult> {
4485 if scope.is_empty() {
4486 return self.execute_query(query);
4487 }
4488 let _scope_guard = ScopeOverrideGuard::install(scope);
4489 self.execute_query(query)
4490 }
4491
4492 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4501 let started = std::time::Instant::now();
4502 let result = self.execute_query_inner(query);
4503 let elapsed_ms = started.elapsed().as_millis() as u64;
4504
4505 let scope = self.ai_scope();
4510 let kind = match result
4511 .as_ref()
4512 .map(|r| r.statement_type)
4513 .unwrap_or("select")
4514 {
4515 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
4516 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
4517 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
4518 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
4519 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
4520 };
4521 self.inner
4527 .slow_query_logger
4528 .record(kind, elapsed_ms, query.to_string(), &scope);
4529
4530 result
4531 }
4532
4533 #[inline(never)]
4534 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4535 if !has_scope_override_active()
4546 && !query.trim_start().starts_with("WITHIN")
4547 && !query.trim_start().starts_with("within")
4548 && !self
4549 .inner
4550 .tx_contexts
4551 .read()
4552 .contains_key(¤t_connection_id())
4553 {
4554 if let Some(result) = self.try_fast_entity_lookup(query) {
4555 return result;
4556 }
4557 }
4558
4559 match crate::runtime::within_clause::try_strip_within_prefix(query) {
4566 Ok(Some((scope, inner))) => {
4567 let _scope_guard = ScopeOverrideGuard::install(scope);
4568 return self.execute_query_inner(inner);
4573 }
4574 Ok(None) => {}
4575 Err(msg) => return Err(RedDBError::Query(msg)),
4576 }
4577
4578 if let Some(inner) = strip_explain_prefix(query) {
4585 return self.explain_as_rows(query, inner);
4586 }
4587
4588 if let Some(value) = parse_set_local_tenant(query)? {
4593 let conn_id = current_connection_id();
4594 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
4595 return Err(RedDBError::Query(
4596 "SET LOCAL TENANT requires an active transaction".to_string(),
4597 ));
4598 }
4599 self.inner
4600 .tx_local_tenants
4601 .write()
4602 .insert(conn_id, value.clone());
4603 return Ok(RuntimeQueryResult::ok_message(
4604 query.to_string(),
4605 &match &value {
4606 Some(id) => format!("local tenant set: {id}"),
4607 None => "local tenant cleared".to_string(),
4608 },
4609 "set_local_tenant",
4610 ));
4611 }
4612
4613 if super::red_schema::is_system_schema_write(query) {
4614 return Err(RedDBError::Query(
4615 super::red_schema::READ_ONLY_ERROR.to_string(),
4616 ));
4617 }
4618
4619 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
4620 let execution_query = rewritten_query.as_deref().unwrap_or(query);
4621
4622 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
4623 let _frame_guards = frame.install(self);
4624
4625 let _log_span = crate::telemetry::span::query_span(query).entered();
4632
4633 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
4635 return self.execute_query_expr(rewritten);
4636 }
4637
4638 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
4640 return result;
4641 }
4642
4643 if let Some(result) = frame.read_result_cache(self) {
4645 return Ok(result);
4646 }
4647
4648 let prepared = frame.prepare_statement(self, execution_query)?;
4649 let mode = prepared.mode;
4650 let expr = prepared.expr;
4651
4652 let statement = query_expr_name(&expr);
4653 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
4654
4655 let _lock_guard = frame.prepare_dispatch(self, &expr)?;
4656 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
4657
4658 let query_result = match expr {
4659 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
4660 let (graph, node_properties, edge_properties) =
4668 self.materialize_graph_with_rls()?;
4669 let result =
4670 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
4671 &graph,
4672 &expr,
4673 node_properties,
4674 edge_properties,
4675 )
4676 .map_err(|err| RedDBError::Query(err.to_string()))?;
4677
4678 Ok(RuntimeQueryResult {
4679 query: query.to_string(),
4680 mode,
4681 statement,
4682 engine: "materialized-graph",
4683 result,
4684 affected_rows: 0,
4685 statement_type: "select",
4686 })
4687 }
4688 QueryExpr::Table(table) => {
4689 let table = self.resolve_table_expr_subqueries(
4690 table,
4691 &frame as &dyn super::statement_frame::ReadFrame,
4692 )?;
4693 if super::red_schema::is_virtual_table(&table.table) {
4694 return Ok(RuntimeQueryResult {
4695 query: query.to_string(),
4696 mode,
4697 statement,
4698 engine: "runtime-red-schema",
4699 result: super::red_schema::red_query(
4700 self,
4701 &table.table,
4702 &table,
4703 &frame as &dyn super::statement_frame::ReadFrame,
4704 )?,
4705 affected_rows: 0,
4706 statement_type: "select",
4707 });
4708 }
4709
4710 if let Some(result) = self.execute_probabilistic_select(&table)? {
4711 return Ok(RuntimeQueryResult {
4712 query: query.to_string(),
4713 mode,
4714 statement,
4715 engine: "runtime-probabilistic",
4716 result,
4717 affected_rows: 0,
4718 statement_type: "select",
4719 });
4720 }
4721
4722 if self.inner.foreign_tables.is_foreign_table(&table.table) {
4730 let records = self
4731 .inner
4732 .foreign_tables
4733 .scan(&table.table)
4734 .map_err(|e| RedDBError::Internal(e.to_string()))?;
4735 let result = apply_foreign_table_filters(records, &table);
4736 return Ok(RuntimeQueryResult {
4737 query: query.to_string(),
4738 mode,
4739 statement,
4740 engine: "runtime-fdw",
4741 result,
4742 affected_rows: 0,
4743 statement_type: "select",
4744 });
4745 }
4746
4747 let Some(table_with_rls) = self.authorize_relational_table_select(
4764 table,
4765 &frame as &dyn super::statement_frame::ReadFrame,
4766 )?
4767 else {
4768 let empty = crate::storage::query::unified::UnifiedResult::empty();
4769 return Ok(RuntimeQueryResult {
4770 query: query.to_string(),
4771 mode,
4772 statement,
4773 engine: "runtime-table-rls",
4774 result: empty,
4775 affected_rows: 0,
4776 statement_type: "select",
4777 });
4778 };
4779 Ok(RuntimeQueryResult {
4780 query: query.to_string(),
4781 mode,
4782 statement,
4783 engine: "runtime-table",
4784 result: execute_runtime_table_query(
4785 &self.inner.db,
4786 &table_with_rls,
4787 Some(&self.inner.index_store),
4788 )?,
4789 affected_rows: 0,
4790 statement_type: "select",
4791 })
4792 }
4793 QueryExpr::Join(join) => {
4794 let join_with_rls = match self.authorize_relational_join_select(
4803 join,
4804 &frame as &dyn super::statement_frame::ReadFrame,
4805 )? {
4806 Some(j) => j,
4807 None => {
4808 return Ok(RuntimeQueryResult {
4809 query: query.to_string(),
4810 mode,
4811 statement,
4812 engine: "runtime-join-rls",
4813 result: crate::storage::query::unified::UnifiedResult::empty(),
4814 affected_rows: 0,
4815 statement_type: "select",
4816 });
4817 }
4818 };
4819 Ok(RuntimeQueryResult {
4820 query: query.to_string(),
4821 mode,
4822 statement,
4823 engine: "runtime-join",
4824 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
4825 affected_rows: 0,
4826 statement_type: "select",
4827 })
4828 }
4829 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
4830 query: query.to_string(),
4831 mode,
4832 statement,
4833 engine: "runtime-vector",
4834 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
4835 affected_rows: 0,
4836 statement_type: "select",
4837 }),
4838 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
4839 query: query.to_string(),
4840 mode,
4841 statement,
4842 engine: "runtime-hybrid",
4843 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
4844 affected_rows: 0,
4845 statement_type: "select",
4846 }),
4847 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
4849 Err(RedDBError::Query(
4850 super::red_schema::READ_ONLY_ERROR.to_string(),
4851 ))
4852 }
4853 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
4854 Err(RedDBError::Query(
4855 super::red_schema::READ_ONLY_ERROR.to_string(),
4856 ))
4857 }
4858 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
4859 Err(RedDBError::Query(
4860 super::red_schema::READ_ONLY_ERROR.to_string(),
4861 ))
4862 }
4863 QueryExpr::Insert(ref insert) => self
4864 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
4865 self.execute_insert(query, insert)
4866 }),
4867 QueryExpr::Update(ref update) => self
4868 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
4869 self.execute_update(query, update)
4870 }),
4871 QueryExpr::Delete(ref delete) => self
4872 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
4873 self.execute_delete(query, delete)
4874 }),
4875 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
4877 QueryExpr::CreateCollection(ref create) => {
4878 self.execute_create_collection(query, create)
4879 }
4880 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
4881 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
4882 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
4883 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
4884 QueryExpr::DropDocument(ref drop_document) => {
4885 self.execute_drop_document(query, drop_document)
4886 }
4887 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
4888 QueryExpr::DropCollection(ref drop_collection) => {
4889 self.execute_drop_collection(query, drop_collection)
4890 }
4891 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
4892 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
4893 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
4894 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
4896 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
4898 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
4900 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
4901 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
4902 QueryExpr::ProbabilisticCommand(ref cmd) => {
4903 self.execute_probabilistic_command(query, cmd)
4904 }
4905 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
4907 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
4908 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
4910 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
4911 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
4912 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
4913 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
4914 QueryExpr::EventsBackfill(ref backfill) => {
4915 self.execute_events_backfill(query, backfill)
4916 }
4917 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
4918 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
4919 ))),
4920 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
4921 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
4922 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
4923 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
4924 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
4925 QueryExpr::SetConfig { ref key, ref value } => {
4927 if key.starts_with("red.secret.") {
4928 return Err(RedDBError::Query(
4929 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
4930 ));
4931 }
4932 let store = self.inner.db.store();
4933 let json_val = match value {
4934 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
4935 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
4936 Value::Float(n) => crate::serde_json::Value::Number(*n),
4937 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
4938 _ => crate::serde_json::Value::String(value.to_string()),
4939 };
4940 store.set_config_tree(key, &json_val);
4941 update_current_config_value(key, value.clone());
4942 self.invalidate_result_cache();
4947 Ok(RuntimeQueryResult::ok_message(
4948 query.to_string(),
4949 &format!("config set: {key}"),
4950 "set",
4951 ))
4952 }
4953 QueryExpr::SetSecret { ref key, ref value } => {
4955 if key.starts_with("red.config.") {
4956 return Err(RedDBError::Query(
4957 "red.config.* is reserved for config; use SET CONFIG".to_string(),
4958 ));
4959 }
4960 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
4961 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
4962 })?;
4963 if matches!(value, Value::Null) {
4964 auth_store
4965 .vault_kv_try_delete(key)
4966 .map_err(|err| RedDBError::Query(err.to_string()))?;
4967 update_current_secret_value(key, None);
4968 self.invalidate_result_cache();
4969 return Ok(RuntimeQueryResult::ok_message(
4970 query.to_string(),
4971 &format!("secret deleted: {key}"),
4972 "delete_secret",
4973 ));
4974 }
4975 let value = secret_sql_value_to_string(value)?;
4976 auth_store
4977 .vault_kv_try_set(key.clone(), value.clone())
4978 .map_err(|err| RedDBError::Query(err.to_string()))?;
4979 update_current_secret_value(key, Some(value));
4980 self.invalidate_result_cache();
4981 Ok(RuntimeQueryResult::ok_message(
4982 query.to_string(),
4983 &format!("secret set: {key}"),
4984 "set_secret",
4985 ))
4986 }
4987 QueryExpr::DeleteSecret { ref key } => {
4989 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
4990 RedDBError::Query(
4991 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
4992 )
4993 })?;
4994 let deleted = auth_store
4995 .vault_kv_try_delete(key)
4996 .map_err(|err| RedDBError::Query(err.to_string()))?;
4997 if deleted {
4998 update_current_secret_value(key, None);
4999 }
5000 self.invalidate_result_cache();
5001 Ok(RuntimeQueryResult::ok_message(
5002 query.to_string(),
5003 &format!("secret deleted: {key}"),
5004 if deleted {
5005 "delete_secret"
5006 } else {
5007 "delete_secret_not_found"
5008 },
5009 ))
5010 }
5011 QueryExpr::ShowSecrets { ref prefix } => {
5013 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5014 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
5015 })?;
5016 if !auth_store.is_vault_backed() {
5017 return Err(RedDBError::Query(
5018 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
5019 ));
5020 }
5021 let mut keys = auth_store.vault_kv_keys();
5022 keys.sort();
5023 let mut result = UnifiedResult::with_columns(vec![
5024 "key".into(),
5025 "value".into(),
5026 "status".into(),
5027 ]);
5028 for key in keys {
5029 if let Some(ref pfx) = prefix {
5030 if !key.starts_with(pfx) {
5031 continue;
5032 }
5033 }
5034 let mut record = UnifiedRecord::new();
5035 record.set("key", Value::text(key));
5036 record.set("value", Value::text("***"));
5037 record.set("status", Value::text("active"));
5038 result.push(record);
5039 }
5040 Ok(RuntimeQueryResult {
5041 query: query.to_string(),
5042 mode,
5043 statement: "show_secrets",
5044 engine: "runtime-secret",
5045 result,
5046 affected_rows: 0,
5047 statement_type: "select",
5048 })
5049 }
5050 QueryExpr::ShowConfig { ref prefix } => {
5052 let store = self.inner.db.store();
5053 let all_collections = store.list_collections();
5054 if !all_collections.contains(&"red_config".to_string()) {
5055 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5056 return Ok(RuntimeQueryResult {
5057 query: query.to_string(),
5058 mode,
5059 statement: "show_config",
5060 engine: "runtime-config",
5061 result,
5062 affected_rows: 0,
5063 statement_type: "select",
5064 });
5065 }
5066 let manager = store
5067 .get_collection("red_config")
5068 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5069 let entities = manager.query_all(|_| true);
5070 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5071 for entity in entities {
5072 if let EntityData::Row(ref row) = entity.data {
5073 if let Some(ref named) = row.named {
5074 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5075 let val = named.get("value").cloned().unwrap_or(Value::Null);
5076 let key_str = match &key_val {
5077 Value::Text(s) => s.as_ref(),
5078 _ => continue,
5079 };
5080 if let Some(ref pfx) = prefix {
5081 if !key_str.starts_with(pfx.as_str()) {
5082 continue;
5083 }
5084 }
5085 let entity_id = entity.id.raw();
5086 match latest.get(key_str) {
5087 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5088 _ => {
5089 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5090 }
5091 }
5092 }
5093 }
5094 }
5095 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5096 for (_, key_val, val) in latest.into_values() {
5097 let mut record = UnifiedRecord::new();
5098 record.set("key", key_val);
5099 record.set("value", val);
5100 result.push(record);
5101 }
5102 Ok(RuntimeQueryResult {
5103 query: query.to_string(),
5104 mode,
5105 statement: "show_config",
5106 engine: "runtime-config",
5107 result,
5108 affected_rows: 0,
5109 statement_type: "select",
5110 })
5111 }
5112 QueryExpr::SetTenant(ref value) => {
5118 match value {
5119 Some(id) => set_current_tenant(id.clone()),
5120 None => clear_current_tenant(),
5121 }
5122 Ok(RuntimeQueryResult::ok_message(
5123 query.to_string(),
5124 &match value {
5125 Some(id) => format!("tenant set: {id}"),
5126 None => "tenant cleared".to_string(),
5127 },
5128 "set_tenant",
5129 ))
5130 }
5131 QueryExpr::ShowTenant => {
5132 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5133 let mut record = UnifiedRecord::new();
5134 record.set(
5135 "tenant",
5136 current_tenant().map(Value::text).unwrap_or(Value::Null),
5137 );
5138 result.push(record);
5139 Ok(RuntimeQueryResult {
5140 query: query.to_string(),
5141 mode,
5142 statement: "show_tenant",
5143 engine: "runtime-tenant",
5144 result,
5145 affected_rows: 0,
5146 statement_type: "select",
5147 })
5148 }
5149 QueryExpr::TransactionControl(ref ctl) => {
5161 use crate::storage::query::ast::TxnControl;
5162 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5163 use crate::storage::transaction::IsolationLevel;
5164
5165 let conn_id = current_connection_id();
5170
5171 let (kind, msg) = match ctl {
5172 TxnControl::Begin => {
5173 let mgr = Arc::clone(&self.inner.snapshot_manager);
5174 let xid = mgr.begin();
5175 let snapshot = mgr.snapshot(xid);
5176 let ctx = TxnContext {
5177 xid,
5178 isolation: IsolationLevel::SnapshotIsolation,
5179 snapshot,
5180 savepoints: Vec::new(),
5181 released_sub_xids: Vec::new(),
5182 };
5183 self.inner.tx_contexts.write().insert(conn_id, ctx);
5184 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
5185 }
5186 TxnControl::Commit => {
5187 self.inner.tx_local_tenants.write().remove(&conn_id);
5189 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5190 match ctx {
5191 Some(ctx) => {
5192 let mut own_xids = std::collections::HashSet::new();
5193 own_xids.insert(ctx.xid);
5194 for (_, sub) in &ctx.savepoints {
5195 own_xids.insert(*sub);
5196 }
5197 for sub in &ctx.released_sub_xids {
5198 own_xids.insert(*sub);
5199 }
5200 if let Err(err) = self.check_table_row_write_conflicts(
5201 conn_id,
5202 &ctx.snapshot,
5203 &own_xids,
5204 ) {
5205 for (_, sub) in &ctx.savepoints {
5206 self.inner.snapshot_manager.rollback(*sub);
5207 }
5208 for sub in &ctx.released_sub_xids {
5209 self.inner.snapshot_manager.rollback(*sub);
5210 }
5211 self.inner.snapshot_manager.rollback(ctx.xid);
5212 self.revive_pending_versioned_updates(conn_id);
5213 self.revive_pending_tombstones(conn_id);
5214 self.discard_pending_kv_watch_events(conn_id);
5215 self.discard_pending_store_wal_actions(conn_id);
5216 return Err(err);
5217 }
5218 self.restore_pending_write_stamps(conn_id);
5219 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
5220 for (_, sub) in &ctx.savepoints {
5221 self.inner.snapshot_manager.rollback(*sub);
5222 }
5223 for sub in &ctx.released_sub_xids {
5224 self.inner.snapshot_manager.rollback(*sub);
5225 }
5226 self.inner.snapshot_manager.rollback(ctx.xid);
5227 self.revive_pending_versioned_updates(conn_id);
5228 self.revive_pending_tombstones(conn_id);
5229 self.discard_pending_kv_watch_events(conn_id);
5230 return Err(err);
5231 }
5232 for (_, sub) in &ctx.savepoints {
5238 self.inner.snapshot_manager.commit(*sub);
5239 }
5240 for sub in &ctx.released_sub_xids {
5241 self.inner.snapshot_manager.commit(*sub);
5242 }
5243 self.inner.snapshot_manager.commit(ctx.xid);
5244 self.finalize_pending_versioned_updates(conn_id);
5245 self.finalize_pending_tombstones(conn_id);
5246 self.finalize_pending_kv_watch_events(conn_id);
5247 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
5248 }
5249 None => (
5250 "commit",
5251 "COMMIT outside transaction — no-op (autocommit)".to_string(),
5252 ),
5253 }
5254 }
5255 TxnControl::Rollback => {
5256 self.inner.tx_local_tenants.write().remove(&conn_id);
5257 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5258 match ctx {
5259 Some(ctx) => {
5260 for (_, sub) in &ctx.savepoints {
5263 self.inner.snapshot_manager.rollback(*sub);
5264 }
5265 for sub in &ctx.released_sub_xids {
5266 self.inner.snapshot_manager.rollback(*sub);
5267 }
5268 self.inner.snapshot_manager.rollback(ctx.xid);
5269 self.revive_pending_versioned_updates(conn_id);
5273 self.revive_pending_tombstones(conn_id);
5274 self.discard_pending_kv_watch_events(conn_id);
5275 self.discard_pending_store_wal_actions(conn_id);
5276 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
5277 }
5278 None => (
5279 "rollback",
5280 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
5281 ),
5282 }
5283 }
5284 TxnControl::Savepoint(name) => {
5291 let mgr = Arc::clone(&self.inner.snapshot_manager);
5292 let mut guard = self.inner.tx_contexts.write();
5293 match guard.get_mut(&conn_id) {
5294 Some(ctx) => {
5295 let sub = mgr.begin();
5296 ctx.savepoints.push((name.clone(), sub));
5297 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
5298 }
5299 None => (
5300 "savepoint",
5301 "SAVEPOINT outside transaction — no-op".to_string(),
5302 ),
5303 }
5304 }
5305 TxnControl::ReleaseSavepoint(name) => {
5306 let mut guard = self.inner.tx_contexts.write();
5307 match guard.get_mut(&conn_id) {
5308 Some(ctx) => {
5309 let pos = ctx
5310 .savepoints
5311 .iter()
5312 .position(|(n, _)| n == name)
5313 .ok_or_else(|| {
5314 RedDBError::Internal(format!(
5315 "savepoint {name} does not exist"
5316 ))
5317 })?;
5318 let released = ctx.savepoints.len() - pos;
5326 let popped: Vec<Xid> = ctx
5327 .savepoints
5328 .split_off(pos)
5329 .into_iter()
5330 .map(|(_, x)| x)
5331 .collect();
5332 ctx.released_sub_xids.extend(popped);
5333 (
5334 "release_savepoint",
5335 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
5336 )
5337 }
5338 None => (
5339 "release_savepoint",
5340 "RELEASE outside transaction — no-op".to_string(),
5341 ),
5342 }
5343 }
5344 TxnControl::RollbackToSavepoint(name) => {
5345 let mgr = Arc::clone(&self.inner.snapshot_manager);
5346 let drop_result: Option<(Xid, Vec<Xid>)> = {
5351 let mut guard = self.inner.tx_contexts.write();
5352 if let Some(ctx) = guard.get_mut(&conn_id) {
5353 let pos = ctx
5354 .savepoints
5355 .iter()
5356 .position(|(n, _)| n == name)
5357 .ok_or_else(|| {
5358 RedDBError::Internal(format!(
5359 "savepoint {name} does not exist"
5360 ))
5361 })?;
5362 let savepoint_xid = ctx.savepoints[pos].1;
5363 let aborted: Vec<Xid> = ctx
5364 .savepoints
5365 .split_off(pos)
5366 .into_iter()
5367 .map(|(_, x)| x)
5368 .collect();
5369 Some((savepoint_xid, aborted))
5370 } else {
5371 None
5372 }
5373 };
5374
5375 match drop_result {
5376 Some((savepoint_xid, aborted)) => {
5377 for x in &aborted {
5378 mgr.rollback(*x);
5379 }
5380 let reverted_updates =
5381 self.revive_versioned_updates_since(conn_id, savepoint_xid);
5382 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
5383 (
5384 "rollback_to_savepoint",
5385 format!(
5386 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
5387 aborted.len(),
5388 ),
5389 )
5390 }
5391 None => (
5392 "rollback_to_savepoint",
5393 "ROLLBACK TO outside transaction — no-op".to_string(),
5394 ),
5395 }
5396 }
5397 };
5398 Ok(RuntimeQueryResult::ok_message(
5399 query.to_string(),
5400 &msg,
5401 kind,
5402 ))
5403 }
5404 QueryExpr::CreateSchema(ref q) => {
5417 let store = self.inner.db.store();
5418 let key = format!("schema.{}", q.name);
5419 if store.get_config(&key).is_some() {
5420 if q.if_not_exists {
5421 return Ok(RuntimeQueryResult::ok_message(
5422 query.to_string(),
5423 &format!("schema {} already exists — skipped", q.name),
5424 "create_schema",
5425 ));
5426 }
5427 return Err(RedDBError::Internal(format!(
5428 "schema {} already exists",
5429 q.name
5430 )));
5431 }
5432 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
5433 Ok(RuntimeQueryResult::ok_message(
5434 query.to_string(),
5435 &format!("schema {} created", q.name),
5436 "create_schema",
5437 ))
5438 }
5439 QueryExpr::DropSchema(ref q) => {
5440 let store = self.inner.db.store();
5441 let key = format!("schema.{}", q.name);
5442 let existed = store.get_config(&key).is_some();
5443 if !existed && !q.if_exists {
5444 return Err(RedDBError::Internal(format!(
5445 "schema {} does not exist",
5446 q.name
5447 )));
5448 }
5449 store.set_config_tree(&key, &crate::serde_json::Value::Null);
5451 let suffix = if q.cascade {
5452 " (CASCADE accepted — tables untouched)"
5453 } else {
5454 ""
5455 };
5456 Ok(RuntimeQueryResult::ok_message(
5457 query.to_string(),
5458 &format!("schema {} dropped{}", q.name, suffix),
5459 "drop_schema",
5460 ))
5461 }
5462 QueryExpr::CreateSequence(ref q) => {
5463 let store = self.inner.db.store();
5464 let base = format!("sequence.{}", q.name);
5465 let start_key = format!("{base}.start");
5466 let incr_key = format!("{base}.increment");
5467 let curr_key = format!("{base}.current");
5468 if store.get_config(&start_key).is_some() {
5469 if q.if_not_exists {
5470 return Ok(RuntimeQueryResult::ok_message(
5471 query.to_string(),
5472 &format!("sequence {} already exists — skipped", q.name),
5473 "create_sequence",
5474 ));
5475 }
5476 return Err(RedDBError::Internal(format!(
5477 "sequence {} already exists",
5478 q.name
5479 )));
5480 }
5481 let initial_current = q.start - q.increment;
5484 store.set_config_tree(
5485 &start_key,
5486 &crate::serde_json::Value::Number(q.start as f64),
5487 );
5488 store.set_config_tree(
5489 &incr_key,
5490 &crate::serde_json::Value::Number(q.increment as f64),
5491 );
5492 store.set_config_tree(
5493 &curr_key,
5494 &crate::serde_json::Value::Number(initial_current as f64),
5495 );
5496 Ok(RuntimeQueryResult::ok_message(
5497 query.to_string(),
5498 &format!(
5499 "sequence {} created (start={}, increment={})",
5500 q.name, q.start, q.increment
5501 ),
5502 "create_sequence",
5503 ))
5504 }
5505 QueryExpr::DropSequence(ref q) => {
5506 let store = self.inner.db.store();
5507 let base = format!("sequence.{}", q.name);
5508 let existed = store.get_config(&format!("{base}.start")).is_some();
5509 if !existed && !q.if_exists {
5510 return Err(RedDBError::Internal(format!(
5511 "sequence {} does not exist",
5512 q.name
5513 )));
5514 }
5515 for k in ["start", "increment", "current"] {
5516 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
5517 }
5518 Ok(RuntimeQueryResult::ok_message(
5519 query.to_string(),
5520 &format!("sequence {} dropped", q.name),
5521 "drop_sequence",
5522 ))
5523 }
5524 QueryExpr::CreateView(ref q) => {
5534 let mut views = self.inner.views.write();
5535 if views.contains_key(&q.name) && !q.or_replace {
5536 if q.if_not_exists {
5537 return Ok(RuntimeQueryResult::ok_message(
5538 query.to_string(),
5539 &format!("view {} already exists — skipped", q.name),
5540 "create_view",
5541 ));
5542 }
5543 return Err(RedDBError::Internal(format!(
5544 "view {} already exists",
5545 q.name
5546 )));
5547 }
5548 views.insert(q.name.clone(), Arc::new(q.clone()));
5549 drop(views);
5550
5551 if q.materialized {
5553 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
5554 let def = MaterializedViewDef {
5555 name: q.name.clone(),
5556 query: format!("<parsed view {}>", q.name),
5557 dependencies: collect_table_refs(&q.query),
5558 refresh: RefreshPolicy::Manual,
5559 };
5560 self.inner.materialized_views.write().register(def);
5561 }
5562 self.invalidate_plan_cache();
5567 self.invalidate_result_cache();
5568
5569 Ok(RuntimeQueryResult::ok_message(
5570 query.to_string(),
5571 &format!(
5572 "{}view {} created",
5573 if q.materialized { "materialized " } else { "" },
5574 q.name
5575 ),
5576 "create_view",
5577 ))
5578 }
5579 QueryExpr::DropView(ref q) => {
5580 let mut views = self.inner.views.write();
5581 let existed = views.remove(&q.name).is_some();
5582 drop(views);
5583 if q.materialized || existed {
5584 self.inner.materialized_views.write().remove(&q.name);
5586 }
5587 self.invalidate_plan_cache();
5590 self.invalidate_result_cache();
5591 if !existed && !q.if_exists {
5592 return Err(RedDBError::Internal(format!(
5593 "view {} does not exist",
5594 q.name
5595 )));
5596 }
5597 self.invalidate_plan_cache();
5598 Ok(RuntimeQueryResult::ok_message(
5599 query.to_string(),
5600 &format!("view {} dropped", q.name),
5601 "drop_view",
5602 ))
5603 }
5604 QueryExpr::RefreshMaterializedView(ref q) => {
5605 let view = {
5608 let views = self.inner.views.read();
5609 views.get(&q.name).cloned()
5610 };
5611 let view = match view {
5612 Some(v) => v,
5613 None => {
5614 return Err(RedDBError::Internal(format!(
5615 "view {} does not exist",
5616 q.name
5617 )))
5618 }
5619 };
5620 if !view.materialized {
5621 return Err(RedDBError::Internal(format!(
5622 "view {} is not materialized — REFRESH requires \
5623 CREATE MATERIALIZED VIEW",
5624 q.name
5625 )));
5626 }
5627 let inner_result = self.execute_query_expr((*view.query).clone())?;
5629 let serialized = format!("{:?}", inner_result.result);
5632 self.inner
5633 .materialized_views
5634 .write()
5635 .refresh(&q.name, serialized.into_bytes());
5636 Ok(RuntimeQueryResult::ok_message(
5637 query.to_string(),
5638 &format!("materialized view {} refreshed", q.name),
5639 "refresh_materialized_view",
5640 ))
5641 }
5642 QueryExpr::CreatePolicy(ref q) => {
5649 let key = (q.table.clone(), q.name.clone());
5650 self.inner
5651 .rls_policies
5652 .write()
5653 .insert(key, Arc::new(q.clone()));
5654 self.invalidate_plan_cache();
5655 self.schema_vocabulary_apply(
5659 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
5660 collection: q.table.clone(),
5661 policy: q.name.clone(),
5662 },
5663 );
5664 Ok(RuntimeQueryResult::ok_message(
5665 query.to_string(),
5666 &format!("policy {} on {} created", q.name, q.table),
5667 "create_policy",
5668 ))
5669 }
5670 QueryExpr::DropPolicy(ref q) => {
5671 let removed = self
5672 .inner
5673 .rls_policies
5674 .write()
5675 .remove(&(q.table.clone(), q.name.clone()))
5676 .is_some();
5677 if !removed && !q.if_exists {
5678 return Err(RedDBError::Internal(format!(
5679 "policy {} on {} does not exist",
5680 q.name, q.table
5681 )));
5682 }
5683 self.invalidate_plan_cache();
5684 self.schema_vocabulary_apply(
5687 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
5688 collection: q.table.clone(),
5689 policy: q.name.clone(),
5690 },
5691 );
5692 Ok(RuntimeQueryResult::ok_message(
5693 query.to_string(),
5694 &format!("policy {} on {} dropped", q.name, q.table),
5695 "drop_policy",
5696 ))
5697 }
5698 QueryExpr::CreateServer(ref q) => {
5709 use crate::storage::fdw::FdwOptions;
5710 let registry = Arc::clone(&self.inner.foreign_tables);
5711 if registry.server(&q.name).is_some() {
5712 if q.if_not_exists {
5713 return Ok(RuntimeQueryResult::ok_message(
5714 query.to_string(),
5715 &format!("server {} already exists — skipped", q.name),
5716 "create_server",
5717 ));
5718 }
5719 return Err(RedDBError::Internal(format!(
5720 "server {} already exists",
5721 q.name
5722 )));
5723 }
5724 let mut opts = FdwOptions::new();
5725 for (k, v) in &q.options {
5726 opts.values.insert(k.clone(), v.clone());
5727 }
5728 registry
5729 .create_server(&q.name, &q.wrapper, opts)
5730 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5731 Ok(RuntimeQueryResult::ok_message(
5732 query.to_string(),
5733 &format!("server {} created (wrapper {})", q.name, q.wrapper),
5734 "create_server",
5735 ))
5736 }
5737 QueryExpr::DropServer(ref q) => {
5738 let existed = self.inner.foreign_tables.drop_server(&q.name);
5739 if !existed && !q.if_exists {
5740 return Err(RedDBError::Internal(format!(
5741 "server {} does not exist",
5742 q.name
5743 )));
5744 }
5745 Ok(RuntimeQueryResult::ok_message(
5746 query.to_string(),
5747 &format!(
5748 "server {} dropped{}",
5749 q.name,
5750 if q.cascade { " (cascade)" } else { "" }
5751 ),
5752 "drop_server",
5753 ))
5754 }
5755 QueryExpr::CreateForeignTable(ref q) => {
5756 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
5757 let registry = Arc::clone(&self.inner.foreign_tables);
5758 if registry.foreign_table(&q.name).is_some() {
5759 if q.if_not_exists {
5760 return Ok(RuntimeQueryResult::ok_message(
5761 query.to_string(),
5762 &format!("foreign table {} already exists — skipped", q.name),
5763 "create_foreign_table",
5764 ));
5765 }
5766 return Err(RedDBError::Internal(format!(
5767 "foreign table {} already exists",
5768 q.name
5769 )));
5770 }
5771 let mut opts = FdwOptions::new();
5772 for (k, v) in &q.options {
5773 opts.values.insert(k.clone(), v.clone());
5774 }
5775 let columns: Vec<ForeignColumn> = q
5776 .columns
5777 .iter()
5778 .map(|c| ForeignColumn {
5779 name: c.name.clone(),
5780 data_type: c.data_type.clone(),
5781 not_null: c.not_null,
5782 })
5783 .collect();
5784 registry
5785 .create_foreign_table(ForeignTable {
5786 name: q.name.clone(),
5787 server_name: q.server.clone(),
5788 columns,
5789 options: opts,
5790 })
5791 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5792 self.invalidate_plan_cache();
5793 Ok(RuntimeQueryResult::ok_message(
5794 query.to_string(),
5795 &format!("foreign table {} created (server {})", q.name, q.server),
5796 "create_foreign_table",
5797 ))
5798 }
5799 QueryExpr::DropForeignTable(ref q) => {
5800 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
5801 if !existed && !q.if_exists {
5802 return Err(RedDBError::Internal(format!(
5803 "foreign table {} does not exist",
5804 q.name
5805 )));
5806 }
5807 self.invalidate_plan_cache();
5808 Ok(RuntimeQueryResult::ok_message(
5809 query.to_string(),
5810 &format!("foreign table {} dropped", q.name),
5811 "drop_foreign_table",
5812 ))
5813 }
5814 QueryExpr::CopyFrom(ref q) => {
5820 use crate::storage::import::{CsvConfig, CsvImporter};
5821 let store = self.inner.db.store();
5822 let cfg = CsvConfig {
5823 collection: q.table.clone(),
5824 has_header: q.has_header,
5825 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
5826 ..CsvConfig::default()
5827 };
5828 let importer = CsvImporter::new(cfg);
5829 let stats = importer
5830 .import_file(&q.path, store.as_ref())
5831 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
5832 self.note_table_write(&q.table);
5834 Ok(RuntimeQueryResult::ok_message(
5835 query.to_string(),
5836 &format!(
5837 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
5838 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
5839 ),
5840 "copy_from",
5841 ))
5842 }
5843 QueryExpr::MaintenanceCommand(ref cmd) => {
5859 use crate::storage::query::ast::MaintenanceCommand as Mc;
5860 let store = self.inner.db.store();
5861 let (kind, msg) = match cmd {
5862 Mc::Analyze { target } => {
5863 let targets: Vec<String> = match target {
5864 Some(t) => vec![t.clone()],
5865 None => store.list_collections(),
5866 };
5867 for t in &targets {
5868 self.refresh_table_planner_stats(t);
5869 }
5870 (
5871 "analyze",
5872 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
5873 )
5874 }
5875 Mc::Vacuum { target, full } => {
5876 let targets: Vec<String> = match target {
5877 Some(t) => vec![t.clone()],
5878 None => store.list_collections(),
5879 };
5880 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
5881 let mut vacuum_stats =
5882 crate::storage::unified::store::MvccVacuumStats::default();
5883 for t in &targets {
5884 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
5885 RedDBError::Internal(format!(
5886 "VACUUM MVCC history failed for {t}: {e}"
5887 ))
5888 })?;
5889 if stats.reclaimed_versions > 0 {
5890 self.rebuild_runtime_indexes_for_table(t)?;
5891 }
5892 vacuum_stats.add(&stats);
5893 }
5894 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
5895 for t in &targets {
5897 self.refresh_table_planner_stats(t);
5898 }
5899 let persisted = if *full {
5903 match store.persist() {
5904 Ok(()) => true,
5905 Err(e) => {
5906 return Err(RedDBError::Internal(format!(
5907 "VACUUM FULL persist failed: {e:?}"
5908 )));
5909 }
5910 }
5911 } else {
5912 false
5913 };
5914 self.invalidate_result_cache();
5916 (
5917 "vacuum",
5918 format!(
5919 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
5920 if *full { " FULL" } else { "" },
5921 targets.len(),
5922 vacuum_stats.scanned_versions,
5923 vacuum_stats.retained_versions,
5924 vacuum_stats.reclaimed_versions,
5925 vacuum_stats.retained_history_versions,
5926 vacuum_stats.reclaimed_history_versions,
5927 vacuum_stats.retained_tombstones,
5928 vacuum_stats.reclaimed_tombstones,
5929 if persisted {
5930 " (pages flushed to disk)"
5931 } else {
5932 ""
5933 }
5934 ),
5935 )
5936 }
5937 };
5938 Ok(RuntimeQueryResult::ok_message(
5939 query.to_string(),
5940 &msg,
5941 kind,
5942 ))
5943 }
5944 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
5951 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
5952 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
5953 QueryExpr::CreateIamPolicy { ref id, ref json } => {
5954 self.execute_create_iam_policy(query, id, json)
5955 }
5956 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
5957 QueryExpr::AttachPolicy {
5958 ref policy_id,
5959 ref principal,
5960 } => self.execute_attach_policy(query, policy_id, principal),
5961 QueryExpr::DetachPolicy {
5962 ref policy_id,
5963 ref principal,
5964 } => self.execute_detach_policy(query, policy_id, principal),
5965 QueryExpr::ShowPolicies { ref filter } => {
5966 self.execute_show_policies(query, filter.as_ref())
5967 }
5968 QueryExpr::ShowEffectivePermissions {
5969 ref user,
5970 ref resource,
5971 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
5972 QueryExpr::SimulatePolicy {
5973 ref user,
5974 ref action,
5975 ref resource,
5976 } => self.execute_simulate_policy(query, user, action, resource),
5977 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
5978 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
5979 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
5980 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
5981 };
5982
5983 let mut query_result = query_result;
5987 if let Ok(ref mut result) = query_result {
5988 if result.statement_type == "select" {
5989 self.apply_secret_decryption(result);
5990 }
5991 }
5992
5993 if let Ok(ref result) = query_result {
6000 frame.write_result_cache(self, result, result_cache_scopes);
6001 }
6002
6003 query_result
6004 }
6005
6006 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
6012 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
6013 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
6014 let expr = self.rewrite_view_refs(expr);
6018
6019 self.validate_model_operations_before_auth(&expr)?;
6020 if let Err(err) = self.check_query_privilege(&expr) {
6024 return Err(RedDBError::Query(format!("permission denied: {err}")));
6025 }
6026
6027 let statement = query_expr_name(&expr);
6028 let mode = detect_mode(statement);
6029 let query_str = statement;
6030
6031 let result = self.dispatch_expr(expr, query_str, mode)?;
6032 let mut r = result;
6033 if r.statement_type == "select" {
6034 self.apply_secret_decryption(&mut r);
6035 }
6036 Ok(r)
6037 }
6038
6039 pub(super) fn validate_model_operations_before_auth(
6040 &self,
6041 expr: &QueryExpr,
6042 ) -> RedDBResult<()> {
6043 use crate::catalog::CollectionModel;
6044 use crate::runtime::ddl::polymorphic_resolver;
6045 use crate::storage::query::ast::KvCommand;
6046
6047 let system_schema_target = match expr {
6048 QueryExpr::DropTable(q) => Some(q.name.as_str()),
6049 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
6050 QueryExpr::DropVector(q) => Some(q.name.as_str()),
6051 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
6052 QueryExpr::DropKv(q) => Some(q.name.as_str()),
6053 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
6054 QueryExpr::Truncate(q) => Some(q.name.as_str()),
6055 _ => None,
6056 };
6057 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
6058 return Err(RedDBError::Query("system schema is read-only".to_string()));
6059 }
6060
6061 let expected = match expr {
6062 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
6063 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
6064 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
6065 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
6066 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
6067 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
6068 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
6069 QueryExpr::KvCommand(cmd) => {
6070 let (collection, model) = match cmd {
6071 KvCommand::Put {
6072 collection, model, ..
6073 }
6074 | KvCommand::Get {
6075 collection, model, ..
6076 }
6077 | KvCommand::Incr {
6078 collection, model, ..
6079 }
6080 | KvCommand::Cas {
6081 collection, model, ..
6082 }
6083 | KvCommand::Delete {
6084 collection, model, ..
6085 } => (collection.as_str(), *model),
6086 KvCommand::Rotate { collection, .. }
6087 | KvCommand::History { collection, .. }
6088 | KvCommand::List { collection, .. }
6089 | KvCommand::Purge { collection, .. } => {
6090 (collection.as_str(), CollectionModel::Vault)
6091 }
6092 KvCommand::InvalidateTags { collection, .. } => {
6093 (collection.as_str(), CollectionModel::Kv)
6094 }
6095 KvCommand::Watch {
6096 collection, model, ..
6097 } => (collection.as_str(), *model),
6098 KvCommand::Unseal { collection, .. } => {
6099 (collection.as_str(), CollectionModel::Vault)
6100 }
6101 };
6102 Some((collection, model))
6103 }
6104 QueryExpr::ConfigCommand(cmd) => {
6105 self.validate_config_command_before_auth(cmd)?;
6106 None
6107 }
6108 _ => None,
6109 };
6110
6111 let Some((name, expected_model)) = expected else {
6112 return Ok(());
6113 };
6114 let snapshot = self.inner.db.catalog_model_snapshot();
6115 let Some(actual_model) = snapshot
6116 .collections
6117 .iter()
6118 .find(|collection| collection.name == name)
6119 .map(|collection| collection.declared_model.unwrap_or(collection.model))
6120 else {
6121 return Ok(());
6122 };
6123 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
6124 }
6125
6126 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
6131 if self.inner.views.read().is_empty() {
6133 return expr;
6134 }
6135 self.rewrite_view_refs_inner(expr)
6136 }
6137
6138 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
6139 use crate::storage::query::ast::{Filter, TableSource};
6140 match expr {
6141 QueryExpr::Table(mut tq) => {
6142 if let Some(TableSource::Subquery(body)) = tq.source.take() {
6148 tq.source = Some(TableSource::Subquery(Box::new(
6149 self.rewrite_view_refs_inner(*body),
6150 )));
6151 return QueryExpr::Table(tq);
6152 }
6153
6154 let maybe_view = {
6158 let views = self.inner.views.read();
6159 views.get(&tq.table).cloned()
6160 };
6161 let Some(view) = maybe_view else {
6162 return QueryExpr::Table(tq);
6163 };
6164
6165 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
6169
6170 match inner_expr {
6178 QueryExpr::Table(mut inner_tq) => {
6179 if let Some(outer_filter) = tq.filter.take() {
6180 inner_tq.filter = Some(match inner_tq.filter.take() {
6181 Some(existing) => {
6182 Filter::And(Box::new(existing), Box::new(outer_filter))
6183 }
6184 None => outer_filter,
6185 });
6186 }
6187 if let Some(outer_limit) = tq.limit {
6188 inner_tq.limit = Some(match inner_tq.limit {
6189 Some(existing) => existing.min(outer_limit),
6190 None => outer_limit,
6191 });
6192 }
6193 if let Some(outer_offset) = tq.offset {
6194 inner_tq.offset = Some(match inner_tq.offset {
6195 Some(existing) => existing + outer_offset,
6196 None => outer_offset,
6197 });
6198 }
6199 QueryExpr::Table(inner_tq)
6200 }
6201 other => other,
6202 }
6203 }
6204 QueryExpr::Join(mut jq) => {
6205 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
6206 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
6207 QueryExpr::Join(jq)
6208 }
6209 other => other,
6212 }
6213 }
6214
6215 fn authorize_relational_table_select(
6219 &self,
6220 mut table: TableQuery,
6221 frame: &dyn super::statement_frame::ReadFrame,
6222 ) -> RedDBResult<Option<TableQuery>> {
6223 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6224 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6225 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6226 return Ok(Some(table));
6227 }
6228
6229 self.check_table_column_projection_authz(&table, frame)?;
6230
6231 if self.inner.rls_enabled_tables.read().contains(&table.table) {
6232 return Ok(inject_rls_filters(self, frame, table));
6233 }
6234
6235 Ok(Some(table))
6236 }
6237
6238 fn authorize_relational_join_select(
6239 &self,
6240 mut join: JoinQuery,
6241 frame: &dyn super::statement_frame::ReadFrame,
6242 ) -> RedDBResult<Option<JoinQuery>> {
6243 self.check_join_column_projection_authz(&join, frame)?;
6244 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
6245 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
6246 Ok(inject_rls_into_join(self, frame, join))
6247 }
6248
6249 fn authorize_relational_join_child(
6250 &self,
6251 expr: QueryExpr,
6252 frame: &dyn super::statement_frame::ReadFrame,
6253 ) -> RedDBResult<QueryExpr> {
6254 match expr {
6255 QueryExpr::Table(mut table) => {
6256 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6257 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6258 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6259 }
6260 Ok(QueryExpr::Table(table))
6261 }
6262 QueryExpr::Join(join) => self
6263 .authorize_relational_join_select(join, frame)?
6264 .map(QueryExpr::Join)
6265 .ok_or_else(|| {
6266 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6267 }),
6268 other => Ok(other),
6269 }
6270 }
6271
6272 fn authorize_relational_select_expr(
6273 &self,
6274 expr: QueryExpr,
6275 frame: &dyn super::statement_frame::ReadFrame,
6276 ) -> RedDBResult<QueryExpr> {
6277 match expr {
6278 QueryExpr::Table(table) => self
6279 .authorize_relational_table_select(table, frame)?
6280 .map(QueryExpr::Table)
6281 .ok_or_else(|| {
6282 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6283 }),
6284 QueryExpr::Join(join) => self
6285 .authorize_relational_join_select(join, frame)?
6286 .map(QueryExpr::Join)
6287 .ok_or_else(|| {
6288 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6289 }),
6290 other => Ok(other),
6291 }
6292 }
6293
6294 fn check_table_column_projection_authz(
6295 &self,
6296 table: &TableQuery,
6297 frame: &dyn super::statement_frame::ReadFrame,
6298 ) -> RedDBResult<()> {
6299 let Some((username, role)) = frame.identity() else {
6300 return Ok(());
6301 };
6302 let Some(auth_store) = self.inner.auth_store.read().clone() else {
6303 return Ok(());
6304 };
6305
6306 let columns = self.resolved_table_projection_columns(table)?;
6307 let request = ColumnAccessRequest::select(table.table.clone(), columns);
6308 let principal = UserId::from_parts(frame.effective_scope(), username);
6309 let ctx = runtime_iam_context(role, frame.effective_scope());
6310 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
6311 if outcome.allowed() {
6312 return Ok(());
6313 }
6314
6315 if let Some(denied) = outcome.first_denied_column() {
6316 return Err(RedDBError::Query(format!(
6317 "permission denied: principal=`{username}` cannot select column `{}`",
6318 denied.resource.name
6319 )));
6320 }
6321 Err(RedDBError::Query(format!(
6322 "permission denied: principal=`{username}` cannot select table `{}`",
6323 table.table
6324 )))
6325 }
6326
6327 fn check_join_column_projection_authz(
6328 &self,
6329 join: &JoinQuery,
6330 frame: &dyn super::statement_frame::ReadFrame,
6331 ) -> RedDBResult<()> {
6332 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
6333 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
6334 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
6335
6336 for (table, columns) in by_table {
6337 let query = TableQuery {
6338 table,
6339 source: None,
6340 alias: None,
6341 select_items: Vec::new(),
6342 columns: columns.into_iter().map(Projection::Column).collect(),
6343 where_expr: None,
6344 filter: None,
6345 group_by_exprs: Vec::new(),
6346 group_by: Vec::new(),
6347 having_expr: None,
6348 having: None,
6349 order_by: Vec::new(),
6350 limit: None,
6351 limit_param: None,
6352 offset: None,
6353 offset_param: None,
6354 expand: None,
6355 as_of: None,
6356 };
6357 self.check_table_column_projection_authz(&query, frame)?;
6358 }
6359 Ok(())
6360 }
6361
6362 fn collect_join_projection_columns(
6363 &self,
6364 join: &JoinQuery,
6365 projections: &[Projection],
6366 out: &mut HashMap<String, BTreeSet<String>>,
6367 ) -> RedDBResult<()> {
6368 let left = table_side_context(join.left.as_ref());
6369 let right = table_side_context(join.right.as_ref());
6370
6371 if projections
6372 .iter()
6373 .any(|projection| matches!(projection, Projection::All))
6374 {
6375 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
6376 out.entry(side.table.clone())
6377 .or_default()
6378 .extend(self.table_all_projection_columns(&side.table)?);
6379 }
6380 return Ok(());
6381 }
6382
6383 for projection in projections {
6384 collect_projection_columns_for_join_side(
6385 projection,
6386 left.as_ref(),
6387 right.as_ref(),
6388 out,
6389 )?;
6390 }
6391 Ok(())
6392 }
6393
6394 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
6395 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
6396 if projections
6397 .iter()
6398 .any(|projection| matches!(projection, Projection::All))
6399 {
6400 return self.table_all_projection_columns(&table.table);
6401 }
6402
6403 let mut columns = BTreeSet::new();
6404 for projection in &projections {
6405 collect_projection_columns_for_table(
6406 projection,
6407 &table.table,
6408 table.alias.as_deref(),
6409 &mut columns,
6410 );
6411 }
6412 Ok(columns.into_iter().collect())
6413 }
6414
6415 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
6416 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
6417 let columns: Vec<String> = contract
6418 .declared_columns
6419 .iter()
6420 .map(|column| column.name.clone())
6421 .collect();
6422 if !columns.is_empty() {
6423 return Ok(columns);
6424 }
6425 }
6426
6427 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
6428 Ok(records
6429 .first()
6430 .map(|record| {
6431 record
6432 .column_names()
6433 .into_iter()
6434 .map(|column| column.to_string())
6435 .collect()
6436 })
6437 .unwrap_or_default())
6438 }
6439
6440 fn resolve_table_expr_subqueries(
6441 &self,
6442 mut table: TableQuery,
6443 frame: &dyn super::statement_frame::ReadFrame,
6444 ) -> RedDBResult<TableQuery> {
6445 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6446 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
6447 table.source = Some(TableSource::Subquery(Box::new(inner)));
6448 }
6449
6450 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
6451 for item in &mut table.select_items {
6452 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
6453 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6454 }
6455 }
6456 if let Some(where_expr) = table.where_expr.take() {
6457 table.where_expr =
6458 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
6459 table.filter = None;
6460 }
6461 if let Some(having_expr) = table.having_expr.take() {
6462 table.having_expr =
6463 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
6464 table.having = None;
6465 }
6466 for expr in &mut table.group_by_exprs {
6467 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6468 }
6469 for clause in &mut table.order_by {
6470 if let Some(expr) = clause.expr.take() {
6471 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
6472 }
6473 }
6474 Ok(table)
6475 }
6476
6477 fn resolve_select_expr_subqueries(
6478 &self,
6479 expr: QueryExpr,
6480 frame: &dyn super::statement_frame::ReadFrame,
6481 ) -> RedDBResult<QueryExpr> {
6482 match expr {
6483 QueryExpr::Table(table) => self
6484 .resolve_table_expr_subqueries(table, frame)
6485 .map(QueryExpr::Table),
6486 QueryExpr::Join(mut join) => {
6487 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
6488 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
6489 Ok(QueryExpr::Join(join))
6490 }
6491 other => Ok(other),
6492 }
6493 }
6494
6495 fn resolve_expr_subqueries(
6496 &self,
6497 expr: crate::storage::query::ast::Expr,
6498 outer_scopes: &[String],
6499 frame: &dyn super::statement_frame::ReadFrame,
6500 ) -> RedDBResult<crate::storage::query::ast::Expr> {
6501 use crate::storage::query::ast::Expr;
6502
6503 match expr {
6504 Expr::Subquery { query, span } => {
6505 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
6506 if values.len() > 1 {
6507 return Err(RedDBError::Query(
6508 "scalar subquery returned more than one row".to_string(),
6509 ));
6510 }
6511 Ok(Expr::Literal {
6512 value: values.into_iter().next().unwrap_or(Value::Null),
6513 span,
6514 })
6515 }
6516 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
6517 op,
6518 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
6519 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
6520 span,
6521 }),
6522 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
6523 op,
6524 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6525 span,
6526 }),
6527 Expr::Cast {
6528 inner,
6529 target,
6530 span,
6531 } => Ok(Expr::Cast {
6532 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
6533 target,
6534 span,
6535 }),
6536 Expr::FunctionCall { name, args, span } => {
6537 let args = args
6538 .into_iter()
6539 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
6540 .collect::<RedDBResult<Vec<_>>>()?;
6541 Ok(Expr::FunctionCall { name, args, span })
6542 }
6543 Expr::Case {
6544 branches,
6545 else_,
6546 span,
6547 } => {
6548 let branches = branches
6549 .into_iter()
6550 .map(|(cond, value)| {
6551 Ok((
6552 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
6553 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
6554 ))
6555 })
6556 .collect::<RedDBResult<Vec<_>>>()?;
6557 let else_ = else_
6558 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
6559 .transpose()?
6560 .map(Box::new);
6561 Ok(Expr::Case {
6562 branches,
6563 else_,
6564 span,
6565 })
6566 }
6567 Expr::IsNull {
6568 operand,
6569 negated,
6570 span,
6571 } => Ok(Expr::IsNull {
6572 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6573 negated,
6574 span,
6575 }),
6576 Expr::InList {
6577 target,
6578 values,
6579 negated,
6580 span,
6581 } => {
6582 let target =
6583 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
6584 let mut resolved = Vec::new();
6585 for value in values {
6586 if let Expr::Subquery { query, .. } = value {
6587 resolved.extend(
6588 self.execute_expr_subquery_values(query, outer_scopes, frame)?
6589 .into_iter()
6590 .map(Expr::lit),
6591 );
6592 } else {
6593 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
6594 }
6595 }
6596 Ok(Expr::InList {
6597 target,
6598 values: resolved,
6599 negated,
6600 span,
6601 })
6602 }
6603 Expr::Between {
6604 target,
6605 low,
6606 high,
6607 negated,
6608 span,
6609 } => Ok(Expr::Between {
6610 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
6611 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
6612 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
6613 negated,
6614 span,
6615 }),
6616 other => Ok(other),
6617 }
6618 }
6619
6620 fn execute_expr_subquery_values(
6621 &self,
6622 subquery: crate::storage::query::ast::ExprSubquery,
6623 outer_scopes: &[String],
6624 frame: &dyn super::statement_frame::ReadFrame,
6625 ) -> RedDBResult<Vec<Value>> {
6626 let query = *subquery.query;
6627 if query_references_outer_scope(&query, outer_scopes) {
6628 return Err(RedDBError::Query(
6629 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
6630 ));
6631 }
6632 let query = self.rewrite_view_refs(query);
6633 let query = self.resolve_select_expr_subqueries(query, frame)?;
6634 let query = self.authorize_relational_select_expr(query, frame)?;
6635 let result = match query {
6636 QueryExpr::Table(table) => {
6637 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
6638 }
6639 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
6640 other => {
6641 return Err(RedDBError::Query(format!(
6642 "expression subquery must be a SELECT query, got {}",
6643 query_expr_name(&other)
6644 )))
6645 }
6646 };
6647 first_column_values(result)
6648 }
6649
6650 fn dispatch_expr(
6651 &self,
6652 expr: QueryExpr,
6653 query_str: &str,
6654 mode: QueryMode,
6655 ) -> RedDBResult<RuntimeQueryResult> {
6656 let statement = query_expr_name(&expr);
6657 match expr {
6658 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
6659 Err(RedDBError::Query(
6661 "graph queries cannot be used as prepared statements".to_string(),
6662 ))
6663 }
6664 QueryExpr::Table(table) => {
6665 let scope = self.ai_scope();
6666 let table = self.resolve_table_expr_subqueries(
6667 table,
6668 &scope as &dyn super::statement_frame::ReadFrame,
6669 )?;
6670 if super::red_schema::is_virtual_table(&table.table) {
6671 return Ok(RuntimeQueryResult {
6672 query: query_str.to_string(),
6673 mode,
6674 statement,
6675 engine: "runtime-red-schema",
6676 result: super::red_schema::red_query(
6677 self,
6678 &table.table,
6679 &table,
6680 &scope as &dyn super::statement_frame::ReadFrame,
6681 )?,
6682 affected_rows: 0,
6683 statement_type: "select",
6684 });
6685 }
6686 let Some(table_with_rls) = self.authorize_relational_table_select(
6687 table,
6688 &scope as &dyn super::statement_frame::ReadFrame,
6689 )?
6690 else {
6691 return Ok(RuntimeQueryResult {
6692 query: query_str.to_string(),
6693 mode,
6694 statement,
6695 engine: "runtime-table-rls",
6696 result: crate::storage::query::unified::UnifiedResult::empty(),
6697 affected_rows: 0,
6698 statement_type: "select",
6699 });
6700 };
6701 Ok(RuntimeQueryResult {
6702 query: query_str.to_string(),
6703 mode,
6704 statement,
6705 engine: "runtime-table",
6706 result: execute_runtime_table_query(
6707 &self.inner.db,
6708 &table_with_rls,
6709 Some(&self.inner.index_store),
6710 )?,
6711 affected_rows: 0,
6712 statement_type: "select",
6713 })
6714 }
6715 QueryExpr::Join(join) => {
6716 let scope = self.ai_scope();
6717 let Some(join_with_rls) = self.authorize_relational_join_select(
6718 join,
6719 &scope as &dyn super::statement_frame::ReadFrame,
6720 )?
6721 else {
6722 return Ok(RuntimeQueryResult {
6723 query: query_str.to_string(),
6724 mode,
6725 statement,
6726 engine: "runtime-join-rls",
6727 result: crate::storage::query::unified::UnifiedResult::empty(),
6728 affected_rows: 0,
6729 statement_type: "select",
6730 });
6731 };
6732 Ok(RuntimeQueryResult {
6733 query: query_str.to_string(),
6734 mode,
6735 statement,
6736 engine: "runtime-join",
6737 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
6738 affected_rows: 0,
6739 statement_type: "select",
6740 })
6741 }
6742 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
6743 query: query_str.to_string(),
6744 mode,
6745 statement,
6746 engine: "runtime-vector",
6747 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
6748 affected_rows: 0,
6749 statement_type: "select",
6750 }),
6751 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
6752 query: query_str.to_string(),
6753 mode,
6754 statement,
6755 engine: "runtime-hybrid",
6756 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
6757 affected_rows: 0,
6758 statement_type: "select",
6759 }),
6760 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
6761 Err(RedDBError::Query(
6762 super::red_schema::READ_ONLY_ERROR.to_string(),
6763 ))
6764 }
6765 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
6766 Err(RedDBError::Query(
6767 super::red_schema::READ_ONLY_ERROR.to_string(),
6768 ))
6769 }
6770 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
6771 Err(RedDBError::Query(
6772 super::red_schema::READ_ONLY_ERROR.to_string(),
6773 ))
6774 }
6775 QueryExpr::Insert(ref insert) => self
6776 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
6777 self.execute_insert(query_str, insert)
6778 }),
6779 QueryExpr::Update(ref update) => self
6780 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
6781 self.execute_update(query_str, update)
6782 }),
6783 QueryExpr::Delete(ref delete) => self
6784 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
6785 self.execute_delete(query_str, delete)
6786 }),
6787 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
6788 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
6789 _ => Err(RedDBError::Query(format!(
6790 "prepared-statement execution does not support {statement} statements"
6791 ))),
6792 }
6793 }
6794
6795 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
6798 let q = query.trim();
6801 if !q.starts_with("SELECT") && !q.starts_with("select") {
6802 return None;
6803 }
6804
6805 let where_pos = q
6807 .find("WHERE _entity_id")
6808 .or_else(|| q.find("where _entity_id"))?;
6809 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
6811
6812 let id_str = after_eq.trim();
6814 let entity_id: u64 = id_str.parse().ok()?;
6815
6816 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
6818 let table = q[from_pos..where_pos].trim();
6819 if table.is_empty()
6820 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
6821 {
6822 return None; }
6824 let table_name = table.split_whitespace().next()?;
6825
6826 let store = self.inner.db.store();
6832 let entity = store
6833 .get(
6834 table_name,
6835 crate::storage::unified::EntityId::new(entity_id),
6836 )
6837 .filter(entity_visible_under_current_snapshot);
6838
6839 let count = if entity.is_some() { 1u64 } else { 0 };
6840
6841 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
6847 .as_ref()
6848 .and_then(|e| runtime_table_record_from_entity(e.clone()))
6849 .into_iter()
6850 .collect();
6851
6852 let json = match entity {
6853 Some(ref e) => execute_runtime_serialize_single_entity(e),
6854 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
6855 .to_string(),
6856 };
6857
6858 Some(Ok(RuntimeQueryResult {
6859 query: query.to_string(),
6860 mode: crate::storage::query::modes::QueryMode::Sql,
6861 statement: "select",
6862 engine: "fast-entity-lookup",
6863 result: crate::storage::query::unified::UnifiedResult {
6864 columns: Vec::new(),
6865 records,
6866 stats: crate::storage::query::unified::QueryStats {
6867 rows_scanned: count,
6868 ..Default::default()
6869 },
6870 pre_serialized_json: Some(json),
6871 },
6872 affected_rows: 0,
6873 statement_type: "select",
6874 }))
6875 }
6876
6877 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
6878 match self
6879 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
6880 .as_str()
6881 {
6882 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
6883 "shadow" => RuntimeResultCacheBackend::Shadow,
6884 _ => RuntimeResultCacheBackend::Legacy,
6885 }
6886 }
6887
6888 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6889 match self.result_cache_backend() {
6890 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
6891 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
6892 RuntimeResultCacheBackend::Shadow => {
6893 let legacy = self.get_legacy_result_cache_entry(key);
6894 let blob = self.get_blob_result_cache_entry(key);
6895 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
6896 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
6897 self.inner
6898 .result_cache_shadow_divergences
6899 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
6900 tracing::warn!(
6901 key,
6902 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
6903 "result cache shadow backend diverged from legacy"
6904 );
6905 }
6906 }
6907 legacy
6908 }
6909 }
6910 }
6911
6912 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6913 let cache = self.inner.result_cache.read();
6914 cache.0.get(key).and_then(|entry| {
6915 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
6916 Some(entry.result.clone())
6917 } else {
6918 None
6919 }
6920 })
6921 }
6922
6923 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
6924 let hit = self
6925 .inner
6926 .result_blob_cache
6927 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
6928 {
6929 let cache = self.inner.result_blob_entries.read();
6930 if let Some(entry) = cache.0.get(key) {
6931 return Some(entry.result.clone());
6932 }
6933 }
6934
6935 let (result, scopes) = decode_result_cache_payload(hit.value())?;
6936 let mut cache = self.inner.result_blob_entries.write();
6937 let (ref mut map, ref mut order) = *cache;
6938 if !map.contains_key(key) {
6939 order.push_back(key.to_string());
6940 }
6941 map.insert(
6942 key.to_string(),
6943 RuntimeResultCacheEntry {
6944 result: result.clone(),
6945 cached_at: std::time::Instant::now(),
6946 scopes,
6947 },
6948 );
6949 trim_result_cache(map, order);
6950 Some(result)
6951 }
6952
6953 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6954 match self.result_cache_backend() {
6955 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
6956 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
6957 RuntimeResultCacheBackend::Shadow => {
6958 self.put_legacy_result_cache_entry(key, entry.clone());
6959 self.put_blob_result_cache_entry(key, entry);
6960 }
6961 }
6962 }
6963
6964 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6965 let mut cache = self.inner.result_cache.write();
6966 let (ref mut map, ref mut order) = *cache;
6967 if !map.contains_key(key) {
6968 order.push_back(key.to_string());
6969 }
6970 map.insert(key.to_string(), entry);
6971 trim_result_cache(map, order);
6972 }
6973
6974 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
6975 let policy = crate::storage::cache::BlobCachePolicy::default()
6976 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
6977 .priority(200);
6978 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
6979 let bytes = encode_result_cache_payload(&entry)
6980 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
6981 let put = crate::storage::cache::BlobCachePut::new(bytes)
6982 .with_dependencies(dependencies)
6983 .with_policy(policy);
6984 if self
6985 .inner
6986 .result_blob_cache
6987 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
6988 .is_err()
6989 {
6990 return;
6991 }
6992
6993 let mut cache = self.inner.result_blob_entries.write();
6994 let (ref mut map, ref mut order) = *cache;
6995 if !map.contains_key(key) {
6996 order.push_back(key.to_string());
6997 }
6998 map.insert(key.to_string(), entry);
6999 trim_result_cache(map, order);
7000 }
7001
7002 pub fn result_cache_shadow_divergences(&self) -> u64 {
7003 self.inner
7004 .result_cache_shadow_divergences
7005 .load(std::sync::atomic::Ordering::Relaxed)
7006 }
7007
7008 pub fn invalidate_result_cache(&self) {
7011 let mut cache = self.inner.result_cache.write();
7012 cache.0.clear();
7013 cache.1.clear();
7014 let mut blob_entries = self.inner.result_blob_entries.write();
7015 blob_entries.0.clear();
7016 blob_entries.1.clear();
7017 self.inner
7018 .result_blob_cache
7019 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7020 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7021 ask_entries.0.clear();
7022 ask_entries.1.clear();
7023 self.inner
7024 .result_blob_cache
7025 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7026 }
7027
7028 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
7031 let legacy_has_match = {
7034 let cache = self.inner.result_cache.read();
7035 let (ref map, _) = *cache;
7036 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7037 };
7038 let blob_has_match = {
7039 let cache = self.inner.result_blob_entries.read();
7040 let (ref map, _) = *cache;
7041 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7042 };
7043 if legacy_has_match {
7044 let mut cache = self.inner.result_cache.write();
7045 let (ref mut map, ref mut order) = *cache;
7046 map.retain(|_, entry| !entry.scopes.contains(table));
7047 order.retain(|key| map.contains_key(key));
7048 }
7049
7050 if matches!(
7051 self.result_cache_backend(),
7052 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
7053 ) {
7054 let mut blob_entries = self.inner.result_blob_entries.write();
7055 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7056 blob_map.clear();
7057 blob_order.clear();
7058 self.inner
7059 .result_blob_cache
7060 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7061 } else if blob_has_match {
7062 let mut blob_entries = self.inner.result_blob_entries.write();
7063 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7064 blob_map.retain(|_, entry| !entry.scopes.contains(table));
7065 blob_order.retain(|key| blob_map.contains_key(key));
7066 }
7067 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7068 ask_entries.0.clear();
7069 ask_entries.1.clear();
7070 self.inner
7071 .result_blob_cache
7072 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7073 }
7074
7075 pub(crate) fn invalidate_plan_cache(&self) {
7076 self.inner.query_cache.write().clear();
7077 self.inner
7078 .ddl_epoch
7079 .fetch_add(1, std::sync::atomic::Ordering::Release);
7080 }
7081
7082 pub fn ddl_epoch(&self) -> u64 {
7086 self.inner
7087 .ddl_epoch
7088 .load(std::sync::atomic::Ordering::Acquire)
7089 }
7090
7091 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
7092 let store = self.inner.db.store();
7093 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
7094 self.invalidate_plan_cache();
7095 }
7096
7097 pub(crate) fn rehydrate_tenant_tables(&self) {
7106 let store = self.inner.db.store();
7107 let Some(manager) = store.get_collection("red_config") else {
7108 return;
7109 };
7110 for entity in manager.query_all(|_| true) {
7115 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
7116 continue;
7117 };
7118 let Some(named) = &row.named else { continue };
7119 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
7120 continue;
7121 };
7122 let Some(rest) = key.strip_prefix("tenant_tables.") else {
7124 continue;
7125 };
7126 let Some((table, suffix)) = rest.rsplit_once('.') else {
7127 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7133 collection: "red_config".to_string(),
7134 detail: format!("malformed tenant_tables key: {key}"),
7135 }
7136 .emit_global();
7137 continue;
7138 };
7139 if suffix != "column" {
7140 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7141 collection: "red_config".to_string(),
7142 detail: format!("unexpected tenant_tables suffix: {key}"),
7143 }
7144 .emit_global();
7145 continue;
7146 }
7147 match named.get("value") {
7148 Some(crate::storage::schema::Value::Text(column)) => {
7149 self.register_tenant_table(table, column);
7150 }
7151 Some(crate::storage::schema::Value::Null) | None => {
7153 self.unregister_tenant_table(table);
7154 }
7155 _ => {}
7156 }
7157 }
7158 }
7159
7160 pub(crate) fn rehydrate_declared_column_schemas(&self) {
7161 let store = self.inner.db.store();
7162 for contract in self.inner.db.collection_contracts() {
7163 let columns: Vec<String> = contract
7164 .declared_columns
7165 .iter()
7166 .map(|column| column.name.clone())
7167 .collect();
7168 let Some(manager) = store.get_collection(&contract.name) else {
7169 continue;
7170 };
7171 manager.set_column_schema_if_empty(columns);
7172 }
7173 }
7174
7175 pub fn register_tenant_table(&self, table: &str, column: &str) {
7180 use crate::storage::query::ast::{
7181 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
7182 };
7183 self.inner
7184 .tenant_tables
7185 .write()
7186 .insert(table.to_string(), column.to_string());
7187
7188 let lhs = Expr::Column {
7194 field: FieldRef::TableColumn {
7195 table: table.to_string(),
7196 column: column.to_string(),
7197 },
7198 span: Span::synthetic(),
7199 };
7200 let rhs = Expr::FunctionCall {
7201 name: "CURRENT_TENANT".to_string(),
7202 args: Vec::new(),
7203 span: Span::synthetic(),
7204 };
7205 let policy_filter = Filter::CompareExpr {
7206 lhs,
7207 op: CompareOp::Eq,
7208 rhs,
7209 };
7210
7211 let policy = CreatePolicyQuery {
7212 name: "__tenant_iso".to_string(),
7213 table: table.to_string(),
7214 action: None, role: None, using: Box::new(policy_filter),
7217 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
7224 };
7225
7226 self.inner.rls_policies.write().insert(
7228 (table.to_string(), "__tenant_iso".to_string()),
7229 Arc::new(policy),
7230 );
7231 self.inner
7232 .rls_enabled_tables
7233 .write()
7234 .insert(table.to_string());
7235
7236 self.ensure_tenant_index(table, column);
7242 }
7243
7244 fn ensure_tenant_index(&self, table: &str, column: &str) {
7252 if column.contains('.') {
7253 return;
7254 }
7255 let index_name = format!("__tenant_idx_{table}");
7256 let registry = self.inner.index_store.list_indices(table);
7257 if registry.iter().any(|idx| idx.name == index_name) {
7258 return;
7259 }
7260 if registry
7261 .iter()
7262 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
7263 {
7264 return;
7265 }
7266
7267 let store = self.inner.db.store();
7268 let Some(manager) = store.get_collection(table) else {
7269 return;
7270 };
7271 let entities = manager.query_all(|_| true);
7272 let entity_fields: Vec<(
7273 crate::storage::unified::EntityId,
7274 Vec<(String, crate::storage::schema::Value)>,
7275 )> = entities
7276 .iter()
7277 .map(|e| {
7278 let fields = match &e.data {
7279 crate::storage::EntityData::Row(row) => {
7280 if let Some(ref named) = row.named {
7281 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
7282 } else if let Some(ref schema) = row.schema {
7283 schema
7284 .iter()
7285 .zip(row.columns.iter())
7286 .map(|(k, v)| (k.clone(), v.clone()))
7287 .collect()
7288 } else {
7289 Vec::new()
7290 }
7291 }
7292 crate::storage::EntityData::Node(node) => node
7293 .properties
7294 .iter()
7295 .map(|(k, v)| (k.clone(), v.clone()))
7296 .collect(),
7297 _ => Vec::new(),
7298 };
7299 (e.id, fields)
7300 })
7301 .collect();
7302
7303 let columns = vec![column.to_string()];
7304 if self
7305 .inner
7306 .index_store
7307 .create_index(
7308 &index_name,
7309 table,
7310 &columns,
7311 super::index_store::IndexMethodKind::Hash,
7312 false,
7313 &entity_fields,
7314 )
7315 .is_err()
7316 {
7317 return;
7318 }
7319 self.inner
7320 .index_store
7321 .register(super::index_store::RegisteredIndex {
7322 name: index_name,
7323 collection: table.to_string(),
7324 columns,
7325 method: super::index_store::IndexMethodKind::Hash,
7326 unique: false,
7327 });
7328 self.invalidate_plan_cache();
7329 }
7330
7331 fn drop_tenant_index(&self, table: &str) {
7334 let index_name = format!("__tenant_idx_{table}");
7335 self.inner.index_store.drop_index(&index_name, table);
7336 }
7337
7338 pub fn tenant_column(&self, table: &str) -> Option<String> {
7342 self.inner.tenant_tables.read().get(table).cloned()
7343 }
7344
7345 pub fn unregister_tenant_table(&self, table: &str) {
7349 self.inner.tenant_tables.write().remove(table);
7350 self.inner
7351 .rls_policies
7352 .write()
7353 .remove(&(table.to_string(), "__tenant_iso".to_string()));
7354 self.drop_tenant_index(table);
7355 let has_other_policies = self
7357 .inner
7358 .rls_policies
7359 .read()
7360 .keys()
7361 .any(|(t, _)| t == table);
7362 if !has_other_policies {
7363 self.inner.rls_enabled_tables.write().remove(table);
7364 }
7365 }
7366
7367 pub(crate) fn record_pending_tombstone(
7373 &self,
7374 conn_id: u64,
7375 collection: &str,
7376 id: crate::storage::unified::entity::EntityId,
7377 stamper_xid: crate::storage::transaction::snapshot::Xid,
7378 previous_xmax: crate::storage::transaction::snapshot::Xid,
7379 ) {
7380 self.inner
7381 .pending_tombstones
7382 .write()
7383 .entry(conn_id)
7384 .or_default()
7385 .push((collection.to_string(), id, stamper_xid, previous_xmax));
7386 }
7387
7388 pub(crate) fn record_pending_versioned_update(
7389 &self,
7390 conn_id: u64,
7391 collection: &str,
7392 old_id: crate::storage::unified::entity::EntityId,
7393 new_id: crate::storage::unified::entity::EntityId,
7394 stamper_xid: crate::storage::transaction::snapshot::Xid,
7395 previous_xmax: crate::storage::transaction::snapshot::Xid,
7396 ) {
7397 self.inner
7398 .pending_versioned_updates
7399 .write()
7400 .entry(conn_id)
7401 .or_default()
7402 .push((
7403 collection.to_string(),
7404 old_id,
7405 new_id,
7406 stamper_xid,
7407 previous_xmax,
7408 ));
7409 }
7410
7411 fn with_deferred_store_wal_if_transaction<T>(
7412 &self,
7413 f: impl FnOnce() -> RedDBResult<T>,
7414 ) -> RedDBResult<T> {
7415 let conn_id = current_connection_id();
7416 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
7417 return f();
7418 }
7419
7420 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
7421 let result = f();
7422 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
7423 match result {
7424 Ok(value) => {
7425 self.record_pending_store_wal_actions(conn_id, captured);
7426 Ok(value)
7427 }
7428 Err(err) => Err(err),
7429 }
7430 }
7431
7432 fn with_deferred_store_wal_for_dml<T>(
7433 &self,
7434 capture_autocommit_events: bool,
7435 f: impl FnOnce() -> RedDBResult<T>,
7436 ) -> RedDBResult<T> {
7437 let conn_id = current_connection_id();
7438 if self.inner.tx_contexts.read().contains_key(&conn_id) {
7439 return self.with_deferred_store_wal_if_transaction(f);
7440 }
7441 if !capture_autocommit_events {
7442 return f();
7443 }
7444
7445 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
7446 let result = f();
7447 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
7448 self.inner
7449 .db
7450 .store()
7451 .append_deferred_store_wal_actions(captured)
7452 .map_err(|err| RedDBError::Internal(err.to_string()))?;
7453 result
7454 }
7455
7456 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
7457 !query.suppress_events
7458 && self.collection_has_event_subscriptions_for_operation(
7459 &query.table,
7460 crate::catalog::SubscriptionOperation::Insert,
7461 )
7462 }
7463
7464 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
7465 !query.suppress_events
7466 && self.collection_has_event_subscriptions_for_operation(
7467 &query.table,
7468 crate::catalog::SubscriptionOperation::Update,
7469 )
7470 }
7471
7472 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
7473 !query.suppress_events
7474 && self.collection_has_event_subscriptions_for_operation(
7475 &query.table,
7476 crate::catalog::SubscriptionOperation::Delete,
7477 )
7478 }
7479
7480 fn collection_has_event_subscriptions_for_operation(
7481 &self,
7482 collection: &str,
7483 operation: crate::catalog::SubscriptionOperation,
7484 ) -> bool {
7485 let Some(contract) = self.db().collection_contract_arc(collection) else {
7486 return false;
7487 };
7488 contract.subscriptions.iter().any(|subscription| {
7489 subscription.enabled
7490 && (subscription.ops_filter.is_empty()
7491 || subscription.ops_filter.contains(&operation))
7492 })
7493 }
7494
7495 fn record_pending_store_wal_actions(
7496 &self,
7497 conn_id: u64,
7498 actions: crate::storage::unified::DeferredStoreWalActions,
7499 ) {
7500 if actions.is_empty() {
7501 return;
7502 }
7503 let mut guard = self.inner.pending_store_wal_actions.write();
7504 guard.entry(conn_id).or_default().extend(actions);
7505 }
7506
7507 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
7508 let Some(actions) = self
7509 .inner
7510 .pending_store_wal_actions
7511 .write()
7512 .remove(&conn_id)
7513 else {
7514 return Ok(());
7515 };
7516 self.inner
7517 .db
7518 .store()
7519 .append_deferred_store_wal_actions(actions)
7520 .map_err(|err| RedDBError::Internal(err.to_string()))
7521 }
7522
7523 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
7524 self.inner
7525 .pending_store_wal_actions
7526 .write()
7527 .remove(&conn_id);
7528 }
7529
7530 fn xid_conflicts_with_snapshot(
7531 &self,
7532 xid: crate::storage::transaction::snapshot::Xid,
7533 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7534 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7535 ) -> bool {
7536 xid != 0
7537 && !own_xids.contains(&xid)
7538 && !self.inner.snapshot_manager.is_aborted(xid)
7539 && !self.inner.snapshot_manager.is_active(xid)
7540 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
7541 }
7542
7543 fn conflict_error(
7544 collection: &str,
7545 logical_id: crate::storage::unified::entity::EntityId,
7546 xid: crate::storage::transaction::snapshot::Xid,
7547 ) -> RedDBError {
7548 RedDBError::Query(format!(
7549 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
7550 logical_id.raw()
7551 ))
7552 }
7553
7554 fn check_logical_row_conflict(
7555 &self,
7556 collection: &str,
7557 logical_id: crate::storage::unified::entity::EntityId,
7558 excluded_ids: &[crate::storage::unified::entity::EntityId],
7559 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7560 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7561 ) -> RedDBResult<()> {
7562 let store = self.inner.db.store();
7563 let Some(manager) = store.get_collection(collection) else {
7564 return Ok(());
7565 };
7566
7567 for candidate in manager.query_all(|_| true) {
7568 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
7569 continue;
7570 }
7571 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
7572 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
7573 }
7574 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
7575 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
7576 }
7577 }
7578 Ok(())
7579 }
7580
7581 pub(crate) fn check_table_row_write_conflicts(
7582 &self,
7583 conn_id: u64,
7584 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7585 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7586 ) -> RedDBResult<()> {
7587 let versioned_updates = self
7588 .inner
7589 .pending_versioned_updates
7590 .read()
7591 .get(&conn_id)
7592 .cloned()
7593 .unwrap_or_default();
7594 let tombstones = self
7595 .inner
7596 .pending_tombstones
7597 .read()
7598 .get(&conn_id)
7599 .cloned()
7600 .unwrap_or_default();
7601
7602 let store = self.inner.db.store();
7603 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
7604 let Some(manager) = store.get_collection(&collection) else {
7605 continue;
7606 };
7607 let Some(old) = manager.get(old_id) else {
7608 continue;
7609 };
7610 let logical_id = old.logical_id();
7611 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7612 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7613 }
7614 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
7615 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
7616 }
7617 self.check_logical_row_conflict(
7618 &collection,
7619 logical_id,
7620 &[old_id, new_id],
7621 snapshot,
7622 own_xids,
7623 )?;
7624 }
7625
7626 for (collection, id, xid, previous_xmax) in tombstones {
7627 let Some(manager) = store.get_collection(&collection) else {
7628 continue;
7629 };
7630 let Some(entity) = manager.get(id) else {
7631 continue;
7632 };
7633 let logical_id = entity.logical_id();
7634 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7635 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7636 }
7637 if entity.xmax != xid
7638 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
7639 {
7640 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
7641 }
7642 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
7643 }
7644
7645 Ok(())
7646 }
7647
7648 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
7649 let versioned_updates = self
7650 .inner
7651 .pending_versioned_updates
7652 .read()
7653 .get(&conn_id)
7654 .cloned()
7655 .unwrap_or_default();
7656 let tombstones = self
7657 .inner
7658 .pending_tombstones
7659 .read()
7660 .get(&conn_id)
7661 .cloned()
7662 .unwrap_or_default();
7663
7664 let store = self.inner.db.store();
7665 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
7666 if let Some(manager) = store.get_collection(&collection) {
7667 if let Some(mut entity) = manager.get(old_id) {
7668 entity.set_xmax(xid);
7669 let _ = manager.update(entity);
7670 }
7671 }
7672 }
7673 for (collection, id, xid, _previous_xmax) in tombstones {
7674 if let Some(manager) = store.get_collection(&collection) {
7675 if let Some(mut entity) = manager.get(id) {
7676 entity.set_xmax(xid);
7677 let _ = manager.update(entity);
7678 }
7679 }
7680 }
7681 }
7682
7683 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
7684 self.inner
7685 .pending_versioned_updates
7686 .write()
7687 .remove(&conn_id);
7688 }
7689
7690 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
7691 let Some(pending) = self
7692 .inner
7693 .pending_versioned_updates
7694 .write()
7695 .remove(&conn_id)
7696 else {
7697 return;
7698 };
7699
7700 let store = self.inner.db.store();
7701 for (collection, old_id, new_id, xid, previous_xmax) in pending {
7702 if let Some(manager) = store.get_collection(&collection) {
7703 if let Some(mut old) = manager.get(old_id) {
7704 if old.xmax == xid {
7705 old.set_xmax(previous_xmax);
7706 let _ = manager.update(old);
7707 }
7708 }
7709 }
7710 let _ = store.delete_batch(&collection, &[new_id]);
7711 }
7712 }
7713
7714 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
7715 let mut guard = self.inner.pending_versioned_updates.write();
7716 let Some(pending) = guard.get_mut(&conn_id) else {
7717 return 0;
7718 };
7719
7720 let store = self.inner.db.store();
7721 let mut reverted = 0usize;
7722 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
7723 if *xid < stamper_xid {
7724 return true;
7725 }
7726 if let Some(manager) = store.get_collection(collection) {
7727 if let Some(mut old) = manager.get(*old_id) {
7728 if old.xmax == *xid {
7729 old.set_xmax(*previous_xmax);
7730 let _ = manager.update(old);
7731 }
7732 }
7733 }
7734 let _ = store.delete_batch(collection, &[*new_id]);
7735 reverted += 1;
7736 false
7737 });
7738 if pending.is_empty() {
7739 guard.remove(&conn_id);
7740 }
7741 reverted
7742 }
7743
7744 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
7749 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
7750 return;
7751 };
7752 if pending.is_empty() {
7753 return;
7754 }
7755
7756 let store = self.inner.db.store();
7757 for (collection, id, _xid, _previous_xmax) in pending {
7758 store.context_index().remove_entity(id);
7759 self.cdc_emit(
7760 crate::replication::cdc::ChangeOperation::Delete,
7761 &collection,
7762 id.raw(),
7763 "entity",
7764 );
7765 }
7766 }
7767
7768 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
7775 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
7776 return;
7777 };
7778
7779 let store = self.inner.db.store();
7780 for (collection, id, xid, previous_xmax) in pending {
7781 let Some(manager) = store.get_collection(&collection) else {
7782 continue;
7783 };
7784 if let Some(mut entity) = manager.get(id) {
7785 if entity.xmax == xid {
7786 entity.set_xmax(previous_xmax);
7787 let _ = manager.update(entity);
7788 }
7789 }
7790 }
7791 }
7792
7793 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
7794 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
7795 return;
7796 };
7797 for event in pending {
7798 self.cdc_emit_kv(
7799 event.op,
7800 &event.collection,
7801 &event.key,
7802 0,
7803 event.before,
7804 event.after,
7805 );
7806 }
7807 }
7808
7809 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
7810 self.inner.pending_kv_watch_events.write().remove(&conn_id);
7811 }
7812
7813 fn materialize_graph_with_rls(
7822 &self,
7823 ) -> RedDBResult<(
7824 crate::storage::engine::GraphStore,
7825 std::collections::HashMap<
7826 String,
7827 std::collections::HashMap<String, crate::storage::schema::Value>,
7828 >,
7829 crate::storage::query::unified::EdgeProperties,
7830 )> {
7831 use crate::storage::engine::GraphStore;
7832 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
7833 use crate::storage::unified::entity::{EntityData, EntityKind};
7834 use std::collections::{HashMap, HashSet};
7835
7836 let store = self.inner.db.store();
7837 let snap_ctx = capture_current_snapshot();
7838 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
7839
7840 let graph = GraphStore::new();
7841 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
7842 HashMap::new();
7843 let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
7844 let mut allowed_nodes: HashSet<String> = HashSet::new();
7845
7846 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
7850 HashMap::new();
7851 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
7852 HashMap::new();
7853
7854 let collections = store.list_collections();
7855
7856 for collection in &collections {
7858 let Some(manager) = store.get_collection(collection) else {
7859 continue;
7860 };
7861 let entities = manager.query_all(|_| true);
7862 for entity in entities {
7863 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
7864 continue;
7865 }
7866 let EntityKind::GraphNode(ref node) = entity.kind else {
7867 continue;
7868 };
7869 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
7870 continue;
7871 }
7872 let id_str = entity.id.raw().to_string();
7873 graph
7874 .add_node_with_label(
7875 &id_str,
7876 &node.label,
7877 &super::graph_node_label(&node.node_type),
7878 )
7879 .map_err(|err| RedDBError::Query(err.to_string()))?;
7880 allowed_nodes.insert(id_str.clone());
7881 if let EntityData::Node(node_data) = &entity.data {
7882 node_properties.insert(id_str, node_data.properties.clone());
7883 }
7884 }
7885 }
7886
7887 for collection in &collections {
7891 let Some(manager) = store.get_collection(collection) else {
7892 continue;
7893 };
7894 let entities = manager.query_all(|_| true);
7895 for entity in entities {
7896 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
7897 continue;
7898 }
7899 let EntityKind::GraphEdge(ref edge) = entity.kind else {
7900 continue;
7901 };
7902 if !allowed_nodes.contains(&edge.from_node)
7903 || !allowed_nodes.contains(&edge.to_node)
7904 {
7905 continue;
7906 }
7907 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
7908 continue;
7909 }
7910 let weight = match &entity.data {
7911 EntityData::Edge(e) => e.weight,
7912 _ => edge.weight as f32 / 1000.0,
7913 };
7914 let edge_label = super::graph_edge_label(&edge.label);
7915 graph
7916 .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
7917 .map_err(|err| RedDBError::Query(err.to_string()))?;
7918 if let EntityData::Edge(edge_data) = &entity.data {
7919 edge_properties.insert(
7920 (edge.from_node.clone(), edge_label, edge.to_node.clone()),
7921 edge_data.properties.clone(),
7922 );
7923 }
7924 }
7925 }
7926
7927 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
7931
7932 Ok((graph, node_properties, edge_properties))
7933 }
7934
7935 pub(crate) fn stamp_xmin_if_in_txn(
7950 &self,
7951 collection: &str,
7952 id: crate::storage::unified::entity::EntityId,
7953 ) {
7954 let Some(xid) = self.current_xid() else {
7955 return;
7956 };
7957 let store = self.inner.db.store();
7958 let Some(manager) = store.get_collection(collection) else {
7959 return;
7960 };
7961 if let Some(mut entity) = manager.get(id) {
7962 entity.set_xmin(xid);
7963 let _ = manager.update(entity);
7964 }
7965 }
7966
7967 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
7975 let mut guard = self.inner.pending_tombstones.write();
7976 let Some(pending) = guard.get_mut(&conn_id) else {
7977 return 0;
7978 };
7979
7980 let store = self.inner.db.store();
7981 let mut revived = 0usize;
7982 pending.retain(|(collection, id, xid, previous_xmax)| {
7983 if *xid < stamper_xid {
7984 return true;
7986 }
7987 if let Some(manager) = store.get_collection(collection) {
7988 if let Some(mut entity) = manager.get(*id) {
7989 if entity.xmax == *xid {
7990 entity.set_xmax(*previous_xmax);
7991 let _ = manager.update(entity);
7992 revived += 1;
7993 }
7994 }
7995 }
7996 false
7997 });
7998 if pending.is_empty() {
7999 guard.remove(&conn_id);
8000 }
8001 revived
8002 }
8003
8004 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
8013 let conn_id = current_connection_id();
8014 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
8015 return ctx.snapshot;
8016 }
8017 let high_water = self.inner.snapshot_manager.peek_next_xid();
8023 self.inner.snapshot_manager.snapshot(high_water)
8024 }
8025
8026 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
8036 let conn_id = current_connection_id();
8037 self.inner
8038 .tx_contexts
8039 .read()
8040 .get(&conn_id)
8041 .map(|ctx| ctx.writer_xid())
8042 }
8043
8044 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
8047 Arc::clone(&self.inner.snapshot_manager)
8048 }
8049
8050 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
8051 let manager = &self.inner.snapshot_manager;
8052 let next_xid = manager.peek_next_xid();
8053 let mut cutoff = next_xid;
8054 if let Some(oldest_active) = manager.oldest_active_xid() {
8055 cutoff = cutoff.min(oldest_active);
8056 }
8057 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
8058 cutoff = cutoff.min(oldest_pinned);
8059 }
8060 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
8061 if retention_xids > 0 {
8062 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
8063 }
8064 cutoff
8065 }
8066
8067 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
8068 let registered = self.inner.index_store.list_indices(table);
8069 if registered.is_empty() {
8070 return Ok(());
8071 }
8072 let store = self.inner.db.store();
8073 let Some(manager) = store.get_collection(table) else {
8074 return Ok(());
8075 };
8076 let entity_fields = manager
8077 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
8078 .into_iter()
8079 .map(|entity| (entity.id, table_row_index_fields(&entity)))
8080 .collect::<Vec<_>>();
8081
8082 for index in registered {
8083 self.inner.index_store.drop_index(&index.name, table);
8084 self.inner
8085 .index_store
8086 .create_index(
8087 &index.name,
8088 table,
8089 &index.columns,
8090 index.method,
8091 index.unique,
8092 &entity_fields,
8093 )
8094 .map_err(RedDBError::Internal)?;
8095 self.inner.index_store.register(index);
8096 }
8097 self.invalidate_plan_cache();
8098 Ok(())
8099 }
8100
8101 pub fn current_txn_own_xids(
8106 &self,
8107 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
8108 let mut set = std::collections::HashSet::new();
8109 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
8110 set.insert(ctx.xid);
8111 for (_, sub) in &ctx.savepoints {
8112 set.insert(*sub);
8113 }
8114 for sub in &ctx.released_sub_xids {
8115 set.insert(*sub);
8116 }
8117 }
8118 set
8119 }
8120
8121 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
8128 Arc::clone(&self.inner.foreign_tables)
8129 }
8130
8131 pub fn is_rls_enabled(&self, table: &str) -> bool {
8133 self.inner.rls_enabled_tables.read().contains(table)
8134 }
8135
8136 pub fn matching_rls_policies(
8143 &self,
8144 table: &str,
8145 role: Option<&str>,
8146 action: crate::storage::query::ast::PolicyAction,
8147 ) -> Vec<crate::storage::query::ast::Filter> {
8148 self.matching_rls_policies_for_kind(
8153 table,
8154 role,
8155 action,
8156 crate::storage::query::ast::PolicyTargetKind::Table,
8157 )
8158 }
8159
8160 pub fn matching_rls_policies_for_kind(
8168 &self,
8169 table: &str,
8170 role: Option<&str>,
8171 action: crate::storage::query::ast::PolicyAction,
8172 kind: crate::storage::query::ast::PolicyTargetKind,
8173 ) -> Vec<crate::storage::query::ast::Filter> {
8174 if !self.is_rls_enabled(table) {
8175 return Vec::new();
8176 }
8177 let policies = self.inner.rls_policies.read();
8178 policies
8179 .iter()
8180 .filter_map(|((t, _), p)| {
8181 if t != table {
8182 return None;
8183 }
8184 if p.target_kind != kind
8193 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
8194 {
8195 return None;
8196 }
8197 if let Some(a) = p.action {
8199 if a != action {
8200 return None;
8201 }
8202 }
8203 if let Some(p_role) = p.role.as_deref() {
8205 match role {
8206 Some(r) if r == p_role => {}
8207 _ => return None,
8208 }
8209 }
8210 Some((*p.using).clone())
8211 })
8212 .collect()
8213 }
8214
8215 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
8216 let store = self.inner.db.store();
8217 if let Some(stats) =
8218 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
8219 {
8220 crate::storage::query::planner::stats_catalog::persist_table_stats(
8221 store.as_ref(),
8222 &stats,
8223 );
8224 } else {
8225 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8226 }
8227 self.invalidate_plan_cache();
8228 }
8229
8230 pub(crate) fn note_table_write(&self, table: &str) {
8231 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
8236 if !already_dirty {
8237 self.inner
8238 .planner_dirty_tables
8239 .write()
8240 .insert(table.to_string());
8241 }
8242 self.invalidate_result_cache_for_table(table);
8243 }
8244
8245 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
8253 let explain = self.explain_query(inner_sql)?;
8254
8255 let columns = vec![
8256 "op".to_string(),
8257 "source".to_string(),
8258 "est_rows".to_string(),
8259 "est_cost".to_string(),
8260 "depth".to_string(),
8261 ];
8262
8263 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
8264
8265 for name in &explain.cte_materializations {
8271 use std::sync::Arc;
8272 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
8273 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
8274 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
8275 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
8276 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
8277 rec.set_arc(Arc::from("depth"), Value::Integer(0));
8278 records.push(rec);
8279 }
8280
8281 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
8282
8283 let result = crate::storage::query::unified::UnifiedResult {
8284 columns,
8285 records,
8286 stats: Default::default(),
8287 pre_serialized_json: None,
8288 };
8289
8290 Ok(RuntimeQueryResult {
8291 query: raw_query.to_string(),
8292 mode: explain.mode,
8293 statement: "explain",
8294 engine: "runtime-explain",
8295 result,
8296 affected_rows: 0,
8297 statement_type: "select",
8298 })
8299 }
8300
8301 pub(super) fn check_query_privilege(
8309 &self,
8310 expr: &crate::storage::query::ast::QueryExpr,
8311 ) -> Result<(), String> {
8312 use crate::auth::privileges::{Action, AuthzContext, Resource};
8313 use crate::auth::UserId;
8314 use crate::storage::query::ast::QueryExpr;
8315
8316 let auth_store = match self.inner.auth_store.read().clone() {
8321 Some(s) => s,
8322 None => return Ok(()),
8323 };
8324
8325 let (username, role) = match current_auth_identity() {
8331 Some(p) => p,
8332 None => return Ok(()),
8333 };
8334 let tenant = current_tenant();
8335
8336 let ctx = AuthzContext {
8337 principal: &username,
8338 effective_role: role,
8339 tenant: tenant.as_deref(),
8340 };
8341 let principal_id = UserId::from_parts(tenant.as_deref(), &username);
8342
8343 let (action, resource) = match expr {
8345 QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
8346 QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
8347 QueryExpr::Graph(g) => {
8348 if auth_store.iam_authorization_enabled() {
8349 self.check_graph_property_projection_privilege(
8350 &auth_store,
8351 &principal_id,
8352 role,
8353 tenant.as_deref(),
8354 g,
8355 )?;
8356 return Ok(());
8357 }
8358 return Ok(());
8359 }
8360 QueryExpr::Vector(v) => {
8361 if auth_store.iam_authorization_enabled() {
8362 self.check_table_like_column_projection_privilege(
8363 &auth_store,
8364 &principal_id,
8365 role,
8366 tenant.as_deref(),
8367 &v.collection,
8368 &["content".to_string()],
8369 )?;
8370 return Ok(());
8371 }
8372 return Ok(());
8373 }
8374 QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
8375 QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
8376 QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
8377 QueryExpr::Join(_) => (Action::Select, Resource::Database),
8381 QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
8384 return if role == crate::auth::Role::Admin {
8385 Ok(())
8386 } else {
8387 Err(format!(
8388 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
8389 username, role
8390 ))
8391 };
8392 }
8393 QueryExpr::CreateIamPolicy { id, .. } => {
8394 return self.check_policy_management_privilege(
8395 &auth_store,
8396 &principal_id,
8397 role,
8398 tenant.as_deref(),
8399 "policy:put",
8400 "policy",
8401 id,
8402 );
8403 }
8404 QueryExpr::DropIamPolicy { id } => {
8405 return self.check_policy_management_privilege(
8406 &auth_store,
8407 &principal_id,
8408 role,
8409 tenant.as_deref(),
8410 "policy:drop",
8411 "policy",
8412 id,
8413 );
8414 }
8415 QueryExpr::AttachPolicy { policy_id, .. } => {
8416 return self.check_policy_management_privilege(
8417 &auth_store,
8418 &principal_id,
8419 role,
8420 tenant.as_deref(),
8421 "policy:attach",
8422 "policy",
8423 policy_id,
8424 );
8425 }
8426 QueryExpr::DetachPolicy { policy_id, .. } => {
8427 return self.check_policy_management_privilege(
8428 &auth_store,
8429 &principal_id,
8430 role,
8431 tenant.as_deref(),
8432 "policy:detach",
8433 "policy",
8434 policy_id,
8435 );
8436 }
8437 QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
8438 return Ok(());
8439 }
8440 QueryExpr::SimulatePolicy { .. } => {
8441 return self.check_policy_management_privilege(
8442 &auth_store,
8443 &principal_id,
8444 role,
8445 tenant.as_deref(),
8446 "policy:simulate",
8447 "policy",
8448 "*",
8449 );
8450 }
8451 QueryExpr::DropTable(q) => {
8454 return self.check_ddl_collection_privilege(
8455 &auth_store,
8456 &principal_id,
8457 role,
8458 tenant.as_deref(),
8459 &username,
8460 "drop",
8461 &q.name,
8462 );
8463 }
8464 QueryExpr::DropGraph(q) => {
8465 return self.check_ddl_collection_privilege(
8466 &auth_store,
8467 &principal_id,
8468 role,
8469 tenant.as_deref(),
8470 &username,
8471 "drop",
8472 &q.name,
8473 );
8474 }
8475 QueryExpr::DropVector(q) => {
8476 return self.check_ddl_collection_privilege(
8477 &auth_store,
8478 &principal_id,
8479 role,
8480 tenant.as_deref(),
8481 &username,
8482 "drop",
8483 &q.name,
8484 );
8485 }
8486 QueryExpr::DropDocument(q) => {
8487 return self.check_ddl_collection_privilege(
8488 &auth_store,
8489 &principal_id,
8490 role,
8491 tenant.as_deref(),
8492 &username,
8493 "drop",
8494 &q.name,
8495 );
8496 }
8497 QueryExpr::DropKv(q) => {
8498 return self.check_ddl_collection_privilege(
8499 &auth_store,
8500 &principal_id,
8501 role,
8502 tenant.as_deref(),
8503 &username,
8504 "drop",
8505 &q.name,
8506 );
8507 }
8508 QueryExpr::DropCollection(q) => {
8509 return self.check_ddl_collection_privilege(
8510 &auth_store,
8511 &principal_id,
8512 role,
8513 tenant.as_deref(),
8514 &username,
8515 "drop",
8516 &q.name,
8517 );
8518 }
8519 QueryExpr::Truncate(q) => {
8520 return self.check_ddl_collection_privilege(
8521 &auth_store,
8522 &principal_id,
8523 role,
8524 tenant.as_deref(),
8525 &username,
8526 "truncate",
8527 &q.name,
8528 );
8529 }
8530 QueryExpr::CreateTable(_)
8532 | QueryExpr::CreateCollection(_)
8533 | QueryExpr::CreateVector(_)
8534 | QueryExpr::AlterTable(_)
8535 | QueryExpr::CreateIndex(_)
8536 | QueryExpr::DropIndex(_)
8537 | QueryExpr::CreateSchema(_)
8538 | QueryExpr::DropSchema(_)
8539 | QueryExpr::CreateSequence(_)
8540 | QueryExpr::DropSequence(_)
8541 | QueryExpr::CreateView(_)
8542 | QueryExpr::DropView(_)
8543 | QueryExpr::RefreshMaterializedView(_)
8544 | QueryExpr::CreatePolicy(_)
8545 | QueryExpr::DropPolicy(_)
8546 | QueryExpr::CreateServer(_)
8547 | QueryExpr::DropServer(_)
8548 | QueryExpr::CreateForeignTable(_)
8549 | QueryExpr::DropForeignTable(_)
8550 | QueryExpr::CreateTimeSeries(_)
8551 | QueryExpr::DropTimeSeries(_)
8552 | QueryExpr::CreateQueue(_)
8553 | QueryExpr::AlterQueue(_)
8554 | QueryExpr::DropQueue(_)
8555 | QueryExpr::CreateTree(_)
8556 | QueryExpr::DropTree(_) => {
8557 return if role >= crate::auth::Role::Write {
8558 Ok(())
8559 } else {
8560 Err(format!(
8561 "principal=`{}` role=`{:?}` cannot issue DDL",
8562 username, role
8563 ))
8564 };
8565 }
8566 QueryExpr::CreateMigration(_) => {
8568 return if role >= crate::auth::Role::Write {
8569 Ok(())
8570 } else {
8571 Err(format!(
8572 "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
8573 username, role
8574 ))
8575 };
8576 }
8577 QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
8579 return if role == crate::auth::Role::Admin {
8580 Ok(())
8581 } else {
8582 Err(format!(
8583 "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
8584 username, role
8585 ))
8586 };
8587 }
8588 QueryExpr::ExplainMigration(_) => return Ok(()),
8590 _ => return Ok(()),
8594 };
8595
8596 if auth_store.iam_authorization_enabled() {
8597 let iam_action = legacy_action_to_iam(action);
8598 let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
8599 let iam_ctx = runtime_iam_context(role, tenant.as_deref());
8600 if !auth_store.check_policy_authz(&principal_id, iam_action, &iam_resource, &iam_ctx) {
8601 return Err(format!(
8602 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8603 username, iam_action, iam_resource.kind, iam_resource.name
8604 ));
8605 }
8606
8607 if let QueryExpr::Table(table) = expr {
8608 self.check_table_column_projection_privilege(
8609 &auth_store,
8610 &principal_id,
8611 &iam_ctx,
8612 table,
8613 )?;
8614 }
8615
8616 if let QueryExpr::Update(update) = expr {
8617 let columns = update_set_target_columns(update);
8618 if !columns.is_empty() {
8619 let request = column_access_request_for_table_update(&update.table, columns);
8620 let outcome =
8621 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
8622 if let Some(denied) = outcome.first_denied_column() {
8623 return Err(format!(
8624 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
8625 username, iam_action, denied.resource.kind, denied.resource.name
8626 ));
8627 }
8628 if !outcome.allowed() {
8629 return Err(format!(
8630 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8631 username,
8632 iam_action,
8633 outcome.table_resource.kind,
8634 outcome.table_resource.name
8635 ));
8636 }
8637 }
8638
8639 if let Some(columns) = update_returning_columns_for_policy(self, update) {
8640 let request = column_access_request_for_table_select(&update.table, columns);
8641 let outcome =
8642 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
8643 if let Some(denied) = outcome.first_denied_column() {
8644 return Err(format!(
8645 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
8646 username, denied.resource.kind, denied.resource.name
8647 ));
8648 }
8649 if !outcome.allowed() {
8650 return Err(format!(
8651 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8652 username, outcome.table_resource.kind, outcome.table_resource.name
8653 ));
8654 }
8655 }
8656 }
8657
8658 Ok(())
8659 } else {
8660 auth_store
8661 .check_grant(&ctx, action, &resource)
8662 .map_err(|e| e.to_string())
8663 }
8664 }
8665
8666 fn check_table_column_projection_privilege(
8667 &self,
8668 auth_store: &Arc<crate::auth::store::AuthStore>,
8669 principal: &crate::auth::UserId,
8670 ctx: &crate::auth::policies::EvalContext,
8671 table: &crate::storage::query::ast::TableQuery,
8672 ) -> Result<(), String> {
8673 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
8674
8675 let columns = requested_table_columns_for_policy(table);
8676 if columns.is_empty() {
8677 return Ok(());
8678 }
8679
8680 let request = ColumnAccessRequest::select(table.table.clone(), columns);
8681 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
8682 if outcome.allowed() {
8683 return Ok(());
8684 }
8685
8686 if !matches!(
8687 outcome.table_decision,
8688 crate::auth::policies::Decision::Allow { .. }
8689 | crate::auth::policies::Decision::AdminBypass
8690 ) {
8691 return Err(format!(
8692 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8693 principal, outcome.table_resource.kind, outcome.table_resource.name
8694 ));
8695 }
8696
8697 let denied = outcome
8698 .first_denied_column()
8699 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
8700 match denied {
8701 Some(decision) => Err(format!(
8702 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8703 principal, decision.resource.kind, decision.resource.name
8704 )),
8705 None => Ok(()),
8706 }
8707 }
8708
8709 fn check_graph_property_projection_privilege(
8710 &self,
8711 auth_store: &Arc<crate::auth::store::AuthStore>,
8712 principal: &crate::auth::UserId,
8713 role: crate::auth::Role,
8714 tenant: Option<&str>,
8715 query: &crate::storage::query::ast::GraphQuery,
8716 ) -> Result<(), String> {
8717 let columns = explicit_graph_projection_properties(query);
8718 if columns.is_empty() {
8719 return Ok(());
8720 }
8721 self.check_table_like_column_projection_privilege(
8722 auth_store, principal, role, tenant, "graph", &columns,
8723 )
8724 }
8725
8726 fn check_table_like_column_projection_privilege(
8727 &self,
8728 auth_store: &Arc<crate::auth::store::AuthStore>,
8729 principal: &crate::auth::UserId,
8730 role: crate::auth::Role,
8731 tenant: Option<&str>,
8732 table: &str,
8733 columns: &[String],
8734 ) -> Result<(), String> {
8735 let iam_ctx = runtime_iam_context(role, tenant);
8736 let request =
8737 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
8738 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
8739 if outcome.allowed() {
8740 return Ok(());
8741 }
8742 let denied = outcome
8743 .first_denied_column()
8744 .map(|d| d.resource.name.clone())
8745 .unwrap_or_else(|| format!("{table}.<unknown>"));
8746 Err(format!(
8747 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
8748 principal, denied
8749 ))
8750 }
8751
8752 fn check_policy_management_privilege(
8753 &self,
8754 auth_store: &Arc<crate::auth::store::AuthStore>,
8755 principal: &crate::auth::UserId,
8756 role: crate::auth::Role,
8757 tenant: Option<&str>,
8758 action: &str,
8759 resource_kind: &str,
8760 resource_name: &str,
8761 ) -> Result<(), String> {
8762 if !auth_store.iam_authorization_enabled() {
8763 return if role == crate::auth::Role::Admin {
8764 Ok(())
8765 } else {
8766 Err(format!(
8767 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
8768 principal, role
8769 ))
8770 };
8771 }
8772
8773 let mut resource = crate::auth::policies::ResourceRef::new(
8774 resource_kind.to_string(),
8775 resource_name.to_string(),
8776 );
8777 if let Some(t) = tenant {
8778 resource = resource.with_tenant(t.to_string());
8779 }
8780 let ctx = runtime_iam_context(role, tenant);
8781 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
8782 Ok(())
8783 } else {
8784 Err(format!(
8785 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8786 principal, action, resource.kind, resource.name
8787 ))
8788 }
8789 }
8790
8791 fn check_ddl_collection_privilege(
8798 &self,
8799 auth_store: &Arc<crate::auth::store::AuthStore>,
8800 principal: &crate::auth::UserId,
8801 role: crate::auth::Role,
8802 tenant: Option<&str>,
8803 username: &str,
8804 action: &str,
8805 collection: &str,
8806 ) -> Result<(), String> {
8807 if role < crate::auth::Role::Write {
8808 let msg = format!(
8809 "principal=`{}` role=`{:?}` cannot issue DDL",
8810 username, role
8811 );
8812 self.inner.audit_log.record(
8813 action,
8814 username,
8815 collection,
8816 "denied",
8817 crate::json::Value::Null,
8818 );
8819 return Err(msg);
8820 }
8821
8822 if !auth_store.iam_authorization_enabled() {
8823 self.inner.audit_log.record(
8824 action,
8825 username,
8826 collection,
8827 "ok",
8828 crate::json::Value::Null,
8829 );
8830 return Ok(());
8831 }
8832
8833 let resource_name = collection.to_string();
8834 let mut resource = crate::auth::policies::ResourceRef::new(
8835 "collection".to_string(),
8836 resource_name.clone(),
8837 );
8838 if let Some(t) = tenant {
8839 resource = resource.with_tenant(t.to_string());
8840 }
8841 let ctx = runtime_iam_context(role, tenant);
8842 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
8843 self.inner.audit_log.record(
8844 action,
8845 username,
8846 &resource_name,
8847 "ok",
8848 crate::json::Value::Null,
8849 );
8850 Ok(())
8851 } else {
8852 self.inner.audit_log.record(
8853 action,
8854 username,
8855 &resource_name,
8856 "denied",
8857 crate::json::Value::Null,
8858 );
8859 Err(format!(
8860 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
8861 username, action, resource_name
8862 ))
8863 }
8864 }
8865
8866 fn execute_grant_statement(
8868 &self,
8869 query: &str,
8870 stmt: &crate::storage::query::ast::GrantStmt,
8871 ) -> RedDBResult<RuntimeQueryResult> {
8872 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
8873 use crate::auth::UserId;
8874 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
8875
8876 let auth_store = self
8877 .inner
8878 .auth_store
8879 .read()
8880 .clone()
8881 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
8882
8883 let (gname, grole) = current_auth_identity().ok_or_else(|| {
8885 RedDBError::Query("GRANT requires an authenticated principal".to_string())
8886 })?;
8887 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
8888 let granter_role = grole;
8889
8890 let mut actions: Vec<Action> = Vec::new();
8892 if stmt.all {
8893 actions.push(Action::All);
8894 } else {
8895 for kw in &stmt.actions {
8896 let a = Action::from_keyword(kw).ok_or_else(|| {
8897 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
8898 })?;
8899 actions.push(a);
8900 }
8901 }
8902
8903 let mut applied = 0usize;
8905 for obj in &stmt.objects {
8906 let resource = match stmt.object_kind {
8907 GrantObjectKind::Table => Resource::Table {
8908 schema: obj.schema.clone(),
8909 table: obj.name.clone(),
8910 },
8911 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
8912 GrantObjectKind::Database => Resource::Database,
8913 GrantObjectKind::Function => Resource::Function {
8914 schema: obj.schema.clone(),
8915 name: obj.name.clone(),
8916 },
8917 };
8918 for principal in &stmt.principals {
8919 let p = match principal {
8920 GrantPrincipalRef::Public => GrantPrincipal::Public,
8921 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
8922 GrantPrincipalRef::User { tenant, name } => {
8923 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
8924 }
8925 };
8926 let tenant = granter.tenant.clone();
8929 auth_store
8930 .grant(
8931 &granter,
8932 granter_role,
8933 p.clone(),
8934 resource.clone(),
8935 actions.clone(),
8936 stmt.with_grant_option,
8937 tenant.clone(),
8938 )
8939 .map_err(|e| RedDBError::Query(e.to_string()))?;
8940
8941 if let Some(policy) =
8945 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
8946 {
8947 let pid = policy.id.clone();
8948 auth_store
8949 .put_policy_internal(policy)
8950 .map_err(|e| RedDBError::Query(e.to_string()))?;
8951 let attachment = match &p {
8952 GrantPrincipal::User(uid) => {
8953 crate::auth::store::PrincipalRef::User(uid.clone())
8954 }
8955 GrantPrincipal::Group(group) => {
8956 crate::auth::store::PrincipalRef::Group(group.clone())
8957 }
8958 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
8959 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
8960 ),
8961 };
8962 auth_store
8963 .attach_policy(attachment, &pid)
8964 .map_err(|e| RedDBError::Query(e.to_string()))?;
8965 }
8966 applied += 1;
8967 tracing::info!(
8968 target: "audit",
8969 principal = %granter,
8970 action = "grant",
8971 "GRANT applied"
8972 );
8973 }
8974 }
8975
8976 self.invalidate_result_cache();
8977 Ok(RuntimeQueryResult::ok_message(
8978 query.to_string(),
8979 &format!("GRANT applied to {} target(s)", applied),
8980 "grant",
8981 ))
8982 }
8983
8984 fn execute_revoke_statement(
8986 &self,
8987 query: &str,
8988 stmt: &crate::storage::query::ast::RevokeStmt,
8989 ) -> RedDBResult<RuntimeQueryResult> {
8990 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
8991 use crate::auth::UserId;
8992 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
8993
8994 let auth_store = self
8995 .inner
8996 .auth_store
8997 .read()
8998 .clone()
8999 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9000
9001 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9002 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
9003 })?;
9004 let granter_role = grole;
9005
9006 let actions: Vec<Action> = if stmt.all {
9007 vec![Action::All]
9008 } else {
9009 stmt.actions
9010 .iter()
9011 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
9012 .collect()
9013 };
9014
9015 let mut total_removed = 0usize;
9016 for obj in &stmt.objects {
9017 let resource = match stmt.object_kind {
9018 GrantObjectKind::Table => Resource::Table {
9019 schema: obj.schema.clone(),
9020 table: obj.name.clone(),
9021 },
9022 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9023 GrantObjectKind::Database => Resource::Database,
9024 GrantObjectKind::Function => Resource::Function {
9025 schema: obj.schema.clone(),
9026 name: obj.name.clone(),
9027 },
9028 };
9029 for principal in &stmt.principals {
9030 let p = match principal {
9031 GrantPrincipalRef::Public => GrantPrincipal::Public,
9032 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9033 GrantPrincipalRef::User { tenant, name } => {
9034 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9035 }
9036 };
9037 let removed = auth_store
9038 .revoke(granter_role, &p, &resource, &actions)
9039 .map_err(|e| RedDBError::Query(e.to_string()))?;
9040 let _removed_policies =
9041 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
9042 total_removed += removed;
9043 }
9044 }
9045
9046 self.invalidate_result_cache();
9047 Ok(RuntimeQueryResult::ok_message(
9048 query.to_string(),
9049 &format!("REVOKE removed {} grant(s)", total_removed),
9050 "revoke",
9051 ))
9052 }
9053
9054 fn execute_alter_user_statement(
9056 &self,
9057 query: &str,
9058 stmt: &crate::storage::query::ast::AlterUserStmt,
9059 ) -> RedDBResult<RuntimeQueryResult> {
9060 use crate::auth::privileges::UserAttributes;
9061 use crate::auth::UserId;
9062 use crate::storage::query::ast::AlterUserAttribute;
9063
9064 let auth_store = self
9065 .inner
9066 .auth_store
9067 .read()
9068 .clone()
9069 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9070
9071 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9072 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
9073 })?;
9074 if grole != crate::auth::Role::Admin {
9075 return Err(RedDBError::Query(
9076 "ALTER USER requires Admin role".to_string(),
9077 ));
9078 }
9079
9080 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
9081
9082 let mut attrs = auth_store.user_attributes(&target);
9085 let mut enable_change: Option<bool> = None;
9086
9087 for a in &stmt.attributes {
9088 match a {
9089 AlterUserAttribute::ValidUntil(ts) => {
9090 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
9094 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
9095 })?;
9096 attrs.valid_until = Some(ms);
9097 }
9098 AlterUserAttribute::ConnectionLimit(n) => {
9099 if *n < 0 {
9100 return Err(RedDBError::Query(
9101 "CONNECTION LIMIT must be non-negative".to_string(),
9102 ));
9103 }
9104 attrs.connection_limit = Some(*n as u32);
9105 }
9106 AlterUserAttribute::SetSearchPath(p) => {
9107 attrs.search_path = Some(p.clone());
9108 }
9109 AlterUserAttribute::AddGroup(g) => {
9110 if !attrs.groups.iter().any(|existing| existing == g) {
9111 attrs.groups.push(g.clone());
9112 attrs.groups.sort();
9113 }
9114 }
9115 AlterUserAttribute::DropGroup(g) => {
9116 attrs.groups.retain(|existing| existing != g);
9117 }
9118 AlterUserAttribute::Enable => enable_change = Some(true),
9119 AlterUserAttribute::Disable => enable_change = Some(false),
9120 AlterUserAttribute::Password(_) => {
9121 }
9125 }
9126 }
9127
9128 auth_store
9129 .set_user_attributes(&target, attrs)
9130 .map_err(|e| RedDBError::Query(e.to_string()))?;
9131 if let Some(en) = enable_change {
9132 auth_store
9133 .set_user_enabled(&target, en)
9134 .map_err(|e| RedDBError::Query(e.to_string()))?;
9135 }
9136 self.invalidate_result_cache();
9137 tracing::info!(
9138 target: "audit",
9139 principal = %target,
9140 action = "alter_user",
9141 "ALTER USER applied"
9142 );
9143
9144 Ok(RuntimeQueryResult::ok_message(
9145 query.to_string(),
9146 &format!("ALTER USER {} applied", target),
9147 "alter_user",
9148 ))
9149 }
9150
9151 fn execute_create_iam_policy(
9156 &self,
9157 query: &str,
9158 id: &str,
9159 json: &str,
9160 ) -> RedDBResult<RuntimeQueryResult> {
9161 use crate::auth::policies::Policy;
9162
9163 let auth_store = self
9164 .inner
9165 .auth_store
9166 .read()
9167 .clone()
9168 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9169
9170 let mut policy = Policy::from_json_str(json)
9175 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
9176 if policy.id != id {
9177 policy.id = id.to_string();
9178 }
9179 let pid = policy.id.clone();
9180 auth_store
9181 .put_policy(policy)
9182 .map_err(|e| RedDBError::Query(e.to_string()))?;
9183
9184 let principal = current_auth_identity()
9185 .map(|(u, _)| u)
9186 .unwrap_or_else(|| "anonymous".into());
9187 tracing::info!(
9188 target: "audit",
9189 principal = %principal,
9190 action = "iam:policy.put",
9191 matched_policy_id = %pid,
9192 "CREATE POLICY applied"
9193 );
9194 self.inner.audit_log.record(
9195 "iam/policy.put",
9196 &principal,
9197 &pid,
9198 "ok",
9199 crate::json::Value::Null,
9200 );
9201
9202 self.invalidate_result_cache();
9203 Ok(RuntimeQueryResult::ok_message(
9204 query.to_string(),
9205 &format!("policy `{pid}` stored"),
9206 "create_iam_policy",
9207 ))
9208 }
9209
9210 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
9211 let auth_store = self
9212 .inner
9213 .auth_store
9214 .read()
9215 .clone()
9216 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9217 auth_store
9218 .delete_policy(id)
9219 .map_err(|e| RedDBError::Query(e.to_string()))?;
9220
9221 let principal = current_auth_identity()
9222 .map(|(u, _)| u)
9223 .unwrap_or_else(|| "anonymous".into());
9224 tracing::info!(
9225 target: "audit",
9226 principal = %principal,
9227 action = "iam:policy.drop",
9228 matched_policy_id = %id,
9229 "DROP POLICY applied"
9230 );
9231 self.inner.audit_log.record(
9232 "iam/policy.drop",
9233 &principal,
9234 id,
9235 "ok",
9236 crate::json::Value::Null,
9237 );
9238
9239 self.invalidate_result_cache();
9240 Ok(RuntimeQueryResult::ok_message(
9241 query.to_string(),
9242 &format!("policy `{id}` dropped"),
9243 "drop_iam_policy",
9244 ))
9245 }
9246
9247 fn execute_attach_policy(
9248 &self,
9249 query: &str,
9250 policy_id: &str,
9251 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9252 ) -> RedDBResult<RuntimeQueryResult> {
9253 use crate::auth::store::PrincipalRef;
9254 use crate::auth::UserId;
9255 use crate::storage::query::ast::PolicyPrincipalRef;
9256
9257 let auth_store = self
9258 .inner
9259 .auth_store
9260 .read()
9261 .clone()
9262 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9263 let p = match principal {
9264 PolicyPrincipalRef::User(u) => {
9265 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9266 }
9267 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9268 };
9269 let pretty_target = principal_label(principal);
9270 auth_store
9271 .attach_policy(p, policy_id)
9272 .map_err(|e| RedDBError::Query(e.to_string()))?;
9273
9274 let principal_str = current_auth_identity()
9275 .map(|(u, _)| u)
9276 .unwrap_or_else(|| "anonymous".into());
9277 tracing::info!(
9278 target: "audit",
9279 principal = %principal_str,
9280 action = "iam:policy.attach",
9281 matched_policy_id = %policy_id,
9282 target = %pretty_target,
9283 "ATTACH POLICY applied"
9284 );
9285 self.inner.audit_log.record(
9286 "iam/policy.attach",
9287 &principal_str,
9288 &pretty_target,
9289 "ok",
9290 crate::json::Value::Null,
9291 );
9292
9293 self.invalidate_result_cache();
9294 Ok(RuntimeQueryResult::ok_message(
9295 query.to_string(),
9296 &format!("policy `{policy_id}` attached to {pretty_target}"),
9297 "attach_policy",
9298 ))
9299 }
9300
9301 fn execute_detach_policy(
9302 &self,
9303 query: &str,
9304 policy_id: &str,
9305 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9306 ) -> RedDBResult<RuntimeQueryResult> {
9307 use crate::auth::store::PrincipalRef;
9308 use crate::auth::UserId;
9309 use crate::storage::query::ast::PolicyPrincipalRef;
9310
9311 let auth_store = self
9312 .inner
9313 .auth_store
9314 .read()
9315 .clone()
9316 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9317 let p = match principal {
9318 PolicyPrincipalRef::User(u) => {
9319 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9320 }
9321 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9322 };
9323 let pretty_target = principal_label(principal);
9324 auth_store
9325 .detach_policy(p, policy_id)
9326 .map_err(|e| RedDBError::Query(e.to_string()))?;
9327
9328 let principal_str = current_auth_identity()
9329 .map(|(u, _)| u)
9330 .unwrap_or_else(|| "anonymous".into());
9331 tracing::info!(
9332 target: "audit",
9333 principal = %principal_str,
9334 action = "iam:policy.detach",
9335 matched_policy_id = %policy_id,
9336 target = %pretty_target,
9337 "DETACH POLICY applied"
9338 );
9339 self.inner.audit_log.record(
9340 "iam/policy.detach",
9341 &principal_str,
9342 &pretty_target,
9343 "ok",
9344 crate::json::Value::Null,
9345 );
9346
9347 self.invalidate_result_cache();
9348 Ok(RuntimeQueryResult::ok_message(
9349 query.to_string(),
9350 &format!("policy `{policy_id}` detached from {pretty_target}"),
9351 "detach_policy",
9352 ))
9353 }
9354
9355 fn execute_show_policies(
9356 &self,
9357 query: &str,
9358 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
9359 ) -> RedDBResult<RuntimeQueryResult> {
9360 use crate::auth::UserId;
9361 use crate::storage::query::ast::PolicyPrincipalRef;
9362 use crate::storage::query::unified::UnifiedRecord;
9363 use crate::storage::schema::Value as SchemaValue;
9364 use std::sync::Arc;
9365
9366 let auth_store = self
9367 .inner
9368 .auth_store
9369 .read()
9370 .clone()
9371 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9372
9373 let pols = match filter {
9374 None => auth_store.list_policies(),
9375 Some(PolicyPrincipalRef::User(u)) => {
9376 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
9377 auth_store.effective_policies(&id)
9378 }
9379 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
9380 };
9381
9382 let mut records = Vec::with_capacity(pols.len());
9383 for p in pols.iter() {
9384 let mut rec = UnifiedRecord::default();
9385 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
9386 rec.set_arc(
9387 Arc::from("statements"),
9388 SchemaValue::Integer(p.statements.len() as i64),
9389 );
9390 rec.set_arc(
9391 Arc::from("tenant"),
9392 p.tenant
9393 .as_deref()
9394 .map(|t| SchemaValue::text(t.to_string()))
9395 .unwrap_or(SchemaValue::Null),
9396 );
9397 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
9398 records.push(rec);
9399 }
9400 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9401 result.records = records;
9402 Ok(RuntimeQueryResult {
9403 query: query.to_string(),
9404 mode: crate::storage::query::modes::QueryMode::Sql,
9405 statement: "show_policies",
9406 engine: "iam-policies",
9407 result,
9408 affected_rows: 0,
9409 statement_type: "select",
9410 })
9411 }
9412
9413 fn execute_show_effective_permissions(
9414 &self,
9415 query: &str,
9416 user: &crate::storage::query::ast::PolicyUserRef,
9417 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
9418 ) -> RedDBResult<RuntimeQueryResult> {
9419 use crate::auth::UserId;
9420 use crate::storage::query::unified::UnifiedRecord;
9421 use crate::storage::schema::Value as SchemaValue;
9422 use std::sync::Arc;
9423
9424 let auth_store = self
9425 .inner
9426 .auth_store
9427 .read()
9428 .clone()
9429 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9430 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9431 let pols = auth_store.effective_policies(&id);
9432
9433 let mut records = Vec::new();
9436 for p in pols.iter() {
9437 for (idx, st) in p.statements.iter().enumerate() {
9438 if let Some(_r) = resource {
9439 }
9443 let mut rec = UnifiedRecord::default();
9444 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
9445 rec.set_arc(
9446 Arc::from("statement_index"),
9447 SchemaValue::Integer(idx as i64),
9448 );
9449 rec.set_arc(
9450 Arc::from("sid"),
9451 st.sid
9452 .as_deref()
9453 .map(|s| SchemaValue::text(s.to_string()))
9454 .unwrap_or(SchemaValue::Null),
9455 );
9456 rec.set_arc(
9457 Arc::from("effect"),
9458 SchemaValue::text(match st.effect {
9459 crate::auth::policies::Effect::Allow => "allow",
9460 crate::auth::policies::Effect::Deny => "deny",
9461 }),
9462 );
9463 rec.set_arc(
9464 Arc::from("actions"),
9465 SchemaValue::Integer(st.actions.len() as i64),
9466 );
9467 rec.set_arc(
9468 Arc::from("resources"),
9469 SchemaValue::Integer(st.resources.len() as i64),
9470 );
9471 records.push(rec);
9472 }
9473 }
9474 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9475 result.records = records;
9476 Ok(RuntimeQueryResult {
9477 query: query.to_string(),
9478 mode: crate::storage::query::modes::QueryMode::Sql,
9479 statement: "show_effective_permissions",
9480 engine: "iam-policies",
9481 result,
9482 affected_rows: 0,
9483 statement_type: "select",
9484 })
9485 }
9486
9487 fn execute_simulate_policy(
9488 &self,
9489 query: &str,
9490 user: &crate::storage::query::ast::PolicyUserRef,
9491 action: &str,
9492 resource: &crate::storage::query::ast::PolicyResourceRef,
9493 ) -> RedDBResult<RuntimeQueryResult> {
9494 use crate::auth::policies::ResourceRef;
9495 use crate::auth::store::SimCtx;
9496 use crate::auth::UserId;
9497 use crate::storage::query::unified::UnifiedRecord;
9498 use crate::storage::schema::Value as SchemaValue;
9499 use std::sync::Arc;
9500
9501 let auth_store = self
9502 .inner
9503 .auth_store
9504 .read()
9505 .clone()
9506 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9507 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9508 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
9509 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
9510
9511 let principal_str = current_auth_identity()
9512 .map(|(u, _)| u)
9513 .unwrap_or_else(|| "anonymous".into());
9514 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
9515 tracing::info!(
9516 target: "audit",
9517 principal = %principal_str,
9518 action = "iam:policy.simulate",
9519 decision = %decision_str,
9520 matched_policy_id = ?matched_pid,
9521 matched_sid = ?matched_sid,
9522 "SIMULATE issued"
9523 );
9524 self.inner.audit_log.record(
9525 "iam/policy.simulate",
9526 &principal_str,
9527 &id.to_string(),
9528 "ok",
9529 crate::json::Value::Null,
9530 );
9531
9532 let mut rec = UnifiedRecord::default();
9533 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
9534 rec.set_arc(
9535 Arc::from("matched_policy_id"),
9536 matched_pid
9537 .map(SchemaValue::text)
9538 .unwrap_or(SchemaValue::Null),
9539 );
9540 rec.set_arc(
9541 Arc::from("matched_sid"),
9542 matched_sid
9543 .map(SchemaValue::text)
9544 .unwrap_or(SchemaValue::Null),
9545 );
9546 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
9547 rec.set_arc(
9548 Arc::from("trail_len"),
9549 SchemaValue::Integer(outcome.trail.len() as i64),
9550 );
9551 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9552 result.records = vec![rec];
9553 Ok(RuntimeQueryResult {
9554 query: query.to_string(),
9555 mode: crate::storage::query::modes::QueryMode::Sql,
9556 statement: "simulate_policy",
9557 engine: "iam-policies",
9558 result,
9559 affected_rows: 0,
9560 statement_type: "select",
9561 })
9562 }
9563}
9564
9565fn grant_to_iam_policy(
9570 principal: &crate::auth::privileges::GrantPrincipal,
9571 resource: &crate::auth::privileges::Resource,
9572 actions: &[crate::auth::privileges::Action],
9573 tenant: Option<&str>,
9574) -> Option<crate::auth::policies::Policy> {
9575 use crate::auth::policies::{
9576 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
9577 };
9578 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9579
9580 if matches!(principal, GrantPrincipal::Group(_)) {
9581 return None;
9582 }
9583
9584 let now = crate::auth::now_ms();
9585 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
9586
9587 let resource_str = match resource {
9588 Resource::Database => "table:*".to_string(),
9589 Resource::Schema(s) => format!("table:{s}.*"),
9590 Resource::Table { schema, table } => match schema {
9591 Some(s) => format!("table:{s}.{table}"),
9592 None => format!("table:{table}"),
9593 },
9594 Resource::Function { schema, name } => match schema {
9595 Some(s) => format!("function:{s}.{name}"),
9596 None => format!("function:{name}"),
9597 },
9598 };
9599
9600 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
9604 vec![ActionPattern::Wildcard]
9605 } else {
9606 actions
9607 .iter()
9608 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
9609 .collect()
9610 };
9611 if action_patterns.is_empty() {
9612 return None;
9613 }
9614
9615 let resource_patterns = if resource_str == "*" {
9620 vec![ResourcePattern::Wildcard]
9621 } else if resource_str.contains('*') {
9622 vec![ResourcePattern::Glob(resource_str.clone())]
9623 } else if let Some((kind, name)) = resource_str.split_once(':') {
9624 vec![ResourcePattern::Exact {
9625 kind: kind.to_string(),
9626 name: name.to_string(),
9627 }]
9628 } else {
9629 vec![ResourcePattern::Wildcard]
9630 };
9631
9632 let policy = Policy {
9633 id,
9634 version: 1,
9635 tenant: tenant.map(|t| t.to_string()),
9636 created_at: now,
9637 updated_at: now,
9638 statements: vec![Statement {
9639 sid: None,
9640 effect: Effect::Allow,
9641 actions: action_patterns,
9642 resources: resource_patterns,
9643 condition: None,
9644 }],
9645 };
9646 if policy.validate().is_err() {
9647 return None;
9648 }
9649 Some(policy)
9650}
9651
9652fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
9653 use crate::auth::privileges::Action;
9654 match action {
9655 Action::Select => "select",
9656 Action::Insert => "insert",
9657 Action::Update => "update",
9658 Action::Delete => "delete",
9659 Action::Truncate => "truncate",
9660 Action::References => "references",
9661 Action::Execute => "execute",
9662 Action::Usage => "usage",
9663 Action::All => "*",
9664 }
9665}
9666
9667fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
9668 let mut columns = Vec::new();
9669 for (column, _) in &query.assignment_exprs {
9670 if !columns.iter().any(|seen| seen == column) {
9671 columns.push(column.clone());
9672 }
9673 }
9674 columns
9675}
9676
9677fn column_access_request_for_table_update(
9678 table_name: &str,
9679 columns: Vec<String>,
9680) -> crate::auth::ColumnAccessRequest {
9681 match table_name.split_once('.') {
9682 Some((schema, table)) => {
9683 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
9684 .with_schema(schema.to_string())
9685 }
9686 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
9687 }
9688}
9689
9690fn column_access_request_for_table_select(
9691 table_name: &str,
9692 columns: Vec<String>,
9693) -> crate::auth::ColumnAccessRequest {
9694 match table_name.split_once('.') {
9695 Some((schema, table)) => {
9696 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
9697 .with_schema(schema.to_string())
9698 }
9699 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
9700 }
9701}
9702
9703fn update_returning_columns_for_policy(
9704 runtime: &RedDBRuntime,
9705 query: &crate::storage::query::ast::UpdateQuery,
9706) -> Option<Vec<String>> {
9707 let items = query.returning.as_ref()?;
9708 let mut columns = Vec::new();
9709 let project_all = items
9710 .iter()
9711 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
9712 if project_all {
9713 collect_returning_star_columns(runtime, query, &mut columns);
9714 } else {
9715 for item in items {
9716 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
9717 continue;
9718 };
9719 push_returning_policy_column(&mut columns, column);
9720 }
9721 }
9722 (!columns.is_empty()).then_some(columns)
9723}
9724
9725fn collect_returning_star_columns(
9726 runtime: &RedDBRuntime,
9727 query: &crate::storage::query::ast::UpdateQuery,
9728 columns: &mut Vec<String>,
9729) {
9730 let store = runtime.db().store();
9731 let Some(manager) = store.get_collection(&query.table) else {
9732 return;
9733 };
9734 if let Some(schema) = manager.column_schema() {
9735 for column in schema.iter() {
9736 push_returning_policy_column(columns, column);
9737 }
9738 }
9739 for entity in manager.query_all(|_| true) {
9740 if !returning_entity_matches_update_target(&entity, query.target) {
9741 continue;
9742 }
9743 match &entity.data {
9744 crate::storage::EntityData::Row(row) => {
9745 for (column, _) in row.iter_fields() {
9746 push_returning_policy_column(columns, column);
9747 }
9748 }
9749 crate::storage::EntityData::Node(node) => {
9750 push_returning_policy_column(columns, "label");
9751 push_returning_policy_column(columns, "node_type");
9752 for column in node.properties.keys() {
9753 push_returning_policy_column(columns, column);
9754 }
9755 }
9756 crate::storage::EntityData::Edge(edge) => {
9757 push_returning_policy_column(columns, "label");
9758 push_returning_policy_column(columns, "from_rid");
9759 push_returning_policy_column(columns, "to_rid");
9760 push_returning_policy_column(columns, "weight");
9761 for column in edge.properties.keys() {
9762 push_returning_policy_column(columns, column);
9763 }
9764 }
9765 _ => {}
9766 }
9767 }
9768}
9769
9770fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
9771 if returning_public_envelope_column(column) {
9772 return;
9773 }
9774 if !columns.iter().any(|seen| seen == column) {
9775 columns.push(column.to_string());
9776 }
9777}
9778
/// Reports whether `column` is one of the fixed envelope column names.
///
/// The comparison ignores ASCII case, so `"RID"` and `"rid"` both match.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|name| column.eq_ignore_ascii_case(name))
}
9785
9786fn returning_entity_matches_update_target(
9787 entity: &crate::storage::UnifiedEntity,
9788 target: crate::storage::query::ast::UpdateTarget,
9789) -> bool {
9790 use crate::storage::query::ast::UpdateTarget;
9791 match target {
9792 UpdateTarget::Rows => {
9793 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
9794 }
9795 UpdateTarget::Documents => {
9796 matches!(
9797 returning_row_item_kind(entity),
9798 Some(ReturningRowKind::Document)
9799 )
9800 }
9801 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
9802 UpdateTarget::Nodes => matches!(
9803 (&entity.kind, &entity.data),
9804 (
9805 crate::storage::EntityKind::GraphNode(_),
9806 crate::storage::EntityData::Node(_)
9807 )
9808 ),
9809 UpdateTarget::Edges => matches!(
9810 (&entity.kind, &entity.data),
9811 (
9812 crate::storage::EntityKind::GraphEdge(_),
9813 crate::storage::EntityData::Edge(_)
9814 )
9815 ),
9816 }
9817}
9818
9819#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9820enum ReturningRowKind {
9821 Row,
9822 Document,
9823 Kv,
9824}
9825
9826fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
9827 let row = entity.data.as_row()?;
9828 let is_kv = row.iter_fields().all(|(column, _)| {
9829 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
9830 });
9831 if is_kv {
9832 return Some(ReturningRowKind::Kv);
9833 }
9834 let is_document = row
9835 .iter_fields()
9836 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
9837 if is_document {
9838 Some(ReturningRowKind::Document)
9839 } else {
9840 Some(ReturningRowKind::Row)
9841 }
9842}
9843
9844fn requested_table_columns_for_policy(
9845 table: &crate::storage::query::ast::TableQuery,
9846) -> Vec<String> {
9847 use crate::storage::query::sql_lowering::{
9848 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
9849 effective_table_projections,
9850 };
9851
9852 let table_name = table.table.as_str();
9853 let table_alias = table.alias.as_deref();
9854 let mut columns = std::collections::BTreeSet::new();
9855
9856 for projection in effective_table_projections(table) {
9857 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
9858 }
9859 if let Some(filter) = effective_table_filter(table) {
9860 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
9861 }
9862 for expr in effective_table_group_by_exprs(table) {
9863 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
9864 }
9865 if let Some(filter) = effective_table_having_filter(table) {
9866 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
9867 }
9868 for order in &table.order_by {
9869 if let Some(expr) = order.expr.as_ref() {
9870 collect_expr_columns(expr, table_name, table_alias, &mut columns);
9871 } else {
9872 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
9873 }
9874 }
9875
9876 columns.into_iter().collect()
9877}
9878
9879fn collect_projection_columns(
9880 projection: &crate::storage::query::ast::Projection,
9881 table_name: &str,
9882 table_alias: Option<&str>,
9883 columns: &mut std::collections::BTreeSet<String>,
9884) {
9885 use crate::storage::query::ast::Projection;
9886 match projection {
9887 Projection::All => {
9888 columns.insert("*".to_string());
9889 }
9890 Projection::Column(column) | Projection::Alias(column, _) => {
9891 if column != "*" {
9892 columns.insert(column.clone());
9893 }
9894 }
9895 Projection::Function(_, args) => {
9896 for arg in args {
9897 collect_projection_columns(arg, table_name, table_alias, columns);
9898 }
9899 }
9900 Projection::Expression(filter, _) => {
9901 collect_filter_columns(filter, table_name, table_alias, columns);
9902 }
9903 Projection::Field(field, _) => {
9904 collect_field_ref_column(field, table_name, table_alias, columns);
9905 }
9906 }
9907}
9908
9909fn collect_filter_columns(
9910 filter: &crate::storage::query::ast::Filter,
9911 table_name: &str,
9912 table_alias: Option<&str>,
9913 columns: &mut std::collections::BTreeSet<String>,
9914) {
9915 use crate::storage::query::ast::Filter;
9916 match filter {
9917 Filter::Compare { field, .. }
9918 | Filter::IsNull(field)
9919 | Filter::IsNotNull(field)
9920 | Filter::In { field, .. }
9921 | Filter::Between { field, .. }
9922 | Filter::Like { field, .. }
9923 | Filter::StartsWith { field, .. }
9924 | Filter::EndsWith { field, .. }
9925 | Filter::Contains { field, .. } => {
9926 collect_field_ref_column(field, table_name, table_alias, columns);
9927 }
9928 Filter::CompareFields { left, right, .. } => {
9929 collect_field_ref_column(left, table_name, table_alias, columns);
9930 collect_field_ref_column(right, table_name, table_alias, columns);
9931 }
9932 Filter::CompareExpr { lhs, rhs, .. } => {
9933 collect_expr_columns(lhs, table_name, table_alias, columns);
9934 collect_expr_columns(rhs, table_name, table_alias, columns);
9935 }
9936 Filter::And(left, right) | Filter::Or(left, right) => {
9937 collect_filter_columns(left, table_name, table_alias, columns);
9938 collect_filter_columns(right, table_name, table_alias, columns);
9939 }
9940 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
9941 }
9942}
9943
9944fn collect_expr_columns(
9945 expr: &crate::storage::query::ast::Expr,
9946 table_name: &str,
9947 table_alias: Option<&str>,
9948 columns: &mut std::collections::BTreeSet<String>,
9949) {
9950 use crate::storage::query::ast::Expr;
9951 match expr {
9952 Expr::Column { field, .. } => {
9953 collect_field_ref_column(field, table_name, table_alias, columns);
9954 }
9955 Expr::Literal { .. } | Expr::Parameter { .. } => {}
9956 Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
9957 collect_expr_columns(operand, table_name, table_alias, columns);
9958 }
9959 Expr::BinaryOp { lhs, rhs, .. } => {
9960 collect_expr_columns(lhs, table_name, table_alias, columns);
9961 collect_expr_columns(rhs, table_name, table_alias, columns);
9962 }
9963 Expr::FunctionCall { args, .. } => {
9964 for arg in args {
9965 collect_expr_columns(arg, table_name, table_alias, columns);
9966 }
9967 }
9968 Expr::Case {
9969 branches, else_, ..
9970 } => {
9971 for (condition, value) in branches {
9972 collect_expr_columns(condition, table_name, table_alias, columns);
9973 collect_expr_columns(value, table_name, table_alias, columns);
9974 }
9975 if let Some(value) = else_ {
9976 collect_expr_columns(value, table_name, table_alias, columns);
9977 }
9978 }
9979 Expr::IsNull { operand, .. } => {
9980 collect_expr_columns(operand, table_name, table_alias, columns);
9981 }
9982 Expr::InList { target, values, .. } => {
9983 collect_expr_columns(target, table_name, table_alias, columns);
9984 for value in values {
9985 collect_expr_columns(value, table_name, table_alias, columns);
9986 }
9987 }
9988 Expr::Between {
9989 target, low, high, ..
9990 } => {
9991 collect_expr_columns(target, table_name, table_alias, columns);
9992 collect_expr_columns(low, table_name, table_alias, columns);
9993 collect_expr_columns(high, table_name, table_alias, columns);
9994 }
9995 Expr::Subquery { .. } => {}
9996 }
9997}
9998
9999fn collect_field_ref_column(
10000 field: &crate::storage::query::ast::FieldRef,
10001 table_name: &str,
10002 table_alias: Option<&str>,
10003 columns: &mut std::collections::BTreeSet<String>,
10004) {
10005 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
10006 if column != "*" {
10007 columns.insert(column);
10008 }
10009 }
10010}
10011
10012fn policy_column_name_from_field_ref(
10013 field: &crate::storage::query::ast::FieldRef,
10014 table_name: &str,
10015 table_alias: Option<&str>,
10016) -> Option<String> {
10017 match field {
10018 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
10019 if column == "*" {
10020 return Some("*".to_string());
10021 }
10022 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
10023 Some(column.clone())
10024 } else {
10025 Some(format!("{table}.{column}"))
10026 }
10027 }
10028 _ => None,
10029 }
10030}
10031
10032fn legacy_resource_to_iam(
10033 resource: &crate::auth::privileges::Resource,
10034 tenant: Option<&str>,
10035) -> crate::auth::policies::ResourceRef {
10036 use crate::auth::privileges::Resource;
10037
10038 let (kind, name) = match resource {
10039 Resource::Database => ("database".to_string(), "*".to_string()),
10040 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
10041 Resource::Table { schema, table } => (
10042 "table".to_string(),
10043 match schema {
10044 Some(s) => format!("{s}.{table}"),
10045 None => table.clone(),
10046 },
10047 ),
10048 Resource::Function { schema, name } => (
10049 "function".to_string(),
10050 match schema {
10051 Some(s) => format!("{s}.{name}"),
10052 None => name.clone(),
10053 },
10054 ),
10055 };
10056
10057 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
10058 if let Some(t) = tenant {
10059 out = out.with_tenant(t.to_string());
10060 }
10061 out
10062}
10063
10064#[derive(Debug)]
10065struct JoinTableSide {
10066 table: String,
10067 alias: String,
10068}
10069
10070fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
10071 match expr {
10072 QueryExpr::Table(table) => Some(JoinTableSide {
10073 table: table.table.clone(),
10074 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
10075 }),
10076 _ => None,
10077 }
10078}
10079
10080fn collect_projection_columns_for_table(
10081 projection: &Projection,
10082 table: &str,
10083 alias: Option<&str>,
10084 out: &mut BTreeSet<String>,
10085) {
10086 match projection {
10087 Projection::Column(column) | Projection::Alias(column, _) => {
10088 match split_qualified_column(column) {
10089 Some((qualifier, column))
10090 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
10091 {
10092 push_policy_column(column, out);
10093 }
10094 Some(_) => {}
10095 None => push_policy_column(column, out),
10096 }
10097 }
10098 Projection::Field(
10099 FieldRef::TableColumn {
10100 table: qualifier,
10101 column,
10102 },
10103 _,
10104 ) => {
10105 if qualifier.is_empty()
10106 || qualifier == table
10107 || alias.is_some_and(|alias| qualifier == alias)
10108 {
10109 push_policy_column(column, out);
10110 }
10111 }
10112 Projection::Field(
10113 FieldRef::NodeProperty {
10114 alias: qualifier,
10115 property,
10116 },
10117 _,
10118 )
10119 | Projection::Field(
10120 FieldRef::EdgeProperty {
10121 alias: qualifier,
10122 property,
10123 },
10124 _,
10125 ) => {
10126 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
10127 push_policy_column(property, out);
10128 }
10129 }
10130 Projection::Function(_, args) => {
10131 for arg in args {
10132 collect_projection_columns_for_table(arg, table, alias, out);
10133 }
10134 }
10135 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10136 }
10137}
10138
10139fn collect_projection_columns_for_join_side(
10140 projection: &Projection,
10141 left: Option<&JoinTableSide>,
10142 right: Option<&JoinTableSide>,
10143 out: &mut HashMap<String, BTreeSet<String>>,
10144) -> RedDBResult<()> {
10145 match projection {
10146 Projection::Column(column) | Projection::Alias(column, _) => {
10147 if let Some((qualifier, column)) = split_qualified_column(column) {
10148 push_qualified_join_column(qualifier, column, left, right, out);
10149 } else {
10150 push_unqualified_join_column(column, left, right, out);
10151 }
10152 }
10153 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
10154 if table.is_empty() {
10155 push_unqualified_join_column(column, left, right, out);
10156 } else if let Some(side) = [left, right]
10157 .into_iter()
10158 .flatten()
10159 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
10160 {
10161 push_join_column(&side.table, column, out);
10162 }
10163 }
10164 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
10165 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
10166 push_qualified_join_column(alias, property, left, right, out);
10167 }
10168 Projection::Function(_, args) => {
10169 for arg in args {
10170 collect_projection_columns_for_join_side(arg, left, right, out)?;
10171 }
10172 }
10173 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10174 }
10175 Ok(())
10176}
10177
/// Splits `qualifier.column` into its two parts.
///
/// Returns `None` unless the input contains exactly one `.` with non-empty
/// text on both sides.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    column.split_once('.').filter(|(qualifier, name)| {
        !qualifier.is_empty() && !name.is_empty() && !name.contains('.')
    })
}
10185
10186fn push_qualified_join_column(
10187 qualifier: &str,
10188 column: &str,
10189 left: Option<&JoinTableSide>,
10190 right: Option<&JoinTableSide>,
10191 out: &mut HashMap<String, BTreeSet<String>>,
10192) {
10193 if let Some(side) = [left, right]
10194 .into_iter()
10195 .flatten()
10196 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
10197 {
10198 push_join_column(&side.table, column, out);
10199 }
10200}
10201
10202fn push_unqualified_join_column(
10203 column: &str,
10204 left: Option<&JoinTableSide>,
10205 right: Option<&JoinTableSide>,
10206 out: &mut HashMap<String, BTreeSet<String>>,
10207) {
10208 for side in [left, right].into_iter().flatten() {
10209 push_join_column(&side.table, column, out);
10210 }
10211}
10212
10213fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
10214 if is_policy_column_name(column) {
10215 out.entry(table.to_string())
10216 .or_default()
10217 .insert(column.to_string());
10218 }
10219}
10220
10221fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
10222 if is_policy_column_name(column) {
10223 out.insert(column.to_string());
10224 }
10225}
10226
/// Reports whether `column` should be recorded as a policy column.
///
/// Rejected: the empty string, the wildcard `"*"`, and names starting with
/// the (case-sensitive) prefixes `LIT:` or `TYPE:`.
fn is_policy_column_name(column: &str) -> bool {
    const RESERVED_PREFIXES: [&str; 2] = ["LIT:", "TYPE:"];

    if column.is_empty() || column == "*" {
        return false;
    }
    !RESERVED_PREFIXES
        .iter()
        .any(|prefix| column.starts_with(*prefix))
}
10233
10234fn runtime_iam_context(
10235 role: crate::auth::Role,
10236 tenant: Option<&str>,
10237) -> crate::auth::policies::EvalContext {
10238 crate::auth::policies::EvalContext {
10239 principal_tenant: tenant.map(|t| t.to_string()),
10240 current_tenant: tenant.map(|t| t.to_string()),
10241 peer_ip: None,
10242 mfa_present: false,
10243 now_ms: crate::auth::now_ms(),
10244 principal_is_admin_role: role == crate::auth::Role::Admin,
10245 }
10246}
10247
10248fn explicit_table_projection_columns(
10249 query: &crate::storage::query::ast::TableQuery,
10250) -> Vec<String> {
10251 use crate::storage::query::ast::{FieldRef, Projection};
10252
10253 let mut columns = Vec::new();
10254 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
10255 match projection {
10256 Projection::Column(column) | Projection::Alias(column, _) => {
10257 push_unique(&mut columns, column)
10258 }
10259 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
10260 push_unique(&mut columns, column)
10261 }
10262 _ => {}
10266 }
10267 }
10268 columns
10269}
10270
10271fn explicit_graph_projection_properties(
10272 query: &crate::storage::query::ast::GraphQuery,
10273) -> Vec<String> {
10274 use crate::storage::query::ast::{FieldRef, Projection};
10275
10276 let mut columns = Vec::new();
10277 for projection in &query.return_ {
10278 match projection {
10279 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
10280 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
10281 push_unique(&mut columns, property.clone())
10282 }
10283 _ => {}
10284 }
10285 }
10286 columns
10287}
10288
/// Appends `column` to `columns` unless an equal string is already present.
///
/// The comparison is exact (case-sensitive); insertion order is preserved.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
10294
10295fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
10296 use crate::storage::query::ast::PolicyPrincipalRef;
10297 match p {
10298 PolicyPrincipalRef::User(u) => match &u.tenant {
10299 Some(t) => format!("user:{t}/{}", u.username),
10300 None => format!("user:{}", u.username),
10301 },
10302 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
10303 }
10304}
10305
10306pub(crate) fn decision_to_strings(
10309 d: &crate::auth::policies::Decision,
10310) -> (String, Option<String>, Option<String>) {
10311 use crate::auth::policies::Decision;
10312 match d {
10313 Decision::Allow {
10314 matched_policy_id,
10315 matched_sid,
10316 } => (
10317 "allow".into(),
10318 Some(matched_policy_id.clone()),
10319 matched_sid.clone(),
10320 ),
10321 Decision::Deny {
10322 matched_policy_id,
10323 matched_sid,
10324 } => (
10325 "deny".into(),
10326 Some(matched_policy_id.clone()),
10327 matched_sid.clone(),
10328 ),
10329 Decision::DefaultDeny => ("default_deny".into(), None, None),
10330 Decision::AdminBypass => ("admin_bypass".into(), None, None),
10331 }
10332}
10333
10334fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
10335 let mut scopes = Vec::new();
10336 collect_relation_scopes(query, &mut scopes);
10337 scopes.sort();
10338 scopes.dedup();
10339 scopes
10340}
10341
10342fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
10343 match query {
10344 QueryExpr::Table(table) => {
10345 if !table.table.is_empty() {
10346 scopes.push(table.table.clone());
10347 }
10348 if let Some(alias) = &table.alias {
10349 scopes.push(alias.clone());
10350 }
10351 }
10352 QueryExpr::Join(join) => {
10353 collect_relation_scopes(&join.left, scopes);
10354 collect_relation_scopes(&join.right, scopes);
10355 }
10356 _ => {}
10357 }
10358}
10359
10360fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
10361 let inner_scopes = relation_scopes_for_query(query);
10362 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
10363}
10364
10365fn query_expr_references_outer_scope(
10366 query: &QueryExpr,
10367 outer_scopes: &[String],
10368 inner_scopes: &[String],
10369) -> bool {
10370 match query {
10371 QueryExpr::Table(table) => {
10372 table.select_items.iter().any(|item| match item {
10373 crate::storage::query::ast::SelectItem::Wildcard => false,
10374 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10375 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10376 }
10377 }) || table
10378 .where_expr
10379 .as_ref()
10380 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10381 || table.filter.as_ref().is_some_and(|filter| {
10382 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10383 })
10384 || table.having_expr.as_ref().is_some_and(|expr| {
10385 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10386 })
10387 || table.having.as_ref().is_some_and(|filter| {
10388 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10389 })
10390 || table
10391 .group_by_exprs
10392 .iter()
10393 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10394 || table.order_by.iter().any(|clause| {
10395 clause.expr.as_ref().is_some_and(|expr| {
10396 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10397 })
10398 })
10399 }
10400 QueryExpr::Join(join) => {
10401 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
10402 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
10403 || join.filter.as_ref().is_some_and(|filter| {
10404 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10405 })
10406 || join.return_items.iter().any(|item| match item {
10407 crate::storage::query::ast::SelectItem::Wildcard => false,
10408 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10409 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10410 }
10411 })
10412 }
10413 _ => false,
10414 }
10415}
10416
10417fn filter_references_outer_scope(
10418 filter: &crate::storage::query::ast::Filter,
10419 outer_scopes: &[String],
10420 inner_scopes: &[String],
10421) -> bool {
10422 use crate::storage::query::ast::Filter;
10423 match filter {
10424 Filter::Compare { field, .. }
10425 | Filter::IsNull(field)
10426 | Filter::IsNotNull(field)
10427 | Filter::In { field, .. }
10428 | Filter::Between { field, .. }
10429 | Filter::Like { field, .. }
10430 | Filter::StartsWith { field, .. }
10431 | Filter::EndsWith { field, .. }
10432 | Filter::Contains { field, .. } => {
10433 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10434 }
10435 Filter::CompareFields { left, right, .. } => {
10436 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
10437 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
10438 }
10439 Filter::CompareExpr { lhs, rhs, .. } => {
10440 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10441 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10442 }
10443 Filter::And(left, right) | Filter::Or(left, right) => {
10444 filter_references_outer_scope(left, outer_scopes, inner_scopes)
10445 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
10446 }
10447 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
10448 }
10449}
10450
10451fn expr_references_outer_scope(
10452 expr: &crate::storage::query::ast::Expr,
10453 outer_scopes: &[String],
10454 inner_scopes: &[String],
10455) -> bool {
10456 use crate::storage::query::ast::Expr;
10457 match expr {
10458 Expr::Column { field, .. } => {
10459 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10460 }
10461 Expr::BinaryOp { lhs, rhs, .. } => {
10462 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10463 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10464 }
10465 Expr::UnaryOp { operand, .. }
10466 | Expr::Cast { inner: operand, .. }
10467 | Expr::IsNull { operand, .. } => {
10468 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
10469 }
10470 Expr::FunctionCall { args, .. } => args
10471 .iter()
10472 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
10473 Expr::Case {
10474 branches, else_, ..
10475 } => {
10476 branches.iter().any(|(cond, value)| {
10477 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
10478 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
10479 }) || else_
10480 .as_ref()
10481 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10482 }
10483 Expr::InList { target, values, .. } => {
10484 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10485 || values
10486 .iter()
10487 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
10488 }
10489 Expr::Between {
10490 target, low, high, ..
10491 } => {
10492 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10493 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
10494 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
10495 }
10496 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
10497 Expr::Literal { .. } | Expr::Parameter { .. } => false,
10498 }
10499}
10500
10501fn field_ref_references_outer_scope(
10502 field: &crate::storage::query::ast::FieldRef,
10503 outer_scopes: &[String],
10504 inner_scopes: &[String],
10505) -> bool {
10506 match field {
10507 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
10508 outer_scopes.iter().any(|scope| scope == table)
10509 && !inner_scopes.iter().any(|scope| scope == table)
10510 }
10511 _ => false,
10512 }
10513}
10514
10515fn first_column_values(
10516 result: crate::storage::query::unified::UnifiedResult,
10517) -> RedDBResult<Vec<Value>> {
10518 if result.columns.len() > 1 {
10519 return Err(RedDBError::Query(
10520 "expression subquery must return exactly one column".to_string(),
10521 ));
10522 }
10523 let fallback_column = result
10524 .records
10525 .first()
10526 .and_then(|record| record.column_names().into_iter().next())
10527 .map(|name| name.to_string());
10528 let column = result.columns.first().cloned().or(fallback_column);
10529 let Some(column) = column else {
10530 return Ok(Vec::new());
10531 };
10532 Ok(result
10533 .records
10534 .iter()
10535 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
10536 .collect())
10537}
10538
/// Parses a timestamp string into milliseconds since the Unix epoch.
///
/// Two input forms are accepted:
/// * a bare non-negative integer, returned unchanged as a millisecond count;
/// * a `YYYY-MM-DD` calendar date, optionally followed by whitespace and more
///   text. Only the date is used - anything after the first whitespace is
///   ignored - so the result is midnight at the start of that day.
///
/// Returns `None` for anything else. That includes a month outside `1..=12`,
/// a day outside `1..=31`, a year that does not fit in `i32`, and any date
/// before 1970-01-01, which an unsigned millisecond count cannot represent.
/// The day is not checked against the month's actual length, so a date such
/// as `2024-02-31` is still accepted and rolls over into March.
fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
    const MS_PER_DAY: u128 = 86_400_000;

    if let Ok(millis) = s.parse::<u128>() {
        return Some(millis);
    }

    let date = s.split_whitespace().next()?;
    let mut parts = date.split('-');
    let (year, month, day) = (parts.next()?, parts.next()?, parts.next()?);
    if parts.next().is_some() {
        // More than three dash-separated fields is not a plain date.
        return None;
    }

    // Parsing the year as i32 keeps the day arithmetic far from i64 overflow.
    let year: i32 = year.parse().ok()?;
    let month: u32 = month.parse().ok()?;
    let day: u32 = day.parse().ok()?;
    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }

    let days = days_from_civil(i64::from(year), month, day);
    if days < 0 {
        // Pre-epoch dates would wrap around if cast to an unsigned count.
        return None;
    }
    // Lossless: `days` is non-negative here.
    Some(days as u128 * MS_PER_DAY)
}

/// Converts a proleptic Gregorian calendar date to a day count relative to
/// 1970-01-01 (negative for earlier dates).
///
/// `m` is the month (`1` = January) and `d` the day of the month. Inputs are
/// not validated; out-of-range days simply shift the result by the
/// corresponding number of days. All arithmetic is signed, so `d == 0` yields
/// the day before the first of the month instead of underflowing.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Treat January and February as the last months of the previous year so
    // the leap day falls at the end of the shifted year.
    let y = if m <= 2 { y - 1 } else { y };
    let era = y.div_euclid(400);
    let year_of_era = y.rem_euclid(400); // 0..=399
    let shifted_month = i64::from(if m > 2 { m - 3 } else { m + 9 }); // March = 0
    let day_of_year = (153 * shifted_month + 2) / 5 + i64::from(d) - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    // 146_097 days per 400-year era; 719_468 days from 0000-03-01 to 1970-01-01.
    era * 146_097 + day_of_era - 719_468
}
10573
10574fn walk_plan_node(
10575 node: &crate::storage::query::planner::CanonicalLogicalNode,
10576 depth: usize,
10577 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
10578) {
10579 use std::sync::Arc;
10580 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
10581 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
10582 rec.set_arc(
10583 Arc::from("source"),
10584 node.source.clone().map(Value::text).unwrap_or(Value::Null),
10585 );
10586 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
10587 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
10588 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
10589 out.push(rec);
10590 for child in &node.children {
10591 walk_plan_node(child, depth + 1, out);
10592 }
10593}