1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
// Per-thread execution context. Every slot starts out empty (`0`, `None` or
// `false`) and is read and written through the helpers and guards in this file.
thread_local! {
    /// Connection id for the current thread; `clear_current_connection_id`
    /// resets it to `0`.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    /// `(username, role)` pair stored by `set_current_auth_identity`.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    /// Snapshot context stored by `set_current_snapshot` and the snapshot guards.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    /// Boolean mirror of "`CURRENT_SNAPSHOT` is `Some`"; every writer of
    /// `CURRENT_SNAPSHOT` in this file updates it in the same call.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    /// Tenant id stored by `set_current_tenant`.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    /// Config resolver installed by `ConfigSnapshotGuard`; read by
    /// `current_config_value`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    /// Secret resolver installed by `SecretStoreGuard`; read by
    /// `current_secret_value`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
91fn system_keyed_collection_contract(
92 name: &str,
93 model: crate::catalog::CollectionModel,
94) -> crate::physical::CollectionContract {
95 let now = crate::utils::now_unix_millis() as u128;
96 crate::physical::CollectionContract {
97 name: name.to_string(),
98 declared_model: model,
99 schema_mode: crate::catalog::SchemaMode::Dynamic,
100 origin: crate::physical::ContractOrigin::Implicit,
101 version: 1,
102 created_at_unix_ms: now,
103 updated_at_unix_ms: now,
104 default_ttl_ms: None,
105 vector_dimension: None,
106 vector_metric: None,
107 context_index_fields: Vec::new(),
108 declared_columns: Vec::new(),
109 table_def: None,
110 timestamps_enabled: false,
111 context_index_enabled: false,
112 metrics_raw_retention_ms: None,
113 metrics_rollup_policies: Vec::new(),
114 metrics_tenant_identity: None,
115 metrics_namespace: None,
116 append_only: false,
117 subscriptions: Vec::new(),
118 session_key: None,
119 session_gap_ms: None,
120 retention_duration_ms: None,
121 }
122}
123
/// Everything `visibility_check` needs to decide whether a row version is
/// visible to the current reader.
#[derive(Clone)]
pub struct SnapshotContext {
    /// Snapshot whose `sees(xmin, xmax)` gives the final visibility verdict.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Manager queried with `is_aborted(xid)` to discard aborted writers.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Transaction ids treated as the reader's own: rows they created are
    /// visible, rows they deleted are not.
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Flag reported by `current_snapshot_requires_index_fallback`.
    pub requires_index_fallback: bool,
}
145
146pub fn set_current_connection_id(id: u64) {
155 CURRENT_CONN_ID.with(|c| c.set(id));
156}
157
158pub fn clear_current_connection_id() {
160 CURRENT_CONN_ID.with(|c| c.set(0));
161}
162
163pub fn current_connection_id() -> u64 {
166 CURRENT_CONN_ID.with(|c| c.get())
167}
168
169pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
173 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
174}
175
176pub fn clear_current_auth_identity() {
180 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
181}
182
183pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
186 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
187}
188
189pub fn set_current_tenant(tenant_id: String) {
194 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
195}
196
197pub fn clear_current_tenant() {
200 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
201}
202
203pub fn current_tenant() -> Option<String> {
214 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
215 if let Some(over) = current_scope_override() {
216 if over.tenant.is_active() {
217 return over.tenant.resolve(inherited);
218 }
219 }
220 if let Some(tx_local) = current_tx_local_tenant() {
221 return tx_local;
222 }
223 inherited
224}
225
thread_local! {
    /// Tenant installed by `TxLocalTenantGuard`.
    ///
    /// The outer `Option` says whether a value is installed at all; the
    /// inner `Option` is the tenant itself, so `Some(None)` means "installed,
    /// explicitly no tenant".
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
238
239fn current_tx_local_tenant() -> Option<Option<String>> {
240 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
241}
242
243fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
249 let mut tokens = query.split_ascii_whitespace();
250 let Some(w1) = tokens.next() else {
251 return Ok(None);
252 };
253 if !w1.eq_ignore_ascii_case("SET") {
254 return Ok(None);
255 }
256 let Some(w2) = tokens.next() else {
257 return Ok(None);
258 };
259 if !w2.eq_ignore_ascii_case("LOCAL") {
260 return Ok(None);
261 }
262 let Some(w3) = tokens.next() else {
263 return Ok(None);
264 };
265 if !w3.eq_ignore_ascii_case("TENANT") {
266 return Ok(None);
267 }
268 let rest: String = tokens.collect::<Vec<_>>().join(" ");
269 let rest = rest.trim().trim_end_matches(';').trim();
270 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
271 if value_str.is_empty() {
272 return Err(RedDBError::Query(
273 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
274 ));
275 }
276 if value_str.eq_ignore_ascii_case("NULL") {
277 return Ok(Some(None));
278 }
279 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
280 let inner = &value_str[1..value_str.len() - 1];
281 return Ok(Some(Some(inner.to_string())));
282 }
283 Err(RedDBError::Query(format!(
284 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
285 )))
286}
287
288pub(crate) struct TxLocalTenantGuard;
289
290impl TxLocalTenantGuard {
291 pub fn install(value: Option<Option<String>>) -> Self {
292 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
293 Self
294 }
295}
296
297impl Drop for TxLocalTenantGuard {
298 fn drop(&mut self) {
299 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
300 }
301}
302
thread_local! {
    /// Stack of scope overrides for the calling thread. `push_scope_override`
    /// appends, `pop_scope_override` removes the last entry, and readers only
    /// look at the last (innermost) entry.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
313
314pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
315 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
316}
317
318pub(crate) fn pop_scope_override() {
319 SCOPE_OVERRIDES.with(|cell| {
320 cell.borrow_mut().pop();
321 });
322}
323
324pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
325 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
326}
327
328pub(crate) fn has_scope_override_active() -> bool {
332 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
333}
334
335pub(crate) struct ScopeOverrideGuard;
339
340impl ScopeOverrideGuard {
341 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
342 push_scope_override(over);
343 Self
344 }
345}
346
347impl Drop for ScopeOverrideGuard {
348 fn drop(&mut self) {
349 pop_scope_override();
350 }
351}
352
353pub(crate) fn current_user_projected() -> Option<String> {
359 let inherited = current_auth_identity().map(|(u, _)| u);
360 if let Some(over) = current_scope_override() {
361 if over.user.is_active() {
362 return over.user.resolve(inherited);
363 }
364 }
365 inherited
366}
367
368pub(crate) fn current_role_projected() -> Option<String> {
369 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
370 if let Some(over) = current_scope_override() {
371 if over.role.is_active() {
372 return over.role.resolve(inherited);
373 }
374 }
375 inherited
376}
377
378pub(crate) fn current_secret_value(path: &str) -> Option<String> {
379 let key = path.to_ascii_lowercase();
380 CURRENT_SECRET_RESOLVER.with(|cell| {
381 let mut resolver = cell.borrow_mut();
382 let resolver = resolver.as_mut()?;
383 if resolver.values.is_none() {
384 resolver.values = resolver
385 .store
386 .as_ref()
387 .map(|store| store.vault_kv_snapshot());
388 }
389 let values = resolver.values.as_ref()?;
390 values.get(&key).cloned().or_else(|| {
391 key.strip_prefix("red.vault/").and_then(|rest| {
392 values
393 .get(rest)
394 .cloned()
395 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
396 })
397 })
398 })
399}
400
/// Per-thread state behind `current_secret_value`.
struct SecretResolver {
    /// Store the snapshot is loaded from; with `None` no snapshot is ever
    /// loaded and every lookup misses.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Cached `vault_kv_snapshot()` result; `None` until the first lookup.
    values: Option<HashMap<String, String>>,
}
405
406pub(super) struct SecretStoreGuard {
407 previous: Option<SecretResolver>,
408}
409
410impl SecretStoreGuard {
411 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
412 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
413 cell.replace(Some(SecretResolver {
414 store,
415 values: None,
416 }))
417 });
418 Self { previous }
419 }
420}
421
422impl Drop for SecretStoreGuard {
423 fn drop(&mut self) {
424 let previous = self.previous.take();
425 CURRENT_SECRET_RESOLVER.with(|cell| {
426 cell.replace(previous);
427 });
428 }
429}
430
431pub(crate) fn current_config_value(path: &str) -> Option<Value> {
432 let key = path.to_ascii_lowercase();
433 CURRENT_CONFIG_RESOLVER.with(|cell| {
434 let mut resolver = cell.borrow_mut();
435 let resolver = resolver.as_mut()?;
436 if resolver.values.is_none() {
437 resolver.values = Some(latest_config_snapshot(&resolver.db));
438 }
439 let values = resolver.values.as_ref()?;
440 values.get(&key).cloned().or_else(|| {
441 key.strip_prefix("red.config/")
442 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
443 })
444 })
445}
446
447fn update_current_config_value(path: &str, value: Value) {
448 let key = path.to_ascii_lowercase();
449 CURRENT_CONFIG_RESOLVER.with(|cell| {
450 if let Some(resolver) = cell.borrow_mut().as_mut() {
451 if let Some(values) = resolver.values.as_mut() {
452 values.insert(key, value);
453 }
454 }
455 });
456}
457
458fn update_current_secret_value(path: &str, value: Option<String>) {
459 let key = path.to_ascii_lowercase();
460 CURRENT_SECRET_RESOLVER.with(|cell| {
461 if let Some(resolver) = cell.borrow_mut().as_mut() {
462 let Some(values) = resolver.values.as_mut() else {
463 return;
464 };
465 match value {
466 Some(value) => {
467 values.insert(key, value);
468 }
469 None => {
470 values.remove(&key);
471 }
472 }
473 }
474 });
475}
476
477fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
478 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
479
480 if let Some(manager) = db.store().get_collection("red_config") {
481 manager.for_each_entity(|entity| {
482 let Some(row) = entity.data.as_row() else {
483 return true;
484 };
485 let Some(Value::Text(key)) = row.get_field("key") else {
486 return true;
487 };
488 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
489 let id = entity.id.raw();
490 let key = key.to_ascii_lowercase();
491 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
492 if let Some(rest) = key.strip_prefix("red.config.") {
493 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
494 }
495 true
496 });
497 }
498
499 if let Some(manager) = db.store().get_collection("red.config") {
500 manager.for_each_entity(|entity| {
501 let Some(row) = entity.data.as_row() else {
502 return true;
503 };
504 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
505 return true;
506 }
507 let Some(Value::Text(key)) = row.get_field("key") else {
508 return true;
509 };
510 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
511 insert_latest_config_value(
512 &mut latest,
513 format!("red.config/{}", key.to_ascii_lowercase()),
514 entity.id.raw(),
515 value,
516 );
517 true
518 });
519 }
520
521 latest
522 .into_iter()
523 .map(|(key, (_, value))| (key, value))
524 .collect()
525}
526
527fn insert_latest_config_value(
528 latest: &mut HashMap<String, (u64, Value)>,
529 key: String,
530 id: u64,
531 value: Value,
532) {
533 match latest.get(&key) {
534 Some((prev_id, _)) if *prev_id > id => {}
535 _ => {
536 latest.insert(key, (id, value));
537 }
538 }
539}
540
/// Per-thread state behind `current_config_value`.
struct ConfigResolver {
    /// Database the snapshot is built from.
    db: Arc<RedDB>,
    /// Cached `latest_config_snapshot` result; `None` until the first lookup.
    values: Option<HashMap<String, Value>>,
}
545
546pub(super) struct ConfigSnapshotGuard {
547 previous: Option<ConfigResolver>,
548}
549
550impl ConfigSnapshotGuard {
551 pub(super) fn install(db: Arc<RedDB>) -> Self {
552 let previous = CURRENT_CONFIG_RESOLVER
553 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
554 Self { previous }
555 }
556}
557
558impl Drop for ConfigSnapshotGuard {
559 fn drop(&mut self) {
560 let previous = self.previous.take();
561 CURRENT_CONFIG_RESOLVER.with(|cell| {
562 cell.replace(previous);
563 });
564 }
565}
566
567pub fn set_current_snapshot(ctx: SnapshotContext) {
572 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
573 HAS_SNAPSHOT.with(|c| c.set(true));
574}
575
576pub fn clear_current_snapshot() {
577 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
578 HAS_SNAPSHOT.with(|c| c.set(false));
579}
580
581pub(crate) struct CurrentSnapshotGuard {
587 previous: Option<SnapshotContext>,
588}
589
590impl CurrentSnapshotGuard {
591 pub(crate) fn install(ctx: SnapshotContext) -> Self {
592 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
593 set_current_snapshot(ctx);
594 Self { previous }
595 }
596}
597
598impl Drop for CurrentSnapshotGuard {
599 fn drop(&mut self) {
600 let prev = self.previous.take();
601 let has = prev.is_some();
602 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
603 HAS_SNAPSHOT.with(|c| c.set(has));
604 }
605}
606
607#[inline]
618pub fn entity_visible_under_current_snapshot(
619 entity: &crate::storage::unified::entity::UnifiedEntity,
620) -> bool {
621 if !HAS_SNAPSHOT.with(|c| c.get()) {
627 return entity.xmax == 0;
628 }
629 CURRENT_SNAPSHOT.with(|cell| {
630 let guard = cell.borrow();
631 let Some(ctx) = guard.as_ref() else {
632 return true;
633 };
634 visibility_check(ctx, entity.xmin, entity.xmax)
635 })
636}
637
638#[inline]
643pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
644 if !HAS_SNAPSHOT.with(|c| c.get()) {
645 return true;
646 }
647 CURRENT_SNAPSHOT.with(|cell| {
648 let guard = cell.borrow();
649 let Some(ctx) = guard.as_ref() else {
650 return true;
651 };
652 visibility_check(ctx, xmin, xmax)
653 })
654}
655
656pub fn capture_current_snapshot() -> Option<SnapshotContext> {
663 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
664}
665
666pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
671 if !HAS_SNAPSHOT.with(|c| c.get()) {
672 return false;
673 }
674 CURRENT_SNAPSHOT.with(|cell| {
675 cell.borrow()
676 .as_ref()
677 .is_some_and(|ctx| ctx.requires_index_fallback)
678 })
679}
680
/// Copy of the thread-local snapshot, auth identity and tenant, produced by
/// `snapshot_bundle` and re-installed by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context, if one was installed when the bundle was taken.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)` identity, if one was set.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Content of the session tenant slot (`CURRENT_TENANT_ID`); scope
    /// overrides and the transaction-local tenant are not part of the bundle.
    pub tenant: Option<String>,
}
701
702pub fn snapshot_bundle() -> SnapshotBundle {
705 SnapshotBundle {
706 snapshot: capture_current_snapshot(),
707 auth: current_auth_identity(),
708 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
709 }
710}
711
712pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
717 struct Guard {
718 prev_snapshot: Option<SnapshotContext>,
719 prev_auth: Option<(String, crate::auth::Role)>,
720 prev_tenant: Option<String>,
721 }
722 impl Drop for Guard {
723 fn drop(&mut self) {
724 let snap = self.prev_snapshot.take();
725 let has = snap.is_some();
726 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
727 HAS_SNAPSHOT.with(|c| c.set(has));
728 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
729 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
730 }
731 }
732
733 let _guard = {
734 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
735 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
736 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
737
738 match bundle.snapshot.clone() {
739 Some(ctx) => set_current_snapshot(ctx),
740 None => clear_current_snapshot(),
741 }
742 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
743 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
744
745 Guard {
746 prev_snapshot,
747 prev_auth,
748 prev_tenant,
749 }
750 };
751 f()
752}
753
754#[inline]
758pub fn entity_visible_with_context(
759 ctx: Option<&SnapshotContext>,
760 entity: &crate::storage::unified::entity::UnifiedEntity,
761) -> bool {
762 match ctx {
763 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
764 None => true,
765 }
766}
767
768fn table_row_index_fields(
769 entity: &crate::storage::unified::entity::UnifiedEntity,
770) -> Vec<(String, crate::storage::schema::Value)> {
771 let crate::storage::EntityData::Row(row) = &entity.data else {
772 return Vec::new();
773 };
774 if let Some(named) = &row.named {
775 return named
776 .iter()
777 .map(|(name, value)| (name.clone(), value.clone()))
778 .collect();
779 }
780 if let Some(schema) = &row.schema {
781 return schema
782 .iter()
783 .zip(row.columns.iter())
784 .map(|(name, value)| (name.clone(), value.clone()))
785 .collect();
786 }
787 Vec::new()
788}
789
790#[inline]
791fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
792 if xmin != 0 && ctx.manager.is_aborted(xmin) {
796 return false;
797 }
798 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
800 0
801 } else {
802 xmax
803 };
804 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
808 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
809 if own_xmax {
810 return false;
812 }
813 if own_xmin {
814 return true;
815 }
816 ctx.snapshot.sees(xmin, effective_xmax)
817}
818
819fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
820 runtime
821 .inner
822 .pool
823 .lock()
824 .unwrap_or_else(|poisoned| poisoned.into_inner())
825}
826
827fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
828 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
829 return;
830 }
831 scopes.insert(name.to_string());
832}
833
834fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
835 match query.source.as_ref() {
836 Some(crate::storage::query::ast::TableSource::Name(name)) => {
837 cache_scope_insert(scopes, name)
838 }
839 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
840 collect_query_expr_result_cache_scopes(scopes, subquery);
841 }
842 None => cache_scope_insert(scopes, &query.table),
843 }
844}
845
846fn collect_vector_source_scopes(
847 scopes: &mut HashSet<String>,
848 source: &crate::storage::query::ast::VectorSource,
849) {
850 match source {
851 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
852 cache_scope_insert(scopes, collection);
853 }
854 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
855 collect_query_expr_result_cache_scopes(scopes, subquery);
856 }
857 crate::storage::query::ast::VectorSource::Literal(_)
858 | crate::storage::query::ast::VectorSource::Text(_) => {}
859 }
860}
861
862fn collect_path_selector_scopes(
863 scopes: &mut HashSet<String>,
864 selector: &crate::storage::query::ast::NodeSelector,
865) {
866 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
867 cache_scope_insert(scopes, table);
868 }
869}
870
/// Collects into `scopes` the names of the tables, collections and queues
/// that `expr` refers to.
///
/// Every name goes through `cache_scope_insert`, which drops empty names,
/// `__subq_` names and universal query sources. Composite expressions (joins,
/// hybrid queries, subquery sources, view definitions) are walked
/// recursively. Statements in the final no-op group contribute nothing.
///
/// The match has no wildcard arm, so a new `QueryExpr` variant has to be
/// classified here before the file compiles again.
fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
    match expr {
        // --- Read queries -------------------------------------------------
        QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
        QueryExpr::Join(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.left);
            collect_query_expr_result_cache_scopes(scopes, &query.right);
        }
        QueryExpr::Path(query) => {
            collect_path_selector_scopes(scopes, &query.from);
            collect_path_selector_scopes(scopes, &query.to);
        }
        QueryExpr::Vector(query) => {
            cache_scope_insert(scopes, &query.collection);
            collect_vector_source_scopes(scopes, &query.query_vector);
        }
        QueryExpr::Hybrid(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.structured);
            cache_scope_insert(scopes, &query.vector.collection);
            collect_vector_source_scopes(scopes, &query.vector.query_vector);
        }
        // --- DML and DDL on a single named object -------------------------
        QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        // --- Queues -------------------------------------------------------
        QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
        QueryExpr::QueueCommand(query) => match query {
            QueueCommand::Push { queue, .. }
            | QueueCommand::Pop { queue, .. }
            | QueueCommand::Peek { queue, .. }
            | QueueCommand::Len { queue }
            | QueueCommand::Purge { queue }
            | QueueCommand::GroupCreate { queue, .. }
            | QueueCommand::GroupRead { queue, .. }
            | QueueCommand::Pending { queue, .. }
            | QueueCommand::Claim { queue, .. }
            | QueueCommand::Ack { queue, .. }
            | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
            // A move touches both ends.
            QueueCommand::Move {
                source,
                destination,
                ..
            } => {
                cache_scope_insert(scopes, source);
                cache_scope_insert(scopes, destination);
            }
        },
        QueryExpr::EventsBackfill(query) => {
            cache_scope_insert(scopes, &query.collection);
            cache_scope_insert(scopes, &query.target_queue);
        }
        // --- Trees --------------------------------------------------------
        QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::TreeCommand(query) => match query {
            TreeCommand::Insert { collection, .. }
            | TreeCommand::Move { collection, .. }
            | TreeCommand::Delete { collection, .. }
            | TreeCommand::Validate { collection, .. }
            | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
        },
        // --- Search -------------------------------------------------------
        QueryExpr::SearchCommand(query) => match query {
            // Variants whose collection is mandatory.
            SearchCommand::Similar { collection, .. }
            | SearchCommand::Hybrid { collection, .. }
            | SearchCommand::SpatialRadius { collection, .. }
            | SearchCommand::SpatialBbox { collection, .. }
            | SearchCommand::SpatialNearest { collection, .. } => {
                cache_scope_insert(scopes, collection);
            }
            // Variants whose collection is optional.
            SearchCommand::Text { collection, .. }
            | SearchCommand::Multimodal { collection, .. }
            | SearchCommand::Index { collection, .. }
            | SearchCommand::Context { collection, .. } => {
                if let Some(collection) = collection.as_deref() {
                    cache_scope_insert(scopes, collection);
                }
            }
        },
        QueryExpr::Ask(query) => {
            if let Some(collection) = query.collection.as_deref() {
                cache_scope_insert(scopes, collection);
            }
        }
        // --- Maintenance, bulk load, views, policies, foreign tables ------
        QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
        QueryExpr::MaintenanceCommand(cmd) => match cmd {
            crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
            | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
                if let Some(t) = target {
                    cache_scope_insert(scopes, t);
                }
            }
        },
        QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateView(cmd) => {
            // The view itself plus everything its defining query reads.
            cache_scope_insert(scopes, &cmd.name);
            collect_query_expr_result_cache_scopes(scopes, &cmd.query);
        }
        QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
        QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        // --- Statements that contribute no scope --------------------------
        QueryExpr::Graph(_)
        | QueryExpr::GraphCommand(_)
        | QueryExpr::ProbabilisticCommand(_)
        | QueryExpr::SetConfig { .. }
        | QueryExpr::ShowConfig { .. }
        | QueryExpr::SetSecret { .. }
        | QueryExpr::DeleteSecret { .. }
        | QueryExpr::ShowSecrets { .. }
        | QueryExpr::SetTenant(_)
        | QueryExpr::ShowTenant
        | QueryExpr::TransactionControl(_)
        | QueryExpr::CreateSchema(_)
        | QueryExpr::DropSchema(_)
        | QueryExpr::CreateSequence(_)
        | QueryExpr::DropSequence(_)
        | QueryExpr::Grant(_)
        | QueryExpr::Revoke(_)
        | QueryExpr::AlterUser(_)
        | QueryExpr::CreateIamPolicy { .. }
        | QueryExpr::DropIamPolicy { .. }
        | QueryExpr::AttachPolicy { .. }
        | QueryExpr::DetachPolicy { .. }
        | QueryExpr::ShowPolicies { .. }
        | QueryExpr::ShowEffectivePermissions { .. }
        | QueryExpr::SimulatePolicy { .. }
        | QueryExpr::CreateMigration(_)
        | QueryExpr::ApplyMigration(_)
        | QueryExpr::RollbackMigration(_)
        | QueryExpr::ExplainMigration(_)
        | QueryExpr::EventsBackfillStatus { .. } => {}
        // --- Keyed collections: every command names its collection --------
        QueryExpr::KvCommand(cmd) => {
            use crate::storage::query::ast::KvCommand;
            match cmd {
                KvCommand::Put { collection, .. }
                | KvCommand::InvalidateTags { collection, .. }
                | KvCommand::Get { collection, .. }
                | KvCommand::Unseal { collection, .. }
                | KvCommand::Rotate { collection, .. }
                | KvCommand::History { collection, .. }
                | KvCommand::List { collection, .. }
                | KvCommand::Purge { collection, .. }
                | KvCommand::Watch { collection, .. }
                | KvCommand::Delete { collection, .. }
                | KvCommand::Incr { collection, .. }
                | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
            }
        }
        QueryExpr::ConfigCommand(cmd) => {
            use crate::storage::query::ast::ConfigCommand;
            match cmd {
                ConfigCommand::Put { collection, .. }
                | ConfigCommand::Get { collection, .. }
                | ConfigCommand::Resolve { collection, .. }
                | ConfigCommand::Rotate { collection, .. }
                | ConfigCommand::Delete { collection, .. }
                | ConfigCommand::History { collection, .. }
                | ConfigCommand::List { collection, .. }
                | ConfigCommand::Watch { collection, .. }
                | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
                    cache_scope_insert(scopes, collection)
                }
            }
        }
    }
}
1056
1057pub(crate) fn rls_policy_filter(
1065 runtime: &RedDBRuntime,
1066 table: &str,
1067 action: crate::storage::query::ast::PolicyAction,
1068) -> Option<crate::storage::query::ast::Filter> {
1069 rls_policy_filter_for_kind(
1070 runtime,
1071 table,
1072 action,
1073 crate::storage::query::ast::PolicyTargetKind::Table,
1074 )
1075}
1076
1077pub(crate) fn rls_policy_filter_for_kind(
1083 runtime: &RedDBRuntime,
1084 table: &str,
1085 action: crate::storage::query::ast::PolicyAction,
1086 kind: crate::storage::query::ast::PolicyTargetKind,
1087) -> Option<crate::storage::query::ast::Filter> {
1088 use crate::storage::query::ast::Filter;
1089
1090 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1091 return None;
1092 }
1093 let role = current_auth_identity().map(|(_, role)| role);
1094 let role_str = role.map(|r| r.as_str().to_string());
1095 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1096 if policies.is_empty() {
1097 return None;
1098 }
1099 policies
1100 .into_iter()
1101 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1102}
1103
1104pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1108 runtime.inner.rls_enabled_tables.read().contains(table)
1109}
1110
1111fn node_passes_rls(
1118 runtime: &RedDBRuntime,
1119 collection: &str,
1120 role: Option<&str>,
1121 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1122 entity: &crate::storage::unified::entity::UnifiedEntity,
1123) -> bool {
1124 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1125
1126 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1127 return true;
1128 }
1129 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1130 let policies = runtime.matching_rls_policies_for_kind(
1131 collection,
1132 role,
1133 PolicyAction::Select,
1134 PolicyTargetKind::Nodes,
1135 );
1136 if policies.is_empty() {
1137 None
1138 } else {
1139 policies
1140 .into_iter()
1141 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1142 }
1143 });
1144 let Some(filter) = filter else {
1145 return false;
1146 };
1147 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1148 Some(&runtime.inner.db),
1149 entity,
1150 filter,
1151 collection,
1152 collection,
1153 )
1154}
1155
1156fn edge_passes_rls(
1159 runtime: &RedDBRuntime,
1160 collection: &str,
1161 role: Option<&str>,
1162 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1163 entity: &crate::storage::unified::entity::UnifiedEntity,
1164) -> bool {
1165 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1166
1167 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1168 return true;
1169 }
1170 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1171 let policies = runtime.matching_rls_policies_for_kind(
1172 collection,
1173 role,
1174 PolicyAction::Select,
1175 PolicyTargetKind::Edges,
1176 );
1177 if policies.is_empty() {
1178 None
1179 } else {
1180 policies
1181 .into_iter()
1182 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1183 }
1184 });
1185 let Some(filter) = filter else {
1186 return false;
1187 };
1188 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1189 Some(&runtime.inner.db),
1190 entity,
1191 filter,
1192 collection,
1193 collection,
1194 )
1195}
1196
1197fn inject_rls_filters(
1218 runtime: &RedDBRuntime,
1219 frame: &dyn super::statement_frame::ReadFrame,
1220 mut table: crate::storage::query::ast::TableQuery,
1221) -> Option<crate::storage::query::ast::TableQuery> {
1222 use crate::storage::query::ast::{Filter, PolicyAction};
1223
1224 let role = frame.identity().map(|(_, role)| role);
1226 let role_str = role.map(|r| r.as_str().to_string());
1227 let policies =
1228 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1229
1230 if policies.is_empty() {
1231 return None;
1234 }
1235
1236 let combined = policies
1238 .into_iter()
1239 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1240 .expect("policies non-empty");
1241
1242 table.filter = Some(match table.filter.take() {
1244 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1245 None => combined,
1246 });
1247 Some(table)
1248}
1249
1250fn inject_rls_into_join(
1260 runtime: &RedDBRuntime,
1261 frame: &dyn super::statement_frame::ReadFrame,
1262 mut join: crate::storage::query::ast::JoinQuery,
1263) -> Option<crate::storage::query::ast::JoinQuery> {
1264 use crate::storage::query::ast::Filter;
1265
1266 let mut policy_filters: Vec<Filter> = Vec::new();
1267 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1268 return None;
1269 }
1270 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1271 return None;
1272 }
1273
1274 if policy_filters.is_empty() {
1275 return Some(join);
1276 }
1277
1278 let combined = policy_filters
1279 .into_iter()
1280 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1281 .expect("policy_filters non-empty");
1282
1283 join.filter = Some(match join.filter.take() {
1284 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1285 None => combined,
1286 });
1287
1288 Some(join)
1289}
1290
1291fn collect_join_side_policy(
1296 runtime: &RedDBRuntime,
1297 frame: &dyn super::statement_frame::ReadFrame,
1298 expr: &crate::storage::query::ast::QueryExpr,
1299 out: &mut Vec<crate::storage::query::ast::Filter>,
1300) -> bool {
1301 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1302 match expr {
1303 QueryExpr::Table(t) => {
1304 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1305 return true;
1306 }
1307 let role = frame.identity().map(|(_, role)| role);
1308 let role_str = role.map(|r| r.as_str().to_string());
1309 let policies =
1310 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1311 if policies.is_empty() {
1312 return false;
1313 }
1314 let combined = policies
1315 .into_iter()
1316 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1317 .expect("policies non-empty");
1318 out.push(combined);
1319 true
1320 }
1321 QueryExpr::Join(inner) => {
1322 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1323 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1324 }
1325 _ => true,
1326 }
1327}
1328
1329fn apply_foreign_table_filters(
1340 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1341 query: &crate::storage::query::ast::TableQuery,
1342) -> crate::storage::query::unified::UnifiedResult {
1343 use crate::storage::query::sql_lowering::{
1344 effective_table_filter, effective_table_projections,
1345 };
1346 use crate::storage::query::unified::UnifiedResult;
1347
1348 let filter = effective_table_filter(query);
1349 let projections = effective_table_projections(query);
1350
1351 let mut filtered: Vec<_> = records
1354 .into_iter()
1355 .filter(|record| match &filter {
1356 Some(f) => {
1357 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1358 }
1359 None => true,
1360 })
1361 .collect();
1362
1363 if let Some(offset) = query.offset {
1365 let offset = offset as usize;
1366 if offset >= filtered.len() {
1367 filtered.clear();
1368 } else {
1369 filtered.drain(0..offset);
1370 }
1371 }
1372 if let Some(limit) = query.limit {
1373 filtered.truncate(limit as usize);
1374 }
1375
1376 let columns: Vec<String> = if projections.is_empty() {
1379 filtered
1380 .first()
1381 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1382 .unwrap_or_default()
1383 } else {
1384 projections
1385 .iter()
1386 .map(super::join_filter::projection_name)
1387 .collect()
1388 };
1389
1390 let mut result = UnifiedResult::empty();
1391 result.columns = columns;
1392 result.records = filtered;
1393 result
1394}
1395
1396pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1403 let mut scopes: HashSet<String> = HashSet::new();
1404 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1405 scopes.into_iter().collect()
1406}
1407
1408fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1409 let mut scopes = HashSet::new();
1410 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1411 scopes
1412}
1413
/// Key string for the result-cache backend setting.
// NOTE(review): presumably looked up in runtime config to choose a
// `RuntimeResultCacheBackend`; the lookup is not visible here — confirm.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
/// Backend name used as the default.
// NOTE(review): presumably corresponds to `RuntimeResultCacheBackend::Legacy` — confirm.
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
/// Namespace string for result-cache entries.
// NOTE(review): presumably the namespace used inside the blob cache — confirm.
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
/// Result-cache time-to-live.
// NOTE(review): unit is seconds per the name; where expiry is enforced is not visible here.
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Maximum number of entries `trim_result_cache` leaves in the cache map.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
/// Eight-byte header that `encode_result_cache_payload` writes first and
/// `decode_result_cache_payload` requires before reading anything else.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
1420
/// Selects which result-cache implementation the runtime uses.
// NOTE(review): variant semantics are inferred from names only — `Legacy`
// presumably the in-process map, `BlobCache` the blob-cache-backed store and
// `Shadow` a mode that runs both and compares them. Confirm against the code
// that consumes this enum.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    Legacy,
    BlobCache,
    Shadow,
}
1427
1428fn trim_result_cache(
1429 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1430 order: &mut std::collections::VecDeque<String>,
1431) {
1432 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1433 if let Some(oldest) = order.pop_front() {
1434 map.remove(&oldest);
1435 } else {
1436 break;
1437 }
1438 }
1439}
1440
1441fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1442 format!(
1443 "{:?}|{}|{}|{}|{}|{:?}",
1444 result.result,
1445 result.query,
1446 result.statement,
1447 result.engine,
1448 result.affected_rows,
1449 result.statement_type
1450 )
1451}
1452
1453fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1454 match mode {
1455 crate::storage::query::modes::QueryMode::Sql => 0,
1456 crate::storage::query::modes::QueryMode::Gremlin => 1,
1457 crate::storage::query::modes::QueryMode::Cypher => 2,
1458 crate::storage::query::modes::QueryMode::Sparql => 3,
1459 crate::storage::query::modes::QueryMode::Path => 4,
1460 crate::storage::query::modes::QueryMode::Natural => 5,
1461 crate::storage::query::modes::QueryMode::Unknown => 255,
1462 }
1463}
1464
1465fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1466 match byte {
1467 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1468 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1469 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1470 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1471 4 => Some(crate::storage::query::modes::QueryMode::Path),
1472 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1473 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1474 _ => None,
1475 }
1476}
1477
/// Interns `value` against the fixed list of labels the result cache knows.
///
/// Returns the matching `'static` string when `value` is one of the known
/// labels (the comparison is exact and case-sensitive), and `None` otherwise.
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN.iter().copied().find(|known| *known == value)
}
1500
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in 32
/// bits.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrow) => {
            out.extend_from_slice(&narrow.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1506
1507fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1508 write_u32(out, value.len())?;
1509 out.extend_from_slice(value.as_bytes());
1510 Some(())
1511}
1512
1513fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1514 write_u32(out, value.len())?;
1515 out.extend_from_slice(value);
1516 Some(())
1517}
1518
/// Pops one byte off the front of `input`.
///
/// Returns `None`, leaving `input` unchanged, when it is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let bytes = *input;
    match bytes {
        [first, rest @ ..] => {
            *input = rest;
            Some(*first)
        }
        [] => None,
    }
}
1524
/// Reads a little-endian `u32` from the front of `input`, widened to `usize`.
///
/// Returns `None`, leaving `input` unchanged, when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let bytes = *input;
    if bytes.len() < 4 {
        return None;
    }
    let (head, tail) = bytes.split_at(4);
    let mut raw = [0u8; 4];
    raw.copy_from_slice(head);
    *input = tail;
    Some(u32::from_le_bytes(raw) as usize)
}
1533
/// Reads a little-endian `u64` from the front of `input`.
///
/// Returns `None`, leaving `input` unchanged, when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let bytes = *input;
    if bytes.len() < 8 {
        return None;
    }
    let (head, tail) = bytes.split_at(8);
    let mut raw = [0u8; 8];
    raw.copy_from_slice(head);
    *input = tail;
    Some(u64::from_le_bytes(raw))
}
1542
1543fn read_string(input: &mut &[u8]) -> Option<String> {
1544 let len = read_u32(input)?;
1545 if input.len() < len {
1546 return None;
1547 }
1548 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1549 *input = &input[len..];
1550 Some(value)
1551}
1552
1553fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1554 let len = read_u32(input)?;
1555 if input.len() < len {
1556 return None;
1557 }
1558 let value = &input[..len];
1559 *input = &input[len..];
1560 Some(value)
1561}
1562
/// Serialises a result-cache entry into the binary payload format read back
/// by `decode_result_cache_payload`.
///
/// Returns `None` when the entry cannot be represented:
/// * the result carries pre-serialized JSON,
/// * `statement`, `engine` or `statement_type` is not a label known to
///   `result_cache_static_str`,
/// * any record holds nodes, edges, paths or vector results,
/// * any length or count does not fit in a `u32`.
///
/// Layout, in write order: magic, query string, mode byte, statement, engine,
/// affected rows, statement type, column list, four scan/timing stats,
/// records (each a list of name + encoded value), scope list. Strings, byte
/// runs and counts use a little-endian `u32` length prefix.
fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
    let result = &entry.result;
    // Refuse anything the decoder could not rebuild faithfully.
    if result.result.pre_serialized_json.is_some()
        || result_cache_static_str(result.statement).is_none()
        || result_cache_static_str(result.engine).is_none()
        || result_cache_static_str(result.statement_type).is_none()
        || result.result.records.iter().any(|record| {
            !record.nodes.is_empty()
                || !record.edges.is_empty()
                || !record.paths.is_empty()
                || !record.vector_results.is_empty()
        })
    {
        return None;
    }

    // Header and statement metadata.
    let mut out = Vec::new();
    out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
    write_string(&mut out, &result.query)?;
    out.push(mode_to_byte(result.mode));
    write_string(&mut out, result.statement)?;
    write_string(&mut out, result.engine)?;
    out.extend_from_slice(&result.affected_rows.to_le_bytes());
    write_string(&mut out, result.statement_type)?;

    // Column names, then the four statistics counters.
    write_u32(&mut out, result.result.columns.len())?;
    for column in &result.result.columns {
        write_string(&mut out, column)?;
    }
    out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
    out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
    out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
    out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());

    // Records: field count, then (name, length-prefixed encoded value) pairs.
    write_u32(&mut out, result.result.records.len())?;
    for record in &result.result.records {
        let fields = record.iter_fields().collect::<Vec<_>>();
        write_u32(&mut out, fields.len())?;
        for (name, value) in fields {
            write_string(&mut out, name)?;
            // Encode into a scratch buffer so the value can be length-prefixed.
            let mut encoded = Vec::new();
            crate::storage::schema::value_codec::encode(value, &mut encoded);
            write_bytes(&mut out, &encoded)?;
        }
    }

    // Scopes attached to the entry go last.
    write_u32(&mut out, entry.scopes.len())?;
    for scope in &entry.scopes {
        write_string(&mut out, scope)?;
    }
    Some(out)
}
1615
/// Parses a payload produced by `encode_result_cache_payload` back into a
/// query result and its scope set.
///
/// Returns `None` on any malformed input: wrong or missing magic, truncated
/// data, an unknown mode byte, a statement/engine/statement-type label that
/// `result_cache_static_str` does not recognise, a value that fails to decode
/// or does not consume its whole byte run, or trailing bytes after the scope
/// list.
///
/// The rebuilt result always has `pre_serialized_json` set to `None`.
fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
    // Check and strip the magic header.
    if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
        || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
    {
        return None;
    }
    input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];

    // Statement metadata; the labels are mapped back to `'static` strings.
    let query = read_string(&mut input)?;
    let mode = mode_from_byte(read_u8(&mut input)?)?;
    let statement = result_cache_static_str(&read_string(&mut input)?)?;
    let engine = result_cache_static_str(&read_string(&mut input)?)?;
    let affected_rows = read_u64(&mut input)?;
    let statement_type = result_cache_static_str(&read_string(&mut input)?)?;

    // Column names, then the four statistics counters in encoder order.
    let mut columns = Vec::new();
    for _ in 0..read_u32(&mut input)? {
        columns.push(read_string(&mut input)?);
    }
    let stats = crate::storage::query::unified::QueryStats {
        nodes_scanned: read_u64(&mut input)?,
        edges_scanned: read_u64(&mut input)?,
        rows_scanned: read_u64(&mut input)?,
        exec_time_us: read_u64(&mut input)?,
    };

    // Records: each is a field count followed by (name, encoded value) pairs.
    let mut records = Vec::new();
    for _ in 0..read_u32(&mut input)? {
        let mut record = crate::storage::query::unified::UnifiedRecord::new();
        for _ in 0..read_u32(&mut input)? {
            let name = read_string(&mut input)?;
            let bytes = read_bytes(&mut input)?;
            let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
            // The value must account for every byte of its length-prefixed run.
            if used != bytes.len() {
                return None;
            }
            record.set_owned(name, value);
        }
        records.push(record);
    }

    let mut scopes = HashSet::new();
    for _ in 0..read_u32(&mut input)? {
        scopes.insert(read_string(&mut input)?);
    }
    // Trailing bytes mean the payload is not one we wrote.
    if !input.is_empty() {
        return None;
    }

    Some((
        RuntimeQueryResult {
            query,
            mode,
            statement,
            engine,
            result: crate::storage::query::unified::UnifiedResult {
                columns,
                records,
                stats,
                pre_serialized_json: None,
            },
            affected_rows,
            statement_type,
        },
        scopes,
    ))
}
1683
/// Strips a leading `EXPLAIN` keyword and returns the statement after it.
///
/// The keyword match is ASCII case-insensitive and ignores leading
/// whitespace. Returns `None` when:
/// * the first word is not `EXPLAIN`,
/// * nothing follows the keyword,
/// * the word after `EXPLAIN` is `ALTER` or `ASK` (case-insensitive).
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    // No whitespace at all means there is no statement after the first word.
    let (keyword, tail) = sql.trim_start().split_once(char::is_whitespace)?;
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let inner = tail.trim_start();
    let next_word = inner.split_whitespace().next()?;
    let excluded = ["ALTER", "ASK"]
        .iter()
        .any(|kw| next_word.eq_ignore_ascii_case(kw));
    if excluded {
        None
    } else {
        Some(inner)
    }
}
1722
1723pub(super) fn has_with_prefix(sql: &str) -> bool {
1728 let trimmed = sql.trim_start();
1729 let head_end = trimmed
1730 .find(|c: char| c.is_whitespace() || c == '(')
1731 .unwrap_or(trimmed.len());
1732 trimmed[..head_end].eq_ignore_ascii_case("WITH")
1733}
1734
1735fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
1743 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
1744}
1745
1746pub(super) fn peek_top_level_as_of_with_table(
1751 sql: &str,
1752) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
1753 if !sql
1754 .as_bytes()
1755 .windows(5)
1756 .any(|w| w.eq_ignore_ascii_case(b"as of"))
1757 {
1758 return None;
1759 }
1760 let parsed = crate::storage::query::parser::parse(sql).ok()?;
1761 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
1762 return None;
1763 };
1764 let clause = table.as_of?;
1765 let table_name = if table.table.is_empty() || table.table == "any" {
1766 None
1767 } else {
1768 Some(table.table.clone())
1769 };
1770 let spec = match clause {
1771 crate::storage::query::ast::AsOfClause::Commit(h) => {
1772 crate::application::vcs::AsOfSpec::Commit(h)
1773 }
1774 crate::storage::query::ast::AsOfClause::Branch(b) => {
1775 crate::application::vcs::AsOfSpec::Branch(b)
1776 }
1777 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
1778 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
1779 crate::application::vcs::AsOfSpec::TimestampMs(ts)
1780 }
1781 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
1782 crate::application::vcs::AsOfSpec::Snapshot(x)
1783 }
1784 };
1785 Some((spec, table_name))
1786}
1787
1788pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
1789 const VOLATILE_TOKENS: &[&str] = &[
1793 "pg_advisory_lock",
1794 "pg_try_advisory_lock",
1795 "pg_advisory_unlock",
1796 "random()",
1797 ];
1802 let lowered = sql.to_ascii_lowercase();
1803 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
1804}
1805
1806pub(super) fn query_is_ask_statement(sql: &str) -> bool {
1807 let trimmed = sql.trim_start();
1808 let head_end = trimmed
1809 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
1810 .unwrap_or(trimmed.len());
1811 trimmed[..head_end].eq_ignore_ascii_case("ASK")
1812}
1813
1814pub(super) fn intent_lock_modes_for(
1824 expr: &QueryExpr,
1825) -> Option<(
1826 crate::storage::transaction::lock::LockMode,
1827 crate::storage::transaction::lock::LockMode,
1828)> {
1829 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
1830
1831 match expr {
1832 QueryExpr::Table(_)
1834 | QueryExpr::Join(_)
1835 | QueryExpr::Vector(_)
1836 | QueryExpr::Hybrid(_)
1837 | QueryExpr::Graph(_)
1838 | QueryExpr::Path(_)
1839 | QueryExpr::Ask(_)
1840 | QueryExpr::SearchCommand(_)
1841 | QueryExpr::GraphCommand(_)
1842 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
1843
1844 QueryExpr::Insert(_)
1852 | QueryExpr::Update(_)
1853 | QueryExpr::Delete(_)
1854 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
1855 Some((IntentExclusive, IntentExclusive))
1856 }
1857 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
1858
1859 QueryExpr::CreateTable(_)
1863 | QueryExpr::CreateCollection(_)
1864 | QueryExpr::CreateVector(_)
1865 | QueryExpr::DropTable(_)
1866 | QueryExpr::DropGraph(_)
1867 | QueryExpr::DropVector(_)
1868 | QueryExpr::DropDocument(_)
1869 | QueryExpr::DropKv(_)
1870 | QueryExpr::DropCollection(_)
1871 | QueryExpr::Truncate(_)
1872 | QueryExpr::AlterTable(_)
1873 | QueryExpr::CreateIndex(_)
1874 | QueryExpr::DropIndex(_)
1875 | QueryExpr::CreateTimeSeries(_)
1876 | QueryExpr::DropTimeSeries(_)
1877 | QueryExpr::CreateQueue(_)
1878 | QueryExpr::AlterQueue(_)
1879 | QueryExpr::DropQueue(_)
1880 | QueryExpr::CreateTree(_)
1881 | QueryExpr::DropTree(_)
1882 | QueryExpr::CreatePolicy(_)
1883 | QueryExpr::DropPolicy(_)
1884 | QueryExpr::CreateView(_)
1885 | QueryExpr::DropView(_)
1886 | QueryExpr::RefreshMaterializedView(_)
1887 | QueryExpr::CreateSchema(_)
1888 | QueryExpr::DropSchema(_)
1889 | QueryExpr::CreateSequence(_)
1890 | QueryExpr::DropSequence(_)
1891 | QueryExpr::CreateServer(_)
1892 | QueryExpr::DropServer(_)
1893 | QueryExpr::CreateForeignTable(_)
1894 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
1895
1896 _ => None,
1902 }
1903}
1904
1905pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
1910 let mut out = Vec::new();
1911 walk_collections(expr, &mut out);
1912 out.sort();
1913 out.dedup();
1914 out
1915}
1916
1917fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
1918 match expr {
1919 QueryExpr::Table(t) => out.push(t.table.clone()),
1920 QueryExpr::Join(j) => {
1921 walk_collections(&j.left, out);
1922 walk_collections(&j.right, out);
1923 }
1924 QueryExpr::Insert(i) => out.push(i.table.clone()),
1925 QueryExpr::Update(u) => out.push(u.table.clone()),
1926 QueryExpr::Delete(d) => out.push(d.table.clone()),
1927 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
1928
1929 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
1934 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
1935 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
1936 QueryExpr::DropTable(q) => out.push(q.name.clone()),
1937 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
1938 QueryExpr::DropVector(q) => out.push(q.name.clone()),
1939 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
1940 QueryExpr::DropKv(q) => out.push(q.name.clone()),
1941 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
1942 QueryExpr::Truncate(q) => out.push(q.name.clone()),
1943 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
1944 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
1945 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
1946 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
1947 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
1948 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
1949 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
1950 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
1951 QueryExpr::QueueCommand(QueueCommand::Move {
1952 source,
1953 destination,
1954 ..
1955 }) => {
1956 out.push(source.clone());
1957 out.push(destination.clone());
1958 }
1959 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
1960 QueryExpr::CreateView(q) => out.push(q.name.clone()),
1961 QueryExpr::DropView(q) => out.push(q.name.clone()),
1962 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
1963
1964 _ => {}
1970 }
1971}
1972
1973impl RedDBRuntime {
1974 pub fn in_memory() -> RedDBResult<Self> {
1975 Self::with_options(RedDBOptions::in_memory())
1976 }
1977
1978 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
1982 self.inner.lock_manager.clone()
1983 }
1984
1985 #[inline(never)]
1986 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
1987 Self::with_pool(options, ConnectionPoolConfig::default())
1988 }
1989
1990 pub fn with_pool(
1991 options: RedDBOptions,
1992 pool_config: ConnectionPoolConfig,
1993 ) -> RedDBResult<Self> {
1994 let boot_open_start_ms = std::time::SystemTime::now()
2002 .duration_since(std::time::UNIX_EPOCH)
2003 .map(|d| d.as_millis() as u64)
2004 .unwrap_or(0);
2005 let db = Arc::new(
2006 RedDB::open_with_options(&options)
2007 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2008 );
2009 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2010 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2011 options
2012 .resolved_path("data.rdb")
2013 .with_extension("result-cache.l2"),
2014 ),
2015 )
2016 .map_err(|err| {
2017 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2018 })?;
2019 let storage_ready_ms = std::time::SystemTime::now()
2020 .duration_since(std::time::UNIX_EPOCH)
2021 .map(|d| d.as_millis() as u64)
2022 .unwrap_or(0);
2023
2024 let runtime = Self {
2025 inner: Arc::new(RuntimeInner {
2026 db,
2027 layout: PhysicalLayout::from_options(&options),
2028 indices: IndexCatalog::register_default_vector_graph(
2029 options.has_capability(crate::api::Capability::Table),
2030 options.has_capability(crate::api::Capability::Graph),
2031 ),
2032 pool_config,
2033 pool: Mutex::new(PoolState::default()),
2034 started_at_unix_ms: SystemTime::now()
2035 .duration_since(UNIX_EPOCH)
2036 .unwrap_or_default()
2037 .as_millis(),
2038 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2039 index_store: super::index_store::IndexStore::new(),
2040 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2041 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2042 query_cache: parking_lot::RwLock::new(
2043 crate::storage::query::planner::cache::PlanCache::new(1000),
2044 ),
2045 result_cache: parking_lot::RwLock::new((
2046 HashMap::new(),
2047 std::collections::VecDeque::new(),
2048 )),
2049 result_blob_cache,
2050 result_blob_entries: parking_lot::RwLock::new((
2051 HashMap::new(),
2052 std::collections::VecDeque::new(),
2053 )),
2054 ask_answer_cache_entries: parking_lot::RwLock::new((
2055 HashSet::new(),
2056 std::collections::VecDeque::new(),
2057 )),
2058 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2059 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2060 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2061 rmw_locks: RmwLockTable::new(),
2062 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2063 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2064 ec_worker: crate::ec::worker::EcWorker::new(),
2065 auth_store: parking_lot::RwLock::new(None),
2066 oauth_validator: parking_lot::RwLock::new(None),
2067 views: parking_lot::RwLock::new(HashMap::new()),
2068 materialized_views: parking_lot::RwLock::new(
2069 crate::storage::cache::result::MaterializedViewCache::new(),
2070 ),
2071 retention_sweeper: parking_lot::RwLock::new(
2072 crate::runtime::retention_sweeper::RetentionSweeperState::new(),
2073 ),
2074 snapshot_manager: Arc::new(
2075 crate::storage::transaction::snapshot::SnapshotManager::new(),
2076 ),
2077 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2078 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2079 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2080 lock_manager: Arc::new({
2081 let env = crate::runtime::config_overlay::collect_env_overrides();
2086 let timeout_ms = env
2087 .get("concurrency.locking.deadlock_timeout_ms")
2088 .and_then(|raw| raw.parse::<u64>().ok())
2089 .unwrap_or_else(|| {
2090 match crate::runtime::config_matrix::default_for(
2091 "concurrency.locking.deadlock_timeout_ms",
2092 ) {
2093 Some(crate::serde_json::Value::Number(n)) => n as u64,
2094 _ => 5000,
2095 }
2096 });
2097 let cfg = crate::storage::transaction::lock::LockConfig {
2098 default_timeout: std::time::Duration::from_millis(timeout_ms),
2099 ..Default::default()
2100 };
2101 crate::storage::transaction::lock::LockManager::new(cfg)
2102 }),
2103 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2104 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2105 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2106 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2107 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2108 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2109 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2110 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2111 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2112 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2113 &options,
2114 )),
2115 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2116 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2117 audit_log: {
2118 let data_path = options
2128 .data_path
2129 .clone()
2130 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2131 let (audit_dest, _) =
2132 crate::api::tier_wiring::current_log_destinations();
2133 Arc::new(crate::runtime::audit_log::AuditLogger::for_destination(
2134 &audit_dest,
2135 &data_path,
2136 ))
2137 },
2138 lease_lifecycle: std::sync::OnceLock::new(),
2139 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2140 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2141 schema_vocabulary: parking_lot::RwLock::new(
2142 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2143 ),
2144 slow_query_logger: {
2145 let fallback_dir = options
2158 .data_path
2159 .as_ref()
2160 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2161 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2162 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2163 .ok()
2164 .and_then(|s| s.parse::<u64>().ok())
2165 .unwrap_or(1000);
2166 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2167 .ok()
2168 .and_then(|s| s.parse::<u8>().ok())
2169 .unwrap_or(100);
2170 let (_, slow_dest) =
2171 crate::api::tier_wiring::current_log_destinations();
2172 crate::telemetry::slow_query_logger::SlowQueryLogger::for_destination(
2173 &slow_dest,
2174 &fallback_dir,
2175 threshold_ms,
2176 sample_pct,
2177 )
2178 },
2179 kv_stats: crate::runtime::KvStatsCounters::default(),
2180 metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
2181 metrics_tenant_activity_stats:
2182 crate::runtime::MetricsTenantActivityCounters::default(),
2183 queue_telemetry: Arc::new(
2184 crate::runtime::queue_telemetry::QueueTelemetryCounters::default(),
2185 ),
2186 kv_tag_index: crate::runtime::KvTagIndex::default(),
2187 chain_tip_cache: parking_lot::Mutex::new(HashMap::new()),
2188 chain_integrity_broken: parking_lot::Mutex::new(HashMap::new()),
2189 }),
2190 };
2191
2192 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2198 &runtime.inner.audit_log,
2199 ));
2200
2201 runtime
2209 .inner
2210 .lifecycle
2211 .set_restore_started_at_ms(boot_open_start_ms);
2212 runtime
2213 .inner
2214 .lifecycle
2215 .set_restore_ready_at_ms(storage_ready_ms);
2216 runtime
2217 .inner
2218 .lifecycle
2219 .set_wal_replay_started_at_ms(boot_open_start_ms);
2220 runtime
2221 .inner
2222 .lifecycle
2223 .set_wal_replay_ready_at_ms(storage_ready_ms);
2224
2225 let restored_cdc_lsn = runtime
2226 .inner
2227 .db
2228 .replication
2229 .as_ref()
2230 .map(|repl| {
2231 repl.logical_wal_spool
2232 .as_ref()
2233 .map(|spool| spool.current_lsn())
2234 .unwrap_or(0)
2235 })
2236 .unwrap_or(0)
2237 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2238 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2239 runtime.rehydrate_snapshot_xid_floor();
2240 runtime.bootstrap_system_keyed_collections()?;
2241 runtime.rehydrate_declared_column_schemas();
2242 runtime.load_probabilistic_state()?;
2243
2244 runtime.rehydrate_tenant_tables();
2248 if let Some(repl) = &runtime.inner.db.replication {
2249 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2250 }
2251
2252 {
2254 let sys = SystemInfo::collect();
2255 runtime.inner.db.store().set_config_tree(
2256 "red.system",
2257 &crate::serde_json::json!({
2258 "pid": sys.pid,
2259 "cpu_cores": sys.cpu_cores,
2260 "total_memory_bytes": sys.total_memory_bytes,
2261 "available_memory_bytes": sys.available_memory_bytes,
2262 "os": sys.os,
2263 "arch": sys.arch,
2264 "hostname": sys.hostname,
2265 "started_at": SystemTime::now()
2266 .duration_since(UNIX_EPOCH)
2267 .unwrap_or_default()
2268 .as_millis() as u64
2269 }),
2270 );
2271
2272 let store = runtime.inner.db.store();
2274 if store
2275 .get_collection("red_config")
2276 .map(|m| m.query_all(|_| true).len())
2277 .unwrap_or(0)
2278 <= 10
2279 {
2280 store.set_config_tree("red.ai", &crate::json!({
2281 "default": crate::json!({
2282 "provider": "openai",
2283 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2284 }),
2285 "max_embedding_inputs": 256,
2286 "max_prompt_batch": 256,
2287 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2288 }));
2289 store.set_config_tree(
2290 "red.server",
2291 &crate::json!({
2292 "max_scan_limit": 1000,
2293 "max_body_size": 1048576,
2294 "read_timeout_ms": 5000,
2295 "write_timeout_ms": 5000
2296 }),
2297 );
2298 store.set_config_tree(
2299 "red.storage",
2300 &crate::json!({
2301 "page_size": 4096,
2302 "page_cache_capacity": 100000,
2303 "auto_checkpoint_pages": 1000,
2304 "snapshot_retention": 16,
2305 "verify_checksums": true,
2306 "segment": crate::json!({
2307 "max_entities": 100000,
2308 "max_bytes": 268435456_u64,
2309 "compression_level": 6
2310 }),
2311 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2312 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2313 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2314 }),
2315 );
2316 store.set_config_tree(
2317 "red.search",
2318 &crate::json!({
2319 "rag": crate::json!({
2320 "max_chunks_per_source": 10,
2321 "max_total_chunks": 25,
2322 "similarity_threshold": 0.8,
2323 "graph_depth": 2,
2324 "min_relevance": 0.3
2325 }),
2326 "fusion": crate::json!({
2327 "vector_weight": 0.5,
2328 "graph_weight": 0.3,
2329 "table_weight": 0.2,
2330 "dedup_threshold": 0.85
2331 })
2332 }),
2333 );
2334 store.set_config_tree(
2335 "red.auth",
2336 &crate::json!({
2337 "enabled": false,
2338 "session_ttl_secs": 3600,
2339 "require_auth": false
2340 }),
2341 );
2342 store.set_config_tree(
2343 "red.query",
2344 &crate::json!({
2345 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2346 "max_recursion_depth": 1000
2347 }),
2348 );
2349 store.set_config_tree(
2350 "red.indexes",
2351 &crate::json!({
2352 "auto_select": true,
2353 "bloom_filter": crate::json!({
2354 "enabled": true,
2355 "false_positive_rate": 0.01,
2356 "prune_on_scan": true
2357 }),
2358 "hash": crate::json!({ "enabled": true }),
2359 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2360 "spatial": crate::json!({ "enabled": true })
2361 }),
2362 );
2363 store.set_config_tree(
2364 "red.memtable",
2365 &crate::json!({
2366 "enabled": true,
2367 "max_bytes": 67108864_u64,
2368 "flush_threshold": 0.75
2369 }),
2370 );
2371 store.set_config_tree(
2372 "red.probabilistic",
2373 &crate::json!({
2374 "hll_registers": 16384,
2375 "sketch_default_width": 1000,
2376 "sketch_default_depth": 5,
2377 "filter_default_capacity": 100000
2378 }),
2379 );
2380 store.set_config_tree(
2381 "red.timeseries",
2382 &crate::json!({
2383 "default_chunk_size": 1024,
2384 "compression": crate::json!({
2385 "timestamps": "delta_of_delta",
2386 "values": "gorilla_xor"
2387 }),
2388 "default_retention_days": 0
2389 }),
2390 );
2391 store.set_config_tree(
2392 "red.queue",
2393 &crate::json!({
2394 "default_max_size": 0,
2395 "default_max_attempts": 3,
2396 "visibility_timeout_ms": 30000,
2397 "consumer_idle_timeout_ms": 60000
2398 }),
2399 );
2400 store.set_config_tree(
2401 "red.backup",
2402 &crate::json!({
2403 "enabled": false,
2404 "interval_secs": 3600,
2405 "retention_count": 24,
2406 "upload": false,
2407 "backend": "local"
2408 }),
2409 );
2410 store.set_config_tree(
2411 "red.wal",
2412 &crate::json!({
2413 "archive": crate::json!({
2414 "enabled": false,
2415 "retention_hours": 168,
2416 "prefix": "wal/"
2417 })
2418 }),
2419 );
2420 store.set_config_tree(
2421 "red.cdc",
2422 &crate::json!({
2423 "enabled": true,
2424 "buffer_size": 100000
2425 }),
2426 );
2427 store.set_config_tree(
2428 "red.config.secret",
2429 &crate::json!({
2430 "auto_encrypt": true,
2431 "auto_decrypt": true
2432 }),
2433 );
2434 }
2435
2436 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2443
2444 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2451 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2452 if lehman_yao {
2453 tracing::info!(
2454 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2455 );
2456 }
2457
2458 let overlay_path = crate::runtime::config_overlay::config_file_path();
2463 let _ =
2464 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2465 }
2466
2467 {
2471 let store = runtime.inner.db.store();
2472 for name in crate::application::vcs_collections::ALL {
2473 let _ = store.get_or_create_collection(*name);
2474 }
2475 store.set_config_tree(
2478 crate::application::vcs_collections::CONFIG_NAMESPACE,
2479 &crate::json!({
2480 "default_branch": "main",
2481 "author": crate::json!({
2482 "name": "reddb",
2483 "email": "reddb@localhost"
2484 }),
2485 "protected_branches": crate::json!(["main"]),
2486 "closure": crate::json!({
2487 "enabled": true,
2488 "lazy": true
2489 }),
2490 "merge": crate::json!({
2491 "default_strategy": "auto",
2492 "fast_forward": true
2493 })
2494 }),
2495 );
2496 }
2497
2498 {
2501 let store = runtime.inner.db.store();
2502 for name in crate::application::migration_collections::ALL {
2503 let _ = store.get_or_create_collection(*name);
2504 }
2505 }
2506
2507 {
2522 let weak = Arc::downgrade(&runtime.inner);
2523 std::thread::Builder::new()
2524 .name("reddb-maintenance".into())
2525 .spawn(move || {
2526 let tick = std::time::Duration::from_millis(200);
2527 let work_interval = std::time::Duration::from_secs(60);
2528 let mut last_work = std::time::Instant::now();
2529 loop {
2530 std::thread::sleep(tick);
2531 let Some(inner) = weak.upgrade() else {
2532 break;
2535 };
2536 if last_work.elapsed() >= work_interval {
2537 let _stats = inner.db.store().context_index().stats();
2538 last_work = std::time::Instant::now();
2539 }
2540 }
2541 })
2542 .ok();
2543 }
2544
2545 {
2547 let store = runtime.inner.db.store();
2548 let mut backup_enabled = false;
2549 let mut backup_interval = 3600u64;
2550
2551 if let Some(manager) = store.get_collection("red_config") {
2552 manager.for_each_entity(|entity| {
2553 if let Some(row) = entity.data.as_row() {
2554 let key = row.get_field("key").and_then(|v| match v {
2555 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2556 _ => None,
2557 });
2558 let val = row.get_field("value");
2559 if key == Some("red.config.backup.enabled") {
2560 backup_enabled = match val {
2561 Some(crate::storage::schema::Value::Boolean(true)) => true,
2562 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2563 _ => false,
2564 };
2565 } else if key == Some("red.config.backup.interval_secs") {
2566 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2567 backup_interval = *n as u64;
2568 }
2569 }
2570 }
2571 true
2572 });
2573 }
2574
2575 if backup_enabled {
2576 runtime.inner.backup_scheduler.set_interval(backup_interval);
2577 let rt = runtime.clone();
2578 runtime
2579 .inner
2580 .backup_scheduler
2581 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2582 }
2583 }
2584
2585 {
2587 runtime
2588 .inner
2589 .ec_registry
2590 .load_from_config_store(runtime.inner.db.store().as_ref());
2591 if !runtime.inner.ec_registry.async_configs().is_empty() {
2592 runtime.inner.ec_worker.start(
2593 Arc::clone(&runtime.inner.ec_registry),
2594 Arc::clone(&runtime.inner.db.store()),
2595 );
2596 }
2597 }
2598
2599 if let crate::replication::ReplicationRole::Replica { primary_addr } =
2600 runtime.inner.db.options().replication.role.clone()
2601 {
2602 let rt = runtime.clone();
2603 std::thread::Builder::new()
2604 .name("reddb-replica".into())
2605 .spawn(move || rt.run_replica_loop(primary_addr))
2606 .ok();
2607 }
2608
2609 runtime.inner.lifecycle.mark_ready();
2614
2615 {
2624 let weak_inner = Arc::downgrade(&runtime.inner);
2625 std::thread::Builder::new()
2626 .name("reddb-mv-scheduler".into())
2627 .spawn(move || loop {
2628 std::thread::sleep(std::time::Duration::from_millis(50));
2629 let Some(inner) = weak_inner.upgrade() else {
2630 break;
2631 };
2632 let rt = RedDBRuntime { inner };
2633 rt.refresh_due_materialized_views();
2634 })
2635 .ok();
2636 }
2637
2638 {
2648 let weak_inner = Arc::downgrade(&runtime.inner);
2649 std::thread::Builder::new()
2650 .name("reddb-retention-sweeper".into())
2651 .spawn(move || loop {
2652 std::thread::sleep(std::time::Duration::from_millis(500));
2653 let Some(inner) = weak_inner.upgrade() else {
2654 break;
2655 };
2656 let rt = RedDBRuntime { inner };
2657 rt.sweep_retention_tick(
2658 crate::runtime::retention_sweeper::DEFAULT_SWEEPER_BATCH,
2659 );
2660 })
2661 .ok();
2662 }
2663
2664 Ok(runtime)
2665 }
2666
2667 fn rehydrate_snapshot_xid_floor(&self) {
2668 let store = self.inner.db.store();
2669 for collection in store.list_collections() {
2670 let Some(manager) = store.get_collection(&collection) else {
2671 continue;
2672 };
2673 for entity in manager.query_all(|_| true) {
2674 self.inner
2675 .snapshot_manager
2676 .observe_committed_xid(entity.xmin);
2677 self.inner
2678 .snapshot_manager
2679 .observe_committed_xid(entity.xmax);
2680 }
2681 }
2682 }
2683
2684 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
2685 let mut changed = false;
2686 for (name, model) in [
2687 ("red.config", crate::catalog::CollectionModel::Config),
2688 ("red.vault", crate::catalog::CollectionModel::Vault),
2689 ] {
2690 if self.inner.db.store().get_collection(name).is_none() {
2691 self.inner.db.store().get_or_create_collection(name);
2692 changed = true;
2693 }
2694 if self.inner.db.collection_contract(name).is_none() {
2695 self.inner
2696 .db
2697 .save_collection_contract(system_keyed_collection_contract(name, model))
2698 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2699 changed = true;
2700 }
2701 }
2702 if changed {
2703 self.inner
2704 .db
2705 .persist_metadata()
2706 .map_err(|err| RedDBError::Internal(err.to_string()))?;
2707 }
2708 Ok(())
2709 }
2710
2711 pub fn db(&self) -> Arc<RedDB> {
2712 Arc::clone(&self.inner.db)
2713 }
2714
2715 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
2720 &self.inner.index_store
2721 }
2722
2723 pub(crate) fn schema_vocabulary_apply(
2728 &self,
2729 event: crate::runtime::schema_vocabulary::DdlEvent,
2730 ) {
2731 self.inner.schema_vocabulary.write().on_ddl(event);
2732 }
2733
2734 pub fn schema_vocabulary_lookup(
2739 &self,
2740 token: &str,
2741 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
2742 self.inner.schema_vocabulary.read().lookup(token).to_vec()
2743 }
2744
2745 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
2749 *self.inner.auth_store.write() = Some(store);
2750 }
2751
2752 pub fn auth_store(&self) -> Option<Arc<crate::auth::store::AuthStore>> {
2755 self.inner.auth_store.read().clone()
2756 }
2757
2758 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
2760 self.inner
2761 .auth_store
2762 .read()
2763 .as_ref()
2764 .and_then(|store| store.vault_kv_get(key))
2765 }
2766
2767 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
2770 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
2771 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
2772 })?;
2773 store
2774 .vault_kv_try_set(key, value)
2775 .map_err(|err| RedDBError::Query(err.to_string()))
2776 }
2777
2778 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
2782 *self.inner.oauth_validator.write() = validator;
2783 }
2784
2785 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
2789 self.inner.oauth_validator.read().clone()
2790 }
2791
2792 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
2796 let guard = self.inner.auth_store.read();
2797 guard.as_ref().and_then(|s| s.vault_secret_key())
2798 }
2799
2800 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
2806 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2807 if let Some(crate::storage::schema::Value::Boolean(b)) =
2808 crate::runtime::config_overlay::coerce_env_value(key, raw)
2809 {
2810 return b;
2811 }
2812 }
2813 let store = self.inner.db.store();
2814 let Some(manager) = store.get_collection("red_config") else {
2815 return default;
2816 };
2817 let mut result = default;
2818 let mut latest_id: u64 = 0;
2819 manager.for_each_entity(|entity| {
2820 if let Some(row) = entity.data.as_row() {
2821 let entry_key = row.get_field("key").and_then(|v| match v {
2822 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2823 _ => None,
2824 });
2825 if entry_key == Some(key) {
2826 let id = entity.id.raw();
2827 if id >= latest_id {
2828 latest_id = id;
2829 result = match row.get_field("value") {
2830 Some(crate::storage::schema::Value::Boolean(b)) => *b,
2831 Some(crate::storage::schema::Value::Text(s)) => {
2832 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
2833 }
2834 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
2835 _ => default,
2836 };
2837 }
2838 }
2839 }
2840 true
2841 });
2842 result
2843 }
2844
2845 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
2846 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2847 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
2848 crate::runtime::config_overlay::coerce_env_value(key, raw)
2849 {
2850 return n;
2851 }
2852 }
2853 let store = self.inner.db.store();
2854 let Some(manager) = store.get_collection("red_config") else {
2855 return default;
2856 };
2857 let mut result = default;
2858 let mut latest_id: u64 = 0;
2859 manager.for_each_entity(|entity| {
2860 if let Some(row) = entity.data.as_row() {
2861 let entry_key = row.get_field("key").and_then(|v| match v {
2862 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2863 _ => None,
2864 });
2865 if entry_key == Some(key) {
2866 let id = entity.id.raw();
2867 if id >= latest_id {
2868 latest_id = id;
2869 result = match row.get_field("value") {
2870 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
2871 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
2872 Some(crate::storage::schema::Value::Text(s)) => {
2873 s.parse::<u64>().unwrap_or(default)
2874 }
2875 _ => default,
2876 };
2877 }
2878 }
2879 }
2880 true
2881 });
2882 result
2883 }
2884
2885 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
2886 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2887 if let Ok(n) = raw.parse::<f64>() {
2888 return n;
2889 }
2890 }
2891 let store = self.inner.db.store();
2892 let Some(manager) = store.get_collection("red_config") else {
2893 return default;
2894 };
2895 let mut result = default;
2896 let mut latest_id: u64 = 0;
2897 manager.for_each_entity(|entity| {
2898 if let Some(row) = entity.data.as_row() {
2899 let entry_key = row.get_field("key").and_then(|v| match v {
2900 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2901 _ => None,
2902 });
2903 if entry_key == Some(key) {
2904 let id = entity.id.raw();
2905 if id >= latest_id {
2906 latest_id = id;
2907 result = match row.get_field("value") {
2908 Some(crate::storage::schema::Value::Float(n)) => *n,
2909 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
2910 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
2911 Some(crate::storage::schema::Value::Text(s)) => {
2912 s.parse::<f64>().unwrap_or(default)
2913 }
2914 _ => default,
2915 };
2916 }
2917 }
2918 }
2919 true
2920 });
2921 result
2922 }
2923
2924 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
2925 if let Some(raw) = self.inner.env_config_overrides.get(key) {
2926 return raw.clone();
2927 }
2928 let store = self.inner.db.store();
2929 let Some(manager) = store.get_collection("red_config") else {
2930 return default.to_string();
2931 };
2932 let mut result = default.to_string();
2933 let mut latest_id: u64 = 0;
2934 manager.for_each_entity(|entity| {
2935 if let Some(row) = entity.data.as_row() {
2936 let entry_key = row.get_field("key").and_then(|v| match v {
2937 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2938 _ => None,
2939 });
2940 if entry_key == Some(key) {
2941 let id = entity.id.raw();
2942 if id >= latest_id {
2943 latest_id = id;
2944 if let Some(crate::storage::schema::Value::Text(value)) =
2945 row.get_field("value")
2946 {
2947 result = value.to_string();
2948 }
2949 }
2950 }
2951 }
2952 true
2953 });
2954 result
2955 }
2956
2957 fn latest_metadata_for(
2958 &self,
2959 collection: &str,
2960 entity_id: u64,
2961 ) -> Option<crate::serde_json::Value> {
2962 self.inner
2963 .db
2964 .store()
2965 .get_metadata(collection, EntityId::new(entity_id))
2966 .map(|metadata| metadata_to_json(&metadata))
2967 }
2968
2969 fn persist_replica_lsn(&self, lsn: u64) {
2970 self.inner.db.store().set_config_tree(
2971 "red.replication",
2972 &crate::json!({
2973 "last_applied_lsn": lsn
2974 }),
2975 );
2976 }
2977
2978 fn persist_replication_health(
2979 &self,
2980 state: &str,
2981 last_error: &str,
2982 primary_lsn: Option<u64>,
2983 oldest_available_lsn: Option<u64>,
2984 ) {
2985 self.inner.db.store().set_config_tree(
2986 "red.replication",
2987 &crate::json!({
2988 "state": state,
2989 "last_error": last_error,
2990 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
2991 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
2992 "updated_at_unix_ms": SystemTime::now()
2993 .duration_since(UNIX_EPOCH)
2994 .unwrap_or_default()
2995 .as_millis() as u64
2996 }),
2997 );
2998 }
2999
3000 pub(crate) fn secret_auto_encrypt(&self) -> bool {
3003 self.config_bool("red.config.secret.auto_encrypt", true)
3004 }
3005
3006 pub(crate) fn secret_auto_decrypt(&self) -> bool {
3011 self.config_bool("red.config.secret.auto_decrypt", true)
3012 }
3013
3014 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
3021 if !self.secret_auto_decrypt() {
3022 return;
3023 }
3024 let Some(key) = self.secret_aes_key() else {
3025 return;
3026 };
3027 for record in result.result.records.iter_mut() {
3028 for value in record.values_mut() {
3029 if let Value::Secret(ref bytes) = value {
3030 if let Some(plain) =
3031 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
3032 {
3033 if let Ok(text) = String::from_utf8(plain) {
3034 *value = Value::text(text);
3035 }
3036 }
3037 }
3038 }
3039 }
3040 }
3041
3042 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
3050 crate::runtime::mutation::MutationEngine::new(self)
3051 }
3052
3053 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
3064 self.inner.write_gate.check(kind)
3065 }
3066
3067 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
3071 &self.inner.write_gate
3072 }
3073
3074 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
3078 &self.inner.lifecycle
3079 }
3080
3081 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
3083 &self.inner.resource_limits
3084 }
3085
3086 pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
3088 &self.inner.audit_log
3089 }
3090
3091 pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
3095 Arc::clone(&self.inner.audit_log)
3096 }
3097
3098 pub(crate) fn queue_telemetry(
3102 &self,
3103 ) -> &crate::runtime::queue_telemetry::QueueTelemetryCounters {
3104 &self.inner.queue_telemetry
3105 }
3106
3107 pub fn queue_telemetry_snapshot(
3110 &self,
3111 ) -> crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3112 crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3113 delivered: self.inner.queue_telemetry.delivered_snapshot(),
3114 acked: self.inner.queue_telemetry.acked_snapshot(),
3115 nacked: self.inner.queue_telemetry.nacked_snapshot(),
3116 }
3117 }
3118
3119 pub fn queue_pending_counts(&self) -> Vec<((String, String), u64)> {
3124 let store = self.inner.db.store();
3125 crate::runtime::impl_queue::pending_counts_by_group(store.as_ref())
3126 .into_iter()
3127 .collect()
3128 }
3129
3130 pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
3135 Arc::clone(&self.inner.write_gate)
3136 }
3137
3138 pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3141 self.inner.lease_lifecycle.get()
3142 }
3143
3144 pub fn set_lease_lifecycle(
3147 &self,
3148 lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
3149 ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
3150 self.inner.lease_lifecycle.set(lifecycle)
3151 }
3152
3153 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3158 if self.inner.resource_limits.batch_size_exceeded(requested) {
3159 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3160 return Err(RedDBError::QuotaExceeded(format!(
3161 "max_batch_size:{requested}:{max}"
3162 )));
3163 }
3164 Ok(())
3165 }
3166
3167 pub fn check_db_size(&self) -> RedDBResult<()> {
3173 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3174 return Ok(());
3175 };
3176 if limit == 0 {
3177 return Ok(());
3178 }
3179 let Some(path) = self.inner.db.path() else {
3180 return Ok(());
3181 };
3182 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3183 if current > limit {
3184 return Err(RedDBError::QuotaExceeded(format!(
3185 "max_db_size_bytes:{current}:{limit}"
3186 )));
3187 }
3188 Ok(())
3189 }
3190
3191 pub fn graceful_shutdown(
3209 &self,
3210 backup_on_shutdown: bool,
3211 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
3212 if !self.inner.lifecycle.begin_shutdown() {
3213 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
3217 }
3218
3219 let started_ms = std::time::SystemTime::now()
3220 .duration_since(std::time::UNIX_EPOCH)
3221 .map(|d| d.as_millis() as u64)
3222 .unwrap_or(0);
3223 let mut report = crate::runtime::lifecycle::ShutdownReport {
3224 started_at_ms: started_ms,
3225 ..Default::default()
3226 };
3227
3228 let flush_res = self.inner.db.flush_local_only();
3234 report.flushed_wal = flush_res.is_ok();
3235 report.final_checkpoint = flush_res.is_ok();
3236 if let Err(err) = &flush_res {
3237 tracing::error!(
3238 target: "reddb::lifecycle",
3239 error = %err,
3240 "graceful_shutdown: local flush failed"
3241 );
3242 } else if let Err(lease_err) =
3243 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
3244 {
3245 tracing::warn!(
3246 target: "reddb::serverless::lease",
3247 error = %lease_err,
3248 "graceful_shutdown: remote upload skipped — lease not held"
3249 );
3250 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
3251 tracing::error!(
3252 target: "reddb::lifecycle",
3253 error = %err,
3254 "graceful_shutdown: remote upload failed"
3255 );
3256 }
3257
3258 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
3263 match self.trigger_backup() {
3269 Ok(result) => {
3270 report.backup_uploaded = result.uploaded;
3271 }
3272 Err(err) => {
3273 tracing::warn!(
3274 target: "reddb::lifecycle",
3275 error = %err,
3276 "graceful_shutdown: final backup skipped"
3277 );
3278 }
3279 }
3280 }
3281
3282 let completed_ms = std::time::SystemTime::now()
3283 .duration_since(std::time::UNIX_EPOCH)
3284 .map(|d| d.as_millis() as u64)
3285 .unwrap_or(started_ms);
3286 report.completed_at_ms = completed_ms;
3287 report.duration_ms = completed_ms.saturating_sub(started_ms);
3288
3289 self.inner.lifecycle.finish_shutdown(report.clone());
3290 Ok(report)
3291 }
3292
3293 pub(crate) fn cdc_emit_no_cache_invalidate(
3299 &self,
3300 operation: crate::replication::cdc::ChangeOperation,
3301 collection: &str,
3302 entity_id: u64,
3303 entity_kind: &str,
3304 ) -> u64 {
3305 let lsn = self
3306 .inner
3307 .cdc
3308 .emit(operation, collection, entity_id, entity_kind);
3309
3310 if let Some(ref primary) = self.inner.db.replication {
3312 let store = self.inner.db.store();
3313 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3314 None
3315 } else {
3316 store.get(collection, EntityId::new(entity_id))
3317 };
3318 let record = ChangeRecord {
3319 lsn,
3320 timestamp: SystemTime::now()
3321 .duration_since(UNIX_EPOCH)
3322 .unwrap_or_default()
3323 .as_millis() as u64,
3324 operation,
3325 collection: collection.to_string(),
3326 entity_id,
3327 entity_kind: entity_kind.to_string(),
3328 entity_bytes: entity
3329 .as_ref()
3330 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3331 metadata: self.latest_metadata_for(collection, entity_id),
3332 };
3333 let encoded = record.encode();
3334 primary.wal_buffer.append(record.lsn, encoded.clone());
3335 if let Some(spool) = &primary.logical_wal_spool {
3336 let _ = spool.append(record.lsn, &encoded);
3337 }
3338 }
3339 lsn
3340 }
3341
3342 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3343 &self,
3344 collection: &str,
3345 ids: &[EntityId],
3346 entity_kind: &str,
3347 ) -> Vec<u64> {
3348 if ids.is_empty() {
3349 return Vec::new();
3350 }
3351
3352 if self.inner.db.replication.is_none() {
3356 return self.inner.cdc.emit_batch_same_collection(
3357 crate::replication::cdc::ChangeOperation::Insert,
3358 collection,
3359 entity_kind,
3360 ids.iter().map(|id| id.raw()),
3361 );
3362 }
3363
3364 ids.iter()
3367 .map(|id| {
3368 self.cdc_emit_no_cache_invalidate(
3369 crate::replication::cdc::ChangeOperation::Insert,
3370 collection,
3371 id.raw(),
3372 entity_kind,
3373 )
3374 })
3375 .collect()
3376 }
3377
3378 pub fn cdc_emit(
3379 &self,
3380 operation: crate::replication::cdc::ChangeOperation,
3381 collection: &str,
3382 entity_id: u64,
3383 entity_kind: &str,
3384 ) -> u64 {
3385 let lsn = self
3386 .inner
3387 .cdc
3388 .emit(operation, collection, entity_id, entity_kind);
3389 self.invalidate_result_cache_for_table(collection);
3395
3396 if let Some(ref primary) = self.inner.db.replication {
3398 let store = self.inner.db.store();
3399 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3400 None
3401 } else {
3402 store.get(collection, EntityId::new(entity_id))
3403 };
3404 let record = ChangeRecord {
3405 lsn,
3406 timestamp: SystemTime::now()
3407 .duration_since(UNIX_EPOCH)
3408 .unwrap_or_default()
3409 .as_millis() as u64,
3410 operation,
3411 collection: collection.to_string(),
3412 entity_id,
3413 entity_kind: entity_kind.to_string(),
3414 entity_bytes: entity
3415 .as_ref()
3416 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
3417 metadata: self.latest_metadata_for(collection, entity_id),
3418 };
3419 let encoded = record.encode();
3420 primary.wal_buffer.append(record.lsn, encoded.clone());
3421 if let Some(spool) = &primary.logical_wal_spool {
3422 let _ = spool.append(record.lsn, &encoded);
3423 }
3424 }
3425 lsn
3426 }
3427
3428 pub(crate) fn cdc_emit_kv(
3429 &self,
3430 operation: crate::replication::cdc::ChangeOperation,
3431 collection: &str,
3432 key: &str,
3433 entity_id: u64,
3434 before: Option<crate::json::Value>,
3435 after: Option<crate::json::Value>,
3436 ) -> u64 {
3437 let lsn = self
3438 .inner
3439 .cdc
3440 .emit_kv(operation, collection, key, entity_id, before, after);
3441 self.inner.kv_stats.incr_watch_events_emitted();
3442 self.invalidate_result_cache_for_table(collection);
3443 lsn
3444 }
3445
3446 pub(crate) fn record_kv_watch_event(
3447 &self,
3448 operation: crate::replication::cdc::ChangeOperation,
3449 collection: &str,
3450 key: &str,
3451 entity_id: u64,
3452 before: Option<crate::json::Value>,
3453 after: Option<crate::json::Value>,
3454 ) {
3455 if self.current_xid().is_some() {
3456 let conn_id = current_connection_id();
3457 let event = crate::replication::cdc::KvWatchEvent {
3458 collection: collection.to_string(),
3459 key: key.to_string(),
3460 op: operation,
3461 before,
3462 after,
3463 lsn: 0,
3464 committed_at: 0,
3465 dropped_event_count: 0,
3466 };
3467 self.inner
3468 .pending_kv_watch_events
3469 .write()
3470 .entry(conn_id)
3471 .or_default()
3472 .push(event);
3473 return;
3474 }
3475
3476 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
3477 }
3478
3479 pub(crate) fn cdc_emit_prebuilt(
3480 &self,
3481 operation: crate::replication::cdc::ChangeOperation,
3482 collection: &str,
3483 entity: &UnifiedEntity,
3484 entity_kind: &str,
3485 metadata: Option<&crate::storage::Metadata>,
3486 invalidate_cache: bool,
3487 ) -> u64 {
3488 self.cdc_emit_prebuilt_with_columns(
3489 operation,
3490 collection,
3491 entity,
3492 entity_kind,
3493 metadata,
3494 invalidate_cache,
3495 None,
3496 )
3497 }
3498
3499 pub(crate) fn cdc_emit_prebuilt_with_columns(
3506 &self,
3507 operation: crate::replication::cdc::ChangeOperation,
3508 collection: &str,
3509 entity: &UnifiedEntity,
3510 entity_kind: &str,
3511 metadata: Option<&crate::storage::Metadata>,
3512 invalidate_cache: bool,
3513 changed_columns: Option<Vec<String>>,
3514 ) -> u64 {
3515 if invalidate_cache {
3516 self.invalidate_result_cache();
3517 }
3518
3519 let public_id = entity.logical_id().raw();
3520 let lsn = self.inner.cdc.emit_with_columns(
3521 operation,
3522 collection,
3523 public_id,
3524 entity_kind,
3525 changed_columns,
3526 );
3527
3528 if let Some(ref primary) = self.inner.db.replication {
3529 let store = self.inner.db.store();
3530 let record = ChangeRecord {
3531 lsn,
3532 timestamp: SystemTime::now()
3533 .duration_since(UNIX_EPOCH)
3534 .unwrap_or_default()
3535 .as_millis() as u64,
3536 operation,
3537 collection: collection.to_string(),
3538 entity_id: entity.id.raw(),
3539 entity_kind: entity_kind.to_string(),
3540 entity_bytes: Some(UnifiedStore::serialize_entity(
3541 entity,
3542 store.format_version(),
3543 )),
3544 metadata: metadata
3545 .map(metadata_to_json)
3546 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
3547 };
3548 let encoded = record.encode();
3549 primary.wal_buffer.append(record.lsn, encoded.clone());
3550 if let Some(spool) = &primary.logical_wal_spool {
3551 let _ = spool.append(record.lsn, &encoded);
3552 }
3553 }
3554
3555 lsn
3556 }
3557
3558 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
3559 &self,
3560 operation: crate::replication::cdc::ChangeOperation,
3561 entity_kind: &str,
3562 items: I,
3563 invalidate_cache: bool,
3564 ) where
3565 I: IntoIterator<
3566 Item = (
3567 &'a str,
3568 &'a UnifiedEntity,
3569 Option<&'a crate::storage::Metadata>,
3570 ),
3571 >,
3572 {
3573 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
3574 items.into_iter().collect();
3575 if items.is_empty() {
3576 return;
3577 }
3578
3579 if invalidate_cache {
3580 self.invalidate_result_cache();
3581 }
3582
3583 for (collection, entity, metadata) in items {
3584 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
3585 }
3586 }
3587
3588 fn run_replica_loop(&self, primary_addr: String) {
3589 let endpoint = if primary_addr.starts_with("http") {
3590 primary_addr
3591 } else {
3592 format!("http://{primary_addr}")
3593 };
3594 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
3595 let max_count = self.inner.db.options().replication.max_batch_size;
3596 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
3597
3598 let runtime = match tokio::runtime::Builder::new_current_thread()
3599 .enable_all()
3600 .build()
3601 {
3602 Ok(runtime) => runtime,
3603 Err(_) => return,
3604 };
3605
3606 runtime.block_on(async move {
3607 use crate::grpc::proto::red_db_client::RedDbClient;
3608 use crate::grpc::proto::JsonPayloadRequest;
3609
3610 let mut client = loop {
3611 match RedDbClient::connect(endpoint.clone()).await {
3612 Ok(client) => {
3613 self.persist_replication_health("connecting", "", None, None);
3614 break client;
3615 }
3616 Err(_) => {
3617 self.persist_replication_health(
3618 "connecting",
3619 "waiting for primary connection",
3620 None,
3621 None,
3622 );
3623 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
3624 }
3625 }
3626 };
3627
3628 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
3633
3634 loop {
3635 let payload = crate::json!({
3636 "since_lsn": since_lsn,
3637 "max_count": max_count
3638 });
3639 let request = tonic::Request::new(JsonPayloadRequest {
3640 payload_json: crate::json::to_string(&payload)
3641 .unwrap_or_else(|_| "{}".to_string()),
3642 });
3643
3644 if let Ok(response) = client.pull_wal_records(request).await {
3645 if let Ok(value) =
3646 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
3647 {
3648 let current_lsn =
3649 value.get("current_lsn").and_then(crate::json::Value::as_u64);
3650 let oldest_available_lsn = value
3651 .get("oldest_available_lsn")
3652 .and_then(crate::json::Value::as_u64);
3653 if since_lsn > 0
3654 && oldest_available_lsn
3655 .map(|oldest| oldest > since_lsn.saturating_add(1))
3656 .unwrap_or(false)
3657 {
3658 self.persist_replication_health(
3659 "stalled_gap",
3660 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
3661 current_lsn,
3662 oldest_available_lsn,
3663 );
3664 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
3665 continue;
3666 }
3667 if let Some(records) =
3668 value.get("records").and_then(crate::json::Value::as_array)
3669 {
3670 for record in records {
3671 let Some(data_hex) =
3672 record.get("data").and_then(crate::json::Value::as_str)
3673 else {
3674 continue;
3675 };
3676 let Ok(data) = hex::decode(data_hex) else {
3677 self.inner.replica_apply_metrics.record(
3678 crate::replication::logical::ApplyErrorKind::Decode,
3679 );
3680 self.persist_replication_health(
3681 "apply_error",
3682 "failed to decode WAL record hex payload",
3683 current_lsn,
3684 oldest_available_lsn,
3685 );
3686 continue;
3687 };
3688 let Ok(change) = ChangeRecord::decode(&data) else {
3689 self.inner.replica_apply_metrics.record(
3690 crate::replication::logical::ApplyErrorKind::Decode,
3691 );
3692 self.persist_replication_health(
3693 "apply_error",
3694 "failed to decode logical WAL record",
3695 current_lsn,
3696 oldest_available_lsn,
3697 );
3698 continue;
3699 };
3700 match applier.apply(
3701 self.inner.db.as_ref(),
3702 &change,
3703 ApplyMode::Replica,
3704 ) {
3705 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
3706 self.invalidate_result_cache_for_table(&change.collection);
3707 since_lsn = since_lsn.max(change.lsn);
3708 self.persist_replica_lsn(since_lsn);
3709 }
3710 Ok(_) => {
3711 }
3713 Err(err) => {
3714 self.inner.replica_apply_metrics.record(err.kind());
3715 match &err {
3724 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
3725 crate::telemetry::operator_event::OperatorEvent::Divergence {
3726 peer: "primary".to_string(),
3727 leader_lsn: *lsn,
3728 follower_lsn: since_lsn,
3729 }
3730 .emit_global();
3731 }
3732 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
3733 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
3734 peer: "primary".to_string(),
3735 reason: format!("stalled gap last={last} next={next}"),
3736 }
3737 .emit_global();
3738 }
3739 _ => {}
3740 }
3741 let kind = match &err {
3742 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
3743 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
3744 _ => "apply_error",
3745 };
3746 self.persist_replication_health(
3747 kind,
3748 &format!("replica apply rejected: {err}"),
3749 current_lsn,
3750 oldest_available_lsn,
3751 );
3752 break;
3763 }
3764 }
3765 }
3766 }
3767 self.persist_replication_health(
3768 "healthy",
3769 "",
3770 current_lsn,
3771 oldest_available_lsn,
3772 );
3773 } else {
3774 self.persist_replication_health(
3775 "apply_error",
3776 "failed to parse pull_wal_records response",
3777 None,
3778 None,
3779 );
3780 }
3781 } else {
3782 self.persist_replication_health(
3783 "connecting",
3784 "primary pull_wal_records request failed",
3785 None,
3786 None,
3787 );
3788 }
3789
3790 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
3791 }
3792 });
3793 }
3794
3795 pub fn cdc_poll(
3797 &self,
3798 since_lsn: u64,
3799 max_count: usize,
3800 ) -> Vec<crate::replication::cdc::ChangeEvent> {
3801 self.inner.cdc.poll(since_lsn, max_count)
3802 }
3803
3804 pub fn cdc_current_lsn(&self) -> u64 {
3808 self.inner.cdc.current_lsn()
3809 }
3810
3811 pub fn kv_watch_events_since(
3812 &self,
3813 collection: &str,
3814 key: &str,
3815 since_lsn: u64,
3816 max_count: usize,
3817 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3818 self.inner
3819 .cdc
3820 .poll(since_lsn, max_count)
3821 .into_iter()
3822 .filter_map(|event| event.kv)
3823 .filter(|event| event.collection == collection && event.key == key)
3824 .collect()
3825 }
3826
3827 pub fn kv_watch_events_since_prefix(
3828 &self,
3829 collection: &str,
3830 prefix: &str,
3831 since_lsn: u64,
3832 max_count: usize,
3833 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
3834 self.inner
3835 .cdc
3836 .poll(since_lsn, max_count)
3837 .into_iter()
3838 .filter_map(|event| event.kv)
3839 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
3840 .collect()
3841 }
3842
3843 pub(crate) fn kv_watch_subscribe<'a>(
3844 &'a self,
3845 collection: impl Into<String>,
3846 key: impl Into<String>,
3847 from_lsn: Option<u64>,
3848 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3849 crate::runtime::kv_watch::KvWatchStream::subscribe(
3850 &self.inner.cdc,
3851 &self.inner.kv_stats,
3852 collection,
3853 key,
3854 from_lsn,
3855 self.kv_watch_idle_timeout_ms(),
3856 )
3857 }
3858
3859 pub(crate) fn kv_watch_subscribe_prefix<'a>(
3860 &'a self,
3861 collection: impl Into<String>,
3862 prefix: impl Into<String>,
3863 from_lsn: Option<u64>,
3864 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
3865 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
3866 &self.inner.cdc,
3867 &self.inner.kv_stats,
3868 collection,
3869 prefix,
3870 from_lsn,
3871 self.kv_watch_idle_timeout_ms(),
3872 )
3873 }
3874
3875 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
3876 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
3877 }
3878
3879 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
3881 self.inner.backup_scheduler.status()
3882 }
3883
3884 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
3894 &self.inner.result_blob_cache
3895 }
3896
3897 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
3901 self.inner
3902 .db
3903 .replication
3904 .as_ref()
3905 .map(|repl| repl.replica_snapshots())
3906 .unwrap_or_default()
3907 }
3908
3909 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
3914 crate::replication::CommitPolicy::from_env()
3915 }
3916
3917 pub fn replica_apply_error_counts(
3922 &self,
3923 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
3924 self.inner.replica_apply_metrics.snapshot()
3925 }
3926
3927 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
3930 &self.inner.quota_bucket
3931 }
3932
3933 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
3937 self.inner
3938 .db
3939 .replication
3940 .as_ref()
3941 .map(|repl| repl.commit_waiter.snapshot())
3942 .unwrap_or_default()
3943 }
3944
3945 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
3948 self.inner
3949 .db
3950 .replication
3951 .as_ref()
3952 .map(|repl| repl.commit_waiter.metrics_snapshot())
3953 .unwrap_or((0, 0, 0, 0))
3954 }
3955
3956 pub fn await_replica_acks(
3966 &self,
3967 target_lsn: u64,
3968 count: u32,
3969 timeout: std::time::Duration,
3970 ) -> crate::replication::AwaitOutcome {
3971 match &self.inner.db.replication {
3972 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
3973 None => {
3974 crate::replication::AwaitOutcome::NotRequired
3978 }
3979 }
3980 }
3981
3982 pub fn enforce_commit_policy(
3996 &self,
3997 post_lsn: u64,
3998 ) -> RedDBResult<crate::replication::AwaitOutcome> {
3999 let n = match self.commit_policy() {
4000 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
4001 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
4002 };
4003 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4004 .ok()
4005 .and_then(|v| v.parse::<u64>().ok())
4006 .unwrap_or(5_000);
4007 let outcome =
4008 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
4009 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
4010 tracing::warn!(
4011 target: "reddb::commit",
4012 post_lsn,
4013 observed = *observed,
4014 required = *required,
4015 timeout_ms,
4016 "ack_n: timed out waiting for replicas"
4017 );
4018 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
4019 .ok()
4020 .map(|v| {
4021 let t = v.trim();
4022 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
4023 })
4024 .unwrap_or(false);
4025 if fail {
4026 return Err(RedDBError::ReadOnly(format!(
4027 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
4028 )));
4029 }
4030 }
4031 Ok(outcome)
4032 }
4033
4034 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
4042 match crate::crypto::page_encryption::key_from_env() {
4043 Ok(Some(_)) => ("enabled", None),
4044 Ok(None) => ("disabled", None),
4045 Err(err) => ("error", Some(err)),
4046 }
4047 }
4048
4049 pub fn replica_apply_health(&self) -> Option<String> {
4055 let state = self.config_string("red.replication.state", "");
4056 if state.is_empty() {
4057 None
4058 } else {
4059 Some(state)
4060 }
4061 }
4062
4063 pub fn wal_archive_progress(&self) -> (u64, u64) {
4068 let current_lsn = self
4069 .inner
4070 .db
4071 .replication
4072 .as_ref()
4073 .map(|repl| {
4074 repl.logical_wal_spool
4075 .as_ref()
4076 .map(|spool| spool.current_lsn())
4077 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4078 })
4079 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4080 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4081 (current_lsn, last_archived_lsn)
4082 }
4083
    /// Takes a snapshot and, when a remote backend and a database path are both
    /// present, uploads the snapshot plus the not-yet-archived logical WAL.
    ///
    /// Steps, in order:
    /// 1. Gate the call through `check_write(WriteKind::Backup)` and
    ///    `assert_remote_write_allowed("admin/backup")`.
    /// 2. Create a local snapshot.
    /// 3. If a remote backend and path exist: archive the snapshot, publish its
    ///    manifest, archive WAL records after `last_archived_lsn`, publish the
    ///    backup head, persist the timeline config, refresh the unified
    ///    manifest, optionally wait for replica acks, and optionally archive
    ///    the blob cache L2 directory.
    ///
    /// The returned `BackupResult::uploaded` is `true` only when step 3 ran to
    /// completion.
    ///
    /// # Errors
    ///
    /// Propagates errors from the two gates and from `create_snapshot`.
    /// Snapshot archiving, manifest/head publishing, WAL reading/archiving and
    /// a detected logical WAL gap are reported as `RedDBError::Internal`.
    /// Snapshot hashing, the unified-manifest refresh, an ack timeout and blob
    /// cache archiving only log and never fail the backup.
    pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
        self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
        self.assert_remote_write_allowed("admin/backup")?;
        let started = std::time::Instant::now();
        let snapshot = self.create_snapshot()?;
        let mut uploaded = false;

        // Remote upload needs both a backend to write to and a local path to read from.
        if let (Some(backend), Some(path)) = (&self.inner.db.remote_backend, self.inner.db.path()) {
            // Config keys override the defaults derived from the database options.
            let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
            let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
            let default_head_key = self.inner.db.options().default_backup_head_key();
            let snapshot_prefix = self.config_string(
                "red.config.backup.snapshot_prefix",
                &default_snapshot_prefix,
            );
            let wal_prefix =
                self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
            let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
            let timeline_id = self.config_string("red.config.timeline.id", "main");
            let snapshot_key = crate::storage::wal::archive_snapshot(
                backend.as_ref(),
                path,
                snapshot.snapshot_id,
                &snapshot_prefix,
            )
            .map_err(|err| RedDBError::Internal(err.to_string()))?;
            // Current LSN: logical WAL spool first, then the replication WAL
            // buffer, then the CDC buffer when there is no replication state.
            let current_lsn = self
                .inner
                .db
                .replication
                .as_ref()
                .map(|repl| {
                    repl.logical_wal_spool
                        .as_ref()
                        .map(|spool| spool.current_lsn())
                        .unwrap_or_else(|| repl.wal_buffer.current_lsn())
                })
                .unwrap_or_else(|| self.inner.cdc.current_lsn());
            let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
            // A hashing failure is logged and downgraded to `None`; the
            // manifest is still published, just without a checksum.
            let snapshot_sha256 =
                crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
                    .map_err(|err| {
                        tracing::warn!(
                            target: "reddb::backup",
                            error = %err,
                            snapshot_id = snapshot.snapshot_id,
                            "snapshot hash failed; manifest will lack checksum"
                        );
                    })
                    .ok();
            let manifest = crate::storage::wal::SnapshotManifest {
                timeline_id: timeline_id.clone(),
                snapshot_key: snapshot_key.clone(),
                snapshot_id: snapshot.snapshot_id,
                snapshot_time: snapshot.created_at_unix_ms as u64,
                base_lsn: current_lsn,
                schema_version: crate::api::REDDB_FORMAT_VERSION,
                format_version: crate::api::REDDB_FORMAT_VERSION,
                snapshot_sha256,
            };
            crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
                .map_err(|err| RedDBError::Internal(err.to_string()))?;

            // The stored hash of the previously archived segment (if any) is
            // passed along to the archiver; an empty config value means "none".
            let prev_segment_hash = self.config_string("red.config.timeline.last_segment_hash", "");
            let prev_hash_arg = if prev_segment_hash.is_empty() {
                None
            } else {
                Some(prev_segment_hash)
            };

            // WAL records are archived only when replication state exists;
            // otherwise the archived LSN stays where it was.
            let archived_lsn = if let Some(primary) = &self.inner.db.replication {
                let oldest = primary
                    .logical_wal_spool
                    .as_ref()
                    .and_then(|spool| spool.oldest_lsn().ok().flatten())
                    .or_else(|| primary.wal_buffer.oldest_lsn())
                    .unwrap_or(last_archived_lsn);
                // If the oldest retained record is more than one LSN past the
                // last archived one, records in between are gone: refuse to
                // archive across the hole.
                if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
                    return Err(RedDBError::Internal(format!(
                        "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
                    )));
                }
                let records = if let Some(spool) = &primary.logical_wal_spool {
                    spool
                        .read_since(last_archived_lsn, usize::MAX)
                        .map_err(|err| RedDBError::Internal(err.to_string()))?
                } else {
                    primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
                };
                if let Some(meta) = crate::storage::wal::archive_change_records(
                    backend.as_ref(),
                    &wal_prefix,
                    &records,
                    prev_hash_arg,
                )
                .map_err(|err| RedDBError::Internal(err.to_string()))?
                {
                    if let Some(spool) = &primary.logical_wal_spool {
                        // The prune result is discarded; a failure here does
                        // not fail the backup.
                        let _ = spool.prune_through(meta.lsn_end);
                    }
                    if let Some(sha) = &meta.sha256 {
                        // Remember this segment's hash for the next archive call.
                        self.inner.db.store().set_config_tree(
                            "red.config.timeline",
                            &crate::json!({ "last_segment_hash": sha }),
                        );
                    }
                    meta.lsn_end
                } else {
                    // The archiver returned no segment metadata: nothing advanced.
                    last_archived_lsn
                }
            } else {
                last_archived_lsn
            };

            let head = crate::storage::wal::BackupHead {
                timeline_id,
                snapshot_key,
                snapshot_id: snapshot.snapshot_id,
                snapshot_time: snapshot.created_at_unix_ms as u64,
                current_lsn,
                last_archived_lsn: archived_lsn,
                wal_prefix,
            };
            crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
                .map_err(|err| RedDBError::Internal(err.to_string()))?;
            // Persist the new archive position only after the head was published.
            self.inner.db.store().set_config_tree(
                "red.config.timeline",
                &crate::json!({
                    "last_archived_lsn": archived_lsn,
                    "id": head.timeline_id
                }),
            );

            // Non-fatal: a failed unified-manifest refresh is only logged.
            if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
                backend.as_ref(),
                &snapshot_prefix,
            ) {
                tracing::warn!(
                    target: "reddb::backup",
                    error = %err,
                    snapshot_prefix = %snapshot_prefix,
                    "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
                );
            }

            // Under an `AckN(n > 0)` policy, wait for replicas to reach the
            // archived LSN. The outcome is only logged; a timeout does not
            // fail the backup.
            match self.commit_policy() {
                crate::replication::CommitPolicy::AckN(n) if n > 0 => {
                    let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
                        .ok()
                        .and_then(|v| v.parse::<u64>().ok())
                        .unwrap_or(5_000);
                    let outcome = self.await_replica_acks(
                        archived_lsn,
                        n,
                        std::time::Duration::from_millis(timeout),
                    );
                    match outcome {
                        crate::replication::AwaitOutcome::Reached(count) => {
                            tracing::debug!(
                                target: "reddb::backup",
                                archived_lsn,
                                n,
                                count,
                                "ack_n: replicas synced before backup return"
                            );
                        }
                        crate::replication::AwaitOutcome::TimedOut { observed, required } => {
                            tracing::warn!(
                                target: "reddb::backup",
                                archived_lsn,
                                observed,
                                required,
                                timeout_ms = timeout,
                                "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
                            );
                        }
                        crate::replication::AwaitOutcome::NotRequired => {}
                    }
                }
                _ => {}
            }

            // Optional, opt-in via config: archive the blob cache's L2
            // directory. Failures are logged and the backup proceeds.
            if self.config_bool("red.config.backup.include_blob_cache", false) {
                let blob_cache_prefix = self.config_string(
                    "red.config.backup.blob_cache_prefix",
                    &format!("{snapshot_prefix}blob_cache/"),
                );
                if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
                    match crate::storage::cache::archive_blob_cache_l2(
                        backend.as_ref(),
                        l2_path,
                        &blob_cache_prefix,
                    ) {
                        Ok(count) => {
                            tracing::info!(
                                target: "reddb::backup",
                                files_uploaded = count,
                                blob_cache_prefix = %blob_cache_prefix,
                                "include_blob_cache: archived L2 directory"
                            );
                        }
                        Err(err) => {
                            tracing::warn!(
                                target: "reddb::backup",
                                error = %err,
                                blob_cache_prefix = %blob_cache_prefix,
                                "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
                            );
                        }
                    }
                } else {
                    tracing::debug!(
                        target: "reddb::backup",
                        "include_blob_cache=true but no L2 path configured; nothing to archive"
                    );
                }
            }

            uploaded = true;
        }

        Ok(crate::replication::scheduler::BackupResult {
            snapshot_id: snapshot.snapshot_id,
            uploaded,
            duration_ms: started.elapsed().as_millis() as u64,
            timestamp: snapshot.created_at_unix_ms as u64,
        })
    }
4362
4363 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
4364 let mut pool = self
4365 .inner
4366 .pool
4367 .lock()
4368 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
4369 if pool.active >= self.inner.pool_config.max_connections {
4370 return Err(RedDBError::Internal(
4371 "connection pool exhausted".to_string(),
4372 ));
4373 }
4374
4375 let id = if let Some(id) = pool.idle.pop() {
4376 id
4377 } else {
4378 let id = pool.next_id;
4379 pool.next_id += 1;
4380 id
4381 };
4382 pool.active += 1;
4383 pool.total_checkouts += 1;
4384 drop(pool);
4385
4386 Ok(RuntimeConnection {
4387 id,
4388 inner: Arc::clone(&self.inner),
4389 })
4390 }
4391
4392 pub fn checkpoint(&self) -> RedDBResult<()> {
4393 self.inner.db.flush_local_only().map_err(|err| {
4398 let msg = err.to_string();
4403 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
4404 lsn: 0,
4405 error: msg.clone(),
4406 }
4407 .emit_global();
4408 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
4409 path: "<flush_local_only>".to_string(),
4410 error: msg.clone(),
4411 }
4412 .emit_global();
4413 RedDBError::Engine(msg)
4414 })?;
4415 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
4416 tracing::warn!(
4417 target: "reddb::serverless::lease",
4418 error = %err,
4419 "checkpoint: skipping remote upload — lease not held"
4420 );
4421 return Ok(());
4422 }
4423 self.inner
4424 .db
4425 .upload_to_remote_backend()
4426 .map_err(|err| RedDBError::Engine(err.to_string()))
4427 }
4428
4429 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
4436 if self.inner.db.remote_backend.is_none() {
4437 return Ok(());
4438 }
4439 match self.inner.write_gate.lease_state() {
4440 crate::runtime::write_gate::LeaseGateState::NotHeld => {
4441 self.inner.audit_log.record(
4442 action,
4443 "system",
4444 "remote_backend",
4445 "err: writer lease not held",
4446 crate::json::Value::Null,
4447 );
4448 Err(RedDBError::ReadOnly(format!(
4449 "writer lease not held — {action} blocked (serverless fence)"
4450 )))
4451 }
4452 _ => Ok(()),
4453 }
4454 }
4455
4456 pub fn run_maintenance(&self) -> RedDBResult<()> {
4457 self.inner
4458 .db
4459 .run_maintenance()
4460 .map_err(|err| RedDBError::Internal(err.to_string()))
4461 }
4462
4463 pub fn scan_collection(
4464 &self,
4465 collection: &str,
4466 cursor: Option<ScanCursor>,
4467 limit: usize,
4468 ) -> RedDBResult<ScanPage> {
4469 let store = self.inner.db.store();
4470 let manager = store
4471 .get_collection(collection)
4472 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
4473
4474 let mut entities = manager.query_all(|_| true);
4475 entities.sort_by_key(|entity| entity.id.raw());
4476
4477 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
4478 let total = entities.len();
4479 let end = total.min(offset.saturating_add(limit.max(1)));
4480 let items = if offset >= total {
4481 Vec::new()
4482 } else {
4483 entities[offset..end].to_vec()
4484 };
4485 let next = (end < total).then_some(ScanCursor { offset: end });
4486
4487 Ok(ScanPage {
4488 collection: collection.to_string(),
4489 items,
4490 next,
4491 total,
4492 })
4493 }
4494
4495 pub fn catalog(&self) -> CatalogModelSnapshot {
4496 self.inner.db.catalog_model_snapshot()
4497 }
4498
4499 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
4500 self.inner.db.catalog_consistency_report()
4501 }
4502
4503 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
4504 crate::catalog::attention_summary(&self.catalog())
4505 }
4506
4507 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
4508 crate::catalog::collection_attention(&self.catalog())
4509 }
4510
4511 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
4512 crate::catalog::index_attention(&self.catalog())
4513 }
4514
4515 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
4516 crate::catalog::graph_projection_attention(&self.catalog())
4517 }
4518
4519 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
4520 crate::catalog::analytics_job_attention(&self.catalog())
4521 }
4522
4523 pub fn stats(&self) -> RuntimeStats {
4524 let pool = runtime_pool_lock(self);
4525 RuntimeStats {
4526 active_connections: pool.active,
4527 idle_connections: pool.idle.len(),
4528 total_checkouts: pool.total_checkouts,
4529 paged_mode: self.inner.db.is_paged(),
4530 started_at_unix_ms: self.inner.started_at_unix_ms,
4531 store: self.inner.db.stats(),
4532 system: SystemInfo::collect(),
4533 result_blob_cache: self.inner.result_blob_cache.stats(),
4534 kv: self.inner.kv_stats.snapshot(),
4535 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
4536 }
4537 }
4538
4539 pub(crate) fn record_metrics_ingest(
4540 &self,
4541 accepted_samples: u64,
4542 accepted_series: u64,
4543 rejected_samples: u64,
4544 rejected_series: u64,
4545 ) {
4546 self.inner.metrics_ingest_stats.record(
4547 accepted_samples,
4548 accepted_series,
4549 rejected_samples,
4550 rejected_series,
4551 );
4552 }
4553
4554 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
4555 self.inner
4556 .metrics_ingest_stats
4557 .record_cardinality_budget_rejections(rejected_series);
4558 }
4559
4560 pub(crate) fn record_metrics_tenant_activity(
4561 &self,
4562 tenant: &str,
4563 namespace: &str,
4564 operation: &str,
4565 ) {
4566 self.inner
4567 .metrics_tenant_activity_stats
4568 .record(tenant, namespace, operation);
4569 }
4570
4571 pub(crate) fn metrics_tenant_activity_snapshot(
4572 &self,
4573 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
4574 self.inner.metrics_tenant_activity_stats.snapshot()
4575 }
4576
4577 pub fn execute_query_with_scope(
4591 &self,
4592 query: &str,
4593 scope: crate::runtime::within_clause::ScopeOverride,
4594 ) -> RedDBResult<RuntimeQueryResult> {
4595 if scope.is_empty() {
4596 return self.execute_query(query);
4597 }
4598 let _scope_guard = ScopeOverrideGuard::install(scope);
4599 self.execute_query(query)
4600 }
4601
4602 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4611 let started = std::time::Instant::now();
4612 let result = self.execute_query_inner(query);
4613 let elapsed_ms = started.elapsed().as_millis() as u64;
4614
4615 let scope = self.ai_scope();
4620 let kind = match result
4621 .as_ref()
4622 .map(|r| r.statement_type)
4623 .unwrap_or("select")
4624 {
4625 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
4626 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
4627 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
4628 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
4629 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
4630 };
4631 self.inner
4637 .slow_query_logger
4638 .record(kind, elapsed_ms, query.to_string(), &scope);
4639
4640 result
4641 }
4642
4643 #[inline(never)]
4644 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
4645 if !has_scope_override_active()
4656 && !query.trim_start().starts_with("WITHIN")
4657 && !query.trim_start().starts_with("within")
4658 && !self
4659 .inner
4660 .tx_contexts
4661 .read()
4662 .contains_key(¤t_connection_id())
4663 {
4664 if let Some(result) = self.try_fast_entity_lookup(query) {
4665 return result;
4666 }
4667 }
4668
4669 match crate::runtime::within_clause::try_strip_within_prefix(query) {
4676 Ok(Some((scope, inner))) => {
4677 let _scope_guard = ScopeOverrideGuard::install(scope);
4678 return self.execute_query_inner(inner);
4683 }
4684 Ok(None) => {}
4685 Err(msg) => return Err(RedDBError::Query(msg)),
4686 }
4687
4688 if let Some(inner) = strip_explain_prefix(query) {
4695 return self.explain_as_rows(query, inner);
4696 }
4697
4698 if let Some(value) = parse_set_local_tenant(query)? {
4703 let conn_id = current_connection_id();
4704 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
4705 return Err(RedDBError::Query(
4706 "SET LOCAL TENANT requires an active transaction".to_string(),
4707 ));
4708 }
4709 self.inner
4710 .tx_local_tenants
4711 .write()
4712 .insert(conn_id, value.clone());
4713 return Ok(RuntimeQueryResult::ok_message(
4714 query.to_string(),
4715 &match &value {
4716 Some(id) => format!("local tenant set: {id}"),
4717 None => "local tenant cleared".to_string(),
4718 },
4719 "set_local_tenant",
4720 ));
4721 }
4722
4723 if super::red_schema::is_system_schema_write(query) {
4724 return Err(RedDBError::Query(
4725 super::red_schema::READ_ONLY_ERROR.to_string(),
4726 ));
4727 }
4728
4729 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
4730 let execution_query = rewritten_query.as_deref().unwrap_or(query);
4731
4732 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
4733 let _frame_guards = frame.install(self);
4734
4735 let _log_span = crate::telemetry::span::query_span(query).entered();
4742
4743 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
4745 return self.execute_query_expr(rewritten);
4746 }
4747
4748 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
4750 return result;
4751 }
4752
4753 if let Some(result) = frame.read_result_cache(self) {
4755 return Ok(result);
4756 }
4757
4758 let prepared = frame.prepare_statement(self, execution_query)?;
4759 let mode = prepared.mode;
4760 let expr = prepared.expr;
4761
4762 let statement = query_expr_name(&expr);
4763 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
4764
4765 let _lock_guard = frame.prepare_dispatch(self, &expr)?;
4766 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
4767
4768 let query_result = match expr {
4769 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
4770 let (graph, node_properties, edge_properties) =
4778 self.materialize_graph_with_rls()?;
4779 let result =
4780 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
4781 &graph,
4782 &expr,
4783 node_properties,
4784 edge_properties,
4785 )
4786 .map_err(|err| RedDBError::Query(err.to_string()))?;
4787
4788 Ok(RuntimeQueryResult {
4789 query: query.to_string(),
4790 mode,
4791 statement,
4792 engine: "materialized-graph",
4793 result,
4794 affected_rows: 0,
4795 statement_type: "select",
4796 })
4797 }
4798 QueryExpr::Table(table) => {
4799 let table = self.resolve_table_expr_subqueries(
4800 table,
4801 &frame as &dyn super::statement_frame::ReadFrame,
4802 )?;
4803 if super::red_schema::is_virtual_table(&table.table) {
4804 return Ok(RuntimeQueryResult {
4805 query: query.to_string(),
4806 mode,
4807 statement,
4808 engine: "runtime-red-schema",
4809 result: super::red_schema::red_query(
4810 self,
4811 &table.table,
4812 &table,
4813 &frame as &dyn super::statement_frame::ReadFrame,
4814 )?,
4815 affected_rows: 0,
4816 statement_type: "select",
4817 });
4818 }
4819
4820 if let Some(result) = self.execute_probabilistic_select(&table)? {
4821 return Ok(RuntimeQueryResult {
4822 query: query.to_string(),
4823 mode,
4824 statement,
4825 engine: "runtime-probabilistic",
4826 result,
4827 affected_rows: 0,
4828 statement_type: "select",
4829 });
4830 }
4831
4832 if self.inner.foreign_tables.is_foreign_table(&table.table) {
4840 let records = self
4841 .inner
4842 .foreign_tables
4843 .scan(&table.table)
4844 .map_err(|e| RedDBError::Internal(e.to_string()))?;
4845 let result = apply_foreign_table_filters(records, &table);
4846 return Ok(RuntimeQueryResult {
4847 query: query.to_string(),
4848 mode,
4849 statement,
4850 engine: "runtime-fdw",
4851 result,
4852 affected_rows: 0,
4853 statement_type: "select",
4854 });
4855 }
4856
4857 let Some(table_with_rls) = self.authorize_relational_table_select(
4874 table,
4875 &frame as &dyn super::statement_frame::ReadFrame,
4876 )?
4877 else {
4878 let empty = crate::storage::query::unified::UnifiedResult::empty();
4879 return Ok(RuntimeQueryResult {
4880 query: query.to_string(),
4881 mode,
4882 statement,
4883 engine: "runtime-table-rls",
4884 result: empty,
4885 affected_rows: 0,
4886 statement_type: "select",
4887 });
4888 };
4889 Ok(RuntimeQueryResult {
4890 query: query.to_string(),
4891 mode,
4892 statement,
4893 engine: "runtime-table",
4894 result: execute_runtime_table_query(
4895 &self.inner.db,
4896 &table_with_rls,
4897 Some(&self.inner.index_store),
4898 )?,
4899 affected_rows: 0,
4900 statement_type: "select",
4901 })
4902 }
4903 QueryExpr::Join(join) => {
4904 let join_with_rls = match self.authorize_relational_join_select(
4913 join,
4914 &frame as &dyn super::statement_frame::ReadFrame,
4915 )? {
4916 Some(j) => j,
4917 None => {
4918 return Ok(RuntimeQueryResult {
4919 query: query.to_string(),
4920 mode,
4921 statement,
4922 engine: "runtime-join-rls",
4923 result: crate::storage::query::unified::UnifiedResult::empty(),
4924 affected_rows: 0,
4925 statement_type: "select",
4926 });
4927 }
4928 };
4929 Ok(RuntimeQueryResult {
4930 query: query.to_string(),
4931 mode,
4932 statement,
4933 engine: "runtime-join",
4934 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
4935 affected_rows: 0,
4936 statement_type: "select",
4937 })
4938 }
4939 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
4940 query: query.to_string(),
4941 mode,
4942 statement,
4943 engine: "runtime-vector",
4944 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
4945 affected_rows: 0,
4946 statement_type: "select",
4947 }),
4948 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
4949 query: query.to_string(),
4950 mode,
4951 statement,
4952 engine: "runtime-hybrid",
4953 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
4954 affected_rows: 0,
4955 statement_type: "select",
4956 }),
4957 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
4959 Err(RedDBError::Query(
4960 super::red_schema::READ_ONLY_ERROR.to_string(),
4961 ))
4962 }
4963 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
4964 Err(RedDBError::Query(
4965 super::red_schema::READ_ONLY_ERROR.to_string(),
4966 ))
4967 }
4968 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
4969 Err(RedDBError::Query(
4970 super::red_schema::READ_ONLY_ERROR.to_string(),
4971 ))
4972 }
4973 QueryExpr::Insert(ref insert) => self
4974 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
4975 self.execute_insert(query, insert)
4976 }),
4977 QueryExpr::Update(ref update) => self
4978 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
4979 self.execute_update(query, update)
4980 }),
4981 QueryExpr::Delete(ref delete) => self
4982 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
4983 self.execute_delete(query, delete)
4984 }),
4985 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
4987 QueryExpr::CreateCollection(ref create) => {
4988 self.execute_create_collection(query, create)
4989 }
4990 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
4991 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
4992 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
4993 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
4994 QueryExpr::DropDocument(ref drop_document) => {
4995 self.execute_drop_document(query, drop_document)
4996 }
4997 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
4998 QueryExpr::DropCollection(ref drop_collection) => {
4999 self.execute_drop_collection(query, drop_collection)
5000 }
5001 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
5002 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
5003 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
5004 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
5006 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
5008 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
5010 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
5011 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
5012 QueryExpr::ProbabilisticCommand(ref cmd) => {
5013 self.execute_probabilistic_command(query, cmd)
5014 }
5015 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
5017 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
5018 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
5020 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
5021 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
5022 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
5023 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
5024 QueryExpr::EventsBackfill(ref backfill) => {
5025 self.execute_events_backfill(query, backfill)
5026 }
5027 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
5028 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
5029 ))),
5030 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
5031 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
5032 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
5033 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
5034 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
5035 QueryExpr::SetConfig { ref key, ref value } => {
5037 if key.starts_with("red.secret.") {
5038 return Err(RedDBError::Query(
5039 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
5040 ));
5041 }
5042 let store = self.inner.db.store();
5043 let json_val = match value {
5044 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
5045 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
5046 Value::Float(n) => crate::serde_json::Value::Number(*n),
5047 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
5048 _ => crate::serde_json::Value::String(value.to_string()),
5049 };
5050 store.set_config_tree(key, &json_val);
5051 update_current_config_value(key, value.clone());
5052 self.invalidate_result_cache();
5057 Ok(RuntimeQueryResult::ok_message(
5058 query.to_string(),
5059 &format!("config set: {key}"),
5060 "set",
5061 ))
5062 }
5063 QueryExpr::SetSecret { ref key, ref value } => {
5065 if key.starts_with("red.config.") {
5066 return Err(RedDBError::Query(
5067 "red.config.* is reserved for config; use SET CONFIG".to_string(),
5068 ));
5069 }
5070 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5071 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
5072 })?;
5073 if matches!(value, Value::Null) {
5074 auth_store
5075 .vault_kv_try_delete(key)
5076 .map_err(|err| RedDBError::Query(err.to_string()))?;
5077 update_current_secret_value(key, None);
5078 self.invalidate_result_cache();
5079 return Ok(RuntimeQueryResult::ok_message(
5080 query.to_string(),
5081 &format!("secret deleted: {key}"),
5082 "delete_secret",
5083 ));
5084 }
5085 let value = secret_sql_value_to_string(value)?;
5086 auth_store
5087 .vault_kv_try_set(key.clone(), value.clone())
5088 .map_err(|err| RedDBError::Query(err.to_string()))?;
5089 update_current_secret_value(key, Some(value));
5090 self.invalidate_result_cache();
5091 Ok(RuntimeQueryResult::ok_message(
5092 query.to_string(),
5093 &format!("secret set: {key}"),
5094 "set_secret",
5095 ))
5096 }
5097 QueryExpr::DeleteSecret { ref key } => {
5099 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5100 RedDBError::Query(
5101 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
5102 )
5103 })?;
5104 let deleted = auth_store
5105 .vault_kv_try_delete(key)
5106 .map_err(|err| RedDBError::Query(err.to_string()))?;
5107 if deleted {
5108 update_current_secret_value(key, None);
5109 }
5110 self.invalidate_result_cache();
5111 Ok(RuntimeQueryResult::ok_message(
5112 query.to_string(),
5113 &format!("secret deleted: {key}"),
5114 if deleted {
5115 "delete_secret"
5116 } else {
5117 "delete_secret_not_found"
5118 },
5119 ))
5120 }
5121 QueryExpr::ShowSecrets { ref prefix } => {
5123 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5124 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
5125 })?;
5126 if !auth_store.is_vault_backed() {
5127 return Err(RedDBError::Query(
5128 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
5129 ));
5130 }
5131 let mut keys = auth_store.vault_kv_keys();
5132 keys.sort();
5133 let mut result = UnifiedResult::with_columns(vec![
5134 "key".into(),
5135 "value".into(),
5136 "status".into(),
5137 ]);
5138 for key in keys {
5139 if let Some(ref pfx) = prefix {
5140 if !key.starts_with(pfx) {
5141 continue;
5142 }
5143 }
5144 let mut record = UnifiedRecord::new();
5145 record.set("key", Value::text(key));
5146 record.set("value", Value::text("***"));
5147 record.set("status", Value::text("active"));
5148 result.push(record);
5149 }
5150 Ok(RuntimeQueryResult {
5151 query: query.to_string(),
5152 mode,
5153 statement: "show_secrets",
5154 engine: "runtime-secret",
5155 result,
5156 affected_rows: 0,
5157 statement_type: "select",
5158 })
5159 }
5160 QueryExpr::ShowConfig { ref prefix } => {
5162 let store = self.inner.db.store();
5163 let all_collections = store.list_collections();
5164 if !all_collections.contains(&"red_config".to_string()) {
5165 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5166 return Ok(RuntimeQueryResult {
5167 query: query.to_string(),
5168 mode,
5169 statement: "show_config",
5170 engine: "runtime-config",
5171 result,
5172 affected_rows: 0,
5173 statement_type: "select",
5174 });
5175 }
5176 let manager = store
5177 .get_collection("red_config")
5178 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5179 let entities = manager.query_all(|_| true);
5180 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5181 for entity in entities {
5182 if let EntityData::Row(ref row) = entity.data {
5183 if let Some(ref named) = row.named {
5184 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5185 let val = named.get("value").cloned().unwrap_or(Value::Null);
5186 let key_str = match &key_val {
5187 Value::Text(s) => s.as_ref(),
5188 _ => continue,
5189 };
5190 if let Some(ref pfx) = prefix {
5191 if !key_str.starts_with(pfx.as_str()) {
5192 continue;
5193 }
5194 }
5195 let entity_id = entity.id.raw();
5196 match latest.get(key_str) {
5197 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5198 _ => {
5199 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5200 }
5201 }
5202 }
5203 }
5204 }
5205 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5206 for (_, key_val, val) in latest.into_values() {
5207 let mut record = UnifiedRecord::new();
5208 record.set("key", key_val);
5209 record.set("value", val);
5210 result.push(record);
5211 }
5212 Ok(RuntimeQueryResult {
5213 query: query.to_string(),
5214 mode,
5215 statement: "show_config",
5216 engine: "runtime-config",
5217 result,
5218 affected_rows: 0,
5219 statement_type: "select",
5220 })
5221 }
5222 QueryExpr::SetTenant(ref value) => {
5228 match value {
5229 Some(id) => set_current_tenant(id.clone()),
5230 None => clear_current_tenant(),
5231 }
5232 Ok(RuntimeQueryResult::ok_message(
5233 query.to_string(),
5234 &match value {
5235 Some(id) => format!("tenant set: {id}"),
5236 None => "tenant cleared".to_string(),
5237 },
5238 "set_tenant",
5239 ))
5240 }
5241 QueryExpr::ShowTenant => {
5242 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5243 let mut record = UnifiedRecord::new();
5244 record.set(
5245 "tenant",
5246 current_tenant().map(Value::text).unwrap_or(Value::Null),
5247 );
5248 result.push(record);
5249 Ok(RuntimeQueryResult {
5250 query: query.to_string(),
5251 mode,
5252 statement: "show_tenant",
5253 engine: "runtime-tenant",
5254 result,
5255 affected_rows: 0,
5256 statement_type: "select",
5257 })
5258 }
5259 QueryExpr::TransactionControl(ref ctl) => {
5271 use crate::storage::query::ast::TxnControl;
5272 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5273 use crate::storage::transaction::IsolationLevel;
5274
5275 let conn_id = current_connection_id();
5280
5281 let (kind, msg) = match ctl {
5282 TxnControl::Begin => {
5283 let mgr = Arc::clone(&self.inner.snapshot_manager);
5284 let xid = mgr.begin();
5285 let snapshot = mgr.snapshot(xid);
5286 let ctx = TxnContext {
5287 xid,
5288 isolation: IsolationLevel::SnapshotIsolation,
5289 snapshot,
5290 savepoints: Vec::new(),
5291 released_sub_xids: Vec::new(),
5292 };
5293 self.inner.tx_contexts.write().insert(conn_id, ctx);
5294 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
5295 }
5296 TxnControl::Commit => {
5297 self.inner.tx_local_tenants.write().remove(&conn_id);
5299 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5300 match ctx {
5301 Some(ctx) => {
5302 let mut own_xids = std::collections::HashSet::new();
5303 own_xids.insert(ctx.xid);
5304 for (_, sub) in &ctx.savepoints {
5305 own_xids.insert(*sub);
5306 }
5307 for sub in &ctx.released_sub_xids {
5308 own_xids.insert(*sub);
5309 }
5310 if let Err(err) = self.check_table_row_write_conflicts(
5311 conn_id,
5312 &ctx.snapshot,
5313 &own_xids,
5314 ) {
5315 for (_, sub) in &ctx.savepoints {
5316 self.inner.snapshot_manager.rollback(*sub);
5317 }
5318 for sub in &ctx.released_sub_xids {
5319 self.inner.snapshot_manager.rollback(*sub);
5320 }
5321 self.inner.snapshot_manager.rollback(ctx.xid);
5322 self.revive_pending_versioned_updates(conn_id);
5323 self.revive_pending_tombstones(conn_id);
5324 self.discard_pending_kv_watch_events(conn_id);
5325 self.discard_pending_store_wal_actions(conn_id);
5326 return Err(err);
5327 }
5328 self.restore_pending_write_stamps(conn_id);
5329 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
5330 for (_, sub) in &ctx.savepoints {
5331 self.inner.snapshot_manager.rollback(*sub);
5332 }
5333 for sub in &ctx.released_sub_xids {
5334 self.inner.snapshot_manager.rollback(*sub);
5335 }
5336 self.inner.snapshot_manager.rollback(ctx.xid);
5337 self.revive_pending_versioned_updates(conn_id);
5338 self.revive_pending_tombstones(conn_id);
5339 self.discard_pending_kv_watch_events(conn_id);
5340 return Err(err);
5341 }
5342 for (_, sub) in &ctx.savepoints {
5348 self.inner.snapshot_manager.commit(*sub);
5349 }
5350 for sub in &ctx.released_sub_xids {
5351 self.inner.snapshot_manager.commit(*sub);
5352 }
5353 self.inner.snapshot_manager.commit(ctx.xid);
5354 self.finalize_pending_versioned_updates(conn_id);
5355 self.finalize_pending_tombstones(conn_id);
5356 self.finalize_pending_kv_watch_events(conn_id);
5357 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
5358 }
5359 None => (
5360 "commit",
5361 "COMMIT outside transaction — no-op (autocommit)".to_string(),
5362 ),
5363 }
5364 }
5365 TxnControl::Rollback => {
5366 self.inner.tx_local_tenants.write().remove(&conn_id);
5367 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
5368 match ctx {
5369 Some(ctx) => {
5370 for (_, sub) in &ctx.savepoints {
5373 self.inner.snapshot_manager.rollback(*sub);
5374 }
5375 for sub in &ctx.released_sub_xids {
5376 self.inner.snapshot_manager.rollback(*sub);
5377 }
5378 self.inner.snapshot_manager.rollback(ctx.xid);
5379 self.revive_pending_versioned_updates(conn_id);
5383 self.revive_pending_tombstones(conn_id);
5384 self.discard_pending_kv_watch_events(conn_id);
5385 self.discard_pending_store_wal_actions(conn_id);
5386 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
5387 }
5388 None => (
5389 "rollback",
5390 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
5391 ),
5392 }
5393 }
5394 TxnControl::Savepoint(name) => {
5401 let mgr = Arc::clone(&self.inner.snapshot_manager);
5402 let mut guard = self.inner.tx_contexts.write();
5403 match guard.get_mut(&conn_id) {
5404 Some(ctx) => {
5405 let sub = mgr.begin();
5406 ctx.savepoints.push((name.clone(), sub));
5407 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
5408 }
5409 None => (
5410 "savepoint",
5411 "SAVEPOINT outside transaction — no-op".to_string(),
5412 ),
5413 }
5414 }
5415 TxnControl::ReleaseSavepoint(name) => {
5416 let mut guard = self.inner.tx_contexts.write();
5417 match guard.get_mut(&conn_id) {
5418 Some(ctx) => {
5419 let pos = ctx
5420 .savepoints
5421 .iter()
5422 .position(|(n, _)| n == name)
5423 .ok_or_else(|| {
5424 RedDBError::Internal(format!(
5425 "savepoint {name} does not exist"
5426 ))
5427 })?;
5428 let released = ctx.savepoints.len() - pos;
5436 let popped: Vec<Xid> = ctx
5437 .savepoints
5438 .split_off(pos)
5439 .into_iter()
5440 .map(|(_, x)| x)
5441 .collect();
5442 ctx.released_sub_xids.extend(popped);
5443 (
5444 "release_savepoint",
5445 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
5446 )
5447 }
5448 None => (
5449 "release_savepoint",
5450 "RELEASE outside transaction — no-op".to_string(),
5451 ),
5452 }
5453 }
5454 TxnControl::RollbackToSavepoint(name) => {
5455 let mgr = Arc::clone(&self.inner.snapshot_manager);
5456 let drop_result: Option<(Xid, Vec<Xid>)> = {
5461 let mut guard = self.inner.tx_contexts.write();
5462 if let Some(ctx) = guard.get_mut(&conn_id) {
5463 let pos = ctx
5464 .savepoints
5465 .iter()
5466 .position(|(n, _)| n == name)
5467 .ok_or_else(|| {
5468 RedDBError::Internal(format!(
5469 "savepoint {name} does not exist"
5470 ))
5471 })?;
5472 let savepoint_xid = ctx.savepoints[pos].1;
5473 let aborted: Vec<Xid> = ctx
5474 .savepoints
5475 .split_off(pos)
5476 .into_iter()
5477 .map(|(_, x)| x)
5478 .collect();
5479 Some((savepoint_xid, aborted))
5480 } else {
5481 None
5482 }
5483 };
5484
5485 match drop_result {
5486 Some((savepoint_xid, aborted)) => {
5487 for x in &aborted {
5488 mgr.rollback(*x);
5489 }
5490 let reverted_updates =
5491 self.revive_versioned_updates_since(conn_id, savepoint_xid);
5492 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
5493 (
5494 "rollback_to_savepoint",
5495 format!(
5496 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
5497 aborted.len(),
5498 ),
5499 )
5500 }
5501 None => (
5502 "rollback_to_savepoint",
5503 "ROLLBACK TO outside transaction — no-op".to_string(),
5504 ),
5505 }
5506 }
5507 };
5508 Ok(RuntimeQueryResult::ok_message(
5509 query.to_string(),
5510 &msg,
5511 kind,
5512 ))
5513 }
5514 QueryExpr::CreateSchema(ref q) => {
5527 let store = self.inner.db.store();
5528 let key = format!("schema.{}", q.name);
5529 if store.get_config(&key).is_some() {
5530 if q.if_not_exists {
5531 return Ok(RuntimeQueryResult::ok_message(
5532 query.to_string(),
5533 &format!("schema {} already exists — skipped", q.name),
5534 "create_schema",
5535 ));
5536 }
5537 return Err(RedDBError::Internal(format!(
5538 "schema {} already exists",
5539 q.name
5540 )));
5541 }
5542 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
5543 Ok(RuntimeQueryResult::ok_message(
5544 query.to_string(),
5545 &format!("schema {} created", q.name),
5546 "create_schema",
5547 ))
5548 }
5549 QueryExpr::DropSchema(ref q) => {
5550 let store = self.inner.db.store();
5551 let key = format!("schema.{}", q.name);
5552 let existed = store.get_config(&key).is_some();
5553 if !existed && !q.if_exists {
5554 return Err(RedDBError::Internal(format!(
5555 "schema {} does not exist",
5556 q.name
5557 )));
5558 }
5559 store.set_config_tree(&key, &crate::serde_json::Value::Null);
5561 let suffix = if q.cascade {
5562 " (CASCADE accepted — tables untouched)"
5563 } else {
5564 ""
5565 };
5566 Ok(RuntimeQueryResult::ok_message(
5567 query.to_string(),
5568 &format!("schema {} dropped{}", q.name, suffix),
5569 "drop_schema",
5570 ))
5571 }
5572 QueryExpr::CreateSequence(ref q) => {
5573 let store = self.inner.db.store();
5574 let base = format!("sequence.{}", q.name);
5575 let start_key = format!("{base}.start");
5576 let incr_key = format!("{base}.increment");
5577 let curr_key = format!("{base}.current");
5578 if store.get_config(&start_key).is_some() {
5579 if q.if_not_exists {
5580 return Ok(RuntimeQueryResult::ok_message(
5581 query.to_string(),
5582 &format!("sequence {} already exists — skipped", q.name),
5583 "create_sequence",
5584 ));
5585 }
5586 return Err(RedDBError::Internal(format!(
5587 "sequence {} already exists",
5588 q.name
5589 )));
5590 }
5591 let initial_current = q.start - q.increment;
5594 store.set_config_tree(
5595 &start_key,
5596 &crate::serde_json::Value::Number(q.start as f64),
5597 );
5598 store.set_config_tree(
5599 &incr_key,
5600 &crate::serde_json::Value::Number(q.increment as f64),
5601 );
5602 store.set_config_tree(
5603 &curr_key,
5604 &crate::serde_json::Value::Number(initial_current as f64),
5605 );
5606 Ok(RuntimeQueryResult::ok_message(
5607 query.to_string(),
5608 &format!(
5609 "sequence {} created (start={}, increment={})",
5610 q.name, q.start, q.increment
5611 ),
5612 "create_sequence",
5613 ))
5614 }
5615 QueryExpr::DropSequence(ref q) => {
5616 let store = self.inner.db.store();
5617 let base = format!("sequence.{}", q.name);
5618 let existed = store.get_config(&format!("{base}.start")).is_some();
5619 if !existed && !q.if_exists {
5620 return Err(RedDBError::Internal(format!(
5621 "sequence {} does not exist",
5622 q.name
5623 )));
5624 }
5625 for k in ["start", "increment", "current"] {
5626 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
5627 }
5628 Ok(RuntimeQueryResult::ok_message(
5629 query.to_string(),
5630 &format!("sequence {} dropped", q.name),
5631 "drop_sequence",
5632 ))
5633 }
5634 QueryExpr::CreateView(ref q) => {
5644 let mut views = self.inner.views.write();
5645 if views.contains_key(&q.name) && !q.or_replace {
5646 if q.if_not_exists {
5647 return Ok(RuntimeQueryResult::ok_message(
5648 query.to_string(),
5649 &format!("view {} already exists — skipped", q.name),
5650 "create_view",
5651 ));
5652 }
5653 return Err(RedDBError::Internal(format!(
5654 "view {} already exists",
5655 q.name
5656 )));
5657 }
5658 views.insert(q.name.clone(), Arc::new(q.clone()));
5659 drop(views);
5660
5661 if q.materialized {
5663 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
5664 let refresh = match q.refresh_every_ms {
5665 Some(ms) => {
5666 RefreshPolicy::Periodic(std::time::Duration::from_millis(ms))
5667 }
5668 None => RefreshPolicy::Manual,
5669 };
5670 let def = MaterializedViewDef {
5671 name: q.name.clone(),
5672 query: format!("<parsed view {}>", q.name),
5673 dependencies: collect_table_refs(&q.query),
5674 refresh,
5675 retention_duration_ms: q.retention_duration_ms,
5676 };
5677 self.inner.materialized_views.write().register(def);
5678 }
5679 self.invalidate_plan_cache();
5684 self.invalidate_result_cache();
5685
5686 Ok(RuntimeQueryResult::ok_message(
5687 query.to_string(),
5688 &format!(
5689 "{}view {} created",
5690 if q.materialized { "materialized " } else { "" },
5691 q.name
5692 ),
5693 "create_view",
5694 ))
5695 }
5696 QueryExpr::DropView(ref q) => {
5697 let mut views = self.inner.views.write();
5698 let existed = views.remove(&q.name).is_some();
5699 drop(views);
5700 if q.materialized || existed {
5701 self.inner.materialized_views.write().remove(&q.name);
5703 }
5704 self.invalidate_plan_cache();
5707 self.invalidate_result_cache();
5708 if !existed && !q.if_exists {
5709 return Err(RedDBError::Internal(format!(
5710 "view {} does not exist",
5711 q.name
5712 )));
5713 }
5714 self.invalidate_plan_cache();
5715 Ok(RuntimeQueryResult::ok_message(
5716 query.to_string(),
5717 &format!("view {} dropped", q.name),
5718 "drop_view",
5719 ))
5720 }
5721 QueryExpr::RefreshMaterializedView(ref q) => {
5722 let view = {
5725 let views = self.inner.views.read();
5726 views.get(&q.name).cloned()
5727 };
5728 let view = match view {
5729 Some(v) => v,
5730 None => {
5731 return Err(RedDBError::Internal(format!(
5732 "view {} does not exist",
5733 q.name
5734 )))
5735 }
5736 };
5737 if !view.materialized {
5738 return Err(RedDBError::Internal(format!(
5739 "view {} is not materialized — REFRESH requires \
5740 CREATE MATERIALIZED VIEW",
5741 q.name
5742 )));
5743 }
5744 let started = std::time::Instant::now();
5746 let now_ms = std::time::SystemTime::now()
5747 .duration_since(std::time::UNIX_EPOCH)
5748 .map(|d| d.as_millis() as u64)
5749 .unwrap_or(0);
5750 match self.execute_query_expr((*view.query).clone()) {
5751 Ok(inner_result) => {
5752 let duration_ms = started.elapsed().as_millis() as u64;
5753 let row_count = inner_result.result.records.len() as u64;
5754 let serialized = format!("{:?}", inner_result.result);
5755 self.inner
5756 .materialized_views
5757 .write()
5758 .record_refresh_success(
5759 &q.name,
5760 serialized.into_bytes(),
5761 row_count,
5762 duration_ms,
5763 now_ms,
5764 );
5765 Ok(RuntimeQueryResult::ok_message(
5766 query.to_string(),
5767 &format!("materialized view {} refreshed", q.name),
5768 "refresh_materialized_view",
5769 ))
5770 }
5771 Err(err) => {
5772 let duration_ms = started.elapsed().as_millis() as u64;
5773 let msg = err.to_string();
5774 self.inner
5775 .materialized_views
5776 .write()
5777 .record_refresh_failure(
5778 &q.name,
5779 msg.clone(),
5780 duration_ms,
5781 now_ms,
5782 );
5783 Err(err)
5784 }
5785 }
5786 }
5787 QueryExpr::CreatePolicy(ref q) => {
5794 let key = (q.table.clone(), q.name.clone());
5795 self.inner
5796 .rls_policies
5797 .write()
5798 .insert(key, Arc::new(q.clone()));
5799 self.invalidate_plan_cache();
5800 self.schema_vocabulary_apply(
5804 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
5805 collection: q.table.clone(),
5806 policy: q.name.clone(),
5807 },
5808 );
5809 Ok(RuntimeQueryResult::ok_message(
5810 query.to_string(),
5811 &format!("policy {} on {} created", q.name, q.table),
5812 "create_policy",
5813 ))
5814 }
5815 QueryExpr::DropPolicy(ref q) => {
5816 let removed = self
5817 .inner
5818 .rls_policies
5819 .write()
5820 .remove(&(q.table.clone(), q.name.clone()))
5821 .is_some();
5822 if !removed && !q.if_exists {
5823 return Err(RedDBError::Internal(format!(
5824 "policy {} on {} does not exist",
5825 q.name, q.table
5826 )));
5827 }
5828 self.invalidate_plan_cache();
5829 self.schema_vocabulary_apply(
5832 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
5833 collection: q.table.clone(),
5834 policy: q.name.clone(),
5835 },
5836 );
5837 Ok(RuntimeQueryResult::ok_message(
5838 query.to_string(),
5839 &format!("policy {} on {} dropped", q.name, q.table),
5840 "drop_policy",
5841 ))
5842 }
5843 QueryExpr::CreateServer(ref q) => {
5854 use crate::storage::fdw::FdwOptions;
5855 let registry = Arc::clone(&self.inner.foreign_tables);
5856 if registry.server(&q.name).is_some() {
5857 if q.if_not_exists {
5858 return Ok(RuntimeQueryResult::ok_message(
5859 query.to_string(),
5860 &format!("server {} already exists — skipped", q.name),
5861 "create_server",
5862 ));
5863 }
5864 return Err(RedDBError::Internal(format!(
5865 "server {} already exists",
5866 q.name
5867 )));
5868 }
5869 let mut opts = FdwOptions::new();
5870 for (k, v) in &q.options {
5871 opts.values.insert(k.clone(), v.clone());
5872 }
5873 registry
5874 .create_server(&q.name, &q.wrapper, opts)
5875 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5876 Ok(RuntimeQueryResult::ok_message(
5877 query.to_string(),
5878 &format!("server {} created (wrapper {})", q.name, q.wrapper),
5879 "create_server",
5880 ))
5881 }
5882 QueryExpr::DropServer(ref q) => {
5883 let existed = self.inner.foreign_tables.drop_server(&q.name);
5884 if !existed && !q.if_exists {
5885 return Err(RedDBError::Internal(format!(
5886 "server {} does not exist",
5887 q.name
5888 )));
5889 }
5890 Ok(RuntimeQueryResult::ok_message(
5891 query.to_string(),
5892 &format!(
5893 "server {} dropped{}",
5894 q.name,
5895 if q.cascade { " (cascade)" } else { "" }
5896 ),
5897 "drop_server",
5898 ))
5899 }
5900 QueryExpr::CreateForeignTable(ref q) => {
5901 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
5902 let registry = Arc::clone(&self.inner.foreign_tables);
5903 if registry.foreign_table(&q.name).is_some() {
5904 if q.if_not_exists {
5905 return Ok(RuntimeQueryResult::ok_message(
5906 query.to_string(),
5907 &format!("foreign table {} already exists — skipped", q.name),
5908 "create_foreign_table",
5909 ));
5910 }
5911 return Err(RedDBError::Internal(format!(
5912 "foreign table {} already exists",
5913 q.name
5914 )));
5915 }
5916 let mut opts = FdwOptions::new();
5917 for (k, v) in &q.options {
5918 opts.values.insert(k.clone(), v.clone());
5919 }
5920 let columns: Vec<ForeignColumn> = q
5921 .columns
5922 .iter()
5923 .map(|c| ForeignColumn {
5924 name: c.name.clone(),
5925 data_type: c.data_type.clone(),
5926 not_null: c.not_null,
5927 })
5928 .collect();
5929 registry
5930 .create_foreign_table(ForeignTable {
5931 name: q.name.clone(),
5932 server_name: q.server.clone(),
5933 columns,
5934 options: opts,
5935 })
5936 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5937 self.invalidate_plan_cache();
5938 Ok(RuntimeQueryResult::ok_message(
5939 query.to_string(),
5940 &format!("foreign table {} created (server {})", q.name, q.server),
5941 "create_foreign_table",
5942 ))
5943 }
5944 QueryExpr::DropForeignTable(ref q) => {
5945 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
5946 if !existed && !q.if_exists {
5947 return Err(RedDBError::Internal(format!(
5948 "foreign table {} does not exist",
5949 q.name
5950 )));
5951 }
5952 self.invalidate_plan_cache();
5953 Ok(RuntimeQueryResult::ok_message(
5954 query.to_string(),
5955 &format!("foreign table {} dropped", q.name),
5956 "drop_foreign_table",
5957 ))
5958 }
5959 QueryExpr::CopyFrom(ref q) => {
5965 use crate::storage::import::{CsvConfig, CsvImporter};
5966 let store = self.inner.db.store();
5967 let cfg = CsvConfig {
5968 collection: q.table.clone(),
5969 has_header: q.has_header,
5970 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
5971 ..CsvConfig::default()
5972 };
5973 let importer = CsvImporter::new(cfg);
5974 let stats = importer
5975 .import_file(&q.path, store.as_ref())
5976 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
5977 self.note_table_write(&q.table);
5979 Ok(RuntimeQueryResult::ok_message(
5980 query.to_string(),
5981 &format!(
5982 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
5983 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
5984 ),
5985 "copy_from",
5986 ))
5987 }
5988 QueryExpr::MaintenanceCommand(ref cmd) => {
6004 use crate::storage::query::ast::MaintenanceCommand as Mc;
6005 let store = self.inner.db.store();
6006 let (kind, msg) = match cmd {
6007 Mc::Analyze { target } => {
6008 let targets: Vec<String> = match target {
6009 Some(t) => vec![t.clone()],
6010 None => store.list_collections(),
6011 };
6012 for t in &targets {
6013 self.refresh_table_planner_stats(t);
6014 }
6015 (
6016 "analyze",
6017 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
6018 )
6019 }
6020 Mc::Vacuum { target, full } => {
6021 let targets: Vec<String> = match target {
6022 Some(t) => vec![t.clone()],
6023 None => store.list_collections(),
6024 };
6025 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
6026 let mut vacuum_stats =
6027 crate::storage::unified::store::MvccVacuumStats::default();
6028 for t in &targets {
6029 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
6030 RedDBError::Internal(format!(
6031 "VACUUM MVCC history failed for {t}: {e}"
6032 ))
6033 })?;
6034 if stats.reclaimed_versions > 0 {
6035 self.rebuild_runtime_indexes_for_table(t)?;
6036 }
6037 vacuum_stats.add(&stats);
6038 }
6039 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
6040 for t in &targets {
6042 self.refresh_table_planner_stats(t);
6043 }
6044 let persisted = if *full {
6048 match store.persist() {
6049 Ok(()) => true,
6050 Err(e) => {
6051 return Err(RedDBError::Internal(format!(
6052 "VACUUM FULL persist failed: {e:?}"
6053 )));
6054 }
6055 }
6056 } else {
6057 false
6058 };
6059 self.invalidate_result_cache();
6061 (
6062 "vacuum",
6063 format!(
6064 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
6065 if *full { " FULL" } else { "" },
6066 targets.len(),
6067 vacuum_stats.scanned_versions,
6068 vacuum_stats.retained_versions,
6069 vacuum_stats.reclaimed_versions,
6070 vacuum_stats.retained_history_versions,
6071 vacuum_stats.reclaimed_history_versions,
6072 vacuum_stats.retained_tombstones,
6073 vacuum_stats.reclaimed_tombstones,
6074 if persisted {
6075 " (pages flushed to disk)"
6076 } else {
6077 ""
6078 }
6079 ),
6080 )
6081 }
6082 };
6083 Ok(RuntimeQueryResult::ok_message(
6084 query.to_string(),
6085 &msg,
6086 kind,
6087 ))
6088 }
6089 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
6096 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
6097 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
6098 QueryExpr::CreateIamPolicy { ref id, ref json } => {
6099 self.execute_create_iam_policy(query, id, json)
6100 }
6101 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
6102 QueryExpr::AttachPolicy {
6103 ref policy_id,
6104 ref principal,
6105 } => self.execute_attach_policy(query, policy_id, principal),
6106 QueryExpr::DetachPolicy {
6107 ref policy_id,
6108 ref principal,
6109 } => self.execute_detach_policy(query, policy_id, principal),
6110 QueryExpr::ShowPolicies { ref filter } => {
6111 self.execute_show_policies(query, filter.as_ref())
6112 }
6113 QueryExpr::ShowEffectivePermissions {
6114 ref user,
6115 ref resource,
6116 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
6117 QueryExpr::SimulatePolicy {
6118 ref user,
6119 ref action,
6120 ref resource,
6121 } => self.execute_simulate_policy(query, user, action, resource),
6122 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
6123 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
6124 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
6125 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
6126 };
6127
6128 let mut query_result = query_result;
6132 if let Ok(ref mut result) = query_result {
6133 if result.statement_type == "select" {
6134 self.apply_secret_decryption(result);
6135 }
6136 }
6137
6138 if let Ok(ref result) = query_result {
6145 frame.write_result_cache(self, result, result_cache_scopes);
6146 }
6147
6148 query_result
6149 }
6150
6151 pub fn materialized_view_metadata(
6155 &self,
6156 ) -> Vec<crate::storage::cache::result::MaterializedViewMetadata> {
6157 self.inner.materialized_views.read().metadata()
6158 }
6159
6160 pub(crate) fn retention_sweeper_snapshot(
6171 &self,
6172 ) -> Vec<(String, crate::runtime::retention_sweeper::SweeperState)> {
6173 self.inner.retention_sweeper.read().snapshot()
6174 }
6175
6176 pub fn sweep_retention_tick(&self, batch_size: usize) {
6198 if batch_size == 0 {
6199 return;
6200 }
6201 let now_ms = std::time::SystemTime::now()
6202 .duration_since(std::time::UNIX_EPOCH)
6203 .map(|d| d.as_millis() as u64)
6204 .unwrap_or(0);
6205
6206 let store = self.inner.db.store();
6207 let collections = store.list_collections();
6208 for name in collections {
6209 let Some(contract) = self.inner.db.collection_contract(&name) else {
6210 continue;
6211 };
6212 let Some(retention_ms) = contract.retention_duration_ms else {
6213 continue;
6214 };
6215 let Some(ts_column) =
6216 crate::runtime::retention_filter::resolve_timestamp_column(&contract)
6217 else {
6218 continue;
6219 };
6220 let Some(manager) = store.get_collection(&name) else {
6221 continue;
6222 };
6223 let cutoff = (now_ms as i64).saturating_sub(retention_ms as i64);
6224
6225 let mut expired_ts: Vec<i64> = Vec::new();
6233 manager.for_each_entity(|entity| {
6234 let ts = match ts_column.as_str() {
6235 "created_at" => Some(entity.created_at as i64),
6236 "updated_at" => Some(entity.updated_at as i64),
6237 other => entity
6238 .data
6239 .as_row()
6240 .and_then(|row| row.get_field(other))
6241 .and_then(|v| match v {
6242 crate::storage::schema::Value::TimestampMs(t) => Some(*t),
6243 crate::storage::schema::Value::Timestamp(t) => {
6244 Some(t.saturating_mul(1_000))
6245 }
6246 crate::storage::schema::Value::BigInt(t) => Some(*t),
6247 crate::storage::schema::Value::UnsignedInteger(t) => {
6248 i64::try_from(*t).ok()
6249 }
6250 crate::storage::schema::Value::Integer(t) => Some(*t as i64),
6251 _ => None,
6252 }),
6253 };
6254 if let Some(t) = ts {
6255 if t < cutoff {
6256 expired_ts.push(t);
6257 }
6258 }
6259 true
6260 });
6261
6262 let total_expired = expired_ts.len() as u64;
6263 if total_expired == 0 {
6264 self.inner
6265 .retention_sweeper
6266 .write()
6267 .record_tick(&name, 0, 0, now_ms);
6268 continue;
6269 }
6270
6271 let (effective_cutoff, pending) = if (total_expired as usize) <= batch_size {
6272 (cutoff, 0u64)
6273 } else {
6274 expired_ts.sort_unstable();
6278 let nth = expired_ts[batch_size - 1];
6279 (
6280 nth.saturating_add(1),
6281 total_expired.saturating_sub(batch_size as u64),
6282 )
6283 };
6284
6285 let stmt = format!(
6286 "DELETE FROM {} WHERE {} < {}",
6287 name, ts_column, effective_cutoff
6288 );
6289 let deleted = match self.execute_query(&stmt) {
6290 Ok(r) => r.affected_rows,
6291 Err(_) => 0,
6292 };
6293
6294 self.inner
6295 .retention_sweeper
6296 .write()
6297 .record_tick(&name, deleted, pending, now_ms);
6298 }
6299 }
6300
6301 pub fn refresh_due_materialized_views(&self) {
6302 let due = {
6303 let mut cache = self.inner.materialized_views.write();
6304 cache.claim_due_at(std::time::Instant::now())
6305 };
6306 for name in due {
6307 let stmt = format!("REFRESH MATERIALIZED VIEW {}", name);
6314 let _ = self.execute_query(&stmt);
6315 }
6316 }
6317
6318 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
6324 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
6325 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
6326 let expr = self.rewrite_view_refs(expr);
6330
6331 self.validate_model_operations_before_auth(&expr)?;
6332 if let Err(err) = self.check_query_privilege(&expr) {
6336 return Err(RedDBError::Query(format!("permission denied: {err}")));
6337 }
6338
6339 let statement = query_expr_name(&expr);
6340 let mode = detect_mode(statement);
6341 let query_str = statement;
6342
6343 let result = self.dispatch_expr(expr, query_str, mode)?;
6344 let mut r = result;
6345 if r.statement_type == "select" {
6346 self.apply_secret_decryption(&mut r);
6347 }
6348 Ok(r)
6349 }
6350
6351 pub(super) fn validate_model_operations_before_auth(
6352 &self,
6353 expr: &QueryExpr,
6354 ) -> RedDBResult<()> {
6355 use crate::catalog::CollectionModel;
6356 use crate::runtime::ddl::polymorphic_resolver;
6357 use crate::storage::query::ast::KvCommand;
6358
6359 let system_schema_target = match expr {
6360 QueryExpr::DropTable(q) => Some(q.name.as_str()),
6361 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
6362 QueryExpr::DropVector(q) => Some(q.name.as_str()),
6363 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
6364 QueryExpr::DropKv(q) => Some(q.name.as_str()),
6365 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
6366 QueryExpr::Truncate(q) => Some(q.name.as_str()),
6367 _ => None,
6368 };
6369 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
6370 return Err(RedDBError::Query("system schema is read-only".to_string()));
6371 }
6372
6373 let expected = match expr {
6374 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
6375 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
6376 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
6377 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
6378 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
6379 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
6380 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
6381 QueryExpr::KvCommand(cmd) => {
6382 let (collection, model) = match cmd {
6383 KvCommand::Put {
6384 collection, model, ..
6385 }
6386 | KvCommand::Get {
6387 collection, model, ..
6388 }
6389 | KvCommand::Incr {
6390 collection, model, ..
6391 }
6392 | KvCommand::Cas {
6393 collection, model, ..
6394 }
6395 | KvCommand::Delete {
6396 collection, model, ..
6397 } => (collection.as_str(), *model),
6398 KvCommand::Rotate { collection, .. }
6399 | KvCommand::History { collection, .. }
6400 | KvCommand::List { collection, .. }
6401 | KvCommand::Purge { collection, .. } => {
6402 (collection.as_str(), CollectionModel::Vault)
6403 }
6404 KvCommand::InvalidateTags { collection, .. } => {
6405 (collection.as_str(), CollectionModel::Kv)
6406 }
6407 KvCommand::Watch {
6408 collection, model, ..
6409 } => (collection.as_str(), *model),
6410 KvCommand::Unseal { collection, .. } => {
6411 (collection.as_str(), CollectionModel::Vault)
6412 }
6413 };
6414 Some((collection, model))
6415 }
6416 QueryExpr::ConfigCommand(cmd) => {
6417 self.validate_config_command_before_auth(cmd)?;
6418 None
6419 }
6420 _ => None,
6421 };
6422
6423 let Some((name, expected_model)) = expected else {
6424 return Ok(());
6425 };
6426 let snapshot = self.inner.db.catalog_model_snapshot();
6427 let Some(actual_model) = snapshot
6428 .collections
6429 .iter()
6430 .find(|collection| collection.name == name)
6431 .map(|collection| collection.declared_model.unwrap_or(collection.model))
6432 else {
6433 return Ok(());
6434 };
6435 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
6436 }
6437
6438 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
6443 if self.inner.views.read().is_empty() {
6445 return expr;
6446 }
6447 self.rewrite_view_refs_inner(expr)
6448 }
6449
6450 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
6451 use crate::storage::query::ast::{Filter, TableSource};
6452 match expr {
6453 QueryExpr::Table(mut tq) => {
6454 if let Some(TableSource::Subquery(body)) = tq.source.take() {
6460 tq.source = Some(TableSource::Subquery(Box::new(
6461 self.rewrite_view_refs_inner(*body),
6462 )));
6463 return QueryExpr::Table(tq);
6464 }
6465
6466 let maybe_view = {
6470 let views = self.inner.views.read();
6471 views.get(&tq.table).cloned()
6472 };
6473 let Some(view) = maybe_view else {
6474 return QueryExpr::Table(tq);
6475 };
6476
6477 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
6481
6482 match inner_expr {
6490 QueryExpr::Table(mut inner_tq) => {
6491 if let Some(outer_filter) = tq.filter.take() {
6492 inner_tq.filter = Some(match inner_tq.filter.take() {
6493 Some(existing) => {
6494 Filter::And(Box::new(existing), Box::new(outer_filter))
6495 }
6496 None => outer_filter,
6497 });
6498 }
6499 if let Some(outer_limit) = tq.limit {
6500 inner_tq.limit = Some(match inner_tq.limit {
6501 Some(existing) => existing.min(outer_limit),
6502 None => outer_limit,
6503 });
6504 }
6505 if let Some(outer_offset) = tq.offset {
6506 inner_tq.offset = Some(match inner_tq.offset {
6507 Some(existing) => existing + outer_offset,
6508 None => outer_offset,
6509 });
6510 }
6511 QueryExpr::Table(inner_tq)
6512 }
6513 other => other,
6514 }
6515 }
6516 QueryExpr::Join(mut jq) => {
6517 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
6518 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
6519 QueryExpr::Join(jq)
6520 }
6521 other => other,
6524 }
6525 }
6526
6527 fn authorize_relational_table_select(
6531 &self,
6532 mut table: TableQuery,
6533 frame: &dyn super::statement_frame::ReadFrame,
6534 ) -> RedDBResult<Option<TableQuery>> {
6535 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6536 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6537 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6538 return Ok(Some(table));
6539 }
6540
6541 self.check_table_column_projection_authz(&table, frame)?;
6542
6543 if self.inner.rls_enabled_tables.read().contains(&table.table) {
6544 return Ok(inject_rls_filters(self, frame, table));
6545 }
6546
6547 Ok(Some(table))
6548 }
6549
6550 fn authorize_relational_join_select(
6551 &self,
6552 mut join: JoinQuery,
6553 frame: &dyn super::statement_frame::ReadFrame,
6554 ) -> RedDBResult<Option<JoinQuery>> {
6555 self.check_join_column_projection_authz(&join, frame)?;
6556 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
6557 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
6558 Ok(inject_rls_into_join(self, frame, join))
6559 }
6560
6561 fn authorize_relational_join_child(
6562 &self,
6563 expr: QueryExpr,
6564 frame: &dyn super::statement_frame::ReadFrame,
6565 ) -> RedDBResult<QueryExpr> {
6566 match expr {
6567 QueryExpr::Table(mut table) => {
6568 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6569 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
6570 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
6571 }
6572 Ok(QueryExpr::Table(table))
6573 }
6574 QueryExpr::Join(join) => self
6575 .authorize_relational_join_select(join, frame)?
6576 .map(QueryExpr::Join)
6577 .ok_or_else(|| {
6578 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6579 }),
6580 other => Ok(other),
6581 }
6582 }
6583
6584 fn authorize_relational_select_expr(
6585 &self,
6586 expr: QueryExpr,
6587 frame: &dyn super::statement_frame::ReadFrame,
6588 ) -> RedDBResult<QueryExpr> {
6589 match expr {
6590 QueryExpr::Table(table) => self
6591 .authorize_relational_table_select(table, frame)?
6592 .map(QueryExpr::Table)
6593 .ok_or_else(|| {
6594 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6595 }),
6596 QueryExpr::Join(join) => self
6597 .authorize_relational_join_select(join, frame)?
6598 .map(QueryExpr::Join)
6599 .ok_or_else(|| {
6600 RedDBError::Query("permission denied: RLS denied relational subquery".into())
6601 }),
6602 other => Ok(other),
6603 }
6604 }
6605
6606 fn check_table_column_projection_authz(
6607 &self,
6608 table: &TableQuery,
6609 frame: &dyn super::statement_frame::ReadFrame,
6610 ) -> RedDBResult<()> {
6611 let Some((username, role)) = frame.identity() else {
6612 return Ok(());
6613 };
6614 let Some(auth_store) = self.inner.auth_store.read().clone() else {
6615 return Ok(());
6616 };
6617
6618 let columns = self.resolved_table_projection_columns(table)?;
6619 let request = ColumnAccessRequest::select(table.table.clone(), columns);
6620 let principal = UserId::from_parts(frame.effective_scope(), username);
6621 let ctx = runtime_iam_context(role, frame.effective_scope());
6622 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
6623 if outcome.allowed() {
6624 return Ok(());
6625 }
6626
6627 if let Some(denied) = outcome.first_denied_column() {
6628 return Err(RedDBError::Query(format!(
6629 "permission denied: principal=`{username}` cannot select column `{}`",
6630 denied.resource.name
6631 )));
6632 }
6633 Err(RedDBError::Query(format!(
6634 "permission denied: principal=`{username}` cannot select table `{}`",
6635 table.table
6636 )))
6637 }
6638
6639 fn check_join_column_projection_authz(
6640 &self,
6641 join: &JoinQuery,
6642 frame: &dyn super::statement_frame::ReadFrame,
6643 ) -> RedDBResult<()> {
6644 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
6645 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
6646 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
6647
6648 for (table, columns) in by_table {
6649 let query = TableQuery {
6650 table,
6651 source: None,
6652 alias: None,
6653 select_items: Vec::new(),
6654 columns: columns.into_iter().map(Projection::Column).collect(),
6655 where_expr: None,
6656 filter: None,
6657 group_by_exprs: Vec::new(),
6658 group_by: Vec::new(),
6659 having_expr: None,
6660 having: None,
6661 order_by: Vec::new(),
6662 limit: None,
6663 limit_param: None,
6664 offset: None,
6665 offset_param: None,
6666 expand: None,
6667 as_of: None,
6668 sessionize: None,
6669 };
6670 self.check_table_column_projection_authz(&query, frame)?;
6671 }
6672 Ok(())
6673 }
6674
6675 fn collect_join_projection_columns(
6676 &self,
6677 join: &JoinQuery,
6678 projections: &[Projection],
6679 out: &mut HashMap<String, BTreeSet<String>>,
6680 ) -> RedDBResult<()> {
6681 let left = table_side_context(join.left.as_ref());
6682 let right = table_side_context(join.right.as_ref());
6683
6684 if projections
6685 .iter()
6686 .any(|projection| matches!(projection, Projection::All))
6687 {
6688 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
6689 out.entry(side.table.clone())
6690 .or_default()
6691 .extend(self.table_all_projection_columns(&side.table)?);
6692 }
6693 return Ok(());
6694 }
6695
6696 for projection in projections {
6697 collect_projection_columns_for_join_side(
6698 projection,
6699 left.as_ref(),
6700 right.as_ref(),
6701 out,
6702 )?;
6703 }
6704 Ok(())
6705 }
6706
6707 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
6708 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
6709 if projections
6710 .iter()
6711 .any(|projection| matches!(projection, Projection::All))
6712 {
6713 return self.table_all_projection_columns(&table.table);
6714 }
6715
6716 let mut columns = BTreeSet::new();
6717 for projection in &projections {
6718 collect_projection_columns_for_table(
6719 projection,
6720 &table.table,
6721 table.alias.as_deref(),
6722 &mut columns,
6723 );
6724 }
6725 Ok(columns.into_iter().collect())
6726 }
6727
6728 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
6729 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
6730 let columns: Vec<String> = contract
6731 .declared_columns
6732 .iter()
6733 .map(|column| column.name.clone())
6734 .collect();
6735 if !columns.is_empty() {
6736 return Ok(columns);
6737 }
6738 }
6739
6740 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
6741 Ok(records
6742 .first()
6743 .map(|record| {
6744 record
6745 .column_names()
6746 .into_iter()
6747 .map(|column| column.to_string())
6748 .collect()
6749 })
6750 .unwrap_or_default())
6751 }
6752
6753 fn resolve_table_expr_subqueries(
6754 &self,
6755 mut table: TableQuery,
6756 frame: &dyn super::statement_frame::ReadFrame,
6757 ) -> RedDBResult<TableQuery> {
6758 if let Some(TableSource::Subquery(inner)) = table.source.take() {
6759 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
6760 table.source = Some(TableSource::Subquery(Box::new(inner)));
6761 }
6762
6763 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
6764 for item in &mut table.select_items {
6765 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
6766 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6767 }
6768 }
6769 if let Some(where_expr) = table.where_expr.take() {
6770 table.where_expr =
6771 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
6772 table.filter = None;
6773 }
6774 if let Some(having_expr) = table.having_expr.take() {
6775 table.having_expr =
6776 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
6777 table.having = None;
6778 }
6779 for expr in &mut table.group_by_exprs {
6780 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
6781 }
6782 for clause in &mut table.order_by {
6783 if let Some(expr) = clause.expr.take() {
6784 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
6785 }
6786 }
6787 Ok(table)
6788 }
6789
6790 fn resolve_select_expr_subqueries(
6791 &self,
6792 expr: QueryExpr,
6793 frame: &dyn super::statement_frame::ReadFrame,
6794 ) -> RedDBResult<QueryExpr> {
6795 match expr {
6796 QueryExpr::Table(table) => self
6797 .resolve_table_expr_subqueries(table, frame)
6798 .map(QueryExpr::Table),
6799 QueryExpr::Join(mut join) => {
6800 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
6801 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
6802 Ok(QueryExpr::Join(join))
6803 }
6804 other => Ok(other),
6805 }
6806 }
6807
6808 fn resolve_expr_subqueries(
6809 &self,
6810 expr: crate::storage::query::ast::Expr,
6811 outer_scopes: &[String],
6812 frame: &dyn super::statement_frame::ReadFrame,
6813 ) -> RedDBResult<crate::storage::query::ast::Expr> {
6814 use crate::storage::query::ast::Expr;
6815
6816 match expr {
6817 Expr::Subquery { query, span } => {
6818 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
6819 if values.len() > 1 {
6820 return Err(RedDBError::Query(
6821 "scalar subquery returned more than one row".to_string(),
6822 ));
6823 }
6824 Ok(Expr::Literal {
6825 value: values.into_iter().next().unwrap_or(Value::Null),
6826 span,
6827 })
6828 }
6829 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
6830 op,
6831 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
6832 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
6833 span,
6834 }),
6835 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
6836 op,
6837 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6838 span,
6839 }),
6840 Expr::Cast {
6841 inner,
6842 target,
6843 span,
6844 } => Ok(Expr::Cast {
6845 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
6846 target,
6847 span,
6848 }),
6849 Expr::FunctionCall { name, args, span } => {
6850 let args = args
6851 .into_iter()
6852 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
6853 .collect::<RedDBResult<Vec<_>>>()?;
6854 Ok(Expr::FunctionCall { name, args, span })
6855 }
6856 Expr::Case {
6857 branches,
6858 else_,
6859 span,
6860 } => {
6861 let branches = branches
6862 .into_iter()
6863 .map(|(cond, value)| {
6864 Ok((
6865 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
6866 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
6867 ))
6868 })
6869 .collect::<RedDBResult<Vec<_>>>()?;
6870 let else_ = else_
6871 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
6872 .transpose()?
6873 .map(Box::new);
6874 Ok(Expr::Case {
6875 branches,
6876 else_,
6877 span,
6878 })
6879 }
6880 Expr::IsNull {
6881 operand,
6882 negated,
6883 span,
6884 } => Ok(Expr::IsNull {
6885 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
6886 negated,
6887 span,
6888 }),
6889 Expr::InList {
6890 target,
6891 values,
6892 negated,
6893 span,
6894 } => {
6895 let target =
6896 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
6897 let mut resolved = Vec::new();
6898 for value in values {
6899 if let Expr::Subquery { query, .. } = value {
6900 resolved.extend(
6901 self.execute_expr_subquery_values(query, outer_scopes, frame)?
6902 .into_iter()
6903 .map(Expr::lit),
6904 );
6905 } else {
6906 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
6907 }
6908 }
6909 Ok(Expr::InList {
6910 target,
6911 values: resolved,
6912 negated,
6913 span,
6914 })
6915 }
6916 Expr::Between {
6917 target,
6918 low,
6919 high,
6920 negated,
6921 span,
6922 } => Ok(Expr::Between {
6923 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
6924 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
6925 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
6926 negated,
6927 span,
6928 }),
6929 other => Ok(other),
6930 }
6931 }
6932
6933 fn execute_expr_subquery_values(
6934 &self,
6935 subquery: crate::storage::query::ast::ExprSubquery,
6936 outer_scopes: &[String],
6937 frame: &dyn super::statement_frame::ReadFrame,
6938 ) -> RedDBResult<Vec<Value>> {
6939 let query = *subquery.query;
6940 if query_references_outer_scope(&query, outer_scopes) {
6941 return Err(RedDBError::Query(
6942 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
6943 ));
6944 }
6945 let query = self.rewrite_view_refs(query);
6946 let query = self.resolve_select_expr_subqueries(query, frame)?;
6947 let query = self.authorize_relational_select_expr(query, frame)?;
6948 let result = match query {
6949 QueryExpr::Table(table) => {
6950 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
6951 }
6952 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
6953 other => {
6954 return Err(RedDBError::Query(format!(
6955 "expression subquery must be a SELECT query, got {}",
6956 query_expr_name(&other)
6957 )))
6958 }
6959 };
6960 first_column_values(result)
6961 }
6962
6963 fn dispatch_expr(
6964 &self,
6965 expr: QueryExpr,
6966 query_str: &str,
6967 mode: QueryMode,
6968 ) -> RedDBResult<RuntimeQueryResult> {
6969 let statement = query_expr_name(&expr);
6970 match expr {
6971 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
6972 Err(RedDBError::Query(
6974 "graph queries cannot be used as prepared statements".to_string(),
6975 ))
6976 }
6977 QueryExpr::Table(table) => {
6978 let scope = self.ai_scope();
6979 let table = self.resolve_table_expr_subqueries(
6980 table,
6981 &scope as &dyn super::statement_frame::ReadFrame,
6982 )?;
6983 if super::red_schema::is_virtual_table(&table.table) {
6984 return Ok(RuntimeQueryResult {
6985 query: query_str.to_string(),
6986 mode,
6987 statement,
6988 engine: "runtime-red-schema",
6989 result: super::red_schema::red_query(
6990 self,
6991 &table.table,
6992 &table,
6993 &scope as &dyn super::statement_frame::ReadFrame,
6994 )?,
6995 affected_rows: 0,
6996 statement_type: "select",
6997 });
6998 }
6999 let Some(table_with_rls) = self.authorize_relational_table_select(
7000 table,
7001 &scope as &dyn super::statement_frame::ReadFrame,
7002 )?
7003 else {
7004 return Ok(RuntimeQueryResult {
7005 query: query_str.to_string(),
7006 mode,
7007 statement,
7008 engine: "runtime-table-rls",
7009 result: crate::storage::query::unified::UnifiedResult::empty(),
7010 affected_rows: 0,
7011 statement_type: "select",
7012 });
7013 };
7014 Ok(RuntimeQueryResult {
7015 query: query_str.to_string(),
7016 mode,
7017 statement,
7018 engine: "runtime-table",
7019 result: execute_runtime_table_query(
7020 &self.inner.db,
7021 &table_with_rls,
7022 Some(&self.inner.index_store),
7023 )?,
7024 affected_rows: 0,
7025 statement_type: "select",
7026 })
7027 }
7028 QueryExpr::Join(join) => {
7029 let scope = self.ai_scope();
7030 let Some(join_with_rls) = self.authorize_relational_join_select(
7031 join,
7032 &scope as &dyn super::statement_frame::ReadFrame,
7033 )?
7034 else {
7035 return Ok(RuntimeQueryResult {
7036 query: query_str.to_string(),
7037 mode,
7038 statement,
7039 engine: "runtime-join-rls",
7040 result: crate::storage::query::unified::UnifiedResult::empty(),
7041 affected_rows: 0,
7042 statement_type: "select",
7043 });
7044 };
7045 Ok(RuntimeQueryResult {
7046 query: query_str.to_string(),
7047 mode,
7048 statement,
7049 engine: "runtime-join",
7050 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
7051 affected_rows: 0,
7052 statement_type: "select",
7053 })
7054 }
7055 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
7056 query: query_str.to_string(),
7057 mode,
7058 statement,
7059 engine: "runtime-vector",
7060 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
7061 affected_rows: 0,
7062 statement_type: "select",
7063 }),
7064 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
7065 query: query_str.to_string(),
7066 mode,
7067 statement,
7068 engine: "runtime-hybrid",
7069 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
7070 affected_rows: 0,
7071 statement_type: "select",
7072 }),
7073 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
7074 Err(RedDBError::Query(
7075 super::red_schema::READ_ONLY_ERROR.to_string(),
7076 ))
7077 }
7078 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
7079 Err(RedDBError::Query(
7080 super::red_schema::READ_ONLY_ERROR.to_string(),
7081 ))
7082 }
7083 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
7084 Err(RedDBError::Query(
7085 super::red_schema::READ_ONLY_ERROR.to_string(),
7086 ))
7087 }
7088 QueryExpr::Insert(ref insert) => self
7089 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
7090 self.execute_insert(query_str, insert)
7091 }),
7092 QueryExpr::Update(ref update) => self
7093 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
7094 self.execute_update(query_str, update)
7095 }),
7096 QueryExpr::Delete(ref delete) => self
7097 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
7098 self.execute_delete(query_str, delete)
7099 }),
7100 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
7101 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
7102 _ => Err(RedDBError::Query(format!(
7103 "prepared-statement execution does not support {statement} statements"
7104 ))),
7105 }
7106 }
7107
7108 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
7111 let q = query.trim();
7114 if !q.starts_with("SELECT") && !q.starts_with("select") {
7115 return None;
7116 }
7117
7118 let where_pos = q
7120 .find("WHERE _entity_id")
7121 .or_else(|| q.find("where _entity_id"))?;
7122 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
7124
7125 let id_str = after_eq.trim();
7127 let entity_id: u64 = id_str.parse().ok()?;
7128
7129 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
7131 let table = q[from_pos..where_pos].trim();
7132 if table.is_empty()
7133 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
7134 {
7135 return None; }
7137 let table_name = table.split_whitespace().next()?;
7138
7139 let store = self.inner.db.store();
7145 let entity = store
7146 .get(
7147 table_name,
7148 crate::storage::unified::EntityId::new(entity_id),
7149 )
7150 .filter(entity_visible_under_current_snapshot);
7151
7152 let count = if entity.is_some() { 1u64 } else { 0 };
7153
7154 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
7160 .as_ref()
7161 .and_then(|e| runtime_table_record_from_entity(e.clone()))
7162 .into_iter()
7163 .collect();
7164
7165 let json = match entity {
7166 Some(ref e) => execute_runtime_serialize_single_entity(e),
7167 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
7168 .to_string(),
7169 };
7170
7171 Some(Ok(RuntimeQueryResult {
7172 query: query.to_string(),
7173 mode: crate::storage::query::modes::QueryMode::Sql,
7174 statement: "select",
7175 engine: "fast-entity-lookup",
7176 result: crate::storage::query::unified::UnifiedResult {
7177 columns: Vec::new(),
7178 records,
7179 stats: crate::storage::query::unified::QueryStats {
7180 rows_scanned: count,
7181 ..Default::default()
7182 },
7183 pre_serialized_json: Some(json),
7184 },
7185 affected_rows: 0,
7186 statement_type: "select",
7187 }))
7188 }
7189
7190 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
7191 match self
7192 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
7193 .as_str()
7194 {
7195 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
7196 "shadow" => RuntimeResultCacheBackend::Shadow,
7197 _ => RuntimeResultCacheBackend::Legacy,
7198 }
7199 }
7200
7201 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7202 match self.result_cache_backend() {
7203 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
7204 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
7205 RuntimeResultCacheBackend::Shadow => {
7206 let legacy = self.get_legacy_result_cache_entry(key);
7207 let blob = self.get_blob_result_cache_entry(key);
7208 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
7209 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
7210 self.inner
7211 .result_cache_shadow_divergences
7212 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
7213 tracing::warn!(
7214 key,
7215 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
7216 "result cache shadow backend diverged from legacy"
7217 );
7218 }
7219 }
7220 legacy
7221 }
7222 }
7223 }
7224
7225 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7226 let cache = self.inner.result_cache.read();
7227 cache.0.get(key).and_then(|entry| {
7228 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
7229 Some(entry.result.clone())
7230 } else {
7231 None
7232 }
7233 })
7234 }
7235
7236 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
7237 let hit = self
7238 .inner
7239 .result_blob_cache
7240 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
7241 {
7242 let cache = self.inner.result_blob_entries.read();
7243 if let Some(entry) = cache.0.get(key) {
7244 return Some(entry.result.clone());
7245 }
7246 }
7247
7248 let (result, scopes) = decode_result_cache_payload(hit.value())?;
7249 let mut cache = self.inner.result_blob_entries.write();
7250 let (ref mut map, ref mut order) = *cache;
7251 if !map.contains_key(key) {
7252 order.push_back(key.to_string());
7253 }
7254 map.insert(
7255 key.to_string(),
7256 RuntimeResultCacheEntry {
7257 result: result.clone(),
7258 cached_at: std::time::Instant::now(),
7259 scopes,
7260 },
7261 );
7262 trim_result_cache(map, order);
7263 Some(result)
7264 }
7265
7266 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7267 match self.result_cache_backend() {
7268 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
7269 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
7270 RuntimeResultCacheBackend::Shadow => {
7271 self.put_legacy_result_cache_entry(key, entry.clone());
7272 self.put_blob_result_cache_entry(key, entry);
7273 }
7274 }
7275 }
7276
7277 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7278 let mut cache = self.inner.result_cache.write();
7279 let (ref mut map, ref mut order) = *cache;
7280 if !map.contains_key(key) {
7281 order.push_back(key.to_string());
7282 }
7283 map.insert(key.to_string(), entry);
7284 trim_result_cache(map, order);
7285 }
7286
7287 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
7288 let policy = crate::storage::cache::BlobCachePolicy::default()
7289 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
7290 .priority(200);
7291 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
7292 let bytes = encode_result_cache_payload(&entry)
7293 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
7294 let put = crate::storage::cache::BlobCachePut::new(bytes)
7295 .with_dependencies(dependencies)
7296 .with_policy(policy);
7297 if self
7298 .inner
7299 .result_blob_cache
7300 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
7301 .is_err()
7302 {
7303 return;
7304 }
7305
7306 let mut cache = self.inner.result_blob_entries.write();
7307 let (ref mut map, ref mut order) = *cache;
7308 if !map.contains_key(key) {
7309 order.push_back(key.to_string());
7310 }
7311 map.insert(key.to_string(), entry);
7312 trim_result_cache(map, order);
7313 }
7314
7315 pub fn result_cache_shadow_divergences(&self) -> u64 {
7316 self.inner
7317 .result_cache_shadow_divergences
7318 .load(std::sync::atomic::Ordering::Relaxed)
7319 }
7320
7321 pub fn invalidate_result_cache(&self) {
7324 let mut cache = self.inner.result_cache.write();
7325 cache.0.clear();
7326 cache.1.clear();
7327 let mut blob_entries = self.inner.result_blob_entries.write();
7328 blob_entries.0.clear();
7329 blob_entries.1.clear();
7330 self.inner
7331 .result_blob_cache
7332 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7333 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7334 ask_entries.0.clear();
7335 ask_entries.1.clear();
7336 self.inner
7337 .result_blob_cache
7338 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7339 }
7340
7341 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
7344 let legacy_has_match = {
7347 let cache = self.inner.result_cache.read();
7348 let (ref map, _) = *cache;
7349 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7350 };
7351 let blob_has_match = {
7352 let cache = self.inner.result_blob_entries.read();
7353 let (ref map, _) = *cache;
7354 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
7355 };
7356 if legacy_has_match {
7357 let mut cache = self.inner.result_cache.write();
7358 let (ref mut map, ref mut order) = *cache;
7359 map.retain(|_, entry| !entry.scopes.contains(table));
7360 order.retain(|key| map.contains_key(key));
7361 }
7362
7363 if matches!(
7364 self.result_cache_backend(),
7365 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
7366 ) {
7367 let mut blob_entries = self.inner.result_blob_entries.write();
7368 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7369 blob_map.clear();
7370 blob_order.clear();
7371 self.inner
7372 .result_blob_cache
7373 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
7374 } else if blob_has_match {
7375 let mut blob_entries = self.inner.result_blob_entries.write();
7376 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
7377 blob_map.retain(|_, entry| !entry.scopes.contains(table));
7378 blob_order.retain(|key| blob_map.contains_key(key));
7379 }
7380 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
7381 ask_entries.0.clear();
7382 ask_entries.1.clear();
7383 self.inner
7384 .result_blob_cache
7385 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
7386 }
7387
7388 pub(crate) fn invalidate_plan_cache(&self) {
7389 self.inner.query_cache.write().clear();
7390 self.inner
7391 .ddl_epoch
7392 .fetch_add(1, std::sync::atomic::Ordering::Release);
7393 }
7394
7395 pub fn ddl_epoch(&self) -> u64 {
7399 self.inner
7400 .ddl_epoch
7401 .load(std::sync::atomic::Ordering::Acquire)
7402 }
7403
7404 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
7405 let store = self.inner.db.store();
7406 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
7407 self.invalidate_plan_cache();
7408 }
7409
7410 pub(crate) fn rehydrate_tenant_tables(&self) {
7419 let store = self.inner.db.store();
7420 let Some(manager) = store.get_collection("red_config") else {
7421 return;
7422 };
7423 for entity in manager.query_all(|_| true) {
7428 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
7429 continue;
7430 };
7431 let Some(named) = &row.named else { continue };
7432 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
7433 continue;
7434 };
7435 let Some(rest) = key.strip_prefix("tenant_tables.") else {
7437 continue;
7438 };
7439 let Some((table, suffix)) = rest.rsplit_once('.') else {
7440 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7446 collection: "red_config".to_string(),
7447 detail: format!("malformed tenant_tables key: {key}"),
7448 }
7449 .emit_global();
7450 continue;
7451 };
7452 if suffix != "column" {
7453 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
7454 collection: "red_config".to_string(),
7455 detail: format!("unexpected tenant_tables suffix: {key}"),
7456 }
7457 .emit_global();
7458 continue;
7459 }
7460 match named.get("value") {
7461 Some(crate::storage::schema::Value::Text(column)) => {
7462 self.register_tenant_table(table, column);
7463 }
7464 Some(crate::storage::schema::Value::Null) | None => {
7466 self.unregister_tenant_table(table);
7467 }
7468 _ => {}
7469 }
7470 }
7471 }
7472
7473 pub(crate) fn rehydrate_declared_column_schemas(&self) {
7474 let store = self.inner.db.store();
7475 for contract in self.inner.db.collection_contracts() {
7476 let columns: Vec<String> = contract
7477 .declared_columns
7478 .iter()
7479 .map(|column| column.name.clone())
7480 .collect();
7481 let Some(manager) = store.get_collection(&contract.name) else {
7482 continue;
7483 };
7484 manager.set_column_schema_if_empty(columns);
7485 }
7486 }
7487
7488 pub fn register_tenant_table(&self, table: &str, column: &str) {
7493 use crate::storage::query::ast::{
7494 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
7495 };
7496 self.inner
7497 .tenant_tables
7498 .write()
7499 .insert(table.to_string(), column.to_string());
7500
7501 let lhs = Expr::Column {
7507 field: FieldRef::TableColumn {
7508 table: table.to_string(),
7509 column: column.to_string(),
7510 },
7511 span: Span::synthetic(),
7512 };
7513 let rhs = Expr::FunctionCall {
7514 name: "CURRENT_TENANT".to_string(),
7515 args: Vec::new(),
7516 span: Span::synthetic(),
7517 };
7518 let policy_filter = Filter::CompareExpr {
7519 lhs,
7520 op: CompareOp::Eq,
7521 rhs,
7522 };
7523
7524 let policy = CreatePolicyQuery {
7525 name: "__tenant_iso".to_string(),
7526 table: table.to_string(),
7527 action: None, role: None, using: Box::new(policy_filter),
7530 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
7537 };
7538
7539 self.inner.rls_policies.write().insert(
7541 (table.to_string(), "__tenant_iso".to_string()),
7542 Arc::new(policy),
7543 );
7544 self.inner
7545 .rls_enabled_tables
7546 .write()
7547 .insert(table.to_string());
7548
7549 self.ensure_tenant_index(table, column);
7555 }
7556
7557 fn ensure_tenant_index(&self, table: &str, column: &str) {
7565 if column.contains('.') {
7566 return;
7567 }
7568 let index_name = format!("__tenant_idx_{table}");
7569 let registry = self.inner.index_store.list_indices(table);
7570 if registry.iter().any(|idx| idx.name == index_name) {
7571 return;
7572 }
7573 if registry
7574 .iter()
7575 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
7576 {
7577 return;
7578 }
7579
7580 let store = self.inner.db.store();
7581 let Some(manager) = store.get_collection(table) else {
7582 return;
7583 };
7584 let entities = manager.query_all(|_| true);
7585 let entity_fields: Vec<(
7586 crate::storage::unified::EntityId,
7587 Vec<(String, crate::storage::schema::Value)>,
7588 )> = entities
7589 .iter()
7590 .map(|e| {
7591 let fields = match &e.data {
7592 crate::storage::EntityData::Row(row) => {
7593 if let Some(ref named) = row.named {
7594 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
7595 } else if let Some(ref schema) = row.schema {
7596 schema
7597 .iter()
7598 .zip(row.columns.iter())
7599 .map(|(k, v)| (k.clone(), v.clone()))
7600 .collect()
7601 } else {
7602 Vec::new()
7603 }
7604 }
7605 crate::storage::EntityData::Node(node) => node
7606 .properties
7607 .iter()
7608 .map(|(k, v)| (k.clone(), v.clone()))
7609 .collect(),
7610 _ => Vec::new(),
7611 };
7612 (e.id, fields)
7613 })
7614 .collect();
7615
7616 let columns = vec![column.to_string()];
7617 if self
7618 .inner
7619 .index_store
7620 .create_index(
7621 &index_name,
7622 table,
7623 &columns,
7624 super::index_store::IndexMethodKind::Hash,
7625 false,
7626 &entity_fields,
7627 )
7628 .is_err()
7629 {
7630 return;
7631 }
7632 self.inner
7633 .index_store
7634 .register(super::index_store::RegisteredIndex {
7635 name: index_name,
7636 collection: table.to_string(),
7637 columns,
7638 method: super::index_store::IndexMethodKind::Hash,
7639 unique: false,
7640 });
7641 self.invalidate_plan_cache();
7642 }
7643
7644 fn drop_tenant_index(&self, table: &str) {
7647 let index_name = format!("__tenant_idx_{table}");
7648 self.inner.index_store.drop_index(&index_name, table);
7649 }
7650
7651 pub fn tenant_column(&self, table: &str) -> Option<String> {
7655 self.inner.tenant_tables.read().get(table).cloned()
7656 }
7657
7658 pub fn unregister_tenant_table(&self, table: &str) {
7662 self.inner.tenant_tables.write().remove(table);
7663 self.inner
7664 .rls_policies
7665 .write()
7666 .remove(&(table.to_string(), "__tenant_iso".to_string()));
7667 self.drop_tenant_index(table);
7668 let has_other_policies = self
7670 .inner
7671 .rls_policies
7672 .read()
7673 .keys()
7674 .any(|(t, _)| t == table);
7675 if !has_other_policies {
7676 self.inner.rls_enabled_tables.write().remove(table);
7677 }
7678 }
7679
7680 pub(crate) fn record_pending_tombstone(
7686 &self,
7687 conn_id: u64,
7688 collection: &str,
7689 id: crate::storage::unified::entity::EntityId,
7690 stamper_xid: crate::storage::transaction::snapshot::Xid,
7691 previous_xmax: crate::storage::transaction::snapshot::Xid,
7692 ) {
7693 self.inner
7694 .pending_tombstones
7695 .write()
7696 .entry(conn_id)
7697 .or_default()
7698 .push((collection.to_string(), id, stamper_xid, previous_xmax));
7699 }
7700
7701 pub(crate) fn record_pending_versioned_update(
7702 &self,
7703 conn_id: u64,
7704 collection: &str,
7705 old_id: crate::storage::unified::entity::EntityId,
7706 new_id: crate::storage::unified::entity::EntityId,
7707 stamper_xid: crate::storage::transaction::snapshot::Xid,
7708 previous_xmax: crate::storage::transaction::snapshot::Xid,
7709 ) {
7710 self.inner
7711 .pending_versioned_updates
7712 .write()
7713 .entry(conn_id)
7714 .or_default()
7715 .push((
7716 collection.to_string(),
7717 old_id,
7718 new_id,
7719 stamper_xid,
7720 previous_xmax,
7721 ));
7722 }
7723
7724 fn with_deferred_store_wal_if_transaction<T>(
7725 &self,
7726 f: impl FnOnce() -> RedDBResult<T>,
7727 ) -> RedDBResult<T> {
7728 let conn_id = current_connection_id();
7729 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
7730 return f();
7731 }
7732
7733 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
7734 let result = f();
7735 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
7736 match result {
7737 Ok(value) => {
7738 self.record_pending_store_wal_actions(conn_id, captured);
7739 Ok(value)
7740 }
7741 Err(err) => Err(err),
7742 }
7743 }
7744
7745 fn with_deferred_store_wal_for_dml<T>(
7746 &self,
7747 capture_autocommit_events: bool,
7748 f: impl FnOnce() -> RedDBResult<T>,
7749 ) -> RedDBResult<T> {
7750 let conn_id = current_connection_id();
7751 if self.inner.tx_contexts.read().contains_key(&conn_id) {
7752 return self.with_deferred_store_wal_if_transaction(f);
7753 }
7754 if !capture_autocommit_events {
7755 return f();
7756 }
7757
7758 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
7759 let result = f();
7760 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
7761 self.inner
7762 .db
7763 .store()
7764 .append_deferred_store_wal_actions(captured)
7765 .map_err(|err| RedDBError::Internal(err.to_string()))?;
7766 result
7767 }
7768
7769 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
7770 !query.suppress_events
7771 && self.collection_has_event_subscriptions_for_operation(
7772 &query.table,
7773 crate::catalog::SubscriptionOperation::Insert,
7774 )
7775 }
7776
7777 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
7778 !query.suppress_events
7779 && self.collection_has_event_subscriptions_for_operation(
7780 &query.table,
7781 crate::catalog::SubscriptionOperation::Update,
7782 )
7783 }
7784
7785 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
7786 !query.suppress_events
7787 && self.collection_has_event_subscriptions_for_operation(
7788 &query.table,
7789 crate::catalog::SubscriptionOperation::Delete,
7790 )
7791 }
7792
7793 fn collection_has_event_subscriptions_for_operation(
7794 &self,
7795 collection: &str,
7796 operation: crate::catalog::SubscriptionOperation,
7797 ) -> bool {
7798 let Some(contract) = self.db().collection_contract_arc(collection) else {
7799 return false;
7800 };
7801 contract.subscriptions.iter().any(|subscription| {
7802 subscription.enabled
7803 && (subscription.ops_filter.is_empty()
7804 || subscription.ops_filter.contains(&operation))
7805 })
7806 }
7807
7808 fn record_pending_store_wal_actions(
7809 &self,
7810 conn_id: u64,
7811 actions: crate::storage::unified::DeferredStoreWalActions,
7812 ) {
7813 if actions.is_empty() {
7814 return;
7815 }
7816 let mut guard = self.inner.pending_store_wal_actions.write();
7817 guard.entry(conn_id).or_default().extend(actions);
7818 }
7819
7820 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
7821 let Some(actions) = self
7822 .inner
7823 .pending_store_wal_actions
7824 .write()
7825 .remove(&conn_id)
7826 else {
7827 return Ok(());
7828 };
7829 self.inner
7830 .db
7831 .store()
7832 .append_deferred_store_wal_actions(actions)
7833 .map_err(|err| RedDBError::Internal(err.to_string()))
7834 }
7835
7836 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
7837 self.inner
7838 .pending_store_wal_actions
7839 .write()
7840 .remove(&conn_id);
7841 }
7842
7843 fn xid_conflicts_with_snapshot(
7844 &self,
7845 xid: crate::storage::transaction::snapshot::Xid,
7846 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7847 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7848 ) -> bool {
7849 xid != 0
7850 && !own_xids.contains(&xid)
7851 && !self.inner.snapshot_manager.is_aborted(xid)
7852 && !self.inner.snapshot_manager.is_active(xid)
7853 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
7854 }
7855
7856 fn conflict_error(
7857 collection: &str,
7858 logical_id: crate::storage::unified::entity::EntityId,
7859 xid: crate::storage::transaction::snapshot::Xid,
7860 ) -> RedDBError {
7861 RedDBError::Query(format!(
7862 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
7863 logical_id.raw()
7864 ))
7865 }
7866
7867 fn check_logical_row_conflict(
7868 &self,
7869 collection: &str,
7870 logical_id: crate::storage::unified::entity::EntityId,
7871 excluded_ids: &[crate::storage::unified::entity::EntityId],
7872 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7873 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7874 ) -> RedDBResult<()> {
7875 let store = self.inner.db.store();
7876 let Some(manager) = store.get_collection(collection) else {
7877 return Ok(());
7878 };
7879
7880 for candidate in manager.query_all(|_| true) {
7881 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
7882 continue;
7883 }
7884 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
7885 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
7886 }
7887 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
7888 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
7889 }
7890 }
7891 Ok(())
7892 }
7893
7894 pub(crate) fn check_table_row_write_conflicts(
7895 &self,
7896 conn_id: u64,
7897 snapshot: &crate::storage::transaction::snapshot::Snapshot,
7898 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
7899 ) -> RedDBResult<()> {
7900 let versioned_updates = self
7901 .inner
7902 .pending_versioned_updates
7903 .read()
7904 .get(&conn_id)
7905 .cloned()
7906 .unwrap_or_default();
7907 let tombstones = self
7908 .inner
7909 .pending_tombstones
7910 .read()
7911 .get(&conn_id)
7912 .cloned()
7913 .unwrap_or_default();
7914
7915 let store = self.inner.db.store();
7916 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
7917 let Some(manager) = store.get_collection(&collection) else {
7918 continue;
7919 };
7920 let Some(old) = manager.get(old_id) else {
7921 continue;
7922 };
7923 let logical_id = old.logical_id();
7924 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7925 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7926 }
7927 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
7928 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
7929 }
7930 self.check_logical_row_conflict(
7931 &collection,
7932 logical_id,
7933 &[old_id, new_id],
7934 snapshot,
7935 own_xids,
7936 )?;
7937 }
7938
7939 for (collection, id, xid, previous_xmax) in tombstones {
7940 let Some(manager) = store.get_collection(&collection) else {
7941 continue;
7942 };
7943 let Some(entity) = manager.get(id) else {
7944 continue;
7945 };
7946 let logical_id = entity.logical_id();
7947 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
7948 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
7949 }
7950 if entity.xmax != xid
7951 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
7952 {
7953 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
7954 }
7955 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
7956 }
7957
7958 Ok(())
7959 }
7960
7961 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
7962 let versioned_updates = self
7963 .inner
7964 .pending_versioned_updates
7965 .read()
7966 .get(&conn_id)
7967 .cloned()
7968 .unwrap_or_default();
7969 let tombstones = self
7970 .inner
7971 .pending_tombstones
7972 .read()
7973 .get(&conn_id)
7974 .cloned()
7975 .unwrap_or_default();
7976
7977 let store = self.inner.db.store();
7978 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
7979 if let Some(manager) = store.get_collection(&collection) {
7980 if let Some(mut entity) = manager.get(old_id) {
7981 entity.set_xmax(xid);
7982 let _ = manager.update(entity);
7983 }
7984 }
7985 }
7986 for (collection, id, xid, _previous_xmax) in tombstones {
7987 if let Some(manager) = store.get_collection(&collection) {
7988 if let Some(mut entity) = manager.get(id) {
7989 entity.set_xmax(xid);
7990 let _ = manager.update(entity);
7991 }
7992 }
7993 }
7994 }
7995
7996 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
7997 self.inner
7998 .pending_versioned_updates
7999 .write()
8000 .remove(&conn_id);
8001 }
8002
8003 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
8004 let Some(pending) = self
8005 .inner
8006 .pending_versioned_updates
8007 .write()
8008 .remove(&conn_id)
8009 else {
8010 return;
8011 };
8012
8013 let store = self.inner.db.store();
8014 for (collection, old_id, new_id, xid, previous_xmax) in pending {
8015 if let Some(manager) = store.get_collection(&collection) {
8016 if let Some(mut old) = manager.get(old_id) {
8017 if old.xmax == xid {
8018 old.set_xmax(previous_xmax);
8019 let _ = manager.update(old);
8020 }
8021 }
8022 }
8023 let _ = store.delete_batch(&collection, &[new_id]);
8024 }
8025 }
8026
8027 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8028 let mut guard = self.inner.pending_versioned_updates.write();
8029 let Some(pending) = guard.get_mut(&conn_id) else {
8030 return 0;
8031 };
8032
8033 let store = self.inner.db.store();
8034 let mut reverted = 0usize;
8035 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
8036 if *xid < stamper_xid {
8037 return true;
8038 }
8039 if let Some(manager) = store.get_collection(collection) {
8040 if let Some(mut old) = manager.get(*old_id) {
8041 if old.xmax == *xid {
8042 old.set_xmax(*previous_xmax);
8043 let _ = manager.update(old);
8044 }
8045 }
8046 }
8047 let _ = store.delete_batch(collection, &[*new_id]);
8048 reverted += 1;
8049 false
8050 });
8051 if pending.is_empty() {
8052 guard.remove(&conn_id);
8053 }
8054 reverted
8055 }
8056
8057 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
8062 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8063 return;
8064 };
8065 if pending.is_empty() {
8066 return;
8067 }
8068
8069 let store = self.inner.db.store();
8070 for (collection, id, _xid, _previous_xmax) in pending {
8071 store.context_index().remove_entity(id);
8072 self.cdc_emit(
8073 crate::replication::cdc::ChangeOperation::Delete,
8074 &collection,
8075 id.raw(),
8076 "entity",
8077 );
8078 }
8079 }
8080
8081 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
8088 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
8089 return;
8090 };
8091
8092 let store = self.inner.db.store();
8093 for (collection, id, xid, previous_xmax) in pending {
8094 let Some(manager) = store.get_collection(&collection) else {
8095 continue;
8096 };
8097 if let Some(mut entity) = manager.get(id) {
8098 if entity.xmax == xid {
8099 entity.set_xmax(previous_xmax);
8100 let _ = manager.update(entity);
8101 }
8102 }
8103 }
8104 }
8105
8106 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
8107 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
8108 return;
8109 };
8110 for event in pending {
8111 self.cdc_emit_kv(
8112 event.op,
8113 &event.collection,
8114 &event.key,
8115 0,
8116 event.before,
8117 event.after,
8118 );
8119 }
8120 }
8121
8122 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
8123 self.inner.pending_kv_watch_events.write().remove(&conn_id);
8124 }
8125
8126 fn materialize_graph_with_rls(
8135 &self,
8136 ) -> RedDBResult<(
8137 crate::storage::engine::GraphStore,
8138 std::collections::HashMap<
8139 String,
8140 std::collections::HashMap<String, crate::storage::schema::Value>,
8141 >,
8142 crate::storage::query::unified::EdgeProperties,
8143 )> {
8144 use crate::storage::engine::GraphStore;
8145 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
8146 use crate::storage::unified::entity::{EntityData, EntityKind};
8147 use std::collections::{HashMap, HashSet};
8148
8149 let store = self.inner.db.store();
8150 let snap_ctx = capture_current_snapshot();
8151 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
8152
8153 let graph = GraphStore::new();
8154 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
8155 HashMap::new();
8156 let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
8157 let mut allowed_nodes: HashSet<String> = HashSet::new();
8158
8159 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
8163 HashMap::new();
8164 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
8165 HashMap::new();
8166
8167 let collections = store.list_collections();
8168
8169 for collection in &collections {
8171 let Some(manager) = store.get_collection(collection) else {
8172 continue;
8173 };
8174 let entities = manager.query_all(|_| true);
8175 for entity in entities {
8176 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
8177 continue;
8178 }
8179 let EntityKind::GraphNode(ref node) = entity.kind else {
8180 continue;
8181 };
8182 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
8183 continue;
8184 }
8185 let id_str = entity.id.raw().to_string();
8186 graph
8187 .add_node_with_label(
8188 &id_str,
8189 &node.label,
8190 &super::graph_node_label(&node.node_type),
8191 )
8192 .map_err(|err| RedDBError::Query(err.to_string()))?;
8193 allowed_nodes.insert(id_str.clone());
8194 if let EntityData::Node(node_data) = &entity.data {
8195 node_properties.insert(id_str, node_data.properties.clone());
8196 }
8197 }
8198 }
8199
8200 for collection in &collections {
8204 let Some(manager) = store.get_collection(collection) else {
8205 continue;
8206 };
8207 let entities = manager.query_all(|_| true);
8208 for entity in entities {
8209 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
8210 continue;
8211 }
8212 let EntityKind::GraphEdge(ref edge) = entity.kind else {
8213 continue;
8214 };
8215 if !allowed_nodes.contains(&edge.from_node)
8216 || !allowed_nodes.contains(&edge.to_node)
8217 {
8218 continue;
8219 }
8220 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
8221 continue;
8222 }
8223 let weight = match &entity.data {
8224 EntityData::Edge(e) => e.weight,
8225 _ => edge.weight as f32 / 1000.0,
8226 };
8227 let edge_label = super::graph_edge_label(&edge.label);
8228 graph
8229 .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
8230 .map_err(|err| RedDBError::Query(err.to_string()))?;
8231 if let EntityData::Edge(edge_data) = &entity.data {
8232 edge_properties.insert(
8233 (edge.from_node.clone(), edge_label, edge.to_node.clone()),
8234 edge_data.properties.clone(),
8235 );
8236 }
8237 }
8238 }
8239
8240 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
8244
8245 Ok((graph, node_properties, edge_properties))
8246 }
8247
8248 pub(crate) fn stamp_xmin_if_in_txn(
8263 &self,
8264 collection: &str,
8265 id: crate::storage::unified::entity::EntityId,
8266 ) {
8267 let Some(xid) = self.current_xid() else {
8268 return;
8269 };
8270 let store = self.inner.db.store();
8271 let Some(manager) = store.get_collection(collection) else {
8272 return;
8273 };
8274 if let Some(mut entity) = manager.get(id) {
8275 entity.set_xmin(xid);
8276 let _ = manager.update(entity);
8277 }
8278 }
8279
8280 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8288 let mut guard = self.inner.pending_tombstones.write();
8289 let Some(pending) = guard.get_mut(&conn_id) else {
8290 return 0;
8291 };
8292
8293 let store = self.inner.db.store();
8294 let mut revived = 0usize;
8295 pending.retain(|(collection, id, xid, previous_xmax)| {
8296 if *xid < stamper_xid {
8297 return true;
8299 }
8300 if let Some(manager) = store.get_collection(collection) {
8301 if let Some(mut entity) = manager.get(*id) {
8302 if entity.xmax == *xid {
8303 entity.set_xmax(*previous_xmax);
8304 let _ = manager.update(entity);
8305 revived += 1;
8306 }
8307 }
8308 }
8309 false
8310 });
8311 if pending.is_empty() {
8312 guard.remove(&conn_id);
8313 }
8314 revived
8315 }
8316
8317 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
8326 let conn_id = current_connection_id();
8327 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
8328 return ctx.snapshot;
8329 }
8330 let high_water = self.inner.snapshot_manager.peek_next_xid();
8336 self.inner.snapshot_manager.snapshot(high_water)
8337 }
8338
8339 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
8349 let conn_id = current_connection_id();
8350 self.inner
8351 .tx_contexts
8352 .read()
8353 .get(&conn_id)
8354 .map(|ctx| ctx.writer_xid())
8355 }
8356
8357 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
8360 Arc::clone(&self.inner.snapshot_manager)
8361 }
8362
8363 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
8364 let manager = &self.inner.snapshot_manager;
8365 let next_xid = manager.peek_next_xid();
8366 let mut cutoff = next_xid;
8367 if let Some(oldest_active) = manager.oldest_active_xid() {
8368 cutoff = cutoff.min(oldest_active);
8369 }
8370 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
8371 cutoff = cutoff.min(oldest_pinned);
8372 }
8373 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
8374 if retention_xids > 0 {
8375 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
8376 }
8377 cutoff
8378 }
8379
8380 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
8381 let registered = self.inner.index_store.list_indices(table);
8382 if registered.is_empty() {
8383 return Ok(());
8384 }
8385 let store = self.inner.db.store();
8386 let Some(manager) = store.get_collection(table) else {
8387 return Ok(());
8388 };
8389 let entity_fields = manager
8390 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
8391 .into_iter()
8392 .map(|entity| (entity.id, table_row_index_fields(&entity)))
8393 .collect::<Vec<_>>();
8394
8395 for index in registered {
8396 self.inner.index_store.drop_index(&index.name, table);
8397 self.inner
8398 .index_store
8399 .create_index(
8400 &index.name,
8401 table,
8402 &index.columns,
8403 index.method,
8404 index.unique,
8405 &entity_fields,
8406 )
8407 .map_err(RedDBError::Internal)?;
8408 self.inner.index_store.register(index);
8409 }
8410 self.invalidate_plan_cache();
8411 Ok(())
8412 }
8413
8414 pub fn current_txn_own_xids(
8419 &self,
8420 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
8421 let mut set = std::collections::HashSet::new();
8422 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
8423 set.insert(ctx.xid);
8424 for (_, sub) in &ctx.savepoints {
8425 set.insert(*sub);
8426 }
8427 for sub in &ctx.released_sub_xids {
8428 set.insert(*sub);
8429 }
8430 }
8431 set
8432 }
8433
8434 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
8441 Arc::clone(&self.inner.foreign_tables)
8442 }
8443
8444 pub fn is_rls_enabled(&self, table: &str) -> bool {
8446 self.inner.rls_enabled_tables.read().contains(table)
8447 }
8448
8449 pub fn matching_rls_policies(
8456 &self,
8457 table: &str,
8458 role: Option<&str>,
8459 action: crate::storage::query::ast::PolicyAction,
8460 ) -> Vec<crate::storage::query::ast::Filter> {
8461 self.matching_rls_policies_for_kind(
8466 table,
8467 role,
8468 action,
8469 crate::storage::query::ast::PolicyTargetKind::Table,
8470 )
8471 }
8472
8473 pub fn matching_rls_policies_for_kind(
8481 &self,
8482 table: &str,
8483 role: Option<&str>,
8484 action: crate::storage::query::ast::PolicyAction,
8485 kind: crate::storage::query::ast::PolicyTargetKind,
8486 ) -> Vec<crate::storage::query::ast::Filter> {
8487 if !self.is_rls_enabled(table) {
8488 return Vec::new();
8489 }
8490 let policies = self.inner.rls_policies.read();
8491 policies
8492 .iter()
8493 .filter_map(|((t, _), p)| {
8494 if t != table {
8495 return None;
8496 }
8497 if p.target_kind != kind
8506 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
8507 {
8508 return None;
8509 }
8510 if let Some(a) = p.action {
8512 if a != action {
8513 return None;
8514 }
8515 }
8516 if let Some(p_role) = p.role.as_deref() {
8518 match role {
8519 Some(r) if r == p_role => {}
8520 _ => return None,
8521 }
8522 }
8523 Some((*p.using).clone())
8524 })
8525 .collect()
8526 }
8527
8528 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
8529 let store = self.inner.db.store();
8530 if let Some(stats) =
8531 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
8532 {
8533 crate::storage::query::planner::stats_catalog::persist_table_stats(
8534 store.as_ref(),
8535 &stats,
8536 );
8537 } else {
8538 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8539 }
8540 self.invalidate_plan_cache();
8541 }
8542
8543 pub(crate) fn note_table_write(&self, table: &str) {
8544 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
8549 if !already_dirty {
8550 self.inner
8551 .planner_dirty_tables
8552 .write()
8553 .insert(table.to_string());
8554 }
8555 self.invalidate_result_cache_for_table(table);
8556 }
8557
8558 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
8566 let explain = self.explain_query(inner_sql)?;
8567
8568 let columns = vec![
8569 "op".to_string(),
8570 "source".to_string(),
8571 "est_rows".to_string(),
8572 "est_cost".to_string(),
8573 "depth".to_string(),
8574 ];
8575
8576 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
8577
8578 for name in &explain.cte_materializations {
8584 use std::sync::Arc;
8585 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
8586 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
8587 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
8588 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
8589 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
8590 rec.set_arc(Arc::from("depth"), Value::Integer(0));
8591 records.push(rec);
8592 }
8593
8594 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
8595
8596 let result = crate::storage::query::unified::UnifiedResult {
8597 columns,
8598 records,
8599 stats: Default::default(),
8600 pre_serialized_json: None,
8601 };
8602
8603 Ok(RuntimeQueryResult {
8604 query: raw_query.to_string(),
8605 mode: explain.mode,
8606 statement: "explain",
8607 engine: "runtime-explain",
8608 result,
8609 affected_rows: 0,
8610 statement_type: "select",
8611 })
8612 }
8613
8614 pub(super) fn check_query_privilege(
8622 &self,
8623 expr: &crate::storage::query::ast::QueryExpr,
8624 ) -> Result<(), String> {
8625 use crate::auth::privileges::{Action, AuthzContext, Resource};
8626 use crate::auth::UserId;
8627 use crate::storage::query::ast::QueryExpr;
8628
8629 let auth_store = match self.inner.auth_store.read().clone() {
8634 Some(s) => s,
8635 None => return Ok(()),
8636 };
8637
8638 let (username, role) = match current_auth_identity() {
8644 Some(p) => p,
8645 None => return Ok(()),
8646 };
8647 let tenant = current_tenant();
8648
8649 let ctx = AuthzContext {
8650 principal: &username,
8651 effective_role: role,
8652 tenant: tenant.as_deref(),
8653 };
8654 let principal_id = UserId::from_parts(tenant.as_deref(), &username);
8655
8656 let (action, resource) = match expr {
8658 QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
8659 QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
8660 QueryExpr::Graph(g) => {
8661 if auth_store.iam_authorization_enabled() {
8662 self.check_graph_property_projection_privilege(
8663 &auth_store,
8664 &principal_id,
8665 role,
8666 tenant.as_deref(),
8667 g,
8668 )?;
8669 return Ok(());
8670 }
8671 return Ok(());
8672 }
8673 QueryExpr::Vector(v) => {
8674 if auth_store.iam_authorization_enabled() {
8675 self.check_table_like_column_projection_privilege(
8676 &auth_store,
8677 &principal_id,
8678 role,
8679 tenant.as_deref(),
8680 &v.collection,
8681 &["content".to_string()],
8682 )?;
8683 return Ok(());
8684 }
8685 return Ok(());
8686 }
8687 QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
8688 QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
8689 QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
8690 QueryExpr::Join(_) => (Action::Select, Resource::Database),
8694 QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
8697 return if role == crate::auth::Role::Admin {
8698 Ok(())
8699 } else {
8700 Err(format!(
8701 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
8702 username, role
8703 ))
8704 };
8705 }
8706 QueryExpr::CreateIamPolicy { id, .. } => {
8707 return self.check_policy_management_privilege(
8708 &auth_store,
8709 &principal_id,
8710 role,
8711 tenant.as_deref(),
8712 "policy:put",
8713 "policy",
8714 id,
8715 );
8716 }
8717 QueryExpr::DropIamPolicy { id } => {
8718 return self.check_policy_management_privilege(
8719 &auth_store,
8720 &principal_id,
8721 role,
8722 tenant.as_deref(),
8723 "policy:drop",
8724 "policy",
8725 id,
8726 );
8727 }
8728 QueryExpr::AttachPolicy { policy_id, .. } => {
8729 return self.check_policy_management_privilege(
8730 &auth_store,
8731 &principal_id,
8732 role,
8733 tenant.as_deref(),
8734 "policy:attach",
8735 "policy",
8736 policy_id,
8737 );
8738 }
8739 QueryExpr::DetachPolicy { policy_id, .. } => {
8740 return self.check_policy_management_privilege(
8741 &auth_store,
8742 &principal_id,
8743 role,
8744 tenant.as_deref(),
8745 "policy:detach",
8746 "policy",
8747 policy_id,
8748 );
8749 }
8750 QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
8751 return Ok(());
8752 }
8753 QueryExpr::SimulatePolicy { .. } => {
8754 return self.check_policy_management_privilege(
8755 &auth_store,
8756 &principal_id,
8757 role,
8758 tenant.as_deref(),
8759 "policy:simulate",
8760 "policy",
8761 "*",
8762 );
8763 }
8764 QueryExpr::DropTable(q) => {
8767 return self.check_ddl_collection_privilege(
8768 &auth_store,
8769 &principal_id,
8770 role,
8771 tenant.as_deref(),
8772 &username,
8773 "drop",
8774 &q.name,
8775 );
8776 }
8777 QueryExpr::DropGraph(q) => {
8778 return self.check_ddl_collection_privilege(
8779 &auth_store,
8780 &principal_id,
8781 role,
8782 tenant.as_deref(),
8783 &username,
8784 "drop",
8785 &q.name,
8786 );
8787 }
8788 QueryExpr::DropVector(q) => {
8789 return self.check_ddl_collection_privilege(
8790 &auth_store,
8791 &principal_id,
8792 role,
8793 tenant.as_deref(),
8794 &username,
8795 "drop",
8796 &q.name,
8797 );
8798 }
8799 QueryExpr::DropDocument(q) => {
8800 return self.check_ddl_collection_privilege(
8801 &auth_store,
8802 &principal_id,
8803 role,
8804 tenant.as_deref(),
8805 &username,
8806 "drop",
8807 &q.name,
8808 );
8809 }
8810 QueryExpr::DropKv(q) => {
8811 return self.check_ddl_collection_privilege(
8812 &auth_store,
8813 &principal_id,
8814 role,
8815 tenant.as_deref(),
8816 &username,
8817 "drop",
8818 &q.name,
8819 );
8820 }
8821 QueryExpr::DropCollection(q) => {
8822 return self.check_ddl_collection_privilege(
8823 &auth_store,
8824 &principal_id,
8825 role,
8826 tenant.as_deref(),
8827 &username,
8828 "drop",
8829 &q.name,
8830 );
8831 }
8832 QueryExpr::Truncate(q) => {
8833 return self.check_ddl_collection_privilege(
8834 &auth_store,
8835 &principal_id,
8836 role,
8837 tenant.as_deref(),
8838 &username,
8839 "truncate",
8840 &q.name,
8841 );
8842 }
8843 QueryExpr::CreateTable(_)
8845 | QueryExpr::CreateCollection(_)
8846 | QueryExpr::CreateVector(_)
8847 | QueryExpr::AlterTable(_)
8848 | QueryExpr::CreateIndex(_)
8849 | QueryExpr::DropIndex(_)
8850 | QueryExpr::CreateSchema(_)
8851 | QueryExpr::DropSchema(_)
8852 | QueryExpr::CreateSequence(_)
8853 | QueryExpr::DropSequence(_)
8854 | QueryExpr::CreateView(_)
8855 | QueryExpr::DropView(_)
8856 | QueryExpr::RefreshMaterializedView(_)
8857 | QueryExpr::CreatePolicy(_)
8858 | QueryExpr::DropPolicy(_)
8859 | QueryExpr::CreateServer(_)
8860 | QueryExpr::DropServer(_)
8861 | QueryExpr::CreateForeignTable(_)
8862 | QueryExpr::DropForeignTable(_)
8863 | QueryExpr::CreateTimeSeries(_)
8864 | QueryExpr::DropTimeSeries(_)
8865 | QueryExpr::CreateQueue(_)
8866 | QueryExpr::AlterQueue(_)
8867 | QueryExpr::DropQueue(_)
8868 | QueryExpr::CreateTree(_)
8869 | QueryExpr::DropTree(_) => {
8870 return if role >= crate::auth::Role::Write {
8871 Ok(())
8872 } else {
8873 Err(format!(
8874 "principal=`{}` role=`{:?}` cannot issue DDL",
8875 username, role
8876 ))
8877 };
8878 }
8879 QueryExpr::CreateMigration(_) => {
8881 return if role >= crate::auth::Role::Write {
8882 Ok(())
8883 } else {
8884 Err(format!(
8885 "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
8886 username, role
8887 ))
8888 };
8889 }
8890 QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
8892 return if role == crate::auth::Role::Admin {
8893 Ok(())
8894 } else {
8895 Err(format!(
8896 "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
8897 username, role
8898 ))
8899 };
8900 }
8901 QueryExpr::ExplainMigration(_) => return Ok(()),
8903 _ => return Ok(()),
8907 };
8908
8909 if auth_store.iam_authorization_enabled() {
8910 let iam_action = legacy_action_to_iam(action);
8911 let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
8912 let iam_ctx = runtime_iam_context(role, tenant.as_deref());
8913 if !auth_store.check_policy_authz(&principal_id, iam_action, &iam_resource, &iam_ctx) {
8914 return Err(format!(
8915 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8916 username, iam_action, iam_resource.kind, iam_resource.name
8917 ));
8918 }
8919
8920 if let QueryExpr::Table(table) = expr {
8921 self.check_table_column_projection_privilege(
8922 &auth_store,
8923 &principal_id,
8924 &iam_ctx,
8925 table,
8926 )?;
8927 }
8928
8929 if let QueryExpr::Update(update) = expr {
8930 let columns = update_set_target_columns(update);
8931 if !columns.is_empty() {
8932 let request = column_access_request_for_table_update(&update.table, columns);
8933 let outcome =
8934 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
8935 if let Some(denied) = outcome.first_denied_column() {
8936 return Err(format!(
8937 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
8938 username, iam_action, denied.resource.kind, denied.resource.name
8939 ));
8940 }
8941 if !outcome.allowed() {
8942 return Err(format!(
8943 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
8944 username,
8945 iam_action,
8946 outcome.table_resource.kind,
8947 outcome.table_resource.name
8948 ));
8949 }
8950 }
8951
8952 if let Some(columns) = update_returning_columns_for_policy(self, update) {
8953 let request = column_access_request_for_table_select(&update.table, columns);
8954 let outcome =
8955 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
8956 if let Some(denied) = outcome.first_denied_column() {
8957 return Err(format!(
8958 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
8959 username, denied.resource.kind, denied.resource.name
8960 ));
8961 }
8962 if !outcome.allowed() {
8963 return Err(format!(
8964 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
8965 username, outcome.table_resource.kind, outcome.table_resource.name
8966 ));
8967 }
8968 }
8969 }
8970
8971 Ok(())
8972 } else {
8973 auth_store
8974 .check_grant(&ctx, action, &resource)
8975 .map_err(|e| e.to_string())
8976 }
8977 }
8978
8979 fn check_table_column_projection_privilege(
8980 &self,
8981 auth_store: &Arc<crate::auth::store::AuthStore>,
8982 principal: &crate::auth::UserId,
8983 ctx: &crate::auth::policies::EvalContext,
8984 table: &crate::storage::query::ast::TableQuery,
8985 ) -> Result<(), String> {
8986 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
8987
8988 let columns = requested_table_columns_for_policy(table);
8989 if columns.is_empty() {
8990 return Ok(());
8991 }
8992
8993 let request = ColumnAccessRequest::select(table.table.clone(), columns);
8994 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
8995 if outcome.allowed() {
8996 return Ok(());
8997 }
8998
8999 if !matches!(
9000 outcome.table_decision,
9001 crate::auth::policies::Decision::Allow { .. }
9002 | crate::auth::policies::Decision::AdminBypass
9003 ) {
9004 return Err(format!(
9005 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9006 principal, outcome.table_resource.kind, outcome.table_resource.name
9007 ));
9008 }
9009
9010 let denied = outcome
9011 .first_denied_column()
9012 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
9013 match denied {
9014 Some(decision) => Err(format!(
9015 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9016 principal, decision.resource.kind, decision.resource.name
9017 )),
9018 None => Ok(()),
9019 }
9020 }
9021
9022 fn check_graph_property_projection_privilege(
9023 &self,
9024 auth_store: &Arc<crate::auth::store::AuthStore>,
9025 principal: &crate::auth::UserId,
9026 role: crate::auth::Role,
9027 tenant: Option<&str>,
9028 query: &crate::storage::query::ast::GraphQuery,
9029 ) -> Result<(), String> {
9030 let columns = explicit_graph_projection_properties(query);
9031 if columns.is_empty() {
9032 return Ok(());
9033 }
9034 self.check_table_like_column_projection_privilege(
9035 auth_store, principal, role, tenant, "graph", &columns,
9036 )
9037 }
9038
9039 fn check_table_like_column_projection_privilege(
9040 &self,
9041 auth_store: &Arc<crate::auth::store::AuthStore>,
9042 principal: &crate::auth::UserId,
9043 role: crate::auth::Role,
9044 tenant: Option<&str>,
9045 table: &str,
9046 columns: &[String],
9047 ) -> Result<(), String> {
9048 let iam_ctx = runtime_iam_context(role, tenant);
9049 let request =
9050 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
9051 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
9052 if outcome.allowed() {
9053 return Ok(());
9054 }
9055 let denied = outcome
9056 .first_denied_column()
9057 .map(|d| d.resource.name.clone())
9058 .unwrap_or_else(|| format!("{table}.<unknown>"));
9059 Err(format!(
9060 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
9061 principal, denied
9062 ))
9063 }
9064
9065 fn check_policy_management_privilege(
9066 &self,
9067 auth_store: &Arc<crate::auth::store::AuthStore>,
9068 principal: &crate::auth::UserId,
9069 role: crate::auth::Role,
9070 tenant: Option<&str>,
9071 action: &str,
9072 resource_kind: &str,
9073 resource_name: &str,
9074 ) -> Result<(), String> {
9075 if !auth_store.iam_authorization_enabled() {
9076 return if role == crate::auth::Role::Admin {
9077 Ok(())
9078 } else {
9079 Err(format!(
9080 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
9081 principal, role
9082 ))
9083 };
9084 }
9085
9086 let mut resource = crate::auth::policies::ResourceRef::new(
9087 resource_kind.to_string(),
9088 resource_name.to_string(),
9089 );
9090 if let Some(t) = tenant {
9091 resource = resource.with_tenant(t.to_string());
9092 }
9093 let ctx = runtime_iam_context(role, tenant);
9094 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9095 Ok(())
9096 } else {
9097 Err(format!(
9098 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9099 principal, action, resource.kind, resource.name
9100 ))
9101 }
9102 }
9103
9104 fn check_ddl_collection_privilege(
9111 &self,
9112 auth_store: &Arc<crate::auth::store::AuthStore>,
9113 principal: &crate::auth::UserId,
9114 role: crate::auth::Role,
9115 tenant: Option<&str>,
9116 username: &str,
9117 action: &str,
9118 collection: &str,
9119 ) -> Result<(), String> {
9120 if role < crate::auth::Role::Write {
9121 let msg = format!(
9122 "principal=`{}` role=`{:?}` cannot issue DDL",
9123 username, role
9124 );
9125 self.inner.audit_log.record(
9126 action,
9127 username,
9128 collection,
9129 "denied",
9130 crate::json::Value::Null,
9131 );
9132 return Err(msg);
9133 }
9134
9135 if !auth_store.iam_authorization_enabled() {
9136 self.inner.audit_log.record(
9137 action,
9138 username,
9139 collection,
9140 "ok",
9141 crate::json::Value::Null,
9142 );
9143 return Ok(());
9144 }
9145
9146 let resource_name = collection.to_string();
9147 let mut resource = crate::auth::policies::ResourceRef::new(
9148 "collection".to_string(),
9149 resource_name.clone(),
9150 );
9151 if let Some(t) = tenant {
9152 resource = resource.with_tenant(t.to_string());
9153 }
9154 let ctx = runtime_iam_context(role, tenant);
9155 if auth_store.check_policy_authz(principal, action, &resource, &ctx) {
9156 self.inner.audit_log.record(
9157 action,
9158 username,
9159 &resource_name,
9160 "ok",
9161 crate::json::Value::Null,
9162 );
9163 Ok(())
9164 } else {
9165 self.inner.audit_log.record(
9166 action,
9167 username,
9168 &resource_name,
9169 "denied",
9170 crate::json::Value::Null,
9171 );
9172 Err(format!(
9173 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
9174 username, action, resource_name
9175 ))
9176 }
9177 }
9178
9179 fn execute_grant_statement(
9181 &self,
9182 query: &str,
9183 stmt: &crate::storage::query::ast::GrantStmt,
9184 ) -> RedDBResult<RuntimeQueryResult> {
9185 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9186 use crate::auth::UserId;
9187 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9188
9189 let auth_store = self
9190 .inner
9191 .auth_store
9192 .read()
9193 .clone()
9194 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9195
9196 let (gname, grole) = current_auth_identity().ok_or_else(|| {
9198 RedDBError::Query("GRANT requires an authenticated principal".to_string())
9199 })?;
9200 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
9201 let granter_role = grole;
9202
9203 let mut actions: Vec<Action> = Vec::new();
9205 if stmt.all {
9206 actions.push(Action::All);
9207 } else {
9208 for kw in &stmt.actions {
9209 let a = Action::from_keyword(kw).ok_or_else(|| {
9210 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
9211 })?;
9212 actions.push(a);
9213 }
9214 }
9215
9216 let mut applied = 0usize;
9218 for obj in &stmt.objects {
9219 let resource = match stmt.object_kind {
9220 GrantObjectKind::Table => Resource::Table {
9221 schema: obj.schema.clone(),
9222 table: obj.name.clone(),
9223 },
9224 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9225 GrantObjectKind::Database => Resource::Database,
9226 GrantObjectKind::Function => Resource::Function {
9227 schema: obj.schema.clone(),
9228 name: obj.name.clone(),
9229 },
9230 };
9231 for principal in &stmt.principals {
9232 let p = match principal {
9233 GrantPrincipalRef::Public => GrantPrincipal::Public,
9234 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9235 GrantPrincipalRef::User { tenant, name } => {
9236 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9237 }
9238 };
9239 let tenant = granter.tenant.clone();
9242 auth_store
9243 .grant(
9244 &granter,
9245 granter_role,
9246 p.clone(),
9247 resource.clone(),
9248 actions.clone(),
9249 stmt.with_grant_option,
9250 tenant.clone(),
9251 )
9252 .map_err(|e| RedDBError::Query(e.to_string()))?;
9253
9254 if let Some(policy) =
9258 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
9259 {
9260 let pid = policy.id.clone();
9261 auth_store
9262 .put_policy_internal(policy)
9263 .map_err(|e| RedDBError::Query(e.to_string()))?;
9264 let attachment = match &p {
9265 GrantPrincipal::User(uid) => {
9266 crate::auth::store::PrincipalRef::User(uid.clone())
9267 }
9268 GrantPrincipal::Group(group) => {
9269 crate::auth::store::PrincipalRef::Group(group.clone())
9270 }
9271 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
9272 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
9273 ),
9274 };
9275 auth_store
9276 .attach_policy(attachment, &pid)
9277 .map_err(|e| RedDBError::Query(e.to_string()))?;
9278 }
9279 applied += 1;
9280 tracing::info!(
9281 target: "audit",
9282 principal = %granter,
9283 action = "grant",
9284 "GRANT applied"
9285 );
9286 }
9287 }
9288
9289 self.invalidate_result_cache();
9290 Ok(RuntimeQueryResult::ok_message(
9291 query.to_string(),
9292 &format!("GRANT applied to {} target(s)", applied),
9293 "grant",
9294 ))
9295 }
9296
9297 fn execute_revoke_statement(
9299 &self,
9300 query: &str,
9301 stmt: &crate::storage::query::ast::RevokeStmt,
9302 ) -> RedDBResult<RuntimeQueryResult> {
9303 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9304 use crate::auth::UserId;
9305 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
9306
9307 let auth_store = self
9308 .inner
9309 .auth_store
9310 .read()
9311 .clone()
9312 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9313
9314 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9315 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
9316 })?;
9317 let granter_role = grole;
9318
9319 let actions: Vec<Action> = if stmt.all {
9320 vec![Action::All]
9321 } else {
9322 stmt.actions
9323 .iter()
9324 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
9325 .collect()
9326 };
9327
9328 let mut total_removed = 0usize;
9329 for obj in &stmt.objects {
9330 let resource = match stmt.object_kind {
9331 GrantObjectKind::Table => Resource::Table {
9332 schema: obj.schema.clone(),
9333 table: obj.name.clone(),
9334 },
9335 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
9336 GrantObjectKind::Database => Resource::Database,
9337 GrantObjectKind::Function => Resource::Function {
9338 schema: obj.schema.clone(),
9339 name: obj.name.clone(),
9340 },
9341 };
9342 for principal in &stmt.principals {
9343 let p = match principal {
9344 GrantPrincipalRef::Public => GrantPrincipal::Public,
9345 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
9346 GrantPrincipalRef::User { tenant, name } => {
9347 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
9348 }
9349 };
9350 let removed = auth_store
9351 .revoke(granter_role, &p, &resource, &actions)
9352 .map_err(|e| RedDBError::Query(e.to_string()))?;
9353 let _removed_policies =
9354 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
9355 total_removed += removed;
9356 }
9357 }
9358
9359 self.invalidate_result_cache();
9360 Ok(RuntimeQueryResult::ok_message(
9361 query.to_string(),
9362 &format!("REVOKE removed {} grant(s)", total_removed),
9363 "revoke",
9364 ))
9365 }
9366
9367 fn execute_alter_user_statement(
9369 &self,
9370 query: &str,
9371 stmt: &crate::storage::query::ast::AlterUserStmt,
9372 ) -> RedDBResult<RuntimeQueryResult> {
9373 use crate::auth::privileges::UserAttributes;
9374 use crate::auth::UserId;
9375 use crate::storage::query::ast::AlterUserAttribute;
9376
9377 let auth_store = self
9378 .inner
9379 .auth_store
9380 .read()
9381 .clone()
9382 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9383
9384 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
9385 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
9386 })?;
9387 if grole != crate::auth::Role::Admin {
9388 return Err(RedDBError::Query(
9389 "ALTER USER requires Admin role".to_string(),
9390 ));
9391 }
9392
9393 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
9394
9395 let mut attrs = auth_store.user_attributes(&target);
9398 let mut enable_change: Option<bool> = None;
9399
9400 for a in &stmt.attributes {
9401 match a {
9402 AlterUserAttribute::ValidUntil(ts) => {
9403 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
9407 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
9408 })?;
9409 attrs.valid_until = Some(ms);
9410 }
9411 AlterUserAttribute::ConnectionLimit(n) => {
9412 if *n < 0 {
9413 return Err(RedDBError::Query(
9414 "CONNECTION LIMIT must be non-negative".to_string(),
9415 ));
9416 }
9417 attrs.connection_limit = Some(*n as u32);
9418 }
9419 AlterUserAttribute::SetSearchPath(p) => {
9420 attrs.search_path = Some(p.clone());
9421 }
9422 AlterUserAttribute::AddGroup(g) => {
9423 if !attrs.groups.iter().any(|existing| existing == g) {
9424 attrs.groups.push(g.clone());
9425 attrs.groups.sort();
9426 }
9427 }
9428 AlterUserAttribute::DropGroup(g) => {
9429 attrs.groups.retain(|existing| existing != g);
9430 }
9431 AlterUserAttribute::Enable => enable_change = Some(true),
9432 AlterUserAttribute::Disable => enable_change = Some(false),
9433 AlterUserAttribute::Password(_) => {
9434 }
9438 }
9439 }
9440
9441 auth_store
9442 .set_user_attributes(&target, attrs)
9443 .map_err(|e| RedDBError::Query(e.to_string()))?;
9444 if let Some(en) = enable_change {
9445 auth_store
9446 .set_user_enabled(&target, en)
9447 .map_err(|e| RedDBError::Query(e.to_string()))?;
9448 }
9449 self.invalidate_result_cache();
9450 tracing::info!(
9451 target: "audit",
9452 principal = %target,
9453 action = "alter_user",
9454 "ALTER USER applied"
9455 );
9456
9457 Ok(RuntimeQueryResult::ok_message(
9458 query.to_string(),
9459 &format!("ALTER USER {} applied", target),
9460 "alter_user",
9461 ))
9462 }
9463
9464 fn execute_create_iam_policy(
9469 &self,
9470 query: &str,
9471 id: &str,
9472 json: &str,
9473 ) -> RedDBResult<RuntimeQueryResult> {
9474 use crate::auth::policies::Policy;
9475
9476 let auth_store = self
9477 .inner
9478 .auth_store
9479 .read()
9480 .clone()
9481 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9482
9483 let mut policy = Policy::from_json_str(json)
9488 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
9489 if policy.id != id {
9490 policy.id = id.to_string();
9491 }
9492 let pid = policy.id.clone();
9493 auth_store
9494 .put_policy(policy)
9495 .map_err(|e| RedDBError::Query(e.to_string()))?;
9496
9497 let principal = current_auth_identity()
9498 .map(|(u, _)| u)
9499 .unwrap_or_else(|| "anonymous".into());
9500 tracing::info!(
9501 target: "audit",
9502 principal = %principal,
9503 action = "iam:policy.put",
9504 matched_policy_id = %pid,
9505 "CREATE POLICY applied"
9506 );
9507 self.inner.audit_log.record(
9508 "iam/policy.put",
9509 &principal,
9510 &pid,
9511 "ok",
9512 crate::json::Value::Null,
9513 );
9514
9515 self.invalidate_result_cache();
9516 Ok(RuntimeQueryResult::ok_message(
9517 query.to_string(),
9518 &format!("policy `{pid}` stored"),
9519 "create_iam_policy",
9520 ))
9521 }
9522
9523 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
9524 let auth_store = self
9525 .inner
9526 .auth_store
9527 .read()
9528 .clone()
9529 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9530 auth_store
9531 .delete_policy(id)
9532 .map_err(|e| RedDBError::Query(e.to_string()))?;
9533
9534 let principal = current_auth_identity()
9535 .map(|(u, _)| u)
9536 .unwrap_or_else(|| "anonymous".into());
9537 tracing::info!(
9538 target: "audit",
9539 principal = %principal,
9540 action = "iam:policy.drop",
9541 matched_policy_id = %id,
9542 "DROP POLICY applied"
9543 );
9544 self.inner.audit_log.record(
9545 "iam/policy.drop",
9546 &principal,
9547 id,
9548 "ok",
9549 crate::json::Value::Null,
9550 );
9551
9552 self.invalidate_result_cache();
9553 Ok(RuntimeQueryResult::ok_message(
9554 query.to_string(),
9555 &format!("policy `{id}` dropped"),
9556 "drop_iam_policy",
9557 ))
9558 }
9559
9560 fn execute_attach_policy(
9561 &self,
9562 query: &str,
9563 policy_id: &str,
9564 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9565 ) -> RedDBResult<RuntimeQueryResult> {
9566 use crate::auth::store::PrincipalRef;
9567 use crate::auth::UserId;
9568 use crate::storage::query::ast::PolicyPrincipalRef;
9569
9570 let auth_store = self
9571 .inner
9572 .auth_store
9573 .read()
9574 .clone()
9575 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9576 let p = match principal {
9577 PolicyPrincipalRef::User(u) => {
9578 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9579 }
9580 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9581 };
9582 let pretty_target = principal_label(principal);
9583 auth_store
9584 .attach_policy(p, policy_id)
9585 .map_err(|e| RedDBError::Query(e.to_string()))?;
9586
9587 let principal_str = current_auth_identity()
9588 .map(|(u, _)| u)
9589 .unwrap_or_else(|| "anonymous".into());
9590 tracing::info!(
9591 target: "audit",
9592 principal = %principal_str,
9593 action = "iam:policy.attach",
9594 matched_policy_id = %policy_id,
9595 target = %pretty_target,
9596 "ATTACH POLICY applied"
9597 );
9598 self.inner.audit_log.record(
9599 "iam/policy.attach",
9600 &principal_str,
9601 &pretty_target,
9602 "ok",
9603 crate::json::Value::Null,
9604 );
9605
9606 self.invalidate_result_cache();
9607 Ok(RuntimeQueryResult::ok_message(
9608 query.to_string(),
9609 &format!("policy `{policy_id}` attached to {pretty_target}"),
9610 "attach_policy",
9611 ))
9612 }
9613
9614 fn execute_detach_policy(
9615 &self,
9616 query: &str,
9617 policy_id: &str,
9618 principal: &crate::storage::query::ast::PolicyPrincipalRef,
9619 ) -> RedDBResult<RuntimeQueryResult> {
9620 use crate::auth::store::PrincipalRef;
9621 use crate::auth::UserId;
9622 use crate::storage::query::ast::PolicyPrincipalRef;
9623
9624 let auth_store = self
9625 .inner
9626 .auth_store
9627 .read()
9628 .clone()
9629 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9630 let p = match principal {
9631 PolicyPrincipalRef::User(u) => {
9632 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
9633 }
9634 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
9635 };
9636 let pretty_target = principal_label(principal);
9637 auth_store
9638 .detach_policy(p, policy_id)
9639 .map_err(|e| RedDBError::Query(e.to_string()))?;
9640
9641 let principal_str = current_auth_identity()
9642 .map(|(u, _)| u)
9643 .unwrap_or_else(|| "anonymous".into());
9644 tracing::info!(
9645 target: "audit",
9646 principal = %principal_str,
9647 action = "iam:policy.detach",
9648 matched_policy_id = %policy_id,
9649 target = %pretty_target,
9650 "DETACH POLICY applied"
9651 );
9652 self.inner.audit_log.record(
9653 "iam/policy.detach",
9654 &principal_str,
9655 &pretty_target,
9656 "ok",
9657 crate::json::Value::Null,
9658 );
9659
9660 self.invalidate_result_cache();
9661 Ok(RuntimeQueryResult::ok_message(
9662 query.to_string(),
9663 &format!("policy `{policy_id}` detached from {pretty_target}"),
9664 "detach_policy",
9665 ))
9666 }
9667
9668 fn execute_show_policies(
9669 &self,
9670 query: &str,
9671 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
9672 ) -> RedDBResult<RuntimeQueryResult> {
9673 use crate::auth::UserId;
9674 use crate::storage::query::ast::PolicyPrincipalRef;
9675 use crate::storage::query::unified::UnifiedRecord;
9676 use crate::storage::schema::Value as SchemaValue;
9677 use std::sync::Arc;
9678
9679 let auth_store = self
9680 .inner
9681 .auth_store
9682 .read()
9683 .clone()
9684 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9685
9686 let pols = match filter {
9687 None => auth_store.list_policies(),
9688 Some(PolicyPrincipalRef::User(u)) => {
9689 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
9690 auth_store.effective_policies(&id)
9691 }
9692 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
9693 };
9694
9695 let mut records = Vec::with_capacity(pols.len());
9696 for p in pols.iter() {
9697 let mut rec = UnifiedRecord::default();
9698 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
9699 rec.set_arc(
9700 Arc::from("statements"),
9701 SchemaValue::Integer(p.statements.len() as i64),
9702 );
9703 rec.set_arc(
9704 Arc::from("tenant"),
9705 p.tenant
9706 .as_deref()
9707 .map(|t| SchemaValue::text(t.to_string()))
9708 .unwrap_or(SchemaValue::Null),
9709 );
9710 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
9711 records.push(rec);
9712 }
9713 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9714 result.records = records;
9715 Ok(RuntimeQueryResult {
9716 query: query.to_string(),
9717 mode: crate::storage::query::modes::QueryMode::Sql,
9718 statement: "show_policies",
9719 engine: "iam-policies",
9720 result,
9721 affected_rows: 0,
9722 statement_type: "select",
9723 })
9724 }
9725
9726 fn execute_show_effective_permissions(
9727 &self,
9728 query: &str,
9729 user: &crate::storage::query::ast::PolicyUserRef,
9730 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
9731 ) -> RedDBResult<RuntimeQueryResult> {
9732 use crate::auth::UserId;
9733 use crate::storage::query::unified::UnifiedRecord;
9734 use crate::storage::schema::Value as SchemaValue;
9735 use std::sync::Arc;
9736
9737 let auth_store = self
9738 .inner
9739 .auth_store
9740 .read()
9741 .clone()
9742 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9743 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9744 let pols = auth_store.effective_policies(&id);
9745
9746 let mut records = Vec::new();
9749 for p in pols.iter() {
9750 for (idx, st) in p.statements.iter().enumerate() {
9751 if let Some(_r) = resource {
9752 }
9756 let mut rec = UnifiedRecord::default();
9757 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
9758 rec.set_arc(
9759 Arc::from("statement_index"),
9760 SchemaValue::Integer(idx as i64),
9761 );
9762 rec.set_arc(
9763 Arc::from("sid"),
9764 st.sid
9765 .as_deref()
9766 .map(|s| SchemaValue::text(s.to_string()))
9767 .unwrap_or(SchemaValue::Null),
9768 );
9769 rec.set_arc(
9770 Arc::from("effect"),
9771 SchemaValue::text(match st.effect {
9772 crate::auth::policies::Effect::Allow => "allow",
9773 crate::auth::policies::Effect::Deny => "deny",
9774 }),
9775 );
9776 rec.set_arc(
9777 Arc::from("actions"),
9778 SchemaValue::Integer(st.actions.len() as i64),
9779 );
9780 rec.set_arc(
9781 Arc::from("resources"),
9782 SchemaValue::Integer(st.resources.len() as i64),
9783 );
9784 records.push(rec);
9785 }
9786 }
9787 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9788 result.records = records;
9789 Ok(RuntimeQueryResult {
9790 query: query.to_string(),
9791 mode: crate::storage::query::modes::QueryMode::Sql,
9792 statement: "show_effective_permissions",
9793 engine: "iam-policies",
9794 result,
9795 affected_rows: 0,
9796 statement_type: "select",
9797 })
9798 }
9799
9800 fn execute_simulate_policy(
9801 &self,
9802 query: &str,
9803 user: &crate::storage::query::ast::PolicyUserRef,
9804 action: &str,
9805 resource: &crate::storage::query::ast::PolicyResourceRef,
9806 ) -> RedDBResult<RuntimeQueryResult> {
9807 use crate::auth::policies::ResourceRef;
9808 use crate::auth::store::SimCtx;
9809 use crate::auth::UserId;
9810 use crate::storage::query::unified::UnifiedRecord;
9811 use crate::storage::schema::Value as SchemaValue;
9812 use std::sync::Arc;
9813
9814 let auth_store = self
9815 .inner
9816 .auth_store
9817 .read()
9818 .clone()
9819 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
9820 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
9821 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
9822 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
9823
9824 let principal_str = current_auth_identity()
9825 .map(|(u, _)| u)
9826 .unwrap_or_else(|| "anonymous".into());
9827 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
9828 tracing::info!(
9829 target: "audit",
9830 principal = %principal_str,
9831 action = "iam:policy.simulate",
9832 decision = %decision_str,
9833 matched_policy_id = ?matched_pid,
9834 matched_sid = ?matched_sid,
9835 "SIMULATE issued"
9836 );
9837 self.inner.audit_log.record(
9838 "iam/policy.simulate",
9839 &principal_str,
9840 &id.to_string(),
9841 "ok",
9842 crate::json::Value::Null,
9843 );
9844
9845 let mut rec = UnifiedRecord::default();
9846 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
9847 rec.set_arc(
9848 Arc::from("matched_policy_id"),
9849 matched_pid
9850 .map(SchemaValue::text)
9851 .unwrap_or(SchemaValue::Null),
9852 );
9853 rec.set_arc(
9854 Arc::from("matched_sid"),
9855 matched_sid
9856 .map(SchemaValue::text)
9857 .unwrap_or(SchemaValue::Null),
9858 );
9859 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
9860 rec.set_arc(
9861 Arc::from("trail_len"),
9862 SchemaValue::Integer(outcome.trail.len() as i64),
9863 );
9864 let mut result = crate::storage::query::unified::UnifiedResult::empty();
9865 result.records = vec![rec];
9866 Ok(RuntimeQueryResult {
9867 query: query.to_string(),
9868 mode: crate::storage::query::modes::QueryMode::Sql,
9869 statement: "simulate_policy",
9870 engine: "iam-policies",
9871 result,
9872 affected_rows: 0,
9873 statement_type: "select",
9874 })
9875 }
9876}
9877
9878fn grant_to_iam_policy(
9883 principal: &crate::auth::privileges::GrantPrincipal,
9884 resource: &crate::auth::privileges::Resource,
9885 actions: &[crate::auth::privileges::Action],
9886 tenant: Option<&str>,
9887) -> Option<crate::auth::policies::Policy> {
9888 use crate::auth::policies::{
9889 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
9890 };
9891 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
9892
9893 if matches!(principal, GrantPrincipal::Group(_)) {
9894 return None;
9895 }
9896
9897 let now = crate::auth::now_ms();
9898 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
9899
9900 let resource_str = match resource {
9901 Resource::Database => "table:*".to_string(),
9902 Resource::Schema(s) => format!("table:{s}.*"),
9903 Resource::Table { schema, table } => match schema {
9904 Some(s) => format!("table:{s}.{table}"),
9905 None => format!("table:{table}"),
9906 },
9907 Resource::Function { schema, name } => match schema {
9908 Some(s) => format!("function:{s}.{name}"),
9909 None => format!("function:{name}"),
9910 },
9911 };
9912
9913 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
9917 vec![ActionPattern::Wildcard]
9918 } else {
9919 actions
9920 .iter()
9921 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
9922 .collect()
9923 };
9924 if action_patterns.is_empty() {
9925 return None;
9926 }
9927
9928 let resource_patterns = if resource_str == "*" {
9933 vec![ResourcePattern::Wildcard]
9934 } else if resource_str.contains('*') {
9935 vec![ResourcePattern::Glob(resource_str.clone())]
9936 } else if let Some((kind, name)) = resource_str.split_once(':') {
9937 vec![ResourcePattern::Exact {
9938 kind: kind.to_string(),
9939 name: name.to_string(),
9940 }]
9941 } else {
9942 vec![ResourcePattern::Wildcard]
9943 };
9944
9945 let policy = Policy {
9946 id,
9947 version: 1,
9948 tenant: tenant.map(|t| t.to_string()),
9949 created_at: now,
9950 updated_at: now,
9951 statements: vec![Statement {
9952 sid: None,
9953 effect: Effect::Allow,
9954 actions: action_patterns,
9955 resources: resource_patterns,
9956 condition: None,
9957 }],
9958 };
9959 if policy.validate().is_err() {
9960 return None;
9961 }
9962 Some(policy)
9963}
9964
9965fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
9966 use crate::auth::privileges::Action;
9967 match action {
9968 Action::Select => "select",
9969 Action::Insert => "insert",
9970 Action::Update => "update",
9971 Action::Delete => "delete",
9972 Action::Truncate => "truncate",
9973 Action::References => "references",
9974 Action::Execute => "execute",
9975 Action::Usage => "usage",
9976 Action::All => "*",
9977 }
9978}
9979
9980fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
9981 let mut columns = Vec::new();
9982 for (column, _) in &query.assignment_exprs {
9983 if !columns.iter().any(|seen| seen == column) {
9984 columns.push(column.clone());
9985 }
9986 }
9987 columns
9988}
9989
9990fn column_access_request_for_table_update(
9991 table_name: &str,
9992 columns: Vec<String>,
9993) -> crate::auth::ColumnAccessRequest {
9994 match table_name.split_once('.') {
9995 Some((schema, table)) => {
9996 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
9997 .with_schema(schema.to_string())
9998 }
9999 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
10000 }
10001}
10002
10003fn column_access_request_for_table_select(
10004 table_name: &str,
10005 columns: Vec<String>,
10006) -> crate::auth::ColumnAccessRequest {
10007 match table_name.split_once('.') {
10008 Some((schema, table)) => {
10009 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
10010 .with_schema(schema.to_string())
10011 }
10012 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
10013 }
10014}
10015
10016fn update_returning_columns_for_policy(
10017 runtime: &RedDBRuntime,
10018 query: &crate::storage::query::ast::UpdateQuery,
10019) -> Option<Vec<String>> {
10020 let items = query.returning.as_ref()?;
10021 let mut columns = Vec::new();
10022 let project_all = items
10023 .iter()
10024 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
10025 if project_all {
10026 collect_returning_star_columns(runtime, query, &mut columns);
10027 } else {
10028 for item in items {
10029 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
10030 continue;
10031 };
10032 push_returning_policy_column(&mut columns, column);
10033 }
10034 }
10035 (!columns.is_empty()).then_some(columns)
10036}
10037
10038fn collect_returning_star_columns(
10039 runtime: &RedDBRuntime,
10040 query: &crate::storage::query::ast::UpdateQuery,
10041 columns: &mut Vec<String>,
10042) {
10043 let store = runtime.db().store();
10044 let Some(manager) = store.get_collection(&query.table) else {
10045 return;
10046 };
10047 if let Some(schema) = manager.column_schema() {
10048 for column in schema.iter() {
10049 push_returning_policy_column(columns, column);
10050 }
10051 }
10052 for entity in manager.query_all(|_| true) {
10053 if !returning_entity_matches_update_target(&entity, query.target) {
10054 continue;
10055 }
10056 match &entity.data {
10057 crate::storage::EntityData::Row(row) => {
10058 for (column, _) in row.iter_fields() {
10059 push_returning_policy_column(columns, column);
10060 }
10061 }
10062 crate::storage::EntityData::Node(node) => {
10063 push_returning_policy_column(columns, "label");
10064 push_returning_policy_column(columns, "node_type");
10065 for column in node.properties.keys() {
10066 push_returning_policy_column(columns, column);
10067 }
10068 }
10069 crate::storage::EntityData::Edge(edge) => {
10070 push_returning_policy_column(columns, "label");
10071 push_returning_policy_column(columns, "from_rid");
10072 push_returning_policy_column(columns, "to_rid");
10073 push_returning_policy_column(columns, "weight");
10074 for column in edge.properties.keys() {
10075 push_returning_policy_column(columns, column);
10076 }
10077 }
10078 _ => {}
10079 }
10080 }
10081}
10082
10083fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
10084 if returning_public_envelope_column(column) {
10085 return;
10086 }
10087 if !columns.iter().any(|seen| seen == column) {
10088 columns.push(column.to_string());
10089 }
10090}
10091
/// Reports whether `column` is one of the envelope column names
/// (`rid`, `collection`, `kind`, `tenant`, `created_at`, `updated_at`,
/// `red_entity_id`), compared ASCII case-insensitively.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|reserved| column.eq_ignore_ascii_case(reserved))
}
10098
10099fn returning_entity_matches_update_target(
10100 entity: &crate::storage::UnifiedEntity,
10101 target: crate::storage::query::ast::UpdateTarget,
10102) -> bool {
10103 use crate::storage::query::ast::UpdateTarget;
10104 match target {
10105 UpdateTarget::Rows => {
10106 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
10107 }
10108 UpdateTarget::Documents => {
10109 matches!(
10110 returning_row_item_kind(entity),
10111 Some(ReturningRowKind::Document)
10112 )
10113 }
10114 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
10115 UpdateTarget::Nodes => matches!(
10116 (&entity.kind, &entity.data),
10117 (
10118 crate::storage::EntityKind::GraphNode(_),
10119 crate::storage::EntityData::Node(_)
10120 )
10121 ),
10122 UpdateTarget::Edges => matches!(
10123 (&entity.kind, &entity.data),
10124 (
10125 crate::storage::EntityKind::GraphEdge(_),
10126 crate::storage::EntityData::Edge(_)
10127 )
10128 ),
10129 }
10130}
10131
10132#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10133enum ReturningRowKind {
10134 Row,
10135 Document,
10136 Kv,
10137}
10138
10139fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
10140 let row = entity.data.as_row()?;
10141 let is_kv = row.iter_fields().all(|(column, _)| {
10142 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
10143 });
10144 if is_kv {
10145 return Some(ReturningRowKind::Kv);
10146 }
10147 let is_document = row
10148 .iter_fields()
10149 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
10150 if is_document {
10151 Some(ReturningRowKind::Document)
10152 } else {
10153 Some(ReturningRowKind::Row)
10154 }
10155}
10156
10157fn requested_table_columns_for_policy(
10158 table: &crate::storage::query::ast::TableQuery,
10159) -> Vec<String> {
10160 use crate::storage::query::sql_lowering::{
10161 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
10162 effective_table_projections,
10163 };
10164
10165 let table_name = table.table.as_str();
10166 let table_alias = table.alias.as_deref();
10167 let mut columns = std::collections::BTreeSet::new();
10168
10169 for projection in effective_table_projections(table) {
10170 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
10171 }
10172 if let Some(filter) = effective_table_filter(table) {
10173 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10174 }
10175 for expr in effective_table_group_by_exprs(table) {
10176 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
10177 }
10178 if let Some(filter) = effective_table_having_filter(table) {
10179 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
10180 }
10181 for order in &table.order_by {
10182 if let Some(expr) = order.expr.as_ref() {
10183 collect_expr_columns(expr, table_name, table_alias, &mut columns);
10184 } else {
10185 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
10186 }
10187 }
10188
10189 columns.into_iter().collect()
10190}
10191
10192fn collect_projection_columns(
10193 projection: &crate::storage::query::ast::Projection,
10194 table_name: &str,
10195 table_alias: Option<&str>,
10196 columns: &mut std::collections::BTreeSet<String>,
10197) {
10198 use crate::storage::query::ast::Projection;
10199 match projection {
10200 Projection::All => {
10201 columns.insert("*".to_string());
10202 }
10203 Projection::Column(column) | Projection::Alias(column, _) => {
10204 if column != "*" {
10205 columns.insert(column.clone());
10206 }
10207 }
10208 Projection::Function(_, args) => {
10209 for arg in args {
10210 collect_projection_columns(arg, table_name, table_alias, columns);
10211 }
10212 }
10213 Projection::Expression(filter, _) => {
10214 collect_filter_columns(filter, table_name, table_alias, columns);
10215 }
10216 Projection::Field(field, _) => {
10217 collect_field_ref_column(field, table_name, table_alias, columns);
10218 }
10219 }
10220}
10221
10222fn collect_filter_columns(
10223 filter: &crate::storage::query::ast::Filter,
10224 table_name: &str,
10225 table_alias: Option<&str>,
10226 columns: &mut std::collections::BTreeSet<String>,
10227) {
10228 use crate::storage::query::ast::Filter;
10229 match filter {
10230 Filter::Compare { field, .. }
10231 | Filter::IsNull(field)
10232 | Filter::IsNotNull(field)
10233 | Filter::In { field, .. }
10234 | Filter::Between { field, .. }
10235 | Filter::Like { field, .. }
10236 | Filter::StartsWith { field, .. }
10237 | Filter::EndsWith { field, .. }
10238 | Filter::Contains { field, .. } => {
10239 collect_field_ref_column(field, table_name, table_alias, columns);
10240 }
10241 Filter::CompareFields { left, right, .. } => {
10242 collect_field_ref_column(left, table_name, table_alias, columns);
10243 collect_field_ref_column(right, table_name, table_alias, columns);
10244 }
10245 Filter::CompareExpr { lhs, rhs, .. } => {
10246 collect_expr_columns(lhs, table_name, table_alias, columns);
10247 collect_expr_columns(rhs, table_name, table_alias, columns);
10248 }
10249 Filter::And(left, right) | Filter::Or(left, right) => {
10250 collect_filter_columns(left, table_name, table_alias, columns);
10251 collect_filter_columns(right, table_name, table_alias, columns);
10252 }
10253 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
10254 }
10255}
10256
10257fn collect_expr_columns(
10258 expr: &crate::storage::query::ast::Expr,
10259 table_name: &str,
10260 table_alias: Option<&str>,
10261 columns: &mut std::collections::BTreeSet<String>,
10262) {
10263 use crate::storage::query::ast::Expr;
10264 match expr {
10265 Expr::Column { field, .. } => {
10266 collect_field_ref_column(field, table_name, table_alias, columns);
10267 }
10268 Expr::Literal { .. } | Expr::Parameter { .. } => {}
10269 Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
10270 collect_expr_columns(operand, table_name, table_alias, columns);
10271 }
10272 Expr::BinaryOp { lhs, rhs, .. } => {
10273 collect_expr_columns(lhs, table_name, table_alias, columns);
10274 collect_expr_columns(rhs, table_name, table_alias, columns);
10275 }
10276 Expr::FunctionCall { args, .. } => {
10277 for arg in args {
10278 collect_expr_columns(arg, table_name, table_alias, columns);
10279 }
10280 }
10281 Expr::Case {
10282 branches, else_, ..
10283 } => {
10284 for (condition, value) in branches {
10285 collect_expr_columns(condition, table_name, table_alias, columns);
10286 collect_expr_columns(value, table_name, table_alias, columns);
10287 }
10288 if let Some(value) = else_ {
10289 collect_expr_columns(value, table_name, table_alias, columns);
10290 }
10291 }
10292 Expr::IsNull { operand, .. } => {
10293 collect_expr_columns(operand, table_name, table_alias, columns);
10294 }
10295 Expr::InList { target, values, .. } => {
10296 collect_expr_columns(target, table_name, table_alias, columns);
10297 for value in values {
10298 collect_expr_columns(value, table_name, table_alias, columns);
10299 }
10300 }
10301 Expr::Between {
10302 target, low, high, ..
10303 } => {
10304 collect_expr_columns(target, table_name, table_alias, columns);
10305 collect_expr_columns(low, table_name, table_alias, columns);
10306 collect_expr_columns(high, table_name, table_alias, columns);
10307 }
10308 Expr::Subquery { .. } => {}
10309 }
10310}
10311
10312fn collect_field_ref_column(
10313 field: &crate::storage::query::ast::FieldRef,
10314 table_name: &str,
10315 table_alias: Option<&str>,
10316 columns: &mut std::collections::BTreeSet<String>,
10317) {
10318 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
10319 if column != "*" {
10320 columns.insert(column);
10321 }
10322 }
10323}
10324
10325fn policy_column_name_from_field_ref(
10326 field: &crate::storage::query::ast::FieldRef,
10327 table_name: &str,
10328 table_alias: Option<&str>,
10329) -> Option<String> {
10330 match field {
10331 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
10332 if column == "*" {
10333 return Some("*".to_string());
10334 }
10335 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
10336 Some(column.clone())
10337 } else {
10338 Some(format!("{table}.{column}"))
10339 }
10340 }
10341 _ => None,
10342 }
10343}
10344
10345fn legacy_resource_to_iam(
10346 resource: &crate::auth::privileges::Resource,
10347 tenant: Option<&str>,
10348) -> crate::auth::policies::ResourceRef {
10349 use crate::auth::privileges::Resource;
10350
10351 let (kind, name) = match resource {
10352 Resource::Database => ("database".to_string(), "*".to_string()),
10353 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
10354 Resource::Table { schema, table } => (
10355 "table".to_string(),
10356 match schema {
10357 Some(s) => format!("{s}.{table}"),
10358 None => table.clone(),
10359 },
10360 ),
10361 Resource::Function { schema, name } => (
10362 "function".to_string(),
10363 match schema {
10364 Some(s) => format!("{s}.{name}"),
10365 None => name.clone(),
10366 },
10367 ),
10368 };
10369
10370 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
10371 if let Some(t) = tenant {
10372 out = out.with_tenant(t.to_string());
10373 }
10374 out
10375}
10376
10377#[derive(Debug)]
10378struct JoinTableSide {
10379 table: String,
10380 alias: String,
10381}
10382
10383fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
10384 match expr {
10385 QueryExpr::Table(table) => Some(JoinTableSide {
10386 table: table.table.clone(),
10387 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
10388 }),
10389 _ => None,
10390 }
10391}
10392
10393fn collect_projection_columns_for_table(
10394 projection: &Projection,
10395 table: &str,
10396 alias: Option<&str>,
10397 out: &mut BTreeSet<String>,
10398) {
10399 match projection {
10400 Projection::Column(column) | Projection::Alias(column, _) => {
10401 match split_qualified_column(column) {
10402 Some((qualifier, column))
10403 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
10404 {
10405 push_policy_column(column, out);
10406 }
10407 Some(_) => {}
10408 None => push_policy_column(column, out),
10409 }
10410 }
10411 Projection::Field(
10412 FieldRef::TableColumn {
10413 table: qualifier,
10414 column,
10415 },
10416 _,
10417 ) => {
10418 if qualifier.is_empty()
10419 || qualifier == table
10420 || alias.is_some_and(|alias| qualifier == alias)
10421 {
10422 push_policy_column(column, out);
10423 }
10424 }
10425 Projection::Field(
10426 FieldRef::NodeProperty {
10427 alias: qualifier,
10428 property,
10429 },
10430 _,
10431 )
10432 | Projection::Field(
10433 FieldRef::EdgeProperty {
10434 alias: qualifier,
10435 property,
10436 },
10437 _,
10438 ) => {
10439 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
10440 push_policy_column(property, out);
10441 }
10442 }
10443 Projection::Function(_, args) => {
10444 for arg in args {
10445 collect_projection_columns_for_table(arg, table, alias, out);
10446 }
10447 }
10448 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10449 }
10450}
10451
10452fn collect_projection_columns_for_join_side(
10453 projection: &Projection,
10454 left: Option<&JoinTableSide>,
10455 right: Option<&JoinTableSide>,
10456 out: &mut HashMap<String, BTreeSet<String>>,
10457) -> RedDBResult<()> {
10458 match projection {
10459 Projection::Column(column) | Projection::Alias(column, _) => {
10460 if let Some((qualifier, column)) = split_qualified_column(column) {
10461 push_qualified_join_column(qualifier, column, left, right, out);
10462 } else {
10463 push_unqualified_join_column(column, left, right, out);
10464 }
10465 }
10466 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
10467 if table.is_empty() {
10468 push_unqualified_join_column(column, left, right, out);
10469 } else if let Some(side) = [left, right]
10470 .into_iter()
10471 .flatten()
10472 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
10473 {
10474 push_join_column(&side.table, column, out);
10475 }
10476 }
10477 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
10478 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
10479 push_qualified_join_column(alias, property, left, right, out);
10480 }
10481 Projection::Function(_, args) => {
10482 for arg in args {
10483 collect_projection_columns_for_join_side(arg, left, right, out)?;
10484 }
10485 }
10486 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
10487 }
10488 Ok(())
10489}
10490
/// Splits `qualifier.column` into its two parts.
///
/// Returns `None` unless the input has exactly one `.` with non-empty text
/// on both sides.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    column.split_once('.').filter(|(qualifier, name)| {
        !qualifier.is_empty() && !name.is_empty() && !name.contains('.')
    })
}
10498
10499fn push_qualified_join_column(
10500 qualifier: &str,
10501 column: &str,
10502 left: Option<&JoinTableSide>,
10503 right: Option<&JoinTableSide>,
10504 out: &mut HashMap<String, BTreeSet<String>>,
10505) {
10506 if let Some(side) = [left, right]
10507 .into_iter()
10508 .flatten()
10509 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
10510 {
10511 push_join_column(&side.table, column, out);
10512 }
10513}
10514
10515fn push_unqualified_join_column(
10516 column: &str,
10517 left: Option<&JoinTableSide>,
10518 right: Option<&JoinTableSide>,
10519 out: &mut HashMap<String, BTreeSet<String>>,
10520) {
10521 for side in [left, right].into_iter().flatten() {
10522 push_join_column(&side.table, column, out);
10523 }
10524}
10525
10526fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
10527 if is_policy_column_name(column) {
10528 out.entry(table.to_string())
10529 .or_default()
10530 .insert(column.to_string());
10531 }
10532}
10533
10534fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
10535 if is_policy_column_name(column) {
10536 out.insert(column.to_string());
10537 }
10538}
10539
/// Reports whether `column` is a real column name for policy purposes.
///
/// Rejects the empty string, the wildcard `"*"`, and names starting with
/// the `LIT:` or `TYPE:` prefixes (case-sensitive).
fn is_policy_column_name(column: &str) -> bool {
    if column.is_empty() || column == "*" {
        return false;
    }
    !["LIT:", "TYPE:"]
        .iter()
        .any(|marker| column.starts_with(*marker))
}
10546
10547fn runtime_iam_context(
10548 role: crate::auth::Role,
10549 tenant: Option<&str>,
10550) -> crate::auth::policies::EvalContext {
10551 crate::auth::policies::EvalContext {
10552 principal_tenant: tenant.map(|t| t.to_string()),
10553 current_tenant: tenant.map(|t| t.to_string()),
10554 peer_ip: None,
10555 mfa_present: false,
10556 now_ms: crate::auth::now_ms(),
10557 principal_is_admin_role: role == crate::auth::Role::Admin,
10558 }
10559}
10560
10561fn explicit_table_projection_columns(
10562 query: &crate::storage::query::ast::TableQuery,
10563) -> Vec<String> {
10564 use crate::storage::query::ast::{FieldRef, Projection};
10565
10566 let mut columns = Vec::new();
10567 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
10568 match projection {
10569 Projection::Column(column) | Projection::Alias(column, _) => {
10570 push_unique(&mut columns, column)
10571 }
10572 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
10573 push_unique(&mut columns, column)
10574 }
10575 _ => {}
10579 }
10580 }
10581 columns
10582}
10583
10584fn explicit_graph_projection_properties(
10585 query: &crate::storage::query::ast::GraphQuery,
10586) -> Vec<String> {
10587 use crate::storage::query::ast::{FieldRef, Projection};
10588
10589 let mut columns = Vec::new();
10590 for projection in &query.return_ {
10591 match projection {
10592 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
10593 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
10594 push_unique(&mut columns, property.clone())
10595 }
10596 _ => {}
10597 }
10598 }
10599 columns
10600}
10601
/// Appends `column` to `columns` unless an equal entry is already present.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
10607
10608fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
10609 use crate::storage::query::ast::PolicyPrincipalRef;
10610 match p {
10611 PolicyPrincipalRef::User(u) => match &u.tenant {
10612 Some(t) => format!("user:{t}/{}", u.username),
10613 None => format!("user:{}", u.username),
10614 },
10615 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
10616 }
10617}
10618
10619pub(crate) fn decision_to_strings(
10622 d: &crate::auth::policies::Decision,
10623) -> (String, Option<String>, Option<String>) {
10624 use crate::auth::policies::Decision;
10625 match d {
10626 Decision::Allow {
10627 matched_policy_id,
10628 matched_sid,
10629 } => (
10630 "allow".into(),
10631 Some(matched_policy_id.clone()),
10632 matched_sid.clone(),
10633 ),
10634 Decision::Deny {
10635 matched_policy_id,
10636 matched_sid,
10637 } => (
10638 "deny".into(),
10639 Some(matched_policy_id.clone()),
10640 matched_sid.clone(),
10641 ),
10642 Decision::DefaultDeny => ("default_deny".into(), None, None),
10643 Decision::AdminBypass => ("admin_bypass".into(), None, None),
10644 }
10645}
10646
10647fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
10648 let mut scopes = Vec::new();
10649 collect_relation_scopes(query, &mut scopes);
10650 scopes.sort();
10651 scopes.dedup();
10652 scopes
10653}
10654
10655fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
10656 match query {
10657 QueryExpr::Table(table) => {
10658 if !table.table.is_empty() {
10659 scopes.push(table.table.clone());
10660 }
10661 if let Some(alias) = &table.alias {
10662 scopes.push(alias.clone());
10663 }
10664 }
10665 QueryExpr::Join(join) => {
10666 collect_relation_scopes(&join.left, scopes);
10667 collect_relation_scopes(&join.right, scopes);
10668 }
10669 _ => {}
10670 }
10671}
10672
10673fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
10674 let inner_scopes = relation_scopes_for_query(query);
10675 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
10676}
10677
10678fn query_expr_references_outer_scope(
10679 query: &QueryExpr,
10680 outer_scopes: &[String],
10681 inner_scopes: &[String],
10682) -> bool {
10683 match query {
10684 QueryExpr::Table(table) => {
10685 table.select_items.iter().any(|item| match item {
10686 crate::storage::query::ast::SelectItem::Wildcard => false,
10687 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10688 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10689 }
10690 }) || table
10691 .where_expr
10692 .as_ref()
10693 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10694 || table.filter.as_ref().is_some_and(|filter| {
10695 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10696 })
10697 || table.having_expr.as_ref().is_some_and(|expr| {
10698 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10699 })
10700 || table.having.as_ref().is_some_and(|filter| {
10701 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10702 })
10703 || table
10704 .group_by_exprs
10705 .iter()
10706 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10707 || table.order_by.iter().any(|clause| {
10708 clause.expr.as_ref().is_some_and(|expr| {
10709 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10710 })
10711 })
10712 }
10713 QueryExpr::Join(join) => {
10714 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
10715 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
10716 || join.filter.as_ref().is_some_and(|filter| {
10717 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
10718 })
10719 || join.return_items.iter().any(|item| match item {
10720 crate::storage::query::ast::SelectItem::Wildcard => false,
10721 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
10722 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
10723 }
10724 })
10725 }
10726 _ => false,
10727 }
10728}
10729
10730fn filter_references_outer_scope(
10731 filter: &crate::storage::query::ast::Filter,
10732 outer_scopes: &[String],
10733 inner_scopes: &[String],
10734) -> bool {
10735 use crate::storage::query::ast::Filter;
10736 match filter {
10737 Filter::Compare { field, .. }
10738 | Filter::IsNull(field)
10739 | Filter::IsNotNull(field)
10740 | Filter::In { field, .. }
10741 | Filter::Between { field, .. }
10742 | Filter::Like { field, .. }
10743 | Filter::StartsWith { field, .. }
10744 | Filter::EndsWith { field, .. }
10745 | Filter::Contains { field, .. } => {
10746 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10747 }
10748 Filter::CompareFields { left, right, .. } => {
10749 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
10750 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
10751 }
10752 Filter::CompareExpr { lhs, rhs, .. } => {
10753 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10754 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10755 }
10756 Filter::And(left, right) | Filter::Or(left, right) => {
10757 filter_references_outer_scope(left, outer_scopes, inner_scopes)
10758 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
10759 }
10760 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
10761 }
10762}
10763
10764fn expr_references_outer_scope(
10765 expr: &crate::storage::query::ast::Expr,
10766 outer_scopes: &[String],
10767 inner_scopes: &[String],
10768) -> bool {
10769 use crate::storage::query::ast::Expr;
10770 match expr {
10771 Expr::Column { field, .. } => {
10772 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
10773 }
10774 Expr::BinaryOp { lhs, rhs, .. } => {
10775 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
10776 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
10777 }
10778 Expr::UnaryOp { operand, .. }
10779 | Expr::Cast { inner: operand, .. }
10780 | Expr::IsNull { operand, .. } => {
10781 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
10782 }
10783 Expr::FunctionCall { args, .. } => args
10784 .iter()
10785 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
10786 Expr::Case {
10787 branches, else_, ..
10788 } => {
10789 branches.iter().any(|(cond, value)| {
10790 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
10791 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
10792 }) || else_
10793 .as_ref()
10794 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
10795 }
10796 Expr::InList { target, values, .. } => {
10797 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10798 || values
10799 .iter()
10800 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
10801 }
10802 Expr::Between {
10803 target, low, high, ..
10804 } => {
10805 expr_references_outer_scope(target, outer_scopes, inner_scopes)
10806 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
10807 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
10808 }
10809 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
10810 Expr::Literal { .. } | Expr::Parameter { .. } => false,
10811 }
10812}
10813
10814fn field_ref_references_outer_scope(
10815 field: &crate::storage::query::ast::FieldRef,
10816 outer_scopes: &[String],
10817 inner_scopes: &[String],
10818) -> bool {
10819 match field {
10820 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
10821 outer_scopes.iter().any(|scope| scope == table)
10822 && !inner_scopes.iter().any(|scope| scope == table)
10823 }
10824 _ => false,
10825 }
10826}
10827
10828fn first_column_values(
10829 result: crate::storage::query::unified::UnifiedResult,
10830) -> RedDBResult<Vec<Value>> {
10831 if result.columns.len() > 1 {
10832 return Err(RedDBError::Query(
10833 "expression subquery must return exactly one column".to_string(),
10834 ));
10835 }
10836 let fallback_column = result
10837 .records
10838 .first()
10839 .and_then(|record| record.column_names().into_iter().next())
10840 .map(|name| name.to_string());
10841 let column = result.columns.first().cloned().or(fallback_column);
10842 let Some(column) = column else {
10843 return Ok(Vec::new());
10844 };
10845 Ok(result
10846 .records
10847 .iter()
10848 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
10849 .collect())
10850}
10851
10852fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
10853 if let Ok(n) = s.parse::<u128>() {
10855 return Some(n);
10856 }
10857 if let Some(date) = s.split_whitespace().next() {
10861 let parts: Vec<&str> = date.split('-').collect();
10862 if parts.len() == 3 {
10863 let (y, m, d) = (parts[0], parts[1], parts[2]);
10864 if let (Ok(y), Ok(m), Ok(d)) = (y.parse::<i64>(), m.parse::<u32>(), d.parse::<u32>()) {
10865 let days_in = days_from_civil(y, m, d);
10869 return Some((days_in as u128) * 86_400_000u128);
10870 }
10871 }
10872 }
10873 None
10874}
10875
/// Converts a proleptic Gregorian calendar date into a day number relative to
/// 1970-01-01 (which maps to `0`; earlier dates are negative).
///
/// `m` is the month (`1` = January) and `d` the day of the month (`1` = first day).
/// Out-of-range values are not rejected: they are resolved arithmetically, so day `0`
/// is the last day of the previous month.
///
/// All arithmetic is signed, so `d == 0` and negative years cannot underflow. Years
/// whose day count does not fit in an `i64` still overflow.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Shift the year so it starts on 1 March; a leap day then falls at year end.
    let y = if m <= 2 { y - 1 } else { y };
    // 400-year eras; Euclidean division floors correctly for negative years.
    let era = y.div_euclid(400);
    let year_of_era = y.rem_euclid(400); // 0..=399
    // Month index counted from March (March = 0, ..., February = 11).
    let shifted_month = if m > 2 {
        i64::from(m) - 3
    } else {
        i64::from(m) + 9
    };
    let day_of_year = (153 * shifted_month + 2) / 5 + i64::from(d) - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    // 146 097 days per era; 719 468 days separate 0000-03-01 from 1970-01-01.
    era * 146_097 + day_of_era - 719_468
}
10886
10887fn walk_plan_node(
10888 node: &crate::storage::query::planner::CanonicalLogicalNode,
10889 depth: usize,
10890 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
10891) {
10892 use std::sync::Arc;
10893 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
10894 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
10895 rec.set_arc(
10896 Arc::from("source"),
10897 node.source.clone().map(Value::text).unwrap_or(Value::Null),
10898 );
10899 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
10900 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
10901 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
10902 out.push(rec);
10903 for child in &node.children {
10904 walk_plan_node(child, depth + 1, out);
10905 }
10906}