1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
// Per-thread ambient execution state. Each slot is read and written through
// the accessor functions and RAII guards defined in this file.
thread_local! {
    /// Connection id bound to this thread. Starts at `0`, which is also the
    /// value `clear_current_connection_id` writes back.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    /// `(username, role)` installed by `set_current_auth_identity`; `None`
    /// when no identity is set.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    /// Snapshot context used by the visibility checks in this file; `None`
    /// when no snapshot is installed.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    /// Mirrors whether `CURRENT_SNAPSHOT` holds a value; every writer in this
    /// file updates both together. Presumably a cheaper check than borrowing
    /// the `RefCell` — confirm against the original design notes.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    /// Session-level tenant id installed by `set_current_tenant`; `None` when
    /// unset. `current_tenant` layers overrides on top of this value.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    /// Lazily-populated config lookup state installed by `ConfigSnapshotGuard`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    /// Lazily-populated secret lookup state installed by `SecretStoreGuard`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
/// Description of one control-plane event derived from a parsed statement
/// (built by `query_control_event_specs`).
#[derive(Clone)]
struct QueryControlEventSpec {
    /// Event category (schema DDL, tenant governance, config write, ...).
    kind: crate::runtime::control_events::EventKind,
    /// Static action label such as `"create_table"` or `"config:write"`.
    action: &'static str,
    /// Target of the action, formatted as `<type>:<name>` by the builder.
    resource: Option<String>,
    /// Extra named attributes attached to the event, each wrapped in a
    /// `Sensitivity` marker.
    fields: Vec<(String, crate::runtime::control_events::Sensitivity)>,
}
98
/// Audit summary of a data statement (built by `query_audit_plan`).
#[derive(Clone)]
struct QueryAuditPlan {
    /// One of `"select"`, `"insert"`, `"update"`, `"delete"`.
    statement_kind: &'static str,
    /// De-duplicated collection names touched by the statement, with
    /// internal `red` / `red.*` / `__red_schema_*` names filtered out.
    collections: Vec<String>,
}
104
105fn query_audit_plan(expr: &QueryExpr) -> Option<QueryAuditPlan> {
106 let mut collections = Vec::new();
107 let statement_kind = match expr {
108 QueryExpr::Table(table) => {
109 push_query_audit_collection(&mut collections, &table.table);
110 "select"
111 }
112 QueryExpr::Join(join) => {
113 collect_query_audit_collections(&join.left, &mut collections);
114 collect_query_audit_collections(&join.right, &mut collections);
115 "select"
116 }
117 QueryExpr::Insert(insert) => {
118 push_query_audit_collection(&mut collections, &insert.table);
119 "insert"
120 }
121 QueryExpr::Update(update) => {
122 push_query_audit_collection(&mut collections, &update.table);
123 "update"
124 }
125 QueryExpr::Delete(delete) => {
126 push_query_audit_collection(&mut collections, &delete.table);
127 "delete"
128 }
129 _ => return None,
130 };
131 if collections.is_empty() {
132 None
133 } else {
134 Some(QueryAuditPlan {
135 statement_kind,
136 collections,
137 })
138 }
139}
140
141fn collect_query_audit_collections(expr: &QueryExpr, collections: &mut Vec<String>) {
142 match expr {
143 QueryExpr::Table(table) => push_query_audit_collection(collections, &table.table),
144 QueryExpr::Join(join) => {
145 collect_query_audit_collections(&join.left, collections);
146 collect_query_audit_collections(&join.right, collections);
147 }
148 _ => {}
149 }
150}
151
/// Appends `name` to `collections` unless it is an internal collection
/// (`red`, `red.*`, `__red_schema_*`) or is already present.
fn push_query_audit_collection(collections: &mut Vec<String>, name: &str) {
    let internal = name == "red" || name.starts_with("red.") || name.starts_with("__red_schema_");
    if internal {
        return;
    }
    let already_listed = collections.iter().any(|existing| existing == name);
    if !already_listed {
        collections.push(name.to_string());
    }
}
160
161fn query_control_event_specs(expr: &QueryExpr) -> Vec<QueryControlEventSpec> {
162 use crate::runtime::control_events::{EventKind, Sensitivity};
163
164 let mut specs = Vec::new();
165 let mut schema = |action: &'static str, resource: Option<String>| {
166 specs.push(QueryControlEventSpec {
167 kind: EventKind::SchemaDdl,
168 action,
169 resource,
170 fields: Vec::new(),
171 });
172 };
173 match expr {
174 QueryExpr::CreateTable(q) => {
175 schema("create_table", Some(format!("table:{}", q.name)));
176 if let Some(column) = &q.tenant_by {
177 specs.push(QueryControlEventSpec {
178 kind: EventKind::TenantGovernance,
179 action: "create_table_tenant_by",
180 resource: Some(format!("table:{}", q.name)),
181 fields: vec![("tenant_column".to_string(), Sensitivity::raw(column))],
182 });
183 }
184 }
185 QueryExpr::CreateCollection(q) => {
186 schema("create_collection", Some(format!("collection:{}", q.name)));
187 }
188 QueryExpr::CreateVector(q) => schema("create_vector", Some(format!("vector:{}", q.name))),
189 QueryExpr::DropTable(q) => schema("drop_table", Some(format!("table:{}", q.name))),
190 QueryExpr::DropGraph(q) => schema("drop_graph", Some(format!("graph:{}", q.name))),
191 QueryExpr::DropVector(q) => schema("drop_vector", Some(format!("vector:{}", q.name))),
192 QueryExpr::DropDocument(q) => {
193 schema("drop_document", Some(format!("document:{}", q.name)));
194 }
195 QueryExpr::DropKv(q) => schema("drop_kv", Some(format!("kv:{}", q.name))),
196 QueryExpr::DropCollection(q) => {
197 schema("drop_collection", Some(format!("collection:{}", q.name)));
198 }
199 QueryExpr::Truncate(q) => schema("truncate", Some(format!("collection:{}", q.name))),
200 QueryExpr::AlterTable(q) => {
201 schema("alter_table", Some(format!("table:{}", q.name)));
202 for op in &q.operations {
203 match op {
204 crate::storage::query::ast::AlterOperation::EnableRowLevelSecurity => {
205 specs.push(QueryControlEventSpec {
206 kind: EventKind::RlsGovernance,
207 action: "enable_rls",
208 resource: Some(format!("table:{}", q.name)),
209 fields: Vec::new(),
210 });
211 }
212 crate::storage::query::ast::AlterOperation::DisableRowLevelSecurity => {
213 specs.push(QueryControlEventSpec {
214 kind: EventKind::RlsGovernance,
215 action: "disable_rls",
216 resource: Some(format!("table:{}", q.name)),
217 fields: Vec::new(),
218 });
219 }
220 crate::storage::query::ast::AlterOperation::EnableTenancy { column } => {
221 specs.push(QueryControlEventSpec {
222 kind: EventKind::TenantGovernance,
223 action: "enable_tenancy",
224 resource: Some(format!("table:{}", q.name)),
225 fields: vec![("tenant_column".to_string(), Sensitivity::raw(column))],
226 });
227 }
228 crate::storage::query::ast::AlterOperation::DisableTenancy => {
229 specs.push(QueryControlEventSpec {
230 kind: EventKind::TenantGovernance,
231 action: "disable_tenancy",
232 resource: Some(format!("table:{}", q.name)),
233 fields: Vec::new(),
234 });
235 }
236 _ => {}
237 }
238 }
239 }
240 QueryExpr::CreateIndex(q) => {
241 schema(
242 "create_index",
243 Some(format!("index:{}:{}", q.table, q.name)),
244 );
245 }
246 QueryExpr::DropIndex(q) => {
247 schema("drop_index", Some(format!("index:{}:{}", q.table, q.name)));
248 }
249 QueryExpr::CreateTimeSeries(q) => {
250 schema("create_timeseries", Some(format!("timeseries:{}", q.name)));
251 }
252 QueryExpr::DropTimeSeries(q) => {
253 schema("drop_timeseries", Some(format!("timeseries:{}", q.name)));
254 }
255 QueryExpr::CreateQueue(q) => schema("create_queue", Some(format!("queue:{}", q.name))),
256 QueryExpr::AlterQueue(q) => schema("alter_queue", Some(format!("queue:{}", q.name))),
257 QueryExpr::DropQueue(q) => schema("drop_queue", Some(format!("queue:{}", q.name))),
258 QueryExpr::CreateTree(q) => {
259 schema(
260 "create_tree",
261 Some(format!("tree:{}:{}", q.collection, q.name)),
262 );
263 }
264 QueryExpr::DropTree(q) => {
265 schema(
266 "drop_tree",
267 Some(format!("tree:{}:{}", q.collection, q.name)),
268 );
269 }
270 QueryExpr::CreateSchema(q) => schema("create_schema", Some(format!("schema:{}", q.name))),
271 QueryExpr::DropSchema(q) => schema("drop_schema", Some(format!("schema:{}", q.name))),
272 QueryExpr::CreateSequence(q) => {
273 schema("create_sequence", Some(format!("sequence:{}", q.name)));
274 }
275 QueryExpr::DropSequence(q) => schema("drop_sequence", Some(format!("sequence:{}", q.name))),
276 QueryExpr::CreateView(q) => schema("create_view", Some(format!("view:{}", q.name))),
277 QueryExpr::DropView(q) => schema("drop_view", Some(format!("view:{}", q.name))),
278 QueryExpr::RefreshMaterializedView(q) => {
279 schema(
280 "refresh_materialized_view",
281 Some(format!("view:{}", q.name)),
282 );
283 }
284 QueryExpr::CreatePolicy(q) => {
285 specs.push(QueryControlEventSpec {
286 kind: EventKind::RlsGovernance,
287 action: "create_policy",
288 resource: Some(format!("table:{}:policy:{}", q.table, q.name)),
289 fields: vec![(
290 "target_kind".to_string(),
291 Sensitivity::raw(q.target_kind.as_ident()),
292 )],
293 });
294 }
295 QueryExpr::DropPolicy(q) => {
296 specs.push(QueryControlEventSpec {
297 kind: EventKind::RlsGovernance,
298 action: "drop_policy",
299 resource: Some(format!("table:{}:policy:{}", q.table, q.name)),
300 fields: Vec::new(),
301 });
302 }
303 QueryExpr::SetTenant(value) => {
304 let mut fields = Vec::new();
305 if let Some(value) = value {
306 fields.push(("tenant".to_string(), Sensitivity::raw(value)));
307 }
308 specs.push(QueryControlEventSpec {
309 kind: EventKind::TenantGovernance,
310 action: "set_tenant",
311 resource: Some("tenant:session".to_string()),
312 fields,
313 });
314 }
315 QueryExpr::SetConfig { key, .. } => {
316 specs.push(QueryControlEventSpec {
317 kind: EventKind::ConfigWrite,
318 action: "config:write",
319 resource: Some(format!("config:{key}")),
320 fields: vec![("key".to_string(), Sensitivity::raw(key))],
321 });
322 }
323 QueryExpr::ConfigCommand(cmd) => match cmd {
324 crate::storage::query::ast::ConfigCommand::Put {
325 collection, key, ..
326 }
327 | crate::storage::query::ast::ConfigCommand::Rotate {
328 collection, key, ..
329 } => {
330 let target = format!("{collection}/{key}");
331 specs.push(QueryControlEventSpec {
332 kind: EventKind::ConfigWrite,
333 action: "config:write",
334 resource: Some(format!("config:{target}")),
335 fields: vec![
336 ("collection".to_string(), Sensitivity::raw(collection)),
337 ("key".to_string(), Sensitivity::raw(key)),
338 ],
339 });
340 }
341 crate::storage::query::ast::ConfigCommand::Delete { collection, key } => {
342 let target = format!("{collection}/{key}");
343 specs.push(QueryControlEventSpec {
344 kind: EventKind::ConfigDelete,
345 action: "config:write",
346 resource: Some(format!("config:{target}")),
347 fields: vec![
348 ("collection".to_string(), Sensitivity::raw(collection)),
349 ("key".to_string(), Sensitivity::raw(key)),
350 ],
351 });
352 }
353 _ => {}
354 },
355 QueryExpr::AlterUser(stmt) => {
356 let disables = stmt.attributes.iter().any(|attr| {
357 matches!(
358 attr,
359 crate::storage::query::ast::AlterUserAttribute::Disable
360 )
361 });
362 specs.push(QueryControlEventSpec {
363 kind: if disables {
364 EventKind::UserDisable
365 } else {
366 EventKind::UserUpdate
367 },
368 action: "alter_user",
369 resource: Some(format!("user:{}", stmt.username)),
370 fields: Vec::new(),
371 });
372 }
373 _ => {}
374 }
375 specs
376}
377
378fn control_event_outcome_for_error(err: &RedDBError) -> crate::runtime::control_events::Outcome {
379 match err {
380 RedDBError::ReadOnly(_) => crate::runtime::control_events::Outcome::Denied,
381 RedDBError::Query(msg)
382 if msg.contains("permission denied")
383 || msg.contains("cannot issue")
384 || msg.contains("lacks") =>
385 {
386 crate::runtime::control_events::Outcome::Denied
387 }
388 _ => crate::runtime::control_events::Outcome::Error,
389 }
390}
391
392fn view_records_to_entities(
401 table: &str,
402 records: &[crate::storage::query::unified::UnifiedRecord],
403) -> Vec<crate::storage::UnifiedEntity> {
404 use std::collections::HashMap;
405 let table_arc: std::sync::Arc<str> = std::sync::Arc::from(table);
406 let mut out = Vec::with_capacity(records.len());
407 for record in records {
408 let mut named: HashMap<String, crate::storage::schema::Value> = HashMap::new();
409 for (name, value) in record.iter_fields() {
410 named.insert(name.to_string(), value.clone());
411 }
412 let entity = crate::storage::UnifiedEntity::new(
413 crate::storage::EntityId::new(0),
414 crate::storage::EntityKind::TableRow {
415 table: std::sync::Arc::clone(&table_arc),
416 row_id: 0,
417 },
418 crate::storage::EntityData::Row(crate::storage::RowData {
419 columns: Vec::new(),
420 named: Some(named),
421 schema: None,
422 }),
423 );
424 out.push(entity);
425 }
426 out
427}
428
/// Builds an implicit, dynamic-schema collection contract named `name` with
/// the given declared `model`.
///
/// Creation and update timestamps are both set to the current time. Every
/// optional setting is left unset or empty, and every boolean flag is `false`.
fn system_keyed_collection_contract(
    name: &str,
    model: crate::catalog::CollectionModel,
) -> crate::physical::CollectionContract {
    // Sampled once so the created/updated timestamps are identical.
    let now = crate::utils::now_unix_millis() as u128;
    crate::physical::CollectionContract {
        name: name.to_string(),
        declared_model: model,
        schema_mode: crate::catalog::SchemaMode::Dynamic,
        origin: crate::physical::ContractOrigin::Implicit,
        version: 1,
        created_at_unix_ms: now,
        updated_at_unix_ms: now,
        default_ttl_ms: None,
        vector_dimension: None,
        vector_metric: None,
        context_index_fields: Vec::new(),
        declared_columns: Vec::new(),
        table_def: None,
        timestamps_enabled: false,
        context_index_enabled: false,
        metrics_raw_retention_ms: None,
        metrics_rollup_policies: Vec::new(),
        metrics_tenant_identity: None,
        metrics_namespace: None,
        append_only: false,
        subscriptions: Vec::new(),
        session_key: None,
        session_gap_ms: None,
        retention_duration_ms: None,
    }
}
461
/// Snapshot state consulted by the visibility checks in this file.
#[derive(Clone)]
pub struct SnapshotContext {
    /// Snapshot whose `sees(xmin, xmax)` decides visibility for rows not
    /// written by the caller.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Manager queried for aborted transaction ids.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Transaction ids treated as the caller's own writes: rows created by
    /// them are visible, rows deleted by them are hidden.
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Flag reported by `current_snapshot_requires_index_fallback`.
    pub requires_index_fallback: bool,
}
483
484pub fn set_current_connection_id(id: u64) {
493 CURRENT_CONN_ID.with(|c| c.set(id));
494}
495
496pub fn clear_current_connection_id() {
498 CURRENT_CONN_ID.with(|c| c.set(0));
499}
500
501pub fn current_connection_id() -> u64 {
504 CURRENT_CONN_ID.with(|c| c.get())
505}
506
507pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
511 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
512}
513
514pub fn clear_current_auth_identity() {
518 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
519}
520
521pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
524 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
525}
526
527pub fn current_auth_identity_for_audit() -> Option<(String, crate::auth::Role)> {
531 current_auth_identity()
532}
533
534pub fn set_current_tenant(tenant_id: String) {
539 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
540}
541
542pub fn clear_current_tenant() {
545 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
546}
547
548pub fn current_tenant() -> Option<String> {
559 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
560 if let Some(over) = current_scope_override() {
561 if over.tenant.is_active() {
562 return over.tenant.resolve(inherited);
563 }
564 }
565 if let Some(tx_local) = current_tx_local_tenant() {
566 return tx_local;
567 }
568 inherited
569}
570
thread_local! {
    /// Transaction-local tenant override managed by `TxLocalTenantGuard`.
    /// The outer `Option` says whether an override is installed; the inner
    /// `Option` is the override's tenant value, which may be `None`.
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
583
584fn current_tx_local_tenant() -> Option<Option<String>> {
585 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
586}
587
588fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
594 let mut tokens = query.split_ascii_whitespace();
595 let Some(w1) = tokens.next() else {
596 return Ok(None);
597 };
598 if !w1.eq_ignore_ascii_case("SET") {
599 return Ok(None);
600 }
601 let Some(w2) = tokens.next() else {
602 return Ok(None);
603 };
604 if !w2.eq_ignore_ascii_case("LOCAL") {
605 return Ok(None);
606 }
607 let Some(w3) = tokens.next() else {
608 return Ok(None);
609 };
610 if !w3.eq_ignore_ascii_case("TENANT") {
611 return Ok(None);
612 }
613 let rest: String = tokens.collect::<Vec<_>>().join(" ");
614 let rest = rest.trim().trim_end_matches(';').trim();
615 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
616 if value_str.is_empty() {
617 return Err(RedDBError::Query(
618 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
619 ));
620 }
621 if value_str.eq_ignore_ascii_case("NULL") {
622 return Ok(Some(None));
623 }
624 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
625 let inner = &value_str[1..value_str.len() - 1];
626 return Ok(Some(Some(inner.to_string())));
627 }
628 Err(RedDBError::Query(format!(
629 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
630 )))
631}
632
633pub(crate) struct TxLocalTenantGuard;
634
635impl TxLocalTenantGuard {
636 pub fn install(value: Option<Option<String>>) -> Self {
637 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
638 Self
639 }
640}
641
642impl Drop for TxLocalTenantGuard {
643 fn drop(&mut self) {
644 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
645 }
646}
647
thread_local! {
    /// Stack of scope overrides for the calling thread. The last element is
    /// the one returned by `current_scope_override`.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
658
659pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
660 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
661}
662
663pub(crate) fn pop_scope_override() {
664 SCOPE_OVERRIDES.with(|cell| {
665 cell.borrow_mut().pop();
666 });
667}
668
669pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
670 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
671}
672
673pub(crate) fn has_scope_override_active() -> bool {
677 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
678}
679
680pub(crate) struct ScopeOverrideGuard;
684
685impl ScopeOverrideGuard {
686 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
687 push_scope_override(over);
688 Self
689 }
690}
691
692impl Drop for ScopeOverrideGuard {
693 fn drop(&mut self) {
694 pop_scope_override();
695 }
696}
697
698pub(crate) fn current_user_projected() -> Option<String> {
704 let inherited = current_auth_identity().map(|(u, _)| u);
705 if let Some(over) = current_scope_override() {
706 if over.user.is_active() {
707 return over.user.resolve(inherited);
708 }
709 }
710 inherited
711}
712
713pub(crate) fn current_role_projected() -> Option<String> {
714 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
715 if let Some(over) = current_scope_override() {
716 if over.role.is_active() {
717 return over.role.resolve(inherited);
718 }
719 }
720 inherited
721}
722
723pub(crate) fn current_secret_value(path: &str) -> Option<String> {
724 let key = path.to_ascii_lowercase();
725 CURRENT_SECRET_RESOLVER.with(|cell| {
726 let mut resolver = cell.borrow_mut();
727 let resolver = resolver.as_mut()?;
728 if resolver.values.is_none() {
729 resolver.values = resolver
730 .store
731 .as_ref()
732 .map(|store| store.vault_kv_snapshot());
733 }
734 let values = resolver.values.as_ref()?;
735 values.get(&key).cloned().or_else(|| {
736 key.strip_prefix("red.vault/").and_then(|rest| {
737 values
738 .get(rest)
739 .cloned()
740 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
741 })
742 })
743 })
744}
745
/// Per-thread secret lookup state.
struct SecretResolver {
    /// Backing auth store; `None` means secrets cannot be loaded.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Cached vault key/value snapshot, filled on first lookup.
    values: Option<HashMap<String, String>>,
}
750
751pub(super) struct SecretStoreGuard {
752 previous: Option<SecretResolver>,
753}
754
755impl SecretStoreGuard {
756 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
757 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
758 cell.replace(Some(SecretResolver {
759 store,
760 values: None,
761 }))
762 });
763 Self { previous }
764 }
765}
766
767impl Drop for SecretStoreGuard {
768 fn drop(&mut self) {
769 let previous = self.previous.take();
770 CURRENT_SECRET_RESOLVER.with(|cell| {
771 cell.replace(previous);
772 });
773 }
774}
775
776pub(crate) fn current_config_value(path: &str) -> Option<Value> {
777 let key = path.to_ascii_lowercase();
778 CURRENT_CONFIG_RESOLVER.with(|cell| {
779 let mut resolver = cell.borrow_mut();
780 let resolver = resolver.as_mut()?;
781 if resolver.values.is_none() {
782 resolver.values = Some(latest_config_snapshot(&resolver.db));
783 }
784 let values = resolver.values.as_ref()?;
785 values.get(&key).cloned().or_else(|| {
786 key.strip_prefix("red.config/")
787 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
788 })
789 })
790}
791
792fn update_current_config_value(path: &str, value: Value) {
793 let key = path.to_ascii_lowercase();
794 CURRENT_CONFIG_RESOLVER.with(|cell| {
795 if let Some(resolver) = cell.borrow_mut().as_mut() {
796 if let Some(values) = resolver.values.as_mut() {
797 values.insert(key, value);
798 }
799 }
800 });
801}
802
803fn update_current_secret_value(path: &str, value: Option<String>) {
804 let key = path.to_ascii_lowercase();
805 CURRENT_SECRET_RESOLVER.with(|cell| {
806 if let Some(resolver) = cell.borrow_mut().as_mut() {
807 let Some(values) = resolver.values.as_mut() else {
808 return;
809 };
810 match value {
811 Some(value) => {
812 values.insert(key, value);
813 }
814 None => {
815 values.remove(&key);
816 }
817 }
818 }
819 });
820}
821
822fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
823 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
824
825 if let Some(manager) = db.store().get_collection("red_config") {
826 manager.for_each_entity(|entity| {
827 let Some(row) = entity.data.as_row() else {
828 return true;
829 };
830 let Some(Value::Text(key)) = row.get_field("key") else {
831 return true;
832 };
833 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
834 let id = entity.id.raw();
835 let key = key.to_ascii_lowercase();
836 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
837 if let Some(rest) = key.strip_prefix("red.config.") {
838 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
839 }
840 true
841 });
842 }
843
844 if let Some(manager) = db.store().get_collection("red.config") {
845 manager.for_each_entity(|entity| {
846 let Some(row) = entity.data.as_row() else {
847 return true;
848 };
849 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
850 return true;
851 }
852 let Some(Value::Text(key)) = row.get_field("key") else {
853 return true;
854 };
855 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
856 insert_latest_config_value(
857 &mut latest,
858 format!("red.config/{}", key.to_ascii_lowercase()),
859 entity.id.raw(),
860 value,
861 );
862 true
863 });
864 }
865
866 latest
867 .into_iter()
868 .map(|(key, (_, value))| (key, value))
869 .collect()
870}
871
872fn insert_latest_config_value(
873 latest: &mut HashMap<String, (u64, Value)>,
874 key: String,
875 id: u64,
876 value: Value,
877) {
878 match latest.get(&key) {
879 Some((prev_id, _)) if *prev_id > id => {}
880 _ => {
881 latest.insert(key, (id, value));
882 }
883 }
884}
885
/// Per-thread config lookup state.
struct ConfigResolver {
    /// Database the config snapshot is read from.
    db: Arc<RedDB>,
    /// Cached config snapshot, filled on first lookup.
    values: Option<HashMap<String, Value>>,
}
890
891pub(super) struct ConfigSnapshotGuard {
892 previous: Option<ConfigResolver>,
893}
894
895impl ConfigSnapshotGuard {
896 pub(super) fn install(db: Arc<RedDB>) -> Self {
897 let previous = CURRENT_CONFIG_RESOLVER
898 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
899 Self { previous }
900 }
901}
902
903impl Drop for ConfigSnapshotGuard {
904 fn drop(&mut self) {
905 let previous = self.previous.take();
906 CURRENT_CONFIG_RESOLVER.with(|cell| {
907 cell.replace(previous);
908 });
909 }
910}
911
912pub fn set_current_snapshot(ctx: SnapshotContext) {
917 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
918 HAS_SNAPSHOT.with(|c| c.set(true));
919}
920
921pub fn clear_current_snapshot() {
922 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
923 HAS_SNAPSHOT.with(|c| c.set(false));
924}
925
926pub(crate) struct CurrentSnapshotGuard {
932 previous: Option<SnapshotContext>,
933}
934
935impl CurrentSnapshotGuard {
936 pub(crate) fn install(ctx: SnapshotContext) -> Self {
937 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
938 set_current_snapshot(ctx);
939 Self { previous }
940 }
941}
942
943impl Drop for CurrentSnapshotGuard {
944 fn drop(&mut self) {
945 let prev = self.previous.take();
946 let has = prev.is_some();
947 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
948 HAS_SNAPSHOT.with(|c| c.set(has));
949 }
950}
951
952#[inline]
963pub fn entity_visible_under_current_snapshot(
964 entity: &crate::storage::unified::entity::UnifiedEntity,
965) -> bool {
966 if !HAS_SNAPSHOT.with(|c| c.get()) {
972 return entity.xmax == 0;
973 }
974 CURRENT_SNAPSHOT.with(|cell| {
975 let guard = cell.borrow();
976 let Some(ctx) = guard.as_ref() else {
977 return true;
978 };
979 visibility_check(ctx, entity.xmin, entity.xmax)
980 })
981}
982
983#[inline]
988pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
989 if !HAS_SNAPSHOT.with(|c| c.get()) {
990 return true;
991 }
992 CURRENT_SNAPSHOT.with(|cell| {
993 let guard = cell.borrow();
994 let Some(ctx) = guard.as_ref() else {
995 return true;
996 };
997 visibility_check(ctx, xmin, xmax)
998 })
999}
1000
1001pub fn capture_current_snapshot() -> Option<SnapshotContext> {
1008 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
1009}
1010
1011pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
1016 if !HAS_SNAPSHOT.with(|c| c.get()) {
1017 return false;
1018 }
1019 CURRENT_SNAPSHOT.with(|cell| {
1020 cell.borrow()
1021 .as_ref()
1022 .is_some_and(|ctx| ctx.requires_index_fallback)
1023 })
1024}
1025
/// Captured copy of a thread's snapshot, auth identity and session tenant.
/// Produced by `snapshot_bundle` and re-installed by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context, if one was installed at capture time.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)`, if an identity was installed at capture time.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Raw session tenant (`CURRENT_TENANT_ID`), without scope or
    /// transaction-local overrides applied.
    pub tenant: Option<String>,
}
1046
1047pub fn snapshot_bundle() -> SnapshotBundle {
1050 SnapshotBundle {
1051 snapshot: capture_current_snapshot(),
1052 auth: current_auth_identity(),
1053 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
1054 }
1055}
1056
1057pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
1062 struct Guard {
1063 prev_snapshot: Option<SnapshotContext>,
1064 prev_auth: Option<(String, crate::auth::Role)>,
1065 prev_tenant: Option<String>,
1066 }
1067 impl Drop for Guard {
1068 fn drop(&mut self) {
1069 let snap = self.prev_snapshot.take();
1070 let has = snap.is_some();
1071 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
1072 HAS_SNAPSHOT.with(|c| c.set(has));
1073 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
1074 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
1075 }
1076 }
1077
1078 let _guard = {
1079 let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
1080 let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
1081 let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
1082
1083 match bundle.snapshot.clone() {
1084 Some(ctx) => set_current_snapshot(ctx),
1085 None => clear_current_snapshot(),
1086 }
1087 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
1088 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());
1089
1090 Guard {
1091 prev_snapshot,
1092 prev_auth,
1093 prev_tenant,
1094 }
1095 };
1096 f()
1097}
1098
1099#[inline]
1103pub fn entity_visible_with_context(
1104 ctx: Option<&SnapshotContext>,
1105 entity: &crate::storage::unified::entity::UnifiedEntity,
1106) -> bool {
1107 match ctx {
1108 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
1109 None => true,
1110 }
1111}
1112
1113fn table_row_index_fields(
1114 entity: &crate::storage::unified::entity::UnifiedEntity,
1115) -> Vec<(String, crate::storage::schema::Value)> {
1116 let crate::storage::EntityData::Row(row) = &entity.data else {
1117 return Vec::new();
1118 };
1119 if let Some(named) = &row.named {
1120 return named
1121 .iter()
1122 .map(|(name, value)| (name.clone(), value.clone()))
1123 .collect();
1124 }
1125 if let Some(schema) = &row.schema {
1126 return schema
1127 .iter()
1128 .zip(row.columns.iter())
1129 .map(|(name, value)| (name.clone(), value.clone()))
1130 .collect();
1131 }
1132 Vec::new()
1133}
1134
1135#[inline]
1136fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
1137 if xmin != 0 && ctx.manager.is_aborted(xmin) {
1141 return false;
1142 }
1143 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
1145 0
1146 } else {
1147 xmax
1148 };
1149 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
1153 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
1154 if own_xmax {
1155 return false;
1157 }
1158 if own_xmin {
1159 return true;
1160 }
1161 ctx.snapshot.sees(xmin, effective_xmax)
1162}
1163
1164fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
1165 runtime
1166 .inner
1167 .pool
1168 .lock()
1169 .unwrap_or_else(|poisoned| poisoned.into_inner())
1170}
1171
1172fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
1173 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
1174 return;
1175 }
1176 scopes.insert(name.to_string());
1177}
1178
1179fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
1180 match query.source.as_ref() {
1181 Some(crate::storage::query::ast::TableSource::Name(name)) => {
1182 cache_scope_insert(scopes, name)
1183 }
1184 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
1185 collect_query_expr_result_cache_scopes(scopes, subquery);
1186 }
1187 None => cache_scope_insert(scopes, &query.table),
1188 }
1189}
1190
1191fn collect_vector_source_scopes(
1192 scopes: &mut HashSet<String>,
1193 source: &crate::storage::query::ast::VectorSource,
1194) {
1195 match source {
1196 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
1197 cache_scope_insert(scopes, collection);
1198 }
1199 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
1200 collect_query_expr_result_cache_scopes(scopes, subquery);
1201 }
1202 crate::storage::query::ast::VectorSource::Literal(_)
1203 | crate::storage::query::ast::VectorSource::Text(_) => {}
1204 }
1205}
1206
1207fn collect_path_selector_scopes(
1208 scopes: &mut HashSet<String>,
1209 selector: &crate::storage::query::ast::NodeSelector,
1210) {
1211 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
1212 cache_scope_insert(scopes, table);
1213 }
1214}
1215
/// Records in `scopes` every collection-like name that `expr` references.
///
/// The top-level match has no wildcard arm, so each `QueryExpr` variant is
/// handled explicitly:
///
/// * most variants contribute a single table / collection / queue name;
/// * `Join`, `Hybrid`, `CreateView` and vector subqueries recurse into their
///   nested expressions;
/// * commands with an optional target only contribute when it is present;
/// * a group of variants (graph, config, secrets, tenant, IAM, migrations, …)
///   contributes nothing.
fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
    match expr {
        // ---- read-shaped queries -------------------------------------
        QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
        QueryExpr::Join(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.left);
            collect_query_expr_result_cache_scopes(scopes, &query.right);
        }
        QueryExpr::Path(query) => {
            collect_path_selector_scopes(scopes, &query.from);
            collect_path_selector_scopes(scopes, &query.to);
        }
        QueryExpr::Vector(query) => {
            cache_scope_insert(scopes, &query.collection);
            collect_vector_source_scopes(scopes, &query.query_vector);
        }
        QueryExpr::Hybrid(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.structured);
            cache_scope_insert(scopes, &query.vector.collection);
            collect_vector_source_scopes(scopes, &query.vector.query_vector);
        }
        // ---- DML and DDL that name exactly one target ------------------
        QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        // ---- queues ----------------------------------------------------
        QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
        QueryExpr::QueueCommand(query) => match query {
            QueueCommand::Push { queue, .. }
            | QueueCommand::Pop { queue, .. }
            | QueueCommand::Peek { queue, .. }
            | QueueCommand::Len { queue }
            | QueueCommand::Purge { queue }
            | QueueCommand::GroupCreate { queue, .. }
            | QueueCommand::GroupRead { queue, .. }
            | QueueCommand::Pending { queue, .. }
            | QueueCommand::Claim { queue, .. }
            | QueueCommand::Ack { queue, .. }
            | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
            // A move touches both ends, so both queues are recorded.
            QueueCommand::Move {
                source,
                destination,
                ..
            } => {
                cache_scope_insert(scopes, source);
                cache_scope_insert(scopes, destination);
            }
        },
        QueryExpr::EventsBackfill(query) => {
            cache_scope_insert(scopes, &query.collection);
            cache_scope_insert(scopes, &query.target_queue);
        }
        // ---- trees -----------------------------------------------------
        QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::TreeCommand(query) => match query {
            TreeCommand::Insert { collection, .. }
            | TreeCommand::Move { collection, .. }
            | TreeCommand::Delete { collection, .. }
            | TreeCommand::Validate { collection, .. }
            | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
        },
        // ---- search ----------------------------------------------------
        QueryExpr::SearchCommand(query) => match query {
            SearchCommand::Similar { collection, .. }
            | SearchCommand::Hybrid { collection, .. }
            | SearchCommand::SpatialRadius { collection, .. }
            | SearchCommand::SpatialBbox { collection, .. }
            | SearchCommand::SpatialNearest { collection, .. } => {
                cache_scope_insert(scopes, collection);
            }
            // These variants carry an optional collection; nothing is
            // recorded when it is absent.
            SearchCommand::Text { collection, .. }
            | SearchCommand::Multimodal { collection, .. }
            | SearchCommand::Index { collection, .. }
            | SearchCommand::Context { collection, .. } => {
                if let Some(collection) = collection.as_deref() {
                    cache_scope_insert(scopes, collection);
                }
            }
        },
        QueryExpr::Ask(query) => {
            if let Some(collection) = query.collection.as_deref() {
                cache_scope_insert(scopes, collection);
            }
        }
        QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
        QueryExpr::MaintenanceCommand(cmd) => match cmd {
            crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
            | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
                if let Some(t) = target {
                    cache_scope_insert(scopes, t);
                }
            }
        },
        QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
        // A view contributes its own name plus everything its defining
        // query references.
        QueryExpr::CreateView(cmd) => {
            cache_scope_insert(scopes, &cmd.name);
            collect_query_expr_result_cache_scopes(scopes, &cmd.query);
        }
        QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
        QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        // ---- variants that record no scope -----------------------------
        QueryExpr::Graph(_)
        | QueryExpr::GraphCommand(_)
        | QueryExpr::ProbabilisticCommand(_)
        | QueryExpr::SetConfig { .. }
        | QueryExpr::ShowConfig { .. }
        | QueryExpr::SetSecret { .. }
        | QueryExpr::DeleteSecret { .. }
        | QueryExpr::ShowSecrets { .. }
        | QueryExpr::SetTenant(_)
        | QueryExpr::ShowTenant
        | QueryExpr::TransactionControl(_)
        | QueryExpr::CreateSchema(_)
        | QueryExpr::DropSchema(_)
        | QueryExpr::CreateSequence(_)
        | QueryExpr::DropSequence(_)
        | QueryExpr::Grant(_)
        | QueryExpr::Revoke(_)
        | QueryExpr::AlterUser(_)
        | QueryExpr::CreateIamPolicy { .. }
        | QueryExpr::DropIamPolicy { .. }
        | QueryExpr::AttachPolicy { .. }
        | QueryExpr::DetachPolicy { .. }
        | QueryExpr::ShowPolicies { .. }
        | QueryExpr::ShowEffectivePermissions { .. }
        | QueryExpr::SimulatePolicy { .. }
        | QueryExpr::CreateMigration(_)
        | QueryExpr::ApplyMigration(_)
        | QueryExpr::RollbackMigration(_)
        | QueryExpr::ExplainMigration(_)
        | QueryExpr::EventsBackfillStatus { .. } => {}
        // ---- key/value and config stores -------------------------------
        QueryExpr::KvCommand(cmd) => {
            use crate::storage::query::ast::KvCommand;
            match cmd {
                KvCommand::Put { collection, .. }
                | KvCommand::InvalidateTags { collection, .. }
                | KvCommand::Get { collection, .. }
                | KvCommand::Unseal { collection, .. }
                | KvCommand::Rotate { collection, .. }
                | KvCommand::History { collection, .. }
                | KvCommand::List { collection, .. }
                | KvCommand::Purge { collection, .. }
                | KvCommand::Watch { collection, .. }
                | KvCommand::Delete { collection, .. }
                | KvCommand::Incr { collection, .. }
                | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
            }
        }
        QueryExpr::ConfigCommand(cmd) => {
            use crate::storage::query::ast::ConfigCommand;
            match cmd {
                ConfigCommand::Put { collection, .. }
                | ConfigCommand::Get { collection, .. }
                | ConfigCommand::Resolve { collection, .. }
                | ConfigCommand::Rotate { collection, .. }
                | ConfigCommand::Delete { collection, .. }
                | ConfigCommand::History { collection, .. }
                | ConfigCommand::List { collection, .. }
                | ConfigCommand::Watch { collection, .. }
                | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
                    cache_scope_insert(scopes, collection)
                }
            }
        }
    }
}
1401
1402pub(crate) fn rls_policy_filter(
1410 runtime: &RedDBRuntime,
1411 table: &str,
1412 action: crate::storage::query::ast::PolicyAction,
1413) -> Option<crate::storage::query::ast::Filter> {
1414 rls_policy_filter_for_kind(
1415 runtime,
1416 table,
1417 action,
1418 crate::storage::query::ast::PolicyTargetKind::Table,
1419 )
1420}
1421
1422pub(crate) fn rls_policy_filter_for_kind(
1428 runtime: &RedDBRuntime,
1429 table: &str,
1430 action: crate::storage::query::ast::PolicyAction,
1431 kind: crate::storage::query::ast::PolicyTargetKind,
1432) -> Option<crate::storage::query::ast::Filter> {
1433 use crate::storage::query::ast::Filter;
1434
1435 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1436 return None;
1437 }
1438 let role = current_auth_identity().map(|(_, role)| role);
1439 let role_str = role.map(|r| r.as_str().to_string());
1440 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1441 if policies.is_empty() {
1442 return None;
1443 }
1444 policies
1445 .into_iter()
1446 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1447}
1448
1449pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1453 runtime.inner.rls_enabled_tables.read().contains(table)
1454}
1455
1456fn node_passes_rls(
1463 runtime: &RedDBRuntime,
1464 collection: &str,
1465 role: Option<&str>,
1466 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1467 entity: &crate::storage::unified::entity::UnifiedEntity,
1468) -> bool {
1469 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1470
1471 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1472 return true;
1473 }
1474 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1475 let policies = runtime.matching_rls_policies_for_kind(
1476 collection,
1477 role,
1478 PolicyAction::Select,
1479 PolicyTargetKind::Nodes,
1480 );
1481 if policies.is_empty() {
1482 None
1483 } else {
1484 policies
1485 .into_iter()
1486 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1487 }
1488 });
1489 let Some(filter) = filter else {
1490 return false;
1491 };
1492 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1493 Some(&runtime.inner.db),
1494 entity,
1495 filter,
1496 collection,
1497 collection,
1498 )
1499}
1500
1501fn edge_passes_rls(
1504 runtime: &RedDBRuntime,
1505 collection: &str,
1506 role: Option<&str>,
1507 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1508 entity: &crate::storage::unified::entity::UnifiedEntity,
1509) -> bool {
1510 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1511
1512 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1513 return true;
1514 }
1515 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1516 let policies = runtime.matching_rls_policies_for_kind(
1517 collection,
1518 role,
1519 PolicyAction::Select,
1520 PolicyTargetKind::Edges,
1521 );
1522 if policies.is_empty() {
1523 None
1524 } else {
1525 policies
1526 .into_iter()
1527 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1528 }
1529 });
1530 let Some(filter) = filter else {
1531 return false;
1532 };
1533 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1534 Some(&runtime.inner.db),
1535 entity,
1536 filter,
1537 collection,
1538 collection,
1539 )
1540}
1541
1542fn inject_rls_filters(
1563 runtime: &RedDBRuntime,
1564 frame: &dyn super::statement_frame::ReadFrame,
1565 mut table: crate::storage::query::ast::TableQuery,
1566) -> Option<crate::storage::query::ast::TableQuery> {
1567 use crate::storage::query::ast::{Filter, PolicyAction};
1568
1569 let role = frame.identity().map(|(_, role)| role);
1571 let role_str = role.map(|r| r.as_str().to_string());
1572 let policies =
1573 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1574
1575 if policies.is_empty() {
1576 return None;
1579 }
1580
1581 let combined = policies
1583 .into_iter()
1584 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1585 .expect("policies non-empty");
1586
1587 use crate::storage::query::sql_lowering::{expr_to_filter, filter_to_expr};
1596 let had_where_expr = table.where_expr.is_some();
1597 let existing = table
1598 .filter
1599 .take()
1600 .or_else(|| table.where_expr.as_ref().map(expr_to_filter));
1601 let new_filter = match existing {
1602 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1603 None => combined,
1604 };
1605 if had_where_expr {
1608 table.where_expr = Some(filter_to_expr(&new_filter));
1609 }
1610 table.filter = Some(new_filter);
1611 Some(table)
1612}
1613
1614fn inject_rls_into_join(
1624 runtime: &RedDBRuntime,
1625 frame: &dyn super::statement_frame::ReadFrame,
1626 mut join: crate::storage::query::ast::JoinQuery,
1627) -> Option<crate::storage::query::ast::JoinQuery> {
1628 use crate::storage::query::ast::Filter;
1629
1630 let mut policy_filters: Vec<Filter> = Vec::new();
1631 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
1632 return None;
1633 }
1634 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
1635 return None;
1636 }
1637
1638 if policy_filters.is_empty() {
1639 return Some(join);
1640 }
1641
1642 let combined = policy_filters
1643 .into_iter()
1644 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
1645 .expect("policy_filters non-empty");
1646
1647 join.filter = Some(match join.filter.take() {
1648 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1649 None => combined,
1650 });
1651
1652 Some(join)
1653}
1654
1655fn collect_join_side_policy(
1660 runtime: &RedDBRuntime,
1661 frame: &dyn super::statement_frame::ReadFrame,
1662 expr: &crate::storage::query::ast::QueryExpr,
1663 out: &mut Vec<crate::storage::query::ast::Filter>,
1664) -> bool {
1665 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
1666 match expr {
1667 QueryExpr::Table(t) => {
1668 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
1669 return true;
1670 }
1671 let role = frame.identity().map(|(_, role)| role);
1672 let role_str = role.map(|r| r.as_str().to_string());
1673 let policies =
1674 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
1675 if policies.is_empty() {
1676 return false;
1677 }
1678 let combined = policies
1679 .into_iter()
1680 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1681 .expect("policies non-empty");
1682 out.push(combined);
1683 true
1684 }
1685 QueryExpr::Join(inner) => {
1686 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
1687 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
1688 }
1689 _ => true,
1690 }
1691}
1692
1693fn apply_foreign_table_filters(
1704 records: Vec<crate::storage::query::unified::UnifiedRecord>,
1705 query: &crate::storage::query::ast::TableQuery,
1706) -> crate::storage::query::unified::UnifiedResult {
1707 use crate::storage::query::sql_lowering::{
1708 effective_table_filter, effective_table_projections,
1709 };
1710 use crate::storage::query::unified::UnifiedResult;
1711
1712 let filter = effective_table_filter(query);
1713 let projections = effective_table_projections(query);
1714
1715 let mut filtered: Vec<_> = records
1718 .into_iter()
1719 .filter(|record| match &filter {
1720 Some(f) => {
1721 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
1722 }
1723 None => true,
1724 })
1725 .collect();
1726
1727 if let Some(offset) = query.offset {
1729 let offset = offset as usize;
1730 if offset >= filtered.len() {
1731 filtered.clear();
1732 } else {
1733 filtered.drain(0..offset);
1734 }
1735 }
1736 if let Some(limit) = query.limit {
1737 filtered.truncate(limit as usize);
1738 }
1739
1740 let columns: Vec<String> = if projections.is_empty() {
1743 filtered
1744 .first()
1745 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
1746 .unwrap_or_default()
1747 } else {
1748 projections
1749 .iter()
1750 .map(super::join_filter::projection_name)
1751 .collect()
1752 };
1753
1754 let mut result = UnifiedResult::empty();
1755 result.columns = columns;
1756 result.records = filtered;
1757 result
1758}
1759
1760pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
1767 let mut scopes: HashSet<String> = HashSet::new();
1768 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1769 scopes.into_iter().collect()
1770}
1771
1772fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
1773 let mut scopes = HashSet::new();
1774 collect_query_expr_result_cache_scopes(&mut scopes, expr);
1775 scopes
1776}
1777
// NOTE(review): the first four constants are not used in this part of the
// file; their descriptions are inferred from the names — confirm at the use
// sites.

/// Config key that presumably selects the result-cache backend.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
/// Backend name presumably used when the key above is unset.
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
/// Namespace string presumably used for result-cache blobs.
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
/// Presumably the lifetime of a cached result, in seconds.
const RESULT_CACHE_TTL_SECS: u64 = 30;
/// Entry count above which `trim_result_cache` starts evicting.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
/// Eight-byte marker written at the start of every encoded payload and
/// required by `decode_result_cache_payload`.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
1784
/// Selects which result-cache implementation the runtime uses.
///
/// NOTE(review): variant meanings below are inferred from their names and
/// the neighbouring constants; confirm against the code that matches on them.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    /// Presumably the in-process map trimmed by `trim_result_cache`.
    Legacy,
    /// Presumably a blob-cache-backed store using the encoded payload format.
    BlobCache,
    /// Presumably runs both backends side by side for comparison.
    Shadow,
}
1791
1792fn trim_result_cache(
1793 map: &mut HashMap<String, RuntimeResultCacheEntry>,
1794 order: &mut std::collections::VecDeque<String>,
1795) {
1796 while map.len() > RESULT_CACHE_MAX_ENTRIES {
1797 if let Some(oldest) = order.pop_front() {
1798 map.remove(&oldest);
1799 } else {
1800 break;
1801 }
1802 }
1803}
1804
1805fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
1806 format!(
1807 "{:?}|{}|{}|{}|{}|{:?}",
1808 result.result,
1809 result.query,
1810 result.statement,
1811 result.engine,
1812 result.affected_rows,
1813 result.statement_type
1814 )
1815}
1816
1817fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
1818 match mode {
1819 crate::storage::query::modes::QueryMode::Sql => 0,
1820 crate::storage::query::modes::QueryMode::Gremlin => 1,
1821 crate::storage::query::modes::QueryMode::Cypher => 2,
1822 crate::storage::query::modes::QueryMode::Sparql => 3,
1823 crate::storage::query::modes::QueryMode::Path => 4,
1824 crate::storage::query::modes::QueryMode::Natural => 5,
1825 crate::storage::query::modes::QueryMode::Unknown => 255,
1826 }
1827}
1828
1829fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
1830 match byte {
1831 0 => Some(crate::storage::query::modes::QueryMode::Sql),
1832 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
1833 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
1834 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
1835 4 => Some(crate::storage::query::modes::QueryMode::Path),
1836 5 => Some(crate::storage::query::modes::QueryMode::Natural),
1837 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
1838 _ => None,
1839 }
1840}
1841
/// Maps `value` to the `'static` string with the same contents, if it is one
/// of the labels the result cache knows how to round-trip.
///
/// Matching is exact and case-sensitive; anything else yields `None`.
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];

    KNOWN_LABELS.iter().copied().find(|known| *known == value)
}
1864
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in
/// 32 bits.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    match u32::try_from(value) {
        Ok(narrow) => {
            out.extend_from_slice(&narrow.to_le_bytes());
            Some(())
        }
        Err(_) => None,
    }
}
1870
1871fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
1872 write_u32(out, value.len())?;
1873 out.extend_from_slice(value.as_bytes());
1874 Some(())
1875}
1876
1877fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
1878 write_u32(out, value.len())?;
1879 out.extend_from_slice(value);
1880 Some(())
1881}
1882
/// Pops one byte off the front of `input`.
///
/// Returns `None`, leaving `input` unchanged, when it is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let current: &[u8] = *input;
    let first = *current.first()?;
    *input = &current[1..];
    Some(first)
}
1888
/// Reads a little-endian `u32` from the front of `input`, widened to
/// `usize`, and advances `input` past it.
///
/// Returns `None`, leaving `input` unchanged, when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    let current: &[u8] = *input;
    let head: [u8; 4] = current.get(..4)?.try_into().ok()?;
    *input = &current[4..];
    Some(u32::from_le_bytes(head) as usize)
}
1897
/// Reads a little-endian `u64` from the front of `input` and advances
/// `input` past it.
///
/// Returns `None`, leaving `input` unchanged, when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    let current: &[u8] = *input;
    let head: [u8; 8] = current.get(..8)?.try_into().ok()?;
    *input = &current[8..];
    Some(u64::from_le_bytes(head))
}
1906
1907fn read_string(input: &mut &[u8]) -> Option<String> {
1908 let len = read_u32(input)?;
1909 if input.len() < len {
1910 return None;
1911 }
1912 let value = String::from_utf8(input[..len].to_vec()).ok()?;
1913 *input = &input[len..];
1914 Some(value)
1915}
1916
1917fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
1918 let len = read_u32(input)?;
1919 if input.len() < len {
1920 return None;
1921 }
1922 let value = &input[..len];
1923 *input = &input[len..];
1924 Some(value)
1925}
1926
1927fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
1928 let result = &entry.result;
1929 if result.result.pre_serialized_json.is_some()
1930 || result_cache_static_str(result.statement).is_none()
1931 || result_cache_static_str(result.engine).is_none()
1932 || result_cache_static_str(result.statement_type).is_none()
1933 || result.result.records.iter().any(|record| {
1934 !record.nodes.is_empty()
1935 || !record.edges.is_empty()
1936 || !record.paths.is_empty()
1937 || !record.vector_results.is_empty()
1938 })
1939 {
1940 return None;
1941 }
1942
1943 let mut out = Vec::new();
1944 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
1945 write_string(&mut out, &result.query)?;
1946 out.push(mode_to_byte(result.mode));
1947 write_string(&mut out, result.statement)?;
1948 write_string(&mut out, result.engine)?;
1949 out.extend_from_slice(&result.affected_rows.to_le_bytes());
1950 write_string(&mut out, result.statement_type)?;
1951
1952 write_u32(&mut out, result.result.columns.len())?;
1953 for column in &result.result.columns {
1954 write_string(&mut out, column)?;
1955 }
1956 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
1957 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
1958 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
1959 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
1960
1961 write_u32(&mut out, result.result.records.len())?;
1962 for record in &result.result.records {
1963 let fields = record.iter_fields().collect::<Vec<_>>();
1964 write_u32(&mut out, fields.len())?;
1965 for (name, value) in fields {
1966 write_string(&mut out, name)?;
1967 let mut encoded = Vec::new();
1968 crate::storage::schema::value_codec::encode(value, &mut encoded);
1969 write_bytes(&mut out, &encoded)?;
1970 }
1971 }
1972
1973 write_u32(&mut out, entry.scopes.len())?;
1974 for scope in &entry.scopes {
1975 write_string(&mut out, scope)?;
1976 }
1977 Some(out)
1978}
1979
1980fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
1981 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
1982 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
1983 {
1984 return None;
1985 }
1986 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
1987
1988 let query = read_string(&mut input)?;
1989 let mode = mode_from_byte(read_u8(&mut input)?)?;
1990 let statement = result_cache_static_str(&read_string(&mut input)?)?;
1991 let engine = result_cache_static_str(&read_string(&mut input)?)?;
1992 let affected_rows = read_u64(&mut input)?;
1993 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
1994
1995 let mut columns = Vec::new();
1996 for _ in 0..read_u32(&mut input)? {
1997 columns.push(read_string(&mut input)?);
1998 }
1999 let stats = crate::storage::query::unified::QueryStats {
2000 nodes_scanned: read_u64(&mut input)?,
2001 edges_scanned: read_u64(&mut input)?,
2002 rows_scanned: read_u64(&mut input)?,
2003 exec_time_us: read_u64(&mut input)?,
2004 };
2005
2006 let mut records = Vec::new();
2007 for _ in 0..read_u32(&mut input)? {
2008 let mut record = crate::storage::query::unified::UnifiedRecord::new();
2009 for _ in 0..read_u32(&mut input)? {
2010 let name = read_string(&mut input)?;
2011 let bytes = read_bytes(&mut input)?;
2012 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
2013 if used != bytes.len() {
2014 return None;
2015 }
2016 record.set_owned(name, value);
2017 }
2018 records.push(record);
2019 }
2020
2021 let mut scopes = HashSet::new();
2022 for _ in 0..read_u32(&mut input)? {
2023 scopes.insert(read_string(&mut input)?);
2024 }
2025 if !input.is_empty() {
2026 return None;
2027 }
2028
2029 Some((
2030 RuntimeQueryResult {
2031 query,
2032 mode,
2033 statement,
2034 engine,
2035 result: crate::storage::query::unified::UnifiedResult {
2036 columns,
2037 records,
2038 stats,
2039 pre_serialized_json: None,
2040 },
2041 affected_rows,
2042 statement_type,
2043 },
2044 scopes,
2045 ))
2046}
2047
/// Strips a leading `EXPLAIN` keyword and returns the statement after it.
///
/// Returns `None` when:
/// * the first whitespace-delimited word is not `EXPLAIN` (ASCII
///   case-insensitive);
/// * nothing follows the keyword;
/// * the next word is `ALTER` or `ASK` (ASCII case-insensitive).
///
/// Leading whitespace before `EXPLAIN` and before the inner statement is
/// skipped; the returned slice borrows from `sql`.
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    let statement = sql.trim_start();
    let (keyword, tail) = statement
        .split_once(char::is_whitespace)
        .unwrap_or((statement, ""));
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let inner = tail.trim_start();
    if inner.is_empty() {
        return None;
    }

    let next_keyword = inner.split(char::is_whitespace).next().unwrap_or(inner);
    let excluded = ["ALTER", "ASK"]
        .iter()
        .any(|word| next_keyword.eq_ignore_ascii_case(word));
    if excluded {
        None
    } else {
        Some(inner)
    }
}
2086
2087pub(super) fn has_with_prefix(sql: &str) -> bool {
2092 let trimmed = sql.trim_start();
2093 let head_end = trimmed
2094 .find(|c: char| c.is_whitespace() || c == '(')
2095 .unwrap_or(trimmed.len());
2096 trimmed[..head_end].eq_ignore_ascii_case("WITH")
2097}
2098
2099fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
2107 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
2108}
2109
2110pub(super) fn peek_top_level_as_of_with_table(
2115 sql: &str,
2116) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
2117 if !sql
2118 .as_bytes()
2119 .windows(5)
2120 .any(|w| w.eq_ignore_ascii_case(b"as of"))
2121 {
2122 return None;
2123 }
2124 let parsed = crate::storage::query::parser::parse(sql).ok()?;
2125 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
2126 return None;
2127 };
2128 let clause = table.as_of?;
2129 let table_name = if table.table.is_empty() || table.table == "any" {
2130 None
2131 } else {
2132 Some(table.table.clone())
2133 };
2134 let spec = match clause {
2135 crate::storage::query::ast::AsOfClause::Commit(h) => {
2136 crate::application::vcs::AsOfSpec::Commit(h)
2137 }
2138 crate::storage::query::ast::AsOfClause::Branch(b) => {
2139 crate::application::vcs::AsOfSpec::Branch(b)
2140 }
2141 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
2142 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
2143 crate::application::vcs::AsOfSpec::TimestampMs(ts)
2144 }
2145 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
2146 crate::application::vcs::AsOfSpec::Snapshot(x)
2147 }
2148 };
2149 Some((spec, table_name))
2150}
2151
2152pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
2153 const VOLATILE_TOKENS: &[&str] = &[
2157 "pg_advisory_lock",
2158 "pg_try_advisory_lock",
2159 "pg_advisory_unlock",
2160 "random()",
2161 ];
2166 let lowered = sql.to_ascii_lowercase();
2167 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
2168}
2169
2170pub(super) fn query_is_ask_statement(sql: &str) -> bool {
2171 let trimmed = sql.trim_start();
2172 let head_end = trimmed
2173 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
2174 .unwrap_or(trimmed.len());
2175 trimmed[..head_end].eq_ignore_ascii_case("ASK")
2176}
2177
2178pub(super) fn intent_lock_modes_for(
2188 expr: &QueryExpr,
2189) -> Option<(
2190 crate::storage::transaction::lock::LockMode,
2191 crate::storage::transaction::lock::LockMode,
2192)> {
2193 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
2194
2195 match expr {
2196 QueryExpr::Table(_)
2198 | QueryExpr::Join(_)
2199 | QueryExpr::Vector(_)
2200 | QueryExpr::Hybrid(_)
2201 | QueryExpr::Graph(_)
2202 | QueryExpr::Path(_)
2203 | QueryExpr::Ask(_)
2204 | QueryExpr::SearchCommand(_)
2205 | QueryExpr::GraphCommand(_)
2206 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
2207
2208 QueryExpr::Insert(_)
2216 | QueryExpr::Update(_)
2217 | QueryExpr::Delete(_)
2218 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
2219 Some((IntentExclusive, IntentExclusive))
2220 }
2221 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
2222
2223 QueryExpr::CreateTable(_)
2227 | QueryExpr::CreateCollection(_)
2228 | QueryExpr::CreateVector(_)
2229 | QueryExpr::DropTable(_)
2230 | QueryExpr::DropGraph(_)
2231 | QueryExpr::DropVector(_)
2232 | QueryExpr::DropDocument(_)
2233 | QueryExpr::DropKv(_)
2234 | QueryExpr::DropCollection(_)
2235 | QueryExpr::Truncate(_)
2236 | QueryExpr::AlterTable(_)
2237 | QueryExpr::CreateIndex(_)
2238 | QueryExpr::DropIndex(_)
2239 | QueryExpr::CreateTimeSeries(_)
2240 | QueryExpr::DropTimeSeries(_)
2241 | QueryExpr::CreateQueue(_)
2242 | QueryExpr::AlterQueue(_)
2243 | QueryExpr::DropQueue(_)
2244 | QueryExpr::CreateTree(_)
2245 | QueryExpr::DropTree(_)
2246 | QueryExpr::CreatePolicy(_)
2247 | QueryExpr::DropPolicy(_)
2248 | QueryExpr::CreateView(_)
2249 | QueryExpr::DropView(_)
2250 | QueryExpr::RefreshMaterializedView(_)
2251 | QueryExpr::CreateSchema(_)
2252 | QueryExpr::DropSchema(_)
2253 | QueryExpr::CreateSequence(_)
2254 | QueryExpr::DropSequence(_)
2255 | QueryExpr::CreateServer(_)
2256 | QueryExpr::DropServer(_)
2257 | QueryExpr::CreateForeignTable(_)
2258 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
2259
2260 _ => None,
2266 }
2267}
2268
2269pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
2274 let mut out = Vec::new();
2275 walk_collections(expr, &mut out);
2276 out.sort();
2277 out.dedup();
2278 out
2279}
2280
2281fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
2282 match expr {
2283 QueryExpr::Table(t) => out.push(t.table.clone()),
2284 QueryExpr::Join(j) => {
2285 walk_collections(&j.left, out);
2286 walk_collections(&j.right, out);
2287 }
2288 QueryExpr::Insert(i) => out.push(i.table.clone()),
2289 QueryExpr::Update(u) => out.push(u.table.clone()),
2290 QueryExpr::Delete(d) => out.push(d.table.clone()),
2291 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
2292
2293 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
2298 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
2299 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
2300 QueryExpr::DropTable(q) => out.push(q.name.clone()),
2301 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
2302 QueryExpr::DropVector(q) => out.push(q.name.clone()),
2303 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
2304 QueryExpr::DropKv(q) => out.push(q.name.clone()),
2305 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
2306 QueryExpr::Truncate(q) => out.push(q.name.clone()),
2307 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
2308 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
2309 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
2310 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
2311 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
2312 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
2313 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
2314 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
2315 QueryExpr::QueueCommand(QueueCommand::Move {
2316 source,
2317 destination,
2318 ..
2319 }) => {
2320 out.push(source.clone());
2321 out.push(destination.clone());
2322 }
2323 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
2324 QueryExpr::CreateView(q) => out.push(q.name.clone()),
2325 QueryExpr::DropView(q) => out.push(q.name.clone()),
2326 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
2327
2328 _ => {}
2334 }
2335}
2336
2337impl RedDBRuntime {
2338 pub fn in_memory() -> RedDBResult<Self> {
2339 Self::with_options(RedDBOptions::in_memory())
2340 }
2341
2342 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
2346 self.inner.lock_manager.clone()
2347 }
2348
2349 pub fn config_registry(&self) -> std::sync::Arc<crate::auth::registry::ConfigRegistry> {
2351 self.inner.config_registry.clone()
2352 }
2353
2354 pub fn query_audit(&self) -> std::sync::Arc<crate::runtime::query_audit::QueryAuditStream> {
2355 self.inner.query_audit.clone()
2356 }
2357
2358 pub fn control_events_require_persistence(&self) -> bool {
2359 self.inner.control_event_config.require_persistence()
2360 }
2361
2362 pub fn control_event_config(&self) -> crate::runtime::control_events::ControlEventConfig {
2363 self.inner.control_event_config
2364 }
2365
2366 pub fn control_event_ledger(
2367 &self,
2368 ) -> Arc<dyn crate::runtime::control_events::ControlEventLedger> {
2369 self.inner.control_event_ledger.read().clone()
2370 }
2371
2372 #[doc(hidden)]
2373 pub fn replace_control_event_ledger_for_tests(
2374 &self,
2375 ledger: Arc<dyn crate::runtime::control_events::ControlEventLedger>,
2376 ) {
2377 *self.inner.control_event_ledger.write() = ledger;
2378 }
2379
2380 #[inline(never)]
2381 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
2382 Self::with_pool(options, ConnectionPoolConfig::default())
2383 }
2384
2385 pub fn with_pool(
2386 options: RedDBOptions,
2387 pool_config: ConnectionPoolConfig,
2388 ) -> RedDBResult<Self> {
2389 let boot_open_start_ms = std::time::SystemTime::now()
2397 .duration_since(std::time::UNIX_EPOCH)
2398 .map(|d| d.as_millis() as u64)
2399 .unwrap_or(0);
2400 let db = Arc::new(
2401 RedDB::open_with_options(&options)
2402 .map_err(|err| RedDBError::Internal(err.to_string()))?,
2403 );
2404 let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
2405 crate::storage::cache::BlobCacheConfig::default().with_l2_path(
2406 options
2407 .resolved_path("data.rdb")
2408 .with_extension("result-cache.l2"),
2409 ),
2410 )
2411 .map_err(|err| {
2412 RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
2413 })?;
2414 let storage_ready_ms = std::time::SystemTime::now()
2415 .duration_since(std::time::UNIX_EPOCH)
2416 .map(|d| d.as_millis() as u64)
2417 .unwrap_or(0);
2418
2419 let runtime = Self {
2420 inner: Arc::new(RuntimeInner {
2421 db: db.clone(),
2422 layout: PhysicalLayout::from_options(&options),
2423 indices: IndexCatalog::register_default_vector_graph(
2424 options.has_capability(crate::api::Capability::Table),
2425 options.has_capability(crate::api::Capability::Graph),
2426 ),
2427 pool_config,
2428 pool: Mutex::new(PoolState::default()),
2429 started_at_unix_ms: SystemTime::now()
2430 .duration_since(UNIX_EPOCH)
2431 .unwrap_or_default()
2432 .as_millis(),
2433 probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
2434 index_store: super::index_store::IndexStore::new(),
2435 cdc: crate::replication::cdc::CdcBuffer::new(100_000),
2436 backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
2437 query_cache: parking_lot::RwLock::new(
2438 crate::storage::query::planner::cache::PlanCache::new(1000),
2439 ),
2440 result_cache: parking_lot::RwLock::new((
2441 HashMap::new(),
2442 std::collections::VecDeque::new(),
2443 )),
2444 result_blob_cache,
2445 result_blob_entries: parking_lot::RwLock::new((
2446 HashMap::new(),
2447 std::collections::VecDeque::new(),
2448 )),
2449 ask_answer_cache_entries: parking_lot::RwLock::new((
2450 HashSet::new(),
2451 std::collections::VecDeque::new(),
2452 )),
2453 result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
2454 ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
2455 queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
2456 rmw_locks: RmwLockTable::new(),
2457 planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
2458 ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
2459 config_registry: Arc::new(crate::auth::registry::ConfigRegistry::new()),
2460 ec_worker: crate::ec::worker::EcWorker::new(),
2461 auth_store: parking_lot::RwLock::new(None),
2462 oauth_validator: parking_lot::RwLock::new(None),
2463 views: parking_lot::RwLock::new(HashMap::new()),
2464 materialized_views: parking_lot::RwLock::new(
2465 crate::storage::cache::result::MaterializedViewCache::new(),
2466 ),
2467 retention_sweeper: parking_lot::RwLock::new(
2468 crate::runtime::retention_sweeper::RetentionSweeperState::new(),
2469 ),
2470 snapshot_manager: Arc::new(
2471 crate::storage::transaction::snapshot::SnapshotManager::new(),
2472 ),
2473 tx_contexts: parking_lot::RwLock::new(HashMap::new()),
2474 tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
2475 env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
2476 lock_manager: Arc::new({
2477 let env = crate::runtime::config_overlay::collect_env_overrides();
2482 let timeout_ms = env
2483 .get("concurrency.locking.deadlock_timeout_ms")
2484 .and_then(|raw| raw.parse::<u64>().ok())
2485 .unwrap_or_else(|| {
2486 match crate::runtime::config_matrix::default_for(
2487 "concurrency.locking.deadlock_timeout_ms",
2488 ) {
2489 Some(crate::serde_json::Value::Number(n)) => n as u64,
2490 _ => 5000,
2491 }
2492 });
2493 let cfg = crate::storage::transaction::lock::LockConfig {
2494 default_timeout: std::time::Duration::from_millis(timeout_ms),
2495 ..Default::default()
2496 };
2497 crate::storage::transaction::lock::LockManager::new(cfg)
2498 }),
2499 rls_policies: parking_lot::RwLock::new(HashMap::new()),
2500 rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
2501 foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
2502 pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
2503 pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
2504 pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
2505 pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
2506 tenant_tables: parking_lot::RwLock::new(HashMap::new()),
2507 ddl_epoch: std::sync::atomic::AtomicU64::new(0),
2508 write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
2509 &options,
2510 )),
2511 lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
2512 resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
2513 audit_log: {
2514 let data_path = options
2524 .data_path
2525 .clone()
2526 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2527 let (audit_dest, _) = crate::api::tier_wiring::current_log_destinations();
2528 Arc::new(crate::runtime::audit_log::AuditLogger::for_destination(
2529 &audit_dest,
2530 &data_path,
2531 ))
2532 },
2533 control_event_ledger: parking_lot::RwLock::new(Arc::new(
2534 crate::runtime::control_events::RuntimeLedger::new(db.store()),
2535 )),
2536 control_event_config: options.control_events,
2537 query_audit: Arc::new(crate::runtime::query_audit::QueryAuditStream::new(
2538 db.store(),
2539 options.query_audit.clone(),
2540 )),
2541 lease_lifecycle: std::sync::OnceLock::new(),
2542 replica_apply_metrics: crate::replication::logical::ReplicaApplyMetrics::default(),
2543 quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
2544 schema_vocabulary: parking_lot::RwLock::new(
2545 crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
2546 ),
2547 slow_query_logger: {
2548 let fallback_dir = options
2561 .data_path
2562 .as_ref()
2563 .and_then(|p| p.parent().map(std::path::PathBuf::from))
2564 .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
2565 let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
2566 .ok()
2567 .and_then(|s| s.parse::<u64>().ok())
2568 .unwrap_or(1000);
2569 let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
2570 .ok()
2571 .and_then(|s| s.parse::<u8>().ok())
2572 .unwrap_or(100);
2573 let (_, slow_dest) = crate::api::tier_wiring::current_log_destinations();
2574 crate::telemetry::slow_query_logger::SlowQueryLogger::for_destination(
2575 &slow_dest,
2576 &fallback_dir,
2577 threshold_ms,
2578 sample_pct,
2579 )
2580 },
2581 kv_stats: crate::runtime::KvStatsCounters::default(),
2582 metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
2583 metrics_tenant_activity_stats:
2584 crate::runtime::MetricsTenantActivityCounters::default(),
2585 queue_telemetry: Arc::new(
2586 crate::runtime::queue_telemetry::QueueTelemetryCounters::default(),
2587 ),
2588 kv_tag_index: crate::runtime::KvTagIndex::default(),
2589 chain_tip_cache: parking_lot::Mutex::new(HashMap::new()),
2590 chain_integrity_broken: parking_lot::Mutex::new(HashMap::new()),
2591 }),
2592 };
2593
2594 crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
2600 &runtime.inner.audit_log,
2601 ));
2602
2603 runtime
2611 .inner
2612 .lifecycle
2613 .set_restore_started_at_ms(boot_open_start_ms);
2614 runtime
2615 .inner
2616 .lifecycle
2617 .set_restore_ready_at_ms(storage_ready_ms);
2618 runtime
2619 .inner
2620 .lifecycle
2621 .set_wal_replay_started_at_ms(boot_open_start_ms);
2622 runtime
2623 .inner
2624 .lifecycle
2625 .set_wal_replay_ready_at_ms(storage_ready_ms);
2626
2627 let restored_cdc_lsn = runtime
2628 .inner
2629 .db
2630 .replication
2631 .as_ref()
2632 .map(|repl| {
2633 repl.logical_wal_spool
2634 .as_ref()
2635 .map(|spool| spool.current_lsn())
2636 .unwrap_or(0)
2637 })
2638 .unwrap_or(0)
2639 .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
2640 runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
2641 runtime.rehydrate_snapshot_xid_floor();
2642 runtime.bootstrap_system_keyed_collections()?;
2643 runtime.rehydrate_declared_column_schemas();
2644 runtime.load_probabilistic_state()?;
2645
2646 runtime.rehydrate_tenant_tables();
2650 runtime.rehydrate_materialized_view_descriptors();
2655 if let Some(repl) = &runtime.inner.db.replication {
2656 repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
2657 }
2658
2659 {
2661 let sys = SystemInfo::collect();
2662 runtime.inner.db.store().set_config_tree(
2663 "red.system",
2664 &crate::serde_json::json!({
2665 "pid": sys.pid,
2666 "cpu_cores": sys.cpu_cores,
2667 "total_memory_bytes": sys.total_memory_bytes,
2668 "available_memory_bytes": sys.available_memory_bytes,
2669 "os": sys.os,
2670 "arch": sys.arch,
2671 "hostname": sys.hostname,
2672 "started_at": SystemTime::now()
2673 .duration_since(UNIX_EPOCH)
2674 .unwrap_or_default()
2675 .as_millis() as u64
2676 }),
2677 );
2678
2679 let store = runtime.inner.db.store();
2681 if store
2682 .get_collection("red_config")
2683 .map(|m| m.query_all(|_| true).len())
2684 .unwrap_or(0)
2685 <= 10
2686 {
2687 store.set_config_tree("red.ai", &crate::json!({
2688 "default": crate::json!({
2689 "provider": "openai",
2690 "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
2691 }),
2692 "max_embedding_inputs": 256,
2693 "max_prompt_batch": 256,
2694 "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
2695 }));
2696 store.set_config_tree(
2697 "red.server",
2698 &crate::json!({
2699 "max_scan_limit": 1000,
2700 "max_body_size": 1048576,
2701 "read_timeout_ms": 5000,
2702 "write_timeout_ms": 5000
2703 }),
2704 );
2705 store.set_config_tree(
2706 "red.storage",
2707 &crate::json!({
2708 "page_size": 4096,
2709 "page_cache_capacity": 100000,
2710 "auto_checkpoint_pages": 1000,
2711 "snapshot_retention": 16,
2712 "verify_checksums": true,
2713 "segment": crate::json!({
2714 "max_entities": 100000,
2715 "max_bytes": 268435456_u64,
2716 "compression_level": 6
2717 }),
2718 "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
2719 "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
2720 "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
2721 }),
2722 );
2723 store.set_config_tree(
2724 "red.search",
2725 &crate::json!({
2726 "rag": crate::json!({
2727 "max_chunks_per_source": 10,
2728 "max_total_chunks": 25,
2729 "similarity_threshold": 0.8,
2730 "graph_depth": 2,
2731 "min_relevance": 0.3
2732 }),
2733 "fusion": crate::json!({
2734 "vector_weight": 0.5,
2735 "graph_weight": 0.3,
2736 "table_weight": 0.2,
2737 "dedup_threshold": 0.85
2738 })
2739 }),
2740 );
2741 store.set_config_tree(
2742 "red.auth",
2743 &crate::json!({
2744 "enabled": false,
2745 "session_ttl_secs": 3600,
2746 "require_auth": false
2747 }),
2748 );
2749 store.set_config_tree(
2750 "red.query",
2751 &crate::json!({
2752 "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
2753 "max_recursion_depth": 1000
2754 }),
2755 );
2756 store.set_config_tree(
2757 "red.indexes",
2758 &crate::json!({
2759 "auto_select": true,
2760 "bloom_filter": crate::json!({
2761 "enabled": true,
2762 "false_positive_rate": 0.01,
2763 "prune_on_scan": true
2764 }),
2765 "hash": crate::json!({ "enabled": true }),
2766 "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
2767 "spatial": crate::json!({ "enabled": true })
2768 }),
2769 );
2770 store.set_config_tree(
2771 "red.memtable",
2772 &crate::json!({
2773 "enabled": true,
2774 "max_bytes": 67108864_u64,
2775 "flush_threshold": 0.75
2776 }),
2777 );
2778 store.set_config_tree(
2779 "red.probabilistic",
2780 &crate::json!({
2781 "hll_registers": 16384,
2782 "sketch_default_width": 1000,
2783 "sketch_default_depth": 5,
2784 "filter_default_capacity": 100000
2785 }),
2786 );
2787 store.set_config_tree(
2788 "red.timeseries",
2789 &crate::json!({
2790 "default_chunk_size": 1024,
2791 "compression": crate::json!({
2792 "timestamps": "delta_of_delta",
2793 "values": "gorilla_xor"
2794 }),
2795 "default_retention_days": 0
2796 }),
2797 );
2798 store.set_config_tree(
2799 "red.queue",
2800 &crate::json!({
2801 "default_max_size": 0,
2802 "default_max_attempts": 3,
2803 "visibility_timeout_ms": 30000,
2804 "consumer_idle_timeout_ms": 60000
2805 }),
2806 );
2807 store.set_config_tree(
2808 "red.backup",
2809 &crate::json!({
2810 "enabled": false,
2811 "interval_secs": 3600,
2812 "retention_count": 24,
2813 "upload": false,
2814 "backend": "local"
2815 }),
2816 );
2817 store.set_config_tree(
2818 "red.wal",
2819 &crate::json!({
2820 "archive": crate::json!({
2821 "enabled": false,
2822 "retention_hours": 168,
2823 "prefix": "wal/"
2824 })
2825 }),
2826 );
2827 store.set_config_tree(
2828 "red.cdc",
2829 &crate::json!({
2830 "enabled": true,
2831 "buffer_size": 100000
2832 }),
2833 );
2834 store.set_config_tree(
2835 "red.config.secret",
2836 &crate::json!({
2837 "auto_encrypt": true,
2838 "auto_decrypt": true
2839 }),
2840 );
2841 }
2842
2843 crate::runtime::config_matrix::heal_critical_keys(store.as_ref());
2850
2851 let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
2858 crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
2859 if lehman_yao {
2860 tracing::info!(
2861 "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
2862 );
2863 }
2864
2865 let overlay_path = crate::runtime::config_overlay::config_file_path();
2870 let _ =
2871 crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
2872 }
2873
2874 {
2878 let store = runtime.inner.db.store();
2879 for name in crate::application::vcs_collections::ALL {
2880 let _ = store.get_or_create_collection(*name);
2881 }
2882 store.set_config_tree(
2885 crate::application::vcs_collections::CONFIG_NAMESPACE,
2886 &crate::json!({
2887 "default_branch": "main",
2888 "author": crate::json!({
2889 "name": "reddb",
2890 "email": "reddb@localhost"
2891 }),
2892 "protected_branches": crate::json!(["main"]),
2893 "closure": crate::json!({
2894 "enabled": true,
2895 "lazy": true
2896 }),
2897 "merge": crate::json!({
2898 "default_strategy": "auto",
2899 "fast_forward": true
2900 })
2901 }),
2902 );
2903 }
2904
2905 {
2908 let store = runtime.inner.db.store();
2909 for name in crate::application::migration_collections::ALL {
2910 let _ = store.get_or_create_collection(*name);
2911 }
2912 }
2913
2914 {
2929 let weak = Arc::downgrade(&runtime.inner);
2930 std::thread::Builder::new()
2931 .name("reddb-maintenance".into())
2932 .spawn(move || {
2933 let tick = std::time::Duration::from_millis(200);
2934 let work_interval = std::time::Duration::from_secs(60);
2935 let mut last_work = std::time::Instant::now();
2936 loop {
2937 std::thread::sleep(tick);
2938 let Some(inner) = weak.upgrade() else {
2939 break;
2942 };
2943 if last_work.elapsed() >= work_interval {
2944 let _stats = inner.db.store().context_index().stats();
2945 last_work = std::time::Instant::now();
2946 }
2947 }
2948 })
2949 .ok();
2950 }
2951
2952 {
2954 let store = runtime.inner.db.store();
2955 let mut backup_enabled = false;
2956 let mut backup_interval = 3600u64;
2957
2958 if let Some(manager) = store.get_collection("red_config") {
2959 manager.for_each_entity(|entity| {
2960 if let Some(row) = entity.data.as_row() {
2961 let key = row.get_field("key").and_then(|v| match v {
2962 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
2963 _ => None,
2964 });
2965 let val = row.get_field("value");
2966 if key == Some("red.config.backup.enabled") {
2967 backup_enabled = match val {
2968 Some(crate::storage::schema::Value::Boolean(true)) => true,
2969 Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
2970 _ => false,
2971 };
2972 } else if key == Some("red.config.backup.interval_secs") {
2973 if let Some(crate::storage::schema::Value::Integer(n)) = val {
2974 backup_interval = *n as u64;
2975 }
2976 }
2977 }
2978 true
2979 });
2980 }
2981
2982 if backup_enabled {
2983 runtime.inner.backup_scheduler.set_interval(backup_interval);
2984 let rt = runtime.clone();
2985 runtime
2986 .inner
2987 .backup_scheduler
2988 .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
2989 }
2990 }
2991
2992 {
2994 runtime
2995 .inner
2996 .ec_registry
2997 .load_from_config_store(runtime.inner.db.store().as_ref());
2998 if !runtime.inner.ec_registry.async_configs().is_empty() {
2999 runtime.inner.ec_worker.start(
3000 Arc::clone(&runtime.inner.ec_registry),
3001 Arc::clone(&runtime.inner.db.store()),
3002 );
3003 }
3004 }
3005
3006 if let crate::replication::ReplicationRole::Replica { primary_addr } =
3007 runtime.inner.db.options().replication.role.clone()
3008 {
3009 let rt = runtime.clone();
3010 std::thread::Builder::new()
3011 .name("reddb-replica".into())
3012 .spawn(move || rt.run_replica_loop(primary_addr))
3013 .ok();
3014 }
3015
3016 runtime.inner.lifecycle.mark_ready();
3021
3022 {
3031 let weak_inner = Arc::downgrade(&runtime.inner);
3032 std::thread::Builder::new()
3033 .name("reddb-mv-scheduler".into())
3034 .spawn(move || loop {
3035 std::thread::sleep(std::time::Duration::from_millis(50));
3036 let Some(inner) = weak_inner.upgrade() else {
3037 break;
3038 };
3039 let rt = RedDBRuntime { inner };
3040 rt.refresh_due_materialized_views();
3041 })
3042 .ok();
3043 }
3044
3045 {
3055 let weak_inner = Arc::downgrade(&runtime.inner);
3056 std::thread::Builder::new()
3057 .name("reddb-retention-sweeper".into())
3058 .spawn(move || loop {
3059 std::thread::sleep(std::time::Duration::from_millis(500));
3060 let Some(inner) = weak_inner.upgrade() else {
3061 break;
3062 };
3063 let rt = RedDBRuntime { inner };
3064 rt.sweep_retention_tick(
3065 crate::runtime::retention_sweeper::DEFAULT_SWEEPER_BATCH,
3066 );
3067 })
3068 .ok();
3069 }
3070
3071 Ok(runtime)
3072 }
3073
3074 fn rehydrate_snapshot_xid_floor(&self) {
3075 let store = self.inner.db.store();
3076 for collection in store.list_collections() {
3077 let Some(manager) = store.get_collection(&collection) else {
3078 continue;
3079 };
3080 for entity in manager.query_all(|_| true) {
3081 self.inner
3082 .snapshot_manager
3083 .observe_committed_xid(entity.xmin);
3084 self.inner
3085 .snapshot_manager
3086 .observe_committed_xid(entity.xmax);
3087 }
3088 }
3089 }
3090
3091 pub(crate) fn ensure_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
3104 let store = self.inner.db.store();
3105 let mut changed = false;
3106 if store.get_collection(name).is_none() {
3107 store.get_or_create_collection(name);
3108 changed = true;
3109 }
3110 if self.inner.db.collection_contract(name).is_none() {
3111 self.inner
3112 .db
3113 .save_collection_contract(system_keyed_collection_contract(
3114 name,
3115 crate::catalog::CollectionModel::Table,
3116 ))
3117 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3118 changed = true;
3119 }
3120 if changed {
3121 self.inner
3122 .db
3123 .persist_metadata()
3124 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3125 }
3126 Ok(())
3127 }
3128
3129 pub(crate) fn drop_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
3134 let store = self.inner.db.store();
3135 if store.get_collection(name).is_none() {
3136 return Ok(());
3137 }
3138 store
3139 .drop_collection(name)
3140 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3141 if self.inner.db.collection_contract(name).is_some() {
3144 self.inner
3145 .db
3146 .remove_collection_contract(name)
3147 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3148 }
3149 self.invalidate_result_cache();
3150 self.inner
3151 .db
3152 .persist_metadata()
3153 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3154 Ok(())
3155 }
3156
3157 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
3158 let mut changed = false;
3159 for (name, model) in [
3160 ("red.config", crate::catalog::CollectionModel::Config),
3161 ("red.vault", crate::catalog::CollectionModel::Vault),
3162 (
3166 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION,
3167 crate::catalog::CollectionModel::Config,
3168 ),
3169 ] {
3170 if self.inner.db.store().get_collection(name).is_none() {
3171 self.inner.db.store().get_or_create_collection(name);
3172 changed = true;
3173 }
3174 if self.inner.db.collection_contract(name).is_none() {
3175 self.inner
3176 .db
3177 .save_collection_contract(system_keyed_collection_contract(name, model))
3178 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3179 changed = true;
3180 }
3181 }
3182 if changed {
3183 self.inner
3184 .db
3185 .persist_metadata()
3186 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3187 }
3188 Ok(())
3189 }
3190
3191 pub fn db(&self) -> Arc<RedDB> {
3192 Arc::clone(&self.inner.db)
3193 }
3194
3195 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
3200 &self.inner.index_store
3201 }
3202
3203 pub(crate) fn schema_vocabulary_apply(
3208 &self,
3209 event: crate::runtime::schema_vocabulary::DdlEvent,
3210 ) {
3211 self.inner.schema_vocabulary.write().on_ddl(event);
3212 }
3213
3214 pub fn schema_vocabulary_lookup(
3219 &self,
3220 token: &str,
3221 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
3222 self.inner.schema_vocabulary.read().lookup(token).to_vec()
3223 }
3224
3225 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
3229 *self.inner.auth_store.write() = Some(store);
3230 }
3231
3232 pub fn auth_store(&self) -> Option<Arc<crate::auth::store::AuthStore>> {
3235 self.inner.auth_store.read().clone()
3236 }
3237
3238 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
3240 self.inner
3241 .auth_store
3242 .read()
3243 .as_ref()
3244 .and_then(|store| store.vault_kv_get(key))
3245 }
3246
3247 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
3250 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
3251 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
3252 })?;
3253 store
3254 .vault_kv_try_set(key, value)
3255 .map_err(|err| RedDBError::Query(err.to_string()))
3256 }
3257
3258 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
3262 *self.inner.oauth_validator.write() = validator;
3263 }
3264
3265 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
3269 self.inner.oauth_validator.read().clone()
3270 }
3271
3272 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
3276 let guard = self.inner.auth_store.read();
3277 guard.as_ref().and_then(|s| s.vault_secret_key())
3278 }
3279
3280 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
3286 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3287 if let Some(crate::storage::schema::Value::Boolean(b)) =
3288 crate::runtime::config_overlay::coerce_env_value(key, raw)
3289 {
3290 return b;
3291 }
3292 }
3293 let store = self.inner.db.store();
3294 let Some(manager) = store.get_collection("red_config") else {
3295 return default;
3296 };
3297 let mut result = default;
3298 let mut latest_id: u64 = 0;
3299 manager.for_each_entity(|entity| {
3300 if let Some(row) = entity.data.as_row() {
3301 let entry_key = row.get_field("key").and_then(|v| match v {
3302 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3303 _ => None,
3304 });
3305 if entry_key == Some(key) {
3306 let id = entity.id.raw();
3307 if id >= latest_id {
3308 latest_id = id;
3309 result = match row.get_field("value") {
3310 Some(crate::storage::schema::Value::Boolean(b)) => *b,
3311 Some(crate::storage::schema::Value::Text(s)) => {
3312 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
3313 }
3314 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
3315 _ => default,
3316 };
3317 }
3318 }
3319 }
3320 true
3321 });
3322 result
3323 }
3324
3325 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
3326 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3327 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
3328 crate::runtime::config_overlay::coerce_env_value(key, raw)
3329 {
3330 return n;
3331 }
3332 }
3333 let store = self.inner.db.store();
3334 let Some(manager) = store.get_collection("red_config") else {
3335 return default;
3336 };
3337 let mut result = default;
3338 let mut latest_id: u64 = 0;
3339 manager.for_each_entity(|entity| {
3340 if let Some(row) = entity.data.as_row() {
3341 let entry_key = row.get_field("key").and_then(|v| match v {
3342 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3343 _ => None,
3344 });
3345 if entry_key == Some(key) {
3346 let id = entity.id.raw();
3347 if id >= latest_id {
3348 latest_id = id;
3349 result = match row.get_field("value") {
3350 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
3351 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
3352 Some(crate::storage::schema::Value::Text(s)) => {
3353 s.parse::<u64>().unwrap_or(default)
3354 }
3355 _ => default,
3356 };
3357 }
3358 }
3359 }
3360 true
3361 });
3362 result
3363 }
3364
3365 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
3366 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3367 if let Ok(n) = raw.parse::<f64>() {
3368 return n;
3369 }
3370 }
3371 let store = self.inner.db.store();
3372 let Some(manager) = store.get_collection("red_config") else {
3373 return default;
3374 };
3375 let mut result = default;
3376 let mut latest_id: u64 = 0;
3377 manager.for_each_entity(|entity| {
3378 if let Some(row) = entity.data.as_row() {
3379 let entry_key = row.get_field("key").and_then(|v| match v {
3380 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3381 _ => None,
3382 });
3383 if entry_key == Some(key) {
3384 let id = entity.id.raw();
3385 if id >= latest_id {
3386 latest_id = id;
3387 result = match row.get_field("value") {
3388 Some(crate::storage::schema::Value::Float(n)) => *n,
3389 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
3390 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
3391 Some(crate::storage::schema::Value::Text(s)) => {
3392 s.parse::<f64>().unwrap_or(default)
3393 }
3394 _ => default,
3395 };
3396 }
3397 }
3398 }
3399 true
3400 });
3401 result
3402 }
3403
3404 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
3405 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3406 return raw.clone();
3407 }
3408 let store = self.inner.db.store();
3409 let Some(manager) = store.get_collection("red_config") else {
3410 return default.to_string();
3411 };
3412 let mut result = default.to_string();
3413 let mut latest_id: u64 = 0;
3414 manager.for_each_entity(|entity| {
3415 if let Some(row) = entity.data.as_row() {
3416 let entry_key = row.get_field("key").and_then(|v| match v {
3417 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3418 _ => None,
3419 });
3420 if entry_key == Some(key) {
3421 let id = entity.id.raw();
3422 if id >= latest_id {
3423 latest_id = id;
3424 if let Some(crate::storage::schema::Value::Text(value)) =
3425 row.get_field("value")
3426 {
3427 result = value.to_string();
3428 }
3429 }
3430 }
3431 }
3432 true
3433 });
3434 result
3435 }
3436
3437 fn latest_metadata_for(
3438 &self,
3439 collection: &str,
3440 entity_id: u64,
3441 ) -> Option<crate::serde_json::Value> {
3442 self.inner
3443 .db
3444 .store()
3445 .get_metadata(collection, EntityId::new(entity_id))
3446 .map(|metadata| metadata_to_json(&metadata))
3447 }
3448
3449 fn persist_replica_lsn(&self, lsn: u64) {
3450 self.inner.db.store().set_config_tree(
3451 "red.replication",
3452 &crate::json!({
3453 "last_applied_lsn": lsn
3454 }),
3455 );
3456 }
3457
3458 fn persist_replication_health(
3459 &self,
3460 state: &str,
3461 last_error: &str,
3462 primary_lsn: Option<u64>,
3463 oldest_available_lsn: Option<u64>,
3464 ) {
3465 self.inner.db.store().set_config_tree(
3466 "red.replication",
3467 &crate::json!({
3468 "state": state,
3469 "last_error": last_error,
3470 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
3471 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
3472 "updated_at_unix_ms": SystemTime::now()
3473 .duration_since(UNIX_EPOCH)
3474 .unwrap_or_default()
3475 .as_millis() as u64
3476 }),
3477 );
3478 }
3479
3480 pub(crate) fn secret_auto_encrypt(&self) -> bool {
3483 self.config_bool("red.config.secret.auto_encrypt", true)
3484 }
3485
3486 pub(crate) fn secret_auto_decrypt(&self) -> bool {
3491 self.config_bool("red.config.secret.auto_decrypt", true)
3492 }
3493
3494 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
3501 if !self.secret_auto_decrypt() {
3502 return;
3503 }
3504 let Some(key) = self.secret_aes_key() else {
3505 return;
3506 };
3507 for record in result.result.records.iter_mut() {
3508 for value in record.values_mut() {
3509 if let Value::Secret(ref bytes) = value {
3510 if let Some(plain) =
3511 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
3512 {
3513 if let Ok(text) = String::from_utf8(plain) {
3514 *value = Value::text(text);
3515 }
3516 }
3517 }
3518 }
3519 }
3520 }
3521
3522 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
3530 crate::runtime::mutation::MutationEngine::new(self)
3531 }
3532
3533 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
3544 self.inner.write_gate.check(kind)
3545 }
3546
3547 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
3551 &self.inner.write_gate
3552 }
3553
3554 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
3558 &self.inner.lifecycle
3559 }
3560
3561 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
3563 &self.inner.resource_limits
3564 }
3565
    /// Borrows the runtime's audit logger.
    pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
        &self.inner.audit_log
    }
3570
    /// Returns a new `Arc` handle to the runtime's audit logger (reference
    /// count bump, the logger itself is shared).
    pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
        Arc::clone(&self.inner.audit_log)
    }
3577
3578 pub(crate) fn emit_control_event(
3579 &self,
3580 kind: crate::runtime::control_events::EventKind,
3581 outcome: crate::runtime::control_events::Outcome,
3582 action: &'static str,
3583 resource: Option<String>,
3584 reason: Option<String>,
3585 extra_fields: Vec<(String, crate::runtime::control_events::Sensitivity)>,
3586 ) -> RedDBResult<()> {
3587 use crate::runtime::control_events::{
3588 ActorRef, ControlEvent, ControlEventCtx, ControlEventLedger, Sensitivity,
3589 };
3590
3591 let tenant = current_tenant();
3592 let principal = current_auth_identity();
3593 let actor_user = principal
3594 .as_ref()
3595 .map(|(principal, _)| UserId::from_parts(tenant.as_deref(), principal));
3596 let actor = actor_user
3597 .as_ref()
3598 .map(ActorRef::User)
3599 .unwrap_or(ActorRef::Anonymous);
3600 let ctx = ControlEventCtx {
3601 actor,
3602 scope: tenant
3603 .as_ref()
3604 .map(|scope| std::borrow::Cow::Borrowed(scope.as_str())),
3605 request_id: Some(std::borrow::Cow::Owned(format!(
3606 "conn-{}",
3607 current_connection_id()
3608 ))),
3609 trace_id: None,
3610 };
3611 let mut fields = std::collections::HashMap::new();
3612 fields.insert(
3613 "connection_id".to_string(),
3614 Sensitivity::raw(current_connection_id().to_string()),
3615 );
3616 if let Some((_, role)) = principal {
3617 fields.insert("actor_role".to_string(), Sensitivity::raw(role.as_str()));
3618 }
3619 for (key, value) in extra_fields {
3620 fields.insert(key, value);
3621 }
3622 let event = ControlEvent {
3623 kind,
3624 outcome,
3625 action: std::borrow::Cow::Borrowed(action),
3626 resource,
3627 reason,
3628 matched_policy_id: None,
3629 fields,
3630 };
3631 let ledger = self.inner.control_event_ledger.read();
3632 match ledger.emit(&ctx, event) {
3633 Ok(_) => Ok(()),
3634 Err(err) if self.inner.control_event_config.require_persistence() => {
3635 Err(RedDBError::Internal(err.to_string()))
3636 }
3637 Err(_) => Ok(()),
3638 }
3639 }
3640
3641 fn policy_mutation_control_ctx<'a>(
3642 &self,
3643 actor: &'a crate::auth::UserId,
3644 tenant: Option<&'a str>,
3645 ) -> crate::runtime::control_events::ControlEventCtx<'a> {
3646 crate::runtime::control_events::ControlEventCtx {
3647 actor: crate::runtime::control_events::ActorRef::User(actor),
3648 scope: tenant.map(std::borrow::Cow::Borrowed),
3649 request_id: Some(std::borrow::Cow::Owned(format!(
3650 "conn-{}",
3651 current_connection_id()
3652 ))),
3653 trace_id: None,
3654 }
3655 }
3656
3657 fn emit_query_audit(
3658 &self,
3659 query: &str,
3660 plan: &QueryAuditPlan,
3661 duration_ms: u64,
3662 result: &RuntimeQueryResult,
3663 ) {
3664 if !self.inner.query_audit.has_rules() {
3665 return;
3666 }
3667 let actor = current_auth_identity().map(|(principal, _)| principal);
3668 let tenant = current_tenant();
3669 let row_count = if result.statement_type == "select" {
3670 result.result.records.len() as u64
3671 } else {
3672 result.affected_rows
3673 };
3674 self.inner
3675 .query_audit
3676 .emit(crate::runtime::query_audit::QueryAuditEvent {
3677 actor,
3678 tenant,
3679 statement_kind: plan.statement_kind,
3680 touched_collections: plan.collections.clone(),
3681 duration_ms,
3682 row_count,
3683 request_id: Some(crate::crypto::uuid::Uuid::new_v7().to_string()),
3684 query_hash: Some(blake3::hash(query.as_bytes()).to_hex().to_string()),
3685 });
3686 }
3687
    /// Borrows the runtime's queue telemetry counters.
    pub(crate) fn queue_telemetry(
        &self,
    ) -> &crate::runtime::queue_telemetry::QueueTelemetryCounters {
        &self.inner.queue_telemetry
    }
3696
3697 pub fn queue_telemetry_snapshot(
3700 &self,
3701 ) -> crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3702 crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
3703 delivered: self.inner.queue_telemetry.delivered_snapshot(),
3704 acked: self.inner.queue_telemetry.acked_snapshot(),
3705 nacked: self.inner.queue_telemetry.nacked_snapshot(),
3706 }
3707 }
3708
3709 pub fn queue_pending_counts(&self) -> Vec<((String, String), u64)> {
3714 let store = self.inner.db.store();
3715 crate::runtime::impl_queue::pending_counts_by_group(store.as_ref())
3716 .into_iter()
3717 .collect()
3718 }
3719
    /// Returns a new `Arc` handle to the runtime's write gate (reference
    /// count bump, the gate itself is shared).
    pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
        Arc::clone(&self.inner.write_gate)
    }
3727
    /// Returns the lease lifecycle handle if one has been installed via
    /// `set_lease_lifecycle`, or `None` otherwise.
    pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
        self.inner.lease_lifecycle.get()
    }
3733
    /// Installs the lease lifecycle handle.
    ///
    /// Forwards to the cell's `set`; on failure the rejected `lifecycle` is
    /// handed back in `Err`.
    // NOTE(review): the `get`/`set` pair looks like a set-once cell
    // (e.g. `OnceLock`) — confirm against the field declaration.
    pub fn set_lease_lifecycle(
        &self,
        lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
    ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
        self.inner.lease_lifecycle.set(lifecycle)
    }
3742
3743 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
3748 if self.inner.resource_limits.batch_size_exceeded(requested) {
3749 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
3750 return Err(RedDBError::QuotaExceeded(format!(
3751 "max_batch_size:{requested}:{max}"
3752 )));
3753 }
3754 Ok(())
3755 }
3756
3757 pub fn check_db_size(&self) -> RedDBResult<()> {
3763 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
3764 return Ok(());
3765 };
3766 if limit == 0 {
3767 return Ok(());
3768 }
3769 let Some(path) = self.inner.db.path() else {
3770 return Ok(());
3771 };
3772 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
3773 if current > limit {
3774 return Err(RedDBError::QuotaExceeded(format!(
3775 "max_db_size_bytes:{current}:{limit}"
3776 )));
3777 }
3778 Ok(())
3779 }
3780
3781 pub fn graceful_shutdown(
3799 &self,
3800 backup_on_shutdown: bool,
3801 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
3802 if !self.inner.lifecycle.begin_shutdown() {
3803 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
3807 }
3808
3809 let started_ms = std::time::SystemTime::now()
3810 .duration_since(std::time::UNIX_EPOCH)
3811 .map(|d| d.as_millis() as u64)
3812 .unwrap_or(0);
3813 let mut report = crate::runtime::lifecycle::ShutdownReport {
3814 started_at_ms: started_ms,
3815 ..Default::default()
3816 };
3817
3818 let flush_res = self.inner.db.flush_local_only();
3824 report.flushed_wal = flush_res.is_ok();
3825 report.final_checkpoint = flush_res.is_ok();
3826 if let Err(err) = &flush_res {
3827 tracing::error!(
3828 target: "reddb::lifecycle",
3829 error = %err,
3830 "graceful_shutdown: local flush failed"
3831 );
3832 } else if let Err(lease_err) =
3833 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
3834 {
3835 tracing::warn!(
3836 target: "reddb::serverless::lease",
3837 error = %lease_err,
3838 "graceful_shutdown: remote upload skipped — lease not held"
3839 );
3840 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
3841 tracing::error!(
3842 target: "reddb::lifecycle",
3843 error = %err,
3844 "graceful_shutdown: remote upload failed"
3845 );
3846 }
3847
3848 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
3853 match self.trigger_backup() {
3859 Ok(result) => {
3860 report.backup_uploaded = result.uploaded;
3861 }
3862 Err(err) => {
3863 tracing::warn!(
3864 target: "reddb::lifecycle",
3865 error = %err,
3866 "graceful_shutdown: final backup skipped"
3867 );
3868 }
3869 }
3870 }
3871
3872 let completed_ms = std::time::SystemTime::now()
3873 .duration_since(std::time::UNIX_EPOCH)
3874 .map(|d| d.as_millis() as u64)
3875 .unwrap_or(started_ms);
3876 report.completed_at_ms = completed_ms;
3877 report.duration_ms = completed_ms.saturating_sub(started_ms);
3878
3879 self.inner.lifecycle.finish_shutdown(report.clone());
3880 Ok(report)
3881 }
3882
3883 pub(crate) fn cdc_emit_no_cache_invalidate(
3889 &self,
3890 operation: crate::replication::cdc::ChangeOperation,
3891 collection: &str,
3892 entity_id: u64,
3893 entity_kind: &str,
3894 ) -> u64 {
3895 let lsn = self
3896 .inner
3897 .cdc
3898 .emit(operation, collection, entity_id, entity_kind);
3899
3900 if let Some(ref primary) = self.inner.db.replication {
3902 let store = self.inner.db.store();
3903 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3904 None
3905 } else {
3906 store.get(collection, EntityId::new(entity_id))
3907 };
3908 let record = ChangeRecord {
3909 lsn,
3910 timestamp: SystemTime::now()
3911 .duration_since(UNIX_EPOCH)
3912 .unwrap_or_default()
3913 .as_millis() as u64,
3914 operation,
3915 collection: collection.to_string(),
3916 entity_id,
3917 entity_kind: entity_kind.to_string(),
3918 entity_bytes: entity
3919 .as_ref()
3920 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
3921 metadata: self.latest_metadata_for(collection, entity_id),
3922 refresh_records: None,
3923 };
3924 let encoded = record.encode();
3925 primary.wal_buffer.append(record.lsn, encoded.clone());
3926 if let Some(spool) = &primary.logical_wal_spool {
3927 let _ = spool.append(record.lsn, &encoded);
3928 }
3929 }
3930 lsn
3931 }
3932
3933 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
3934 &self,
3935 collection: &str,
3936 ids: &[EntityId],
3937 entity_kind: &str,
3938 ) -> Vec<u64> {
3939 if ids.is_empty() {
3940 return Vec::new();
3941 }
3942
3943 if self.inner.db.replication.is_none() {
3947 return self.inner.cdc.emit_batch_same_collection(
3948 crate::replication::cdc::ChangeOperation::Insert,
3949 collection,
3950 entity_kind,
3951 ids.iter().map(|id| id.raw()),
3952 );
3953 }
3954
3955 ids.iter()
3958 .map(|id| {
3959 self.cdc_emit_no_cache_invalidate(
3960 crate::replication::cdc::ChangeOperation::Insert,
3961 collection,
3962 id.raw(),
3963 entity_kind,
3964 )
3965 })
3966 .collect()
3967 }
3968
3969 pub fn cdc_emit(
3970 &self,
3971 operation: crate::replication::cdc::ChangeOperation,
3972 collection: &str,
3973 entity_id: u64,
3974 entity_kind: &str,
3975 ) -> u64 {
3976 let lsn = self
3977 .inner
3978 .cdc
3979 .emit(operation, collection, entity_id, entity_kind);
3980 self.invalidate_result_cache_for_table(collection);
3986
3987 if let Some(ref primary) = self.inner.db.replication {
3989 let store = self.inner.db.store();
3990 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
3991 None
3992 } else {
3993 store.get(collection, EntityId::new(entity_id))
3994 };
3995 let record = ChangeRecord {
3996 lsn,
3997 timestamp: SystemTime::now()
3998 .duration_since(UNIX_EPOCH)
3999 .unwrap_or_default()
4000 .as_millis() as u64,
4001 operation,
4002 collection: collection.to_string(),
4003 entity_id,
4004 entity_kind: entity_kind.to_string(),
4005 entity_bytes: entity
4006 .as_ref()
4007 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
4008 metadata: self.latest_metadata_for(collection, entity_id),
4009 refresh_records: None,
4010 };
4011 let encoded = record.encode();
4012 primary.wal_buffer.append(record.lsn, encoded.clone());
4013 if let Some(spool) = &primary.logical_wal_spool {
4014 let _ = spool.append(record.lsn, &encoded);
4015 }
4016 }
4017 lsn
4018 }
4019
    /// Emits a KV change through the CDC emitter and returns the LSN it
    /// assigned.
    ///
    /// After emitting, bumps the KV "watch events emitted" counter and
    /// invalidates the result cache for `collection`.
    pub(crate) fn cdc_emit_kv(
        &self,
        operation: crate::replication::cdc::ChangeOperation,
        collection: &str,
        key: &str,
        entity_id: u64,
        before: Option<crate::json::Value>,
        after: Option<crate::json::Value>,
    ) -> u64 {
        let lsn = self
            .inner
            .cdc
            .emit_kv(operation, collection, key, entity_id, before, after);
        self.inner.kv_stats.incr_watch_events_emitted();
        self.invalidate_result_cache_for_table(collection);
        lsn
    }
4037
4038 pub(crate) fn record_kv_watch_event(
4039 &self,
4040 operation: crate::replication::cdc::ChangeOperation,
4041 collection: &str,
4042 key: &str,
4043 entity_id: u64,
4044 before: Option<crate::json::Value>,
4045 after: Option<crate::json::Value>,
4046 ) {
4047 if self.current_xid().is_some() {
4048 let conn_id = current_connection_id();
4049 let event = crate::replication::cdc::KvWatchEvent {
4050 collection: collection.to_string(),
4051 key: key.to_string(),
4052 op: operation,
4053 before,
4054 after,
4055 lsn: 0,
4056 committed_at: 0,
4057 dropped_event_count: 0,
4058 };
4059 self.inner
4060 .pending_kv_watch_events
4061 .write()
4062 .entry(conn_id)
4063 .or_default()
4064 .push(event);
4065 return;
4066 }
4067
4068 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
4069 }
4070
    /// Emits a CDC event for an entity the caller already holds.
    ///
    /// Convenience wrapper around `cdc_emit_prebuilt_with_columns` that passes
    /// `None` for the changed-column list; returns the LSN from that call.
    pub(crate) fn cdc_emit_prebuilt(
        &self,
        operation: crate::replication::cdc::ChangeOperation,
        collection: &str,
        entity: &UnifiedEntity,
        entity_kind: &str,
        metadata: Option<&crate::storage::Metadata>,
        invalidate_cache: bool,
    ) -> u64 {
        self.cdc_emit_prebuilt_with_columns(
            operation,
            collection,
            entity,
            entity_kind,
            metadata,
            invalidate_cache,
            None,
        )
    }
4090
4091 pub(crate) fn cdc_emit_prebuilt_with_columns(
4098 &self,
4099 operation: crate::replication::cdc::ChangeOperation,
4100 collection: &str,
4101 entity: &UnifiedEntity,
4102 entity_kind: &str,
4103 metadata: Option<&crate::storage::Metadata>,
4104 invalidate_cache: bool,
4105 changed_columns: Option<Vec<String>>,
4106 ) -> u64 {
4107 if invalidate_cache {
4108 self.invalidate_result_cache();
4109 }
4110
4111 let public_id = entity.logical_id().raw();
4112 let lsn = self.inner.cdc.emit_with_columns(
4113 operation,
4114 collection,
4115 public_id,
4116 entity_kind,
4117 changed_columns,
4118 );
4119
4120 if let Some(ref primary) = self.inner.db.replication {
4121 let store = self.inner.db.store();
4122 let record = ChangeRecord {
4123 lsn,
4124 timestamp: SystemTime::now()
4125 .duration_since(UNIX_EPOCH)
4126 .unwrap_or_default()
4127 .as_millis() as u64,
4128 operation,
4129 collection: collection.to_string(),
4130 entity_id: entity.id.raw(),
4131 entity_kind: entity_kind.to_string(),
4132 entity_bytes: Some(UnifiedStore::serialize_entity(
4133 entity,
4134 store.format_version(),
4135 )),
4136 metadata: metadata
4137 .map(metadata_to_json)
4138 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
4139 refresh_records: None,
4140 };
4141 let encoded = record.encode();
4142 primary.wal_buffer.append(record.lsn, encoded.clone());
4143 if let Some(spool) = &primary.logical_wal_spool {
4144 let _ = spool.append(record.lsn, &encoded);
4145 }
4146 }
4147
4148 lsn
4149 }
4150
4151 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
4152 &self,
4153 operation: crate::replication::cdc::ChangeOperation,
4154 entity_kind: &str,
4155 items: I,
4156 invalidate_cache: bool,
4157 ) where
4158 I: IntoIterator<
4159 Item = (
4160 &'a str,
4161 &'a UnifiedEntity,
4162 Option<&'a crate::storage::Metadata>,
4163 ),
4164 >,
4165 {
4166 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
4167 items.into_iter().collect();
4168 if items.is_empty() {
4169 return;
4170 }
4171
4172 if invalidate_cache {
4173 self.invalidate_result_cache();
4174 }
4175
4176 for (collection, entity, metadata) in items {
4177 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
4178 }
4179 }
4180
4181 fn run_replica_loop(&self, primary_addr: String) {
4182 let endpoint = if primary_addr.starts_with("http") {
4183 primary_addr
4184 } else {
4185 format!("http://{primary_addr}")
4186 };
4187 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
4188 let max_count = self.inner.db.options().replication.max_batch_size;
4189 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
4190
4191 let runtime = match tokio::runtime::Builder::new_current_thread()
4192 .enable_all()
4193 .build()
4194 {
4195 Ok(runtime) => runtime,
4196 Err(_) => return,
4197 };
4198
4199 runtime.block_on(async move {
4200 use crate::grpc::proto::red_db_client::RedDbClient;
4201 use crate::grpc::proto::JsonPayloadRequest;
4202
4203 let mut client = loop {
4204 match RedDbClient::connect(endpoint.clone()).await {
4205 Ok(client) => {
4206 self.persist_replication_health("connecting", "", None, None);
4207 break client;
4208 }
4209 Err(_) => {
4210 self.persist_replication_health(
4211 "connecting",
4212 "waiting for primary connection",
4213 None,
4214 None,
4215 );
4216 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
4217 }
4218 }
4219 };
4220
4221 let applier = crate::replication::logical::LogicalChangeApplier::new(since_lsn);
4226
4227 loop {
4228 let payload = crate::json!({
4229 "since_lsn": since_lsn,
4230 "max_count": max_count
4231 });
4232 let request = tonic::Request::new(JsonPayloadRequest {
4233 payload_json: crate::json::to_string(&payload)
4234 .unwrap_or_else(|_| "{}".to_string()),
4235 });
4236
4237 if let Ok(response) = client.pull_wal_records(request).await {
4238 if let Ok(value) =
4239 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
4240 {
4241 let current_lsn =
4242 value.get("current_lsn").and_then(crate::json::Value::as_u64);
4243 let oldest_available_lsn = value
4244 .get("oldest_available_lsn")
4245 .and_then(crate::json::Value::as_u64);
4246 if since_lsn > 0
4247 && oldest_available_lsn
4248 .map(|oldest| oldest > since_lsn.saturating_add(1))
4249 .unwrap_or(false)
4250 {
4251 self.persist_replication_health(
4252 "stalled_gap",
4253 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
4254 current_lsn,
4255 oldest_available_lsn,
4256 );
4257 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
4258 continue;
4259 }
4260 if let Some(records) =
4261 value.get("records").and_then(crate::json::Value::as_array)
4262 {
4263 for record in records {
4264 let Some(data_hex) =
4265 record.get("data").and_then(crate::json::Value::as_str)
4266 else {
4267 continue;
4268 };
4269 let Ok(data) = hex::decode(data_hex) else {
4270 self.inner.replica_apply_metrics.record(
4271 crate::replication::logical::ApplyErrorKind::Decode,
4272 );
4273 self.persist_replication_health(
4274 "apply_error",
4275 "failed to decode WAL record hex payload",
4276 current_lsn,
4277 oldest_available_lsn,
4278 );
4279 continue;
4280 };
4281 let Ok(change) = ChangeRecord::decode(&data) else {
4282 self.inner.replica_apply_metrics.record(
4283 crate::replication::logical::ApplyErrorKind::Decode,
4284 );
4285 self.persist_replication_health(
4286 "apply_error",
4287 "failed to decode logical WAL record",
4288 current_lsn,
4289 oldest_available_lsn,
4290 );
4291 continue;
4292 };
4293 match applier.apply(
4294 self.inner.db.as_ref(),
4295 &change,
4296 ApplyMode::Replica,
4297 ) {
4298 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
4299 self.invalidate_result_cache_for_table(&change.collection);
4300 since_lsn = since_lsn.max(change.lsn);
4301 self.persist_replica_lsn(since_lsn);
4302 }
4303 Ok(_) => {
4304 }
4306 Err(err) => {
4307 self.inner.replica_apply_metrics.record(err.kind());
4308 match &err {
4317 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _ } => {
4318 crate::telemetry::operator_event::OperatorEvent::Divergence {
4319 peer: "primary".to_string(),
4320 leader_lsn: *lsn,
4321 follower_lsn: since_lsn,
4322 }
4323 .emit_global();
4324 }
4325 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
4326 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
4327 peer: "primary".to_string(),
4328 reason: format!("stalled gap last={last} next={next}"),
4329 }
4330 .emit_global();
4331 }
4332 _ => {}
4333 }
4334 let kind = match &err {
4335 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
4336 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
4337 _ => "apply_error",
4338 };
4339 self.persist_replication_health(
4340 kind,
4341 &format!("replica apply rejected: {err}"),
4342 current_lsn,
4343 oldest_available_lsn,
4344 );
4345 break;
4356 }
4357 }
4358 }
4359 }
4360 self.persist_replication_health(
4361 "healthy",
4362 "",
4363 current_lsn,
4364 oldest_available_lsn,
4365 );
4366 } else {
4367 self.persist_replication_health(
4368 "apply_error",
4369 "failed to parse pull_wal_records response",
4370 None,
4371 None,
4372 );
4373 }
4374 } else {
4375 self.persist_replication_health(
4376 "connecting",
4377 "primary pull_wal_records request failed",
4378 None,
4379 None,
4380 );
4381 }
4382
4383 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
4384 }
4385 });
4386 }
4387
    /// Polls the CDC component with `since_lsn` and `max_count` and returns
    /// the change events it yields.
    pub fn cdc_poll(
        &self,
        since_lsn: u64,
        max_count: usize,
    ) -> Vec<crate::replication::cdc::ChangeEvent> {
        self.inner.cdc.poll(since_lsn, max_count)
    }
4396
    /// Returns the CDC component's current LSN.
    pub fn cdc_current_lsn(&self) -> u64 {
        self.inner.cdc.current_lsn()
    }
4403
4404 pub fn kv_watch_events_since(
4405 &self,
4406 collection: &str,
4407 key: &str,
4408 since_lsn: u64,
4409 max_count: usize,
4410 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
4411 self.inner
4412 .cdc
4413 .poll(since_lsn, max_count)
4414 .into_iter()
4415 .filter_map(|event| event.kv)
4416 .filter(|event| event.collection == collection && event.key == key)
4417 .collect()
4418 }
4419
4420 pub fn kv_watch_events_since_prefix(
4421 &self,
4422 collection: &str,
4423 prefix: &str,
4424 since_lsn: u64,
4425 max_count: usize,
4426 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
4427 self.inner
4428 .cdc
4429 .poll(since_lsn, max_count)
4430 .into_iter()
4431 .filter_map(|event| event.kv)
4432 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
4433 .collect()
4434 }
4435
    /// Opens a `KvWatchStream` for a single key in `collection`.
    ///
    /// The stream borrows this runtime's CDC component and KV stats;
    /// `from_lsn` is forwarded unchanged and the idle timeout comes from
    /// `kv_watch_idle_timeout_ms()`.
    pub(crate) fn kv_watch_subscribe<'a>(
        &'a self,
        collection: impl Into<String>,
        key: impl Into<String>,
        from_lsn: Option<u64>,
    ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
        crate::runtime::kv_watch::KvWatchStream::subscribe(
            &self.inner.cdc,
            &self.inner.kv_stats,
            collection,
            key,
            from_lsn,
            self.kv_watch_idle_timeout_ms(),
        )
    }
4451
    /// Opens a `KvWatchStream` for a key prefix in `collection`.
    ///
    /// The stream borrows this runtime's CDC component and KV stats;
    /// `from_lsn` is forwarded unchanged and the idle timeout comes from
    /// `kv_watch_idle_timeout_ms()`.
    pub(crate) fn kv_watch_subscribe_prefix<'a>(
        &'a self,
        collection: impl Into<String>,
        prefix: impl Into<String>,
        from_lsn: Option<u64>,
    ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
        crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
            &self.inner.cdc,
            &self.inner.kv_stats,
            collection,
            prefix,
            from_lsn,
            self.kv_watch_idle_timeout_ms(),
        )
    }
4467
    /// Reads `red.config.kv.watch.idle_timeout_ms` through `config_u64`,
    /// passing `60_000` as the fallback argument.
    pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
        self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
    }
4471
    /// Returns the status reported by the backup scheduler.
    pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
        self.inner.backup_scheduler.status()
    }
4476
    /// Borrows the runtime's result blob cache.
    pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
        &self.inner.result_blob_cache
    }
4489
4490 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
4494 self.inner
4495 .db
4496 .replication
4497 .as_ref()
4498 .map(|repl| repl.replica_snapshots())
4499 .unwrap_or_default()
4500 }
4501
    /// Builds the commit policy via `CommitPolicy::from_env()`; the call is
    /// made afresh on every invocation, nothing is cached here.
    pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
        crate::replication::CommitPolicy::from_env()
    }
4509
    /// Returns a snapshot of the replica apply metrics as four
    /// `(error kind, count)` pairs.
    pub fn replica_apply_error_counts(
        &self,
    ) -> [(crate::replication::logical::ApplyErrorKind, u64); 4] {
        self.inner.replica_apply_metrics.snapshot()
    }
4519
    /// Borrows the runtime's quota bucket.
    pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
        &self.inner.quota_bucket
    }
4525
4526 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
4530 self.inner
4531 .db
4532 .replication
4533 .as_ref()
4534 .map(|repl| repl.commit_waiter.snapshot())
4535 .unwrap_or_default()
4536 }
4537
4538 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
4541 self.inner
4542 .db
4543 .replication
4544 .as_ref()
4545 .map(|repl| repl.commit_waiter.metrics_snapshot())
4546 .unwrap_or((0, 0, 0, 0))
4547 }
4548
4549 pub fn await_replica_acks(
4559 &self,
4560 target_lsn: u64,
4561 count: u32,
4562 timeout: std::time::Duration,
4563 ) -> crate::replication::AwaitOutcome {
4564 match &self.inner.db.replication {
4565 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
4566 None => {
4567 crate::replication::AwaitOutcome::NotRequired
4571 }
4572 }
4573 }
4574
4575 pub fn enforce_commit_policy(
4589 &self,
4590 post_lsn: u64,
4591 ) -> RedDBResult<crate::replication::AwaitOutcome> {
4592 let n = match self.commit_policy() {
4593 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
4594 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
4595 };
4596 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4597 .ok()
4598 .and_then(|v| v.parse::<u64>().ok())
4599 .unwrap_or(5_000);
4600 let outcome =
4601 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
4602 {
4603 use crate::runtime::control_events::{EventKind, Outcome, Sensitivity};
4604 let (event_outcome, fields) = match &outcome {
4605 crate::replication::AwaitOutcome::Reached(count) => (
4606 Outcome::Allowed,
4607 vec![
4608 (
4609 "post_lsn".to_string(),
4610 Sensitivity::raw(post_lsn.to_string()),
4611 ),
4612 ("required".to_string(), Sensitivity::raw(n.to_string())),
4613 ("observed".to_string(), Sensitivity::raw(count.to_string())),
4614 (
4615 "timeout_ms".to_string(),
4616 Sensitivity::raw(timeout_ms.to_string()),
4617 ),
4618 ],
4619 ),
4620 crate::replication::AwaitOutcome::TimedOut { observed, required } => (
4621 Outcome::Error,
4622 vec![
4623 (
4624 "post_lsn".to_string(),
4625 Sensitivity::raw(post_lsn.to_string()),
4626 ),
4627 (
4628 "required".to_string(),
4629 Sensitivity::raw(required.to_string()),
4630 ),
4631 (
4632 "observed".to_string(),
4633 Sensitivity::raw(observed.to_string()),
4634 ),
4635 (
4636 "timeout_ms".to_string(),
4637 Sensitivity::raw(timeout_ms.to_string()),
4638 ),
4639 ],
4640 ),
4641 crate::replication::AwaitOutcome::NotRequired => (Outcome::Allowed, Vec::new()),
4642 };
4643 if !fields.is_empty() {
4644 self.emit_control_event(
4645 EventKind::ReplicationSafety,
4646 event_outcome,
4647 "replication_commit_policy",
4648 Some(format!("replication:lsn:{post_lsn}")),
4649 None,
4650 fields,
4651 )?;
4652 }
4653 }
4654 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
4655 tracing::warn!(
4656 target: "reddb::commit",
4657 post_lsn,
4658 observed = *observed,
4659 required = *required,
4660 timeout_ms,
4661 "ack_n: timed out waiting for replicas"
4662 );
4663 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
4664 .ok()
4665 .map(|v| {
4666 let t = v.trim();
4667 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
4668 })
4669 .unwrap_or(false);
4670 if fail {
4671 return Err(RedDBError::ReadOnly(format!(
4672 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
4673 )));
4674 }
4675 }
4676 Ok(outcome)
4677 }
4678
4679 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
4687 match crate::crypto::page_encryption::key_from_env() {
4688 Ok(Some(_)) => ("enabled", None),
4689 Ok(None) => ("disabled", None),
4690 Err(err) => ("error", Some(err)),
4691 }
4692 }
4693
4694 pub fn replica_apply_health(&self) -> Option<String> {
4700 let state = self.config_string("red.replication.state", "");
4701 if state.is_empty() {
4702 None
4703 } else {
4704 Some(state)
4705 }
4706 }
4707
4708 pub fn wal_archive_progress(&self) -> (u64, u64) {
4713 let current_lsn = self
4714 .inner
4715 .db
4716 .replication
4717 .as_ref()
4718 .map(|repl| {
4719 repl.logical_wal_spool
4720 .as_ref()
4721 .map(|spool| spool.current_lsn())
4722 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4723 })
4724 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4725 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4726 (current_lsn, last_archived_lsn)
4727 }
4728
4729 pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
4731 let result = (|| {
4732 self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
4733 self.assert_remote_write_allowed("admin/backup")?;
4738 let started = std::time::Instant::now();
4739 let snapshot = self.create_snapshot()?;
4740 let mut uploaded = false;
4741
4742 if let (Some(backend), Some(path)) =
4743 (&self.inner.db.remote_backend, self.inner.db.path())
4744 {
4745 let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
4746 let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
4747 let default_head_key = self.inner.db.options().default_backup_head_key();
4748 let snapshot_prefix = self.config_string(
4749 "red.config.backup.snapshot_prefix",
4750 &default_snapshot_prefix,
4751 );
4752 let wal_prefix =
4753 self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
4754 let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
4755 let timeline_id = self.config_string("red.config.timeline.id", "main");
4756 let snapshot_key = crate::storage::wal::archive_snapshot(
4757 backend.as_ref(),
4758 path,
4759 snapshot.snapshot_id,
4760 &snapshot_prefix,
4761 )
4762 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4763 let current_lsn = self
4764 .inner
4765 .db
4766 .replication
4767 .as_ref()
4768 .map(|repl| {
4769 repl.logical_wal_spool
4770 .as_ref()
4771 .map(|spool| spool.current_lsn())
4772 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
4773 })
4774 .unwrap_or_else(|| self.inner.cdc.current_lsn());
4775 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
4776 let snapshot_sha256 =
4782 crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
4783 .map_err(|err| {
4784 tracing::warn!(
4785 target: "reddb::backup",
4786 error = %err,
4787 snapshot_id = snapshot.snapshot_id,
4788 "snapshot hash failed; manifest will lack checksum"
4789 );
4790 })
4791 .ok();
4792 let manifest = crate::storage::wal::SnapshotManifest {
4793 timeline_id: timeline_id.clone(),
4794 snapshot_key: snapshot_key.clone(),
4795 snapshot_id: snapshot.snapshot_id,
4796 snapshot_time: snapshot.created_at_unix_ms as u64,
4797 base_lsn: current_lsn,
4798 schema_version: crate::api::REDDB_FORMAT_VERSION,
4799 format_version: crate::api::REDDB_FORMAT_VERSION,
4800 snapshot_sha256,
4801 };
4802 crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
4803 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4804
4805 let prev_segment_hash =
4812 self.config_string("red.config.timeline.last_segment_hash", "");
4813 let prev_hash_arg = if prev_segment_hash.is_empty() {
4814 None
4815 } else {
4816 Some(prev_segment_hash)
4817 };
4818
4819 let archived_lsn = if let Some(primary) = &self.inner.db.replication {
4820 let oldest = primary
4821 .logical_wal_spool
4822 .as_ref()
4823 .and_then(|spool| spool.oldest_lsn().ok().flatten())
4824 .or_else(|| primary.wal_buffer.oldest_lsn())
4825 .unwrap_or(last_archived_lsn);
4826 if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
4827 return Err(RedDBError::Internal(format!(
4828 "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
4829 )));
4830 }
4831 let records = if let Some(spool) = &primary.logical_wal_spool {
4832 spool
4833 .read_since(last_archived_lsn, usize::MAX)
4834 .map_err(|err| RedDBError::Internal(err.to_string()))?
4835 } else {
4836 primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
4837 };
4838 if let Some(meta) = crate::storage::wal::archive_change_records(
4839 backend.as_ref(),
4840 &wal_prefix,
4841 &records,
4842 prev_hash_arg,
4843 )
4844 .map_err(|err| RedDBError::Internal(err.to_string()))?
4845 {
4846 if let Some(spool) = &primary.logical_wal_spool {
4847 let _ = spool.prune_through(meta.lsn_end);
4848 }
4849 if let Some(sha) = &meta.sha256 {
4855 self.inner.db.store().set_config_tree(
4856 "red.config.timeline",
4857 &crate::json!({ "last_segment_hash": sha }),
4858 );
4859 }
4860 meta.lsn_end
4861 } else {
4862 last_archived_lsn
4863 }
4864 } else {
4865 last_archived_lsn
4866 };
4867
4868 let head = crate::storage::wal::BackupHead {
4869 timeline_id,
4870 snapshot_key,
4871 snapshot_id: snapshot.snapshot_id,
4872 snapshot_time: snapshot.created_at_unix_ms as u64,
4873 current_lsn,
4874 last_archived_lsn: archived_lsn,
4875 wal_prefix,
4876 };
4877 crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
4878 .map_err(|err| RedDBError::Internal(err.to_string()))?;
4879 self.inner.db.store().set_config_tree(
4880 "red.config.timeline",
4881 &crate::json!({
4882 "last_archived_lsn": archived_lsn,
4883 "id": head.timeline_id
4884 }),
4885 );
4886
4887 if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
4895 backend.as_ref(),
4896 &snapshot_prefix,
4897 ) {
4898 tracing::warn!(
4899 target: "reddb::backup",
4900 error = %err,
4901 snapshot_prefix = %snapshot_prefix,
4902 "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
4903 );
4904 }
4905
4906 match self.commit_policy() {
4918 crate::replication::CommitPolicy::AckN(n) if n > 0 => {
4919 let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
4920 .ok()
4921 .and_then(|v| v.parse::<u64>().ok())
4922 .unwrap_or(5_000);
4923 let outcome = self.await_replica_acks(
4924 archived_lsn,
4925 n,
4926 std::time::Duration::from_millis(timeout),
4927 );
4928 match outcome {
4929 crate::replication::AwaitOutcome::Reached(count) => {
4930 tracing::debug!(
4931 target: "reddb::backup",
4932 archived_lsn,
4933 n,
4934 count,
4935 "ack_n: replicas synced before backup return"
4936 );
4937 }
4938 crate::replication::AwaitOutcome::TimedOut { observed, required } => {
4939 tracing::warn!(
4940 target: "reddb::backup",
4941 archived_lsn,
4942 observed,
4943 required,
4944 timeout_ms = timeout,
4945 "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
4946 );
4947 }
4948 crate::replication::AwaitOutcome::NotRequired => {}
4949 }
4950 }
4951 _ => {} }
4953
4954 if self.config_bool("red.config.backup.include_blob_cache", false) {
4966 let blob_cache_prefix = self.config_string(
4967 "red.config.backup.blob_cache_prefix",
4968 &format!("{snapshot_prefix}blob_cache/"),
4969 );
4970 if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
4971 match crate::storage::cache::archive_blob_cache_l2(
4972 backend.as_ref(),
4973 l2_path,
4974 &blob_cache_prefix,
4975 ) {
4976 Ok(count) => {
4977 tracing::info!(
4978 target: "reddb::backup",
4979 files_uploaded = count,
4980 blob_cache_prefix = %blob_cache_prefix,
4981 "include_blob_cache: archived L2 directory"
4982 );
4983 }
4984 Err(err) => {
4985 tracing::warn!(
4986 target: "reddb::backup",
4987 error = %err,
4988 blob_cache_prefix = %blob_cache_prefix,
4989 "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
4990 );
4991 }
4992 }
4993 } else {
4994 tracing::debug!(
4995 target: "reddb::backup",
4996 "include_blob_cache=true but no L2 path configured; nothing to archive"
4997 );
4998 }
4999 }
5000
5001 uploaded = true;
5002 }
5003
5004 Ok(crate::replication::scheduler::BackupResult {
5005 snapshot_id: snapshot.snapshot_id,
5006 uploaded,
5007 duration_ms: started.elapsed().as_millis() as u64,
5008 timestamp: snapshot.created_at_unix_ms as u64,
5009 })
5010 })();
5011
5012 use crate::runtime::control_events::{EventKind, Outcome, Sensitivity};
5013 let (current_lsn, last_archived_lsn) = self.wal_archive_progress();
5014 let mut fields = vec![
5015 (
5016 "current_lsn".to_string(),
5017 Sensitivity::raw(current_lsn.to_string()),
5018 ),
5019 (
5020 "last_archived_lsn".to_string(),
5021 Sensitivity::raw(last_archived_lsn.to_string()),
5022 ),
5023 ];
5024 if let Ok(backup) = &result {
5025 fields.push((
5026 "snapshot_id".to_string(),
5027 Sensitivity::raw(backup.snapshot_id.to_string()),
5028 ));
5029 fields.push((
5030 "uploaded".to_string(),
5031 Sensitivity::raw(backup.uploaded.to_string()),
5032 ));
5033 fields.push((
5034 "duration_ms".to_string(),
5035 Sensitivity::raw(backup.duration_ms.to_string()),
5036 ));
5037 fields.push((
5038 "snapshot_time".to_string(),
5039 Sensitivity::raw(backup.timestamp.to_string()),
5040 ));
5041 }
5042 let outcome = match &result {
5043 Ok(_) => Outcome::Allowed,
5044 Err(err) => control_event_outcome_for_error(err),
5045 };
5046 let reason = result.as_ref().err().map(|err| err.to_string());
5047 self.emit_control_event(
5048 EventKind::BackupRun,
5049 outcome,
5050 "backup_trigger",
5051 Some("backup:trigger".to_string()),
5052 reason,
5053 fields,
5054 )?;
5055 result
5056 }
5057
5058 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
5059 let mut pool = self
5060 .inner
5061 .pool
5062 .lock()
5063 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
5064 if pool.active >= self.inner.pool_config.max_connections {
5065 return Err(RedDBError::Internal(
5066 "connection pool exhausted".to_string(),
5067 ));
5068 }
5069
5070 let id = if let Some(id) = pool.idle.pop() {
5071 id
5072 } else {
5073 let id = pool.next_id;
5074 pool.next_id += 1;
5075 id
5076 };
5077 pool.active += 1;
5078 pool.total_checkouts += 1;
5079 drop(pool);
5080
5081 Ok(RuntimeConnection {
5082 id,
5083 inner: Arc::clone(&self.inner),
5084 })
5085 }
5086
5087 pub fn checkpoint(&self) -> RedDBResult<()> {
5088 self.inner.db.flush_local_only().map_err(|err| {
5093 let msg = err.to_string();
5098 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
5099 lsn: 0,
5100 error: msg.clone(),
5101 }
5102 .emit_global();
5103 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
5104 path: "<flush_local_only>".to_string(),
5105 error: msg.clone(),
5106 }
5107 .emit_global();
5108 RedDBError::Engine(msg)
5109 })?;
5110 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
5111 tracing::warn!(
5112 target: "reddb::serverless::lease",
5113 error = %err,
5114 "checkpoint: skipping remote upload — lease not held"
5115 );
5116 return Ok(());
5117 }
5118 self.inner
5119 .db
5120 .upload_to_remote_backend()
5121 .map_err(|err| RedDBError::Engine(err.to_string()))
5122 }
5123
5124 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
5131 if self.inner.db.remote_backend.is_none() {
5132 return Ok(());
5133 }
5134 match self.inner.write_gate.lease_state() {
5135 crate::runtime::write_gate::LeaseGateState::NotHeld => {
5136 self.inner.audit_log.record(
5137 action,
5138 "system",
5139 "remote_backend",
5140 "err: writer lease not held",
5141 crate::json::Value::Null,
5142 );
5143 Err(RedDBError::ReadOnly(format!(
5144 "writer lease not held — {action} blocked (serverless fence)"
5145 )))
5146 }
5147 _ => Ok(()),
5148 }
5149 }
5150
5151 pub fn run_maintenance(&self) -> RedDBResult<()> {
5152 self.inner
5153 .db
5154 .run_maintenance()
5155 .map_err(|err| RedDBError::Internal(err.to_string()))
5156 }
5157
5158 pub fn scan_collection(
5159 &self,
5160 collection: &str,
5161 cursor: Option<ScanCursor>,
5162 limit: usize,
5163 ) -> RedDBResult<ScanPage> {
5164 let store = self.inner.db.store();
5165 let manager = store
5166 .get_collection(collection)
5167 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
5168
5169 let mut entities = manager.query_all(|_| true);
5170 entities.sort_by_key(|entity| entity.id.raw());
5171
5172 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
5173 let total = entities.len();
5174 let end = total.min(offset.saturating_add(limit.max(1)));
5175 let items = if offset >= total {
5176 Vec::new()
5177 } else {
5178 entities[offset..end].to_vec()
5179 };
5180 let next = (end < total).then_some(ScanCursor { offset: end });
5181
5182 Ok(ScanPage {
5183 collection: collection.to_string(),
5184 items,
5185 next,
5186 total,
5187 })
5188 }
5189
5190 pub fn catalog(&self) -> CatalogModelSnapshot {
5191 self.inner.db.catalog_model_snapshot()
5192 }
5193
5194 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
5195 self.inner.db.catalog_consistency_report()
5196 }
5197
5198 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
5199 crate::catalog::attention_summary(&self.catalog())
5200 }
5201
5202 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
5203 crate::catalog::collection_attention(&self.catalog())
5204 }
5205
5206 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
5207 crate::catalog::index_attention(&self.catalog())
5208 }
5209
5210 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
5211 crate::catalog::graph_projection_attention(&self.catalog())
5212 }
5213
5214 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
5215 crate::catalog::analytics_job_attention(&self.catalog())
5216 }
5217
5218 pub fn stats(&self) -> RuntimeStats {
5219 let pool = runtime_pool_lock(self);
5220 RuntimeStats {
5221 active_connections: pool.active,
5222 idle_connections: pool.idle.len(),
5223 total_checkouts: pool.total_checkouts,
5224 paged_mode: self.inner.db.is_paged(),
5225 started_at_unix_ms: self.inner.started_at_unix_ms,
5226 store: self.inner.db.stats(),
5227 system: SystemInfo::collect(),
5228 result_blob_cache: self.inner.result_blob_cache.stats(),
5229 kv: self.inner.kv_stats.snapshot(),
5230 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
5231 }
5232 }
5233
5234 pub(crate) fn record_metrics_ingest(
5235 &self,
5236 accepted_samples: u64,
5237 accepted_series: u64,
5238 rejected_samples: u64,
5239 rejected_series: u64,
5240 ) {
5241 self.inner.metrics_ingest_stats.record(
5242 accepted_samples,
5243 accepted_series,
5244 rejected_samples,
5245 rejected_series,
5246 );
5247 }
5248
5249 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
5250 self.inner
5251 .metrics_ingest_stats
5252 .record_cardinality_budget_rejections(rejected_series);
5253 }
5254
5255 pub(crate) fn record_metrics_tenant_activity(
5256 &self,
5257 tenant: &str,
5258 namespace: &str,
5259 operation: &str,
5260 ) {
5261 self.inner
5262 .metrics_tenant_activity_stats
5263 .record(tenant, namespace, operation);
5264 }
5265
5266 pub(crate) fn metrics_tenant_activity_snapshot(
5267 &self,
5268 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
5269 self.inner.metrics_tenant_activity_stats.snapshot()
5270 }
5271
5272 pub fn execute_query_with_scope(
5286 &self,
5287 query: &str,
5288 scope: crate::runtime::within_clause::ScopeOverride,
5289 ) -> RedDBResult<RuntimeQueryResult> {
5290 if scope.is_empty() {
5291 return self.execute_query(query);
5292 }
5293 let _scope_guard = ScopeOverrideGuard::install(scope);
5294 self.execute_query(query)
5295 }
5296
5297 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
5306 let started = std::time::Instant::now();
5307 let result = self.execute_query_inner(query);
5308 let elapsed_ms = started.elapsed().as_millis() as u64;
5309
5310 let scope = self.ai_scope();
5315 let kind = match result
5316 .as_ref()
5317 .map(|r| r.statement_type)
5318 .unwrap_or("select")
5319 {
5320 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
5321 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
5322 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
5323 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
5324 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
5325 };
5326 self.inner
5332 .slow_query_logger
5333 .record(kind, elapsed_ms, query.to_string(), &scope);
5334
5335 result
5336 }
5337
5338 #[inline(never)]
5339 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
5340 if !has_scope_override_active()
5351 && !query.trim_start().starts_with("WITHIN")
5352 && !query.trim_start().starts_with("within")
5353 && !self.inner.query_audit.has_rules()
5354 && !self
5355 .inner
5356 .tx_contexts
5357 .read()
5358 .contains_key(¤t_connection_id())
5359 {
5360 if let Some(result) = self.try_fast_entity_lookup(query) {
5361 return result;
5362 }
5363 }
5364
5365 match crate::runtime::within_clause::try_strip_within_prefix(query) {
5372 Ok(Some((scope, inner))) => {
5373 let _scope_guard = ScopeOverrideGuard::install(scope);
5374 return self.execute_query_inner(inner);
5379 }
5380 Ok(None) => {}
5381 Err(msg) => return Err(RedDBError::Query(msg)),
5382 }
5383
5384 if let Some(inner) = strip_explain_prefix(query) {
5391 return self.explain_as_rows(query, inner);
5392 }
5393
5394 if let Some(value) = parse_set_local_tenant(query)? {
5399 let conn_id = current_connection_id();
5400 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
5401 return Err(RedDBError::Query(
5402 "SET LOCAL TENANT requires an active transaction".to_string(),
5403 ));
5404 }
5405 self.inner
5406 .tx_local_tenants
5407 .write()
5408 .insert(conn_id, value.clone());
5409 return Ok(RuntimeQueryResult::ok_message(
5410 query.to_string(),
5411 &match &value {
5412 Some(id) => format!("local tenant set: {id}"),
5413 None => "local tenant cleared".to_string(),
5414 },
5415 "set_local_tenant",
5416 ));
5417 }
5418
5419 if super::red_schema::is_system_schema_write(query) {
5420 return Err(RedDBError::Query(
5421 super::red_schema::READ_ONLY_ERROR.to_string(),
5422 ));
5423 }
5424
5425 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
5426 let execution_query = rewritten_query.as_deref().unwrap_or(query);
5427
5428 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
5429 let _frame_guards = frame.install(self);
5430
5431 let _log_span = crate::telemetry::span::query_span(query).entered();
5438
5439 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
5441 return self.execute_query_expr(rewritten);
5442 }
5443
5444 if !self.inner.query_audit.has_rules() {
5446 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
5447 return result;
5448 }
5449 }
5450
5451 if !self.inner.query_audit.has_rules() {
5453 if let Some(result) = frame.read_result_cache(self) {
5454 return Ok(result);
5455 }
5456 }
5457
5458 let prepared = frame.prepare_statement(self, execution_query)?;
5459 let mode = prepared.mode;
5460 let expr = prepared.expr;
5461
5462 let statement = query_expr_name(&expr);
5463 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
5464 let control_event_specs = query_control_event_specs(&expr);
5465 let query_audit_plan = query_audit_plan(&expr);
5466
5467 let _lock_guard = match frame.prepare_dispatch(self, &expr) {
5468 Ok(guard) => guard,
5469 Err(err) => {
5470 let outcome = control_event_outcome_for_error(&err);
5471 for spec in &control_event_specs {
5472 self.emit_control_event(
5473 spec.kind,
5474 outcome,
5475 spec.action,
5476 spec.resource.clone(),
5477 Some(err.to_string()),
5478 spec.fields.clone(),
5479 )?;
5480 }
5481 return Err(err);
5482 }
5483 };
5484 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
5485 let query_audit_started = std::time::Instant::now();
5486
5487 let query_result = match expr {
5488 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
5489 let (graph, node_properties, edge_properties) =
5497 self.materialize_graph_with_rls()?;
5498 let result =
5499 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
5500 &graph,
5501 &expr,
5502 node_properties,
5503 edge_properties,
5504 )
5505 .map_err(|err| RedDBError::Query(err.to_string()))?;
5506
5507 Ok(RuntimeQueryResult {
5508 query: query.to_string(),
5509 mode,
5510 statement,
5511 engine: "materialized-graph",
5512 result,
5513 affected_rows: 0,
5514 statement_type: "select",
5515 })
5516 }
5517 QueryExpr::Table(table) => {
5518 let table = self.resolve_table_expr_subqueries(
5519 table,
5520 &frame as &dyn super::statement_frame::ReadFrame,
5521 )?;
5522 if super::red_schema::is_virtual_table(&table.table) {
5523 return Ok(RuntimeQueryResult {
5524 query: query.to_string(),
5525 mode,
5526 statement,
5527 engine: "runtime-red-schema",
5528 result: super::red_schema::red_query(
5529 self,
5530 &table.table,
5531 &table,
5532 &frame as &dyn super::statement_frame::ReadFrame,
5533 )?,
5534 affected_rows: 0,
5535 statement_type: "select",
5536 });
5537 }
5538
5539 if let Some(result) = self.execute_probabilistic_select(&table)? {
5540 return Ok(RuntimeQueryResult {
5541 query: query.to_string(),
5542 mode,
5543 statement,
5544 engine: "runtime-probabilistic",
5545 result,
5546 affected_rows: 0,
5547 statement_type: "select",
5548 });
5549 }
5550
5551 if self.inner.foreign_tables.is_foreign_table(&table.table) {
5559 let records = self
5560 .inner
5561 .foreign_tables
5562 .scan(&table.table)
5563 .map_err(|e| RedDBError::Internal(e.to_string()))?;
5564 let result = apply_foreign_table_filters(records, &table);
5565 return Ok(RuntimeQueryResult {
5566 query: query.to_string(),
5567 mode,
5568 statement,
5569 engine: "runtime-fdw",
5570 result,
5571 affected_rows: 0,
5572 statement_type: "select",
5573 });
5574 }
5575
5576 let Some(table_with_rls) = self.authorize_relational_table_select(
5593 table,
5594 &frame as &dyn super::statement_frame::ReadFrame,
5595 )?
5596 else {
5597 let empty = crate::storage::query::unified::UnifiedResult::empty();
5598 return Ok(RuntimeQueryResult {
5599 query: query.to_string(),
5600 mode,
5601 statement,
5602 engine: "runtime-table-rls",
5603 result: empty,
5604 affected_rows: 0,
5605 statement_type: "select",
5606 });
5607 };
5608 Ok(RuntimeQueryResult {
5609 query: query.to_string(),
5610 mode,
5611 statement,
5612 engine: "runtime-table",
5613 result: execute_runtime_table_query(
5614 &self.inner.db,
5615 &table_with_rls,
5616 Some(&self.inner.index_store),
5617 )?,
5618 affected_rows: 0,
5619 statement_type: "select",
5620 })
5621 }
5622 QueryExpr::Join(join) => {
5623 let join_with_rls = match self.authorize_relational_join_select(
5632 join,
5633 &frame as &dyn super::statement_frame::ReadFrame,
5634 )? {
5635 Some(j) => j,
5636 None => {
5637 return Ok(RuntimeQueryResult {
5638 query: query.to_string(),
5639 mode,
5640 statement,
5641 engine: "runtime-join-rls",
5642 result: crate::storage::query::unified::UnifiedResult::empty(),
5643 affected_rows: 0,
5644 statement_type: "select",
5645 });
5646 }
5647 };
5648 Ok(RuntimeQueryResult {
5649 query: query.to_string(),
5650 mode,
5651 statement,
5652 engine: "runtime-join",
5653 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
5654 affected_rows: 0,
5655 statement_type: "select",
5656 })
5657 }
5658 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
5659 query: query.to_string(),
5660 mode,
5661 statement,
5662 engine: "runtime-vector",
5663 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
5664 affected_rows: 0,
5665 statement_type: "select",
5666 }),
5667 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
5668 query: query.to_string(),
5669 mode,
5670 statement,
5671 engine: "runtime-hybrid",
5672 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
5673 affected_rows: 0,
5674 statement_type: "select",
5675 }),
5676 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
5678 Err(RedDBError::Query(
5679 super::red_schema::READ_ONLY_ERROR.to_string(),
5680 ))
5681 }
5682 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
5683 Err(RedDBError::Query(
5684 super::red_schema::READ_ONLY_ERROR.to_string(),
5685 ))
5686 }
5687 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
5688 Err(RedDBError::Query(
5689 super::red_schema::READ_ONLY_ERROR.to_string(),
5690 ))
5691 }
5692 QueryExpr::Insert(ref insert) => self
5693 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
5694 self.execute_insert(query, insert)
5695 }),
5696 QueryExpr::Update(ref update) => self
5697 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
5698 self.execute_update(query, update)
5699 }),
5700 QueryExpr::Delete(ref delete) => self
5701 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
5702 self.execute_delete(query, delete)
5703 }),
5704 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
5706 QueryExpr::CreateCollection(ref create) => {
5707 self.execute_create_collection(query, create)
5708 }
5709 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
5710 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
5711 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
5712 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
5713 QueryExpr::DropDocument(ref drop_document) => {
5714 self.execute_drop_document(query, drop_document)
5715 }
5716 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
5717 QueryExpr::DropCollection(ref drop_collection) => {
5718 self.execute_drop_collection(query, drop_collection)
5719 }
5720 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
5721 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
5722 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
5723 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
5725 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
5727 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
5729 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
5730 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
5731 QueryExpr::ProbabilisticCommand(ref cmd) => {
5732 self.execute_probabilistic_command(query, cmd)
5733 }
5734 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
5736 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
5737 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
5739 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
5740 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
5741 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
5742 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
5743 QueryExpr::EventsBackfill(ref backfill) => {
5744 self.execute_events_backfill(query, backfill)
5745 }
5746 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
5747 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
5748 ))),
5749 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
5750 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
5751 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
5752 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
5753 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
5754 QueryExpr::SetConfig { ref key, ref value } => {
5756 if key.starts_with("red.secret.") {
5757 return Err(RedDBError::Query(
5758 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
5759 ));
5760 }
5761 match self.check_managed_config_write_for_set_config(key) {
5762 Err(err) => Err(err),
5763 Ok(()) => {
5764 let store = self.inner.db.store();
5765 let json_val = match value {
5766 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
5767 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
5768 Value::Float(n) => crate::serde_json::Value::Number(*n),
5769 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
5770 _ => crate::serde_json::Value::String(value.to_string()),
5771 };
5772 store.set_config_tree(key, &json_val);
5773 update_current_config_value(key, value.clone());
5774 self.invalidate_result_cache();
5779 Ok(RuntimeQueryResult::ok_message(
5780 query.to_string(),
5781 &format!("config set: {key}"),
5782 "set",
5783 ))
5784 }
5785 }
5786 }
5787 QueryExpr::SetSecret { ref key, ref value } => {
5789 if key.starts_with("red.config.") {
5790 return Err(RedDBError::Query(
5791 "red.config.* is reserved for config; use SET CONFIG".to_string(),
5792 ));
5793 }
5794 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5795 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
5796 })?;
5797 if matches!(value, Value::Null) {
5798 auth_store
5799 .vault_kv_try_delete(key)
5800 .map_err(|err| RedDBError::Query(err.to_string()))?;
5801 update_current_secret_value(key, None);
5802 self.invalidate_result_cache();
5803 return Ok(RuntimeQueryResult::ok_message(
5804 query.to_string(),
5805 &format!("secret deleted: {key}"),
5806 "delete_secret",
5807 ));
5808 }
5809 let value = secret_sql_value_to_string(value)?;
5810 auth_store
5811 .vault_kv_try_set(key.clone(), value.clone())
5812 .map_err(|err| RedDBError::Query(err.to_string()))?;
5813 update_current_secret_value(key, Some(value));
5814 self.invalidate_result_cache();
5815 Ok(RuntimeQueryResult::ok_message(
5816 query.to_string(),
5817 &format!("secret set: {key}"),
5818 "set_secret",
5819 ))
5820 }
5821 QueryExpr::DeleteSecret { ref key } => {
5823 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5824 RedDBError::Query(
5825 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
5826 )
5827 })?;
5828 let deleted = auth_store
5829 .vault_kv_try_delete(key)
5830 .map_err(|err| RedDBError::Query(err.to_string()))?;
5831 if deleted {
5832 update_current_secret_value(key, None);
5833 }
5834 self.invalidate_result_cache();
5835 Ok(RuntimeQueryResult::ok_message(
5836 query.to_string(),
5837 &format!("secret deleted: {key}"),
5838 if deleted {
5839 "delete_secret"
5840 } else {
5841 "delete_secret_not_found"
5842 },
5843 ))
5844 }
5845 QueryExpr::ShowSecrets { ref prefix } => {
5847 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
5848 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
5849 })?;
5850 if !auth_store.is_vault_backed() {
5851 return Err(RedDBError::Query(
5852 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
5853 ));
5854 }
5855 let mut keys = auth_store.vault_kv_keys();
5856 keys.sort();
5857 let mut result = UnifiedResult::with_columns(vec![
5858 "key".into(),
5859 "value".into(),
5860 "status".into(),
5861 ]);
5862 for key in keys {
5863 if let Some(ref pfx) = prefix {
5864 if !key.starts_with(pfx) {
5865 continue;
5866 }
5867 }
5868 let mut record = UnifiedRecord::new();
5869 record.set("key", Value::text(key));
5870 record.set("value", Value::text("***"));
5871 record.set("status", Value::text("active"));
5872 result.push(record);
5873 }
5874 Ok(RuntimeQueryResult {
5875 query: query.to_string(),
5876 mode,
5877 statement: "show_secrets",
5878 engine: "runtime-secret",
5879 result,
5880 affected_rows: 0,
5881 statement_type: "select",
5882 })
5883 }
5884 QueryExpr::ShowConfig { ref prefix } => {
5886 let store = self.inner.db.store();
5887 let all_collections = store.list_collections();
5888 if !all_collections.contains(&"red_config".to_string()) {
5889 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5890 return Ok(RuntimeQueryResult {
5891 query: query.to_string(),
5892 mode,
5893 statement: "show_config",
5894 engine: "runtime-config",
5895 result,
5896 affected_rows: 0,
5897 statement_type: "select",
5898 });
5899 }
5900 let manager = store
5901 .get_collection("red_config")
5902 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
5903 let entities = manager.query_all(|_| true);
5904 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
5905 for entity in entities {
5906 if let EntityData::Row(ref row) = entity.data {
5907 if let Some(ref named) = row.named {
5908 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
5909 let val = named.get("value").cloned().unwrap_or(Value::Null);
5910 let key_str = match &key_val {
5911 Value::Text(s) => s.as_ref(),
5912 _ => continue,
5913 };
5914 if let Some(ref pfx) = prefix {
5915 if !key_str.starts_with(pfx.as_str()) {
5916 continue;
5917 }
5918 }
5919 let entity_id = entity.id.raw();
5920 match latest.get(key_str) {
5921 Some((prev_id, _, _)) if *prev_id > entity_id => {}
5922 _ => {
5923 latest.insert(key_str.to_string(), (entity_id, key_val, val));
5924 }
5925 }
5926 }
5927 }
5928 }
5929 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
5930 for (_, key_val, val) in latest.into_values() {
5931 let mut record = UnifiedRecord::new();
5932 record.set("key", key_val);
5933 record.set("value", val);
5934 result.push(record);
5935 }
5936 Ok(RuntimeQueryResult {
5937 query: query.to_string(),
5938 mode,
5939 statement: "show_config",
5940 engine: "runtime-config",
5941 result,
5942 affected_rows: 0,
5943 statement_type: "select",
5944 })
5945 }
5946 QueryExpr::SetTenant(ref value) => {
5952 match value {
5953 Some(id) => set_current_tenant(id.clone()),
5954 None => clear_current_tenant(),
5955 }
5956 Ok(RuntimeQueryResult::ok_message(
5957 query.to_string(),
5958 &match value {
5959 Some(id) => format!("tenant set: {id}"),
5960 None => "tenant cleared".to_string(),
5961 },
5962 "set_tenant",
5963 ))
5964 }
5965 QueryExpr::ShowTenant => {
5966 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
5967 let mut record = UnifiedRecord::new();
5968 record.set(
5969 "tenant",
5970 current_tenant().map(Value::text).unwrap_or(Value::Null),
5971 );
5972 result.push(record);
5973 Ok(RuntimeQueryResult {
5974 query: query.to_string(),
5975 mode,
5976 statement: "show_tenant",
5977 engine: "runtime-tenant",
5978 result,
5979 affected_rows: 0,
5980 statement_type: "select",
5981 })
5982 }
5983 QueryExpr::TransactionControl(ref ctl) => {
5995 use crate::storage::query::ast::TxnControl;
5996 use crate::storage::transaction::snapshot::{TxnContext, Xid};
5997 use crate::storage::transaction::IsolationLevel;
5998
5999 let conn_id = current_connection_id();
6004
6005 let (kind, msg) = match ctl {
6006 TxnControl::Begin => {
6007 let mgr = Arc::clone(&self.inner.snapshot_manager);
6008 let xid = mgr.begin();
6009 let snapshot = mgr.snapshot(xid);
6010 let ctx = TxnContext {
6011 xid,
6012 isolation: IsolationLevel::SnapshotIsolation,
6013 snapshot,
6014 savepoints: Vec::new(),
6015 released_sub_xids: Vec::new(),
6016 };
6017 self.inner.tx_contexts.write().insert(conn_id, ctx);
6018 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
6019 }
6020 TxnControl::Commit => {
6021 self.inner.tx_local_tenants.write().remove(&conn_id);
6023 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
6024 match ctx {
6025 Some(ctx) => {
6026 let mut own_xids = std::collections::HashSet::new();
6027 own_xids.insert(ctx.xid);
6028 for (_, sub) in &ctx.savepoints {
6029 own_xids.insert(*sub);
6030 }
6031 for sub in &ctx.released_sub_xids {
6032 own_xids.insert(*sub);
6033 }
6034 if let Err(err) = self.check_table_row_write_conflicts(
6035 conn_id,
6036 &ctx.snapshot,
6037 &own_xids,
6038 ) {
6039 for (_, sub) in &ctx.savepoints {
6040 self.inner.snapshot_manager.rollback(*sub);
6041 }
6042 for sub in &ctx.released_sub_xids {
6043 self.inner.snapshot_manager.rollback(*sub);
6044 }
6045 self.inner.snapshot_manager.rollback(ctx.xid);
6046 self.revive_pending_versioned_updates(conn_id);
6047 self.revive_pending_tombstones(conn_id);
6048 self.discard_pending_kv_watch_events(conn_id);
6049 self.discard_pending_store_wal_actions(conn_id);
6050 return Err(err);
6051 }
6052 self.restore_pending_write_stamps(conn_id);
6053 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
6054 for (_, sub) in &ctx.savepoints {
6055 self.inner.snapshot_manager.rollback(*sub);
6056 }
6057 for sub in &ctx.released_sub_xids {
6058 self.inner.snapshot_manager.rollback(*sub);
6059 }
6060 self.inner.snapshot_manager.rollback(ctx.xid);
6061 self.revive_pending_versioned_updates(conn_id);
6062 self.revive_pending_tombstones(conn_id);
6063 self.discard_pending_kv_watch_events(conn_id);
6064 return Err(err);
6065 }
6066 for (_, sub) in &ctx.savepoints {
6072 self.inner.snapshot_manager.commit(*sub);
6073 }
6074 for sub in &ctx.released_sub_xids {
6075 self.inner.snapshot_manager.commit(*sub);
6076 }
6077 self.inner.snapshot_manager.commit(ctx.xid);
6078 self.finalize_pending_versioned_updates(conn_id);
6079 self.finalize_pending_tombstones(conn_id);
6080 self.finalize_pending_kv_watch_events(conn_id);
6081 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
6082 }
6083 None => (
6084 "commit",
6085 "COMMIT outside transaction — no-op (autocommit)".to_string(),
6086 ),
6087 }
6088 }
6089 TxnControl::Rollback => {
6090 self.inner.tx_local_tenants.write().remove(&conn_id);
6091 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
6092 match ctx {
6093 Some(ctx) => {
6094 for (_, sub) in &ctx.savepoints {
6097 self.inner.snapshot_manager.rollback(*sub);
6098 }
6099 for sub in &ctx.released_sub_xids {
6100 self.inner.snapshot_manager.rollback(*sub);
6101 }
6102 self.inner.snapshot_manager.rollback(ctx.xid);
6103 self.revive_pending_versioned_updates(conn_id);
6107 self.revive_pending_tombstones(conn_id);
6108 self.discard_pending_kv_watch_events(conn_id);
6109 self.discard_pending_store_wal_actions(conn_id);
6110 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
6111 }
6112 None => (
6113 "rollback",
6114 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
6115 ),
6116 }
6117 }
6118 TxnControl::Savepoint(name) => {
6125 let mgr = Arc::clone(&self.inner.snapshot_manager);
6126 let mut guard = self.inner.tx_contexts.write();
6127 match guard.get_mut(&conn_id) {
6128 Some(ctx) => {
6129 let sub = mgr.begin();
6130 ctx.savepoints.push((name.clone(), sub));
6131 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
6132 }
6133 None => (
6134 "savepoint",
6135 "SAVEPOINT outside transaction — no-op".to_string(),
6136 ),
6137 }
6138 }
6139 TxnControl::ReleaseSavepoint(name) => {
6140 let mut guard = self.inner.tx_contexts.write();
6141 match guard.get_mut(&conn_id) {
6142 Some(ctx) => {
6143 let pos = ctx
6144 .savepoints
6145 .iter()
6146 .position(|(n, _)| n == name)
6147 .ok_or_else(|| {
6148 RedDBError::Internal(format!(
6149 "savepoint {name} does not exist"
6150 ))
6151 })?;
6152 let released = ctx.savepoints.len() - pos;
6160 let popped: Vec<Xid> = ctx
6161 .savepoints
6162 .split_off(pos)
6163 .into_iter()
6164 .map(|(_, x)| x)
6165 .collect();
6166 ctx.released_sub_xids.extend(popped);
6167 (
6168 "release_savepoint",
6169 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
6170 )
6171 }
6172 None => (
6173 "release_savepoint",
6174 "RELEASE outside transaction — no-op".to_string(),
6175 ),
6176 }
6177 }
6178 TxnControl::RollbackToSavepoint(name) => {
6179 let mgr = Arc::clone(&self.inner.snapshot_manager);
6180 let drop_result: Option<(Xid, Vec<Xid>)> = {
6185 let mut guard = self.inner.tx_contexts.write();
6186 if let Some(ctx) = guard.get_mut(&conn_id) {
6187 let pos = ctx
6188 .savepoints
6189 .iter()
6190 .position(|(n, _)| n == name)
6191 .ok_or_else(|| {
6192 RedDBError::Internal(format!(
6193 "savepoint {name} does not exist"
6194 ))
6195 })?;
6196 let savepoint_xid = ctx.savepoints[pos].1;
6197 let aborted: Vec<Xid> = ctx
6198 .savepoints
6199 .split_off(pos)
6200 .into_iter()
6201 .map(|(_, x)| x)
6202 .collect();
6203 Some((savepoint_xid, aborted))
6204 } else {
6205 None
6206 }
6207 };
6208
6209 match drop_result {
6210 Some((savepoint_xid, aborted)) => {
6211 for x in &aborted {
6212 mgr.rollback(*x);
6213 }
6214 let reverted_updates =
6215 self.revive_versioned_updates_since(conn_id, savepoint_xid);
6216 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
6217 (
6218 "rollback_to_savepoint",
6219 format!(
6220 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
6221 aborted.len(),
6222 ),
6223 )
6224 }
6225 None => (
6226 "rollback_to_savepoint",
6227 "ROLLBACK TO outside transaction — no-op".to_string(),
6228 ),
6229 }
6230 }
6231 };
6232 Ok(RuntimeQueryResult::ok_message(
6233 query.to_string(),
6234 &msg,
6235 kind,
6236 ))
6237 }
6238 QueryExpr::CreateSchema(ref q) => {
6251 let store = self.inner.db.store();
6252 let key = format!("schema.{}", q.name);
6253 if store.get_config(&key).is_some() {
6254 if q.if_not_exists {
6255 return Ok(RuntimeQueryResult::ok_message(
6256 query.to_string(),
6257 &format!("schema {} already exists — skipped", q.name),
6258 "create_schema",
6259 ));
6260 }
6261 return Err(RedDBError::Internal(format!(
6262 "schema {} already exists",
6263 q.name
6264 )));
6265 }
6266 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
6267 Ok(RuntimeQueryResult::ok_message(
6268 query.to_string(),
6269 &format!("schema {} created", q.name),
6270 "create_schema",
6271 ))
6272 }
6273 QueryExpr::DropSchema(ref q) => {
6274 let store = self.inner.db.store();
6275 let key = format!("schema.{}", q.name);
6276 let existed = store.get_config(&key).is_some();
6277 if !existed && !q.if_exists {
6278 return Err(RedDBError::Internal(format!(
6279 "schema {} does not exist",
6280 q.name
6281 )));
6282 }
6283 store.set_config_tree(&key, &crate::serde_json::Value::Null);
6285 let suffix = if q.cascade {
6286 " (CASCADE accepted — tables untouched)"
6287 } else {
6288 ""
6289 };
6290 Ok(RuntimeQueryResult::ok_message(
6291 query.to_string(),
6292 &format!("schema {} dropped{}", q.name, suffix),
6293 "drop_schema",
6294 ))
6295 }
6296 QueryExpr::CreateSequence(ref q) => {
6297 let store = self.inner.db.store();
6298 let base = format!("sequence.{}", q.name);
6299 let start_key = format!("{base}.start");
6300 let incr_key = format!("{base}.increment");
6301 let curr_key = format!("{base}.current");
6302 if store.get_config(&start_key).is_some() {
6303 if q.if_not_exists {
6304 return Ok(RuntimeQueryResult::ok_message(
6305 query.to_string(),
6306 &format!("sequence {} already exists — skipped", q.name),
6307 "create_sequence",
6308 ));
6309 }
6310 return Err(RedDBError::Internal(format!(
6311 "sequence {} already exists",
6312 q.name
6313 )));
6314 }
6315 let initial_current = q.start - q.increment;
6318 store.set_config_tree(
6319 &start_key,
6320 &crate::serde_json::Value::Number(q.start as f64),
6321 );
6322 store.set_config_tree(
6323 &incr_key,
6324 &crate::serde_json::Value::Number(q.increment as f64),
6325 );
6326 store.set_config_tree(
6327 &curr_key,
6328 &crate::serde_json::Value::Number(initial_current as f64),
6329 );
6330 Ok(RuntimeQueryResult::ok_message(
6331 query.to_string(),
6332 &format!(
6333 "sequence {} created (start={}, increment={})",
6334 q.name, q.start, q.increment
6335 ),
6336 "create_sequence",
6337 ))
6338 }
6339 QueryExpr::DropSequence(ref q) => {
6340 let store = self.inner.db.store();
6341 let base = format!("sequence.{}", q.name);
6342 let existed = store.get_config(&format!("{base}.start")).is_some();
6343 if !existed && !q.if_exists {
6344 return Err(RedDBError::Internal(format!(
6345 "sequence {} does not exist",
6346 q.name
6347 )));
6348 }
6349 for k in ["start", "increment", "current"] {
6350 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
6351 }
6352 Ok(RuntimeQueryResult::ok_message(
6353 query.to_string(),
6354 &format!("sequence {} dropped", q.name),
6355 "drop_sequence",
6356 ))
6357 }
6358 QueryExpr::CreateView(ref q) => {
6368 let mut views = self.inner.views.write();
6369 if views.contains_key(&q.name) && !q.or_replace {
6370 if q.if_not_exists {
6371 return Ok(RuntimeQueryResult::ok_message(
6372 query.to_string(),
6373 &format!("view {} already exists — skipped", q.name),
6374 "create_view",
6375 ));
6376 }
6377 return Err(RedDBError::Internal(format!(
6378 "view {} already exists",
6379 q.name
6380 )));
6381 }
6382 views.insert(q.name.clone(), Arc::new(q.clone()));
6383 drop(views);
6384
6385 if q.materialized {
6387 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
6388 let refresh = match q.refresh_every_ms {
6389 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
6390 None => RefreshPolicy::Manual,
6391 };
6392 let dependencies = collect_table_refs(&q.query);
6393 let def = MaterializedViewDef {
6394 name: q.name.clone(),
6395 query: format!("<parsed view {}>", q.name),
6396 dependencies: dependencies.clone(),
6397 refresh,
6398 retention_duration_ms: q.retention_duration_ms,
6399 };
6400 self.inner.materialized_views.write().register(def);
6401
6402 let descriptor =
6408 crate::runtime::continuous_materialized_view::MaterializedViewDescriptor {
6409 name: q.name.clone(),
6410 source_sql: query.to_string(),
6411 source_collections: dependencies,
6412 refresh_every_ms: q.refresh_every_ms,
6413 retention_duration_ms: q.retention_duration_ms,
6414 };
6415 let store = self.inner.db.store();
6416 crate::runtime::continuous_materialized_view::persist_descriptor(
6417 store.as_ref(),
6418 &descriptor,
6419 )?;
6420
6421 self.ensure_materialized_view_backing(&q.name)?;
6428 }
6429 self.invalidate_plan_cache();
6434 self.invalidate_result_cache();
6435
6436 Ok(RuntimeQueryResult::ok_message(
6437 query.to_string(),
6438 &format!(
6439 "{}view {} created",
6440 if q.materialized { "materialized " } else { "" },
6441 q.name
6442 ),
6443 "create_view",
6444 ))
6445 }
6446 QueryExpr::DropView(ref q) => {
6447 let mut views = self.inner.views.write();
6448 let removed = views.remove(&q.name);
6449 let existed = removed.is_some();
6450 let removed_materialized =
6451 removed.as_ref().map(|v| v.materialized).unwrap_or(false);
6452 drop(views);
6453 if q.materialized || existed {
6454 self.inner.materialized_views.write().remove(&q.name);
6456 let store = self.inner.db.store();
6460 crate::runtime::continuous_materialized_view::remove_by_name(
6461 store.as_ref(),
6462 &q.name,
6463 )?;
6464 }
6465 if removed_materialized || q.materialized {
6469 self.drop_materialized_view_backing(&q.name)?;
6470 }
6471 self.invalidate_plan_cache();
6474 self.invalidate_result_cache();
6475 if !existed && !q.if_exists {
6476 return Err(RedDBError::Internal(format!(
6477 "view {} does not exist",
6478 q.name
6479 )));
6480 }
6481 self.invalidate_plan_cache();
6482 Ok(RuntimeQueryResult::ok_message(
6483 query.to_string(),
6484 &format!("view {} dropped", q.name),
6485 "drop_view",
6486 ))
6487 }
6488 QueryExpr::RefreshMaterializedView(ref q) => {
6489 let view = {
6492 let views = self.inner.views.read();
6493 views.get(&q.name).cloned()
6494 };
6495 let view = match view {
6496 Some(v) => v,
6497 None => {
6498 return Err(RedDBError::Internal(format!(
6499 "view {} does not exist",
6500 q.name
6501 )))
6502 }
6503 };
6504 if !view.materialized {
6505 return Err(RedDBError::Internal(format!(
6506 "view {} is not materialized — REFRESH requires \
6507 CREATE MATERIALIZED VIEW",
6508 q.name
6509 )));
6510 }
6511 let started = std::time::Instant::now();
6513 let now_ms = std::time::SystemTime::now()
6514 .duration_since(std::time::UNIX_EPOCH)
6515 .map(|d| d.as_millis() as u64)
6516 .unwrap_or(0);
6517 match self.execute_query_expr((*view.query).clone()) {
6518 Ok(inner_result) => {
6519 let entities =
6526 view_records_to_entities(&q.name, &inner_result.result.records);
6527 let row_count = entities.len() as u64;
6528 let store = self.inner.db.store();
6529 let serialized_records = match store.refresh_collection(&q.name, entities) {
6530 Ok(records) => records,
6531 Err(err) => {
6532 let duration_ms = started.elapsed().as_millis() as u64;
6533 let msg = err.to_string();
6534 self.inner
6535 .materialized_views
6536 .write()
6537 .record_refresh_failure(
6538 &q.name,
6539 msg.clone(),
6540 duration_ms,
6541 now_ms,
6542 );
6543 return Err(RedDBError::Internal(format!(
6544 "REFRESH MATERIALIZED VIEW {}: {msg}",
6545 q.name
6546 )));
6547 }
6548 };
6549
6550 if let Some(ref primary) = self.inner.db.replication {
6556 let lsn = self.inner.cdc.emit(
6557 crate::replication::cdc::ChangeOperation::Refresh,
6558 &q.name,
6559 0,
6560 "refresh",
6561 );
6562 self.invalidate_result_cache_for_table(&q.name);
6563 let timestamp = std::time::SystemTime::now()
6564 .duration_since(std::time::UNIX_EPOCH)
6565 .unwrap_or_default()
6566 .as_millis() as u64;
6567 let record = ChangeRecord::for_refresh(
6568 lsn,
6569 timestamp,
6570 q.name.clone(),
6571 serialized_records,
6572 );
6573 let encoded = record.encode();
6574 primary.wal_buffer.append(record.lsn, encoded.clone());
6575 if let Some(spool) = &primary.logical_wal_spool {
6576 let _ = spool.append(record.lsn, &encoded);
6577 }
6578 }
6579
6580 let duration_ms = started.elapsed().as_millis() as u64;
6581 let serialized = format!("{:?}", inner_result.result);
6582 self.inner
6583 .materialized_views
6584 .write()
6585 .record_refresh_success(
6586 &q.name,
6587 serialized.into_bytes(),
6588 row_count,
6589 duration_ms,
6590 now_ms,
6591 );
6592 self.invalidate_result_cache();
6597 Ok(RuntimeQueryResult::ok_message(
6598 query.to_string(),
6599 &format!("materialized view {} refreshed", q.name),
6600 "refresh_materialized_view",
6601 ))
6602 }
6603 Err(err) => {
6604 let duration_ms = started.elapsed().as_millis() as u64;
6605 let msg = err.to_string();
6606 self.inner
6607 .materialized_views
6608 .write()
6609 .record_refresh_failure(&q.name, msg.clone(), duration_ms, now_ms);
6610 Err(err)
6611 }
6612 }
6613 }
6614 QueryExpr::CreatePolicy(ref q) => {
6621 let key = (q.table.clone(), q.name.clone());
6622 self.inner
6623 .rls_policies
6624 .write()
6625 .insert(key, Arc::new(q.clone()));
6626 self.invalidate_plan_cache();
6627 self.schema_vocabulary_apply(
6631 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
6632 collection: q.table.clone(),
6633 policy: q.name.clone(),
6634 },
6635 );
6636 Ok(RuntimeQueryResult::ok_message(
6637 query.to_string(),
6638 &format!("policy {} on {} created", q.name, q.table),
6639 "create_policy",
6640 ))
6641 }
6642 QueryExpr::DropPolicy(ref q) => {
6643 let removed = self
6644 .inner
6645 .rls_policies
6646 .write()
6647 .remove(&(q.table.clone(), q.name.clone()))
6648 .is_some();
6649 if !removed && !q.if_exists {
6650 return Err(RedDBError::Internal(format!(
6651 "policy {} on {} does not exist",
6652 q.name, q.table
6653 )));
6654 }
6655 self.invalidate_plan_cache();
6656 self.schema_vocabulary_apply(
6659 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
6660 collection: q.table.clone(),
6661 policy: q.name.clone(),
6662 },
6663 );
6664 Ok(RuntimeQueryResult::ok_message(
6665 query.to_string(),
6666 &format!("policy {} on {} dropped", q.name, q.table),
6667 "drop_policy",
6668 ))
6669 }
6670 QueryExpr::CreateServer(ref q) => {
6681 use crate::storage::fdw::FdwOptions;
6682 let registry = Arc::clone(&self.inner.foreign_tables);
6683 if registry.server(&q.name).is_some() {
6684 if q.if_not_exists {
6685 return Ok(RuntimeQueryResult::ok_message(
6686 query.to_string(),
6687 &format!("server {} already exists — skipped", q.name),
6688 "create_server",
6689 ));
6690 }
6691 return Err(RedDBError::Internal(format!(
6692 "server {} already exists",
6693 q.name
6694 )));
6695 }
6696 let mut opts = FdwOptions::new();
6697 for (k, v) in &q.options {
6698 opts.values.insert(k.clone(), v.clone());
6699 }
6700 registry
6701 .create_server(&q.name, &q.wrapper, opts)
6702 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6703 Ok(RuntimeQueryResult::ok_message(
6704 query.to_string(),
6705 &format!("server {} created (wrapper {})", q.name, q.wrapper),
6706 "create_server",
6707 ))
6708 }
6709 QueryExpr::DropServer(ref q) => {
6710 let existed = self.inner.foreign_tables.drop_server(&q.name);
6711 if !existed && !q.if_exists {
6712 return Err(RedDBError::Internal(format!(
6713 "server {} does not exist",
6714 q.name
6715 )));
6716 }
6717 Ok(RuntimeQueryResult::ok_message(
6718 query.to_string(),
6719 &format!(
6720 "server {} dropped{}",
6721 q.name,
6722 if q.cascade { " (cascade)" } else { "" }
6723 ),
6724 "drop_server",
6725 ))
6726 }
6727 QueryExpr::CreateForeignTable(ref q) => {
6728 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
6729 let registry = Arc::clone(&self.inner.foreign_tables);
6730 if registry.foreign_table(&q.name).is_some() {
6731 if q.if_not_exists {
6732 return Ok(RuntimeQueryResult::ok_message(
6733 query.to_string(),
6734 &format!("foreign table {} already exists — skipped", q.name),
6735 "create_foreign_table",
6736 ));
6737 }
6738 return Err(RedDBError::Internal(format!(
6739 "foreign table {} already exists",
6740 q.name
6741 )));
6742 }
6743 let mut opts = FdwOptions::new();
6744 for (k, v) in &q.options {
6745 opts.values.insert(k.clone(), v.clone());
6746 }
6747 let columns: Vec<ForeignColumn> = q
6748 .columns
6749 .iter()
6750 .map(|c| ForeignColumn {
6751 name: c.name.clone(),
6752 data_type: c.data_type.clone(),
6753 not_null: c.not_null,
6754 })
6755 .collect();
6756 registry
6757 .create_foreign_table(ForeignTable {
6758 name: q.name.clone(),
6759 server_name: q.server.clone(),
6760 columns,
6761 options: opts,
6762 })
6763 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6764 self.invalidate_plan_cache();
6765 Ok(RuntimeQueryResult::ok_message(
6766 query.to_string(),
6767 &format!("foreign table {} created (server {})", q.name, q.server),
6768 "create_foreign_table",
6769 ))
6770 }
6771 QueryExpr::DropForeignTable(ref q) => {
6772 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
6773 if !existed && !q.if_exists {
6774 return Err(RedDBError::Internal(format!(
6775 "foreign table {} does not exist",
6776 q.name
6777 )));
6778 }
6779 self.invalidate_plan_cache();
6780 Ok(RuntimeQueryResult::ok_message(
6781 query.to_string(),
6782 &format!("foreign table {} dropped", q.name),
6783 "drop_foreign_table",
6784 ))
6785 }
6786 QueryExpr::CopyFrom(ref q) => {
6792 use crate::storage::import::{CsvConfig, CsvImporter};
6793 let store = self.inner.db.store();
6794 let cfg = CsvConfig {
6795 collection: q.table.clone(),
6796 has_header: q.has_header,
6797 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
6798 ..CsvConfig::default()
6799 };
6800 let importer = CsvImporter::new(cfg);
6801 let stats = importer
6802 .import_file(&q.path, store.as_ref())
6803 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
6804 self.note_table_write(&q.table);
6806 Ok(RuntimeQueryResult::ok_message(
6807 query.to_string(),
6808 &format!(
6809 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
6810 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
6811 ),
6812 "copy_from",
6813 ))
6814 }
6815 QueryExpr::MaintenanceCommand(ref cmd) => {
6831 use crate::storage::query::ast::MaintenanceCommand as Mc;
6832 let store = self.inner.db.store();
6833 let (kind, msg) = match cmd {
6834 Mc::Analyze { target } => {
6835 let targets: Vec<String> = match target {
6836 Some(t) => vec![t.clone()],
6837 None => store.list_collections(),
6838 };
6839 for t in &targets {
6840 self.refresh_table_planner_stats(t);
6841 }
6842 (
6843 "analyze",
6844 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
6845 )
6846 }
6847 Mc::Vacuum { target, full } => {
6848 let targets: Vec<String> = match target {
6849 Some(t) => vec![t.clone()],
6850 None => store.list_collections(),
6851 };
6852 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
6853 let mut vacuum_stats =
6854 crate::storage::unified::store::MvccVacuumStats::default();
6855 for t in &targets {
6856 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
6857 RedDBError::Internal(format!(
6858 "VACUUM MVCC history failed for {t}: {e}"
6859 ))
6860 })?;
6861 if stats.reclaimed_versions > 0 {
6862 self.rebuild_runtime_indexes_for_table(t)?;
6863 }
6864 vacuum_stats.add(&stats);
6865 }
6866 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
6867 for t in &targets {
6869 self.refresh_table_planner_stats(t);
6870 }
6871 let persisted = if *full {
6875 match store.persist() {
6876 Ok(()) => true,
6877 Err(e) => {
6878 return Err(RedDBError::Internal(format!(
6879 "VACUUM FULL persist failed: {e:?}"
6880 )));
6881 }
6882 }
6883 } else {
6884 false
6885 };
6886 self.invalidate_result_cache();
6888 (
6889 "vacuum",
6890 format!(
6891 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
6892 if *full { " FULL" } else { "" },
6893 targets.len(),
6894 vacuum_stats.scanned_versions,
6895 vacuum_stats.retained_versions,
6896 vacuum_stats.reclaimed_versions,
6897 vacuum_stats.retained_history_versions,
6898 vacuum_stats.reclaimed_history_versions,
6899 vacuum_stats.retained_tombstones,
6900 vacuum_stats.reclaimed_tombstones,
6901 if persisted {
6902 " (pages flushed to disk)"
6903 } else {
6904 ""
6905 }
6906 ),
6907 )
6908 }
6909 };
6910 Ok(RuntimeQueryResult::ok_message(
6911 query.to_string(),
6912 &msg,
6913 kind,
6914 ))
6915 }
6916 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
6923 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
6924 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
6925 QueryExpr::CreateIamPolicy { ref id, ref json } => {
6926 self.execute_create_iam_policy(query, id, json)
6927 }
6928 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
6929 QueryExpr::AttachPolicy {
6930 ref policy_id,
6931 ref principal,
6932 } => self.execute_attach_policy(query, policy_id, principal),
6933 QueryExpr::DetachPolicy {
6934 ref policy_id,
6935 ref principal,
6936 } => self.execute_detach_policy(query, policy_id, principal),
6937 QueryExpr::ShowPolicies { ref filter } => {
6938 self.execute_show_policies(query, filter.as_ref())
6939 }
6940 QueryExpr::ShowEffectivePermissions {
6941 ref user,
6942 ref resource,
6943 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
6944 QueryExpr::SimulatePolicy {
6945 ref user,
6946 ref action,
6947 ref resource,
6948 } => self.execute_simulate_policy(query, user, action, resource),
6949 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
6950 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
6951 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
6952 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
6953 };
6954
6955 if !control_event_specs.is_empty() {
6956 let (outcome, reason) = match &query_result {
6957 Ok(_) => (crate::runtime::control_events::Outcome::Allowed, None),
6958 Err(err) => (control_event_outcome_for_error(err), Some(err.to_string())),
6959 };
6960 for spec in &control_event_specs {
6961 self.emit_control_event(
6962 spec.kind,
6963 outcome,
6964 spec.action,
6965 spec.resource.clone(),
6966 reason.clone(),
6967 spec.fields.clone(),
6968 )?;
6969 }
6970 }
6971
6972 if let (Some(plan), Ok(result)) = (&query_audit_plan, &query_result) {
6973 self.emit_query_audit(
6974 query,
6975 plan,
6976 query_audit_started.elapsed().as_millis() as u64,
6977 result,
6978 );
6979 }
6980
6981 let mut query_result = query_result;
6985 if let Ok(ref mut result) = query_result {
6986 if result.statement_type == "select" {
6987 self.apply_secret_decryption(result);
6988 }
6989 }
6990
6991 if let Ok(ref result) = query_result {
6998 frame.write_result_cache(self, result, result_cache_scopes);
6999 }
7000
7001 query_result
7002 }
7003
7004 pub fn materialized_view_metadata(
7008 &self,
7009 ) -> Vec<crate::storage::cache::result::MaterializedViewMetadata> {
7010 let store = self.inner.db.store();
7017 let mut entries = self.inner.materialized_views.read().metadata();
7018 for entry in &mut entries {
7019 if let Some(manager) = store.get_collection(&entry.name) {
7020 entry.current_row_count = manager.count() as u64;
7021 }
7022 }
7023 entries
7024 }
7025
7026 pub(crate) fn retention_sweeper_snapshot(
7037 &self,
7038 ) -> Vec<(String, crate::runtime::retention_sweeper::SweeperState)> {
7039 self.inner.retention_sweeper.read().snapshot()
7040 }
7041
7042 pub fn sweep_retention_tick(&self, batch_size: usize) {
7064 if batch_size == 0 {
7065 return;
7066 }
7067 let now_ms = std::time::SystemTime::now()
7068 .duration_since(std::time::UNIX_EPOCH)
7069 .map(|d| d.as_millis() as u64)
7070 .unwrap_or(0);
7071
7072 let store = self.inner.db.store();
7073 let collections = store.list_collections();
7074 for name in collections {
7075 let Some(contract) = self.inner.db.collection_contract(&name) else {
7076 continue;
7077 };
7078 let Some(retention_ms) = contract.retention_duration_ms else {
7079 continue;
7080 };
7081 let Some(ts_column) =
7082 crate::runtime::retention_filter::resolve_timestamp_column(&contract)
7083 else {
7084 continue;
7085 };
7086 let Some(manager) = store.get_collection(&name) else {
7087 continue;
7088 };
7089 let cutoff = (now_ms as i64).saturating_sub(retention_ms as i64);
7090
7091 let mut expired_ts: Vec<i64> = Vec::new();
7099 manager.for_each_entity(|entity| {
7100 let ts = match ts_column.as_str() {
7101 "created_at" => Some(entity.created_at as i64),
7102 "updated_at" => Some(entity.updated_at as i64),
7103 other => entity
7104 .data
7105 .as_row()
7106 .and_then(|row| row.get_field(other))
7107 .and_then(|v| match v {
7108 crate::storage::schema::Value::TimestampMs(t) => Some(*t),
7109 crate::storage::schema::Value::Timestamp(t) => {
7110 Some(t.saturating_mul(1_000))
7111 }
7112 crate::storage::schema::Value::BigInt(t) => Some(*t),
7113 crate::storage::schema::Value::UnsignedInteger(t) => {
7114 i64::try_from(*t).ok()
7115 }
7116 crate::storage::schema::Value::Integer(t) => Some(*t),
7117 _ => None,
7118 }),
7119 };
7120 if let Some(t) = ts {
7121 if t < cutoff {
7122 expired_ts.push(t);
7123 }
7124 }
7125 true
7126 });
7127
7128 let total_expired = expired_ts.len() as u64;
7129 if total_expired == 0 {
7130 self.inner
7131 .retention_sweeper
7132 .write()
7133 .record_tick(&name, 0, 0, now_ms);
7134 continue;
7135 }
7136
7137 let (effective_cutoff, pending) = if (total_expired as usize) <= batch_size {
7138 (cutoff, 0u64)
7139 } else {
7140 expired_ts.sort_unstable();
7144 let nth = expired_ts[batch_size - 1];
7145 (
7146 nth.saturating_add(1),
7147 total_expired.saturating_sub(batch_size as u64),
7148 )
7149 };
7150
7151 let stmt = format!(
7152 "DELETE FROM {} WHERE {} < {}",
7153 name, ts_column, effective_cutoff
7154 );
7155 let deleted = match self.execute_query(&stmt) {
7156 Ok(r) => r.affected_rows,
7157 Err(_) => 0,
7158 };
7159
7160 self.inner
7161 .retention_sweeper
7162 .write()
7163 .record_tick(&name, deleted, pending, now_ms);
7164 }
7165 }
7166
7167 pub fn refresh_due_materialized_views(&self) {
7168 let due = {
7169 let mut cache = self.inner.materialized_views.write();
7170 cache.claim_due_at(std::time::Instant::now())
7171 };
7172 for name in due {
7173 let stmt = format!("REFRESH MATERIALIZED VIEW {}", name);
7180 let _ = self.execute_query(&stmt);
7181 }
7182 }
7183
7184 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
7190 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
7191 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
7192 let expr = self.rewrite_view_refs(expr);
7196
7197 self.validate_model_operations_before_auth(&expr)?;
7198 if let Err(err) = self.check_query_privilege(&expr) {
7202 return Err(RedDBError::Query(format!("permission denied: {err}")));
7203 }
7204
7205 let statement = query_expr_name(&expr);
7206 let mode = detect_mode(statement);
7207 let query_str = statement;
7208
7209 let result = self.dispatch_expr(expr, query_str, mode)?;
7210 let mut r = result;
7211 if r.statement_type == "select" {
7212 self.apply_secret_decryption(&mut r);
7213 }
7214 Ok(r)
7215 }
7216
7217 pub(super) fn validate_model_operations_before_auth(
7218 &self,
7219 expr: &QueryExpr,
7220 ) -> RedDBResult<()> {
7221 use crate::catalog::CollectionModel;
7222 use crate::runtime::ddl::polymorphic_resolver;
7223 use crate::storage::query::ast::KvCommand;
7224
7225 let system_schema_target = match expr {
7226 QueryExpr::DropTable(q) => Some(q.name.as_str()),
7227 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
7228 QueryExpr::DropVector(q) => Some(q.name.as_str()),
7229 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
7230 QueryExpr::DropKv(q) => Some(q.name.as_str()),
7231 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
7232 QueryExpr::Truncate(q) => Some(q.name.as_str()),
7233 _ => None,
7234 };
7235 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
7236 return Err(RedDBError::Query("system schema is read-only".to_string()));
7237 }
7238
7239 let expected = match expr {
7240 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
7241 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
7242 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
7243 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
7244 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
7245 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
7246 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
7247 QueryExpr::KvCommand(cmd) => {
7248 let (collection, model) = match cmd {
7249 KvCommand::Put {
7250 collection, model, ..
7251 }
7252 | KvCommand::Get {
7253 collection, model, ..
7254 }
7255 | KvCommand::Incr {
7256 collection, model, ..
7257 }
7258 | KvCommand::Cas {
7259 collection, model, ..
7260 }
7261 | KvCommand::Delete {
7262 collection, model, ..
7263 } => (collection.as_str(), *model),
7264 KvCommand::Rotate { collection, .. }
7265 | KvCommand::History { collection, .. }
7266 | KvCommand::List { collection, .. }
7267 | KvCommand::Purge { collection, .. } => {
7268 (collection.as_str(), CollectionModel::Vault)
7269 }
7270 KvCommand::InvalidateTags { collection, .. } => {
7271 (collection.as_str(), CollectionModel::Kv)
7272 }
7273 KvCommand::Watch {
7274 collection, model, ..
7275 } => (collection.as_str(), *model),
7276 KvCommand::Unseal { collection, .. } => {
7277 (collection.as_str(), CollectionModel::Vault)
7278 }
7279 };
7280 Some((collection, model))
7281 }
7282 QueryExpr::ConfigCommand(cmd) => {
7283 self.validate_config_command_before_auth(cmd)?;
7284 None
7285 }
7286 _ => None,
7287 };
7288
7289 let Some((name, expected_model)) = expected else {
7290 return Ok(());
7291 };
7292 let snapshot = self.inner.db.catalog_model_snapshot();
7293 let Some(actual_model) = snapshot
7294 .collections
7295 .iter()
7296 .find(|collection| collection.name == name)
7297 .map(|collection| collection.declared_model.unwrap_or(collection.model))
7298 else {
7299 return Ok(());
7300 };
7301 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
7302 }
7303
7304 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
7309 if self.inner.views.read().is_empty() {
7311 return expr;
7312 }
7313 self.rewrite_view_refs_inner(expr)
7314 }
7315
7316 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
7317 use crate::storage::query::ast::{Filter, TableSource};
7318 match expr {
7319 QueryExpr::Table(mut tq) => {
7320 if let Some(TableSource::Subquery(body)) = tq.source.take() {
7326 tq.source = Some(TableSource::Subquery(Box::new(
7327 self.rewrite_view_refs_inner(*body),
7328 )));
7329 return QueryExpr::Table(tq);
7330 }
7331
7332 let maybe_view = {
7336 let views = self.inner.views.read();
7337 views.get(&tq.table).cloned()
7338 };
7339 let Some(view) = maybe_view else {
7340 return QueryExpr::Table(tq);
7341 };
7342
7343 if view.materialized {
7349 return QueryExpr::Table(tq);
7350 }
7351
7352 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
7356
7357 match inner_expr {
7365 QueryExpr::Table(mut inner_tq) => {
7366 if let Some(outer_filter) = tq.filter.take() {
7367 inner_tq.filter = Some(match inner_tq.filter.take() {
7368 Some(existing) => {
7369 Filter::And(Box::new(existing), Box::new(outer_filter))
7370 }
7371 None => outer_filter,
7372 });
7373 inner_tq.where_expr = inner_tq
7381 .filter
7382 .as_ref()
7383 .map(crate::storage::query::sql_lowering::filter_to_expr);
7384 }
7385 if let Some(outer_limit) = tq.limit {
7386 inner_tq.limit = Some(match inner_tq.limit {
7387 Some(existing) => existing.min(outer_limit),
7388 None => outer_limit,
7389 });
7390 }
7391 if let Some(outer_offset) = tq.offset {
7392 inner_tq.offset = Some(match inner_tq.offset {
7393 Some(existing) => existing + outer_offset,
7394 None => outer_offset,
7395 });
7396 }
7397 QueryExpr::Table(inner_tq)
7398 }
7399 other => other,
7400 }
7401 }
7402 QueryExpr::Join(mut jq) => {
7403 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
7404 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
7405 QueryExpr::Join(jq)
7406 }
7407 other => other,
7410 }
7411 }
7412
7413 fn authorize_relational_table_select(
7417 &self,
7418 mut table: TableQuery,
7419 frame: &dyn super::statement_frame::ReadFrame,
7420 ) -> RedDBResult<Option<TableQuery>> {
7421 if let Some(TableSource::Subquery(inner)) = table.source.take() {
7422 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
7423 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
7424 return Ok(Some(table));
7425 }
7426
7427 self.check_table_column_projection_authz(&table, frame)?;
7428
7429 if self.inner.rls_enabled_tables.read().contains(&table.table) {
7430 return Ok(inject_rls_filters(self, frame, table));
7431 }
7432
7433 Ok(Some(table))
7434 }
7435
7436 fn authorize_relational_join_select(
7437 &self,
7438 mut join: JoinQuery,
7439 frame: &dyn super::statement_frame::ReadFrame,
7440 ) -> RedDBResult<Option<JoinQuery>> {
7441 self.check_join_column_projection_authz(&join, frame)?;
7442 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
7443 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
7444 Ok(inject_rls_into_join(self, frame, join))
7445 }
7446
7447 fn authorize_relational_join_child(
7448 &self,
7449 expr: QueryExpr,
7450 frame: &dyn super::statement_frame::ReadFrame,
7451 ) -> RedDBResult<QueryExpr> {
7452 match expr {
7453 QueryExpr::Table(mut table) => {
7454 if let Some(TableSource::Subquery(inner)) = table.source.take() {
7455 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
7456 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
7457 }
7458 Ok(QueryExpr::Table(table))
7459 }
7460 QueryExpr::Join(join) => self
7461 .authorize_relational_join_select(join, frame)?
7462 .map(QueryExpr::Join)
7463 .ok_or_else(|| {
7464 RedDBError::Query("permission denied: RLS denied relational subquery".into())
7465 }),
7466 other => Ok(other),
7467 }
7468 }
7469
7470 fn authorize_relational_select_expr(
7471 &self,
7472 expr: QueryExpr,
7473 frame: &dyn super::statement_frame::ReadFrame,
7474 ) -> RedDBResult<QueryExpr> {
7475 match expr {
7476 QueryExpr::Table(table) => self
7477 .authorize_relational_table_select(table, frame)?
7478 .map(QueryExpr::Table)
7479 .ok_or_else(|| {
7480 RedDBError::Query("permission denied: RLS denied relational subquery".into())
7481 }),
7482 QueryExpr::Join(join) => self
7483 .authorize_relational_join_select(join, frame)?
7484 .map(QueryExpr::Join)
7485 .ok_or_else(|| {
7486 RedDBError::Query("permission denied: RLS denied relational subquery".into())
7487 }),
7488 other => Ok(other),
7489 }
7490 }
7491
7492 fn check_table_column_projection_authz(
7493 &self,
7494 table: &TableQuery,
7495 frame: &dyn super::statement_frame::ReadFrame,
7496 ) -> RedDBResult<()> {
7497 let Some((username, role)) = frame.identity() else {
7498 return Ok(());
7499 };
7500 let Some(auth_store) = self.inner.auth_store.read().clone() else {
7501 return Ok(());
7502 };
7503
7504 let columns = self.resolved_table_projection_columns(table)?;
7505 let request = ColumnAccessRequest::select(table.table.clone(), columns);
7506 let principal = UserId::from_parts(frame.effective_scope(), username);
7507 let ctx = runtime_iam_context(
7508 role,
7509 frame.effective_scope(),
7510 auth_store.principal_is_system_owned(&principal),
7511 );
7512 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
7513 if outcome.allowed() {
7514 return Ok(());
7515 }
7516
7517 if let Some(denied) = outcome.first_denied_column() {
7518 return Err(RedDBError::Query(format!(
7519 "permission denied: principal=`{username}` cannot select column `{}`",
7520 denied.resource.name
7521 )));
7522 }
7523 Err(RedDBError::Query(format!(
7524 "permission denied: principal=`{username}` cannot select table `{}`",
7525 table.table
7526 )))
7527 }
7528
7529 fn check_join_column_projection_authz(
7530 &self,
7531 join: &JoinQuery,
7532 frame: &dyn super::statement_frame::ReadFrame,
7533 ) -> RedDBResult<()> {
7534 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
7535 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
7536 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
7537
7538 for (table, columns) in by_table {
7539 let query = TableQuery {
7540 table,
7541 source: None,
7542 alias: None,
7543 select_items: Vec::new(),
7544 columns: columns.into_iter().map(Projection::Column).collect(),
7545 where_expr: None,
7546 filter: None,
7547 group_by_exprs: Vec::new(),
7548 group_by: Vec::new(),
7549 having_expr: None,
7550 having: None,
7551 order_by: Vec::new(),
7552 limit: None,
7553 limit_param: None,
7554 offset: None,
7555 offset_param: None,
7556 expand: None,
7557 as_of: None,
7558 sessionize: None,
7559 };
7560 self.check_table_column_projection_authz(&query, frame)?;
7561 }
7562 Ok(())
7563 }
7564
7565 fn collect_join_projection_columns(
7566 &self,
7567 join: &JoinQuery,
7568 projections: &[Projection],
7569 out: &mut HashMap<String, BTreeSet<String>>,
7570 ) -> RedDBResult<()> {
7571 let left = table_side_context(join.left.as_ref());
7572 let right = table_side_context(join.right.as_ref());
7573
7574 if projections
7575 .iter()
7576 .any(|projection| matches!(projection, Projection::All))
7577 {
7578 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
7579 out.entry(side.table.clone())
7580 .or_default()
7581 .extend(self.table_all_projection_columns(&side.table)?);
7582 }
7583 return Ok(());
7584 }
7585
7586 for projection in projections {
7587 collect_projection_columns_for_join_side(
7588 projection,
7589 left.as_ref(),
7590 right.as_ref(),
7591 out,
7592 )?;
7593 }
7594 Ok(())
7595 }
7596
7597 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
7598 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
7599 if projections
7600 .iter()
7601 .any(|projection| matches!(projection, Projection::All))
7602 {
7603 return self.table_all_projection_columns(&table.table);
7604 }
7605
7606 let mut columns = BTreeSet::new();
7607 for projection in &projections {
7608 collect_projection_columns_for_table(
7609 projection,
7610 &table.table,
7611 table.alias.as_deref(),
7612 &mut columns,
7613 );
7614 }
7615 Ok(columns.into_iter().collect())
7616 }
7617
7618 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
7619 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
7620 let columns: Vec<String> = contract
7621 .declared_columns
7622 .iter()
7623 .map(|column| column.name.clone())
7624 .collect();
7625 if !columns.is_empty() {
7626 return Ok(columns);
7627 }
7628 }
7629
7630 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
7631 Ok(records
7632 .first()
7633 .map(|record| {
7634 record
7635 .column_names()
7636 .into_iter()
7637 .map(|column| column.to_string())
7638 .collect()
7639 })
7640 .unwrap_or_default())
7641 }
7642
7643 fn resolve_table_expr_subqueries(
7644 &self,
7645 mut table: TableQuery,
7646 frame: &dyn super::statement_frame::ReadFrame,
7647 ) -> RedDBResult<TableQuery> {
7648 if let Some(TableSource::Subquery(inner)) = table.source.take() {
7649 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
7650 table.source = Some(TableSource::Subquery(Box::new(inner)));
7651 }
7652
7653 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
7654 for item in &mut table.select_items {
7655 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
7656 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7657 }
7658 }
7659 if let Some(where_expr) = table.where_expr.take() {
7660 table.where_expr =
7661 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
7662 table.filter = None;
7663 }
7664 if let Some(having_expr) = table.having_expr.take() {
7665 table.having_expr =
7666 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
7667 table.having = None;
7668 }
7669 for expr in &mut table.group_by_exprs {
7670 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
7671 }
7672 for clause in &mut table.order_by {
7673 if let Some(expr) = clause.expr.take() {
7674 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
7675 }
7676 }
7677 Ok(table)
7678 }
7679
7680 fn resolve_select_expr_subqueries(
7681 &self,
7682 expr: QueryExpr,
7683 frame: &dyn super::statement_frame::ReadFrame,
7684 ) -> RedDBResult<QueryExpr> {
7685 match expr {
7686 QueryExpr::Table(table) => self
7687 .resolve_table_expr_subqueries(table, frame)
7688 .map(QueryExpr::Table),
7689 QueryExpr::Join(mut join) => {
7690 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
7691 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
7692 Ok(QueryExpr::Join(join))
7693 }
7694 other => Ok(other),
7695 }
7696 }
7697
7698 fn resolve_expr_subqueries(
7699 &self,
7700 expr: crate::storage::query::ast::Expr,
7701 outer_scopes: &[String],
7702 frame: &dyn super::statement_frame::ReadFrame,
7703 ) -> RedDBResult<crate::storage::query::ast::Expr> {
7704 use crate::storage::query::ast::Expr;
7705
7706 match expr {
7707 Expr::Subquery { query, span } => {
7708 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
7709 if values.len() > 1 {
7710 return Err(RedDBError::Query(
7711 "scalar subquery returned more than one row".to_string(),
7712 ));
7713 }
7714 Ok(Expr::Literal {
7715 value: values.into_iter().next().unwrap_or(Value::Null),
7716 span,
7717 })
7718 }
7719 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
7720 op,
7721 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
7722 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
7723 span,
7724 }),
7725 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
7726 op,
7727 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7728 span,
7729 }),
7730 Expr::Cast {
7731 inner,
7732 target,
7733 span,
7734 } => Ok(Expr::Cast {
7735 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
7736 target,
7737 span,
7738 }),
7739 Expr::FunctionCall { name, args, span } => {
7740 let args = args
7741 .into_iter()
7742 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
7743 .collect::<RedDBResult<Vec<_>>>()?;
7744 Ok(Expr::FunctionCall { name, args, span })
7745 }
7746 Expr::Case {
7747 branches,
7748 else_,
7749 span,
7750 } => {
7751 let branches = branches
7752 .into_iter()
7753 .map(|(cond, value)| {
7754 Ok((
7755 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
7756 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
7757 ))
7758 })
7759 .collect::<RedDBResult<Vec<_>>>()?;
7760 let else_ = else_
7761 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
7762 .transpose()?
7763 .map(Box::new);
7764 Ok(Expr::Case {
7765 branches,
7766 else_,
7767 span,
7768 })
7769 }
7770 Expr::IsNull {
7771 operand,
7772 negated,
7773 span,
7774 } => Ok(Expr::IsNull {
7775 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
7776 negated,
7777 span,
7778 }),
7779 Expr::InList {
7780 target,
7781 values,
7782 negated,
7783 span,
7784 } => {
7785 let target =
7786 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
7787 let mut resolved = Vec::new();
7788 for value in values {
7789 if let Expr::Subquery { query, .. } = value {
7790 resolved.extend(
7791 self.execute_expr_subquery_values(query, outer_scopes, frame)?
7792 .into_iter()
7793 .map(Expr::lit),
7794 );
7795 } else {
7796 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
7797 }
7798 }
7799 Ok(Expr::InList {
7800 target,
7801 values: resolved,
7802 negated,
7803 span,
7804 })
7805 }
7806 Expr::Between {
7807 target,
7808 low,
7809 high,
7810 negated,
7811 span,
7812 } => Ok(Expr::Between {
7813 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
7814 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
7815 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
7816 negated,
7817 span,
7818 }),
7819 other => Ok(other),
7820 }
7821 }
7822
7823 fn execute_expr_subquery_values(
7824 &self,
7825 subquery: crate::storage::query::ast::ExprSubquery,
7826 outer_scopes: &[String],
7827 frame: &dyn super::statement_frame::ReadFrame,
7828 ) -> RedDBResult<Vec<Value>> {
7829 let query = *subquery.query;
7830 if query_references_outer_scope(&query, outer_scopes) {
7831 return Err(RedDBError::Query(
7832 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
7833 ));
7834 }
7835 let query = self.rewrite_view_refs(query);
7836 let query = self.resolve_select_expr_subqueries(query, frame)?;
7837 let query = self.authorize_relational_select_expr(query, frame)?;
7838 let result = match query {
7839 QueryExpr::Table(table) => {
7840 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
7841 }
7842 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
7843 other => {
7844 return Err(RedDBError::Query(format!(
7845 "expression subquery must be a SELECT query, got {}",
7846 query_expr_name(&other)
7847 )))
7848 }
7849 };
7850 first_column_values(result)
7851 }
7852
7853 fn dispatch_expr(
7854 &self,
7855 expr: QueryExpr,
7856 query_str: &str,
7857 mode: QueryMode,
7858 ) -> RedDBResult<RuntimeQueryResult> {
7859 let statement = query_expr_name(&expr);
7860 match expr {
7861 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
7862 Err(RedDBError::Query(
7864 "graph queries cannot be used as prepared statements".to_string(),
7865 ))
7866 }
7867 QueryExpr::Table(table) => {
7868 let scope = self.ai_scope();
7869 let table = self.resolve_table_expr_subqueries(
7870 table,
7871 &scope as &dyn super::statement_frame::ReadFrame,
7872 )?;
7873 if super::red_schema::is_virtual_table(&table.table) {
7874 return Ok(RuntimeQueryResult {
7875 query: query_str.to_string(),
7876 mode,
7877 statement,
7878 engine: "runtime-red-schema",
7879 result: super::red_schema::red_query(
7880 self,
7881 &table.table,
7882 &table,
7883 &scope as &dyn super::statement_frame::ReadFrame,
7884 )?,
7885 affected_rows: 0,
7886 statement_type: "select",
7887 });
7888 }
7889 let Some(table_with_rls) = self.authorize_relational_table_select(
7890 table,
7891 &scope as &dyn super::statement_frame::ReadFrame,
7892 )?
7893 else {
7894 return Ok(RuntimeQueryResult {
7895 query: query_str.to_string(),
7896 mode,
7897 statement,
7898 engine: "runtime-table-rls",
7899 result: crate::storage::query::unified::UnifiedResult::empty(),
7900 affected_rows: 0,
7901 statement_type: "select",
7902 });
7903 };
7904 Ok(RuntimeQueryResult {
7905 query: query_str.to_string(),
7906 mode,
7907 statement,
7908 engine: "runtime-table",
7909 result: execute_runtime_table_query(
7910 &self.inner.db,
7911 &table_with_rls,
7912 Some(&self.inner.index_store),
7913 )?,
7914 affected_rows: 0,
7915 statement_type: "select",
7916 })
7917 }
7918 QueryExpr::Join(join) => {
7919 let scope = self.ai_scope();
7920 let Some(join_with_rls) = self.authorize_relational_join_select(
7921 join,
7922 &scope as &dyn super::statement_frame::ReadFrame,
7923 )?
7924 else {
7925 return Ok(RuntimeQueryResult {
7926 query: query_str.to_string(),
7927 mode,
7928 statement,
7929 engine: "runtime-join-rls",
7930 result: crate::storage::query::unified::UnifiedResult::empty(),
7931 affected_rows: 0,
7932 statement_type: "select",
7933 });
7934 };
7935 Ok(RuntimeQueryResult {
7936 query: query_str.to_string(),
7937 mode,
7938 statement,
7939 engine: "runtime-join",
7940 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
7941 affected_rows: 0,
7942 statement_type: "select",
7943 })
7944 }
7945 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
7946 query: query_str.to_string(),
7947 mode,
7948 statement,
7949 engine: "runtime-vector",
7950 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
7951 affected_rows: 0,
7952 statement_type: "select",
7953 }),
7954 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
7955 query: query_str.to_string(),
7956 mode,
7957 statement,
7958 engine: "runtime-hybrid",
7959 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
7960 affected_rows: 0,
7961 statement_type: "select",
7962 }),
7963 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
7964 Err(RedDBError::Query(
7965 super::red_schema::READ_ONLY_ERROR.to_string(),
7966 ))
7967 }
7968 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
7969 Err(RedDBError::Query(
7970 super::red_schema::READ_ONLY_ERROR.to_string(),
7971 ))
7972 }
7973 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
7974 Err(RedDBError::Query(
7975 super::red_schema::READ_ONLY_ERROR.to_string(),
7976 ))
7977 }
7978 QueryExpr::Insert(ref insert) => self
7979 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
7980 self.execute_insert(query_str, insert)
7981 }),
7982 QueryExpr::Update(ref update) => self
7983 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
7984 self.execute_update(query_str, update)
7985 }),
7986 QueryExpr::Delete(ref delete) => self
7987 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
7988 self.execute_delete(query_str, delete)
7989 }),
7990 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
7991 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
7992 _ => Err(RedDBError::Query(format!(
7993 "prepared-statement execution does not support {statement} statements"
7994 ))),
7995 }
7996 }
7997
7998 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
8001 let q = query.trim();
8004 if !q.starts_with("SELECT") && !q.starts_with("select") {
8005 return None;
8006 }
8007
8008 let where_pos = q
8010 .find("WHERE _entity_id")
8011 .or_else(|| q.find("where _entity_id"))?;
8012 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
8014
8015 let id_str = after_eq.trim();
8017 let entity_id: u64 = id_str.parse().ok()?;
8018
8019 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
8021 let table = q[from_pos..where_pos].trim();
8022 if table.is_empty()
8023 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
8024 {
8025 return None; }
8027 let table_name = table.split_whitespace().next()?;
8028
8029 let store = self.inner.db.store();
8035 let entity = store
8036 .get(
8037 table_name,
8038 crate::storage::unified::EntityId::new(entity_id),
8039 )
8040 .filter(entity_visible_under_current_snapshot);
8041
8042 let count = if entity.is_some() { 1u64 } else { 0 };
8043
8044 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
8050 .as_ref()
8051 .and_then(|e| runtime_table_record_from_entity(e.clone()))
8052 .into_iter()
8053 .collect();
8054
8055 let json = match entity {
8056 Some(ref e) => execute_runtime_serialize_single_entity(e),
8057 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
8058 .to_string(),
8059 };
8060
8061 Some(Ok(RuntimeQueryResult {
8062 query: query.to_string(),
8063 mode: crate::storage::query::modes::QueryMode::Sql,
8064 statement: "select",
8065 engine: "fast-entity-lookup",
8066 result: crate::storage::query::unified::UnifiedResult {
8067 columns: Vec::new(),
8068 records,
8069 stats: crate::storage::query::unified::QueryStats {
8070 rows_scanned: count,
8071 ..Default::default()
8072 },
8073 pre_serialized_json: Some(json),
8074 },
8075 affected_rows: 0,
8076 statement_type: "select",
8077 }))
8078 }
8079
8080 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
8081 match self
8082 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
8083 .as_str()
8084 {
8085 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
8086 "shadow" => RuntimeResultCacheBackend::Shadow,
8087 _ => RuntimeResultCacheBackend::Legacy,
8088 }
8089 }
8090
8091 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
8092 match self.result_cache_backend() {
8093 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
8094 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
8095 RuntimeResultCacheBackend::Shadow => {
8096 let legacy = self.get_legacy_result_cache_entry(key);
8097 let blob = self.get_blob_result_cache_entry(key);
8098 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
8099 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
8100 self.inner
8101 .result_cache_shadow_divergences
8102 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
8103 tracing::warn!(
8104 key,
8105 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
8106 "result cache shadow backend diverged from legacy"
8107 );
8108 }
8109 }
8110 legacy
8111 }
8112 }
8113 }
8114
8115 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
8116 let cache = self.inner.result_cache.read();
8117 cache.0.get(key).and_then(|entry| {
8118 if entry.cached_at.elapsed().as_secs() < RESULT_CACHE_TTL_SECS {
8119 Some(entry.result.clone())
8120 } else {
8121 None
8122 }
8123 })
8124 }
8125
8126 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
8127 let hit = self
8128 .inner
8129 .result_blob_cache
8130 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
8131 {
8132 let cache = self.inner.result_blob_entries.read();
8133 if let Some(entry) = cache.0.get(key) {
8134 return Some(entry.result.clone());
8135 }
8136 }
8137
8138 let (result, scopes) = decode_result_cache_payload(hit.value())?;
8139 let mut cache = self.inner.result_blob_entries.write();
8140 let (ref mut map, ref mut order) = *cache;
8141 if !map.contains_key(key) {
8142 order.push_back(key.to_string());
8143 }
8144 map.insert(
8145 key.to_string(),
8146 RuntimeResultCacheEntry {
8147 result: result.clone(),
8148 cached_at: std::time::Instant::now(),
8149 scopes,
8150 },
8151 );
8152 trim_result_cache(map, order);
8153 Some(result)
8154 }
8155
8156 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
8157 match self.result_cache_backend() {
8158 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
8159 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
8160 RuntimeResultCacheBackend::Shadow => {
8161 self.put_legacy_result_cache_entry(key, entry.clone());
8162 self.put_blob_result_cache_entry(key, entry);
8163 }
8164 }
8165 }
8166
8167 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
8168 let mut cache = self.inner.result_cache.write();
8169 let (ref mut map, ref mut order) = *cache;
8170 if !map.contains_key(key) {
8171 order.push_back(key.to_string());
8172 }
8173 map.insert(key.to_string(), entry);
8174 trim_result_cache(map, order);
8175 }
8176
8177 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
8178 let policy = crate::storage::cache::BlobCachePolicy::default()
8179 .ttl_ms(RESULT_CACHE_TTL_SECS * 1000)
8180 .priority(200);
8181 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
8182 let bytes = encode_result_cache_payload(&entry)
8183 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
8184 let put = crate::storage::cache::BlobCachePut::new(bytes)
8185 .with_dependencies(dependencies)
8186 .with_policy(policy);
8187 if self
8188 .inner
8189 .result_blob_cache
8190 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
8191 .is_err()
8192 {
8193 return;
8194 }
8195
8196 let mut cache = self.inner.result_blob_entries.write();
8197 let (ref mut map, ref mut order) = *cache;
8198 if !map.contains_key(key) {
8199 order.push_back(key.to_string());
8200 }
8201 map.insert(key.to_string(), entry);
8202 trim_result_cache(map, order);
8203 }
8204
8205 pub fn result_cache_shadow_divergences(&self) -> u64 {
8206 self.inner
8207 .result_cache_shadow_divergences
8208 .load(std::sync::atomic::Ordering::Relaxed)
8209 }
8210
8211 pub fn invalidate_result_cache(&self) {
8214 let mut cache = self.inner.result_cache.write();
8215 cache.0.clear();
8216 cache.1.clear();
8217 let mut blob_entries = self.inner.result_blob_entries.write();
8218 blob_entries.0.clear();
8219 blob_entries.1.clear();
8220 self.inner
8221 .result_blob_cache
8222 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
8223 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
8224 ask_entries.0.clear();
8225 ask_entries.1.clear();
8226 self.inner
8227 .result_blob_cache
8228 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
8229 }
8230
8231 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
8234 let legacy_has_match = {
8237 let cache = self.inner.result_cache.read();
8238 let (ref map, _) = *cache;
8239 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
8240 };
8241 let blob_has_match = {
8242 let cache = self.inner.result_blob_entries.read();
8243 let (ref map, _) = *cache;
8244 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
8245 };
8246 if legacy_has_match {
8247 let mut cache = self.inner.result_cache.write();
8248 let (ref mut map, ref mut order) = *cache;
8249 map.retain(|_, entry| !entry.scopes.contains(table));
8250 order.retain(|key| map.contains_key(key));
8251 }
8252
8253 if matches!(
8254 self.result_cache_backend(),
8255 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
8256 ) {
8257 let mut blob_entries = self.inner.result_blob_entries.write();
8258 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
8259 blob_map.clear();
8260 blob_order.clear();
8261 self.inner
8262 .result_blob_cache
8263 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
8264 } else if blob_has_match {
8265 let mut blob_entries = self.inner.result_blob_entries.write();
8266 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
8267 blob_map.retain(|_, entry| !entry.scopes.contains(table));
8268 blob_order.retain(|key| blob_map.contains_key(key));
8269 }
8270 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
8271 ask_entries.0.clear();
8272 ask_entries.1.clear();
8273 self.inner
8274 .result_blob_cache
8275 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
8276 }
8277
8278 pub(crate) fn invalidate_plan_cache(&self) {
8279 self.inner.query_cache.write().clear();
8280 self.inner
8281 .ddl_epoch
8282 .fetch_add(1, std::sync::atomic::Ordering::Release);
8283 }
8284
8285 pub fn ddl_epoch(&self) -> u64 {
8289 self.inner
8290 .ddl_epoch
8291 .load(std::sync::atomic::Ordering::Acquire)
8292 }
8293
8294 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
8295 let store = self.inner.db.store();
8296 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
8297 self.invalidate_plan_cache();
8298 }
8299
8300 pub(crate) fn rehydrate_tenant_tables(&self) {
8309 let store = self.inner.db.store();
8310 let Some(manager) = store.get_collection("red_config") else {
8311 return;
8312 };
8313 for entity in manager.query_all(|_| true) {
8318 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
8319 continue;
8320 };
8321 let Some(named) = &row.named else { continue };
8322 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
8323 continue;
8324 };
8325 let Some(rest) = key.strip_prefix("tenant_tables.") else {
8327 continue;
8328 };
8329 let Some((table, suffix)) = rest.rsplit_once('.') else {
8330 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
8336 collection: "red_config".to_string(),
8337 detail: format!("malformed tenant_tables key: {key}"),
8338 }
8339 .emit_global();
8340 continue;
8341 };
8342 if suffix != "column" {
8343 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
8344 collection: "red_config".to_string(),
8345 detail: format!("unexpected tenant_tables suffix: {key}"),
8346 }
8347 .emit_global();
8348 continue;
8349 }
8350 match named.get("value") {
8351 Some(crate::storage::schema::Value::Text(column)) => {
8352 self.register_tenant_table(table, column);
8353 }
8354 Some(crate::storage::schema::Value::Null) | None => {
8356 self.unregister_tenant_table(table);
8357 }
8358 _ => {}
8359 }
8360 }
8361 }
8362
8363 pub(crate) fn rehydrate_materialized_view_descriptors(&self) {
8375 let store = self.inner.db.store();
8376 let descriptors = crate::runtime::continuous_materialized_view::load_all(store.as_ref());
8377 for descriptor in descriptors {
8378 let parsed = match crate::storage::query::parser::parse(&descriptor.source_sql) {
8379 Ok(qc) => qc,
8380 Err(err) => {
8381 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
8382 collection:
8383 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
8384 .to_string(),
8385 detail: format!(
8386 "failed to re-parse materialized-view source for {}: {err}",
8387 descriptor.name
8388 ),
8389 }
8390 .emit_global();
8391 continue;
8392 }
8393 };
8394 let crate::storage::query::ast::QueryExpr::CreateView(create) = parsed.query else {
8395 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
8396 collection: crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
8397 .to_string(),
8398 detail: format!(
8399 "materialized-view source for {} did not re-parse as CREATE VIEW",
8400 descriptor.name
8401 ),
8402 }
8403 .emit_global();
8404 continue;
8405 };
8406 let view_name = create.name.clone();
8408 self.inner
8409 .views
8410 .write()
8411 .insert(view_name.clone(), Arc::new(create));
8412 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
8414 let refresh = match descriptor.refresh_every_ms {
8415 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
8416 None => RefreshPolicy::Manual,
8417 };
8418 let def = MaterializedViewDef {
8419 name: view_name.clone(),
8420 query: format!("<parsed view {}>", view_name),
8421 dependencies: descriptor.source_collections.clone(),
8422 refresh,
8423 retention_duration_ms: descriptor.retention_duration_ms,
8424 };
8425 self.inner.materialized_views.write().register(def);
8426 }
8427 self.invalidate_plan_cache();
8430 }
8431
8432 pub(crate) fn rehydrate_declared_column_schemas(&self) {
8433 let store = self.inner.db.store();
8434 for contract in self.inner.db.collection_contracts() {
8435 let columns: Vec<String> = contract
8436 .declared_columns
8437 .iter()
8438 .map(|column| column.name.clone())
8439 .collect();
8440 let Some(manager) = store.get_collection(&contract.name) else {
8441 continue;
8442 };
8443 manager.set_column_schema_if_empty(columns);
8444 }
8445 }
8446
8447 pub fn register_tenant_table(&self, table: &str, column: &str) {
8452 use crate::storage::query::ast::{
8453 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
8454 };
8455 self.inner
8456 .tenant_tables
8457 .write()
8458 .insert(table.to_string(), column.to_string());
8459
8460 let lhs = Expr::Column {
8466 field: FieldRef::TableColumn {
8467 table: table.to_string(),
8468 column: column.to_string(),
8469 },
8470 span: Span::synthetic(),
8471 };
8472 let rhs = Expr::FunctionCall {
8473 name: "CURRENT_TENANT".to_string(),
8474 args: Vec::new(),
8475 span: Span::synthetic(),
8476 };
8477 let policy_filter = Filter::CompareExpr {
8478 lhs,
8479 op: CompareOp::Eq,
8480 rhs,
8481 };
8482
8483 let policy = CreatePolicyQuery {
8484 name: "__tenant_iso".to_string(),
8485 table: table.to_string(),
8486 action: None, role: None, using: Box::new(policy_filter),
8489 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
8496 };
8497
8498 self.inner.rls_policies.write().insert(
8500 (table.to_string(), "__tenant_iso".to_string()),
8501 Arc::new(policy),
8502 );
8503 self.inner
8504 .rls_enabled_tables
8505 .write()
8506 .insert(table.to_string());
8507
8508 self.ensure_tenant_index(table, column);
8514 }
8515
8516 fn ensure_tenant_index(&self, table: &str, column: &str) {
8524 if column.contains('.') {
8525 return;
8526 }
8527 let index_name = format!("__tenant_idx_{table}");
8528 let registry = self.inner.index_store.list_indices(table);
8529 if registry.iter().any(|idx| idx.name == index_name) {
8530 return;
8531 }
8532 if registry
8533 .iter()
8534 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
8535 {
8536 return;
8537 }
8538
8539 let store = self.inner.db.store();
8540 let Some(manager) = store.get_collection(table) else {
8541 return;
8542 };
8543 let entities = manager.query_all(|_| true);
8544 let entity_fields: Vec<(
8545 crate::storage::unified::EntityId,
8546 Vec<(String, crate::storage::schema::Value)>,
8547 )> = entities
8548 .iter()
8549 .map(|e| {
8550 let fields = match &e.data {
8551 crate::storage::EntityData::Row(row) => {
8552 if let Some(ref named) = row.named {
8553 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
8554 } else if let Some(ref schema) = row.schema {
8555 schema
8556 .iter()
8557 .zip(row.columns.iter())
8558 .map(|(k, v)| (k.clone(), v.clone()))
8559 .collect()
8560 } else {
8561 Vec::new()
8562 }
8563 }
8564 crate::storage::EntityData::Node(node) => node
8565 .properties
8566 .iter()
8567 .map(|(k, v)| (k.clone(), v.clone()))
8568 .collect(),
8569 _ => Vec::new(),
8570 };
8571 (e.id, fields)
8572 })
8573 .collect();
8574
8575 let columns = vec![column.to_string()];
8576 if self
8577 .inner
8578 .index_store
8579 .create_index(
8580 &index_name,
8581 table,
8582 &columns,
8583 super::index_store::IndexMethodKind::Hash,
8584 false,
8585 &entity_fields,
8586 )
8587 .is_err()
8588 {
8589 return;
8590 }
8591 self.inner
8592 .index_store
8593 .register(super::index_store::RegisteredIndex {
8594 name: index_name,
8595 collection: table.to_string(),
8596 columns,
8597 method: super::index_store::IndexMethodKind::Hash,
8598 unique: false,
8599 });
8600 self.invalidate_plan_cache();
8601 }
8602
8603 fn drop_tenant_index(&self, table: &str) {
8606 let index_name = format!("__tenant_idx_{table}");
8607 self.inner.index_store.drop_index(&index_name, table);
8608 }
8609
8610 pub fn tenant_column(&self, table: &str) -> Option<String> {
8614 self.inner.tenant_tables.read().get(table).cloned()
8615 }
8616
8617 pub fn unregister_tenant_table(&self, table: &str) {
8621 self.inner.tenant_tables.write().remove(table);
8622 self.inner
8623 .rls_policies
8624 .write()
8625 .remove(&(table.to_string(), "__tenant_iso".to_string()));
8626 self.drop_tenant_index(table);
8627 let has_other_policies = self
8629 .inner
8630 .rls_policies
8631 .read()
8632 .keys()
8633 .any(|(t, _)| t == table);
8634 if !has_other_policies {
8635 self.inner.rls_enabled_tables.write().remove(table);
8636 }
8637 }
8638
8639 pub(crate) fn record_pending_tombstone(
8645 &self,
8646 conn_id: u64,
8647 collection: &str,
8648 id: crate::storage::unified::entity::EntityId,
8649 stamper_xid: crate::storage::transaction::snapshot::Xid,
8650 previous_xmax: crate::storage::transaction::snapshot::Xid,
8651 ) {
8652 self.inner
8653 .pending_tombstones
8654 .write()
8655 .entry(conn_id)
8656 .or_default()
8657 .push((collection.to_string(), id, stamper_xid, previous_xmax));
8658 }
8659
8660 pub(crate) fn record_pending_versioned_update(
8661 &self,
8662 conn_id: u64,
8663 collection: &str,
8664 old_id: crate::storage::unified::entity::EntityId,
8665 new_id: crate::storage::unified::entity::EntityId,
8666 stamper_xid: crate::storage::transaction::snapshot::Xid,
8667 previous_xmax: crate::storage::transaction::snapshot::Xid,
8668 ) {
8669 self.inner
8670 .pending_versioned_updates
8671 .write()
8672 .entry(conn_id)
8673 .or_default()
8674 .push((
8675 collection.to_string(),
8676 old_id,
8677 new_id,
8678 stamper_xid,
8679 previous_xmax,
8680 ));
8681 }
8682
8683 fn with_deferred_store_wal_if_transaction<T>(
8684 &self,
8685 f: impl FnOnce() -> RedDBResult<T>,
8686 ) -> RedDBResult<T> {
8687 let conn_id = current_connection_id();
8688 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
8689 return f();
8690 }
8691
8692 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8693 let result = f();
8694 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8695 match result {
8696 Ok(value) => {
8697 self.record_pending_store_wal_actions(conn_id, captured);
8698 Ok(value)
8699 }
8700 Err(err) => Err(err),
8701 }
8702 }
8703
8704 fn with_deferred_store_wal_for_dml<T>(
8705 &self,
8706 capture_autocommit_events: bool,
8707 f: impl FnOnce() -> RedDBResult<T>,
8708 ) -> RedDBResult<T> {
8709 let conn_id = current_connection_id();
8710 if self.inner.tx_contexts.read().contains_key(&conn_id) {
8711 return self.with_deferred_store_wal_if_transaction(f);
8712 }
8713 if !capture_autocommit_events {
8714 return f();
8715 }
8716
8717 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
8718 let result = f();
8719 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
8720 self.inner
8721 .db
8722 .store()
8723 .append_deferred_store_wal_actions(captured)
8724 .map_err(|err| RedDBError::Internal(err.to_string()))?;
8725 result
8726 }
8727
8728 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
8729 !query.suppress_events
8730 && self.collection_has_event_subscriptions_for_operation(
8731 &query.table,
8732 crate::catalog::SubscriptionOperation::Insert,
8733 )
8734 }
8735
8736 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
8737 !query.suppress_events
8738 && self.collection_has_event_subscriptions_for_operation(
8739 &query.table,
8740 crate::catalog::SubscriptionOperation::Update,
8741 )
8742 }
8743
8744 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
8745 !query.suppress_events
8746 && self.collection_has_event_subscriptions_for_operation(
8747 &query.table,
8748 crate::catalog::SubscriptionOperation::Delete,
8749 )
8750 }
8751
8752 fn collection_has_event_subscriptions_for_operation(
8753 &self,
8754 collection: &str,
8755 operation: crate::catalog::SubscriptionOperation,
8756 ) -> bool {
8757 let Some(contract) = self.db().collection_contract_arc(collection) else {
8758 return false;
8759 };
8760 contract.subscriptions.iter().any(|subscription| {
8761 subscription.enabled
8762 && (subscription.ops_filter.is_empty()
8763 || subscription.ops_filter.contains(&operation))
8764 })
8765 }
8766
8767 fn record_pending_store_wal_actions(
8768 &self,
8769 conn_id: u64,
8770 actions: crate::storage::unified::DeferredStoreWalActions,
8771 ) {
8772 if actions.is_empty() {
8773 return;
8774 }
8775 let mut guard = self.inner.pending_store_wal_actions.write();
8776 guard.entry(conn_id).or_default().extend(actions);
8777 }
8778
8779 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
8780 let Some(actions) = self
8781 .inner
8782 .pending_store_wal_actions
8783 .write()
8784 .remove(&conn_id)
8785 else {
8786 return Ok(());
8787 };
8788 self.inner
8789 .db
8790 .store()
8791 .append_deferred_store_wal_actions(actions)
8792 .map_err(|err| RedDBError::Internal(err.to_string()))
8793 }
8794
8795 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
8796 self.inner
8797 .pending_store_wal_actions
8798 .write()
8799 .remove(&conn_id);
8800 }
8801
8802 fn xid_conflicts_with_snapshot(
8803 &self,
8804 xid: crate::storage::transaction::snapshot::Xid,
8805 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8806 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8807 ) -> bool {
8808 xid != 0
8809 && !own_xids.contains(&xid)
8810 && !self.inner.snapshot_manager.is_aborted(xid)
8811 && !self.inner.snapshot_manager.is_active(xid)
8812 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
8813 }
8814
8815 fn conflict_error(
8816 collection: &str,
8817 logical_id: crate::storage::unified::entity::EntityId,
8818 xid: crate::storage::transaction::snapshot::Xid,
8819 ) -> RedDBError {
8820 RedDBError::Query(format!(
8821 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
8822 logical_id.raw()
8823 ))
8824 }
8825
8826 fn check_logical_row_conflict(
8827 &self,
8828 collection: &str,
8829 logical_id: crate::storage::unified::entity::EntityId,
8830 excluded_ids: &[crate::storage::unified::entity::EntityId],
8831 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8832 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8833 ) -> RedDBResult<()> {
8834 let store = self.inner.db.store();
8835 let Some(manager) = store.get_collection(collection) else {
8836 return Ok(());
8837 };
8838
8839 for candidate in manager.query_all(|_| true) {
8840 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
8841 continue;
8842 }
8843 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
8844 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
8845 }
8846 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
8847 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
8848 }
8849 }
8850 Ok(())
8851 }
8852
8853 pub(crate) fn check_table_row_write_conflicts(
8854 &self,
8855 conn_id: u64,
8856 snapshot: &crate::storage::transaction::snapshot::Snapshot,
8857 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
8858 ) -> RedDBResult<()> {
8859 let versioned_updates = self
8860 .inner
8861 .pending_versioned_updates
8862 .read()
8863 .get(&conn_id)
8864 .cloned()
8865 .unwrap_or_default();
8866 let tombstones = self
8867 .inner
8868 .pending_tombstones
8869 .read()
8870 .get(&conn_id)
8871 .cloned()
8872 .unwrap_or_default();
8873
8874 let store = self.inner.db.store();
8875 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
8876 let Some(manager) = store.get_collection(&collection) else {
8877 continue;
8878 };
8879 let Some(old) = manager.get(old_id) else {
8880 continue;
8881 };
8882 let logical_id = old.logical_id();
8883 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
8884 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
8885 }
8886 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
8887 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
8888 }
8889 self.check_logical_row_conflict(
8890 &collection,
8891 logical_id,
8892 &[old_id, new_id],
8893 snapshot,
8894 own_xids,
8895 )?;
8896 }
8897
8898 for (collection, id, xid, previous_xmax) in tombstones {
8899 let Some(manager) = store.get_collection(&collection) else {
8900 continue;
8901 };
8902 let Some(entity) = manager.get(id) else {
8903 continue;
8904 };
8905 let logical_id = entity.logical_id();
8906 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
8907 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
8908 }
8909 if entity.xmax != xid
8910 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
8911 {
8912 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
8913 }
8914 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
8915 }
8916
8917 Ok(())
8918 }
8919
8920 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
8921 let versioned_updates = self
8922 .inner
8923 .pending_versioned_updates
8924 .read()
8925 .get(&conn_id)
8926 .cloned()
8927 .unwrap_or_default();
8928 let tombstones = self
8929 .inner
8930 .pending_tombstones
8931 .read()
8932 .get(&conn_id)
8933 .cloned()
8934 .unwrap_or_default();
8935
8936 let store = self.inner.db.store();
8937 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
8938 if let Some(manager) = store.get_collection(&collection) {
8939 if let Some(mut entity) = manager.get(old_id) {
8940 entity.set_xmax(xid);
8941 let _ = manager.update(entity);
8942 }
8943 }
8944 }
8945 for (collection, id, xid, _previous_xmax) in tombstones {
8946 if let Some(manager) = store.get_collection(&collection) {
8947 if let Some(mut entity) = manager.get(id) {
8948 entity.set_xmax(xid);
8949 let _ = manager.update(entity);
8950 }
8951 }
8952 }
8953 }
8954
8955 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
8956 self.inner
8957 .pending_versioned_updates
8958 .write()
8959 .remove(&conn_id);
8960 }
8961
8962 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
8963 let Some(pending) = self
8964 .inner
8965 .pending_versioned_updates
8966 .write()
8967 .remove(&conn_id)
8968 else {
8969 return;
8970 };
8971
8972 let store = self.inner.db.store();
8973 for (collection, old_id, new_id, xid, previous_xmax) in pending {
8974 if let Some(manager) = store.get_collection(&collection) {
8975 if let Some(mut old) = manager.get(old_id) {
8976 if old.xmax == xid {
8977 old.set_xmax(previous_xmax);
8978 let _ = manager.update(old);
8979 }
8980 }
8981 }
8982 let _ = store.delete_batch(&collection, &[new_id]);
8983 }
8984 }
8985
8986 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
8987 let mut guard = self.inner.pending_versioned_updates.write();
8988 let Some(pending) = guard.get_mut(&conn_id) else {
8989 return 0;
8990 };
8991
8992 let store = self.inner.db.store();
8993 let mut reverted = 0usize;
8994 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
8995 if *xid < stamper_xid {
8996 return true;
8997 }
8998 if let Some(manager) = store.get_collection(collection) {
8999 if let Some(mut old) = manager.get(*old_id) {
9000 if old.xmax == *xid {
9001 old.set_xmax(*previous_xmax);
9002 let _ = manager.update(old);
9003 }
9004 }
9005 }
9006 let _ = store.delete_batch(collection, &[*new_id]);
9007 reverted += 1;
9008 false
9009 });
9010 if pending.is_empty() {
9011 guard.remove(&conn_id);
9012 }
9013 reverted
9014 }
9015
9016 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
9021 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
9022 return;
9023 };
9024 if pending.is_empty() {
9025 return;
9026 }
9027
9028 let store = self.inner.db.store();
9029 for (collection, id, _xid, _previous_xmax) in pending {
9030 store.context_index().remove_entity(id);
9031 self.cdc_emit(
9032 crate::replication::cdc::ChangeOperation::Delete,
9033 &collection,
9034 id.raw(),
9035 "entity",
9036 );
9037 }
9038 }
9039
9040 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
9047 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
9048 return;
9049 };
9050
9051 let store = self.inner.db.store();
9052 for (collection, id, xid, previous_xmax) in pending {
9053 let Some(manager) = store.get_collection(&collection) else {
9054 continue;
9055 };
9056 if let Some(mut entity) = manager.get(id) {
9057 if entity.xmax == xid {
9058 entity.set_xmax(previous_xmax);
9059 let _ = manager.update(entity);
9060 }
9061 }
9062 }
9063 }
9064
9065 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
9066 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
9067 return;
9068 };
9069 for event in pending {
9070 self.cdc_emit_kv(
9071 event.op,
9072 &event.collection,
9073 &event.key,
9074 0,
9075 event.before,
9076 event.after,
9077 );
9078 }
9079 }
9080
9081 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
9082 self.inner.pending_kv_watch_events.write().remove(&conn_id);
9083 }
9084
9085 fn materialize_graph_with_rls(
9094 &self,
9095 ) -> RedDBResult<(
9096 crate::storage::engine::GraphStore,
9097 std::collections::HashMap<
9098 String,
9099 std::collections::HashMap<String, crate::storage::schema::Value>,
9100 >,
9101 crate::storage::query::unified::EdgeProperties,
9102 )> {
9103 use crate::storage::engine::GraphStore;
9104 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
9105 use crate::storage::unified::entity::{EntityData, EntityKind};
9106 use std::collections::{HashMap, HashSet};
9107
9108 let store = self.inner.db.store();
9109 let snap_ctx = capture_current_snapshot();
9110 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
9111
9112 let graph = GraphStore::new();
9113 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
9114 HashMap::new();
9115 let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
9116 let mut allowed_nodes: HashSet<String> = HashSet::new();
9117
9118 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
9122 HashMap::new();
9123 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
9124 HashMap::new();
9125
9126 let collections = store.list_collections();
9127
9128 for collection in &collections {
9130 let Some(manager) = store.get_collection(collection) else {
9131 continue;
9132 };
9133 let entities = manager.query_all(|_| true);
9134 for entity in entities {
9135 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
9136 continue;
9137 }
9138 let EntityKind::GraphNode(ref node) = entity.kind else {
9139 continue;
9140 };
9141 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
9142 continue;
9143 }
9144 let id_str = entity.id.raw().to_string();
9145 graph
9146 .add_node_with_label(
9147 &id_str,
9148 &node.label,
9149 &super::graph_node_label(&node.node_type),
9150 )
9151 .map_err(|err| RedDBError::Query(err.to_string()))?;
9152 allowed_nodes.insert(id_str.clone());
9153 if let EntityData::Node(node_data) = &entity.data {
9154 node_properties.insert(id_str, node_data.properties.clone());
9155 }
9156 }
9157 }
9158
9159 for collection in &collections {
9163 let Some(manager) = store.get_collection(collection) else {
9164 continue;
9165 };
9166 let entities = manager.query_all(|_| true);
9167 for entity in entities {
9168 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
9169 continue;
9170 }
9171 let EntityKind::GraphEdge(ref edge) = entity.kind else {
9172 continue;
9173 };
9174 if !allowed_nodes.contains(&edge.from_node)
9175 || !allowed_nodes.contains(&edge.to_node)
9176 {
9177 continue;
9178 }
9179 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
9180 continue;
9181 }
9182 let weight = match &entity.data {
9183 EntityData::Edge(e) => e.weight,
9184 _ => edge.weight as f32 / 1000.0,
9185 };
9186 let edge_label = super::graph_edge_label(&edge.label);
9187 graph
9188 .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
9189 .map_err(|err| RedDBError::Query(err.to_string()))?;
9190 if let EntityData::Edge(edge_data) = &entity.data {
9191 edge_properties.insert(
9192 (edge.from_node.clone(), edge_label, edge.to_node.clone()),
9193 edge_data.properties.clone(),
9194 );
9195 }
9196 }
9197 }
9198
9199 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
9203
9204 Ok((graph, node_properties, edge_properties))
9205 }
9206
9207 pub(crate) fn stamp_xmin_if_in_txn(
9222 &self,
9223 collection: &str,
9224 id: crate::storage::unified::entity::EntityId,
9225 ) {
9226 let Some(xid) = self.current_xid() else {
9227 return;
9228 };
9229 let store = self.inner.db.store();
9230 let Some(manager) = store.get_collection(collection) else {
9231 return;
9232 };
9233 if let Some(mut entity) = manager.get(id) {
9234 entity.set_xmin(xid);
9235 let _ = manager.update(entity);
9236 }
9237 }
9238
9239 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
9247 let mut guard = self.inner.pending_tombstones.write();
9248 let Some(pending) = guard.get_mut(&conn_id) else {
9249 return 0;
9250 };
9251
9252 let store = self.inner.db.store();
9253 let mut revived = 0usize;
9254 pending.retain(|(collection, id, xid, previous_xmax)| {
9255 if *xid < stamper_xid {
9256 return true;
9258 }
9259 if let Some(manager) = store.get_collection(collection) {
9260 if let Some(mut entity) = manager.get(*id) {
9261 if entity.xmax == *xid {
9262 entity.set_xmax(*previous_xmax);
9263 let _ = manager.update(entity);
9264 revived += 1;
9265 }
9266 }
9267 }
9268 false
9269 });
9270 if pending.is_empty() {
9271 guard.remove(&conn_id);
9272 }
9273 revived
9274 }
9275
9276 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
9285 let conn_id = current_connection_id();
9286 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
9287 return ctx.snapshot;
9288 }
9289 let high_water = self.inner.snapshot_manager.peek_next_xid();
9295 self.inner.snapshot_manager.snapshot(high_water)
9296 }
9297
9298 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
9308 let conn_id = current_connection_id();
9309 self.inner
9310 .tx_contexts
9311 .read()
9312 .get(&conn_id)
9313 .map(|ctx| ctx.writer_xid())
9314 }
9315
9316 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
9319 Arc::clone(&self.inner.snapshot_manager)
9320 }
9321
9322 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
9323 let manager = &self.inner.snapshot_manager;
9324 let next_xid = manager.peek_next_xid();
9325 let mut cutoff = next_xid;
9326 if let Some(oldest_active) = manager.oldest_active_xid() {
9327 cutoff = cutoff.min(oldest_active);
9328 }
9329 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
9330 cutoff = cutoff.min(oldest_pinned);
9331 }
9332 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
9333 if retention_xids > 0 {
9334 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
9335 }
9336 cutoff
9337 }
9338
9339 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
9340 let registered = self.inner.index_store.list_indices(table);
9341 if registered.is_empty() {
9342 return Ok(());
9343 }
9344 let store = self.inner.db.store();
9345 let Some(manager) = store.get_collection(table) else {
9346 return Ok(());
9347 };
9348 let entity_fields = manager
9349 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
9350 .into_iter()
9351 .map(|entity| (entity.id, table_row_index_fields(&entity)))
9352 .collect::<Vec<_>>();
9353
9354 for index in registered {
9355 self.inner.index_store.drop_index(&index.name, table);
9356 self.inner
9357 .index_store
9358 .create_index(
9359 &index.name,
9360 table,
9361 &index.columns,
9362 index.method,
9363 index.unique,
9364 &entity_fields,
9365 )
9366 .map_err(RedDBError::Internal)?;
9367 self.inner.index_store.register(index);
9368 }
9369 self.invalidate_plan_cache();
9370 Ok(())
9371 }
9372
9373 pub fn current_txn_own_xids(
9378 &self,
9379 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
9380 let mut set = std::collections::HashSet::new();
9381 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
9382 set.insert(ctx.xid);
9383 for (_, sub) in &ctx.savepoints {
9384 set.insert(*sub);
9385 }
9386 for sub in &ctx.released_sub_xids {
9387 set.insert(*sub);
9388 }
9389 }
9390 set
9391 }
9392
9393 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
9400 Arc::clone(&self.inner.foreign_tables)
9401 }
9402
9403 pub fn is_rls_enabled(&self, table: &str) -> bool {
9405 self.inner.rls_enabled_tables.read().contains(table)
9406 }
9407
9408 pub fn matching_rls_policies(
9415 &self,
9416 table: &str,
9417 role: Option<&str>,
9418 action: crate::storage::query::ast::PolicyAction,
9419 ) -> Vec<crate::storage::query::ast::Filter> {
9420 self.matching_rls_policies_for_kind(
9425 table,
9426 role,
9427 action,
9428 crate::storage::query::ast::PolicyTargetKind::Table,
9429 )
9430 }
9431
9432 pub fn matching_rls_policies_for_kind(
9440 &self,
9441 table: &str,
9442 role: Option<&str>,
9443 action: crate::storage::query::ast::PolicyAction,
9444 kind: crate::storage::query::ast::PolicyTargetKind,
9445 ) -> Vec<crate::storage::query::ast::Filter> {
9446 if !self.is_rls_enabled(table) {
9447 return Vec::new();
9448 }
9449 let policies = self.inner.rls_policies.read();
9450 policies
9451 .iter()
9452 .filter_map(|((t, _), p)| {
9453 if t != table {
9454 return None;
9455 }
9456 if p.target_kind != kind
9465 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
9466 {
9467 return None;
9468 }
9469 if let Some(a) = p.action {
9471 if a != action {
9472 return None;
9473 }
9474 }
9475 if let Some(p_role) = p.role.as_deref() {
9477 match role {
9478 Some(r) if r == p_role => {}
9479 _ => return None,
9480 }
9481 }
9482 Some((*p.using).clone())
9483 })
9484 .collect()
9485 }
9486
9487 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
9488 let store = self.inner.db.store();
9489 if let Some(stats) =
9490 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
9491 {
9492 crate::storage::query::planner::stats_catalog::persist_table_stats(
9493 store.as_ref(),
9494 &stats,
9495 );
9496 } else {
9497 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
9498 }
9499 self.invalidate_plan_cache();
9500 }
9501
9502 pub(crate) fn note_table_write(&self, table: &str) {
9503 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
9508 if !already_dirty {
9509 self.inner
9510 .planner_dirty_tables
9511 .write()
9512 .insert(table.to_string());
9513 }
9514 self.invalidate_result_cache_for_table(table);
9515 }
9516
9517 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
9525 let explain = self.explain_query(inner_sql)?;
9526
9527 let columns = vec![
9528 "op".to_string(),
9529 "source".to_string(),
9530 "est_rows".to_string(),
9531 "est_cost".to_string(),
9532 "depth".to_string(),
9533 ];
9534
9535 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
9536
9537 for name in &explain.cte_materializations {
9543 use std::sync::Arc;
9544 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
9545 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
9546 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
9547 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
9548 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
9549 rec.set_arc(Arc::from("depth"), Value::Integer(0));
9550 records.push(rec);
9551 }
9552
9553 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
9554
9555 let result = crate::storage::query::unified::UnifiedResult {
9556 columns,
9557 records,
9558 stats: Default::default(),
9559 pre_serialized_json: None,
9560 };
9561
9562 Ok(RuntimeQueryResult {
9563 query: raw_query.to_string(),
9564 mode: explain.mode,
9565 statement: "explain",
9566 engine: "runtime-explain",
9567 result,
9568 affected_rows: 0,
9569 statement_type: "select",
9570 })
9571 }
9572
9573 pub(super) fn check_query_privilege(
9581 &self,
9582 expr: &crate::storage::query::ast::QueryExpr,
9583 ) -> Result<(), String> {
9584 use crate::auth::privileges::{Action, AuthzContext, Resource};
9585 use crate::auth::UserId;
9586 use crate::storage::query::ast::QueryExpr;
9587
9588 let auth_store = match self.inner.auth_store.read().clone() {
9593 Some(s) => s,
9594 None => return Ok(()),
9595 };
9596
9597 let (username, role) = match current_auth_identity() {
9603 Some(p) => p,
9604 None => return Ok(()),
9605 };
9606 let tenant = current_tenant();
9607
9608 let ctx = AuthzContext {
9609 principal: &username,
9610 effective_role: role,
9611 tenant: tenant.as_deref(),
9612 };
9613 let principal_id = UserId::from_parts(tenant.as_deref(), &username);
9614
9615 let (action, resource) = match expr {
9617 QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
9618 QueryExpr::QueueSelect(q) => (Action::Select, Resource::table_from_name(&q.queue)),
9619 QueryExpr::Graph(g) => {
9620 if auth_store.iam_authorization_enabled() {
9621 self.check_graph_property_projection_privilege(
9622 &auth_store,
9623 &principal_id,
9624 role,
9625 tenant.as_deref(),
9626 g,
9627 )?;
9628 return Ok(());
9629 }
9630 return Ok(());
9631 }
9632 QueryExpr::Vector(v) => {
9633 if auth_store.iam_authorization_enabled() {
9634 self.check_table_like_column_projection_privilege(
9635 &auth_store,
9636 &principal_id,
9637 role,
9638 tenant.as_deref(),
9639 &v.collection,
9640 &["content".to_string()],
9641 )?;
9642 return Ok(());
9643 }
9644 return Ok(());
9645 }
9646 QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
9647 QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
9648 QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
9649 QueryExpr::Join(_) => (Action::Select, Resource::Database),
9653 QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
9656 return if role == crate::auth::Role::Admin {
9657 Ok(())
9658 } else {
9659 Err(format!(
9660 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
9661 username, role
9662 ))
9663 };
9664 }
9665 QueryExpr::CreateIamPolicy { id, .. } => {
9666 return self.check_policy_management_privilege(
9667 &auth_store,
9668 &principal_id,
9669 role,
9670 tenant.as_deref(),
9671 "policy:put",
9672 "policy",
9673 id,
9674 );
9675 }
9676 QueryExpr::DropIamPolicy { id } => {
9677 return self.check_policy_management_privilege(
9678 &auth_store,
9679 &principal_id,
9680 role,
9681 tenant.as_deref(),
9682 "policy:drop",
9683 "policy",
9684 id,
9685 );
9686 }
9687 QueryExpr::AttachPolicy { policy_id, .. } => {
9688 return self.check_policy_management_privilege(
9689 &auth_store,
9690 &principal_id,
9691 role,
9692 tenant.as_deref(),
9693 "policy:attach",
9694 "policy",
9695 policy_id,
9696 );
9697 }
9698 QueryExpr::DetachPolicy { policy_id, .. } => {
9699 return self.check_policy_management_privilege(
9700 &auth_store,
9701 &principal_id,
9702 role,
9703 tenant.as_deref(),
9704 "policy:detach",
9705 "policy",
9706 policy_id,
9707 );
9708 }
9709 QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
9710 return Ok(());
9711 }
9712 QueryExpr::SimulatePolicy { .. } => {
9713 return self.check_policy_management_privilege(
9714 &auth_store,
9715 &principal_id,
9716 role,
9717 tenant.as_deref(),
9718 "policy:simulate",
9719 "policy",
9720 "*",
9721 );
9722 }
9723 QueryExpr::DropTable(q) => {
9726 return self.check_ddl_collection_privilege(
9727 &auth_store,
9728 &principal_id,
9729 role,
9730 tenant.as_deref(),
9731 &username,
9732 "drop",
9733 &q.name,
9734 );
9735 }
9736 QueryExpr::DropGraph(q) => {
9737 return self.check_ddl_collection_privilege(
9738 &auth_store,
9739 &principal_id,
9740 role,
9741 tenant.as_deref(),
9742 &username,
9743 "drop",
9744 &q.name,
9745 );
9746 }
9747 QueryExpr::DropVector(q) => {
9748 return self.check_ddl_collection_privilege(
9749 &auth_store,
9750 &principal_id,
9751 role,
9752 tenant.as_deref(),
9753 &username,
9754 "drop",
9755 &q.name,
9756 );
9757 }
9758 QueryExpr::DropDocument(q) => {
9759 return self.check_ddl_collection_privilege(
9760 &auth_store,
9761 &principal_id,
9762 role,
9763 tenant.as_deref(),
9764 &username,
9765 "drop",
9766 &q.name,
9767 );
9768 }
9769 QueryExpr::DropKv(q) => {
9770 return self.check_ddl_collection_privilege(
9771 &auth_store,
9772 &principal_id,
9773 role,
9774 tenant.as_deref(),
9775 &username,
9776 "drop",
9777 &q.name,
9778 );
9779 }
9780 QueryExpr::DropCollection(q) => {
9781 return self.check_ddl_collection_privilege(
9782 &auth_store,
9783 &principal_id,
9784 role,
9785 tenant.as_deref(),
9786 &username,
9787 "drop",
9788 &q.name,
9789 );
9790 }
9791 QueryExpr::Truncate(q) => {
9792 return self.check_ddl_collection_privilege(
9793 &auth_store,
9794 &principal_id,
9795 role,
9796 tenant.as_deref(),
9797 &username,
9798 "truncate",
9799 &q.name,
9800 );
9801 }
9802 QueryExpr::CreateTable(_)
9804 | QueryExpr::CreateCollection(_)
9805 | QueryExpr::CreateVector(_)
9806 | QueryExpr::AlterTable(_)
9807 | QueryExpr::CreateIndex(_)
9808 | QueryExpr::DropIndex(_)
9809 | QueryExpr::CreateSchema(_)
9810 | QueryExpr::DropSchema(_)
9811 | QueryExpr::CreateSequence(_)
9812 | QueryExpr::DropSequence(_)
9813 | QueryExpr::CreateView(_)
9814 | QueryExpr::DropView(_)
9815 | QueryExpr::RefreshMaterializedView(_)
9816 | QueryExpr::CreatePolicy(_)
9817 | QueryExpr::DropPolicy(_)
9818 | QueryExpr::CreateServer(_)
9819 | QueryExpr::DropServer(_)
9820 | QueryExpr::CreateForeignTable(_)
9821 | QueryExpr::DropForeignTable(_)
9822 | QueryExpr::CreateTimeSeries(_)
9823 | QueryExpr::DropTimeSeries(_)
9824 | QueryExpr::CreateQueue(_)
9825 | QueryExpr::AlterQueue(_)
9826 | QueryExpr::DropQueue(_)
9827 | QueryExpr::CreateTree(_)
9828 | QueryExpr::DropTree(_) => {
9829 return if role >= crate::auth::Role::Write {
9830 Ok(())
9831 } else {
9832 Err(format!(
9833 "principal=`{}` role=`{:?}` cannot issue DDL",
9834 username, role
9835 ))
9836 };
9837 }
9838 QueryExpr::CreateMigration(_) => {
9840 return if role >= crate::auth::Role::Write {
9841 Ok(())
9842 } else {
9843 Err(format!(
9844 "principal=`{}` role=`{:?}` cannot issue CREATE MIGRATION",
9845 username, role
9846 ))
9847 };
9848 }
9849 QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
9851 return if role == crate::auth::Role::Admin {
9852 Ok(())
9853 } else {
9854 Err(format!(
9855 "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
9856 username, role
9857 ))
9858 };
9859 }
9860 QueryExpr::ExplainMigration(_) => return Ok(()),
9862 _ => return Ok(()),
9866 };
9867
9868 if auth_store.iam_authorization_enabled() {
9869 let iam_action = legacy_action_to_iam(action);
9870 let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
9871 let iam_ctx = runtime_iam_context(
9872 role,
9873 tenant.as_deref(),
9874 auth_store.principal_is_system_owned(&principal_id),
9875 );
9876 if !auth_store.check_policy_authz_with_role(
9877 &principal_id,
9878 iam_action,
9879 &iam_resource,
9880 &iam_ctx,
9881 role,
9882 ) {
9883 return Err(format!(
9884 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9885 username, iam_action, iam_resource.kind, iam_resource.name
9886 ));
9887 }
9888
9889 if let QueryExpr::Table(table) = expr {
9890 self.check_table_column_projection_privilege(
9891 &auth_store,
9892 &principal_id,
9893 &iam_ctx,
9894 table,
9895 )?;
9896 }
9897
9898 if let QueryExpr::Update(update) = expr {
9899 let columns = update_set_target_columns(update);
9900 if !columns.is_empty() {
9901 let request = column_access_request_for_table_update(&update.table, columns);
9902 let outcome =
9903 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
9904 if let Some(denied) = outcome.first_denied_column() {
9905 return Err(format!(
9906 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
9907 username, iam_action, denied.resource.kind, denied.resource.name
9908 ));
9909 }
9910 if !outcome.allowed() {
9911 return Err(format!(
9912 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
9913 username,
9914 iam_action,
9915 outcome.table_resource.kind,
9916 outcome.table_resource.name
9917 ));
9918 }
9919 }
9920
9921 if let Some(columns) = update_returning_columns_for_policy(self, update) {
9922 let request = column_access_request_for_table_select(&update.table, columns);
9923 let outcome =
9924 auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
9925 if let Some(denied) = outcome.first_denied_column() {
9926 return Err(format!(
9927 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
9928 username, denied.resource.kind, denied.resource.name
9929 ));
9930 }
9931 if !outcome.allowed() {
9932 return Err(format!(
9933 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9934 username, outcome.table_resource.kind, outcome.table_resource.name
9935 ));
9936 }
9937 }
9938 }
9939
9940 Ok(())
9941 } else {
9942 auth_store
9943 .check_grant(&ctx, action, &resource)
9944 .map_err(|e| e.to_string())
9945 }
9946 }
9947
9948 fn check_table_column_projection_privilege(
9949 &self,
9950 auth_store: &Arc<crate::auth::store::AuthStore>,
9951 principal: &crate::auth::UserId,
9952 ctx: &crate::auth::policies::EvalContext,
9953 table: &crate::storage::query::ast::TableQuery,
9954 ) -> Result<(), String> {
9955 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
9956
9957 let columns = requested_table_columns_for_policy(table);
9958 if columns.is_empty() {
9959 return Ok(());
9960 }
9961
9962 let request = ColumnAccessRequest::select(table.table.clone(), columns);
9963 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
9964 if outcome.allowed() {
9965 return Ok(());
9966 }
9967
9968 if !matches!(
9969 outcome.table_decision,
9970 crate::auth::policies::Decision::Allow { .. }
9971 | crate::auth::policies::Decision::AdminBypass
9972 ) {
9973 return Err(format!(
9974 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9975 principal, outcome.table_resource.kind, outcome.table_resource.name
9976 ));
9977 }
9978
9979 let denied = outcome
9980 .first_denied_column()
9981 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
9982 match denied {
9983 Some(decision) => Err(format!(
9984 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
9985 principal, decision.resource.kind, decision.resource.name
9986 )),
9987 None => Ok(()),
9988 }
9989 }
9990
9991 fn check_graph_property_projection_privilege(
9992 &self,
9993 auth_store: &Arc<crate::auth::store::AuthStore>,
9994 principal: &crate::auth::UserId,
9995 role: crate::auth::Role,
9996 tenant: Option<&str>,
9997 query: &crate::storage::query::ast::GraphQuery,
9998 ) -> Result<(), String> {
9999 let columns = explicit_graph_projection_properties(query);
10000 if columns.is_empty() {
10001 return Ok(());
10002 }
10003 self.check_table_like_column_projection_privilege(
10004 auth_store, principal, role, tenant, "graph", &columns,
10005 )
10006 }
10007
10008 fn check_table_like_column_projection_privilege(
10009 &self,
10010 auth_store: &Arc<crate::auth::store::AuthStore>,
10011 principal: &crate::auth::UserId,
10012 role: crate::auth::Role,
10013 tenant: Option<&str>,
10014 table: &str,
10015 columns: &[String],
10016 ) -> Result<(), String> {
10017 let iam_ctx = runtime_iam_context(
10018 role,
10019 tenant,
10020 auth_store.principal_is_system_owned(principal),
10021 );
10022 let request =
10023 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
10024 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
10025 if outcome.allowed() {
10026 return Ok(());
10027 }
10028 let denied = outcome
10029 .first_denied_column()
10030 .map(|d| d.resource.name.clone())
10031 .unwrap_or_else(|| format!("{table}.<unknown>"));
10032 Err(format!(
10033 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
10034 principal, denied
10035 ))
10036 }
10037
10038 fn check_policy_management_privilege(
10039 &self,
10040 auth_store: &Arc<crate::auth::store::AuthStore>,
10041 principal: &crate::auth::UserId,
10042 role: crate::auth::Role,
10043 tenant: Option<&str>,
10044 action: &str,
10045 resource_kind: &str,
10046 resource_name: &str,
10047 ) -> Result<(), String> {
10048 let ctx = runtime_iam_context(
10049 role,
10050 tenant,
10051 auth_store.principal_is_system_owned(principal),
10052 );
10053
10054 if !auth_store.iam_authorization_enabled() {
10055 return if role == crate::auth::Role::Admin {
10056 Ok(())
10057 } else {
10058 Err(format!(
10059 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
10060 principal, role
10061 ))
10062 };
10063 }
10064
10065 let mut resource = crate::auth::policies::ResourceRef::new(
10066 resource_kind.to_string(),
10067 resource_name.to_string(),
10068 );
10069 if let Some(t) = tenant {
10070 resource = resource.with_tenant(t.to_string());
10071 }
10072 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
10073 Ok(())
10074 } else {
10075 Err(format!(
10076 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
10077 principal, action, resource.kind, resource.name
10078 ))
10079 }
10080 }
10081
10082 fn check_managed_config_write_for_set_config(&self, key: &str) -> RedDBResult<()> {
10083 let Some(auth_store) = self.inner.auth_store.read().clone() else {
10084 return Ok(());
10085 };
10086 let (username, role) = current_auth_identity()
10087 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
10088 let tenant = current_tenant();
10089 let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
10090 let ctx = runtime_iam_context(
10091 role,
10092 tenant.as_deref(),
10093 auth_store.principal_is_system_owned(&principal),
10094 );
10095 let gate = crate::auth::managed_config::ManagedConfigGate::new(
10096 self.inner.config_registry.as_ref(),
10097 );
10098 match gate.check_write(&auth_store, &principal, &ctx, key) {
10099 crate::auth::managed_config::ManagedConfigDecision::PassThrough { .. }
10100 | crate::auth::managed_config::ManagedConfigDecision::Allow { .. } => Ok(()),
10101 crate::auth::managed_config::ManagedConfigDecision::Deny { reason, .. } => {
10102 Err(RedDBError::Query(format!(
10103 "permission denied: managed config mutation blocked for `{key}`: {reason}"
10104 )))
10105 }
10106 }
10107 }
10108
10109 fn check_ddl_collection_privilege(
10117 &self,
10118 auth_store: &Arc<crate::auth::store::AuthStore>,
10119 principal: &crate::auth::UserId,
10120 role: crate::auth::Role,
10121 tenant: Option<&str>,
10122 username: &str,
10123 action: &str,
10124 collection: &str,
10125 ) -> Result<(), String> {
10126 if role < crate::auth::Role::Write {
10127 let msg = format!(
10128 "principal=`{}` role=`{:?}` cannot issue DDL",
10129 username, role
10130 );
10131 self.inner.audit_log.record(
10132 action,
10133 username,
10134 collection,
10135 "denied",
10136 crate::json::Value::Null,
10137 );
10138 return Err(msg);
10139 }
10140
10141 if !auth_store.iam_authorization_enabled() {
10142 self.inner.audit_log.record(
10143 action,
10144 username,
10145 collection,
10146 "ok",
10147 crate::json::Value::Null,
10148 );
10149 return Ok(());
10150 }
10151
10152 let resource_name = collection.to_string();
10153 let mut resource = crate::auth::policies::ResourceRef::new(
10154 "collection".to_string(),
10155 resource_name.clone(),
10156 );
10157 if let Some(t) = tenant {
10158 resource = resource.with_tenant(t.to_string());
10159 }
10160 let ctx = runtime_iam_context(
10161 role,
10162 tenant,
10163 auth_store.principal_is_system_owned(principal),
10164 );
10165 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
10166 self.inner.audit_log.record(
10167 action,
10168 username,
10169 &resource_name,
10170 "ok",
10171 crate::json::Value::Null,
10172 );
10173 Ok(())
10174 } else {
10175 self.inner.audit_log.record(
10176 action,
10177 username,
10178 &resource_name,
10179 "denied",
10180 crate::json::Value::Null,
10181 );
10182 Err(format!(
10183 "principal=`{}` action=`{}` resource=`collection:{}` denied by IAM policy",
10184 username, action, resource_name
10185 ))
10186 }
10187 }
10188
10189 fn execute_grant_statement(
10191 &self,
10192 query: &str,
10193 stmt: &crate::storage::query::ast::GrantStmt,
10194 ) -> RedDBResult<RuntimeQueryResult> {
10195 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
10196 use crate::auth::UserId;
10197 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
10198
10199 let auth_store = self
10200 .inner
10201 .auth_store
10202 .read()
10203 .clone()
10204 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10205
10206 let (gname, grole) = current_auth_identity().ok_or_else(|| {
10208 RedDBError::Query("GRANT requires an authenticated principal".to_string())
10209 })?;
10210 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
10211 let granter_role = grole;
10212
10213 let mut actions: Vec<Action> = Vec::new();
10215 if stmt.all {
10216 actions.push(Action::All);
10217 } else {
10218 for kw in &stmt.actions {
10219 let a = Action::from_keyword(kw).ok_or_else(|| {
10220 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
10221 })?;
10222 actions.push(a);
10223 }
10224 }
10225
10226 let mut applied = 0usize;
10228 for obj in &stmt.objects {
10229 let resource = match stmt.object_kind {
10230 GrantObjectKind::Table => Resource::Table {
10231 schema: obj.schema.clone(),
10232 table: obj.name.clone(),
10233 },
10234 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
10235 GrantObjectKind::Database => Resource::Database,
10236 GrantObjectKind::Function => Resource::Function {
10237 schema: obj.schema.clone(),
10238 name: obj.name.clone(),
10239 },
10240 };
10241 for principal in &stmt.principals {
10242 let p = match principal {
10243 GrantPrincipalRef::Public => GrantPrincipal::Public,
10244 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
10245 GrantPrincipalRef::User { tenant, name } => {
10246 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
10247 }
10248 };
10249 let tenant = granter.tenant.clone();
10252 auth_store
10253 .grant(
10254 &granter,
10255 granter_role,
10256 p.clone(),
10257 resource.clone(),
10258 actions.clone(),
10259 stmt.with_grant_option,
10260 tenant.clone(),
10261 )
10262 .map_err(|e| RedDBError::Query(e.to_string()))?;
10263
10264 if let Some(policy) =
10268 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
10269 {
10270 let pid = policy.id.clone();
10271 auth_store
10272 .put_policy_internal(policy)
10273 .map_err(|e| RedDBError::Query(e.to_string()))?;
10274 let attachment = match &p {
10275 GrantPrincipal::User(uid) => {
10276 crate::auth::store::PrincipalRef::User(uid.clone())
10277 }
10278 GrantPrincipal::Group(group) => {
10279 crate::auth::store::PrincipalRef::Group(group.clone())
10280 }
10281 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
10282 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
10283 ),
10284 };
10285 auth_store
10286 .attach_policy(attachment, &pid)
10287 .map_err(|e| RedDBError::Query(e.to_string()))?;
10288 }
10289 applied += 1;
10290 tracing::info!(
10291 target: "audit",
10292 principal = %granter,
10293 action = "grant",
10294 "GRANT applied"
10295 );
10296 }
10297 }
10298
10299 self.invalidate_result_cache();
10300 Ok(RuntimeQueryResult::ok_message(
10301 query.to_string(),
10302 &format!("GRANT applied to {} target(s)", applied),
10303 "grant",
10304 ))
10305 }
10306
10307 fn execute_revoke_statement(
10309 &self,
10310 query: &str,
10311 stmt: &crate::storage::query::ast::RevokeStmt,
10312 ) -> RedDBResult<RuntimeQueryResult> {
10313 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
10314 use crate::auth::UserId;
10315 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
10316
10317 let auth_store = self
10318 .inner
10319 .auth_store
10320 .read()
10321 .clone()
10322 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10323
10324 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
10325 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
10326 })?;
10327 let granter_role = grole;
10328
10329 let actions: Vec<Action> = if stmt.all {
10330 vec![Action::All]
10331 } else {
10332 stmt.actions
10333 .iter()
10334 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
10335 .collect()
10336 };
10337
10338 let mut total_removed = 0usize;
10339 for obj in &stmt.objects {
10340 let resource = match stmt.object_kind {
10341 GrantObjectKind::Table => Resource::Table {
10342 schema: obj.schema.clone(),
10343 table: obj.name.clone(),
10344 },
10345 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
10346 GrantObjectKind::Database => Resource::Database,
10347 GrantObjectKind::Function => Resource::Function {
10348 schema: obj.schema.clone(),
10349 name: obj.name.clone(),
10350 },
10351 };
10352 for principal in &stmt.principals {
10353 let p = match principal {
10354 GrantPrincipalRef::Public => GrantPrincipal::Public,
10355 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
10356 GrantPrincipalRef::User { tenant, name } => {
10357 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
10358 }
10359 };
10360 let removed = auth_store
10361 .revoke(granter_role, &p, &resource, &actions)
10362 .map_err(|e| RedDBError::Query(e.to_string()))?;
10363 let _removed_policies =
10364 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
10365 total_removed += removed;
10366 }
10367 }
10368
10369 self.invalidate_result_cache();
10370 Ok(RuntimeQueryResult::ok_message(
10371 query.to_string(),
10372 &format!("REVOKE removed {} grant(s)", total_removed),
10373 "revoke",
10374 ))
10375 }
10376
10377 fn execute_alter_user_statement(
10379 &self,
10380 query: &str,
10381 stmt: &crate::storage::query::ast::AlterUserStmt,
10382 ) -> RedDBResult<RuntimeQueryResult> {
10383 use crate::auth::privileges::UserAttributes;
10384 use crate::auth::UserId;
10385 use crate::storage::query::ast::AlterUserAttribute;
10386
10387 let auth_store = self
10388 .inner
10389 .auth_store
10390 .read()
10391 .clone()
10392 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10393
10394 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
10395 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
10396 })?;
10397 if grole != crate::auth::Role::Admin {
10398 return Err(RedDBError::Query(
10399 "ALTER USER requires Admin role".to_string(),
10400 ));
10401 }
10402
10403 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
10404
10405 let mut attrs = auth_store.user_attributes(&target);
10408 let mut enable_change: Option<bool> = None;
10409
10410 for a in &stmt.attributes {
10411 match a {
10412 AlterUserAttribute::ValidUntil(ts) => {
10413 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
10417 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
10418 })?;
10419 attrs.valid_until = Some(ms);
10420 }
10421 AlterUserAttribute::ConnectionLimit(n) => {
10422 if *n < 0 {
10423 return Err(RedDBError::Query(
10424 "CONNECTION LIMIT must be non-negative".to_string(),
10425 ));
10426 }
10427 attrs.connection_limit = Some(*n as u32);
10428 }
10429 AlterUserAttribute::SetSearchPath(p) => {
10430 attrs.search_path = Some(p.clone());
10431 }
10432 AlterUserAttribute::AddGroup(g) => {
10433 if !attrs.groups.iter().any(|existing| existing == g) {
10434 attrs.groups.push(g.clone());
10435 attrs.groups.sort();
10436 }
10437 }
10438 AlterUserAttribute::DropGroup(g) => {
10439 attrs.groups.retain(|existing| existing != g);
10440 }
10441 AlterUserAttribute::Enable => enable_change = Some(true),
10442 AlterUserAttribute::Disable => enable_change = Some(false),
10443 AlterUserAttribute::Password(_) => {
10444 }
10448 }
10449 }
10450
10451 auth_store
10452 .set_user_attributes(&target, attrs)
10453 .map_err(|e| RedDBError::Query(e.to_string()))?;
10454 if let Some(en) = enable_change {
10455 auth_store
10456 .set_user_enabled(&target, en)
10457 .map_err(|e| RedDBError::Query(e.to_string()))?;
10458 }
10459 self.invalidate_result_cache();
10460 tracing::info!(
10461 target: "audit",
10462 principal = %target,
10463 action = "alter_user",
10464 "ALTER USER applied"
10465 );
10466
10467 Ok(RuntimeQueryResult::ok_message(
10468 query.to_string(),
10469 &format!("ALTER USER {} applied", target),
10470 "alter_user",
10471 ))
10472 }
10473
10474 fn execute_create_iam_policy(
10479 &self,
10480 query: &str,
10481 id: &str,
10482 json: &str,
10483 ) -> RedDBResult<RuntimeQueryResult> {
10484 use crate::auth::policies::Policy;
10485
10486 let auth_store = self
10487 .inner
10488 .auth_store
10489 .read()
10490 .clone()
10491 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10492
10493 let mut policy = Policy::from_json_str(json)
10498 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
10499 if policy.id != id {
10500 policy.id = id.to_string();
10501 }
10502 let pid = policy.id.clone();
10503 let tenant = current_tenant();
10504 let (actor_name, actor_role) = current_auth_identity()
10505 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
10506 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
10507 let eval_ctx = runtime_iam_context(
10508 actor_role,
10509 tenant.as_deref(),
10510 auth_store.principal_is_system_owned(&actor),
10511 );
10512 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
10513 let ledger = self.inner.control_event_ledger.read();
10514 let control = crate::auth::store::PolicyMutationControl {
10515 ctx: &event_ctx,
10516 ledger: ledger.as_ref(),
10517 config: self.inner.control_event_config,
10518 registry: Some(self.inner.config_registry.as_ref()),
10519 actor: &actor,
10520 eval_ctx: &eval_ctx,
10521 };
10522 auth_store
10523 .put_policy_with_control_events(policy, &control)
10524 .map_err(|e| RedDBError::Query(e.to_string()))?;
10525
10526 let principal = actor_name;
10527 tracing::info!(
10528 target: "audit",
10529 principal = %principal,
10530 action = "iam:policy.put",
10531 matched_policy_id = %pid,
10532 "CREATE POLICY applied"
10533 );
10534 self.inner.audit_log.record(
10535 "iam/policy.put",
10536 &principal,
10537 &pid,
10538 "ok",
10539 crate::json::Value::Null,
10540 );
10541
10542 self.invalidate_result_cache();
10543 Ok(RuntimeQueryResult::ok_message(
10544 query.to_string(),
10545 &format!("policy `{pid}` stored"),
10546 "create_iam_policy",
10547 ))
10548 }
10549
10550 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
10551 let auth_store = self
10552 .inner
10553 .auth_store
10554 .read()
10555 .clone()
10556 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10557 let tenant = current_tenant();
10558 let (actor_name, actor_role) = current_auth_identity()
10559 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
10560 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
10561 let eval_ctx = runtime_iam_context(
10562 actor_role,
10563 tenant.as_deref(),
10564 auth_store.principal_is_system_owned(&actor),
10565 );
10566 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
10567 let ledger = self.inner.control_event_ledger.read();
10568 let control = crate::auth::store::PolicyMutationControl {
10569 ctx: &event_ctx,
10570 ledger: ledger.as_ref(),
10571 config: self.inner.control_event_config,
10572 registry: Some(self.inner.config_registry.as_ref()),
10573 actor: &actor,
10574 eval_ctx: &eval_ctx,
10575 };
10576 auth_store
10577 .delete_policy_with_control_events(id, &control)
10578 .map_err(|e| RedDBError::Query(e.to_string()))?;
10579
10580 let principal = actor_name;
10581 tracing::info!(
10582 target: "audit",
10583 principal = %principal,
10584 action = "iam:policy.drop",
10585 matched_policy_id = %id,
10586 "DROP POLICY applied"
10587 );
10588 self.inner.audit_log.record(
10589 "iam/policy.drop",
10590 &principal,
10591 id,
10592 "ok",
10593 crate::json::Value::Null,
10594 );
10595
10596 self.invalidate_result_cache();
10597 Ok(RuntimeQueryResult::ok_message(
10598 query.to_string(),
10599 &format!("policy `{id}` dropped"),
10600 "drop_iam_policy",
10601 ))
10602 }
10603
10604 fn execute_attach_policy(
10605 &self,
10606 query: &str,
10607 policy_id: &str,
10608 principal: &crate::storage::query::ast::PolicyPrincipalRef,
10609 ) -> RedDBResult<RuntimeQueryResult> {
10610 use crate::auth::store::PrincipalRef;
10611 use crate::auth::UserId;
10612 use crate::storage::query::ast::PolicyPrincipalRef;
10613
10614 let auth_store = self
10615 .inner
10616 .auth_store
10617 .read()
10618 .clone()
10619 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10620 let p = match principal {
10621 PolicyPrincipalRef::User(u) => {
10622 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
10623 }
10624 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
10625 };
10626 let pretty_target = principal_label(principal);
10627 let tenant = current_tenant();
10628 let (actor_name, actor_role) = current_auth_identity()
10629 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
10630 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
10631 let eval_ctx = runtime_iam_context(
10632 actor_role,
10633 tenant.as_deref(),
10634 auth_store.principal_is_system_owned(&actor),
10635 );
10636 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
10637 let ledger = self.inner.control_event_ledger.read();
10638 let control = crate::auth::store::PolicyMutationControl {
10639 ctx: &event_ctx,
10640 ledger: ledger.as_ref(),
10641 config: self.inner.control_event_config,
10642 registry: Some(self.inner.config_registry.as_ref()),
10643 actor: &actor,
10644 eval_ctx: &eval_ctx,
10645 };
10646 auth_store
10647 .attach_policy_with_control_events(p, policy_id, &control)
10648 .map_err(|e| RedDBError::Query(e.to_string()))?;
10649
10650 let principal_str = actor_name;
10651 tracing::info!(
10652 target: "audit",
10653 principal = %principal_str,
10654 action = "iam:policy.attach",
10655 matched_policy_id = %policy_id,
10656 target = %pretty_target,
10657 "ATTACH POLICY applied"
10658 );
10659 self.inner.audit_log.record(
10660 "iam/policy.attach",
10661 &principal_str,
10662 &pretty_target,
10663 "ok",
10664 crate::json::Value::Null,
10665 );
10666
10667 self.invalidate_result_cache();
10668 Ok(RuntimeQueryResult::ok_message(
10669 query.to_string(),
10670 &format!("policy `{policy_id}` attached to {pretty_target}"),
10671 "attach_policy",
10672 ))
10673 }
10674
10675 fn execute_detach_policy(
10676 &self,
10677 query: &str,
10678 policy_id: &str,
10679 principal: &crate::storage::query::ast::PolicyPrincipalRef,
10680 ) -> RedDBResult<RuntimeQueryResult> {
10681 use crate::auth::store::PrincipalRef;
10682 use crate::auth::UserId;
10683 use crate::storage::query::ast::PolicyPrincipalRef;
10684
10685 let auth_store = self
10686 .inner
10687 .auth_store
10688 .read()
10689 .clone()
10690 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10691 let p = match principal {
10692 PolicyPrincipalRef::User(u) => {
10693 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
10694 }
10695 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
10696 };
10697 let pretty_target = principal_label(principal);
10698 let tenant = current_tenant();
10699 let (actor_name, actor_role) = current_auth_identity()
10700 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
10701 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
10702 let eval_ctx = runtime_iam_context(
10703 actor_role,
10704 tenant.as_deref(),
10705 auth_store.principal_is_system_owned(&actor),
10706 );
10707 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
10708 let ledger = self.inner.control_event_ledger.read();
10709 let control = crate::auth::store::PolicyMutationControl {
10710 ctx: &event_ctx,
10711 ledger: ledger.as_ref(),
10712 config: self.inner.control_event_config,
10713 registry: Some(self.inner.config_registry.as_ref()),
10714 actor: &actor,
10715 eval_ctx: &eval_ctx,
10716 };
10717 auth_store
10718 .detach_policy_with_control_events(p, policy_id, &control)
10719 .map_err(|e| RedDBError::Query(e.to_string()))?;
10720
10721 let principal_str = actor_name;
10722 tracing::info!(
10723 target: "audit",
10724 principal = %principal_str,
10725 action = "iam:policy.detach",
10726 matched_policy_id = %policy_id,
10727 target = %pretty_target,
10728 "DETACH POLICY applied"
10729 );
10730 self.inner.audit_log.record(
10731 "iam/policy.detach",
10732 &principal_str,
10733 &pretty_target,
10734 "ok",
10735 crate::json::Value::Null,
10736 );
10737
10738 self.invalidate_result_cache();
10739 Ok(RuntimeQueryResult::ok_message(
10740 query.to_string(),
10741 &format!("policy `{policy_id}` detached from {pretty_target}"),
10742 "detach_policy",
10743 ))
10744 }
10745
10746 fn execute_show_policies(
10747 &self,
10748 query: &str,
10749 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
10750 ) -> RedDBResult<RuntimeQueryResult> {
10751 use crate::auth::UserId;
10752 use crate::storage::query::ast::PolicyPrincipalRef;
10753 use crate::storage::query::unified::UnifiedRecord;
10754 use crate::storage::schema::Value as SchemaValue;
10755 use std::sync::Arc;
10756
10757 let auth_store = self
10758 .inner
10759 .auth_store
10760 .read()
10761 .clone()
10762 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10763
10764 let pols = match filter {
10765 None => auth_store.list_policies(),
10766 Some(PolicyPrincipalRef::User(u)) => {
10767 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
10768 auth_store.effective_policies(&id)
10769 }
10770 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
10771 };
10772
10773 let mut records = Vec::with_capacity(pols.len() + 1);
10774
10775 let mode = auth_store.enforcement_mode();
10780 let mut header = UnifiedRecord::default();
10781 header.set_arc(
10782 Arc::from("id"),
10783 SchemaValue::text("<enforcement_mode>".to_string()),
10784 );
10785 header.set_arc(Arc::from("statements"), SchemaValue::Integer(0));
10786 header.set_arc(Arc::from("tenant"), SchemaValue::Null);
10787 let header_json = format!(
10788 r#"{{"enforcement_mode":"{}","policy_only_hard_version":"{}"}}"#,
10789 mode.as_str(),
10790 crate::auth::enforcement_mode::POLICY_ONLY_HARD_VERSION
10791 );
10792 header.set_arc(Arc::from("json"), SchemaValue::text(header_json));
10793 records.push(header);
10794
10795 for p in pols.iter() {
10796 let mut rec = UnifiedRecord::default();
10797 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
10798 rec.set_arc(
10799 Arc::from("statements"),
10800 SchemaValue::Integer(p.statements.len() as i64),
10801 );
10802 rec.set_arc(
10803 Arc::from("tenant"),
10804 p.tenant
10805 .as_deref()
10806 .map(|t| SchemaValue::text(t.to_string()))
10807 .unwrap_or(SchemaValue::Null),
10808 );
10809 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
10810 records.push(rec);
10811 }
10812 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10813 result.records = records;
10814 Ok(RuntimeQueryResult {
10815 query: query.to_string(),
10816 mode: crate::storage::query::modes::QueryMode::Sql,
10817 statement: "show_policies",
10818 engine: "iam-policies",
10819 result,
10820 affected_rows: 0,
10821 statement_type: "select",
10822 })
10823 }
10824
10825 fn execute_show_effective_permissions(
10826 &self,
10827 query: &str,
10828 user: &crate::storage::query::ast::PolicyUserRef,
10829 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
10830 ) -> RedDBResult<RuntimeQueryResult> {
10831 use crate::auth::UserId;
10832 use crate::storage::query::unified::UnifiedRecord;
10833 use crate::storage::schema::Value as SchemaValue;
10834 use std::sync::Arc;
10835
10836 let auth_store = self
10837 .inner
10838 .auth_store
10839 .read()
10840 .clone()
10841 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10842 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10843 let pols = auth_store.effective_policies(&id);
10844
10845 let mut records = Vec::new();
10848 for p in pols.iter() {
10849 for (idx, st) in p.statements.iter().enumerate() {
10850 if let Some(_r) = resource {
10851 }
10855 let mut rec = UnifiedRecord::default();
10856 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
10857 rec.set_arc(
10858 Arc::from("statement_index"),
10859 SchemaValue::Integer(idx as i64),
10860 );
10861 rec.set_arc(
10862 Arc::from("sid"),
10863 st.sid
10864 .as_deref()
10865 .map(|s| SchemaValue::text(s.to_string()))
10866 .unwrap_or(SchemaValue::Null),
10867 );
10868 rec.set_arc(
10869 Arc::from("effect"),
10870 SchemaValue::text(match st.effect {
10871 crate::auth::policies::Effect::Allow => "allow",
10872 crate::auth::policies::Effect::Deny => "deny",
10873 }),
10874 );
10875 rec.set_arc(
10876 Arc::from("actions"),
10877 SchemaValue::Integer(st.actions.len() as i64),
10878 );
10879 rec.set_arc(
10880 Arc::from("resources"),
10881 SchemaValue::Integer(st.resources.len() as i64),
10882 );
10883 records.push(rec);
10884 }
10885 }
10886 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10887 result.records = records;
10888 Ok(RuntimeQueryResult {
10889 query: query.to_string(),
10890 mode: crate::storage::query::modes::QueryMode::Sql,
10891 statement: "show_effective_permissions",
10892 engine: "iam-policies",
10893 result,
10894 affected_rows: 0,
10895 statement_type: "select",
10896 })
10897 }
10898
10899 fn execute_simulate_policy(
10900 &self,
10901 query: &str,
10902 user: &crate::storage::query::ast::PolicyUserRef,
10903 action: &str,
10904 resource: &crate::storage::query::ast::PolicyResourceRef,
10905 ) -> RedDBResult<RuntimeQueryResult> {
10906 use crate::auth::policies::ResourceRef;
10907 use crate::auth::store::SimCtx;
10908 use crate::auth::UserId;
10909 use crate::storage::query::unified::UnifiedRecord;
10910 use crate::storage::schema::Value as SchemaValue;
10911 use std::sync::Arc;
10912
10913 let auth_store = self
10914 .inner
10915 .auth_store
10916 .read()
10917 .clone()
10918 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
10919 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
10920 let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
10921 let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());
10922
10923 let principal_str = current_auth_identity()
10924 .map(|(u, _)| u)
10925 .unwrap_or_else(|| "anonymous".into());
10926 let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
10927 tracing::info!(
10928 target: "audit",
10929 principal = %principal_str,
10930 action = "iam:policy.simulate",
10931 decision = %decision_str,
10932 matched_policy_id = ?matched_pid,
10933 matched_sid = ?matched_sid,
10934 "SIMULATE issued"
10935 );
10936 self.inner.audit_log.record(
10937 "iam/policy.simulate",
10938 &principal_str,
10939 &id.to_string(),
10940 "ok",
10941 crate::json::Value::Null,
10942 );
10943
10944 let mut rec = UnifiedRecord::default();
10945 rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
10946 rec.set_arc(
10947 Arc::from("matched_policy_id"),
10948 matched_pid
10949 .map(SchemaValue::text)
10950 .unwrap_or(SchemaValue::Null),
10951 );
10952 rec.set_arc(
10953 Arc::from("matched_sid"),
10954 matched_sid
10955 .map(SchemaValue::text)
10956 .unwrap_or(SchemaValue::Null),
10957 );
10958 rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
10959 rec.set_arc(
10960 Arc::from("trail_len"),
10961 SchemaValue::Integer(outcome.trail.len() as i64),
10962 );
10963 let mut result = crate::storage::query::unified::UnifiedResult::empty();
10964 result.records = vec![rec];
10965 Ok(RuntimeQueryResult {
10966 query: query.to_string(),
10967 mode: crate::storage::query::modes::QueryMode::Sql,
10968 statement: "simulate_policy",
10969 engine: "iam-policies",
10970 result,
10971 affected_rows: 0,
10972 statement_type: "select",
10973 })
10974 }
10975}
10976
10977fn grant_to_iam_policy(
10982 principal: &crate::auth::privileges::GrantPrincipal,
10983 resource: &crate::auth::privileges::Resource,
10984 actions: &[crate::auth::privileges::Action],
10985 tenant: Option<&str>,
10986) -> Option<crate::auth::policies::Policy> {
10987 use crate::auth::policies::{
10988 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
10989 };
10990 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
10991
10992 if matches!(principal, GrantPrincipal::Group(_)) {
10993 return None;
10994 }
10995
10996 let now = crate::auth::now_ms();
10997 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
10998
10999 let resource_str = match resource {
11000 Resource::Database => "table:*".to_string(),
11001 Resource::Schema(s) => format!("table:{s}.*"),
11002 Resource::Table { schema, table } => match schema {
11003 Some(s) => format!("table:{s}.{table}"),
11004 None => format!("table:{table}"),
11005 },
11006 Resource::Function { schema, name } => match schema {
11007 Some(s) => format!("function:{s}.{name}"),
11008 None => format!("function:{name}"),
11009 },
11010 };
11011
11012 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
11016 vec![ActionPattern::Wildcard]
11017 } else {
11018 actions
11019 .iter()
11020 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
11021 .collect()
11022 };
11023 if action_patterns.is_empty() {
11024 return None;
11025 }
11026
11027 let resource_patterns = if resource_str == "*" {
11032 vec![ResourcePattern::Wildcard]
11033 } else if resource_str.contains('*') {
11034 vec![ResourcePattern::Glob(resource_str.clone())]
11035 } else if let Some((kind, name)) = resource_str.split_once(':') {
11036 vec![ResourcePattern::Exact {
11037 kind: kind.to_string(),
11038 name: name.to_string(),
11039 }]
11040 } else {
11041 vec![ResourcePattern::Wildcard]
11042 };
11043
11044 let policy = Policy {
11045 id,
11046 version: 1,
11047 tenant: tenant.map(|t| t.to_string()),
11048 created_at: now,
11049 updated_at: now,
11050 statements: vec![Statement {
11051 sid: None,
11052 effect: Effect::Allow,
11053 actions: action_patterns,
11054 resources: resource_patterns,
11055 condition: None,
11056 }],
11057 };
11058 if policy.validate().is_err() {
11059 return None;
11060 }
11061 Some(policy)
11062}
11063
11064fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
11065 use crate::auth::privileges::Action;
11066 match action {
11067 Action::Select => "select",
11068 Action::Insert => "insert",
11069 Action::Update => "update",
11070 Action::Delete => "delete",
11071 Action::Truncate => "truncate",
11072 Action::References => "references",
11073 Action::Execute => "execute",
11074 Action::Usage => "usage",
11075 Action::All => "*",
11076 }
11077}
11078
11079fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
11080 let mut columns = Vec::new();
11081 for (column, _) in &query.assignment_exprs {
11082 if !columns.iter().any(|seen| seen == column) {
11083 columns.push(column.clone());
11084 }
11085 }
11086 columns
11087}
11088
11089fn column_access_request_for_table_update(
11090 table_name: &str,
11091 columns: Vec<String>,
11092) -> crate::auth::ColumnAccessRequest {
11093 match table_name.split_once('.') {
11094 Some((schema, table)) => {
11095 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
11096 .with_schema(schema.to_string())
11097 }
11098 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
11099 }
11100}
11101
11102fn column_access_request_for_table_select(
11103 table_name: &str,
11104 columns: Vec<String>,
11105) -> crate::auth::ColumnAccessRequest {
11106 match table_name.split_once('.') {
11107 Some((schema, table)) => {
11108 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
11109 .with_schema(schema.to_string())
11110 }
11111 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
11112 }
11113}
11114
11115fn update_returning_columns_for_policy(
11116 runtime: &RedDBRuntime,
11117 query: &crate::storage::query::ast::UpdateQuery,
11118) -> Option<Vec<String>> {
11119 let items = query.returning.as_ref()?;
11120 let mut columns = Vec::new();
11121 let project_all = items
11122 .iter()
11123 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
11124 if project_all {
11125 collect_returning_star_columns(runtime, query, &mut columns);
11126 } else {
11127 for item in items {
11128 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
11129 continue;
11130 };
11131 push_returning_policy_column(&mut columns, column);
11132 }
11133 }
11134 (!columns.is_empty()).then_some(columns)
11135}
11136
11137fn collect_returning_star_columns(
11138 runtime: &RedDBRuntime,
11139 query: &crate::storage::query::ast::UpdateQuery,
11140 columns: &mut Vec<String>,
11141) {
11142 let store = runtime.db().store();
11143 let Some(manager) = store.get_collection(&query.table) else {
11144 return;
11145 };
11146 if let Some(schema) = manager.column_schema() {
11147 for column in schema.iter() {
11148 push_returning_policy_column(columns, column);
11149 }
11150 }
11151 for entity in manager.query_all(|_| true) {
11152 if !returning_entity_matches_update_target(&entity, query.target) {
11153 continue;
11154 }
11155 match &entity.data {
11156 crate::storage::EntityData::Row(row) => {
11157 for (column, _) in row.iter_fields() {
11158 push_returning_policy_column(columns, column);
11159 }
11160 }
11161 crate::storage::EntityData::Node(node) => {
11162 push_returning_policy_column(columns, "label");
11163 push_returning_policy_column(columns, "node_type");
11164 for column in node.properties.keys() {
11165 push_returning_policy_column(columns, column);
11166 }
11167 }
11168 crate::storage::EntityData::Edge(edge) => {
11169 push_returning_policy_column(columns, "label");
11170 push_returning_policy_column(columns, "from_rid");
11171 push_returning_policy_column(columns, "to_rid");
11172 push_returning_policy_column(columns, "weight");
11173 for column in edge.properties.keys() {
11174 push_returning_policy_column(columns, column);
11175 }
11176 }
11177 _ => {}
11178 }
11179 }
11180}
11181
11182fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
11183 if returning_public_envelope_column(column) {
11184 return;
11185 }
11186 if !columns.iter().any(|seen| seen == column) {
11187 columns.push(column.to_string());
11188 }
11189}
11190
/// Reports whether `column` is one of the envelope column names, compared
/// ASCII case-insensitively: `rid`, `collection`, `kind`, `tenant`,
/// `created_at`, `updated_at` or `red_entity_id`.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|reserved| column.eq_ignore_ascii_case(reserved))
}
11197
11198fn returning_entity_matches_update_target(
11199 entity: &crate::storage::UnifiedEntity,
11200 target: crate::storage::query::ast::UpdateTarget,
11201) -> bool {
11202 use crate::storage::query::ast::UpdateTarget;
11203 match target {
11204 UpdateTarget::Rows => {
11205 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
11206 }
11207 UpdateTarget::Documents => {
11208 matches!(
11209 returning_row_item_kind(entity),
11210 Some(ReturningRowKind::Document)
11211 )
11212 }
11213 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
11214 UpdateTarget::Nodes => matches!(
11215 (&entity.kind, &entity.data),
11216 (
11217 crate::storage::EntityKind::GraphNode(_),
11218 crate::storage::EntityData::Node(_)
11219 )
11220 ),
11221 UpdateTarget::Edges => matches!(
11222 (&entity.kind, &entity.data),
11223 (
11224 crate::storage::EntityKind::GraphEdge(_),
11225 crate::storage::EntityData::Edge(_)
11226 )
11227 ),
11228 }
11229}
11230
11231#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11232enum ReturningRowKind {
11233 Row,
11234 Document,
11235 Kv,
11236}
11237
11238fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
11239 let row = entity.data.as_row()?;
11240 let is_kv = row.iter_fields().all(|(column, _)| {
11241 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
11242 });
11243 if is_kv {
11244 return Some(ReturningRowKind::Kv);
11245 }
11246 let is_document = row
11247 .iter_fields()
11248 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
11249 if is_document {
11250 Some(ReturningRowKind::Document)
11251 } else {
11252 Some(ReturningRowKind::Row)
11253 }
11254}
11255
11256fn requested_table_columns_for_policy(
11257 table: &crate::storage::query::ast::TableQuery,
11258) -> Vec<String> {
11259 use crate::storage::query::sql_lowering::{
11260 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
11261 effective_table_projections,
11262 };
11263
11264 let table_name = table.table.as_str();
11265 let table_alias = table.alias.as_deref();
11266 let mut columns = std::collections::BTreeSet::new();
11267
11268 for projection in effective_table_projections(table) {
11269 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
11270 }
11271 if let Some(filter) = effective_table_filter(table) {
11272 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
11273 }
11274 for expr in effective_table_group_by_exprs(table) {
11275 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
11276 }
11277 if let Some(filter) = effective_table_having_filter(table) {
11278 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
11279 }
11280 for order in &table.order_by {
11281 if let Some(expr) = order.expr.as_ref() {
11282 collect_expr_columns(expr, table_name, table_alias, &mut columns);
11283 } else {
11284 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
11285 }
11286 }
11287
11288 columns.into_iter().collect()
11289}
11290
11291fn collect_projection_columns(
11292 projection: &crate::storage::query::ast::Projection,
11293 table_name: &str,
11294 table_alias: Option<&str>,
11295 columns: &mut std::collections::BTreeSet<String>,
11296) {
11297 use crate::storage::query::ast::Projection;
11298 match projection {
11299 Projection::All => {
11300 columns.insert("*".to_string());
11301 }
11302 Projection::Column(column) | Projection::Alias(column, _) => {
11303 if column != "*" {
11304 columns.insert(column.clone());
11305 }
11306 }
11307 Projection::Function(_, args) => {
11308 for arg in args {
11309 collect_projection_columns(arg, table_name, table_alias, columns);
11310 }
11311 }
11312 Projection::Expression(filter, _) => {
11313 collect_filter_columns(filter, table_name, table_alias, columns);
11314 }
11315 Projection::Field(field, _) => {
11316 collect_field_ref_column(field, table_name, table_alias, columns);
11317 }
11318 Projection::Window { args, .. } => {
11322 for arg in args {
11323 collect_projection_columns(arg, table_name, table_alias, columns);
11324 }
11325 }
11326 }
11327}
11328
11329fn collect_filter_columns(
11330 filter: &crate::storage::query::ast::Filter,
11331 table_name: &str,
11332 table_alias: Option<&str>,
11333 columns: &mut std::collections::BTreeSet<String>,
11334) {
11335 use crate::storage::query::ast::Filter;
11336 match filter {
11337 Filter::Compare { field, .. }
11338 | Filter::IsNull(field)
11339 | Filter::IsNotNull(field)
11340 | Filter::In { field, .. }
11341 | Filter::Between { field, .. }
11342 | Filter::Like { field, .. }
11343 | Filter::StartsWith { field, .. }
11344 | Filter::EndsWith { field, .. }
11345 | Filter::Contains { field, .. } => {
11346 collect_field_ref_column(field, table_name, table_alias, columns);
11347 }
11348 Filter::CompareFields { left, right, .. } => {
11349 collect_field_ref_column(left, table_name, table_alias, columns);
11350 collect_field_ref_column(right, table_name, table_alias, columns);
11351 }
11352 Filter::CompareExpr { lhs, rhs, .. } => {
11353 collect_expr_columns(lhs, table_name, table_alias, columns);
11354 collect_expr_columns(rhs, table_name, table_alias, columns);
11355 }
11356 Filter::And(left, right) | Filter::Or(left, right) => {
11357 collect_filter_columns(left, table_name, table_alias, columns);
11358 collect_filter_columns(right, table_name, table_alias, columns);
11359 }
11360 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
11361 }
11362}
11363
11364fn collect_expr_columns(
11365 expr: &crate::storage::query::ast::Expr,
11366 table_name: &str,
11367 table_alias: Option<&str>,
11368 columns: &mut std::collections::BTreeSet<String>,
11369) {
11370 use crate::storage::query::ast::Expr;
11371 match expr {
11372 Expr::Column { field, .. } => {
11373 collect_field_ref_column(field, table_name, table_alias, columns);
11374 }
11375 Expr::Literal { .. } | Expr::Parameter { .. } => {}
11376 Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
11377 collect_expr_columns(operand, table_name, table_alias, columns);
11378 }
11379 Expr::BinaryOp { lhs, rhs, .. } => {
11380 collect_expr_columns(lhs, table_name, table_alias, columns);
11381 collect_expr_columns(rhs, table_name, table_alias, columns);
11382 }
11383 Expr::FunctionCall { args, .. } => {
11384 for arg in args {
11385 collect_expr_columns(arg, table_name, table_alias, columns);
11386 }
11387 }
11388 Expr::Case {
11389 branches, else_, ..
11390 } => {
11391 for (condition, value) in branches {
11392 collect_expr_columns(condition, table_name, table_alias, columns);
11393 collect_expr_columns(value, table_name, table_alias, columns);
11394 }
11395 if let Some(value) = else_ {
11396 collect_expr_columns(value, table_name, table_alias, columns);
11397 }
11398 }
11399 Expr::IsNull { operand, .. } => {
11400 collect_expr_columns(operand, table_name, table_alias, columns);
11401 }
11402 Expr::InList { target, values, .. } => {
11403 collect_expr_columns(target, table_name, table_alias, columns);
11404 for value in values {
11405 collect_expr_columns(value, table_name, table_alias, columns);
11406 }
11407 }
11408 Expr::Between {
11409 target, low, high, ..
11410 } => {
11411 collect_expr_columns(target, table_name, table_alias, columns);
11412 collect_expr_columns(low, table_name, table_alias, columns);
11413 collect_expr_columns(high, table_name, table_alias, columns);
11414 }
11415 Expr::Subquery { .. } => {}
11416 Expr::WindowFunctionCall { args, window, .. } => {
11417 for arg in args {
11418 collect_expr_columns(arg, table_name, table_alias, columns);
11419 }
11420 for e in &window.partition_by {
11421 collect_expr_columns(e, table_name, table_alias, columns);
11422 }
11423 for o in &window.order_by {
11424 collect_expr_columns(&o.expr, table_name, table_alias, columns);
11425 }
11426 }
11427 }
11428}
11429
11430fn collect_field_ref_column(
11431 field: &crate::storage::query::ast::FieldRef,
11432 table_name: &str,
11433 table_alias: Option<&str>,
11434 columns: &mut std::collections::BTreeSet<String>,
11435) {
11436 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
11437 if column != "*" {
11438 columns.insert(column);
11439 }
11440 }
11441}
11442
11443fn policy_column_name_from_field_ref(
11444 field: &crate::storage::query::ast::FieldRef,
11445 table_name: &str,
11446 table_alias: Option<&str>,
11447) -> Option<String> {
11448 match field {
11449 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
11450 if column == "*" {
11451 return Some("*".to_string());
11452 }
11453 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
11454 Some(column.clone())
11455 } else {
11456 Some(format!("{table}.{column}"))
11457 }
11458 }
11459 _ => None,
11460 }
11461}
11462
11463fn legacy_resource_to_iam(
11464 resource: &crate::auth::privileges::Resource,
11465 tenant: Option<&str>,
11466) -> crate::auth::policies::ResourceRef {
11467 use crate::auth::privileges::Resource;
11468
11469 let (kind, name) = match resource {
11470 Resource::Database => ("database".to_string(), "*".to_string()),
11471 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
11472 Resource::Table { schema, table } => (
11473 "table".to_string(),
11474 match schema {
11475 Some(s) => format!("{s}.{table}"),
11476 None => table.clone(),
11477 },
11478 ),
11479 Resource::Function { schema, name } => (
11480 "function".to_string(),
11481 match schema {
11482 Some(s) => format!("{s}.{name}"),
11483 None => name.clone(),
11484 },
11485 ),
11486 };
11487
11488 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
11489 if let Some(t) = tenant {
11490 out = out.with_tenant(t.to_string());
11491 }
11492 out
11493}
11494
11495#[derive(Debug)]
11496struct JoinTableSide {
11497 table: String,
11498 alias: String,
11499}
11500
11501fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
11502 match expr {
11503 QueryExpr::Table(table) => Some(JoinTableSide {
11504 table: table.table.clone(),
11505 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
11506 }),
11507 _ => None,
11508 }
11509}
11510
11511fn collect_projection_columns_for_table(
11512 projection: &Projection,
11513 table: &str,
11514 alias: Option<&str>,
11515 out: &mut BTreeSet<String>,
11516) {
11517 match projection {
11518 Projection::Column(column) | Projection::Alias(column, _) => {
11519 match split_qualified_column(column) {
11520 Some((qualifier, column))
11521 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
11522 {
11523 push_policy_column(column, out);
11524 }
11525 Some(_) => {}
11526 None => push_policy_column(column, out),
11527 }
11528 }
11529 Projection::Field(
11530 FieldRef::TableColumn {
11531 table: qualifier,
11532 column,
11533 },
11534 _,
11535 ) => {
11536 if qualifier.is_empty()
11537 || qualifier == table
11538 || alias.is_some_and(|alias| qualifier == alias)
11539 {
11540 push_policy_column(column, out);
11541 }
11542 }
11543 Projection::Field(
11544 FieldRef::NodeProperty {
11545 alias: qualifier,
11546 property,
11547 },
11548 _,
11549 )
11550 | Projection::Field(
11551 FieldRef::EdgeProperty {
11552 alias: qualifier,
11553 property,
11554 },
11555 _,
11556 ) => {
11557 if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
11558 push_policy_column(property, out);
11559 }
11560 }
11561 Projection::Function(_, args) => {
11562 for arg in args {
11563 collect_projection_columns_for_table(arg, table, alias, out);
11564 }
11565 }
11566 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
11567 Projection::Window { args, .. } => {
11568 for arg in args {
11569 collect_projection_columns_for_table(arg, table, alias, out);
11570 }
11571 }
11572 }
11573}
11574
11575fn collect_projection_columns_for_join_side(
11576 projection: &Projection,
11577 left: Option<&JoinTableSide>,
11578 right: Option<&JoinTableSide>,
11579 out: &mut HashMap<String, BTreeSet<String>>,
11580) -> RedDBResult<()> {
11581 match projection {
11582 Projection::Column(column) | Projection::Alias(column, _) => {
11583 if let Some((qualifier, column)) = split_qualified_column(column) {
11584 push_qualified_join_column(qualifier, column, left, right, out);
11585 } else {
11586 push_unqualified_join_column(column, left, right, out);
11587 }
11588 }
11589 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
11590 if table.is_empty() {
11591 push_unqualified_join_column(column, left, right, out);
11592 } else if let Some(side) = [left, right]
11593 .into_iter()
11594 .flatten()
11595 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
11596 {
11597 push_join_column(&side.table, column, out);
11598 }
11599 }
11600 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
11601 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
11602 push_qualified_join_column(alias, property, left, right, out);
11603 }
11604 Projection::Function(_, args) => {
11605 for arg in args {
11606 collect_projection_columns_for_join_side(arg, left, right, out)?;
11607 }
11608 }
11609 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
11610 Projection::Window { args, .. } => {
11611 for arg in args {
11612 collect_projection_columns_for_join_side(arg, left, right, out)?;
11613 }
11614 }
11615 }
11616 Ok(())
11617}
11618
/// Splits a `qualifier.column` name at its single `.`.
///
/// Returns `None` when there is no `.`, when either part is empty, or when
/// the part after the first `.` contains another `.`.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    match column.split_once('.') {
        Some((qualifier, name))
            if !qualifier.is_empty() && !name.is_empty() && !name.contains('.') =>
        {
            Some((qualifier, name))
        }
        _ => None,
    }
}
11626
11627fn push_qualified_join_column(
11628 qualifier: &str,
11629 column: &str,
11630 left: Option<&JoinTableSide>,
11631 right: Option<&JoinTableSide>,
11632 out: &mut HashMap<String, BTreeSet<String>>,
11633) {
11634 if let Some(side) = [left, right]
11635 .into_iter()
11636 .flatten()
11637 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
11638 {
11639 push_join_column(&side.table, column, out);
11640 }
11641}
11642
11643fn push_unqualified_join_column(
11644 column: &str,
11645 left: Option<&JoinTableSide>,
11646 right: Option<&JoinTableSide>,
11647 out: &mut HashMap<String, BTreeSet<String>>,
11648) {
11649 for side in [left, right].into_iter().flatten() {
11650 push_join_column(&side.table, column, out);
11651 }
11652}
11653
11654fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
11655 if is_policy_column_name(column) {
11656 out.entry(table.to_string())
11657 .or_default()
11658 .insert(column.to_string());
11659 }
11660}
11661
11662fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
11663 if is_policy_column_name(column) {
11664 out.insert(column.to_string());
11665 }
11666}
11667
/// Reports whether `column` is a name that policy checks should consider.
///
/// Rejected: the empty string, the wildcard `"*"`, and any name starting
/// with the case-sensitive prefixes `LIT:` or `TYPE:`.
fn is_policy_column_name(column: &str) -> bool {
    const SYNTHETIC_PREFIXES: [&str; 2] = ["LIT:", "TYPE:"];

    if column.is_empty() || column == "*" {
        return false;
    }
    !SYNTHETIC_PREFIXES
        .iter()
        .any(|prefix| column.starts_with(*prefix))
}
11674
11675fn runtime_iam_context(
11676 role: crate::auth::Role,
11677 tenant: Option<&str>,
11678 principal_is_system_owned: bool,
11679) -> crate::auth::policies::EvalContext {
11680 crate::auth::policies::EvalContext {
11681 principal_tenant: tenant.map(|t| t.to_string()),
11682 current_tenant: tenant.map(|t| t.to_string()),
11683 peer_ip: None,
11684 mfa_present: false,
11685 now_ms: crate::auth::now_ms(),
11686 principal_is_admin_role: role == crate::auth::Role::Admin,
11687 principal_is_system_owned,
11688 principal_is_platform_scoped: tenant.is_none(),
11689 }
11690}
11691
11692fn explicit_table_projection_columns(
11693 query: &crate::storage::query::ast::TableQuery,
11694) -> Vec<String> {
11695 use crate::storage::query::ast::{FieldRef, Projection};
11696
11697 let mut columns = Vec::new();
11698 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
11699 match projection {
11700 Projection::Column(column) | Projection::Alias(column, _) => {
11701 push_unique(&mut columns, column)
11702 }
11703 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
11704 push_unique(&mut columns, column)
11705 }
11706 _ => {}
11710 }
11711 }
11712 columns
11713}
11714
11715fn explicit_graph_projection_properties(
11716 query: &crate::storage::query::ast::GraphQuery,
11717) -> Vec<String> {
11718 use crate::storage::query::ast::{FieldRef, Projection};
11719
11720 let mut columns = Vec::new();
11721 for projection in &query.return_ {
11722 match projection {
11723 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
11724 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
11725 push_unique(&mut columns, property.clone())
11726 }
11727 _ => {}
11728 }
11729 }
11730 columns
11731}
11732
/// Appends `column` to `columns` unless an equal entry is already present.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
11738
11739fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
11740 use crate::storage::query::ast::PolicyPrincipalRef;
11741 match p {
11742 PolicyPrincipalRef::User(u) => match &u.tenant {
11743 Some(t) => format!("user:{t}/{}", u.username),
11744 None => format!("user:{}", u.username),
11745 },
11746 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
11747 }
11748}
11749
11750pub(crate) fn decision_to_strings(
11753 d: &crate::auth::policies::Decision,
11754) -> (String, Option<String>, Option<String>) {
11755 use crate::auth::policies::Decision;
11756 match d {
11757 Decision::Allow {
11758 matched_policy_id,
11759 matched_sid,
11760 } => (
11761 "allow".into(),
11762 Some(matched_policy_id.clone()),
11763 matched_sid.clone(),
11764 ),
11765 Decision::Deny {
11766 matched_policy_id,
11767 matched_sid,
11768 } => (
11769 "deny".into(),
11770 Some(matched_policy_id.clone()),
11771 matched_sid.clone(),
11772 ),
11773 Decision::DefaultDeny => ("default_deny".into(), None, None),
11774 Decision::AdminBypass => ("admin_bypass".into(), None, None),
11775 }
11776}
11777
11778fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
11779 let mut scopes = Vec::new();
11780 collect_relation_scopes(query, &mut scopes);
11781 scopes.sort();
11782 scopes.dedup();
11783 scopes
11784}
11785
11786fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
11787 match query {
11788 QueryExpr::Table(table) => {
11789 if !table.table.is_empty() {
11790 scopes.push(table.table.clone());
11791 }
11792 if let Some(alias) = &table.alias {
11793 scopes.push(alias.clone());
11794 }
11795 }
11796 QueryExpr::Join(join) => {
11797 collect_relation_scopes(&join.left, scopes);
11798 collect_relation_scopes(&join.right, scopes);
11799 }
11800 _ => {}
11801 }
11802}
11803
11804fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
11805 let inner_scopes = relation_scopes_for_query(query);
11806 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
11807}
11808
11809fn query_expr_references_outer_scope(
11810 query: &QueryExpr,
11811 outer_scopes: &[String],
11812 inner_scopes: &[String],
11813) -> bool {
11814 match query {
11815 QueryExpr::Table(table) => {
11816 table.select_items.iter().any(|item| match item {
11817 crate::storage::query::ast::SelectItem::Wildcard => false,
11818 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11819 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11820 }
11821 }) || table
11822 .where_expr
11823 .as_ref()
11824 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11825 || table.filter.as_ref().is_some_and(|filter| {
11826 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11827 })
11828 || table.having_expr.as_ref().is_some_and(|expr| {
11829 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11830 })
11831 || table.having.as_ref().is_some_and(|filter| {
11832 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11833 })
11834 || table
11835 .group_by_exprs
11836 .iter()
11837 .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11838 || table.order_by.iter().any(|clause| {
11839 clause.expr.as_ref().is_some_and(|expr| {
11840 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11841 })
11842 })
11843 }
11844 QueryExpr::Join(join) => {
11845 query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
11846 || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
11847 || join.filter.as_ref().is_some_and(|filter| {
11848 filter_references_outer_scope(filter, outer_scopes, inner_scopes)
11849 })
11850 || join.return_items.iter().any(|item| match item {
11851 crate::storage::query::ast::SelectItem::Wildcard => false,
11852 crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
11853 expr_references_outer_scope(expr, outer_scopes, inner_scopes)
11854 }
11855 })
11856 }
11857 _ => false,
11858 }
11859}
11860
11861fn filter_references_outer_scope(
11862 filter: &crate::storage::query::ast::Filter,
11863 outer_scopes: &[String],
11864 inner_scopes: &[String],
11865) -> bool {
11866 use crate::storage::query::ast::Filter;
11867 match filter {
11868 Filter::Compare { field, .. }
11869 | Filter::IsNull(field)
11870 | Filter::IsNotNull(field)
11871 | Filter::In { field, .. }
11872 | Filter::Between { field, .. }
11873 | Filter::Like { field, .. }
11874 | Filter::StartsWith { field, .. }
11875 | Filter::EndsWith { field, .. }
11876 | Filter::Contains { field, .. } => {
11877 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11878 }
11879 Filter::CompareFields { left, right, .. } => {
11880 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
11881 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
11882 }
11883 Filter::CompareExpr { lhs, rhs, .. } => {
11884 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11885 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11886 }
11887 Filter::And(left, right) | Filter::Or(left, right) => {
11888 filter_references_outer_scope(left, outer_scopes, inner_scopes)
11889 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
11890 }
11891 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
11892 }
11893}
11894
11895fn expr_references_outer_scope(
11896 expr: &crate::storage::query::ast::Expr,
11897 outer_scopes: &[String],
11898 inner_scopes: &[String],
11899) -> bool {
11900 use crate::storage::query::ast::Expr;
11901 match expr {
11902 Expr::Column { field, .. } => {
11903 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
11904 }
11905 Expr::BinaryOp { lhs, rhs, .. } => {
11906 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
11907 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
11908 }
11909 Expr::UnaryOp { operand, .. }
11910 | Expr::Cast { inner: operand, .. }
11911 | Expr::IsNull { operand, .. } => {
11912 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
11913 }
11914 Expr::FunctionCall { args, .. } => args
11915 .iter()
11916 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
11917 Expr::Case {
11918 branches, else_, ..
11919 } => {
11920 branches.iter().any(|(cond, value)| {
11921 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
11922 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
11923 }) || else_
11924 .as_ref()
11925 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
11926 }
11927 Expr::InList { target, values, .. } => {
11928 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11929 || values
11930 .iter()
11931 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
11932 }
11933 Expr::Between {
11934 target, low, high, ..
11935 } => {
11936 expr_references_outer_scope(target, outer_scopes, inner_scopes)
11937 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
11938 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
11939 }
11940 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
11941 Expr::Literal { .. } | Expr::Parameter { .. } => false,
11942 Expr::WindowFunctionCall { args, window, .. } => {
11943 args.iter()
11944 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes))
11945 || window
11946 .partition_by
11947 .iter()
11948 .any(|e| expr_references_outer_scope(e, outer_scopes, inner_scopes))
11949 || window
11950 .order_by
11951 .iter()
11952 .any(|o| expr_references_outer_scope(&o.expr, outer_scopes, inner_scopes))
11953 }
11954 }
11955}
11956
11957fn field_ref_references_outer_scope(
11958 field: &crate::storage::query::ast::FieldRef,
11959 outer_scopes: &[String],
11960 inner_scopes: &[String],
11961) -> bool {
11962 match field {
11963 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
11964 outer_scopes.iter().any(|scope| scope == table)
11965 && !inner_scopes.iter().any(|scope| scope == table)
11966 }
11967 _ => false,
11968 }
11969}
11970
11971fn first_column_values(
11972 result: crate::storage::query::unified::UnifiedResult,
11973) -> RedDBResult<Vec<Value>> {
11974 if result.columns.len() > 1 {
11975 return Err(RedDBError::Query(
11976 "expression subquery must return exactly one column".to_string(),
11977 ));
11978 }
11979 let fallback_column = result
11980 .records
11981 .first()
11982 .and_then(|record| record.column_names().into_iter().next())
11983 .map(|name| name.to_string());
11984 let column = result.columns.first().cloned().or(fallback_column);
11985 let Some(column) = column else {
11986 return Ok(Vec::new());
11987 };
11988 Ok(result
11989 .records
11990 .iter()
11991 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
11992 .collect())
11993}
11994
/// Parses a timestamp string into a millisecond count.
///
/// Two input forms are accepted:
///
/// * a bare unsigned integer, which is returned unchanged;
/// * a `YYYY-MM-DD` calendar date, optionally followed by whitespace and
///   further text. Only the first whitespace-separated token is interpreted,
///   so anything after the date is ignored and the result is the start of
///   that day, counted from 1970-01-01.
///
/// Returns `None` when the input matches neither form, when the date is not a
/// real calendar date (month outside `1..=12`, day outside the month's
/// length), or when the date lies before 1970-01-01 and therefore has no
/// unsigned representation.
fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
    const MS_PER_DAY: u128 = 86_400_000;
    // Upper bound that keeps the `i64` arithmetic in `days_from_civil` from
    // overflowing; far beyond any meaningful calendar year.
    const MAX_YEAR: i64 = i64::MAX / 366;

    if let Ok(n) = s.parse::<u128>() {
        return Some(n);
    }

    let date = s.split_whitespace().next()?;
    let mut parts = date.split('-');
    // Exactly three dash-separated components; a fourth means "not a date".
    let (Some(y), Some(m), Some(d), None) =
        (parts.next(), parts.next(), parts.next(), parts.next())
    else {
        return None;
    };
    let y = y.parse::<i64>().ok()?;
    let m = m.parse::<u32>().ok()?;
    let d = d.parse::<u32>().ok()?;

    // Dates before the epoch would give a negative day count, which cannot be
    // expressed in the unsigned return type.
    if !(1970..=MAX_YEAR).contains(&y) || !(1..=12).contains(&m) {
        return None;
    }
    let is_leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    let days_in_month = match m {
        2 if is_leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        _ => 31,
    };
    if d == 0 || d > days_in_month {
        return None;
    }

    let days = u128::try_from(days_from_civil(y, m, d)).ok()?;
    days.checked_mul(MS_PER_DAY)
}

/// Converts a proleptic Gregorian civil date to a day count relative to
/// 1970-01-01 (negative for earlier dates).
///
/// `m` is expected in `1..=12` and `d` in `1..=31`. Values outside those
/// ranges are not rejected; the arithmetic is signed, so a day of `0` yields
/// the day before the first of the month instead of underflowing. Years of
/// extreme magnitude can still overflow `i64`; callers should range-check.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Treat January and February as months 11 and 12 of the previous year so
    // that the leap day falls at the very end of the shifted year.
    let y = if m <= 2 { y - 1 } else { y };
    let era = y.div_euclid(400); // 400-year Gregorian cycle index
    let yoe = y.rem_euclid(400); // year within the cycle, 0..=399
    let shifted_month = i64::from(if m > 2 { m - 3 } else { m + 9 }); // March = 0
    let doy = (153 * shifted_month + 2) / 5 + i64::from(d) - 1; // day of shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day within the cycle
    // 146_097 days per 400-year cycle; 719_468 days from 0000-03-01 to 1970-01-01.
    era * 146_097 + doe - 719_468
}
12029
12030fn walk_plan_node(
12031 node: &crate::storage::query::planner::CanonicalLogicalNode,
12032 depth: usize,
12033 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
12034) {
12035 use std::sync::Arc;
12036 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
12037 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
12038 rec.set_arc(
12039 Arc::from("source"),
12040 node.source.clone().map(Value::text).unwrap_or(Value::Null),
12041 );
12042 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
12043 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
12044 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
12045 out.push(rec);
12046 for child in &node.children {
12047 walk_plan_node(child, depth + 1, out);
12048 }
12049}