1use super::*;
2use crate::application::entity::metadata_to_json;
3use crate::auth::column_policy_gate::ColumnAccessRequest;
4use crate::auth::UserId;
5use crate::replication::cdc::ChangeRecord;
6use crate::replication::logical::{ApplyMode, LogicalChangeApplier};
7use crate::storage::query::ast::TableSource;
8
// Per-thread session state. Every slot starts empty (`0` / `None` / `false`)
// and is written through the setter / guard functions in this file.
thread_local! {
    // Connection id bound to this thread; `0` is the cleared value.
    static CURRENT_CONN_ID: std::cell::Cell<u64> = const { std::cell::Cell::new(0) };

    // `(username, role)` of the caller bound to this thread, if any.
    static CURRENT_AUTH_IDENTITY: std::cell::RefCell<Option<(String, crate::auth::Role)>> =
        const { std::cell::RefCell::new(None) };

    // Snapshot context used by the visibility helpers in this file.
    static CURRENT_SNAPSHOT: std::cell::RefCell<Option<SnapshotContext>> =
        const { std::cell::RefCell::new(None) };

    // Kept in step with `CURRENT_SNAPSHOT` (true iff it holds `Some`) so the
    // visibility helpers can test a plain `Cell<bool>` before borrowing.
    static HAS_SNAPSHOT: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };

    // Tenant id bound to this thread, before scope / tx-local overrides.
    static CURRENT_TENANT_ID: std::cell::RefCell<Option<String>> =
        const { std::cell::RefCell::new(None) };

    // Lazily populated config lookup installed by `ConfigSnapshotGuard`.
    static CURRENT_CONFIG_RESOLVER: std::cell::RefCell<Option<ConfigResolver>> =
        const { std::cell::RefCell::new(None) };

    // Lazily populated secret lookup installed by `SecretStoreGuard`.
    static CURRENT_SECRET_RESOLVER: std::cell::RefCell<Option<SecretResolver>> =
        const { std::cell::RefCell::new(None) };
}
68
69fn secret_sql_value_to_string(value: &Value) -> RedDBResult<String> {
70 match value {
71 Value::Text(s) => Ok(s.to_string()),
72 Value::Integer(n) => Ok(n.to_string()),
73 Value::UnsignedInteger(n) => Ok(n.to_string()),
74 Value::Float(n) => Ok(n.to_string()),
75 Value::Boolean(b) => Ok(b.to_string()),
76 Value::Null => Err(RedDBError::Query(
77 "SET SECRET key = NULL deletes the secret; use DELETE SECRET for explicit deletes"
78 .to_string(),
79 )),
80 Value::Password(_) | Value::Secret(_) => Err(RedDBError::Query(
81 "SET SECRET accepts plain scalar literals; PASSWORD() and SECRET() are for typed columns"
82 .to_string(),
83 )),
84 _ => Err(RedDBError::Query(format!(
85 "SET SECRET does not support value type {:?} yet",
86 value.data_type()
87 ))),
88 }
89}
90
/// One control event to emit for a statement, as produced by
/// `query_control_event_specs`.
#[derive(Clone)]
struct QueryControlEventSpec {
    /// Event category (schema DDL, RLS / tenant governance, config write, ...).
    kind: crate::runtime::control_events::EventKind,
    /// Short static action label such as `"create_table"` or `"config:write"`.
    action: &'static str,
    /// Target identifier, e.g. `"table:<name>"`; `None` when there is no target.
    resource: Option<String>,
    /// Extra `(field name, value)` pairs, each value wrapped in a `Sensitivity`.
    fields: Vec<(String, crate::runtime::control_events::Sensitivity)>,
}
98
/// Audit summary of a statement, as produced by `query_audit_plan`.
#[derive(Clone)]
struct QueryAuditPlan {
    /// One of `"select"`, `"insert"`, `"update"` or `"delete"`.
    statement_kind: &'static str,
    /// De-duplicated collection names the statement touches, with the
    /// `red` / `red.*` / `__red_schema_*` names left out.
    collections: Vec<String>,
}
104
105fn query_audit_plan(expr: &QueryExpr) -> Option<QueryAuditPlan> {
106 let mut collections = Vec::new();
107 let statement_kind = match expr {
108 QueryExpr::Table(table) => {
109 push_query_audit_collection(&mut collections, &table.table);
110 "select"
111 }
112 QueryExpr::Join(join) => {
113 collect_query_audit_collections(&join.left, &mut collections);
114 collect_query_audit_collections(&join.right, &mut collections);
115 "select"
116 }
117 QueryExpr::Insert(insert) => {
118 push_query_audit_collection(&mut collections, &insert.table);
119 "insert"
120 }
121 QueryExpr::Update(update) => {
122 push_query_audit_collection(&mut collections, &update.table);
123 "update"
124 }
125 QueryExpr::Delete(delete) => {
126 push_query_audit_collection(&mut collections, &delete.table);
127 "delete"
128 }
129 _ => return None,
130 };
131 if collections.is_empty() {
132 None
133 } else {
134 Some(QueryAuditPlan {
135 statement_kind,
136 collections,
137 })
138 }
139}
140
141fn collect_query_audit_collections(expr: &QueryExpr, collections: &mut Vec<String>) {
142 match expr {
143 QueryExpr::Table(table) => push_query_audit_collection(collections, &table.table),
144 QueryExpr::Join(join) => {
145 collect_query_audit_collections(&join.left, collections);
146 collect_query_audit_collections(&join.right, collections);
147 }
148 _ => {}
149 }
150}
151
/// Appends `name` to `collections` unless it is an internal name (`red`,
/// `red.*`, `__red_schema_*`) or is already present.
fn push_query_audit_collection(collections: &mut Vec<String>, name: &str) {
    let is_internal =
        name == "red" || name.starts_with("red.") || name.starts_with("__red_schema_");
    if is_internal {
        return;
    }
    let already_listed = collections.iter().any(|seen| seen == name);
    if !already_listed {
        collections.push(name.to_string());
    }
}
160
161impl RedDBRuntime {
162 fn execute_create_metric(
163 &self,
164 raw_query: &str,
165 query: &crate::storage::query::ast::CreateMetricQuery,
166 ) -> RedDBResult<RuntimeQueryResult> {
167 self.check_write(crate::runtime::write_gate::WriteKind::Ddl)?;
168 let store = self.inner.db.store();
169 super::metric_descriptor_catalog::create(
170 store.as_ref(),
171 &query.path,
172 &query.kind,
173 &query.role,
174 super::metric_descriptor_catalog::DerivedSpec {
175 source: query.source.clone(),
176 query: query.query.clone(),
177 window_ms: query.window_ms,
178 time_field: query.time_field.clone(),
179 },
180 )?;
181 self.invalidate_result_cache();
182 Ok(RuntimeQueryResult::ok_message(
183 raw_query.to_string(),
184 &format!("metric descriptor '{}' created", query.path),
185 "create",
186 ))
187 }
188
189 fn execute_alter_metric(
190 &self,
191 raw_query: &str,
192 query: &crate::storage::query::ast::AlterMetricQuery,
193 ) -> RedDBResult<RuntimeQueryResult> {
194 self.check_write(crate::runtime::write_gate::WriteKind::Ddl)?;
195 let store = self.inner.db.store();
196 super::metric_descriptor_catalog::update(
197 store.as_ref(),
198 &query.path,
199 query.set_role.as_deref(),
200 query.attempted_kind.as_deref(),
201 query.attempted_path.as_deref(),
202 )?;
203 self.invalidate_result_cache();
204 Ok(RuntimeQueryResult::ok_message(
205 raw_query.to_string(),
206 &format!("metric descriptor '{}' updated", query.path),
207 "alter",
208 ))
209 }
210
211 fn execute_create_slo(
212 &self,
213 raw_query: &str,
214 query: &crate::storage::query::ast::CreateSloQuery,
215 ) -> RedDBResult<RuntimeQueryResult> {
216 self.check_write(crate::runtime::write_gate::WriteKind::Ddl)?;
217 let store = self.inner.db.store();
218 super::slo_descriptor_catalog::create(
219 store.as_ref(),
220 &query.path,
221 &query.metric_path,
222 query.target,
223 query.window_ms,
224 )?;
225 self.invalidate_result_cache();
226 Ok(RuntimeQueryResult::ok_message(
227 raw_query.to_string(),
228 &format!("SLO descriptor '{}' created", query.path),
229 "create",
230 ))
231 }
232
233 fn execute_create_analytics_source(
234 &self,
235 raw_query: &str,
236 query: super::analytics_source_catalog::CreateAnalyticsSourceProfile,
237 ) -> RedDBResult<RuntimeQueryResult> {
238 self.check_write(crate::runtime::write_gate::WriteKind::Ddl)?;
239 let store = self.inner.db.store();
240 let profile = super::analytics_source_catalog::create(
241 store.as_ref(),
242 &self.inner.db.collection_contracts(),
243 query,
244 )?;
245 self.invalidate_result_cache();
246 Ok(RuntimeQueryResult::ok_message(
247 raw_query.to_string(),
248 &format!("analytics source '{}' created", profile.name),
249 "create",
250 ))
251 }
252}
253
/// Maps a statement to the control events that should be recorded for it.
///
/// Returns an empty vector for expressions that have no arm below. A single
/// statement can yield several specs: `CREATE TABLE` with a tenant column and
/// `ALTER TABLE` with RLS / tenancy operations add governance events on top of
/// the schema-DDL event.
fn query_control_event_specs(expr: &QueryExpr) -> Vec<QueryControlEventSpec> {
    use crate::runtime::control_events::{EventKind, Sensitivity};

    let mut specs = Vec::new();
    // Shorthand for the common case: a `SchemaDdl` event with no extra fields.
    let mut schema = |action: &'static str, resource: Option<String>| {
        specs.push(QueryControlEventSpec {
            kind: EventKind::SchemaDdl,
            action,
            resource,
            fields: Vec::new(),
        });
    };
    match expr {
        QueryExpr::CreateTable(q) => {
            schema("create_table", Some(format!("table:{}", q.name)));
            // Declaring a tenant column is additionally a tenant-governance event.
            if let Some(column) = &q.tenant_by {
                specs.push(QueryControlEventSpec {
                    kind: EventKind::TenantGovernance,
                    action: "create_table_tenant_by",
                    resource: Some(format!("table:{}", q.name)),
                    fields: vec![("tenant_column".to_string(), Sensitivity::raw(column))],
                });
            }
        }
        QueryExpr::CreateCollection(q) => {
            schema("create_collection", Some(format!("collection:{}", q.name)));
        }
        QueryExpr::CreateVector(q) => schema("create_vector", Some(format!("vector:{}", q.name))),
        QueryExpr::DropTable(q) => schema("drop_table", Some(format!("table:{}", q.name))),
        QueryExpr::DropGraph(q) => schema("drop_graph", Some(format!("graph:{}", q.name))),
        QueryExpr::DropVector(q) => schema("drop_vector", Some(format!("vector:{}", q.name))),
        QueryExpr::DropDocument(q) => {
            schema("drop_document", Some(format!("document:{}", q.name)));
        }
        QueryExpr::DropKv(q) => schema("drop_kv", Some(format!("kv:{}", q.name))),
        QueryExpr::DropCollection(q) => {
            schema("drop_collection", Some(format!("collection:{}", q.name)));
        }
        QueryExpr::Truncate(q) => schema("truncate", Some(format!("collection:{}", q.name))),
        QueryExpr::AlterTable(q) => {
            schema("alter_table", Some(format!("table:{}", q.name)));
            // RLS and tenancy toggles each add their own governance event;
            // every other alter operation is covered by the DDL event above.
            for op in &q.operations {
                match op {
                    crate::storage::query::ast::AlterOperation::EnableRowLevelSecurity => {
                        specs.push(QueryControlEventSpec {
                            kind: EventKind::RlsGovernance,
                            action: "enable_rls",
                            resource: Some(format!("table:{}", q.name)),
                            fields: Vec::new(),
                        });
                    }
                    crate::storage::query::ast::AlterOperation::DisableRowLevelSecurity => {
                        specs.push(QueryControlEventSpec {
                            kind: EventKind::RlsGovernance,
                            action: "disable_rls",
                            resource: Some(format!("table:{}", q.name)),
                            fields: Vec::new(),
                        });
                    }
                    crate::storage::query::ast::AlterOperation::EnableTenancy { column } => {
                        specs.push(QueryControlEventSpec {
                            kind: EventKind::TenantGovernance,
                            action: "enable_tenancy",
                            resource: Some(format!("table:{}", q.name)),
                            fields: vec![("tenant_column".to_string(), Sensitivity::raw(column))],
                        });
                    }
                    crate::storage::query::ast::AlterOperation::DisableTenancy => {
                        specs.push(QueryControlEventSpec {
                            kind: EventKind::TenantGovernance,
                            action: "disable_tenancy",
                            resource: Some(format!("table:{}", q.name)),
                            fields: Vec::new(),
                        });
                    }
                    _ => {}
                }
            }
        }
        QueryExpr::CreateIndex(q) => {
            schema(
                "create_index",
                Some(format!("index:{}:{}", q.table, q.name)),
            );
        }
        QueryExpr::DropIndex(q) => {
            schema("drop_index", Some(format!("index:{}:{}", q.table, q.name)));
        }
        QueryExpr::CreateTimeSeries(q) => {
            schema("create_timeseries", Some(format!("timeseries:{}", q.name)));
        }
        QueryExpr::CreateMetric(q) => {
            schema("create_metric", Some(format!("metric:{}", q.path)));
        }
        QueryExpr::AlterMetric(q) => {
            schema("alter_metric", Some(format!("metric:{}", q.path)));
        }
        QueryExpr::CreateSlo(q) => {
            schema("create_slo", Some(format!("slo:{}", q.path)));
        }
        QueryExpr::DropTimeSeries(q) => {
            schema("drop_timeseries", Some(format!("timeseries:{}", q.name)));
        }
        QueryExpr::CreateQueue(q) => schema("create_queue", Some(format!("queue:{}", q.name))),
        QueryExpr::AlterQueue(q) => schema("alter_queue", Some(format!("queue:{}", q.name))),
        QueryExpr::DropQueue(q) => schema("drop_queue", Some(format!("queue:{}", q.name))),
        QueryExpr::CreateTree(q) => {
            schema(
                "create_tree",
                Some(format!("tree:{}:{}", q.collection, q.name)),
            );
        }
        QueryExpr::DropTree(q) => {
            schema(
                "drop_tree",
                Some(format!("tree:{}:{}", q.collection, q.name)),
            );
        }
        QueryExpr::CreateSchema(q) => schema("create_schema", Some(format!("schema:{}", q.name))),
        QueryExpr::DropSchema(q) => schema("drop_schema", Some(format!("schema:{}", q.name))),
        QueryExpr::CreateSequence(q) => {
            schema("create_sequence", Some(format!("sequence:{}", q.name)));
        }
        QueryExpr::DropSequence(q) => schema("drop_sequence", Some(format!("sequence:{}", q.name))),
        QueryExpr::CreateView(q) => schema("create_view", Some(format!("view:{}", q.name))),
        QueryExpr::DropView(q) => schema("drop_view", Some(format!("view:{}", q.name))),
        QueryExpr::RefreshMaterializedView(q) => {
            schema(
                "refresh_materialized_view",
                Some(format!("view:{}", q.name)),
            );
        }
        // Policy statements are RLS-governance events only; no schema-DDL event.
        QueryExpr::CreatePolicy(q) => {
            specs.push(QueryControlEventSpec {
                kind: EventKind::RlsGovernance,
                action: "create_policy",
                resource: Some(format!("table:{}:policy:{}", q.table, q.name)),
                fields: vec![(
                    "target_kind".to_string(),
                    Sensitivity::raw(q.target_kind.as_ident()),
                )],
            });
        }
        QueryExpr::DropPolicy(q) => {
            specs.push(QueryControlEventSpec {
                kind: EventKind::RlsGovernance,
                action: "drop_policy",
                resource: Some(format!("table:{}:policy:{}", q.table, q.name)),
                fields: Vec::new(),
            });
        }
        QueryExpr::SetTenant(value) => {
            // The `tenant` field is attached only when a value is present.
            let mut fields = Vec::new();
            if let Some(value) = value {
                fields.push(("tenant".to_string(), Sensitivity::raw(value)));
            }
            specs.push(QueryControlEventSpec {
                kind: EventKind::TenantGovernance,
                action: "set_tenant",
                resource: Some("tenant:session".to_string()),
                fields,
            });
        }
        // Config events record the key (and collection) but never the value.
        QueryExpr::SetConfig { key, .. } => {
            specs.push(QueryControlEventSpec {
                kind: EventKind::ConfigWrite,
                action: "config:write",
                resource: Some(format!("config:{key}")),
                fields: vec![("key".to_string(), Sensitivity::raw(key))],
            });
        }
        QueryExpr::ConfigCommand(cmd) => match cmd {
            crate::storage::query::ast::ConfigCommand::Put {
                collection, key, ..
            }
            | crate::storage::query::ast::ConfigCommand::Rotate {
                collection, key, ..
            } => {
                let target = format!("{collection}/{key}");
                specs.push(QueryControlEventSpec {
                    kind: EventKind::ConfigWrite,
                    action: "config:write",
                    resource: Some(format!("config:{target}")),
                    fields: vec![
                        ("collection".to_string(), Sensitivity::raw(collection)),
                        ("key".to_string(), Sensitivity::raw(key)),
                    ],
                });
            }
            crate::storage::query::ast::ConfigCommand::Delete { collection, key } => {
                let target = format!("{collection}/{key}");
                // NOTE(review): kind is `ConfigDelete` but the action label is
                // still "config:write" — confirm this is intended and not a
                // copy-paste of the Put/Rotate arm.
                specs.push(QueryControlEventSpec {
                    kind: EventKind::ConfigDelete,
                    action: "config:write",
                    resource: Some(format!("config:{target}")),
                    fields: vec![
                        ("collection".to_string(), Sensitivity::raw(collection)),
                        ("key".to_string(), Sensitivity::raw(key)),
                    ],
                });
            }
            _ => {}
        },
        QueryExpr::AlterUser(stmt) => {
            // An ALTER USER that contains a `Disable` attribute is reported as
            // `UserDisable`; any other alteration is a `UserUpdate`.
            let disables = stmt.attributes.iter().any(|attr| {
                matches!(
                    attr,
                    crate::storage::query::ast::AlterUserAttribute::Disable
                )
            });
            specs.push(QueryControlEventSpec {
                kind: if disables {
                    EventKind::UserDisable
                } else {
                    EventKind::UserUpdate
                },
                action: "alter_user",
                resource: Some(format!("user:{}", stmt.username)),
                fields: Vec::new(),
            });
        }
        _ => {}
    }
    specs
}
479
480fn control_event_outcome_for_error(err: &RedDBError) -> crate::runtime::control_events::Outcome {
481 match err {
482 RedDBError::ReadOnly(_) => crate::runtime::control_events::Outcome::Denied,
483 RedDBError::Query(msg)
484 if msg.contains("permission denied")
485 || msg.contains("cannot issue")
486 || msg.contains("lacks") =>
487 {
488 crate::runtime::control_events::Outcome::Denied
489 }
490 _ => crate::runtime::control_events::Outcome::Error,
491 }
492}
493
494fn view_records_to_entities(
503 table: &str,
504 records: &[crate::storage::query::unified::UnifiedRecord],
505) -> Vec<crate::storage::UnifiedEntity> {
506 use std::collections::HashMap;
507 let table_arc: std::sync::Arc<str> = std::sync::Arc::from(table);
508 let mut out = Vec::with_capacity(records.len());
509 for record in records {
510 let mut named: HashMap<String, crate::storage::schema::Value> = HashMap::new();
511 for (name, value) in record.iter_fields() {
512 named.insert(name.to_string(), value.clone());
513 }
514 let entity = crate::storage::UnifiedEntity::new(
515 crate::storage::EntityId::new(0),
516 crate::storage::EntityKind::TableRow {
517 table: std::sync::Arc::clone(&table_arc),
518 row_id: 0,
519 },
520 crate::storage::EntityData::Row(crate::storage::RowData {
521 columns: Vec::new(),
522 named: Some(named),
523 schema: None,
524 }),
525 );
526 out.push(entity);
527 }
528 out
529}
530
/// Builds a default contract for the collection `name` with the given model.
///
/// The contract is dynamic-schema, implicit-origin, version 1, with both
/// timestamps set to the current time; every optional setting is left
/// `None`, empty or disabled.
fn system_keyed_collection_contract(
    name: &str,
    model: crate::catalog::CollectionModel,
) -> crate::physical::CollectionContract {
    // Sampled once so `created_at` and `updated_at` are identical.
    let now = crate::utils::now_unix_millis() as u128;
    crate::physical::CollectionContract {
        name: name.to_string(),
        declared_model: model,
        schema_mode: crate::catalog::SchemaMode::Dynamic,
        origin: crate::physical::ContractOrigin::Implicit,
        version: 1,
        created_at_unix_ms: now,
        updated_at_unix_ms: now,
        default_ttl_ms: None,
        vector_dimension: None,
        vector_metric: None,
        context_index_fields: Vec::new(),
        declared_columns: Vec::new(),
        table_def: None,
        timestamps_enabled: false,
        context_index_enabled: false,
        metrics_raw_retention_ms: None,
        metrics_rollup_policies: Vec::new(),
        metrics_tenant_identity: None,
        metrics_namespace: None,
        append_only: false,
        subscriptions: Vec::new(),
        analytics_config: Vec::new(),
        session_key: None,
        session_gap_ms: None,
        retention_duration_ms: None,
    }
}
564
/// Everything the visibility helpers in this file need to decide whether a
/// row version is visible to the current reader.
#[derive(Clone)]
pub struct SnapshotContext {
    /// The snapshot consulted (`sees(xmin, xmax)`) once own-transaction and
    /// aborted-transaction cases have been ruled out.
    pub snapshot: crate::storage::transaction::snapshot::Snapshot,
    /// Used to ask whether a given xid was aborted.
    pub manager: Arc<crate::storage::transaction::snapshot::SnapshotManager>,
    /// Xids treated as the reader's own: rows created by them are visible,
    /// rows deleted by them are hidden.
    pub own_xids: std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
    /// Exposed through `current_snapshot_requires_index_fallback`.
    /// NOTE(review): presumably tells index scans to fall back to a
    /// heap-visibility path — confirm against the readers of that function.
    pub requires_index_fallback: bool,
}
586
587pub fn set_current_connection_id(id: u64) {
596 CURRENT_CONN_ID.with(|c| c.set(id));
597}
598
599pub fn clear_current_connection_id() {
601 CURRENT_CONN_ID.with(|c| c.set(0));
602}
603
604pub fn current_connection_id() -> u64 {
607 CURRENT_CONN_ID.with(|c| c.get())
608}
609
610pub fn set_current_auth_identity(username: String, role: crate::auth::Role) {
614 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = Some((username, role)));
615}
616
617pub fn clear_current_auth_identity() {
621 CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = None);
622}
623
624pub(crate) fn current_auth_identity() -> Option<(String, crate::auth::Role)> {
627 CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone())
628}
629
630pub fn current_auth_identity_for_audit() -> Option<(String, crate::auth::Role)> {
634 current_auth_identity()
635}
636
637pub fn set_current_tenant(tenant_id: String) {
642 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = Some(tenant_id));
643}
644
645pub fn clear_current_tenant() {
648 CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = None);
649}
650
651pub fn current_tenant() -> Option<String> {
662 let inherited = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());
663 if let Some(over) = current_scope_override() {
664 if over.tenant.is_active() {
665 return over.tenant.resolve(inherited);
666 }
667 }
668 if let Some(tx_local) = current_tx_local_tenant() {
669 return tx_local;
670 }
671 inherited
672}
673
thread_local! {
    // Transaction-local tenant override. Outer `None` = nothing installed;
    // `Some(None)` = explicitly no tenant; `Some(Some(id))` = tenant `id`.
    static TX_LOCAL_TENANT: std::cell::RefCell<Option<Option<String>>> =
        const { std::cell::RefCell::new(None) };
}
686
687fn current_tx_local_tenant() -> Option<Option<String>> {
688 TX_LOCAL_TENANT.with(|cell| cell.borrow().clone())
689}
690
691fn parse_set_local_tenant(query: &str) -> RedDBResult<Option<Option<String>>> {
697 let mut tokens = query.split_ascii_whitespace();
698 let Some(w1) = tokens.next() else {
699 return Ok(None);
700 };
701 if !w1.eq_ignore_ascii_case("SET") {
702 return Ok(None);
703 }
704 let Some(w2) = tokens.next() else {
705 return Ok(None);
706 };
707 if !w2.eq_ignore_ascii_case("LOCAL") {
708 return Ok(None);
709 }
710 let Some(w3) = tokens.next() else {
711 return Ok(None);
712 };
713 if !w3.eq_ignore_ascii_case("TENANT") {
714 return Ok(None);
715 }
716 let rest: String = tokens.collect::<Vec<_>>().join(" ");
717 let rest = rest.trim().trim_end_matches(';').trim();
718 let value_str = rest.strip_prefix('=').map(|s| s.trim()).unwrap_or(rest);
719 if value_str.is_empty() {
720 return Err(RedDBError::Query(
721 "SET LOCAL TENANT expects a string literal or NULL".to_string(),
722 ));
723 }
724 if value_str.eq_ignore_ascii_case("NULL") {
725 return Ok(Some(None));
726 }
727 if value_str.starts_with('\'') && value_str.ends_with('\'') && value_str.len() >= 2 {
728 let inner = &value_str[1..value_str.len() - 1];
729 return Ok(Some(Some(inner.to_string())));
730 }
731 Err(RedDBError::Query(format!(
732 "SET LOCAL TENANT expects a string literal or NULL, got `{value_str}`"
733 )))
734}
735
736pub(crate) struct TxLocalTenantGuard;
737
738impl TxLocalTenantGuard {
739 pub fn install(value: Option<Option<String>>) -> Self {
740 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = value);
741 Self
742 }
743}
744
745impl Drop for TxLocalTenantGuard {
746 fn drop(&mut self) {
747 TX_LOCAL_TENANT.with(|cell| *cell.borrow_mut() = None);
748 }
749}
750
thread_local! {
    // Stack of scope overrides for this thread; the last element is the one
    // returned by `current_scope_override`.
    static SCOPE_OVERRIDES: std::cell::RefCell<Vec<crate::runtime::within_clause::ScopeOverride>> =
        const { std::cell::RefCell::new(Vec::new()) };
}
761
762pub(crate) fn push_scope_override(over: crate::runtime::within_clause::ScopeOverride) {
763 SCOPE_OVERRIDES.with(|cell| cell.borrow_mut().push(over));
764}
765
766pub(crate) fn pop_scope_override() {
767 SCOPE_OVERRIDES.with(|cell| {
768 cell.borrow_mut().pop();
769 });
770}
771
772pub(crate) fn current_scope_override() -> Option<crate::runtime::within_clause::ScopeOverride> {
773 SCOPE_OVERRIDES.with(|cell| cell.borrow().last().cloned())
774}
775
776pub(crate) fn has_scope_override_active() -> bool {
780 SCOPE_OVERRIDES.with(|cell| !cell.borrow().is_empty())
781}
782
783pub(crate) struct ScopeOverrideGuard;
787
788impl ScopeOverrideGuard {
789 pub fn install(over: crate::runtime::within_clause::ScopeOverride) -> Self {
790 push_scope_override(over);
791 Self
792 }
793}
794
795impl Drop for ScopeOverrideGuard {
796 fn drop(&mut self) {
797 pop_scope_override();
798 }
799}
800
801pub(crate) fn current_user_projected() -> Option<String> {
807 let inherited = current_auth_identity().map(|(u, _)| u);
808 if let Some(over) = current_scope_override() {
809 if over.user.is_active() {
810 return over.user.resolve(inherited);
811 }
812 }
813 inherited
814}
815
816pub(crate) fn current_role_projected() -> Option<String> {
817 let inherited = current_auth_identity().map(|(_, r)| format!("{r:?}").to_lowercase());
818 if let Some(over) = current_scope_override() {
819 if over.role.is_active() {
820 return over.role.resolve(inherited);
821 }
822 }
823 inherited
824}
825
826pub(crate) fn current_secret_value(path: &str) -> Option<String> {
827 let key = path.to_ascii_lowercase();
828 CURRENT_SECRET_RESOLVER.with(|cell| {
829 let mut resolver = cell.borrow_mut();
830 let resolver = resolver.as_mut()?;
831 if resolver.values.is_none() {
832 resolver.values = resolver
833 .store
834 .as_ref()
835 .map(|store| store.vault_kv_snapshot());
836 }
837 let values = resolver.values.as_ref()?;
838 values.get(&key).cloned().or_else(|| {
839 key.strip_prefix("red.vault/").and_then(|rest| {
840 values
841 .get(rest)
842 .cloned()
843 .or_else(|| values.get(&format!("red.secret.{rest}")).cloned())
844 })
845 })
846 })
847}
848
/// Per-thread secret lookup state used by `current_secret_value`.
struct SecretResolver {
    /// Source of the vault snapshot; with `None` no snapshot can be loaded.
    store: Option<Arc<crate::auth::store::AuthStore>>,
    /// Cached snapshot, filled on first lookup; `None` until then.
    values: Option<HashMap<String, String>>,
}
853
854pub(super) struct SecretStoreGuard {
855 previous: Option<SecretResolver>,
856}
857
858impl SecretStoreGuard {
859 pub(super) fn install(store: Option<Arc<crate::auth::store::AuthStore>>) -> Self {
860 let previous = CURRENT_SECRET_RESOLVER.with(|cell| {
861 cell.replace(Some(SecretResolver {
862 store,
863 values: None,
864 }))
865 });
866 Self { previous }
867 }
868}
869
870impl Drop for SecretStoreGuard {
871 fn drop(&mut self) {
872 let previous = self.previous.take();
873 CURRENT_SECRET_RESOLVER.with(|cell| {
874 cell.replace(previous);
875 });
876 }
877}
878
879pub(crate) fn current_config_value(path: &str) -> Option<Value> {
880 let key = path.to_ascii_lowercase();
881 CURRENT_CONFIG_RESOLVER.with(|cell| {
882 let mut resolver = cell.borrow_mut();
883 let resolver = resolver.as_mut()?;
884 if resolver.values.is_none() {
885 resolver.values = Some(latest_config_snapshot(&resolver.db));
886 }
887 let values = resolver.values.as_ref()?;
888 values.get(&key).cloned().or_else(|| {
889 key.strip_prefix("red.config/")
890 .and_then(|rest| values.get(&format!("red.config.{rest}")).cloned())
891 })
892 })
893}
894
895fn update_current_config_value(path: &str, value: Value) {
896 let key = path.to_ascii_lowercase();
897 CURRENT_CONFIG_RESOLVER.with(|cell| {
898 if let Some(resolver) = cell.borrow_mut().as_mut() {
899 if let Some(values) = resolver.values.as_mut() {
900 values.insert(key, value);
901 }
902 }
903 });
904}
905
906fn update_current_secret_value(path: &str, value: Option<String>) {
907 let key = path.to_ascii_lowercase();
908 CURRENT_SECRET_RESOLVER.with(|cell| {
909 if let Some(resolver) = cell.borrow_mut().as_mut() {
910 let Some(values) = resolver.values.as_mut() else {
911 return;
912 };
913 match value {
914 Some(value) => {
915 values.insert(key, value);
916 }
917 None => {
918 values.remove(&key);
919 }
920 }
921 }
922 });
923}
924
925fn latest_config_snapshot(db: &RedDB) -> HashMap<String, Value> {
926 let mut latest: HashMap<String, (u64, Value)> = HashMap::new();
927
928 if let Some(manager) = db.store().get_collection("red_config") {
929 manager.for_each_entity(|entity| {
930 let Some(row) = entity.data.as_row() else {
931 return true;
932 };
933 let Some(Value::Text(key)) = row.get_field("key") else {
934 return true;
935 };
936 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
937 let id = entity.id.raw();
938 let key = key.to_ascii_lowercase();
939 insert_latest_config_value(&mut latest, key.clone(), id, value.clone());
940 if let Some(rest) = key.strip_prefix("red.config.") {
941 insert_latest_config_value(&mut latest, format!("red.config/{rest}"), id, value);
942 }
943 true
944 });
945 }
946
947 if let Some(manager) = db.store().get_collection("red.config") {
948 manager.for_each_entity(|entity| {
949 let Some(row) = entity.data.as_row() else {
950 return true;
951 };
952 if matches!(row.get_field("tombstone"), Some(Value::Boolean(true))) {
953 return true;
954 }
955 let Some(Value::Text(key)) = row.get_field("key") else {
956 return true;
957 };
958 let value = row.get_field("value").cloned().unwrap_or(Value::Null);
959 insert_latest_config_value(
960 &mut latest,
961 format!("red.config/{}", key.to_ascii_lowercase()),
962 entity.id.raw(),
963 value,
964 );
965 true
966 });
967 }
968
969 latest
970 .into_iter()
971 .map(|(key, (_, value))| (key, value))
972 .collect()
973}
974
975fn insert_latest_config_value(
976 latest: &mut HashMap<String, (u64, Value)>,
977 key: String,
978 id: u64,
979 value: Value,
980) {
981 match latest.get(&key) {
982 Some((prev_id, _)) if *prev_id > id => {}
983 _ => {
984 latest.insert(key, (id, value));
985 }
986 }
987}
988
/// Per-thread config lookup state used by `current_config_value`.
struct ConfigResolver {
    /// Database whose config collections are scanned to build the snapshot.
    db: Arc<RedDB>,
    /// Cached snapshot, filled on first lookup; `None` until then.
    values: Option<HashMap<String, Value>>,
}
993
994pub(super) struct ConfigSnapshotGuard {
995 previous: Option<ConfigResolver>,
996}
997
998impl ConfigSnapshotGuard {
999 pub(super) fn install(db: Arc<RedDB>) -> Self {
1000 let previous = CURRENT_CONFIG_RESOLVER
1001 .with(|cell| cell.replace(Some(ConfigResolver { db, values: None })));
1002 Self { previous }
1003 }
1004}
1005
1006impl Drop for ConfigSnapshotGuard {
1007 fn drop(&mut self) {
1008 let previous = self.previous.take();
1009 CURRENT_CONFIG_RESOLVER.with(|cell| {
1010 cell.replace(previous);
1011 });
1012 }
1013}
1014
1015pub fn set_current_snapshot(ctx: SnapshotContext) {
1020 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = Some(ctx));
1021 HAS_SNAPSHOT.with(|c| c.set(true));
1022}
1023
1024pub fn clear_current_snapshot() {
1025 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = None);
1026 HAS_SNAPSHOT.with(|c| c.set(false));
1027}
1028
1029pub(crate) struct CurrentSnapshotGuard {
1035 previous: Option<SnapshotContext>,
1036}
1037
1038impl CurrentSnapshotGuard {
1039 pub(crate) fn install(ctx: SnapshotContext) -> Self {
1040 let previous = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
1041 set_current_snapshot(ctx);
1042 Self { previous }
1043 }
1044}
1045
1046impl Drop for CurrentSnapshotGuard {
1047 fn drop(&mut self) {
1048 let prev = self.previous.take();
1049 let has = prev.is_some();
1050 CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = prev);
1051 HAS_SNAPSHOT.with(|c| c.set(has));
1052 }
1053}
1054
1055#[inline]
1066pub fn entity_visible_under_current_snapshot(
1067 entity: &crate::storage::unified::entity::UnifiedEntity,
1068) -> bool {
1069 if !HAS_SNAPSHOT.with(|c| c.get()) {
1075 return entity.xmax == 0;
1076 }
1077 CURRENT_SNAPSHOT.with(|cell| {
1078 let guard = cell.borrow();
1079 let Some(ctx) = guard.as_ref() else {
1080 return true;
1081 };
1082 visibility_check(ctx, entity.xmin, entity.xmax)
1083 })
1084}
1085
1086#[inline]
1091pub(crate) fn xids_visible_under_current_snapshot(xmin: u64, xmax: u64) -> bool {
1092 if !HAS_SNAPSHOT.with(|c| c.get()) {
1093 return true;
1094 }
1095 CURRENT_SNAPSHOT.with(|cell| {
1096 let guard = cell.borrow();
1097 let Some(ctx) = guard.as_ref() else {
1098 return true;
1099 };
1100 visibility_check(ctx, xmin, xmax)
1101 })
1102}
1103
1104pub fn capture_current_snapshot() -> Option<SnapshotContext> {
1111 CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone())
1112}
1113
1114pub(crate) fn current_snapshot_requires_index_fallback() -> bool {
1119 if !HAS_SNAPSHOT.with(|c| c.get()) {
1120 return false;
1121 }
1122 CURRENT_SNAPSHOT.with(|cell| {
1123 cell.borrow()
1124 .as_ref()
1125 .is_some_and(|ctx| ctx.requires_index_fallback)
1126 })
1127}
1128
/// Copy of the thread-bound snapshot, identity and tenant, captured by
/// `snapshot_bundle` and re-installed by `with_snapshot_bundle`.
#[derive(Clone, Default)]
pub struct SnapshotBundle {
    /// Snapshot context at capture time, if one was installed.
    pub snapshot: Option<SnapshotContext>,
    /// `(username, role)` identity at capture time, if one was bound.
    pub auth: Option<(String, crate::auth::Role)>,
    /// Thread-bound tenant at capture time (scope and transaction-local
    /// overrides are not applied).
    pub tenant: Option<String>,
}
1149
1150pub fn snapshot_bundle() -> SnapshotBundle {
1153 SnapshotBundle {
1154 snapshot: capture_current_snapshot(),
1155 auth: current_auth_identity(),
1156 tenant: CURRENT_TENANT_ID.with(|cell| cell.borrow().clone()),
1157 }
1158}
1159
/// Runs `f` with the snapshot, identity and tenant from `bundle` installed on
/// the calling thread, then restores whatever was installed before.
///
/// Restoration happens in the drop of a local guard, i.e. after `f` returns.
/// Returns whatever `f` returns.
pub fn with_snapshot_bundle<R>(bundle: &SnapshotBundle, f: impl FnOnce() -> R) -> R {
    // Holds the pre-call state and writes it back on drop.
    struct Guard {
        prev_snapshot: Option<SnapshotContext>,
        prev_auth: Option<(String, crate::auth::Role)>,
        prev_tenant: Option<String>,
    }
    impl Drop for Guard {
        fn drop(&mut self) {
            let snap = self.prev_snapshot.take();
            // Keep HAS_SNAPSHOT in step with the restored slot.
            let has = snap.is_some();
            CURRENT_SNAPSHOT.with(|cell| *cell.borrow_mut() = snap);
            HAS_SNAPSHOT.with(|c| c.set(has));
            CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = self.prev_auth.take());
            CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = self.prev_tenant.take());
        }
    }

    let _guard = {
        // 1. Capture the current state before overwriting anything.
        let prev_snapshot = CURRENT_SNAPSHOT.with(|cell| cell.borrow().clone());
        let prev_auth = CURRENT_AUTH_IDENTITY.with(|cell| cell.borrow().clone());
        let prev_tenant = CURRENT_TENANT_ID.with(|cell| cell.borrow().clone());

        // 2. Install the bundle; a bundle without a snapshot clears the slot.
        match bundle.snapshot.clone() {
            Some(ctx) => set_current_snapshot(ctx),
            None => clear_current_snapshot(),
        }
        CURRENT_AUTH_IDENTITY.with(|cell| *cell.borrow_mut() = bundle.auth.clone());
        CURRENT_TENANT_ID.with(|cell| *cell.borrow_mut() = bundle.tenant.clone());

        // 3. Arm the guard that undoes step 2.
        Guard {
            prev_snapshot,
            prev_auth,
            prev_tenant,
        }
    };
    f()
}
1201
1202#[inline]
1206pub fn entity_visible_with_context(
1207 ctx: Option<&SnapshotContext>,
1208 entity: &crate::storage::unified::entity::UnifiedEntity,
1209) -> bool {
1210 match ctx {
1211 Some(ctx) => visibility_check(ctx, entity.xmin, entity.xmax),
1212 None => true,
1213 }
1214}
1215
1216fn table_row_index_fields(
1217 entity: &crate::storage::unified::entity::UnifiedEntity,
1218) -> Vec<(String, crate::storage::schema::Value)> {
1219 let crate::storage::EntityData::Row(row) = &entity.data else {
1220 return Vec::new();
1221 };
1222 if let Some(named) = &row.named {
1223 return named
1224 .iter()
1225 .map(|(name, value)| (name.clone(), value.clone()))
1226 .collect();
1227 }
1228 if let Some(schema) = &row.schema {
1229 return schema
1230 .iter()
1231 .zip(row.columns.iter())
1232 .map(|(name, value)| (name.clone(), value.clone()))
1233 .collect();
1234 }
1235 Vec::new()
1236}
1237
1238#[inline]
1239fn visibility_check(ctx: &SnapshotContext, xmin: u64, xmax: u64) -> bool {
1240 if xmin != 0 && ctx.manager.is_aborted(xmin) {
1244 return false;
1245 }
1246 let effective_xmax = if xmax != 0 && ctx.manager.is_aborted(xmax) {
1248 0
1249 } else {
1250 xmax
1251 };
1252 let own_xmin = xmin != 0 && ctx.own_xids.contains(&xmin);
1256 let own_xmax = effective_xmax != 0 && ctx.own_xids.contains(&effective_xmax);
1257 if own_xmax {
1258 return false;
1260 }
1261 if own_xmin {
1262 return true;
1263 }
1264 ctx.snapshot.sees(xmin, effective_xmax)
1265}
1266
1267fn runtime_pool_lock(runtime: &RedDBRuntime) -> std::sync::MutexGuard<'_, PoolState> {
1268 runtime
1269 .inner
1270 .pool
1271 .lock()
1272 .unwrap_or_else(|poisoned| poisoned.into_inner())
1273}
1274
/// Returns `true` when `name` is one of the recognised graph table-function
/// names, compared ASCII case-insensitively.
fn is_graph_tvf_name(name: &str) -> bool {
    const GRAPH_TVF_NAMES: &[&str] = &[
        "components",
        "louvain",
        "degree_centrality",
        "shortest_path",
        "betweenness",
        "eigenvector",
        "pagerank",
    ];
    GRAPH_TVF_NAMES
        .iter()
        .any(|known| name.eq_ignore_ascii_case(known))
}
1287
1288fn analytics_view_algorithm(
1295 graph: &str,
1296 view: &crate::catalog::AnalyticsViewDescriptor,
1297) -> RedDBResult<(String, Vec<(String, f64)>)> {
1298 use crate::catalog::AnalyticsOutput;
1299
1300 let mut named_args: Vec<(String, f64)> = Vec::new();
1301 let algorithm = match view.output {
1302 AnalyticsOutput::Communities => {
1303 let algo = view.algorithm.as_deref().unwrap_or("louvain");
1304 if !algo.eq_ignore_ascii_case("louvain") {
1305 return Err(RedDBError::Query(format!(
1306 "analytics output 'communities' on graph '{graph}' has unsupported algorithm '{algo}' (expected louvain)"
1307 )));
1308 }
1309 if let Some(resolution) = view.resolution {
1310 named_args.push(("resolution".to_string(), resolution));
1311 }
1312 "louvain".to_string()
1313 }
1314 AnalyticsOutput::Components => {
1315 if let Some(algo) = view.algorithm.as_deref() {
1316 if !algo.eq_ignore_ascii_case("components")
1317 && !algo.eq_ignore_ascii_case("connected_components")
1318 {
1319 return Err(RedDBError::Query(format!(
1320 "analytics output 'components' on graph '{graph}' has unsupported algorithm '{algo}' (expected connected_components)"
1321 )));
1322 }
1323 }
1324 "components".to_string()
1325 }
1326 AnalyticsOutput::Centrality => {
1327 let algo = view
1328 .algorithm
1329 .as_deref()
1330 .unwrap_or("pagerank")
1331 .to_ascii_lowercase();
1332 match algo.as_str() {
1333 "pagerank" => {
1334 if let Some(max_iterations) = view.max_iterations {
1335 named_args.push(("max_iterations".to_string(), max_iterations as f64));
1336 }
1337 }
1338 "eigenvector" => {
1339 if let Some(max_iterations) = view.max_iterations {
1340 named_args.push(("max_iterations".to_string(), max_iterations as f64));
1341 }
1342 if let Some(tolerance) = view.tolerance {
1343 named_args.push(("tolerance".to_string(), tolerance));
1344 }
1345 }
1346 "betweenness" => {}
1347 other => {
1348 return Err(RedDBError::Query(format!(
1349 "analytics output 'centrality' on graph '{graph}' has unsupported algorithm '{other}' (expected pagerank, betweenness, or eigenvector)"
1350 )));
1351 }
1352 }
1353 algo
1354 }
1355 };
1356 Ok((algorithm, named_args))
1357}
1358
1359fn reject_named_args(name: &str, named_args: &[(String, f64)]) -> RedDBResult<()> {
1361 if let Some((key, _)) = named_args.first() {
1362 return Err(RedDBError::Query(format!(
1363 "table function '{name}' has no named argument '{key}'"
1364 )));
1365 }
1366 Ok(())
1367}
1368
1369fn louvain_resolution(named_args: &[(String, f64)]) -> RedDBResult<f64> {
1372 let mut resolution = 1.0_f64;
1373 for (key, value) in named_args {
1374 if key.eq_ignore_ascii_case("resolution") {
1375 if !value.is_finite() || *value <= 0.0 {
1376 return Err(RedDBError::Query(format!(
1377 "table function 'louvain' resolution must be > 0, got {value}"
1378 )));
1379 }
1380 resolution = *value;
1381 } else {
1382 return Err(RedDBError::Query(format!(
1383 "table function 'louvain' has no named argument '{key}' (expected 'resolution')"
1384 )));
1385 }
1386 }
1387 Ok(resolution)
1388}
1389
1390fn abstract_degree_centrality(
1395 nodes: &[String],
1396 edges: &[(
1397 String,
1398 String,
1399 crate::storage::engine::graph_algorithms::Weight,
1400 )],
1401) -> Vec<(String, usize)> {
1402 let mut degree: std::collections::BTreeMap<String, usize> = std::collections::BTreeMap::new();
1403 for n in nodes {
1404 degree.entry(n.clone()).or_insert(0);
1405 }
1406 for (a, b, _w) in edges {
1407 *degree.entry(a.clone()).or_insert(0) += 1;
1408 *degree.entry(b.clone()).or_insert(0) += 1;
1409 }
1410 degree.into_iter().collect()
1411}
1412
1413fn ordered_result_columns(result: &crate::storage::query::unified::UnifiedResult) -> Vec<String> {
1416 if !result.columns.is_empty() {
1417 return result.columns.clone();
1418 }
1419 result
1420 .records
1421 .first()
1422 .map(|record| {
1423 record
1424 .column_names()
1425 .iter()
1426 .map(|column| column.to_string())
1427 .collect()
1428 })
1429 .unwrap_or_default()
1430}
1431
1432fn value_to_node_id(value: &crate::storage::schema::Value) -> Option<String> {
1436 use crate::storage::schema::Value;
1437 match value {
1438 Value::Null => None,
1439 Value::Text(s) => Some(s.to_string()),
1440 Value::Integer(n) => Some(n.to_string()),
1441 Value::UnsignedInteger(n) => Some(n.to_string()),
1442 Value::NodeRef(s) => Some(s.clone()),
1443 other => Some(other.to_string()),
1444 }
1445}
1446
1447fn value_to_weight(value: &crate::storage::schema::Value) -> Option<f32> {
1449 use crate::storage::schema::Value;
1450 match value {
1451 Value::Float(f) => Some(*f as f32),
1452 Value::Integer(n) => Some(*n as f32),
1453 Value::UnsignedInteger(n) => Some(*n as f32),
1454 _ => None,
1455 }
1456}
1457
1458fn inline_node_ids(
1462 name: &str,
1463 result: &crate::storage::query::unified::UnifiedResult,
1464) -> RedDBResult<Vec<String>> {
1465 if result.records.is_empty() {
1466 return Ok(Vec::new());
1467 }
1468 let columns = ordered_result_columns(result);
1469 let Some(first_col) = columns.first() else {
1470 return Err(RedDBError::Query(format!(
1471 "table function '{name}' inline form: `nodes` subquery must project at least one column (the node id)"
1472 )));
1473 };
1474 let mut ids = Vec::with_capacity(result.records.len());
1475 for record in &result.records {
1476 if let Some(id) = record.get(first_col).and_then(value_to_node_id) {
1477 ids.push(id);
1478 }
1479 }
1480 Ok(ids)
1481}
1482
1483fn inline_edges(
1488 name: &str,
1489 result: &crate::storage::query::unified::UnifiedResult,
1490) -> RedDBResult<
1491 Vec<(
1492 String,
1493 String,
1494 crate::storage::engine::graph_algorithms::Weight,
1495 )>,
1496> {
1497 if result.records.is_empty() {
1498 return Ok(Vec::new());
1499 }
1500 let columns = ordered_result_columns(result);
1501 if columns.len() < 2 {
1502 return Err(RedDBError::Query(format!(
1503 "table function '{name}' inline form: `edges` subquery must project at least two columns (source, target), got {}",
1504 columns.len()
1505 )));
1506 }
1507 let src_col = &columns[0];
1508 let dst_col = &columns[1];
1509 let weight_col = columns.get(2);
1510 let mut edges = Vec::with_capacity(result.records.len());
1511 for record in &result.records {
1512 let (Some(src), Some(dst)) = (
1513 record.get(src_col).and_then(value_to_node_id),
1514 record.get(dst_col).and_then(value_to_node_id),
1515 ) else {
1516 continue;
1518 };
1519 let weight = match weight_col {
1520 Some(col) => match record.get(col) {
1521 None | Some(crate::storage::schema::Value::Null) => 1.0,
1522 Some(value) => value_to_weight(value).ok_or_else(|| {
1523 RedDBError::Query(format!(
1524 "table function '{name}' inline form: `edges` weight column must be numeric"
1525 ))
1526 })?,
1527 },
1528 None => 1.0,
1529 };
1530 edges.push((src, dst, weight));
1531 }
1532 Ok(edges)
1533}
1534
1535fn cache_scope_insert(scopes: &mut HashSet<String>, name: &str) {
1536 if name.is_empty() || name.starts_with("__subq_") || is_universal_query_source(name) {
1537 return;
1538 }
1539 scopes.insert(name.to_string());
1540}
1541
1542fn collect_table_source_scopes(scopes: &mut HashSet<String>, query: &TableQuery) {
1543 match query.source.as_ref() {
1544 Some(crate::storage::query::ast::TableSource::Name(name)) => {
1545 cache_scope_insert(scopes, name)
1546 }
1547 Some(crate::storage::query::ast::TableSource::Subquery(subquery)) => {
1548 collect_query_expr_result_cache_scopes(scopes, subquery);
1549 }
1550 Some(crate::storage::query::ast::TableSource::Function { name, args, .. }) => {
1557 if is_graph_tvf_name(name) {
1558 if let Some(graph) = args.first() {
1559 cache_scope_insert(scopes, graph);
1560 }
1561 }
1562 }
1563 Some(crate::storage::query::ast::TableSource::InlineGraphFunction {
1568 nodes, edges, ..
1569 }) => {
1570 collect_query_expr_result_cache_scopes(scopes, nodes);
1571 collect_query_expr_result_cache_scopes(scopes, edges);
1572 }
1573 None => cache_scope_insert(scopes, &query.table),
1574 }
1575}
1576
1577fn collect_vector_source_scopes(
1578 scopes: &mut HashSet<String>,
1579 source: &crate::storage::query::ast::VectorSource,
1580) {
1581 match source {
1582 crate::storage::query::ast::VectorSource::Reference { collection, .. } => {
1583 cache_scope_insert(scopes, collection);
1584 }
1585 crate::storage::query::ast::VectorSource::Subquery(subquery) => {
1586 collect_query_expr_result_cache_scopes(scopes, subquery);
1587 }
1588 crate::storage::query::ast::VectorSource::Literal(_)
1589 | crate::storage::query::ast::VectorSource::Text(_) => {}
1590 }
1591}
1592
1593fn collect_path_selector_scopes(
1594 scopes: &mut HashSet<String>,
1595 selector: &crate::storage::query::ast::NodeSelector,
1596) {
1597 if let crate::storage::query::ast::NodeSelector::ByRow { table, .. } = selector {
1598 cache_scope_insert(scopes, table);
1599 }
1600}
1601
/// Walks `expr` and records in `scopes` the name of every collection-like
/// object (table, queue, tree, view, ...) the statement refers to.
///
/// Names are added through `cache_scope_insert`, which filters out some names
/// itself. Nested query expressions (join sides, subqueries, view bodies,
/// hybrid structured parts) are walked recursively. Variants listed in the
/// final catch-all group contribute no scope.
///
/// The match is written without a wildcard arm, so adding a `QueryExpr`
/// variant forces a decision here.
fn collect_query_expr_result_cache_scopes(scopes: &mut HashSet<String>, expr: &QueryExpr) {
    match expr {
        // --- Read queries: recurse into sources and nested expressions. ---
        QueryExpr::Table(query) => collect_table_source_scopes(scopes, query),
        QueryExpr::Join(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.left);
            collect_query_expr_result_cache_scopes(scopes, &query.right);
        }
        QueryExpr::Path(query) => {
            collect_path_selector_scopes(scopes, &query.from);
            collect_path_selector_scopes(scopes, &query.to);
        }
        QueryExpr::Vector(query) => {
            cache_scope_insert(scopes, &query.collection);
            collect_vector_source_scopes(scopes, &query.query_vector);
        }
        QueryExpr::Hybrid(query) => {
            collect_query_expr_result_cache_scopes(scopes, &query.structured);
            cache_scope_insert(scopes, &query.vector.collection);
            collect_vector_source_scopes(scopes, &query.vector.query_vector);
        }
        // --- DML / DDL: the single target object is the scope. ---
        QueryExpr::Insert(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Update(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::Delete(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropGraph(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropVector(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropDocument(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropKv(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropCollection(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::Truncate(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterTable(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::CreateIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::DropIndex(query) => cache_scope_insert(scopes, &query.table),
        QueryExpr::CreateTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        // Metrics and SLOs are scoped by their `path` field.
        QueryExpr::CreateMetric(query) => cache_scope_insert(scopes, &query.path),
        QueryExpr::AlterMetric(query) => cache_scope_insert(scopes, &query.path),
        QueryExpr::CreateSlo(query) => cache_scope_insert(scopes, &query.path),
        QueryExpr::DropTimeSeries(query) => cache_scope_insert(scopes, &query.name),
        // --- Queues. ---
        QueryExpr::CreateQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::AlterQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::DropQueue(query) => cache_scope_insert(scopes, &query.name),
        QueryExpr::QueueSelect(query) => cache_scope_insert(scopes, &query.queue),
        QueryExpr::QueueCommand(query) => match query {
            QueueCommand::Push { queue, .. }
            | QueueCommand::Pop { queue, .. }
            | QueueCommand::Peek { queue, .. }
            | QueueCommand::Len { queue }
            | QueueCommand::Purge { queue }
            | QueueCommand::GroupCreate { queue, .. }
            | QueueCommand::GroupRead { queue, .. }
            | QueueCommand::Pending { queue, .. }
            | QueueCommand::Claim { queue, .. }
            | QueueCommand::Ack { queue, .. }
            | QueueCommand::Nack { queue, .. } => cache_scope_insert(scopes, queue),
            // A move touches both queues.
            QueueCommand::Move {
                source,
                destination,
                ..
            } => {
                cache_scope_insert(scopes, source);
                cache_scope_insert(scopes, destination);
            }
        },
        // A backfill names both a collection and a target queue.
        QueryExpr::EventsBackfill(query) => {
            cache_scope_insert(scopes, &query.collection);
            cache_scope_insert(scopes, &query.target_queue);
        }
        // --- Trees. ---
        QueryExpr::CreateTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::DropTree(query) => cache_scope_insert(scopes, &query.collection),
        QueryExpr::TreeCommand(query) => match query {
            TreeCommand::Insert { collection, .. }
            | TreeCommand::Move { collection, .. }
            | TreeCommand::Delete { collection, .. }
            | TreeCommand::Validate { collection, .. }
            | TreeCommand::Rebalance { collection, .. } => cache_scope_insert(scopes, collection),
        },
        // --- Search: some commands carry an optional collection. ---
        QueryExpr::SearchCommand(query) => match query {
            SearchCommand::Similar { collection, .. }
            | SearchCommand::Hybrid { collection, .. }
            | SearchCommand::SpatialRadius { collection, .. }
            | SearchCommand::SpatialBbox { collection, .. }
            | SearchCommand::SpatialNearest { collection, .. } => {
                cache_scope_insert(scopes, collection);
            }
            SearchCommand::Text { collection, .. }
            | SearchCommand::Multimodal { collection, .. }
            | SearchCommand::Index { collection, .. }
            | SearchCommand::Context { collection, .. } => {
                if let Some(collection) = collection.as_deref() {
                    cache_scope_insert(scopes, collection);
                }
            }
        },
        QueryExpr::Ask(query) => {
            if let Some(collection) = query.collection.as_deref() {
                cache_scope_insert(scopes, collection);
            }
        }
        QueryExpr::ExplainAlter(query) => cache_scope_insert(scopes, &query.target.name),
        QueryExpr::MaintenanceCommand(cmd) => match cmd {
            crate::storage::query::ast::MaintenanceCommand::Vacuum { target, .. }
            | crate::storage::query::ast::MaintenanceCommand::Analyze { target } => {
                // A maintenance command without a target contributes no scope.
                if let Some(t) = target {
                    cache_scope_insert(scopes, t);
                }
            }
        },
        QueryExpr::CopyFrom(cmd) => cache_scope_insert(scopes, &cmd.table),
        // A view is scoped by its own name plus everything its body reads.
        QueryExpr::CreateView(cmd) => {
            cache_scope_insert(scopes, &cmd.name);
            collect_query_expr_result_cache_scopes(scopes, &cmd.query);
        }
        QueryExpr::DropView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::RefreshMaterializedView(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::CreatePolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::DropPolicy(cmd) => cache_scope_insert(scopes, &cmd.table),
        QueryExpr::CreateServer(_) | QueryExpr::DropServer(_) => {}
        QueryExpr::CreateForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        QueryExpr::DropForeignTable(cmd) => cache_scope_insert(scopes, &cmd.name),
        // --- Statements that contribute no scope here. ---
        QueryExpr::Graph(_)
        | QueryExpr::GraphCommand(_)
        | QueryExpr::ProbabilisticCommand(_)
        | QueryExpr::SetConfig { .. }
        | QueryExpr::ShowConfig { .. }
        | QueryExpr::SetSecret { .. }
        | QueryExpr::DeleteSecret { .. }
        | QueryExpr::ShowSecrets { .. }
        | QueryExpr::SetTenant(_)
        | QueryExpr::ShowTenant
        | QueryExpr::TransactionControl(_)
        | QueryExpr::CreateSchema(_)
        | QueryExpr::DropSchema(_)
        | QueryExpr::CreateSequence(_)
        | QueryExpr::DropSequence(_)
        | QueryExpr::Grant(_)
        | QueryExpr::Revoke(_)
        | QueryExpr::AlterUser(_)
        | QueryExpr::CreateIamPolicy { .. }
        | QueryExpr::DropIamPolicy { .. }
        | QueryExpr::AttachPolicy { .. }
        | QueryExpr::DetachPolicy { .. }
        | QueryExpr::ShowPolicies { .. }
        | QueryExpr::ShowEffectivePermissions { .. }
        | QueryExpr::SimulatePolicy { .. }
        | QueryExpr::LintPolicy { .. }
        | QueryExpr::MigratePolicyMode { .. }
        | QueryExpr::CreateMigration(_)
        | QueryExpr::ApplyMigration(_)
        | QueryExpr::RollbackMigration(_)
        | QueryExpr::ExplainMigration(_)
        | QueryExpr::EventsBackfillStatus { .. } => {}
        // --- KV and config commands: every variant names one collection. ---
        QueryExpr::KvCommand(cmd) => {
            use crate::storage::query::ast::KvCommand;
            match cmd {
                KvCommand::Put { collection, .. }
                | KvCommand::InvalidateTags { collection, .. }
                | KvCommand::Get { collection, .. }
                | KvCommand::Unseal { collection, .. }
                | KvCommand::Rotate { collection, .. }
                | KvCommand::History { collection, .. }
                | KvCommand::List { collection, .. }
                | KvCommand::Purge { collection, .. }
                | KvCommand::Watch { collection, .. }
                | KvCommand::Delete { collection, .. }
                | KvCommand::Incr { collection, .. }
                | KvCommand::Cas { collection, .. } => cache_scope_insert(scopes, collection),
            }
        }
        QueryExpr::ConfigCommand(cmd) => {
            use crate::storage::query::ast::ConfigCommand;
            match cmd {
                ConfigCommand::Put { collection, .. }
                | ConfigCommand::Get { collection, .. }
                | ConfigCommand::Resolve { collection, .. }
                | ConfigCommand::Rotate { collection, .. }
                | ConfigCommand::Delete { collection, .. }
                | ConfigCommand::History { collection, .. }
                | ConfigCommand::List { collection, .. }
                | ConfigCommand::Watch { collection, .. }
                | ConfigCommand::InvalidVolatileOperation { collection, .. } => {
                    cache_scope_insert(scopes, collection)
                }
            }
        }
    }
}
1792
1793pub(crate) fn rls_policy_filter(
1801 runtime: &RedDBRuntime,
1802 table: &str,
1803 action: crate::storage::query::ast::PolicyAction,
1804) -> Option<crate::storage::query::ast::Filter> {
1805 rls_policy_filter_for_kind(
1806 runtime,
1807 table,
1808 action,
1809 crate::storage::query::ast::PolicyTargetKind::Table,
1810 )
1811}
1812
1813pub(crate) fn rls_policy_filter_for_kind(
1819 runtime: &RedDBRuntime,
1820 table: &str,
1821 action: crate::storage::query::ast::PolicyAction,
1822 kind: crate::storage::query::ast::PolicyTargetKind,
1823) -> Option<crate::storage::query::ast::Filter> {
1824 use crate::storage::query::ast::Filter;
1825
1826 if !runtime.inner.rls_enabled_tables.read().contains(table) {
1827 return None;
1828 }
1829 let role = current_auth_identity().map(|(_, role)| role);
1830 let role_str = role.map(|r| r.as_str().to_string());
1831 let policies = runtime.matching_rls_policies_for_kind(table, role_str.as_deref(), action, kind);
1832 if policies.is_empty() {
1833 return None;
1834 }
1835 policies
1836 .into_iter()
1837 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1838}
1839
1840pub(crate) fn rls_is_enabled(runtime: &RedDBRuntime, table: &str) -> bool {
1844 runtime.inner.rls_enabled_tables.read().contains(table)
1845}
1846
1847fn node_passes_rls(
1854 runtime: &RedDBRuntime,
1855 collection: &str,
1856 role: Option<&str>,
1857 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1858 entity: &crate::storage::unified::entity::UnifiedEntity,
1859) -> bool {
1860 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1861
1862 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1863 return true;
1864 }
1865 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1866 let policies = runtime.matching_rls_policies_for_kind(
1867 collection,
1868 role,
1869 PolicyAction::Select,
1870 PolicyTargetKind::Nodes,
1871 );
1872 if policies.is_empty() {
1873 None
1874 } else {
1875 policies
1876 .into_iter()
1877 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1878 }
1879 });
1880 let Some(filter) = filter else {
1881 return false;
1882 };
1883 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1884 Some(&runtime.inner.db),
1885 entity,
1886 filter,
1887 collection,
1888 collection,
1889 )
1890}
1891
1892fn edge_passes_rls(
1895 runtime: &RedDBRuntime,
1896 collection: &str,
1897 role: Option<&str>,
1898 cache: &mut std::collections::HashMap<String, Option<crate::storage::query::ast::Filter>>,
1899 entity: &crate::storage::unified::entity::UnifiedEntity,
1900) -> bool {
1901 use crate::storage::query::ast::{Filter, PolicyAction, PolicyTargetKind};
1902
1903 if !runtime.inner.rls_enabled_tables.read().contains(collection) {
1904 return true;
1905 }
1906 let filter = cache.entry(collection.to_string()).or_insert_with(|| {
1907 let policies = runtime.matching_rls_policies_for_kind(
1908 collection,
1909 role,
1910 PolicyAction::Select,
1911 PolicyTargetKind::Edges,
1912 );
1913 if policies.is_empty() {
1914 None
1915 } else {
1916 policies
1917 .into_iter()
1918 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1919 }
1920 });
1921 let Some(filter) = filter else {
1922 return false;
1923 };
1924 crate::runtime::query_exec::evaluate_entity_filter_with_db(
1925 Some(&runtime.inner.db),
1926 entity,
1927 filter,
1928 collection,
1929 collection,
1930 )
1931}
1932
1933fn inject_rls_filters(
1954 runtime: &RedDBRuntime,
1955 frame: &dyn super::statement_frame::ReadFrame,
1956 mut table: crate::storage::query::ast::TableQuery,
1957) -> Option<crate::storage::query::ast::TableQuery> {
1958 use crate::storage::query::ast::{Filter, PolicyAction};
1959
1960 let role = frame.identity().map(|(_, role)| role);
1962 let role_str = role.map(|r| r.as_str().to_string());
1963 let policies =
1964 runtime.matching_rls_policies(&table.table, role_str.as_deref(), PolicyAction::Select);
1965
1966 if policies.is_empty() {
1967 return None;
1970 }
1971
1972 let combined = policies
1974 .into_iter()
1975 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
1976 .expect("policies non-empty");
1977
1978 use crate::storage::query::sql_lowering::{expr_to_filter, filter_to_expr};
1987 let had_where_expr = table.where_expr.is_some();
1988 let existing = table
1989 .filter
1990 .take()
1991 .or_else(|| table.where_expr.as_ref().map(expr_to_filter));
1992 let new_filter = match existing {
1993 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
1994 None => combined,
1995 };
1996 if had_where_expr {
1999 table.where_expr = Some(filter_to_expr(&new_filter));
2000 }
2001 table.filter = Some(new_filter);
2002 Some(table)
2003}
2004
2005fn inject_rls_into_join(
2015 runtime: &RedDBRuntime,
2016 frame: &dyn super::statement_frame::ReadFrame,
2017 mut join: crate::storage::query::ast::JoinQuery,
2018) -> Option<crate::storage::query::ast::JoinQuery> {
2019 use crate::storage::query::ast::Filter;
2020
2021 let mut policy_filters: Vec<Filter> = Vec::new();
2022 if !collect_join_side_policy(runtime, frame, join.left.as_ref(), &mut policy_filters) {
2023 return None;
2024 }
2025 if !collect_join_side_policy(runtime, frame, join.right.as_ref(), &mut policy_filters) {
2026 return None;
2027 }
2028
2029 if policy_filters.is_empty() {
2030 return Some(join);
2031 }
2032
2033 let combined = policy_filters
2034 .into_iter()
2035 .reduce(|acc, f| Filter::And(Box::new(acc), Box::new(f)))
2036 .expect("policy_filters non-empty");
2037
2038 join.filter = Some(match join.filter.take() {
2039 Some(existing) => Filter::And(Box::new(existing), Box::new(combined)),
2040 None => combined,
2041 });
2042
2043 Some(join)
2044}
2045
2046fn collect_join_side_policy(
2051 runtime: &RedDBRuntime,
2052 frame: &dyn super::statement_frame::ReadFrame,
2053 expr: &crate::storage::query::ast::QueryExpr,
2054 out: &mut Vec<crate::storage::query::ast::Filter>,
2055) -> bool {
2056 use crate::storage::query::ast::{Filter, PolicyAction, QueryExpr};
2057 match expr {
2058 QueryExpr::Table(t) => {
2059 if !runtime.inner.rls_enabled_tables.read().contains(&t.table) {
2060 return true;
2061 }
2062 let role = frame.identity().map(|(_, role)| role);
2063 let role_str = role.map(|r| r.as_str().to_string());
2064 let policies =
2065 runtime.matching_rls_policies(&t.table, role_str.as_deref(), PolicyAction::Select);
2066 if policies.is_empty() {
2067 return false;
2068 }
2069 let combined = policies
2070 .into_iter()
2071 .reduce(|acc, f| Filter::Or(Box::new(acc), Box::new(f)))
2072 .expect("policies non-empty");
2073 out.push(combined);
2074 true
2075 }
2076 QueryExpr::Join(inner) => {
2077 collect_join_side_policy(runtime, frame, inner.left.as_ref(), out)
2078 && collect_join_side_policy(runtime, frame, inner.right.as_ref(), out)
2079 }
2080 _ => true,
2081 }
2082}
2083
2084fn apply_foreign_table_filters(
2095 records: Vec<crate::storage::query::unified::UnifiedRecord>,
2096 query: &crate::storage::query::ast::TableQuery,
2097) -> crate::storage::query::unified::UnifiedResult {
2098 use crate::storage::query::sql_lowering::{
2099 effective_table_filter, effective_table_projections,
2100 };
2101 use crate::storage::query::unified::UnifiedResult;
2102
2103 let filter = effective_table_filter(query);
2104 let projections = effective_table_projections(query);
2105
2106 let mut filtered: Vec<_> = records
2109 .into_iter()
2110 .filter(|record| match &filter {
2111 Some(f) => {
2112 super::join_filter::evaluate_runtime_filter_with_db(None, record, f, None, None)
2113 }
2114 None => true,
2115 })
2116 .collect();
2117
2118 if let Some(offset) = query.offset {
2120 let offset = offset as usize;
2121 if offset >= filtered.len() {
2122 filtered.clear();
2123 } else {
2124 filtered.drain(0..offset);
2125 }
2126 }
2127 if let Some(limit) = query.limit {
2128 filtered.truncate(limit as usize);
2129 }
2130
2131 let columns: Vec<String> = if projections.is_empty() {
2134 filtered
2135 .first()
2136 .map(|r| r.column_names().iter().map(|k| k.to_string()).collect())
2137 .unwrap_or_default()
2138 } else {
2139 projections
2140 .iter()
2141 .map(super::join_filter::projection_name)
2142 .collect()
2143 };
2144
2145 let mut result = UnifiedResult::empty();
2146 result.columns = columns;
2147 result.records = filtered;
2148 result
2149}
2150
2151pub(crate) fn collect_table_refs(expr: &QueryExpr) -> Vec<String> {
2158 let mut scopes: HashSet<String> = HashSet::new();
2159 collect_query_expr_result_cache_scopes(&mut scopes, expr);
2160 scopes.into_iter().collect()
2161}
2162
2163fn query_expr_result_cache_scopes(expr: &QueryExpr) -> HashSet<String> {
2164 let mut scopes = HashSet::new();
2165 collect_query_expr_result_cache_scopes(&mut scopes, expr);
2166 scopes
2167}
2168
// Key naming the result-cache backend setting.
// NOTE(review): presumably looked up in runtime config — confirm at the use site.
const RESULT_CACHE_BACKEND_KEY: &str = "runtime.result_cache.backend";
// Backend name used by default (presumably when the key above is unset — TODO confirm).
const RESULT_CACHE_DEFAULT_BACKEND: &str = "legacy";
// Namespace string for result-cache blobs (presumably for the blob-cache backend — verify).
const RESULT_CACHE_BLOB_NAMESPACE: &str = "runtime.result_cache";
// Result-cache time-to-live, in seconds.
const RESULT_CACHE_TTL_SECS: u64 = 30;
// Maximum number of result-cache entries.
const RESULT_CACHE_MAX_ENTRIES: usize = 1000;
// Keys for the enable switch, TTL (seconds) and capacity (entries) settings.
// NOTE(review): presumably these override the two constants above — confirm.
const RESULT_CACHE_ENABLED_KEY: &str = "runtime.result_cache.enabled";
const RESULT_CACHE_TTL_KEY: &str = "runtime.result_cache.ttl_seconds";
const RESULT_CACHE_CAPACITY_KEY: &str = "runtime.result_cache.capacity_entries";
// Eight-byte magic written at the very start of an encoded result-cache payload.
const RESULT_CACHE_PAYLOAD_MAGIC: &[u8; 8] = b"RDRC0001";
2181
/// Selects which backend serves the runtime result cache.
///
/// NOTE(review): the variant descriptions below are inferred from the names
/// and the nearby constants — confirm against the code that dispatches on
/// this enum.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeResultCacheBackend {
    /// Presumably the original in-process cache (matches the `"legacy"` default).
    Legacy,
    /// Presumably a blob-cache-backed store.
    BlobCache,
    /// Presumably runs both backends side by side for comparison — verify.
    Shadow,
}
2188
2189fn trim_result_cache(
2193 map: &mut HashMap<String, RuntimeResultCacheEntry>,
2194 order: &mut std::collections::VecDeque<String>,
2195 max_entries: usize,
2196) -> u64 {
2197 let mut evicted = 0u64;
2198 while map.len() > max_entries {
2199 if let Some(oldest) = order.pop_front() {
2200 if map.remove(&oldest).is_some() {
2201 evicted += 1;
2202 }
2203 } else {
2204 break;
2205 }
2206 }
2207 evicted
2208}
2209
2210fn result_cache_fingerprint(result: &RuntimeQueryResult) -> String {
2211 format!(
2212 "{:?}|{}|{}|{}|{}|{:?}",
2213 result.result,
2214 result.query,
2215 result.statement,
2216 result.engine,
2217 result.affected_rows,
2218 result.statement_type
2219 )
2220}
2221
2222fn mode_to_byte(mode: crate::storage::query::modes::QueryMode) -> u8 {
2223 match mode {
2224 crate::storage::query::modes::QueryMode::Sql => 0,
2225 crate::storage::query::modes::QueryMode::Gremlin => 1,
2226 crate::storage::query::modes::QueryMode::Cypher => 2,
2227 crate::storage::query::modes::QueryMode::Sparql => 3,
2228 crate::storage::query::modes::QueryMode::Path => 4,
2229 crate::storage::query::modes::QueryMode::Natural => 5,
2230 crate::storage::query::modes::QueryMode::Unknown => 255,
2231 }
2232}
2233
2234fn mode_from_byte(byte: u8) -> Option<crate::storage::query::modes::QueryMode> {
2235 match byte {
2236 0 => Some(crate::storage::query::modes::QueryMode::Sql),
2237 1 => Some(crate::storage::query::modes::QueryMode::Gremlin),
2238 2 => Some(crate::storage::query::modes::QueryMode::Cypher),
2239 3 => Some(crate::storage::query::modes::QueryMode::Sparql),
2240 4 => Some(crate::storage::query::modes::QueryMode::Path),
2241 5 => Some(crate::storage::query::modes::QueryMode::Natural),
2242 255 => Some(crate::storage::query::modes::QueryMode::Unknown),
2243 _ => None,
2244 }
2245}
2246
/// Maps `value` onto the equal `'static` string from a fixed list of known
/// labels, or returns `None` when `value` is not in that list.
///
/// The comparison is exact (case-sensitive).
fn result_cache_static_str(value: &str) -> Option<&'static str> {
    const KNOWN_LABELS: &[&str] = &[
        "select",
        "materialized-graph",
        "runtime-red-schema",
        "runtime-fdw",
        "runtime-table-rls",
        "runtime-table",
        "runtime-join-rls",
        "runtime-join",
        "runtime-vector",
        "runtime-hybrid",
        "runtime-secret",
        "runtime-config",
        "runtime-tenant",
        "runtime-explain",
        "runtime-tree",
        "runtime-kv",
        "runtime-queue",
    ];
    KNOWN_LABELS
        .iter()
        .copied()
        .find(|known| *known == value)
}
2269
/// Appends `value` to `out` as a little-endian `u32`.
///
/// Returns `None`, leaving `out` untouched, when `value` does not fit in a
/// `u32`.
fn write_u32(out: &mut Vec<u8>, value: usize) -> Option<()> {
    let encoded = u32::try_from(value).ok()?.to_le_bytes();
    out.extend_from_slice(&encoded);
    Some(())
}
2275
2276fn write_string(out: &mut Vec<u8>, value: &str) -> Option<()> {
2277 write_u32(out, value.len())?;
2278 out.extend_from_slice(value.as_bytes());
2279 Some(())
2280}
2281
2282fn write_bytes(out: &mut Vec<u8>, value: &[u8]) -> Option<()> {
2283 write_u32(out, value.len())?;
2284 out.extend_from_slice(value);
2285 Some(())
2286}
2287
/// Pops one byte off the front of `input`, advancing the cursor.
///
/// Returns `None` and leaves `input` unchanged when it is empty.
fn read_u8(input: &mut &[u8]) -> Option<u8> {
    let bytes: &[u8] = *input;
    match bytes {
        [first, rest @ ..] => {
            *input = rest;
            Some(*first)
        }
        [] => None,
    }
}
2293
/// Reads a little-endian `u32` from the front of `input`, returned as
/// `usize`, and advances the cursor by four bytes.
///
/// Returns `None` and leaves `input` unchanged when fewer than four bytes
/// remain.
fn read_u32(input: &mut &[u8]) -> Option<usize> {
    const WIDTH: usize = std::mem::size_of::<u32>();

    let bytes: &[u8] = *input;
    if bytes.len() < WIDTH {
        return None;
    }
    let (head, tail) = bytes.split_at(WIDTH);
    let raw: [u8; WIDTH] = head.try_into().ok()?;
    *input = tail;
    Some(u32::from_le_bytes(raw) as usize)
}
2302
/// Reads a little-endian `u64` from the front of `input` and advances the
/// cursor by eight bytes.
///
/// Returns `None` and leaves `input` unchanged when fewer than eight bytes
/// remain.
fn read_u64(input: &mut &[u8]) -> Option<u64> {
    const WIDTH: usize = std::mem::size_of::<u64>();

    let bytes: &[u8] = *input;
    if bytes.len() < WIDTH {
        return None;
    }
    let (head, tail) = bytes.split_at(WIDTH);
    let raw: [u8; WIDTH] = head.try_into().ok()?;
    *input = tail;
    Some(u64::from_le_bytes(raw))
}
2311
2312fn read_string(input: &mut &[u8]) -> Option<String> {
2313 let len = read_u32(input)?;
2314 if input.len() < len {
2315 return None;
2316 }
2317 let value = String::from_utf8(input[..len].to_vec()).ok()?;
2318 *input = &input[len..];
2319 Some(value)
2320}
2321
2322fn read_bytes<'a>(input: &mut &'a [u8]) -> Option<&'a [u8]> {
2323 let len = read_u32(input)?;
2324 if input.len() < len {
2325 return None;
2326 }
2327 let value = &input[..len];
2328 *input = &input[len..];
2329 Some(value)
2330}
2331
2332fn encode_result_cache_payload(entry: &RuntimeResultCacheEntry) -> Option<Vec<u8>> {
2333 let result = &entry.result;
2334 if result.result.pre_serialized_json.is_some()
2335 || result_cache_static_str(result.statement).is_none()
2336 || result_cache_static_str(result.engine).is_none()
2337 || result_cache_static_str(result.statement_type).is_none()
2338 || result.result.records.iter().any(|record| {
2339 !record.nodes.is_empty()
2340 || !record.edges.is_empty()
2341 || !record.paths.is_empty()
2342 || !record.vector_results.is_empty()
2343 })
2344 {
2345 return None;
2346 }
2347
2348 let mut out = Vec::new();
2349 out.extend_from_slice(RESULT_CACHE_PAYLOAD_MAGIC);
2350 write_string(&mut out, &result.query)?;
2351 out.push(mode_to_byte(result.mode));
2352 write_string(&mut out, result.statement)?;
2353 write_string(&mut out, result.engine)?;
2354 out.extend_from_slice(&result.affected_rows.to_le_bytes());
2355 write_string(&mut out, result.statement_type)?;
2356
2357 write_u32(&mut out, result.result.columns.len())?;
2358 for column in &result.result.columns {
2359 write_string(&mut out, column)?;
2360 }
2361 out.extend_from_slice(&result.result.stats.nodes_scanned.to_le_bytes());
2362 out.extend_from_slice(&result.result.stats.edges_scanned.to_le_bytes());
2363 out.extend_from_slice(&result.result.stats.rows_scanned.to_le_bytes());
2364 out.extend_from_slice(&result.result.stats.exec_time_us.to_le_bytes());
2365
2366 write_u32(&mut out, result.result.records.len())?;
2367 for record in &result.result.records {
2368 let fields = record.iter_fields().collect::<Vec<_>>();
2369 write_u32(&mut out, fields.len())?;
2370 for (name, value) in fields {
2371 write_string(&mut out, name)?;
2372 let mut encoded = Vec::new();
2373 crate::storage::schema::value_codec::encode(value, &mut encoded);
2374 write_bytes(&mut out, &encoded)?;
2375 }
2376 }
2377
2378 write_u32(&mut out, entry.scopes.len())?;
2379 for scope in &entry.scopes {
2380 write_string(&mut out, scope)?;
2381 }
2382 Some(out)
2383}
2384
2385fn decode_result_cache_payload(mut input: &[u8]) -> Option<(RuntimeQueryResult, HashSet<String>)> {
2386 if input.len() < RESULT_CACHE_PAYLOAD_MAGIC.len()
2387 || &input[..RESULT_CACHE_PAYLOAD_MAGIC.len()] != RESULT_CACHE_PAYLOAD_MAGIC
2388 {
2389 return None;
2390 }
2391 input = &input[RESULT_CACHE_PAYLOAD_MAGIC.len()..];
2392
2393 let query = read_string(&mut input)?;
2394 let mode = mode_from_byte(read_u8(&mut input)?)?;
2395 let statement = result_cache_static_str(&read_string(&mut input)?)?;
2396 let engine = result_cache_static_str(&read_string(&mut input)?)?;
2397 let affected_rows = read_u64(&mut input)?;
2398 let statement_type = result_cache_static_str(&read_string(&mut input)?)?;
2399
2400 let mut columns = Vec::new();
2401 for _ in 0..read_u32(&mut input)? {
2402 columns.push(read_string(&mut input)?);
2403 }
2404 let stats = crate::storage::query::unified::QueryStats {
2405 nodes_scanned: read_u64(&mut input)?,
2406 edges_scanned: read_u64(&mut input)?,
2407 rows_scanned: read_u64(&mut input)?,
2408 exec_time_us: read_u64(&mut input)?,
2409 };
2410
2411 let mut records = Vec::new();
2412 for _ in 0..read_u32(&mut input)? {
2413 let mut record = crate::storage::query::unified::UnifiedRecord::new();
2414 for _ in 0..read_u32(&mut input)? {
2415 let name = read_string(&mut input)?;
2416 let bytes = read_bytes(&mut input)?;
2417 let (value, used) = crate::storage::schema::value_codec::decode(bytes).ok()?;
2418 if used != bytes.len() {
2419 return None;
2420 }
2421 record.set_owned(name, value);
2422 }
2423 records.push(record);
2424 }
2425
2426 let mut scopes = HashSet::new();
2427 for _ in 0..read_u32(&mut input)? {
2428 scopes.insert(read_string(&mut input)?);
2429 }
2430 if !input.is_empty() {
2431 return None;
2432 }
2433
2434 Some((
2435 RuntimeQueryResult {
2436 query,
2437 mode,
2438 statement,
2439 engine,
2440 result: crate::storage::query::unified::UnifiedResult {
2441 columns,
2442 records,
2443 stats,
2444 pre_serialized_json: None,
2445 },
2446 affected_rows,
2447 statement_type,
2448 bookmark: None,
2449 },
2450 scopes,
2451 ))
2452}
2453
/// Strips a leading `EXPLAIN` keyword and returns the statement after it.
///
/// The keyword is matched ASCII-case-insensitively and must be followed by
/// whitespace and a non-empty statement. Returns `None` when the input does
/// not start with `EXPLAIN`, when nothing follows it, or when the first word
/// of the remainder is `ALTER` or `ASK`.
fn strip_explain_prefix(sql: &str) -> Option<&str> {
    let (keyword, tail) = sql.trim_start().split_once(char::is_whitespace)?;
    if !keyword.eq_ignore_ascii_case("EXPLAIN") {
        return None;
    }

    let statement = tail.trim_start();
    if statement.is_empty() {
        return None;
    }

    let first_word = statement
        .split(char::is_whitespace)
        .next()
        .unwrap_or(statement);
    let excluded = ["ALTER", "ASK"]
        .iter()
        .any(|kw| first_word.eq_ignore_ascii_case(kw));
    if excluded {
        None
    } else {
        Some(statement)
    }
}
2492
2493pub(super) fn has_with_prefix(sql: &str) -> bool {
2498 let trimmed = sql.trim_start();
2499 let head_end = trimmed
2500 .find(|c: char| c.is_whitespace() || c == '(')
2501 .unwrap_or(trimmed.len());
2502 trimmed[..head_end].eq_ignore_ascii_case("WITH")
2503}
2504
2505fn peek_top_level_as_of(sql: &str) -> Option<crate::application::vcs::AsOfSpec> {
2513 peek_top_level_as_of_with_table(sql).map(|(spec, _)| spec)
2514}
2515
2516pub(super) fn peek_top_level_as_of_with_table(
2521 sql: &str,
2522) -> Option<(crate::application::vcs::AsOfSpec, Option<String>)> {
2523 if !sql
2524 .as_bytes()
2525 .windows(5)
2526 .any(|w| w.eq_ignore_ascii_case(b"as of"))
2527 {
2528 return None;
2529 }
2530 let parsed = crate::storage::query::parser::parse(sql).ok()?;
2531 let crate::storage::query::ast::QueryExpr::Table(table) = parsed.query else {
2532 return None;
2533 };
2534 let clause = table.as_of?;
2535 let table_name = if table.table.is_empty() || table.table == "any" {
2536 None
2537 } else {
2538 Some(table.table.clone())
2539 };
2540 let spec = match clause {
2541 crate::storage::query::ast::AsOfClause::Commit(h) => {
2542 crate::application::vcs::AsOfSpec::Commit(h)
2543 }
2544 crate::storage::query::ast::AsOfClause::Branch(b) => {
2545 crate::application::vcs::AsOfSpec::Branch(b)
2546 }
2547 crate::storage::query::ast::AsOfClause::Tag(t) => crate::application::vcs::AsOfSpec::Tag(t),
2548 crate::storage::query::ast::AsOfClause::TimestampMs(ts) => {
2549 crate::application::vcs::AsOfSpec::TimestampMs(ts)
2550 }
2551 crate::storage::query::ast::AsOfClause::Snapshot(x) => {
2552 crate::application::vcs::AsOfSpec::Snapshot(x)
2553 }
2554 };
2555 Some((spec, table_name))
2556}
2557
2558pub(super) fn query_has_volatile_builtin(sql: &str) -> bool {
2559 const VOLATILE_TOKENS: &[&str] = &[
2563 "pg_advisory_lock",
2564 "pg_try_advisory_lock",
2565 "pg_advisory_unlock",
2566 "random()",
2567 ];
2572 let lowered = sql.to_ascii_lowercase();
2573 VOLATILE_TOKENS.iter().any(|t| lowered.contains(t))
2574}
2575
2576pub(super) fn query_is_ask_statement(sql: &str) -> bool {
2577 let trimmed = sql.trim_start();
2578 let head_end = trimmed
2579 .find(|c: char| c.is_whitespace() || c == '(' || c == ';')
2580 .unwrap_or(trimmed.len());
2581 trimmed[..head_end].eq_ignore_ascii_case("ASK")
2582}
2583
2584pub(super) fn intent_lock_modes_for(
2594 expr: &QueryExpr,
2595) -> Option<(
2596 crate::storage::transaction::lock::LockMode,
2597 crate::storage::transaction::lock::LockMode,
2598)> {
2599 use crate::storage::transaction::lock::LockMode::{Exclusive, IntentExclusive, IntentShared};
2600
2601 match expr {
2602 QueryExpr::Table(_)
2604 | QueryExpr::Join(_)
2605 | QueryExpr::Vector(_)
2606 | QueryExpr::Hybrid(_)
2607 | QueryExpr::Graph(_)
2608 | QueryExpr::Path(_)
2609 | QueryExpr::Ask(_)
2610 | QueryExpr::SearchCommand(_)
2611 | QueryExpr::GraphCommand(_)
2612 | QueryExpr::QueueSelect(_) => Some((IntentShared, IntentShared)),
2613
2614 QueryExpr::Insert(_)
2622 | QueryExpr::Update(_)
2623 | QueryExpr::Delete(_)
2624 | QueryExpr::QueueCommand(QueueCommand::Move { .. }) => {
2625 Some((IntentExclusive, IntentExclusive))
2626 }
2627 QueryExpr::QueueCommand(_) => Some((IntentShared, IntentShared)),
2628
2629 QueryExpr::CreateTable(_)
2633 | QueryExpr::CreateCollection(_)
2634 | QueryExpr::CreateVector(_)
2635 | QueryExpr::DropTable(_)
2636 | QueryExpr::DropGraph(_)
2637 | QueryExpr::DropVector(_)
2638 | QueryExpr::DropDocument(_)
2639 | QueryExpr::DropKv(_)
2640 | QueryExpr::DropCollection(_)
2641 | QueryExpr::Truncate(_)
2642 | QueryExpr::AlterTable(_)
2643 | QueryExpr::CreateIndex(_)
2644 | QueryExpr::DropIndex(_)
2645 | QueryExpr::CreateTimeSeries(_)
2646 | QueryExpr::CreateMetric(_)
2647 | QueryExpr::AlterMetric(_)
2648 | QueryExpr::CreateSlo(_)
2649 | QueryExpr::DropTimeSeries(_)
2650 | QueryExpr::CreateQueue(_)
2651 | QueryExpr::AlterQueue(_)
2652 | QueryExpr::DropQueue(_)
2653 | QueryExpr::CreateTree(_)
2654 | QueryExpr::DropTree(_)
2655 | QueryExpr::CreatePolicy(_)
2656 | QueryExpr::DropPolicy(_)
2657 | QueryExpr::CreateView(_)
2658 | QueryExpr::DropView(_)
2659 | QueryExpr::RefreshMaterializedView(_)
2660 | QueryExpr::CreateSchema(_)
2661 | QueryExpr::DropSchema(_)
2662 | QueryExpr::CreateSequence(_)
2663 | QueryExpr::DropSequence(_)
2664 | QueryExpr::CreateServer(_)
2665 | QueryExpr::DropServer(_)
2666 | QueryExpr::CreateForeignTable(_)
2667 | QueryExpr::DropForeignTable(_) => Some((IntentExclusive, Exclusive)),
2668
2669 _ => None,
2675 }
2676}
2677
2678pub(super) fn collections_referenced(expr: &QueryExpr) -> Vec<String> {
2683 let mut out = Vec::new();
2684 walk_collections(expr, &mut out);
2685 out.sort();
2686 out.dedup();
2687 out
2688}
2689
2690fn walk_collections(expr: &QueryExpr, out: &mut Vec<String>) {
2691 match expr {
2692 QueryExpr::Table(t) => out.push(t.table.clone()),
2693 QueryExpr::Join(j) => {
2694 walk_collections(&j.left, out);
2695 walk_collections(&j.right, out);
2696 }
2697 QueryExpr::Insert(i) => out.push(i.table.clone()),
2698 QueryExpr::Update(u) => out.push(u.table.clone()),
2699 QueryExpr::Delete(d) => out.push(d.table.clone()),
2700 QueryExpr::QueueSelect(q) => out.push(q.queue.clone()),
2701
2702 QueryExpr::CreateTable(q) => out.push(q.name.clone()),
2707 QueryExpr::CreateCollection(q) => out.push(q.name.clone()),
2708 QueryExpr::CreateVector(q) => out.push(q.name.clone()),
2709 QueryExpr::DropTable(q) => out.push(q.name.clone()),
2710 QueryExpr::DropGraph(q) => out.push(q.name.clone()),
2711 QueryExpr::DropVector(q) => out.push(q.name.clone()),
2712 QueryExpr::DropDocument(q) => out.push(q.name.clone()),
2713 QueryExpr::DropKv(q) => out.push(q.name.clone()),
2714 QueryExpr::DropCollection(q) => out.push(q.name.clone()),
2715 QueryExpr::Truncate(q) => out.push(q.name.clone()),
2716 QueryExpr::AlterTable(q) => out.push(q.name.clone()),
2717 QueryExpr::CreateIndex(q) => out.push(q.table.clone()),
2718 QueryExpr::DropIndex(q) => out.push(q.table.clone()),
2719 QueryExpr::CreateTimeSeries(q) => out.push(q.name.clone()),
2720 QueryExpr::CreateMetric(q) => out.push(q.path.clone()),
2721 QueryExpr::AlterMetric(q) => out.push(q.path.clone()),
2722 QueryExpr::CreateSlo(q) => out.push(q.path.clone()),
2723 QueryExpr::DropTimeSeries(q) => out.push(q.name.clone()),
2724 QueryExpr::CreateQueue(q) => out.push(q.name.clone()),
2725 QueryExpr::AlterQueue(q) => out.push(q.name.clone()),
2726 QueryExpr::DropQueue(q) => out.push(q.name.clone()),
2727 QueryExpr::QueueCommand(QueueCommand::Move {
2728 source,
2729 destination,
2730 ..
2731 }) => {
2732 out.push(source.clone());
2733 out.push(destination.clone());
2734 }
2735 QueryExpr::CreatePolicy(q) => out.push(q.table.clone()),
2736 QueryExpr::CreateView(q) => out.push(q.name.clone()),
2737 QueryExpr::DropView(q) => out.push(q.name.clone()),
2738 QueryExpr::RefreshMaterializedView(q) => out.push(q.name.clone()),
2739
2740 _ => {}
2746 }
2747}
2748
2749impl RedDBRuntime {
2750 pub fn in_memory() -> RedDBResult<Self> {
2751 Self::with_options(RedDBOptions::in_memory())
2752 }
2753
2754 pub fn lock_manager(&self) -> std::sync::Arc<crate::storage::transaction::lock::LockManager> {
2758 self.inner.lock_manager.clone()
2759 }
2760
2761 pub fn config_registry(&self) -> std::sync::Arc<crate::auth::registry::ConfigRegistry> {
2763 self.inner.config_registry.clone()
2764 }
2765
2766 pub fn query_audit(&self) -> std::sync::Arc<crate::runtime::query_audit::QueryAuditStream> {
2767 self.inner.query_audit.clone()
2768 }
2769
2770 pub fn control_events_require_persistence(&self) -> bool {
2771 self.inner.control_event_config.require_persistence()
2772 }
2773
    /// Returns a copy of the control-event configuration this runtime was
    /// built with.
    pub fn control_event_config(&self) -> crate::runtime::control_events::ControlEventConfig {
        self.inner.control_event_config
    }
2777
2778 pub fn control_event_ledger(
2779 &self,
2780 ) -> Arc<dyn crate::runtime::control_events::ControlEventLedger> {
2781 self.inner.control_event_ledger.read().clone()
2782 }
2783
2784 #[doc(hidden)]
2785 pub fn replace_control_event_ledger_for_tests(
2786 &self,
2787 ledger: Arc<dyn crate::runtime::control_events::ControlEventLedger>,
2788 ) {
2789 *self.inner.control_event_ledger.write() = ledger;
2790 }
2791
2792 #[inline(never)]
2793 pub fn with_options(options: RedDBOptions) -> RedDBResult<Self> {
2794 Self::with_pool(options, ConnectionPoolConfig::default())
2795 }
2796
    /// Opens the database described by `options` and assembles a fully
    /// initialised runtime around it.
    ///
    /// In order, this:
    /// 1. opens the storage engine and the result Blob Cache L2 file,
    /// 2. builds `RuntimeInner` with all in-memory registries and caches,
    /// 3. installs the audit sink and records boot timestamps,
    /// 4. restores the CDC LSN and rehydrates persisted state,
    /// 5. seeds `red.*` configuration trees and system collections,
    /// 6. starts the background threads (maintenance, optional backup
    ///    scheduler, optional EC worker, optional replica loop,
    ///    materialized-view scheduler, optional retention sweeper).
    ///
    /// # Errors
    /// Returns `RedDBError::Internal` when the database or the Blob Cache L2
    /// cannot be opened, and propagates errors from
    /// `bootstrap_system_keyed_collections` and `load_probabilistic_state`.
    /// Failures to spawn background threads are ignored.
    pub fn with_pool(
        options: RedDBOptions,
        pool_config: ConnectionPoolConfig,
    ) -> RedDBResult<Self> {
        // Wall-clock ms since the UNIX epoch, taken before storage is opened
        // (0 if the clock reads earlier than the epoch).
        let boot_open_start_ms = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_millis() as u64)
            .unwrap_or(0);
        let db = Arc::new(
            RedDB::open_with_options(&options)
                .map_err(|err| RedDBError::Internal(err.to_string()))?,
        );
        // The L2 file path is the resolved `data.rdb` path with its
        // extension replaced by `result-cache.l2`.
        let result_blob_cache = crate::storage::cache::BlobCache::open_with_l2(
            crate::storage::cache::BlobCacheConfig::default().with_l2_path(
                options
                    .resolved_path("data.rdb")
                    .with_extension("result-cache.l2"),
            ),
        )
        .map_err(|err| {
            RedDBError::Internal(format!("open result Blob Cache L2 failed: {err:?}"))
        })?;
        // Second timestamp, taken once both storage handles are open.
        let storage_ready_ms = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_millis() as u64)
            .unwrap_or(0);

        let runtime = Self {
            inner: Arc::new(RuntimeInner {
                db: db.clone(),
                layout: PhysicalLayout::from_options(&options),
                indices: IndexCatalog::register_default_vector_graph(
                    options.has_capability(crate::api::Capability::Table),
                    options.has_capability(crate::api::Capability::Graph),
                ),
                pool_config,
                pool: Mutex::new(PoolState::default()),
                started_at_unix_ms: SystemTime::now()
                    .duration_since(UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_millis(),
                probabilistic: super::probabilistic_store::ProbabilisticStore::new(),
                index_store: super::index_store::IndexStore::new(),
                cdc: crate::replication::cdc::CdcBuffer::new(100_000),
                backup_scheduler: crate::replication::scheduler::BackupScheduler::new(3600),
                query_cache: parking_lot::RwLock::new(
                    crate::storage::query::planner::cache::PlanCache::new(1000),
                ),
                result_cache: parking_lot::RwLock::new((
                    HashMap::new(),
                    std::collections::VecDeque::new(),
                )),
                result_blob_cache,
                result_blob_entries: parking_lot::RwLock::new((
                    HashMap::new(),
                    std::collections::VecDeque::new(),
                )),
                ask_answer_cache_entries: parking_lot::RwLock::new((
                    HashSet::new(),
                    std::collections::VecDeque::new(),
                )),
                result_cache_shadow_divergences: std::sync::atomic::AtomicU64::new(0),
                result_cache_hits: std::sync::atomic::AtomicU64::new(0),
                result_cache_misses: std::sync::atomic::AtomicU64::new(0),
                result_cache_evictions: std::sync::atomic::AtomicU64::new(0),
                ask_daily_spend: parking_lot::RwLock::new(HashMap::new()),
                queue_message_locks: parking_lot::RwLock::new(HashMap::new()),
                rmw_locks: RmwLockTable::new(),
                planner_dirty_tables: parking_lot::RwLock::new(HashSet::new()),
                ec_registry: Arc::new(crate::ec::config::EcRegistry::new()),
                config_registry: Arc::new(crate::auth::registry::ConfigRegistry::new()),
                ec_worker: crate::ec::worker::EcWorker::new(),
                auth_store: parking_lot::RwLock::new(None),
                oauth_validator: parking_lot::RwLock::new(None),
                views: parking_lot::RwLock::new(HashMap::new()),
                materialized_views: parking_lot::RwLock::new(
                    crate::storage::cache::result::MaterializedViewCache::new(),
                ),
                retention_sweeper: parking_lot::RwLock::new(
                    crate::runtime::retention_sweeper::RetentionSweeperState::new(),
                ),
                snapshot_manager: Arc::new(
                    crate::storage::transaction::snapshot::SnapshotManager::new(),
                ),
                tx_contexts: parking_lot::RwLock::new(HashMap::new()),
                tx_local_tenants: parking_lot::RwLock::new(HashMap::new()),
                env_config_overrides: crate::runtime::config_overlay::collect_env_overrides(),
                lock_manager: Arc::new({
                    // Deadlock timeout resolution order: environment
                    // override (if it parses as u64), then the config
                    // matrix default (if numeric), then 5000 ms.
                    let env = crate::runtime::config_overlay::collect_env_overrides();
                    let timeout_ms = env
                        .get("concurrency.locking.deadlock_timeout_ms")
                        .and_then(|raw| raw.parse::<u64>().ok())
                        .unwrap_or_else(|| {
                            match crate::runtime::config_matrix::default_for(
                                "concurrency.locking.deadlock_timeout_ms",
                            ) {
                                Some(crate::serde_json::Value::Number(n)) => n as u64,
                                _ => 5000,
                            }
                        });
                    let cfg = crate::storage::transaction::lock::LockConfig {
                        default_timeout: std::time::Duration::from_millis(timeout_ms),
                        ..Default::default()
                    };
                    crate::storage::transaction::lock::LockManager::new(cfg)
                }),
                rls_policies: parking_lot::RwLock::new(HashMap::new()),
                rls_enabled_tables: parking_lot::RwLock::new(HashSet::new()),
                foreign_tables: Arc::new(crate::storage::fdw::ForeignTableRegistry::with_builtins()),
                pending_tombstones: parking_lot::RwLock::new(HashMap::new()),
                pending_versioned_updates: parking_lot::RwLock::new(HashMap::new()),
                pending_kv_watch_events: parking_lot::RwLock::new(HashMap::new()),
                pending_store_wal_actions: parking_lot::RwLock::new(HashMap::new()),
                queue_wait_registry: std::sync::Arc::new(
                    crate::runtime::queue_wait_registry::QueueWaitRegistry::new(),
                ),
                pending_queue_wakes: parking_lot::RwLock::new(HashMap::new()),
                tenant_tables: parking_lot::RwLock::new(HashMap::new()),
                ddl_epoch: std::sync::atomic::AtomicU64::new(0),
                write_gate: Arc::new(crate::runtime::write_gate::WriteGate::from_options(
                    &options,
                )),
                lifecycle: crate::runtime::lifecycle::Lifecycle::new(),
                resource_limits: crate::runtime::resource_limits::ResourceLimits::from_env(),
                audit_log: {
                    // Falls back to `<temp dir>/reddb` when no data path is
                    // configured.
                    let data_path = options
                        .data_path
                        .clone()
                        .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
                    let (audit_dest, _) = crate::api::tier_wiring::current_log_destinations();
                    Arc::new(crate::runtime::audit_log::AuditLogger::for_destination(
                        &audit_dest,
                        &data_path,
                    ))
                },
                control_event_ledger: parking_lot::RwLock::new(Arc::new(
                    crate::runtime::control_events::RuntimeLedger::new(db.store()),
                )),
                control_event_config: options.control_events,
                query_audit: Arc::new(crate::runtime::query_audit::QueryAuditStream::new(
                    db.store(),
                    options.query_audit.clone(),
                )),
                lease_lifecycle: std::sync::OnceLock::new(),
                replica_apply_metrics: std::sync::Arc::new(
                    crate::replication::logical::ReplicaApplyMetrics::default(),
                ),
                quota_bucket: crate::runtime::quota_bucket::QuotaBucket::from_env(),
                schema_vocabulary: parking_lot::RwLock::new(
                    crate::runtime::schema_vocabulary::SchemaVocabulary::new(),
                ),
                slow_query_logger: {
                    // Fallback directory is the parent of the data path, or
                    // `<temp dir>/reddb` when that is unavailable.
                    let fallback_dir = options
                        .data_path
                        .as_ref()
                        .and_then(|p| p.parent().map(std::path::PathBuf::from))
                        .unwrap_or_else(|| std::env::temp_dir().join("reddb"));
                    // Unset or unparsable env values fall back to 1000 ms
                    // and 100 respectively.
                    let threshold_ms = std::env::var("RED_SLOW_QUERY_THRESHOLD_MS")
                        .ok()
                        .and_then(|s| s.parse::<u64>().ok())
                        .unwrap_or(1000);
                    let sample_pct = std::env::var("RED_SLOW_QUERY_SAMPLE_PCT")
                        .ok()
                        .and_then(|s| s.parse::<u8>().ok())
                        .unwrap_or(100);
                    let (_, slow_dest) = crate::api::tier_wiring::current_log_destinations();
                    crate::telemetry::slow_query_logger::SlowQueryLogger::for_destination(
                        &slow_dest,
                        &fallback_dir,
                        threshold_ms,
                        sample_pct,
                    )
                },
                kv_stats: crate::runtime::KvStatsCounters::default(),
                metrics_ingest_stats: crate::runtime::MetricsIngestCounters::default(),
                metrics_tenant_activity_stats:
                    crate::runtime::MetricsTenantActivityCounters::default(),
                queue_telemetry: Arc::new(
                    crate::runtime::queue_telemetry::QueueTelemetryCounters::default(),
                ),
                queue_presence: Arc::new(
                    crate::storage::queue::presence::ConsumerPresenceRegistry::new(),
                ),
                vector_introspection: Arc::new(
                    crate::storage::vector::introspection::VectorIntrospectionRegistry::new(),
                ),
                kv_tag_index: crate::runtime::KvTagIndex::default(),
                chain_tip_cache: parking_lot::Mutex::new(HashMap::new()),
                chain_integrity_broken: parking_lot::Mutex::new(HashMap::new()),
                integrity_tombstones: parking_lot::Mutex::new(Vec::new()),
                integrity_tombstones_state: std::sync::atomic::AtomicU8::new(0),
            }),
        };

        crate::telemetry::operator_event::install_global_audit_sink(Arc::clone(
            &runtime.inner.audit_log,
        ));

        // The same two timestamps are recorded for both the restore phase
        // and the WAL-replay phase.
        runtime
            .inner
            .lifecycle
            .set_restore_started_at_ms(boot_open_start_ms);
        runtime
            .inner
            .lifecycle
            .set_restore_ready_at_ms(storage_ready_ms);
        runtime
            .inner
            .lifecycle
            .set_wal_replay_started_at_ms(boot_open_start_ms);
        runtime
            .inner
            .lifecycle
            .set_wal_replay_ready_at_ms(storage_ready_ms);

        // Restored CDC LSN = max(logical WAL spool LSN, last archived LSN
        // from config); the spool term is 0 without replication or a spool.
        let restored_cdc_lsn = runtime
            .inner
            .db
            .replication
            .as_ref()
            .map(|repl| {
                repl.logical_wal_spool
                    .as_ref()
                    .map(|spool| spool.current_lsn())
                    .unwrap_or(0)
            })
            .unwrap_or(0)
            .max(runtime.config_u64("red.config.timeline.last_archived_lsn", 0));
        runtime.inner.cdc.set_current_lsn(restored_cdc_lsn);
        runtime.rehydrate_snapshot_xid_floor();
        runtime.bootstrap_system_keyed_collections()?;
        runtime.rehydrate_declared_column_schemas();
        runtime.load_probabilistic_state()?;

        runtime.rehydrate_tenant_tables();
        runtime.rehydrate_materialized_view_descriptors();
        if let Some(repl) = &runtime.inner.db.replication {
            repl.wal_buffer.set_current_lsn(restored_cdc_lsn);
        }

        {
            // Publish a snapshot of host/process facts under `red.system`;
            // this is written on every boot.
            let sys = SystemInfo::collect();
            runtime.inner.db.store().set_config_tree(
                "red.system",
                &crate::serde_json::json!({
                    "pid": sys.pid,
                    "cpu_cores": sys.cpu_cores,
                    "total_memory_bytes": sys.total_memory_bytes,
                    "available_memory_bytes": sys.available_memory_bytes,
                    "os": sys.os,
                    "arch": sys.arch,
                    "hostname": sys.hostname,
                    "started_at": SystemTime::now()
                        .duration_since(UNIX_EPOCH)
                        .unwrap_or_default()
                        .as_millis() as u64
                }),
            );

            let store = runtime.inner.db.store();
            // Seed the default config trees only while `red_config` holds at
            // most 10 entities (or does not exist).
            if store
                .get_collection("red_config")
                .map(|m| m.query_all(|_| true).len())
                .unwrap_or(0)
                <= 10
            {
                store.set_config_tree("red.ai", &crate::json!({
                    "default": crate::json!({
                        "provider": "openai",
                        "model": crate::ai::DEFAULT_OPENAI_PROMPT_MODEL
                    }),
                    "max_embedding_inputs": 256,
                    "max_prompt_batch": 256,
                    "timeout": crate::json!({ "connect_secs": 10, "read_secs": 90, "write_secs": 30 })
                }));
                store.set_config_tree(
                    "red.server",
                    &crate::json!({
                        "max_scan_limit": 1000,
                        "max_body_size": 1048576,
                        "read_timeout_ms": 5000,
                        "write_timeout_ms": 5000
                    }),
                );
                store.set_config_tree(
                    "red.storage",
                    &crate::json!({
                        "page_size": 4096,
                        "page_cache_capacity": 100000,
                        "auto_checkpoint_pages": 1000,
                        "snapshot_retention": 16,
                        "verify_checksums": true,
                        "segment": crate::json!({
                            "max_entities": 100000,
                            "max_bytes": 268435456_u64,
                            "compression_level": 6
                        }),
                        "hnsw": crate::json!({ "m": 16, "ef_construction": 100, "ef_search": 50 }),
                        "ivf": crate::json!({ "n_lists": 100, "n_probes": 10 }),
                        "bm25": crate::json!({ "k1": 1.2, "b": 0.75 })
                    }),
                );
                store.set_config_tree(
                    "red.search",
                    &crate::json!({
                        "rag": crate::json!({
                            "max_chunks_per_source": 10,
                            "max_total_chunks": 25,
                            "similarity_threshold": 0.8,
                            "graph_depth": 2,
                            "min_relevance": 0.3
                        }),
                        "fusion": crate::json!({
                            "vector_weight": 0.5,
                            "graph_weight": 0.3,
                            "table_weight": 0.2,
                            "dedup_threshold": 0.85
                        })
                    }),
                );
                store.set_config_tree(
                    "red.auth",
                    &crate::json!({
                        "enabled": false,
                        "session_ttl_secs": 3600,
                        "require_auth": false
                    }),
                );
                store.set_config_tree(
                    "red.query",
                    &crate::json!({
                        "connection_pool": crate::json!({ "max_connections": 64, "max_idle": 16 }),
                        "max_recursion_depth": 1000
                    }),
                );
                store.set_config_tree(
                    "red.indexes",
                    &crate::json!({
                        "auto_select": true,
                        "bloom_filter": crate::json!({
                            "enabled": true,
                            "false_positive_rate": 0.01,
                            "prune_on_scan": true
                        }),
                        "hash": crate::json!({ "enabled": true }),
                        "bitmap": crate::json!({ "enabled": true, "max_cardinality": 1000 }),
                        "spatial": crate::json!({ "enabled": true })
                    }),
                );
                store.set_config_tree(
                    "red.memtable",
                    &crate::json!({
                        "enabled": true,
                        "max_bytes": 67108864_u64,
                        "flush_threshold": 0.75
                    }),
                );
                store.set_config_tree(
                    "red.probabilistic",
                    &crate::json!({
                        "hll_registers": 16384,
                        "sketch_default_width": 1000,
                        "sketch_default_depth": 5,
                        "filter_default_capacity": 100000
                    }),
                );
                store.set_config_tree(
                    "red.timeseries",
                    &crate::json!({
                        "default_chunk_size": 1024,
                        "compression": crate::json!({
                            "timestamps": "delta_of_delta",
                            "values": "gorilla_xor"
                        }),
                        "default_retention_days": 0
                    }),
                );
                store.set_config_tree(
                    "red.queue",
                    &crate::json!({
                        "default_max_size": 0,
                        "default_max_attempts": 3,
                        "visibility_timeout_ms": 30000,
                        "consumer_idle_timeout_ms": 60000
                    }),
                );
                store.set_config_tree(
                    "red.backup",
                    &crate::json!({
                        "enabled": false,
                        "interval_secs": 3600,
                        "retention_count": 24,
                        "upload": false,
                        "backend": "local"
                    }),
                );
                store.set_config_tree(
                    "red.wal",
                    &crate::json!({
                        "archive": crate::json!({
                            "enabled": false,
                            "retention_hours": 168,
                            "prefix": "wal/"
                        })
                    }),
                );
                store.set_config_tree(
                    "red.cdc",
                    &crate::json!({
                        "enabled": true,
                        "buffer_size": 100000
                    }),
                );
                store.set_config_tree(
                    "red.config.secret",
                    &crate::json!({
                        "auto_encrypt": true,
                        "auto_decrypt": true
                    }),
                );
            }

            crate::runtime::config_matrix::heal_critical_keys(store.as_ref());

            // Push the configured B-tree descent mode (default `true`) into
            // the storage engine's global switch.
            let lehman_yao = runtime.config_bool("storage.btree.lehman_yao", true);
            crate::storage::engine::btree::lehman_yao::set_enabled(lehman_yao);
            if lehman_yao {
                tracing::info!(
                    "storage.btree.lehman_yao=true — lock-free concurrent descent enabled"
                );
            }

            // Apply the config overlay file; its result is deliberately
            // ignored.
            let overlay_path = crate::runtime::config_overlay::config_file_path();
            let _ =
                crate::runtime::config_overlay::apply_config_file(store.as_ref(), &overlay_path);
        }

        {
            // Ensure every VCS collection exists and write the VCS config
            // tree (written unconditionally on each boot).
            let store = runtime.inner.db.store();
            for name in crate::application::vcs_collections::ALL {
                let _ = store.get_or_create_collection(*name);
            }
            store.set_config_tree(
                crate::application::vcs_collections::CONFIG_NAMESPACE,
                &crate::json!({
                    "default_branch": "main",
                    "author": crate::json!({
                        "name": "reddb",
                        "email": "reddb@localhost"
                    }),
                    "protected_branches": crate::json!(["main"]),
                    "closure": crate::json!({
                        "enabled": true,
                        "lazy": true
                    }),
                    "merge": crate::json!({
                        "default_strategy": "auto",
                        "fast_forward": true
                    })
                }),
            );
        }

        {
            // Ensure every migration collection exists.
            let store = runtime.inner.db.store();
            for name in crate::application::migration_collections::ALL {
                let _ = store.get_or_create_collection(*name);
            }
        }

        // Best-effort: the result of ensuring topology collections is ignored.
        let _ = crate::application::topology_collections::ensure(&runtime);

        {
            // Maintenance thread: wakes every 200 ms, exits once the runtime
            // has been dropped (the weak handle no longer upgrades), and
            // every 60 s reads the context-index stats and discards them.
            let weak = Arc::downgrade(&runtime.inner);
            std::thread::Builder::new()
                .name("reddb-maintenance".into())
                .spawn(move || {
                    let tick = std::time::Duration::from_millis(200);
                    let work_interval = std::time::Duration::from_secs(60);
                    let mut last_work = std::time::Instant::now();
                    loop {
                        std::thread::sleep(tick);
                        let Some(inner) = weak.upgrade() else {
                            break;
                        };
                        if last_work.elapsed() >= work_interval {
                            let _stats = inner.db.store().context_index().stats();
                            last_work = std::time::Instant::now();
                        }
                    }
                })
                .ok();
        }

        {
            // Backup scheduler: scan `red_config` for the enable flag and
            // interval, defaulting to disabled / 3600 s.
            let store = runtime.inner.db.store();
            let mut backup_enabled = false;
            let mut backup_interval = 3600u64;

            if let Some(manager) = store.get_collection("red_config") {
                manager.for_each_entity(|entity| {
                    if let Some(row) = entity.data.as_row() {
                        let key = row.get_field("key").and_then(|v| match v {
                            crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
                            _ => None,
                        });
                        let val = row.get_field("value");
                        if key == Some("red.config.backup.enabled") {
                            // Accepts boolean `true` or the text "true".
                            backup_enabled = match val {
                                Some(crate::storage::schema::Value::Boolean(true)) => true,
                                Some(crate::storage::schema::Value::Text(s)) => &**s == "true",
                                _ => false,
                            };
                        } else if key == Some("red.config.backup.interval_secs") {
                            if let Some(crate::storage::schema::Value::Integer(n)) = val {
                                backup_interval = *n as u64;
                            }
                        }
                    }
                    // Returning `true` — presumably "keep iterating"; confirm
                    // against `for_each_entity`.
                    true
                });
            }

            if backup_enabled {
                runtime.inner.backup_scheduler.set_interval(backup_interval);
                let rt = runtime.clone();
                runtime
                    .inner
                    .backup_scheduler
                    .start(move || rt.trigger_backup().map_err(|e| format!("{}", e)));
            }
        }

        {
            // Load EC configs from the store; start the EC worker only when
            // at least one async config exists.
            runtime
                .inner
                .ec_registry
                .load_from_config_store(runtime.inner.db.store().as_ref());
            if !runtime.inner.ec_registry.async_configs().is_empty() {
                runtime.inner.ec_worker.start(
                    Arc::clone(&runtime.inner.ec_registry),
                    Arc::clone(&runtime.inner.db.store()),
                );
            }
        }

        // When configured as a replica, run the replica loop on its own
        // thread, pointed at the primary address.
        if let crate::replication::ReplicationRole::Replica { primary_addr } =
            runtime.inner.db.options().replication.role.clone()
        {
            let rt = runtime.clone();
            std::thread::Builder::new()
                .name("reddb-replica".into())
                .spawn(move || rt.run_replica_loop(primary_addr))
                .ok();
        }

        runtime.inner.lifecycle.mark_ready();

        {
            // Materialized-view scheduler: every 50 ms, refresh views that
            // are due; exits once the runtime has been dropped.
            let weak_inner = Arc::downgrade(&runtime.inner);
            std::thread::Builder::new()
                .name("reddb-mv-scheduler".into())
                .spawn(move || loop {
                    std::thread::sleep(std::time::Duration::from_millis(50));
                    let Some(inner) = weak_inner.upgrade() else {
                        break;
                    };
                    let rt = RedDBRuntime { inner };
                    rt.refresh_due_materialized_views();
                })
                .ok();
        }

        // Retention sweeper: only started when the write gate is not
        // read-only; ticks every 500 ms with the default batch size.
        if !runtime.write_gate().is_read_only() {
            let weak_inner = Arc::downgrade(&runtime.inner);
            std::thread::Builder::new()
                .name("reddb-retention-sweeper".into())
                .spawn(move || loop {
                    std::thread::sleep(std::time::Duration::from_millis(500));
                    let Some(inner) = weak_inner.upgrade() else {
                        break;
                    };
                    let rt = RedDBRuntime { inner };
                    rt.sweep_retention_tick(
                        crate::runtime::retention_sweeper::DEFAULT_SWEEPER_BATCH,
                    );
                })
                .ok();
        }

        Ok(runtime)
    }
3507
3508 fn rehydrate_snapshot_xid_floor(&self) {
3509 let store = self.inner.db.store();
3510 for collection in store.list_collections() {
3511 let Some(manager) = store.get_collection(&collection) else {
3512 continue;
3513 };
3514 for entity in manager.query_all(|_| true) {
3515 self.inner
3516 .snapshot_manager
3517 .observe_committed_xid(entity.xmin);
3518 self.inner
3519 .snapshot_manager
3520 .observe_committed_xid(entity.xmax);
3521 }
3522 }
3523 }
3524
3525 pub(crate) fn ensure_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
3538 let store = self.inner.db.store();
3539 let mut changed = false;
3540 if store.get_collection(name).is_none() {
3541 store.get_or_create_collection(name);
3542 changed = true;
3543 }
3544 if self.inner.db.collection_contract(name).is_none() {
3545 self.inner
3546 .db
3547 .save_collection_contract(system_keyed_collection_contract(
3548 name,
3549 crate::catalog::CollectionModel::Table,
3550 ))
3551 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3552 changed = true;
3553 }
3554 if changed {
3555 self.inner
3556 .db
3557 .persist_metadata()
3558 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3559 }
3560 Ok(())
3561 }
3562
3563 pub(crate) fn drop_materialized_view_backing(&self, name: &str) -> RedDBResult<()> {
3568 let store = self.inner.db.store();
3569 if store.get_collection(name).is_none() {
3570 return Ok(());
3571 }
3572 store
3573 .drop_collection(name)
3574 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3575 if self.inner.db.collection_contract(name).is_some() {
3578 self.inner
3579 .db
3580 .remove_collection_contract(name)
3581 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3582 }
3583 self.invalidate_result_cache();
3584 self.inner
3585 .db
3586 .persist_metadata()
3587 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3588 Ok(())
3589 }
3590
3591 fn bootstrap_system_keyed_collections(&self) -> RedDBResult<()> {
3592 let mut changed = false;
3593 for (name, model) in [
3594 ("red.config", crate::catalog::CollectionModel::Config),
3595 ("red.vault", crate::catalog::CollectionModel::Vault),
3596 (
3600 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION,
3601 crate::catalog::CollectionModel::Config,
3602 ),
3603 ] {
3604 if self.inner.db.store().get_collection(name).is_none() {
3605 self.inner.db.store().get_or_create_collection(name);
3606 changed = true;
3607 }
3608 if self.inner.db.collection_contract(name).is_none() {
3609 self.inner
3610 .db
3611 .save_collection_contract(system_keyed_collection_contract(name, model))
3612 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3613 changed = true;
3614 }
3615 }
3616 if changed {
3617 self.inner
3618 .db
3619 .persist_metadata()
3620 .map_err(|err| RedDBError::Internal(err.to_string()))?;
3621 }
3622 Ok(())
3623 }
3624
3625 pub fn db(&self) -> Arc<RedDB> {
3626 Arc::clone(&self.inner.db)
3627 }
3628
3629 pub fn index_store_ref(&self) -> &super::index_store::IndexStore {
3634 &self.inner.index_store
3635 }
3636
3637 pub(crate) fn schema_vocabulary_apply(
3642 &self,
3643 event: crate::runtime::schema_vocabulary::DdlEvent,
3644 ) {
3645 self.inner.schema_vocabulary.write().on_ddl(event);
3646 }
3647
3648 pub fn schema_vocabulary_lookup(
3653 &self,
3654 token: &str,
3655 ) -> Vec<crate::runtime::schema_vocabulary::VocabHit> {
3656 self.inner.schema_vocabulary.read().lookup(token).to_vec()
3657 }
3658
3659 pub fn set_auth_store(&self, store: Arc<crate::auth::store::AuthStore>) {
3663 *self.inner.auth_store.write() = Some(store);
3664 }
3665
3666 pub fn auth_store(&self) -> Option<Arc<crate::auth::store::AuthStore>> {
3669 self.inner.auth_store.read().clone()
3670 }
3671
3672 pub fn vault_kv_get(&self, key: &str) -> Option<String> {
3674 self.inner
3675 .auth_store
3676 .read()
3677 .as_ref()
3678 .and_then(|store| store.vault_kv_get(key))
3679 }
3680
3681 pub fn vault_kv_try_set(&self, key: String, value: String) -> RedDBResult<()> {
3684 let store = self.inner.auth_store.read().clone().ok_or_else(|| {
3685 RedDBError::Query("secret storage requires an enabled, unsealed vault".to_string())
3686 })?;
3687 store
3688 .vault_kv_try_set(key, value)
3689 .map_err(|err| RedDBError::Query(err.to_string()))
3690 }
3691
3692 pub fn set_oauth_validator(&self, validator: Option<Arc<crate::auth::oauth::OAuthValidator>>) {
3696 *self.inner.oauth_validator.write() = validator;
3697 }
3698
3699 pub fn oauth_validator(&self) -> Option<Arc<crate::auth::oauth::OAuthValidator>> {
3703 self.inner.oauth_validator.read().clone()
3704 }
3705
3706 pub(crate) fn secret_aes_key(&self) -> Option<[u8; 32]> {
3710 let guard = self.inner.auth_store.read();
3711 guard.as_ref().and_then(|s| s.vault_secret_key())
3712 }
3713
3714 pub(crate) fn config_bool(&self, key: &str, default: bool) -> bool {
3720 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3721 if let Some(crate::storage::schema::Value::Boolean(b)) =
3722 crate::runtime::config_overlay::coerce_env_value(key, raw)
3723 {
3724 return b;
3725 }
3726 }
3727 let store = self.inner.db.store();
3728 let Some(manager) = store.get_collection("red_config") else {
3729 return default;
3730 };
3731 let mut result = default;
3732 let mut latest_id: u64 = 0;
3733 manager.for_each_entity(|entity| {
3734 if let Some(row) = entity.data.as_row() {
3735 let entry_key = row.get_field("key").and_then(|v| match v {
3736 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3737 _ => None,
3738 });
3739 if entry_key == Some(key) {
3740 let id = entity.id.raw();
3741 if id >= latest_id {
3742 latest_id = id;
3743 result = match row.get_field("value") {
3744 Some(crate::storage::schema::Value::Boolean(b)) => *b,
3745 Some(crate::storage::schema::Value::Text(s)) => {
3746 matches!(s.as_ref(), "true" | "TRUE" | "True" | "1")
3747 }
3748 Some(crate::storage::schema::Value::Integer(n)) => *n != 0,
3749 _ => default,
3750 };
3751 }
3752 }
3753 }
3754 true
3755 });
3756 result
3757 }
3758
3759 pub(crate) fn config_u64(&self, key: &str, default: u64) -> u64 {
3760 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3761 if let Some(crate::storage::schema::Value::UnsignedInteger(n)) =
3762 crate::runtime::config_overlay::coerce_env_value(key, raw)
3763 {
3764 return n;
3765 }
3766 }
3767 let store = self.inner.db.store();
3768 let Some(manager) = store.get_collection("red_config") else {
3769 return default;
3770 };
3771 let mut result = default;
3772 let mut latest_id: u64 = 0;
3773 manager.for_each_entity(|entity| {
3774 if let Some(row) = entity.data.as_row() {
3775 let entry_key = row.get_field("key").and_then(|v| match v {
3776 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3777 _ => None,
3778 });
3779 if entry_key == Some(key) {
3780 let id = entity.id.raw();
3781 if id >= latest_id {
3782 latest_id = id;
3783 result = match row.get_field("value") {
3784 Some(crate::storage::schema::Value::Integer(n)) => *n as u64,
3785 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n,
3786 Some(crate::storage::schema::Value::Text(s)) => {
3787 s.parse::<u64>().unwrap_or(default)
3788 }
3789 _ => default,
3790 };
3791 }
3792 }
3793 }
3794 true
3795 });
3796 result
3797 }
3798
3799 pub(crate) fn config_f64(&self, key: &str, default: f64) -> f64 {
3800 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3801 if let Ok(n) = raw.parse::<f64>() {
3802 return n;
3803 }
3804 }
3805 let store = self.inner.db.store();
3806 let Some(manager) = store.get_collection("red_config") else {
3807 return default;
3808 };
3809 let mut result = default;
3810 let mut latest_id: u64 = 0;
3811 manager.for_each_entity(|entity| {
3812 if let Some(row) = entity.data.as_row() {
3813 let entry_key = row.get_field("key").and_then(|v| match v {
3814 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3815 _ => None,
3816 });
3817 if entry_key == Some(key) {
3818 let id = entity.id.raw();
3819 if id >= latest_id {
3820 latest_id = id;
3821 result = match row.get_field("value") {
3822 Some(crate::storage::schema::Value::Float(n)) => *n,
3823 Some(crate::storage::schema::Value::Integer(n)) => *n as f64,
3824 Some(crate::storage::schema::Value::UnsignedInteger(n)) => *n as f64,
3825 Some(crate::storage::schema::Value::Text(s)) => {
3826 s.parse::<f64>().unwrap_or(default)
3827 }
3828 _ => default,
3829 };
3830 }
3831 }
3832 }
3833 true
3834 });
3835 result
3836 }
3837
3838 pub(crate) fn config_string(&self, key: &str, default: &str) -> String {
3839 if let Some(raw) = self.inner.env_config_overrides.get(key) {
3840 return raw.clone();
3841 }
3842 let store = self.inner.db.store();
3843 let Some(manager) = store.get_collection("red_config") else {
3844 return default.to_string();
3845 };
3846 let mut result = default.to_string();
3847 let mut latest_id: u64 = 0;
3848 manager.for_each_entity(|entity| {
3849 if let Some(row) = entity.data.as_row() {
3850 let entry_key = row.get_field("key").and_then(|v| match v {
3851 crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
3852 _ => None,
3853 });
3854 if entry_key == Some(key) {
3855 let id = entity.id.raw();
3856 if id >= latest_id {
3857 latest_id = id;
3858 if let Some(crate::storage::schema::Value::Text(value)) =
3859 row.get_field("value")
3860 {
3861 result = value.to_string();
3862 }
3863 }
3864 }
3865 }
3866 true
3867 });
3868 result
3869 }
3870
3871 fn latest_metadata_for(
3872 &self,
3873 collection: &str,
3874 entity_id: u64,
3875 ) -> Option<crate::serde_json::Value> {
3876 self.inner
3877 .db
3878 .store()
3879 .get_metadata(collection, EntityId::new(entity_id))
3880 .map(|metadata| metadata_to_json(&metadata))
3881 }
3882
3883 fn persist_replica_lsn(&self, lsn: u64) {
3884 self.inner.db.store().set_config_tree(
3885 "red.replication",
3886 &crate::json!({
3887 "last_applied_lsn": lsn
3888 }),
3889 );
3890 }
3891
3892 fn resolve_replica_id(&self) -> String {
3899 let configured = self.config_string("red.replication.replica_id", "");
3900 if !configured.is_empty() {
3901 return configured;
3902 }
3903 let generated = crate::crypto::uuid::Uuid::new_v4().to_string();
3904 self.inner.db.store().set_config_tree(
3905 "red.replication",
3906 &crate::json!({
3907 "replica_id": generated.clone()
3908 }),
3909 );
3910 generated
3911 }
3912
3913 fn persist_replication_health(
3914 &self,
3915 state: &str,
3916 last_error: &str,
3917 primary_lsn: Option<u64>,
3918 oldest_available_lsn: Option<u64>,
3919 ) {
3920 self.inner.db.store().set_config_tree(
3921 "red.replication",
3922 &crate::json!({
3923 "state": state,
3924 "last_error": last_error,
3925 "last_seen_primary_lsn": primary_lsn.unwrap_or(0),
3926 "last_seen_oldest_lsn": oldest_available_lsn.unwrap_or(0),
3927 "updated_at_unix_ms": SystemTime::now()
3928 .duration_since(UNIX_EPOCH)
3929 .unwrap_or_default()
3930 .as_millis() as u64
3931 }),
3932 );
3933 }
3934
3935 pub(crate) fn secret_auto_encrypt(&self) -> bool {
3938 self.config_bool("red.config.secret.auto_encrypt", true)
3939 }
3940
3941 pub(crate) fn secret_auto_decrypt(&self) -> bool {
3946 self.config_bool("red.config.secret.auto_decrypt", true)
3947 }
3948
3949 pub(crate) fn apply_secret_decryption(&self, result: &mut RuntimeQueryResult) {
3956 if !self.secret_auto_decrypt() {
3957 return;
3958 }
3959 let Some(key) = self.secret_aes_key() else {
3960 return;
3961 };
3962 for record in result.result.records.iter_mut() {
3963 for value in record.values_mut() {
3964 if let Value::Secret(ref bytes) = value {
3965 if let Some(plain) =
3966 super::impl_dml::decrypt_secret_payload(&key, bytes.as_slice())
3967 {
3968 if let Ok(text) = String::from_utf8(plain) {
3969 *value = Value::text(text);
3970 }
3971 }
3972 }
3973 }
3974 }
3975 }
3976
3977 pub(crate) fn mutation_engine(&self) -> crate::runtime::mutation::MutationEngine<'_> {
3985 crate::runtime::mutation::MutationEngine::new(self)
3986 }
3987
3988 pub fn check_write(&self, kind: crate::runtime::write_gate::WriteKind) -> RedDBResult<()> {
3999 self.inner.write_gate.check(kind)
4000 }
4001
4002 pub fn write_gate(&self) -> &crate::runtime::write_gate::WriteGate {
4006 &self.inner.write_gate
4007 }
4008
4009 pub fn lifecycle(&self) -> &crate::runtime::lifecycle::Lifecycle {
4013 &self.inner.lifecycle
4014 }
4015
4016 pub fn resource_limits(&self) -> &crate::runtime::resource_limits::ResourceLimits {
4018 &self.inner.resource_limits
4019 }
4020
4021 pub fn audit_log(&self) -> &crate::runtime::audit_log::AuditLogger {
4023 &self.inner.audit_log
4024 }
4025
4026 pub fn audit_log_arc(&self) -> Arc<crate::runtime::audit_log::AuditLogger> {
4030 Arc::clone(&self.inner.audit_log)
4031 }
4032
4033 pub(crate) fn emit_control_event(
4034 &self,
4035 kind: crate::runtime::control_events::EventKind,
4036 outcome: crate::runtime::control_events::Outcome,
4037 action: &'static str,
4038 resource: Option<String>,
4039 reason: Option<String>,
4040 extra_fields: Vec<(String, crate::runtime::control_events::Sensitivity)>,
4041 ) -> RedDBResult<()> {
4042 use crate::runtime::control_events::{
4043 ActorRef, ControlEvent, ControlEventCtx, ControlEventLedger, Sensitivity,
4044 };
4045
4046 let tenant = current_tenant();
4047 let principal = current_auth_identity();
4048 let actor_user = principal
4049 .as_ref()
4050 .map(|(principal, _)| UserId::from_parts(tenant.as_deref(), principal));
4051 let actor = actor_user
4052 .as_ref()
4053 .map(ActorRef::User)
4054 .unwrap_or(ActorRef::Anonymous);
4055 let ctx = ControlEventCtx {
4056 actor,
4057 scope: tenant
4058 .as_ref()
4059 .map(|scope| std::borrow::Cow::Borrowed(scope.as_str())),
4060 request_id: Some(std::borrow::Cow::Owned(format!(
4061 "conn-{}",
4062 current_connection_id()
4063 ))),
4064 trace_id: None,
4065 };
4066 let mut fields = std::collections::HashMap::new();
4067 fields.insert(
4068 "connection_id".to_string(),
4069 Sensitivity::raw(current_connection_id().to_string()),
4070 );
4071 if let Some((_, role)) = principal {
4072 fields.insert("actor_role".to_string(), Sensitivity::raw(role.as_str()));
4073 }
4074 for (key, value) in extra_fields {
4075 fields.insert(key, value);
4076 }
4077 let event = ControlEvent {
4078 kind,
4079 outcome,
4080 action: std::borrow::Cow::Borrowed(action),
4081 resource,
4082 reason,
4083 matched_policy_id: None,
4084 fields,
4085 };
4086 let ledger = self.inner.control_event_ledger.read();
4087 match ledger.emit(&ctx, event) {
4088 Ok(_) => Ok(()),
4089 Err(err) if self.inner.control_event_config.require_persistence() => {
4090 Err(RedDBError::Internal(err.to_string()))
4091 }
4092 Err(_) => Ok(()),
4093 }
4094 }
4095
4096 fn policy_mutation_control_ctx<'a>(
4097 &self,
4098 actor: &'a crate::auth::UserId,
4099 tenant: Option<&'a str>,
4100 ) -> crate::runtime::control_events::ControlEventCtx<'a> {
4101 crate::runtime::control_events::ControlEventCtx {
4102 actor: crate::runtime::control_events::ActorRef::User(actor),
4103 scope: tenant.map(std::borrow::Cow::Borrowed),
4104 request_id: Some(std::borrow::Cow::Owned(format!(
4105 "conn-{}",
4106 current_connection_id()
4107 ))),
4108 trace_id: None,
4109 }
4110 }
4111
4112 fn emit_query_audit(
4113 &self,
4114 query: &str,
4115 plan: &QueryAuditPlan,
4116 duration_ms: u64,
4117 result: &RuntimeQueryResult,
4118 ) {
4119 if !self.inner.query_audit.has_rules() {
4120 return;
4121 }
4122 let actor = current_auth_identity().map(|(principal, _)| principal);
4123 let tenant = current_tenant();
4124 let row_count = if result.statement_type == "select" {
4125 result.result.records.len() as u64
4126 } else {
4127 result.affected_rows
4128 };
4129 self.inner
4130 .query_audit
4131 .emit(crate::runtime::query_audit::QueryAuditEvent {
4132 actor,
4133 tenant,
4134 statement_kind: plan.statement_kind,
4135 touched_collections: plan.collections.clone(),
4136 duration_ms,
4137 row_count,
4138 request_id: Some(crate::crypto::uuid::Uuid::new_v7().to_string()),
4139 query_hash: Some(blake3::hash(query.as_bytes()).to_hex().to_string()),
4140 });
4141 }
4142
4143 pub(crate) fn queue_telemetry(
4147 &self,
4148 ) -> &crate::runtime::queue_telemetry::QueueTelemetryCounters {
4149 &self.inner.queue_telemetry
4150 }
4151
4152 pub fn queue_telemetry_snapshot(
4155 &self,
4156 ) -> crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
4157 crate::runtime::queue_telemetry::QueueTelemetrySnapshot {
4158 delivered: self.inner.queue_telemetry.delivered_snapshot(),
4159 acked: self.inner.queue_telemetry.acked_snapshot(),
4160 nacked: self.inner.queue_telemetry.nacked_snapshot(),
4161 wait_started: self.inner.queue_telemetry.wait_started_snapshot(),
4162 wait_woken: self.inner.queue_telemetry.wait_woken_snapshot(),
4163 wait_timed_out: self.inner.queue_telemetry.wait_timed_out_snapshot(),
4164 wait_cancelled: self.inner.queue_telemetry.wait_cancelled_snapshot(),
4165 wait_duration: self.inner.queue_telemetry.wait_duration_snapshot(),
4166 }
4167 }
4168
4169 pub(crate) fn queue_presence(
4174 &self,
4175 ) -> &std::sync::Arc<crate::storage::queue::presence::ConsumerPresenceRegistry> {
4176 &self.inner.queue_presence
4177 }
4178
4179 pub fn queue_consumer_presence_snapshot(
4184 &self,
4185 ttl_ms: u64,
4186 ) -> Vec<crate::storage::queue::presence::ConsumerPresence> {
4187 let now_ns = std::time::SystemTime::now()
4188 .duration_since(std::time::UNIX_EPOCH)
4189 .map(|d| d.as_nanos() as u64)
4190 .unwrap_or(0);
4191 self.inner.queue_presence.snapshot(now_ns, ttl_ms)
4192 }
4193
4194 pub fn queue_active_consumer_counts(
4198 &self,
4199 ttl_ms: u64,
4200 ) -> std::collections::HashMap<(String, String), u32> {
4201 let now_ns = std::time::SystemTime::now()
4202 .duration_since(std::time::UNIX_EPOCH)
4203 .map(|d| d.as_nanos() as u64)
4204 .unwrap_or(0);
4205 self.inner
4206 .queue_presence
4207 .count_active_by_group(now_ns, ttl_ms)
4208 }
4209
4210 pub(crate) fn vector_introspection_registry(
4216 &self,
4217 ) -> &std::sync::Arc<crate::storage::vector::introspection::VectorIntrospectionRegistry> {
4218 &self.inner.vector_introspection
4219 }
4220
4221 pub fn vector_introspection_snapshot(
4226 &self,
4227 ) -> Vec<crate::storage::vector::introspection::VectorIntrospection> {
4228 self.inner.vector_introspection.snapshot()
4229 }
4230
4231 pub fn vector_introspection_get(
4235 &self,
4236 collection: &str,
4237 ) -> Option<crate::storage::vector::introspection::VectorIntrospection> {
4238 self.inner.vector_introspection.get(collection)
4239 }
4240
4241 pub fn queue_pending_counts(&self) -> Vec<((String, String), u64)> {
4246 let store = self.inner.db.store();
4247 crate::runtime::impl_queue::pending_counts_by_group(store.as_ref())
4248 .into_iter()
4249 .collect()
4250 }
4251
4252 pub fn write_gate_arc(&self) -> Arc<crate::runtime::write_gate::WriteGate> {
4257 Arc::clone(&self.inner.write_gate)
4258 }
4259
4260 pub fn lease_lifecycle(&self) -> Option<&Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
4263 self.inner.lease_lifecycle.get()
4264 }
4265
4266 pub fn set_lease_lifecycle(
4269 &self,
4270 lifecycle: Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>,
4271 ) -> Result<(), Arc<crate::runtime::lease_lifecycle::LeaseLifecycle>> {
4272 self.inner.lease_lifecycle.set(lifecycle)
4273 }
4274
4275 pub fn check_batch_size(&self, requested: usize) -> RedDBResult<()> {
4280 if self.inner.resource_limits.batch_size_exceeded(requested) {
4281 let max = self.inner.resource_limits.max_batch_size.unwrap_or(0);
4282 return Err(RedDBError::QuotaExceeded(format!(
4283 "max_batch_size:{requested}:{max}"
4284 )));
4285 }
4286 Ok(())
4287 }
4288
4289 pub fn check_db_size(&self) -> RedDBResult<()> {
4295 let Some(limit) = self.inner.resource_limits.max_db_size_bytes else {
4296 return Ok(());
4297 };
4298 if limit == 0 {
4299 return Ok(());
4300 }
4301 let Some(path) = self.inner.db.path() else {
4302 return Ok(());
4303 };
4304 let current = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
4305 if current > limit {
4306 return Err(RedDBError::QuotaExceeded(format!(
4307 "max_db_size_bytes:{current}:{limit}"
4308 )));
4309 }
4310 Ok(())
4311 }
4312
4313 pub fn graceful_shutdown(
4331 &self,
4332 backup_on_shutdown: bool,
4333 ) -> RedDBResult<crate::runtime::lifecycle::ShutdownReport> {
4334 if !self.inner.lifecycle.begin_shutdown() {
4335 return Ok(self.inner.lifecycle.shutdown_report().unwrap_or_default());
4339 }
4340
4341 let started_ms = std::time::SystemTime::now()
4342 .duration_since(std::time::UNIX_EPOCH)
4343 .map(|d| d.as_millis() as u64)
4344 .unwrap_or(0);
4345 let mut report = crate::runtime::lifecycle::ShutdownReport {
4346 started_at_ms: started_ms,
4347 ..Default::default()
4348 };
4349
4350 let flush_res = self.inner.db.flush_local_only();
4356 report.flushed_wal = flush_res.is_ok();
4357 report.final_checkpoint = flush_res.is_ok();
4358 if let Err(err) = &flush_res {
4359 tracing::error!(
4360 target: "reddb::lifecycle",
4361 error = %err,
4362 "graceful_shutdown: local flush failed"
4363 );
4364 } else if let Err(lease_err) =
4365 self.assert_remote_write_allowed("shutdown/checkpoint_upload")
4366 {
4367 tracing::warn!(
4368 target: "reddb::serverless::lease",
4369 error = %lease_err,
4370 "graceful_shutdown: remote upload skipped — lease not held"
4371 );
4372 } else if let Err(err) = self.inner.db.upload_to_remote_backend() {
4373 tracing::error!(
4374 target: "reddb::lifecycle",
4375 error = %err,
4376 "graceful_shutdown: remote upload failed"
4377 );
4378 }
4379
4380 if backup_on_shutdown && self.inner.db.remote_backend.is_some() {
4385 match self.trigger_backup() {
4391 Ok(result) => {
4392 report.backup_uploaded = result.uploaded;
4393 }
4394 Err(err) => {
4395 tracing::warn!(
4396 target: "reddb::lifecycle",
4397 error = %err,
4398 "graceful_shutdown: final backup skipped"
4399 );
4400 }
4401 }
4402 }
4403
4404 let completed_ms = std::time::SystemTime::now()
4405 .duration_since(std::time::UNIX_EPOCH)
4406 .map(|d| d.as_millis() as u64)
4407 .unwrap_or(started_ms);
4408 report.completed_at_ms = completed_ms;
4409 report.duration_ms = completed_ms.saturating_sub(started_ms);
4410
4411 self.inner.lifecycle.finish_shutdown(report.clone());
4412 Ok(report)
4413 }
4414
4415 pub(crate) fn cdc_emit_no_cache_invalidate(
4421 &self,
4422 operation: crate::replication::cdc::ChangeOperation,
4423 collection: &str,
4424 entity_id: u64,
4425 entity_kind: &str,
4426 ) -> u64 {
4427 let lsn = self
4428 .inner
4429 .cdc
4430 .emit(operation, collection, entity_id, entity_kind);
4431
4432 if let Some(ref primary) = self.inner.db.replication {
4434 let store = self.inner.db.store();
4435 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
4436 None
4437 } else {
4438 store.get(collection, EntityId::new(entity_id))
4439 };
4440 let record = ChangeRecord {
4441 term: self.current_replication_term(),
4442 lsn,
4443 timestamp: SystemTime::now()
4444 .duration_since(UNIX_EPOCH)
4445 .unwrap_or_default()
4446 .as_millis() as u64,
4447 operation,
4448 collection: collection.to_string(),
4449 entity_id,
4450 entity_kind: entity_kind.to_string(),
4451 entity_bytes: entity
4452 .as_ref()
4453 .map(|e| UnifiedStore::serialize_entity(e, store.format_version())),
4454 metadata: self.latest_metadata_for(collection, entity_id),
4455 refresh_records: None,
4456 };
4457 let encoded = record.encode();
4458 primary.append_logical_record(record.lsn, encoded);
4459 }
4460 lsn
4461 }
4462
4463 pub(crate) fn cdc_emit_insert_batch_no_cache_invalidate(
4464 &self,
4465 collection: &str,
4466 ids: &[EntityId],
4467 entity_kind: &str,
4468 ) -> Vec<u64> {
4469 if ids.is_empty() {
4470 return Vec::new();
4471 }
4472
4473 if self.inner.db.replication.is_none() {
4477 return self.inner.cdc.emit_batch_same_collection(
4478 crate::replication::cdc::ChangeOperation::Insert,
4479 collection,
4480 entity_kind,
4481 ids.iter().map(|id| id.raw()),
4482 );
4483 }
4484
4485 ids.iter()
4488 .map(|id| {
4489 self.cdc_emit_no_cache_invalidate(
4490 crate::replication::cdc::ChangeOperation::Insert,
4491 collection,
4492 id.raw(),
4493 entity_kind,
4494 )
4495 })
4496 .collect()
4497 }
4498
4499 pub fn cdc_emit(
4500 &self,
4501 operation: crate::replication::cdc::ChangeOperation,
4502 collection: &str,
4503 entity_id: u64,
4504 entity_kind: &str,
4505 ) -> u64 {
4506 let lsn = self
4507 .inner
4508 .cdc
4509 .emit(operation, collection, entity_id, entity_kind);
4510 self.invalidate_result_cache_for_table(collection);
4516
4517 if let Some(ref primary) = self.inner.db.replication {
4519 let store = self.inner.db.store();
4520 let entity = if operation == crate::replication::cdc::ChangeOperation::Delete {
4521 None
4522 } else {
4523 store.get(collection, EntityId::new(entity_id))
4524 };
4525 let record = ChangeRecord {
4526 term: self.current_replication_term(),
4527 lsn,
4528 timestamp: SystemTime::now()
4529 .duration_since(UNIX_EPOCH)
4530 .unwrap_or_default()
4531 .as_millis() as u64,
4532 operation,
4533 collection: collection.to_string(),
4534 entity_id,
4535 entity_kind: entity_kind.to_string(),
4536 entity_bytes: entity
4537 .as_ref()
4538 .map(|entity| UnifiedStore::serialize_entity(entity, store.format_version())),
4539 metadata: self.latest_metadata_for(collection, entity_id),
4540 refresh_records: None,
4541 };
4542 let encoded = record.encode();
4543 primary.append_logical_record(record.lsn, encoded);
4544 }
4545 lsn
4546 }
4547
4548 pub(crate) fn cdc_emit_kv(
4549 &self,
4550 operation: crate::replication::cdc::ChangeOperation,
4551 collection: &str,
4552 key: &str,
4553 entity_id: u64,
4554 before: Option<crate::json::Value>,
4555 after: Option<crate::json::Value>,
4556 ) -> u64 {
4557 let lsn = self
4558 .inner
4559 .cdc
4560 .emit_kv(operation, collection, key, entity_id, before, after);
4561 self.inner.kv_stats.incr_watch_events_emitted();
4562 self.invalidate_result_cache_for_table(collection);
4563 lsn
4564 }
4565
4566 pub(crate) fn record_kv_watch_event(
4567 &self,
4568 operation: crate::replication::cdc::ChangeOperation,
4569 collection: &str,
4570 key: &str,
4571 entity_id: u64,
4572 before: Option<crate::json::Value>,
4573 after: Option<crate::json::Value>,
4574 ) {
4575 if self.current_xid().is_some() {
4576 let conn_id = current_connection_id();
4577 let event = crate::replication::cdc::KvWatchEvent {
4578 collection: collection.to_string(),
4579 key: key.to_string(),
4580 op: operation,
4581 before,
4582 after,
4583 lsn: 0,
4584 committed_at: 0,
4585 dropped_event_count: 0,
4586 };
4587 self.inner
4588 .pending_kv_watch_events
4589 .write()
4590 .entry(conn_id)
4591 .or_default()
4592 .push(event);
4593 return;
4594 }
4595
4596 self.cdc_emit_kv(operation, collection, key, entity_id, before, after);
4597 }
4598
4599 pub(crate) fn cdc_emit_prebuilt(
4600 &self,
4601 operation: crate::replication::cdc::ChangeOperation,
4602 collection: &str,
4603 entity: &UnifiedEntity,
4604 entity_kind: &str,
4605 metadata: Option<&crate::storage::Metadata>,
4606 invalidate_cache: bool,
4607 ) -> u64 {
4608 self.cdc_emit_prebuilt_with_columns(
4609 operation,
4610 collection,
4611 entity,
4612 entity_kind,
4613 metadata,
4614 invalidate_cache,
4615 None,
4616 )
4617 }
4618
4619 pub(crate) fn cdc_emit_prebuilt_with_columns(
4626 &self,
4627 operation: crate::replication::cdc::ChangeOperation,
4628 collection: &str,
4629 entity: &UnifiedEntity,
4630 entity_kind: &str,
4631 metadata: Option<&crate::storage::Metadata>,
4632 invalidate_cache: bool,
4633 changed_columns: Option<Vec<String>>,
4634 ) -> u64 {
4635 if invalidate_cache {
4636 self.invalidate_result_cache();
4637 }
4638
4639 let public_id = entity.logical_id().raw();
4640 let lsn = self.inner.cdc.emit_with_columns(
4641 operation,
4642 collection,
4643 public_id,
4644 entity_kind,
4645 changed_columns,
4646 );
4647
4648 if let Some(ref primary) = self.inner.db.replication {
4649 let store = self.inner.db.store();
4650 let record = ChangeRecord {
4651 term: self.current_replication_term(),
4652 lsn,
4653 timestamp: SystemTime::now()
4654 .duration_since(UNIX_EPOCH)
4655 .unwrap_or_default()
4656 .as_millis() as u64,
4657 operation,
4658 collection: collection.to_string(),
4659 entity_id: entity.id.raw(),
4660 entity_kind: entity_kind.to_string(),
4661 entity_bytes: Some(UnifiedStore::serialize_entity(
4662 entity,
4663 store.format_version(),
4664 )),
4665 metadata: metadata
4666 .map(metadata_to_json)
4667 .or_else(|| self.latest_metadata_for(collection, entity.id.raw())),
4668 refresh_records: None,
4669 };
4670 let encoded = record.encode();
4671 primary.append_logical_record(record.lsn, encoded);
4672 }
4673
4674 lsn
4675 }
4676
4677 pub(crate) fn current_replication_term(&self) -> u64 {
4678 self.inner.db.options().replication.term
4679 }
4680
4681 pub(crate) fn cdc_emit_prebuilt_batch<'a, I>(
4682 &self,
4683 operation: crate::replication::cdc::ChangeOperation,
4684 entity_kind: &str,
4685 items: I,
4686 invalidate_cache: bool,
4687 ) where
4688 I: IntoIterator<
4689 Item = (
4690 &'a str,
4691 &'a UnifiedEntity,
4692 Option<&'a crate::storage::Metadata>,
4693 ),
4694 >,
4695 {
4696 let items: Vec<(&str, &UnifiedEntity, Option<&crate::storage::Metadata>)> =
4697 items.into_iter().collect();
4698 if items.is_empty() {
4699 return;
4700 }
4701
4702 if invalidate_cache {
4703 self.invalidate_result_cache();
4704 }
4705
4706 for (collection, entity, metadata) in items {
4707 self.cdc_emit_prebuilt(operation, collection, entity, entity_kind, metadata, false);
4708 }
4709 }
4710
4711 fn run_replica_loop(&self, primary_addr: String) {
4712 let endpoint = if primary_addr.starts_with("http") {
4713 primary_addr
4714 } else {
4715 format!("http://{primary_addr}")
4716 };
4717 let poll_ms = self.inner.db.options().replication.poll_interval_ms;
4718 let max_count = self.inner.db.options().replication.max_batch_size;
4719 let mut since_lsn = self.config_u64("red.replication.last_applied_lsn", 0);
4720 let replica_id = self.resolve_replica_id();
4723
4724 let runtime = match tokio::runtime::Builder::new_current_thread()
4725 .enable_all()
4726 .build()
4727 {
4728 Ok(runtime) => runtime,
4729 Err(_) => return,
4730 };
4731
4732 runtime.block_on(async move {
4733 use crate::grpc::proto::red_db_client::RedDbClient;
4734 use crate::grpc::proto::JsonPayloadRequest;
4735
4736 let mut client = loop {
4737 match RedDbClient::connect(endpoint.clone()).await {
4738 Ok(client) => {
4739 self.persist_replication_health("connecting", "", None, None);
4740 break client;
4741 }
4742 Err(_) => {
4743 self.persist_replication_health(
4744 "connecting",
4745 "waiting for primary connection",
4746 None,
4747 None,
4748 );
4749 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)))
4750 }
4751 }
4752 };
4753
4754 let applier = crate::replication::logical::LogicalChangeApplier::with_metrics(
4759 since_lsn,
4760 self.inner.replica_apply_metrics.clone(),
4761 );
4762
4763 loop {
4764 let payload = crate::json!({
4765 "since_lsn": since_lsn,
4766 "max_count": max_count,
4767 "replica_id": replica_id,
4768 "await_data": true,
4769 "await_timeout_ms": 30_000
4770 });
4771 let request = tonic::Request::new(JsonPayloadRequest {
4772 payload_json: crate::json::to_string(&payload)
4773 .unwrap_or_else(|_| "{}".to_string()),
4774 });
4775
4776 if let Ok(response) = client.pull_wal_records(request).await {
4777 if let Ok(value) =
4778 crate::json::from_str::<crate::json::Value>(&response.into_inner().payload)
4779 {
4780 let current_lsn =
4781 value.get("current_lsn").and_then(crate::json::Value::as_u64);
4782 let oldest_available_lsn = value
4783 .get("oldest_available_lsn")
4784 .and_then(crate::json::Value::as_u64);
4785 if value
4786 .get("needs_rebootstrap")
4787 .and_then(crate::json::Value::as_bool)
4788 .unwrap_or(false)
4789 {
4790 let reason = value
4791 .get("invalidation_reason")
4792 .and_then(crate::json::Value::as_str)
4793 .unwrap_or("unknown");
4794 self.persist_replication_health(
4795 "rebootstrap_required",
4796 &format!("replication slot invalidated ({reason}); re-bootstrap required"),
4797 current_lsn,
4798 oldest_available_lsn,
4799 );
4800 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
4801 continue;
4802 }
4803 if since_lsn > 0
4804 && oldest_available_lsn
4805 .map(|oldest| oldest > since_lsn.saturating_add(1))
4806 .unwrap_or(false)
4807 {
4808 self.persist_replication_health(
4809 "rebootstrap_required",
4810 "replica is behind the oldest logical WAL available on primary; re-bootstrap required",
4811 current_lsn,
4812 oldest_available_lsn,
4813 );
4814 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
4815 continue;
4816 }
4817 if let Some(records) =
4818 value.get("records").and_then(crate::json::Value::as_array)
4819 {
4820 let mut batch_applied_lsn = None;
4821 let mut ack_failed = false;
4822 for record in records {
4823 let Some(data_hex) =
4824 record.get("data").and_then(crate::json::Value::as_str)
4825 else {
4826 continue;
4827 };
4828 let Ok(data) = hex::decode(data_hex) else {
4829 self.inner.replica_apply_metrics.record(
4830 crate::replication::logical::ApplyErrorKind::Decode,
4831 );
4832 self.persist_replication_health(
4833 "apply_error",
4834 "failed to decode WAL record hex payload",
4835 current_lsn,
4836 oldest_available_lsn,
4837 );
4838 continue;
4839 };
4840 let Ok(change) = ChangeRecord::decode(&data) else {
4841 self.inner.replica_apply_metrics.record(
4842 crate::replication::logical::ApplyErrorKind::Decode,
4843 );
4844 self.persist_replication_health(
4845 "apply_error",
4846 "failed to decode logical WAL record",
4847 current_lsn,
4848 oldest_available_lsn,
4849 );
4850 continue;
4851 };
4852 match applier.apply(
4853 self.inner.db.as_ref(),
4854 &change,
4855 ApplyMode::Replica,
4856 ) {
4857 Ok(crate::replication::logical::ApplyOutcome::Applied) => {
4858 self.invalidate_result_cache_for_table(&change.collection);
4859 since_lsn = since_lsn.max(change.lsn);
4860 self.persist_replica_lsn(since_lsn);
4861 batch_applied_lsn = Some(since_lsn);
4862 }
4863 Ok(_) => {
4864 }
4866 Err(err) => {
4867 self.inner.replica_apply_metrics.record(err.kind());
4868 match &err {
4877 crate::replication::logical::LogicalApplyError::Divergence { lsn, expected: _, got: _, .. } => {
4878 crate::telemetry::operator_event::OperatorEvent::Divergence {
4879 peer: "primary".to_string(),
4880 leader_lsn: *lsn,
4881 follower_lsn: since_lsn,
4882 }
4883 .emit_global();
4884 }
4885 crate::replication::logical::LogicalApplyError::Gap { last, next } => {
4886 crate::telemetry::operator_event::OperatorEvent::ReplicationBroken {
4887 peer: "primary".to_string(),
4888 reason: format!("stalled gap last={last} next={next}"),
4889 }
4890 .emit_global();
4891 }
4892 _ => {}
4893 }
4894 let kind = match &err {
4895 crate::replication::logical::LogicalApplyError::Gap { .. } => "stalled_gap",
4896 crate::replication::logical::LogicalApplyError::Divergence { .. } => "divergence",
4897 _ => "apply_error",
4898 };
4899 self.persist_replication_health(
4900 kind,
4901 &format!("replica apply rejected: {err}"),
4902 current_lsn,
4903 oldest_available_lsn,
4904 );
4905 break;
4916 }
4917 }
4918 }
4919 if let Some(applied_lsn) = batch_applied_lsn {
4920 let apply_errors = self.replica_apply_error_counts();
4921 let apply_errors_total =
4922 apply_errors.iter().map(|(_, count)| *count).sum::<u64>();
4923 let divergence_total = apply_errors
4924 .iter()
4925 .find(|(kind, _)| {
4926 matches!(
4927 kind,
4928 crate::replication::logical::ApplyErrorKind::Divergence
4929 )
4930 })
4931 .map(|(_, count)| *count)
4932 .unwrap_or(0);
4933 let ack_payload = crate::json!({
4934 "replica_id": replica_id.clone(),
4935 "applied_lsn": applied_lsn,
4936 "durable_lsn": applied_lsn,
4937 "apply_errors_total": apply_errors_total,
4938 "divergence_total": divergence_total
4939 });
4940 let ack_request = tonic::Request::new(JsonPayloadRequest {
4941 payload_json: crate::json::to_string(&ack_payload)
4942 .unwrap_or_else(|_| "{}".to_string()),
4943 });
4944 if client.ack_replica_lsn(ack_request).await.is_err() {
4945 ack_failed = true;
4946 self.persist_replication_health(
4947 "ack_error",
4948 "primary ack_replica_lsn request failed",
4949 current_lsn,
4950 oldest_available_lsn,
4951 );
4952 }
4953 }
4954 if ack_failed {
4955 std::thread::sleep(std::time::Duration::from_millis(poll_ms));
4956 continue;
4957 }
4958 }
4959 self.persist_replication_health(
4960 "healthy",
4961 "",
4962 current_lsn,
4963 oldest_available_lsn,
4964 );
4965 } else {
4966 self.persist_replication_health(
4967 "apply_error",
4968 "failed to parse pull_wal_records response",
4969 None,
4970 None,
4971 );
4972 }
4973 } else {
4974 self.persist_replication_health(
4975 "connecting",
4976 "primary pull_wal_records request failed",
4977 None,
4978 None,
4979 );
4980 std::thread::sleep(std::time::Duration::from_millis(poll_ms.max(250)));
4981 }
4982 }
4983 });
4984 }
4985
4986 pub fn cdc_poll(
4988 &self,
4989 since_lsn: u64,
4990 max_count: usize,
4991 ) -> Vec<crate::replication::cdc::ChangeEvent> {
4992 self.inner.cdc.poll(since_lsn, max_count)
4993 }
4994
4995 pub fn cdc_current_lsn(&self) -> u64 {
4999 self.inner.cdc.current_lsn()
5000 }
5001
5002 pub fn kv_watch_events_since(
5003 &self,
5004 collection: &str,
5005 key: &str,
5006 since_lsn: u64,
5007 max_count: usize,
5008 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
5009 self.inner
5010 .cdc
5011 .poll(since_lsn, max_count)
5012 .into_iter()
5013 .filter_map(|event| event.kv)
5014 .filter(|event| event.collection == collection && event.key == key)
5015 .collect()
5016 }
5017
5018 pub fn kv_watch_events_since_prefix(
5019 &self,
5020 collection: &str,
5021 prefix: &str,
5022 since_lsn: u64,
5023 max_count: usize,
5024 ) -> Vec<crate::replication::cdc::KvWatchEvent> {
5025 self.inner
5026 .cdc
5027 .poll(since_lsn, max_count)
5028 .into_iter()
5029 .filter_map(|event| event.kv)
5030 .filter(|event| event.collection == collection && event.key.starts_with(prefix))
5031 .collect()
5032 }
5033
5034 pub(crate) fn kv_watch_subscribe<'a>(
5035 &'a self,
5036 collection: impl Into<String>,
5037 key: impl Into<String>,
5038 from_lsn: Option<u64>,
5039 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
5040 crate::runtime::kv_watch::KvWatchStream::subscribe(
5041 &self.inner.cdc,
5042 &self.inner.kv_stats,
5043 collection,
5044 key,
5045 from_lsn,
5046 self.kv_watch_idle_timeout_ms(),
5047 )
5048 }
5049
5050 pub(crate) fn kv_watch_subscribe_prefix<'a>(
5051 &'a self,
5052 collection: impl Into<String>,
5053 prefix: impl Into<String>,
5054 from_lsn: Option<u64>,
5055 ) -> crate::runtime::kv_watch::KvWatchStream<'a> {
5056 crate::runtime::kv_watch::KvWatchStream::subscribe_prefix(
5057 &self.inner.cdc,
5058 &self.inner.kv_stats,
5059 collection,
5060 prefix,
5061 from_lsn,
5062 self.kv_watch_idle_timeout_ms(),
5063 )
5064 }
5065
5066 pub(crate) fn kv_watch_idle_timeout_ms(&self) -> u64 {
5067 self.config_u64("red.config.kv.watch.idle_timeout_ms", 60_000)
5068 }
5069
5070 pub fn backup_status(&self) -> crate::replication::scheduler::BackupStatus {
5072 self.inner.backup_scheduler.status()
5073 }
5074
5075 pub fn result_blob_cache(&self) -> &crate::storage::cache::BlobCache {
5085 &self.inner.result_blob_cache
5086 }
5087
5088 pub fn primary_replica_snapshots(&self) -> Vec<crate::replication::primary::ReplicaState> {
5092 self.inner
5093 .db
5094 .replication
5095 .as_ref()
5096 .map(|repl| repl.replica_snapshots())
5097 .unwrap_or_default()
5098 }
5099
5100 pub fn refresh_replication_flow_control(&self) -> bool {
5111 let flow = self.inner.write_gate.flow_control();
5112 if !flow.is_enabled() {
5113 return false;
5114 }
5115 let Some(repl) = self.inner.db.replication.as_ref() else {
5116 return false;
5117 };
5118 let primary_lsn = repl.current_logical_lsn();
5119 let replicas = repl.replica_snapshots();
5120 flow.observe(&replicas, primary_lsn)
5121 }
5122
5123 pub fn commit_policy(&self) -> crate::replication::CommitPolicy {
5128 crate::replication::CommitPolicy::from_env()
5129 }
5130
5131 pub fn replica_apply_error_counts(
5137 &self,
5138 ) -> [(crate::replication::logical::ApplyErrorKind, u64); 5] {
5139 self.inner.replica_apply_metrics.snapshot()
5140 }
5141
5142 pub fn quota_bucket(&self) -> &crate::runtime::quota_bucket::QuotaBucket {
5145 &self.inner.quota_bucket
5146 }
5147
5148 pub fn commit_waiter_snapshot(&self) -> Vec<(String, u64)> {
5152 self.inner
5153 .db
5154 .replication
5155 .as_ref()
5156 .map(|repl| repl.commit_waiter.snapshot())
5157 .unwrap_or_default()
5158 }
5159
5160 pub fn commit_waiter_metrics_snapshot(&self) -> (u64, u64, u64, u64) {
5163 self.inner
5164 .db
5165 .replication
5166 .as_ref()
5167 .map(|repl| repl.commit_waiter.metrics_snapshot())
5168 .unwrap_or((0, 0, 0, 0))
5169 }
5170
5171 pub fn commit_watermark(&self) -> u64 {
5175 match self.commit_policy() {
5176 crate::replication::CommitPolicy::AckN(n) if n > 0 => self
5177 .inner
5178 .db
5179 .replication
5180 .as_ref()
5181 .map(|repl| repl.commit_waiter.commit_watermark(n))
5182 .unwrap_or(0),
5183 crate::replication::CommitPolicy::Quorum => self
5184 .inner
5185 .db
5186 .quorum
5187 .as_ref()
5188 .map(|q| q.commit_watermark())
5189 .unwrap_or(0),
5190 _ => 0,
5191 }
5192 }
5193
5194 pub fn await_replica_acks(
5203 &self,
5204 target_lsn: u64,
5205 count: u32,
5206 timeout: std::time::Duration,
5207 ) -> crate::replication::AwaitOutcome {
5208 match &self.inner.db.replication {
5209 Some(repl) => repl.commit_waiter.await_acks(target_lsn, count, timeout),
5210 None => {
5211 crate::replication::AwaitOutcome::NotRequired
5215 }
5216 }
5217 }
5218
5219 pub fn enforce_commit_policy(
5232 &self,
5233 post_lsn: u64,
5234 ) -> RedDBResult<crate::replication::AwaitOutcome> {
5235 let policy = self.commit_policy();
5236 if matches!(policy, crate::replication::CommitPolicy::Quorum) {
5237 return match self.inner.db.wait_for_replication_quorum(post_lsn) {
5238 Ok(()) => Ok(crate::replication::AwaitOutcome::Reached(0)),
5239 Err(err) => {
5240 tracing::warn!(
5241 target: "reddb::commit",
5242 post_lsn,
5243 error = %err,
5244 "quorum: timed out waiting for commit watermark"
5245 );
5246 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
5247 .ok()
5248 .map(|v| {
5249 let t = v.trim();
5250 t.eq_ignore_ascii_case("true")
5251 || t == "1"
5252 || t.eq_ignore_ascii_case("yes")
5253 })
5254 .unwrap_or(false);
5255 if fail {
5256 return Err(RedDBError::ReadOnly(format!(
5257 "commit policy timed out at lsn {post_lsn}: {err} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
5258 )));
5259 }
5260 Ok(crate::replication::AwaitOutcome::TimedOut {
5261 observed: 0,
5262 required: 1,
5263 })
5264 }
5265 };
5266 }
5267
5268 let n = match policy {
5269 crate::replication::CommitPolicy::AckN(n) if n > 0 => n,
5270 _ => return Ok(crate::replication::AwaitOutcome::NotRequired),
5271 };
5272 let timeout_ms = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
5273 .ok()
5274 .and_then(|v| v.parse::<u64>().ok())
5275 .unwrap_or(5_000);
5276 let outcome =
5277 self.await_replica_acks(post_lsn, n, std::time::Duration::from_millis(timeout_ms));
5278 {
5279 use crate::runtime::control_events::{EventKind, Outcome, Sensitivity};
5280 let (event_outcome, fields) = match &outcome {
5281 crate::replication::AwaitOutcome::Reached(count) => (
5282 Outcome::Allowed,
5283 vec![
5284 (
5285 "post_lsn".to_string(),
5286 Sensitivity::raw(post_lsn.to_string()),
5287 ),
5288 ("required".to_string(), Sensitivity::raw(n.to_string())),
5289 ("observed".to_string(), Sensitivity::raw(count.to_string())),
5290 (
5291 "timeout_ms".to_string(),
5292 Sensitivity::raw(timeout_ms.to_string()),
5293 ),
5294 ],
5295 ),
5296 crate::replication::AwaitOutcome::TimedOut { observed, required } => (
5297 Outcome::Error,
5298 vec![
5299 (
5300 "post_lsn".to_string(),
5301 Sensitivity::raw(post_lsn.to_string()),
5302 ),
5303 (
5304 "required".to_string(),
5305 Sensitivity::raw(required.to_string()),
5306 ),
5307 (
5308 "observed".to_string(),
5309 Sensitivity::raw(observed.to_string()),
5310 ),
5311 (
5312 "timeout_ms".to_string(),
5313 Sensitivity::raw(timeout_ms.to_string()),
5314 ),
5315 ],
5316 ),
5317 crate::replication::AwaitOutcome::NotRequired => (Outcome::Allowed, Vec::new()),
5318 };
5319 if !fields.is_empty() {
5320 self.emit_control_event(
5321 EventKind::ReplicationSafety,
5322 event_outcome,
5323 "replication_commit_policy",
5324 Some(format!("replication:lsn:{post_lsn}")),
5325 None,
5326 fields,
5327 )?;
5328 }
5329 }
5330 if let crate::replication::AwaitOutcome::TimedOut { observed, required } = &outcome {
5331 tracing::warn!(
5332 target: "reddb::commit",
5333 post_lsn,
5334 observed = *observed,
5335 required = *required,
5336 timeout_ms,
5337 "ack_n: timed out waiting for replicas"
5338 );
5339 let fail = std::env::var("RED_COMMIT_FAIL_ON_TIMEOUT")
5340 .ok()
5341 .map(|v| {
5342 let t = v.trim();
5343 t.eq_ignore_ascii_case("true") || t == "1" || t.eq_ignore_ascii_case("yes")
5344 })
5345 .unwrap_or(false);
5346 if fail {
5347 return Err(RedDBError::ReadOnly(format!(
5348 "commit policy timed out at lsn {post_lsn}: observed={observed} required={required} (RED_COMMIT_FAIL_ON_TIMEOUT=true)"
5349 )));
5350 }
5351 }
5352 Ok(outcome)
5353 }
5354
5355 pub fn encryption_at_rest_status(&self) -> (&'static str, Option<String>) {
5363 match crate::crypto::page_encryption::key_from_env() {
5364 Ok(Some(_)) => ("enabled", None),
5365 Ok(None) => ("disabled", None),
5366 Err(err) => ("error", Some(err)),
5367 }
5368 }
5369
5370 pub fn replica_apply_health(&self) -> Option<String> {
5376 let state = self.config_string("red.replication.state", "");
5377 if state.is_empty() {
5378 None
5379 } else {
5380 Some(state)
5381 }
5382 }
5383
5384 pub fn wal_archive_progress(&self) -> (u64, u64) {
5389 let current_lsn = self
5390 .inner
5391 .db
5392 .replication
5393 .as_ref()
5394 .map(|repl| {
5395 repl.logical_wal_spool
5396 .as_ref()
5397 .map(|spool| spool.current_lsn())
5398 .unwrap_or_else(|| repl.wal_buffer.current_lsn())
5399 })
5400 .unwrap_or_else(|| self.inner.cdc.current_lsn());
5401 let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
5402 (current_lsn, last_archived_lsn)
5403 }
5404
/// Runs a backup: takes a local snapshot and, when both a remote backend
/// and a database path are available, uploads the snapshot plus any
/// not-yet-archived logical WAL records and publishes the backup head.
///
/// The whole backup runs inside a closure so that a `BackupRun` control
/// event can be emitted for success and failure alike. If emitting that
/// event fails, its error is returned in place of the backup result.
///
/// `BackupResult::uploaded` is `true` only when the remote branch ran to
/// completion.
///
/// # Errors
/// Fails when the write gate or the remote-write fence rejects the backup,
/// when the snapshot cannot be created, when any remote publish step
/// fails, or when a gap is detected between the last archived LSN and the
/// oldest WAL still available.
pub fn trigger_backup(&self) -> RedDBResult<crate::replication::scheduler::BackupResult> {
    let result = (|| {
        // Gate first: the write gate must allow backups and, with a remote
        // backend, the remote-write check must pass.
        self.check_write(crate::runtime::write_gate::WriteKind::Backup)?;
        self.assert_remote_write_allowed("admin/backup")?;
        let started = std::time::Instant::now();
        let snapshot = self.create_snapshot()?;
        let mut uploaded = false;

        if let (Some(backend), Some(path)) =
            (&self.inner.db.remote_backend, self.inner.db.path())
        {
            // Resolve remote layout: configured values win over the
            // defaults derived from the database options.
            let default_snapshot_prefix = self.inner.db.options().default_snapshot_prefix();
            let default_wal_prefix = self.inner.db.options().default_wal_archive_prefix();
            let default_head_key = self.inner.db.options().default_backup_head_key();
            let snapshot_prefix = self.config_string(
                "red.config.backup.snapshot_prefix",
                &default_snapshot_prefix,
            );
            let wal_prefix =
                self.config_string("red.config.wal.archive.prefix", &default_wal_prefix);
            let head_key = self.config_string("red.config.backup.head_key", &default_head_key);
            let timeline_id = self.config_string("red.config.timeline.id", "main");
            let snapshot_key = crate::storage::wal::archive_snapshot(
                backend.as_ref(),
                path,
                snapshot.snapshot_id,
                &snapshot_prefix,
            )
            .map_err(|err| RedDBError::Internal(err.to_string()))?;
            // Same LSN source order as `wal_archive_progress`: spool, then
            // WAL buffer, then CDC when there is no replication handle.
            let current_lsn = self
                .inner
                .db
                .replication
                .as_ref()
                .map(|repl| {
                    repl.logical_wal_spool
                        .as_ref()
                        .map(|spool| spool.current_lsn())
                        .unwrap_or_else(|| repl.wal_buffer.current_lsn())
                })
                .unwrap_or_else(|| self.inner.cdc.current_lsn());
            let last_archived_lsn = self.config_u64("red.config.timeline.last_archived_lsn", 0);
            // A hashing failure is logged and tolerated: the manifest is
            // then published with `snapshot_sha256` set to `None`.
            let snapshot_sha256 =
                crate::storage::wal::SnapshotManifest::compute_snapshot_sha256(path)
                    .map_err(|err| {
                        tracing::warn!(
                            target: "reddb::backup",
                            error = %err,
                            snapshot_id = snapshot.snapshot_id,
                            "snapshot hash failed; manifest will lack checksum"
                        );
                    })
                    .ok();
            let manifest = crate::storage::wal::SnapshotManifest {
                timeline_id: timeline_id.clone(),
                snapshot_key: snapshot_key.clone(),
                snapshot_id: snapshot.snapshot_id,
                snapshot_time: snapshot.created_at_unix_ms as u64,
                base_lsn: current_lsn,
                schema_version: crate::api::REDDB_FORMAT_VERSION,
                format_version: crate::api::REDDB_FORMAT_VERSION,
                snapshot_sha256,
            };
            crate::storage::wal::publish_snapshot_manifest(backend.as_ref(), &manifest)
                .map_err(|err| RedDBError::Internal(err.to_string()))?;

            // Hash of the previously archived segment, passed on to
            // `archive_change_records`; an empty config value means none.
            let prev_segment_hash =
                self.config_string("red.config.timeline.last_segment_hash", "");
            let prev_hash_arg = if prev_segment_hash.is_empty() {
                None
            } else {
                Some(prev_segment_hash)
            };

            // Archive logical WAL records newer than `last_archived_lsn`;
            // without a replication handle nothing is archived.
            let archived_lsn = if let Some(primary) = &self.inner.db.replication {
                let oldest = primary
                    .logical_wal_spool
                    .as_ref()
                    .and_then(|spool| spool.oldest_lsn().ok().flatten())
                    .or_else(|| primary.wal_buffer.oldest_lsn())
                    .unwrap_or(last_archived_lsn);
                // Refuse to archive across a hole between what was last
                // archived and the oldest record still available.
                if last_archived_lsn > 0 && last_archived_lsn < oldest.saturating_sub(1) {
                    return Err(RedDBError::Internal(format!(
                        "logical WAL gap detected: last_archived_lsn={last_archived_lsn}, oldest_available_lsn={oldest}"
                    )));
                }
                let records = if let Some(spool) = &primary.logical_wal_spool {
                    spool
                        .read_since(last_archived_lsn, usize::MAX)
                        .map_err(|err| RedDBError::Internal(err.to_string()))?
                } else {
                    primary.wal_buffer.read_since(last_archived_lsn, usize::MAX)
                };
                if let Some(meta) = crate::storage::wal::archive_change_records(
                    backend.as_ref(),
                    &wal_prefix,
                    &records,
                    prev_hash_arg,
                )
                .map_err(|err| RedDBError::Internal(err.to_string()))?
                {
                    // The result of pruning is deliberately discarded.
                    let _ = primary.prune_retained_wal_through(meta.lsn_end);
                    // Store the new segment hash for the next archive run.
                    if let Some(sha) = &meta.sha256 {
                        self.inner.db.store().set_config_tree(
                            "red.config.timeline",
                            &crate::json!({ "last_segment_hash": sha }),
                        );
                    }
                    meta.lsn_end
                } else {
                    last_archived_lsn
                }
            } else {
                last_archived_lsn
            };

            let head = crate::storage::wal::BackupHead {
                timeline_id,
                snapshot_key,
                snapshot_id: snapshot.snapshot_id,
                snapshot_time: snapshot.created_at_unix_ms as u64,
                current_lsn,
                last_archived_lsn: archived_lsn,
                wal_prefix,
            };
            crate::storage::wal::publish_backup_head(backend.as_ref(), &head_key, &head)
                .map_err(|err| RedDBError::Internal(err.to_string()))?;
            // Record archive progress locally only after the head has been
            // published successfully.
            self.inner.db.store().set_config_tree(
                "red.config.timeline",
                &crate::json!({
                    "last_archived_lsn": archived_lsn,
                    "id": head.timeline_id
                }),
            );

            // Best-effort: a failed unified manifest refresh is logged and
            // does not fail the backup.
            if let Err(err) = crate::storage::wal::publish_unified_manifest_for_prefix(
                backend.as_ref(),
                &snapshot_prefix,
            ) {
                tracing::warn!(
                    target: "reddb::backup",
                    error = %err,
                    snapshot_prefix = %snapshot_prefix,
                    "unified MANIFEST.json refresh failed; per-artifact sidecars unaffected"
                );
            }

            // Under AckN, wait for replicas to acknowledge the archived
            // LSN. A timeout is only logged; the backup still succeeds.
            match self.commit_policy() {
                crate::replication::CommitPolicy::AckN(n) if n > 0 => {
                    let timeout = std::env::var("RED_REPLICATION_ACK_TIMEOUT_MS")
                        .ok()
                        .and_then(|v| v.parse::<u64>().ok())
                        .unwrap_or(5_000);
                    let outcome = self.await_replica_acks(
                        archived_lsn,
                        n,
                        std::time::Duration::from_millis(timeout),
                    );
                    match outcome {
                        crate::replication::AwaitOutcome::Reached(count) => {
                            tracing::debug!(
                                target: "reddb::backup",
                                archived_lsn,
                                n,
                                count,
                                "ack_n: replicas synced before backup return"
                            );
                        }
                        crate::replication::AwaitOutcome::TimedOut { observed, required } => {
                            tracing::warn!(
                                target: "reddb::backup",
                                archived_lsn,
                                observed,
                                required,
                                timeout_ms = timeout,
                                "ack_n: timed out waiting for replicas; backup uploaded but DR posture degraded"
                            );
                        }
                        crate::replication::AwaitOutcome::NotRequired => {}
                    }
                }
                _ => {}
            }

            // Optional: archive the blob cache L2 directory. Failures are
            // logged and never fail the backup.
            if self.config_bool("red.config.backup.include_blob_cache", false) {
                let blob_cache_prefix = self.config_string(
                    "red.config.backup.blob_cache_prefix",
                    &format!("{snapshot_prefix}blob_cache/"),
                );
                if let Some(l2_path) = self.inner.result_blob_cache.l2_path() {
                    match crate::storage::cache::archive_blob_cache_l2(
                        backend.as_ref(),
                        l2_path,
                        &blob_cache_prefix,
                    ) {
                        Ok(count) => {
                            tracing::info!(
                                target: "reddb::backup",
                                files_uploaded = count,
                                blob_cache_prefix = %blob_cache_prefix,
                                "include_blob_cache: archived L2 directory"
                            );
                        }
                        Err(err) => {
                            tracing::warn!(
                                target: "reddb::backup",
                                error = %err,
                                blob_cache_prefix = %blob_cache_prefix,
                                "include_blob_cache: L2 archive failed; backup proceeding (cache is derived state)"
                            );
                        }
                    }
                } else {
                    tracing::debug!(
                        target: "reddb::backup",
                        "include_blob_cache=true but no L2 path configured; nothing to archive"
                    );
                }
            }

            uploaded = true;
        }

        Ok(crate::replication::scheduler::BackupResult {
            snapshot_id: snapshot.snapshot_id,
            uploaded,
            duration_ms: started.elapsed().as_millis() as u64,
            timestamp: snapshot.created_at_unix_ms as u64,
        })
    })();

    // Emit a control event for the run, whatever the outcome. The LSN
    // fields are re-read here, after the backup attempt has finished.
    use crate::runtime::control_events::{EventKind, Outcome, Sensitivity};
    let (current_lsn, last_archived_lsn) = self.wal_archive_progress();
    let mut fields = vec![
        (
            "current_lsn".to_string(),
            Sensitivity::raw(current_lsn.to_string()),
        ),
        (
            "last_archived_lsn".to_string(),
            Sensitivity::raw(last_archived_lsn.to_string()),
        ),
    ];
    // Result details are attached only for a successful backup.
    if let Ok(backup) = &result {
        fields.push((
            "snapshot_id".to_string(),
            Sensitivity::raw(backup.snapshot_id.to_string()),
        ));
        fields.push((
            "uploaded".to_string(),
            Sensitivity::raw(backup.uploaded.to_string()),
        ));
        fields.push((
            "duration_ms".to_string(),
            Sensitivity::raw(backup.duration_ms.to_string()),
        ));
        fields.push((
            "snapshot_time".to_string(),
            Sensitivity::raw(backup.timestamp.to_string()),
        ));
    }
    let outcome = match &result {
        Ok(_) => Outcome::Allowed,
        Err(err) => control_event_outcome_for_error(err),
    };
    let reason = result.as_ref().err().map(|err| err.to_string());
    // NOTE(review): if emitting the event fails, `?` returns that error and
    // the original backup result (including its error) is dropped.
    self.emit_control_event(
        EventKind::BackupRun,
        outcome,
        "backup_trigger",
        Some("backup:trigger".to_string()),
        reason,
        fields,
    )?;
    result
}
5731
5732 pub fn acquire(&self) -> RedDBResult<RuntimeConnection> {
5733 let mut pool = self
5734 .inner
5735 .pool
5736 .lock()
5737 .map_err(|e| RedDBError::Internal(format!("connection pool lock poisoned: {e}")))?;
5738 if pool.active >= self.inner.pool_config.max_connections {
5739 return Err(RedDBError::Internal(
5740 "connection pool exhausted".to_string(),
5741 ));
5742 }
5743
5744 let id = if let Some(id) = pool.idle.pop() {
5745 id
5746 } else {
5747 let id = pool.next_id;
5748 pool.next_id += 1;
5749 id
5750 };
5751 pool.active += 1;
5752 pool.total_checkouts += 1;
5753 drop(pool);
5754
5755 Ok(RuntimeConnection {
5756 id,
5757 inner: Arc::clone(&self.inner),
5758 })
5759 }
5760
5761 pub fn checkpoint(&self) -> RedDBResult<()> {
5762 self.inner.db.flush_local_only().map_err(|err| {
5767 let msg = err.to_string();
5772 crate::telemetry::operator_event::OperatorEvent::CheckpointFailed {
5773 lsn: 0,
5774 error: msg.clone(),
5775 }
5776 .emit_global();
5777 crate::telemetry::operator_event::OperatorEvent::WalFsyncFailed {
5778 path: "<flush_local_only>".to_string(),
5779 error: msg.clone(),
5780 }
5781 .emit_global();
5782 RedDBError::Engine(msg)
5783 })?;
5784 if let Err(err) = self.assert_remote_write_allowed("checkpoint") {
5785 tracing::warn!(
5786 target: "reddb::serverless::lease",
5787 error = %err,
5788 "checkpoint: skipping remote upload — lease not held"
5789 );
5790 return Ok(());
5791 }
5792 self.inner
5793 .db
5794 .upload_to_remote_backend()
5795 .map_err(|err| RedDBError::Engine(err.to_string()))
5796 }
5797
5798 pub(crate) fn assert_remote_write_allowed(&self, action: &str) -> RedDBResult<()> {
5805 if self.inner.db.remote_backend.is_none() {
5806 return Ok(());
5807 }
5808 match self.inner.write_gate.lease_state() {
5809 crate::runtime::write_gate::LeaseGateState::NotHeld => {
5810 self.inner.audit_log.record(
5811 action,
5812 "system",
5813 "remote_backend",
5814 "err: writer lease not held",
5815 crate::json::Value::Null,
5816 );
5817 Err(RedDBError::ReadOnly(format!(
5818 "writer lease not held — {action} blocked (serverless fence)"
5819 )))
5820 }
5821 _ => Ok(()),
5822 }
5823 }
5824
5825 pub fn run_maintenance(&self) -> RedDBResult<()> {
5826 self.inner
5827 .db
5828 .run_maintenance()
5829 .map_err(|err| RedDBError::Internal(err.to_string()))
5830 }
5831
5832 pub fn scan_collection(
5833 &self,
5834 collection: &str,
5835 cursor: Option<ScanCursor>,
5836 limit: usize,
5837 ) -> RedDBResult<ScanPage> {
5838 let store = self.inner.db.store();
5839 let manager = store
5840 .get_collection(collection)
5841 .ok_or_else(|| RedDBError::NotFound(collection.to_string()))?;
5842
5843 let mut entities = manager.query_all(|_| true);
5844 entities.sort_by_key(|entity| entity.id.raw());
5845
5846 let offset = cursor.map(|cursor| cursor.offset).unwrap_or(0);
5847 let total = entities.len();
5848 let end = total.min(offset.saturating_add(limit.max(1)));
5849 let items = if offset >= total {
5850 Vec::new()
5851 } else {
5852 entities[offset..end].to_vec()
5853 };
5854 let next = (end < total).then_some(ScanCursor { offset: end });
5855
5856 Ok(ScanPage {
5857 collection: collection.to_string(),
5858 items,
5859 next,
5860 total,
5861 })
5862 }
5863
5864 pub fn catalog(&self) -> CatalogModelSnapshot {
5865 self.inner.db.catalog_model_snapshot()
5866 }
5867
5868 pub fn catalog_consistency_report(&self) -> crate::catalog::CatalogConsistencyReport {
5869 self.inner.db.catalog_consistency_report()
5870 }
5871
5872 pub fn catalog_attention_summary(&self) -> CatalogAttentionSummary {
5873 crate::catalog::attention_summary(&self.catalog())
5874 }
5875
5876 pub fn collection_attention(&self) -> Vec<CollectionDescriptor> {
5877 crate::catalog::collection_attention(&self.catalog())
5878 }
5879
5880 pub fn index_attention(&self) -> Vec<CatalogIndexStatus> {
5881 crate::catalog::index_attention(&self.catalog())
5882 }
5883
5884 pub fn graph_projection_attention(&self) -> Vec<CatalogGraphProjectionStatus> {
5885 crate::catalog::graph_projection_attention(&self.catalog())
5886 }
5887
5888 pub fn analytics_job_attention(&self) -> Vec<CatalogAnalyticsJobStatus> {
5889 crate::catalog::analytics_job_attention(&self.catalog())
5890 }
5891
5892 pub fn stats(&self) -> RuntimeStats {
5893 let pool = runtime_pool_lock(self);
5894 RuntimeStats {
5895 active_connections: pool.active,
5896 idle_connections: pool.idle.len(),
5897 total_checkouts: pool.total_checkouts,
5898 paged_mode: self.inner.db.is_paged(),
5899 started_at_unix_ms: self.inner.started_at_unix_ms,
5900 store: self.inner.db.stats(),
5901 system: SystemInfo::collect(),
5902 result_blob_cache: self.inner.result_blob_cache.stats(),
5903 kv: self.inner.kv_stats.snapshot(),
5904 metrics_ingest: self.inner.metrics_ingest_stats.snapshot(),
5905 }
5906 }
5907
5908 pub(crate) fn record_metrics_ingest(
5909 &self,
5910 accepted_samples: u64,
5911 accepted_series: u64,
5912 rejected_samples: u64,
5913 rejected_series: u64,
5914 ) {
5915 self.inner.metrics_ingest_stats.record(
5916 accepted_samples,
5917 accepted_series,
5918 rejected_samples,
5919 rejected_series,
5920 );
5921 }
5922
5923 pub(crate) fn record_metrics_cardinality_budget_rejections(&self, rejected_series: u64) {
5924 self.inner
5925 .metrics_ingest_stats
5926 .record_cardinality_budget_rejections(rejected_series);
5927 }
5928
5929 pub(crate) fn record_metrics_tenant_activity(
5930 &self,
5931 tenant: &str,
5932 namespace: &str,
5933 operation: &str,
5934 ) {
5935 self.inner
5936 .metrics_tenant_activity_stats
5937 .record(tenant, namespace, operation);
5938 }
5939
5940 pub(crate) fn metrics_tenant_activity_snapshot(
5941 &self,
5942 ) -> Vec<crate::runtime::MetricsTenantActivityStats> {
5943 self.inner.metrics_tenant_activity_stats.snapshot()
5944 }
5945
5946 pub fn execute_query_with_scope(
5960 &self,
5961 query: &str,
5962 scope: crate::runtime::within_clause::ScopeOverride,
5963 ) -> RedDBResult<RuntimeQueryResult> {
5964 if scope.is_empty() {
5965 return self.execute_query(query);
5966 }
5967 let _scope_guard = ScopeOverrideGuard::install(scope);
5968 self.execute_query(query)
5969 }
5970
5971 pub fn execute_query(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
5980 let started = std::time::Instant::now();
5981 let mut result = self.execute_query_inner(query);
5982 if let Ok(ref mut query_result) = result {
5987 if query_result.statement_type == "select" {
5988 self.filter_integrity_tombstoned(&mut query_result.result);
5989 }
5990 }
5991 let elapsed_ms = started.elapsed().as_millis() as u64;
5992
5993 let scope = self.ai_scope();
5998 let kind = match result
5999 .as_ref()
6000 .map(|r| r.statement_type)
6001 .unwrap_or("select")
6002 {
6003 "select" => crate::telemetry::slow_query_logger::QueryKind::Select,
6004 "insert" => crate::telemetry::slow_query_logger::QueryKind::Insert,
6005 "update" => crate::telemetry::slow_query_logger::QueryKind::Update,
6006 "delete" => crate::telemetry::slow_query_logger::QueryKind::Delete,
6007 _ => crate::telemetry::slow_query_logger::QueryKind::Internal,
6008 };
6009 self.inner
6015 .slow_query_logger
6016 .record(kind, elapsed_ms, query.to_string(), &scope);
6017
6018 if let Ok(ref mut query_result) = result {
6019 if matches!(query_result.statement_type, "insert" | "update" | "delete") {
6020 let bookmark = crate::replication::CausalBookmark::new(
6021 self.current_replication_term(),
6022 self.cdc_current_lsn(),
6023 );
6024 query_result.bookmark = Some(bookmark.encode());
6025 }
6026 }
6027
6028 result
6029 }
6030
6031 pub fn causal_session(&self) -> crate::runtime::CausalSession {
6032 crate::runtime::CausalSession {
6033 runtime: self.clone(),
6034 bookmark: None,
6035 wait_timeout: std::time::Duration::from_secs(5),
6036 }
6037 }
6038
6039 pub fn wait_for_bookmark(
6040 &self,
6041 bookmark: &crate::replication::CausalBookmark,
6042 timeout: std::time::Duration,
6043 ) -> RedDBResult<()> {
6044 let deadline = std::time::Instant::now() + timeout;
6045 loop {
6046 let applied_lsn = self.local_contiguous_applied_lsn();
6047 if applied_lsn >= bookmark.commit_lsn() {
6048 return Ok(());
6049 }
6050 let now = std::time::Instant::now();
6051 if now >= deadline {
6052 return Err(RedDBError::InvalidOperation(format!(
6053 "timed out waiting for causal bookmark lsn {}; applied={}",
6054 bookmark.commit_lsn(),
6055 applied_lsn
6056 )));
6057 }
6058 let remaining = deadline.saturating_duration_since(now);
6059 std::thread::sleep(remaining.min(std::time::Duration::from_millis(5)));
6060 }
6061 }
6062
6063 fn local_contiguous_applied_lsn(&self) -> u64 {
6064 match self.inner.db.options().replication.role {
6065 crate::replication::ReplicationRole::Replica { .. } => {
6066 self.config_u64("red.replication.last_applied_lsn", 0)
6067 }
6068 _ => self.cdc_current_lsn(),
6069 }
6070 }
6071
6072 #[inline(never)]
6073 fn execute_query_inner(&self, query: &str) -> RedDBResult<RuntimeQueryResult> {
6074 if !has_scope_override_active()
6085 && !query.trim_start().starts_with("WITHIN")
6086 && !query.trim_start().starts_with("within")
6087 && !self.inner.query_audit.has_rules()
6088 && !self
6089 .inner
6090 .tx_contexts
6091 .read()
6092 .contains_key(¤t_connection_id())
6093 {
6094 if let Some(result) = self.try_fast_entity_lookup(query) {
6095 return result;
6096 }
6097 }
6098
6099 match crate::runtime::within_clause::try_strip_within_prefix(query) {
6106 Ok(Some((scope, inner))) => {
6107 let _scope_guard = ScopeOverrideGuard::install(scope);
6108 return self.execute_query_inner(inner);
6113 }
6114 Ok(None) => {}
6115 Err(msg) => return Err(RedDBError::Query(msg)),
6116 }
6117
6118 if let Some(inner) = strip_explain_prefix(query) {
6125 return self.explain_as_rows(query, inner);
6126 }
6127
6128 if let Some(value) = parse_set_local_tenant(query)? {
6133 let conn_id = current_connection_id();
6134 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
6135 return Err(RedDBError::Query(
6136 "SET LOCAL TENANT requires an active transaction".to_string(),
6137 ));
6138 }
6139 self.inner
6140 .tx_local_tenants
6141 .write()
6142 .insert(conn_id, value.clone());
6143 return Ok(RuntimeQueryResult::ok_message(
6144 query.to_string(),
6145 &match &value {
6146 Some(id) => format!("local tenant set: {id}"),
6147 None => "local tenant cleared".to_string(),
6148 },
6149 "set_local_tenant",
6150 ));
6151 }
6152
6153 if super::red_schema::is_system_schema_write(query) {
6154 return Err(RedDBError::Query(
6155 super::red_schema::READ_ONLY_ERROR.to_string(),
6156 ));
6157 }
6158
6159 if let Some(create_source) = super::analytics_source_catalog::parse_create_statement(query)?
6160 {
6161 return self.execute_create_analytics_source(query, create_source);
6162 }
6163
6164 if let Some(path) = super::metric_descriptor_catalog::parse_read_metric_statement(query) {
6170 return Err(super::metric_descriptor_catalog::read_output_unsupported(
6171 &path,
6172 ));
6173 }
6174
6175 let rewritten_query = super::red_schema::rewrite_virtual_names(query);
6176 let execution_query = rewritten_query.as_deref().unwrap_or(query);
6177
6178 let frame = super::statement_frame::StatementExecutionFrame::build(self, execution_query)?;
6179 let _frame_guards = frame.install(self);
6180
6181 let _log_span = crate::telemetry::span::query_span(query).entered();
6188
6189 if let Some(rewritten) = frame.prepare_cte(execution_query)? {
6191 return self.execute_query_expr(rewritten);
6192 }
6193
6194 if !self.inner.query_audit.has_rules() {
6196 if let Some(result) = self.try_fast_entity_lookup(execution_query) {
6197 return result;
6198 }
6199 }
6200
6201 if !self.inner.query_audit.has_rules() {
6203 if let Some(result) = frame.read_result_cache(self) {
6204 return Ok(result);
6205 }
6206 }
6207
6208 let prepared = frame.prepare_statement(self, execution_query)?;
6209 let mode = prepared.mode;
6210 let expr = prepared.expr;
6211
6212 let statement = query_expr_name(&expr);
6213 let result_cache_scopes = query_expr_result_cache_scopes(&expr);
6214 let control_event_specs = query_control_event_specs(&expr);
6215 let query_audit_plan = query_audit_plan(&expr);
6216
6217 let _lock_guard = match frame.prepare_dispatch(self, &expr) {
6218 Ok(guard) => guard,
6219 Err(err) => {
6220 let outcome = control_event_outcome_for_error(&err);
6221 for spec in &control_event_specs {
6222 self.emit_control_event(
6223 spec.kind,
6224 outcome,
6225 spec.action,
6226 spec.resource.clone(),
6227 Some(err.to_string()),
6228 spec.fields.clone(),
6229 )?;
6230 }
6231 return Err(err);
6232 }
6233 };
6234 let frame_iface: &dyn super::statement_frame::ReadFrame = &frame;
6235 let query_audit_started = std::time::Instant::now();
6236
6237 let query_result = match expr {
6238 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
6239 let (graph, node_properties, edge_properties) =
6247 self.materialize_graph_with_rls()?;
6248 let result =
6249 crate::storage::query::unified::UnifiedExecutor::execute_on_with_graph_properties(
6250 &graph,
6251 &expr,
6252 node_properties,
6253 edge_properties,
6254 )
6255 .map_err(|err| RedDBError::Query(err.to_string()))?;
6256
6257 Ok(RuntimeQueryResult {
6258 query: query.to_string(),
6259 mode,
6260 statement,
6261 engine: "materialized-graph",
6262 result,
6263 affected_rows: 0,
6264 statement_type: "select",
6265 bookmark: None,
6266 })
6267 }
6268 QueryExpr::Table(table) => {
6269 let table = self.resolve_table_expr_subqueries(
6270 table,
6271 &frame as &dyn super::statement_frame::ReadFrame,
6272 )?;
6273 if let Some(TableSource::Function {
6277 name,
6278 args,
6279 named_args,
6280 }) = table.source.clone()
6281 {
6282 let tvf_result = RuntimeQueryResult {
6290 query: query.to_string(),
6291 mode,
6292 statement,
6293 engine: "runtime-graph-tvf",
6294 result: self.execute_table_function(&name, &args, &named_args)?,
6295 affected_rows: 0,
6296 statement_type: "select",
6297 bookmark: None,
6298 };
6299 frame.write_result_cache(self, &tvf_result, result_cache_scopes.clone());
6300 return Ok(tvf_result);
6301 }
6302 if let Some(TableSource::InlineGraphFunction {
6310 name,
6311 nodes,
6312 edges,
6313 named_args,
6314 }) = table.source.clone()
6315 {
6316 let inline_result = RuntimeQueryResult {
6317 query: query.to_string(),
6318 mode,
6319 statement,
6320 engine: "runtime-graph-tvf-inline",
6321 result: self.execute_inline_graph_function(
6322 &name,
6323 &nodes,
6324 &edges,
6325 &named_args,
6326 )?,
6327 affected_rows: 0,
6328 statement_type: "select",
6329 bookmark: None,
6330 };
6331 frame.write_result_cache(self, &inline_result, result_cache_scopes);
6332 return Ok(inline_result);
6333 }
6334 if super::red_schema::is_virtual_table(&table.table) {
6335 return Ok(RuntimeQueryResult {
6336 query: query.to_string(),
6337 mode,
6338 statement,
6339 engine: "runtime-red-schema",
6340 result: super::red_schema::red_query(
6341 self,
6342 &table.table,
6343 &table,
6344 &frame as &dyn super::statement_frame::ReadFrame,
6345 )?,
6346 affected_rows: 0,
6347 statement_type: "select",
6348 bookmark: None,
6349 });
6350 }
6351
6352 if let Some(view_result) = self.try_resolve_analytics_view(
6356 &table,
6357 &frame as &dyn super::statement_frame::ReadFrame,
6358 )? {
6359 return Ok(RuntimeQueryResult {
6360 query: query.to_string(),
6361 mode,
6362 statement,
6363 engine: "runtime-graph-analytics-view",
6364 result: view_result,
6365 affected_rows: 0,
6366 statement_type: "select",
6367 bookmark: None,
6368 });
6369 }
6370
6371 if let Some(result) = self.execute_probabilistic_select(&table)? {
6372 return Ok(RuntimeQueryResult {
6373 query: query.to_string(),
6374 mode,
6375 statement,
6376 engine: "runtime-probabilistic",
6377 result,
6378 affected_rows: 0,
6379 statement_type: "select",
6380 bookmark: None,
6381 });
6382 }
6383
6384 if self.inner.foreign_tables.is_foreign_table(&table.table) {
6392 let records = self
6393 .inner
6394 .foreign_tables
6395 .scan(&table.table)
6396 .map_err(|e| RedDBError::Internal(e.to_string()))?;
6397 let result = apply_foreign_table_filters(records, &table);
6398 return Ok(RuntimeQueryResult {
6399 query: query.to_string(),
6400 mode,
6401 statement,
6402 engine: "runtime-fdw",
6403 result,
6404 affected_rows: 0,
6405 statement_type: "select",
6406 bookmark: None,
6407 });
6408 }
6409
6410 let Some(table_with_rls) = self.authorize_relational_table_select(
6427 table,
6428 &frame as &dyn super::statement_frame::ReadFrame,
6429 )?
6430 else {
6431 let empty = crate::storage::query::unified::UnifiedResult::empty();
6432 return Ok(RuntimeQueryResult {
6433 query: query.to_string(),
6434 mode,
6435 statement,
6436 engine: "runtime-table-rls",
6437 result: empty,
6438 affected_rows: 0,
6439 statement_type: "select",
6440 bookmark: None,
6441 });
6442 };
6443 Ok(RuntimeQueryResult {
6444 query: query.to_string(),
6445 mode,
6446 statement,
6447 engine: "runtime-table",
6448 result: execute_runtime_table_query_in(
6455 &self.inner.db,
6456 &table_with_rls,
6457 Some(&self.inner.index_store),
6458 Some(frame.row_arena()),
6459 )?,
6460 affected_rows: 0,
6461 statement_type: "select",
6462 bookmark: None,
6463 })
6464 }
6465 QueryExpr::Join(join) => {
6466 let join_with_rls = match self.authorize_relational_join_select(
6475 join,
6476 &frame as &dyn super::statement_frame::ReadFrame,
6477 )? {
6478 Some(j) => j,
6479 None => {
6480 return Ok(RuntimeQueryResult {
6481 query: query.to_string(),
6482 mode,
6483 statement,
6484 engine: "runtime-join-rls",
6485 result: crate::storage::query::unified::UnifiedResult::empty(),
6486 affected_rows: 0,
6487 statement_type: "select",
6488 bookmark: None,
6489 });
6490 }
6491 };
6492 Ok(RuntimeQueryResult {
6493 query: query.to_string(),
6494 mode,
6495 statement,
6496 engine: "runtime-join",
6497 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
6498 affected_rows: 0,
6499 statement_type: "select",
6500 bookmark: None,
6501 })
6502 }
6503 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
6504 query: query.to_string(),
6505 mode,
6506 statement,
6507 engine: "runtime-vector",
6508 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
6509 affected_rows: 0,
6510 statement_type: "select",
6511 bookmark: None,
6512 }),
6513 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
6514 query: query.to_string(),
6515 mode,
6516 statement,
6517 engine: "runtime-hybrid",
6518 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
6519 affected_rows: 0,
6520 statement_type: "select",
6521 bookmark: None,
6522 }),
6523 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
6525 Err(RedDBError::Query(
6526 super::red_schema::READ_ONLY_ERROR.to_string(),
6527 ))
6528 }
6529 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
6530 Err(RedDBError::Query(
6531 super::red_schema::READ_ONLY_ERROR.to_string(),
6532 ))
6533 }
6534 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
6535 Err(RedDBError::Query(
6536 super::red_schema::READ_ONLY_ERROR.to_string(),
6537 ))
6538 }
6539 QueryExpr::Insert(ref insert) => self
6540 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
6541 self.execute_insert(query, insert)
6542 }),
6543 QueryExpr::Update(ref update) => self
6544 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
6545 self.execute_update(query, update)
6546 }),
6547 QueryExpr::Delete(ref delete) => self
6548 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
6549 self.execute_delete(query, delete)
6550 }),
6551 QueryExpr::CreateTable(ref create) => self.execute_create_table(query, create),
6553 QueryExpr::CreateCollection(ref create) => {
6554 self.execute_create_collection(query, create)
6555 }
6556 QueryExpr::CreateVector(ref create) => self.execute_create_vector(query, create),
6557 QueryExpr::DropTable(ref drop_tbl) => self.execute_drop_table(query, drop_tbl),
6558 QueryExpr::DropGraph(ref drop_graph) => self.execute_drop_graph(query, drop_graph),
6559 QueryExpr::DropVector(ref drop_vector) => self.execute_drop_vector(query, drop_vector),
6560 QueryExpr::DropDocument(ref drop_document) => {
6561 self.execute_drop_document(query, drop_document)
6562 }
6563 QueryExpr::DropKv(ref drop_kv) => self.execute_drop_kv(query, drop_kv),
6564 QueryExpr::DropCollection(ref drop_collection) => {
6565 self.execute_drop_collection(query, drop_collection)
6566 }
6567 QueryExpr::Truncate(ref truncate) => self.execute_truncate(query, truncate),
6568 QueryExpr::AlterTable(ref alter) => self.execute_alter_table(query, alter),
6569 QueryExpr::ExplainAlter(ref explain) => self.execute_explain_alter(query, explain),
6570 QueryExpr::GraphCommand(ref cmd) => self.execute_graph_command(query, cmd),
6572 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query, cmd),
6574 QueryExpr::Ask(ref ask) => self.execute_ask(query, ask),
6576 QueryExpr::CreateIndex(ref create_idx) => self.execute_create_index(query, create_idx),
6577 QueryExpr::DropIndex(ref drop_idx) => self.execute_drop_index(query, drop_idx),
6578 QueryExpr::ProbabilisticCommand(ref cmd) => {
6579 self.execute_probabilistic_command(query, cmd)
6580 }
6581 QueryExpr::CreateTimeSeries(ref ts) => self.execute_create_timeseries(query, ts),
6583 QueryExpr::CreateMetric(ref metric) => self.execute_create_metric(query, metric),
6584 QueryExpr::AlterMetric(ref alter) => self.execute_alter_metric(query, alter),
6585 QueryExpr::CreateSlo(ref slo) => self.execute_create_slo(query, slo),
6586 QueryExpr::DropTimeSeries(ref ts) => self.execute_drop_timeseries(query, ts),
6587 QueryExpr::CreateQueue(ref q) => self.execute_create_queue(query, q),
6589 QueryExpr::AlterQueue(ref q) => self.execute_alter_queue(query, q),
6590 QueryExpr::DropQueue(ref q) => self.execute_drop_queue(query, q),
6591 QueryExpr::QueueSelect(ref q) => self.execute_queue_select(query, q),
6592 QueryExpr::QueueCommand(ref cmd) => self.execute_queue_command(query, cmd),
6593 QueryExpr::EventsBackfill(ref backfill) => {
6594 self.execute_events_backfill(query, backfill)
6595 }
6596 QueryExpr::EventsBackfillStatus { ref collection } => Err(RedDBError::Query(format!(
6597 "EVENTS BACKFILL STATUS for '{collection}' is not implemented in this slice"
6598 ))),
6599 QueryExpr::KvCommand(ref cmd) => self.execute_kv_command(query, cmd),
6600 QueryExpr::ConfigCommand(ref cmd) => self.execute_config_command(query, cmd),
6601 QueryExpr::CreateTree(ref tree) => self.execute_create_tree(query, tree),
6602 QueryExpr::DropTree(ref tree) => self.execute_drop_tree(query, tree),
6603 QueryExpr::TreeCommand(ref cmd) => self.execute_tree_command(query, cmd),
6604 QueryExpr::SetConfig { ref key, ref value } => {
6606 if key.starts_with("red.secret.") {
6607 return Err(RedDBError::Query(
6608 "red.secret.* is reserved for vault secrets; use SET SECRET".to_string(),
6609 ));
6610 }
6611 match self.check_managed_config_write_for_set_config(key) {
6612 Err(err) => Err(err),
6613 Ok(()) => {
6614 let store = self.inner.db.store();
6615 let json_val = match value {
6616 Value::Text(s) => crate::serde_json::Value::String(s.to_string()),
6617 Value::Integer(n) => crate::serde_json::Value::Number(*n as f64),
6618 Value::Float(n) => crate::serde_json::Value::Number(*n),
6619 Value::Boolean(b) => crate::serde_json::Value::Bool(*b),
6620 _ => crate::serde_json::Value::String(value.to_string()),
6621 };
6622 store.set_config_tree(key, &json_val);
6623 update_current_config_value(key, value.clone());
6624 self.invalidate_result_cache();
6629 Ok(RuntimeQueryResult::ok_message(
6630 query.to_string(),
6631 &format!("config set: {key}"),
6632 "set",
6633 ))
6634 }
6635 }
6636 }
6637 QueryExpr::SetSecret { ref key, ref value } => {
6639 if key.starts_with("red.config.") {
6640 return Err(RedDBError::Query(
6641 "red.config.* is reserved for config; use SET CONFIG".to_string(),
6642 ));
6643 }
6644 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
6645 RedDBError::Query("SET SECRET requires an enabled, unsealed vault".to_string())
6646 })?;
6647 if matches!(value, Value::Null) {
6648 auth_store
6649 .vault_kv_try_delete(key)
6650 .map_err(|err| RedDBError::Query(err.to_string()))?;
6651 update_current_secret_value(key, None);
6652 self.invalidate_result_cache();
6653 return Ok(RuntimeQueryResult::ok_message(
6654 query.to_string(),
6655 &format!("secret deleted: {key}"),
6656 "delete_secret",
6657 ));
6658 }
6659 let value = secret_sql_value_to_string(value)?;
6660 auth_store
6661 .vault_kv_try_set(key.clone(), value.clone())
6662 .map_err(|err| RedDBError::Query(err.to_string()))?;
6663 update_current_secret_value(key, Some(value));
6664 self.invalidate_result_cache();
6665 Ok(RuntimeQueryResult::ok_message(
6666 query.to_string(),
6667 &format!("secret set: {key}"),
6668 "set_secret",
6669 ))
6670 }
6671 QueryExpr::DeleteSecret { ref key } => {
6673 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
6674 RedDBError::Query(
6675 "DELETE SECRET requires an enabled, unsealed vault".to_string(),
6676 )
6677 })?;
6678 let deleted = auth_store
6679 .vault_kv_try_delete(key)
6680 .map_err(|err| RedDBError::Query(err.to_string()))?;
6681 if deleted {
6682 update_current_secret_value(key, None);
6683 }
6684 self.invalidate_result_cache();
6685 Ok(RuntimeQueryResult::ok_message(
6686 query.to_string(),
6687 &format!("secret deleted: {key}"),
6688 if deleted {
6689 "delete_secret"
6690 } else {
6691 "delete_secret_not_found"
6692 },
6693 ))
6694 }
6695 QueryExpr::ShowSecrets { ref prefix } => {
6697 let auth_store = self.inner.auth_store.read().clone().ok_or_else(|| {
6698 RedDBError::Query("SHOW SECRET requires an enabled, unsealed vault".to_string())
6699 })?;
6700 if !auth_store.is_vault_backed() {
6701 return Err(RedDBError::Query(
6702 "SHOW SECRET requires an enabled, unsealed vault".to_string(),
6703 ));
6704 }
6705 let mut keys = auth_store.vault_kv_keys();
6706 keys.sort();
6707 let mut result = UnifiedResult::with_columns(vec![
6708 "key".into(),
6709 "value".into(),
6710 "status".into(),
6711 ]);
6712 for key in keys {
6713 if let Some(ref pfx) = prefix {
6714 if !key.starts_with(pfx) {
6715 continue;
6716 }
6717 }
6718 let mut record = UnifiedRecord::new();
6719 record.set("key", Value::text(key));
6720 record.set("value", Value::text("***"));
6721 record.set("status", Value::text("active"));
6722 result.push(record);
6723 }
6724 Ok(RuntimeQueryResult {
6725 query: query.to_string(),
6726 mode,
6727 statement: "show_secrets",
6728 engine: "runtime-secret",
6729 result,
6730 affected_rows: 0,
6731 statement_type: "select",
6732 bookmark: None,
6733 })
6734 }
6735 QueryExpr::ShowConfig { ref prefix } => {
6737 let store = self.inner.db.store();
6738 let all_collections = store.list_collections();
6739 if !all_collections.contains(&"red_config".to_string()) {
6740 let result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
6741 return Ok(RuntimeQueryResult {
6742 query: query.to_string(),
6743 mode,
6744 statement: "show_config",
6745 engine: "runtime-config",
6746 result,
6747 affected_rows: 0,
6748 statement_type: "select",
6749 bookmark: None,
6750 });
6751 }
6752 let manager = store
6753 .get_collection("red_config")
6754 .ok_or_else(|| RedDBError::NotFound("red_config".to_string()))?;
6755 let entities = manager.query_all(|_| true);
6756 let mut latest = std::collections::BTreeMap::<String, (u64, Value, Value)>::new();
6757 for entity in entities {
6758 if let EntityData::Row(ref row) = entity.data {
6759 if let Some(ref named) = row.named {
6760 let key_val = named.get("key").cloned().unwrap_or(Value::Null);
6761 let val = named.get("value").cloned().unwrap_or(Value::Null);
6762 let key_str = match &key_val {
6763 Value::Text(s) => s.as_ref(),
6764 _ => continue,
6765 };
6766 if let Some(ref pfx) = prefix {
6767 if !key_str.starts_with(pfx.as_str()) {
6768 continue;
6769 }
6770 }
6771 let entity_id = entity.id.raw();
6772 match latest.get(key_str) {
6773 Some((prev_id, _, _)) if *prev_id > entity_id => {}
6774 _ => {
6775 latest.insert(key_str.to_string(), (entity_id, key_val, val));
6776 }
6777 }
6778 }
6779 }
6780 }
6781 let mut result = UnifiedResult::with_columns(vec!["key".into(), "value".into()]);
6782 for (_, key_val, val) in latest.into_values() {
6783 let mut record = UnifiedRecord::new();
6784 record.set("key", key_val);
6785 record.set("value", val);
6786 result.push(record);
6787 }
6788 Ok(RuntimeQueryResult {
6789 query: query.to_string(),
6790 mode,
6791 statement: "show_config",
6792 engine: "runtime-config",
6793 result,
6794 affected_rows: 0,
6795 statement_type: "select",
6796 bookmark: None,
6797 })
6798 }
6799 QueryExpr::SetTenant(ref value) => {
6805 match value {
6806 Some(id) => set_current_tenant(id.clone()),
6807 None => clear_current_tenant(),
6808 }
6809 Ok(RuntimeQueryResult::ok_message(
6810 query.to_string(),
6811 &match value {
6812 Some(id) => format!("tenant set: {id}"),
6813 None => "tenant cleared".to_string(),
6814 },
6815 "set_tenant",
6816 ))
6817 }
6818 QueryExpr::ShowTenant => {
6819 let mut result = UnifiedResult::with_columns(vec!["tenant".into()]);
6820 let mut record = UnifiedRecord::new();
6821 record.set(
6822 "tenant",
6823 current_tenant().map(Value::text).unwrap_or(Value::Null),
6824 );
6825 result.push(record);
6826 Ok(RuntimeQueryResult {
6827 query: query.to_string(),
6828 mode,
6829 statement: "show_tenant",
6830 engine: "runtime-tenant",
6831 result,
6832 affected_rows: 0,
6833 statement_type: "select",
6834 bookmark: None,
6835 })
6836 }
6837 QueryExpr::TransactionControl(ref ctl) => {
6849 use crate::storage::query::ast::TxnControl;
6850 use crate::storage::transaction::snapshot::{TxnContext, Xid};
6851 use crate::storage::transaction::IsolationLevel;
6852
6853 let conn_id = current_connection_id();
6858
6859 let (kind, msg) = match ctl {
6860 TxnControl::Begin => {
6861 let mgr = Arc::clone(&self.inner.snapshot_manager);
6862 let xid = mgr.begin();
6863 let snapshot = mgr.snapshot(xid);
6864 let ctx = TxnContext {
6865 xid,
6866 isolation: IsolationLevel::SnapshotIsolation,
6867 snapshot,
6868 savepoints: Vec::new(),
6869 released_sub_xids: Vec::new(),
6870 };
6871 self.inner.tx_contexts.write().insert(conn_id, ctx);
6872 ("begin", format!("BEGIN — xid={xid} (snapshot isolation)"))
6873 }
6874 TxnControl::Commit => {
6875 self.inner.tx_local_tenants.write().remove(&conn_id);
6877 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
6878 match ctx {
6879 Some(ctx) => {
6880 let mut own_xids = std::collections::HashSet::new();
6881 own_xids.insert(ctx.xid);
6882 for (_, sub) in &ctx.savepoints {
6883 own_xids.insert(*sub);
6884 }
6885 for sub in &ctx.released_sub_xids {
6886 own_xids.insert(*sub);
6887 }
6888 if let Err(err) = self.check_table_row_write_conflicts(
6889 conn_id,
6890 &ctx.snapshot,
6891 &own_xids,
6892 ) {
6893 for (_, sub) in &ctx.savepoints {
6894 self.inner.snapshot_manager.rollback(*sub);
6895 }
6896 for sub in &ctx.released_sub_xids {
6897 self.inner.snapshot_manager.rollback(*sub);
6898 }
6899 self.inner.snapshot_manager.rollback(ctx.xid);
6900 self.revive_pending_versioned_updates(conn_id);
6901 self.revive_pending_tombstones(conn_id);
6902 self.discard_pending_kv_watch_events(conn_id);
6903 self.discard_pending_queue_wakes(conn_id);
6904 self.discard_pending_store_wal_actions(conn_id);
6905 return Err(err);
6906 }
6907 self.restore_pending_write_stamps(conn_id);
6908 if let Err(err) = self.flush_pending_store_wal_actions(conn_id) {
6909 for (_, sub) in &ctx.savepoints {
6910 self.inner.snapshot_manager.rollback(*sub);
6911 }
6912 for sub in &ctx.released_sub_xids {
6913 self.inner.snapshot_manager.rollback(*sub);
6914 }
6915 self.inner.snapshot_manager.rollback(ctx.xid);
6916 self.revive_pending_versioned_updates(conn_id);
6917 self.revive_pending_tombstones(conn_id);
6918 self.discard_pending_kv_watch_events(conn_id);
6919 return Err(err);
6920 }
6921 for (_, sub) in &ctx.savepoints {
6927 self.inner.snapshot_manager.commit(*sub);
6928 }
6929 for sub in &ctx.released_sub_xids {
6930 self.inner.snapshot_manager.commit(*sub);
6931 }
6932 self.inner.snapshot_manager.commit(ctx.xid);
6933 self.finalize_pending_versioned_updates(conn_id);
6934 self.finalize_pending_tombstones(conn_id);
6935 self.finalize_pending_kv_watch_events(conn_id);
6936 self.finalize_pending_queue_wakes(conn_id);
6937 ("commit", format!("COMMIT — xid={} committed", ctx.xid))
6938 }
6939 None => (
6940 "commit",
6941 "COMMIT outside transaction — no-op (autocommit)".to_string(),
6942 ),
6943 }
6944 }
6945 TxnControl::Rollback => {
6946 self.inner.tx_local_tenants.write().remove(&conn_id);
6947 let ctx = self.inner.tx_contexts.write().remove(&conn_id);
6948 match ctx {
6949 Some(ctx) => {
6950 for (_, sub) in &ctx.savepoints {
6953 self.inner.snapshot_manager.rollback(*sub);
6954 }
6955 for sub in &ctx.released_sub_xids {
6956 self.inner.snapshot_manager.rollback(*sub);
6957 }
6958 self.inner.snapshot_manager.rollback(ctx.xid);
6959 self.revive_pending_versioned_updates(conn_id);
6963 self.revive_pending_tombstones(conn_id);
6964 self.discard_pending_kv_watch_events(conn_id);
6965 self.discard_pending_queue_wakes(conn_id);
6966 self.discard_pending_store_wal_actions(conn_id);
6967 ("rollback", format!("ROLLBACK — xid={} aborted", ctx.xid))
6968 }
6969 None => (
6970 "rollback",
6971 "ROLLBACK outside transaction — no-op (autocommit)".to_string(),
6972 ),
6973 }
6974 }
6975 TxnControl::Savepoint(name) => {
6982 let mgr = Arc::clone(&self.inner.snapshot_manager);
6983 let mut guard = self.inner.tx_contexts.write();
6984 match guard.get_mut(&conn_id) {
6985 Some(ctx) => {
6986 let sub = mgr.begin();
6987 ctx.savepoints.push((name.clone(), sub));
6988 ("savepoint", format!("SAVEPOINT {name} — sub_xid={sub}"))
6989 }
6990 None => (
6991 "savepoint",
6992 "SAVEPOINT outside transaction — no-op".to_string(),
6993 ),
6994 }
6995 }
6996 TxnControl::ReleaseSavepoint(name) => {
6997 let mut guard = self.inner.tx_contexts.write();
6998 match guard.get_mut(&conn_id) {
6999 Some(ctx) => {
7000 let pos = ctx
7001 .savepoints
7002 .iter()
7003 .position(|(n, _)| n == name)
7004 .ok_or_else(|| {
7005 RedDBError::Internal(format!(
7006 "savepoint {name} does not exist"
7007 ))
7008 })?;
7009 let released = ctx.savepoints.len() - pos;
7017 let popped: Vec<Xid> = ctx
7018 .savepoints
7019 .split_off(pos)
7020 .into_iter()
7021 .map(|(_, x)| x)
7022 .collect();
7023 ctx.released_sub_xids.extend(popped);
7024 (
7025 "release_savepoint",
7026 format!("RELEASE SAVEPOINT {name} — {released} level(s)"),
7027 )
7028 }
7029 None => (
7030 "release_savepoint",
7031 "RELEASE outside transaction — no-op".to_string(),
7032 ),
7033 }
7034 }
7035 TxnControl::RollbackToSavepoint(name) => {
7036 let mgr = Arc::clone(&self.inner.snapshot_manager);
7037 let drop_result: Option<(Xid, Vec<Xid>)> = {
7042 let mut guard = self.inner.tx_contexts.write();
7043 if let Some(ctx) = guard.get_mut(&conn_id) {
7044 let pos = ctx
7045 .savepoints
7046 .iter()
7047 .position(|(n, _)| n == name)
7048 .ok_or_else(|| {
7049 RedDBError::Internal(format!(
7050 "savepoint {name} does not exist"
7051 ))
7052 })?;
7053 let savepoint_xid = ctx.savepoints[pos].1;
7054 let aborted: Vec<Xid> = ctx
7055 .savepoints
7056 .split_off(pos)
7057 .into_iter()
7058 .map(|(_, x)| x)
7059 .collect();
7060 Some((savepoint_xid, aborted))
7061 } else {
7062 None
7063 }
7064 };
7065
7066 match drop_result {
7067 Some((savepoint_xid, aborted)) => {
7068 for x in &aborted {
7069 mgr.rollback(*x);
7070 }
7071 let reverted_updates =
7072 self.revive_versioned_updates_since(conn_id, savepoint_xid);
7073 let revived = self.revive_tombstones_since(conn_id, savepoint_xid);
7074 (
7075 "rollback_to_savepoint",
7076 format!(
7077 "ROLLBACK TO SAVEPOINT {name} — aborted {} sub_xid(s), reverted {reverted_updates} update(s), revived {revived} tombstone(s)",
7078 aborted.len(),
7079 ),
7080 )
7081 }
7082 None => (
7083 "rollback_to_savepoint",
7084 "ROLLBACK TO outside transaction — no-op".to_string(),
7085 ),
7086 }
7087 }
7088 };
7089 Ok(RuntimeQueryResult::ok_message(
7090 query.to_string(),
7091 &msg,
7092 kind,
7093 ))
7094 }
7095 QueryExpr::CreateSchema(ref q) => {
7108 let store = self.inner.db.store();
7109 let key = format!("schema.{}", q.name);
7110 if store.get_config(&key).is_some() {
7111 if q.if_not_exists {
7112 return Ok(RuntimeQueryResult::ok_message(
7113 query.to_string(),
7114 &format!("schema {} already exists — skipped", q.name),
7115 "create_schema",
7116 ));
7117 }
7118 return Err(RedDBError::Internal(format!(
7119 "schema {} already exists",
7120 q.name
7121 )));
7122 }
7123 store.set_config_tree(&key, &crate::serde_json::Value::Bool(true));
7124 Ok(RuntimeQueryResult::ok_message(
7125 query.to_string(),
7126 &format!("schema {} created", q.name),
7127 "create_schema",
7128 ))
7129 }
7130 QueryExpr::DropSchema(ref q) => {
7131 let store = self.inner.db.store();
7132 let key = format!("schema.{}", q.name);
7133 let existed = store.get_config(&key).is_some();
7134 if !existed && !q.if_exists {
7135 return Err(RedDBError::Internal(format!(
7136 "schema {} does not exist",
7137 q.name
7138 )));
7139 }
7140 store.set_config_tree(&key, &crate::serde_json::Value::Null);
7142 let suffix = if q.cascade {
7143 " (CASCADE accepted — tables untouched)"
7144 } else {
7145 ""
7146 };
7147 Ok(RuntimeQueryResult::ok_message(
7148 query.to_string(),
7149 &format!("schema {} dropped{}", q.name, suffix),
7150 "drop_schema",
7151 ))
7152 }
7153 QueryExpr::CreateSequence(ref q) => {
7154 let store = self.inner.db.store();
7155 let base = format!("sequence.{}", q.name);
7156 let start_key = format!("{base}.start");
7157 let incr_key = format!("{base}.increment");
7158 let curr_key = format!("{base}.current");
7159 if store.get_config(&start_key).is_some() {
7160 if q.if_not_exists {
7161 return Ok(RuntimeQueryResult::ok_message(
7162 query.to_string(),
7163 &format!("sequence {} already exists — skipped", q.name),
7164 "create_sequence",
7165 ));
7166 }
7167 return Err(RedDBError::Internal(format!(
7168 "sequence {} already exists",
7169 q.name
7170 )));
7171 }
7172 let initial_current = q.start - q.increment;
7175 store.set_config_tree(
7176 &start_key,
7177 &crate::serde_json::Value::Number(q.start as f64),
7178 );
7179 store.set_config_tree(
7180 &incr_key,
7181 &crate::serde_json::Value::Number(q.increment as f64),
7182 );
7183 store.set_config_tree(
7184 &curr_key,
7185 &crate::serde_json::Value::Number(initial_current as f64),
7186 );
7187 Ok(RuntimeQueryResult::ok_message(
7188 query.to_string(),
7189 &format!(
7190 "sequence {} created (start={}, increment={})",
7191 q.name, q.start, q.increment
7192 ),
7193 "create_sequence",
7194 ))
7195 }
7196 QueryExpr::DropSequence(ref q) => {
7197 let store = self.inner.db.store();
7198 let base = format!("sequence.{}", q.name);
7199 let existed = store.get_config(&format!("{base}.start")).is_some();
7200 if !existed && !q.if_exists {
7201 return Err(RedDBError::Internal(format!(
7202 "sequence {} does not exist",
7203 q.name
7204 )));
7205 }
7206 for k in ["start", "increment", "current"] {
7207 store.set_config_tree(&format!("{base}.{k}"), &crate::serde_json::Value::Null);
7208 }
7209 Ok(RuntimeQueryResult::ok_message(
7210 query.to_string(),
7211 &format!("sequence {} dropped", q.name),
7212 "drop_sequence",
7213 ))
7214 }
7215 QueryExpr::CreateView(ref q) => {
7225 let mut views = self.inner.views.write();
7226 if views.contains_key(&q.name) && !q.or_replace {
7227 if q.if_not_exists {
7228 return Ok(RuntimeQueryResult::ok_message(
7229 query.to_string(),
7230 &format!("view {} already exists — skipped", q.name),
7231 "create_view",
7232 ));
7233 }
7234 return Err(RedDBError::Internal(format!(
7235 "view {} already exists",
7236 q.name
7237 )));
7238 }
7239 views.insert(q.name.clone(), Arc::new(q.clone()));
7240 drop(views);
7241
7242 if q.materialized {
7244 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
7245 let refresh = match q.refresh_every_ms {
7246 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
7247 None => RefreshPolicy::Manual,
7248 };
7249 let dependencies = collect_table_refs(&q.query);
7250 let def = MaterializedViewDef {
7251 name: q.name.clone(),
7252 query: format!("<parsed view {}>", q.name),
7253 dependencies: dependencies.clone(),
7254 refresh,
7255 retention_duration_ms: q.retention_duration_ms,
7256 };
7257 self.inner.materialized_views.write().register(def);
7258
7259 let descriptor =
7265 crate::runtime::continuous_materialized_view::MaterializedViewDescriptor {
7266 name: q.name.clone(),
7267 source_sql: query.to_string(),
7268 source_collections: dependencies,
7269 refresh_every_ms: q.refresh_every_ms,
7270 retention_duration_ms: q.retention_duration_ms,
7271 };
7272 let store = self.inner.db.store();
7273 crate::runtime::continuous_materialized_view::persist_descriptor(
7274 store.as_ref(),
7275 &descriptor,
7276 )?;
7277
7278 self.ensure_materialized_view_backing(&q.name)?;
7285 }
7286 self.invalidate_plan_cache();
7291 self.invalidate_result_cache();
7292
7293 Ok(RuntimeQueryResult::ok_message(
7294 query.to_string(),
7295 &format!(
7296 "{}view {} created",
7297 if q.materialized { "materialized " } else { "" },
7298 q.name
7299 ),
7300 "create_view",
7301 ))
7302 }
7303 QueryExpr::DropView(ref q) => {
7304 let mut views = self.inner.views.write();
7305 let removed = views.remove(&q.name);
7306 let existed = removed.is_some();
7307 let removed_materialized =
7308 removed.as_ref().map(|v| v.materialized).unwrap_or(false);
7309 drop(views);
7310 if q.materialized || existed {
7311 self.inner.materialized_views.write().remove(&q.name);
7313 let store = self.inner.db.store();
7317 crate::runtime::continuous_materialized_view::remove_by_name(
7318 store.as_ref(),
7319 &q.name,
7320 )?;
7321 }
7322 if removed_materialized || q.materialized {
7326 self.drop_materialized_view_backing(&q.name)?;
7327 }
7328 self.invalidate_plan_cache();
7331 self.invalidate_result_cache();
7332 if !existed && !q.if_exists {
7333 return Err(RedDBError::Internal(format!(
7334 "view {} does not exist",
7335 q.name
7336 )));
7337 }
7338 self.invalidate_plan_cache();
7339 Ok(RuntimeQueryResult::ok_message(
7340 query.to_string(),
7341 &format!("view {} dropped", q.name),
7342 "drop_view",
7343 ))
7344 }
7345 QueryExpr::RefreshMaterializedView(ref q) => {
7346 let view = {
7349 let views = self.inner.views.read();
7350 views.get(&q.name).cloned()
7351 };
7352 let view = match view {
7353 Some(v) => v,
7354 None => {
7355 return Err(RedDBError::Internal(format!(
7356 "view {} does not exist",
7357 q.name
7358 )))
7359 }
7360 };
7361 if !view.materialized {
7362 return Err(RedDBError::Internal(format!(
7363 "view {} is not materialized — REFRESH requires \
7364 CREATE MATERIALIZED VIEW",
7365 q.name
7366 )));
7367 }
7368 let started = std::time::Instant::now();
7370 let now_ms = std::time::SystemTime::now()
7371 .duration_since(std::time::UNIX_EPOCH)
7372 .map(|d| d.as_millis() as u64)
7373 .unwrap_or(0);
7374 match self.execute_query_expr((*view.query).clone()) {
7375 Ok(inner_result) => {
7376 let entities =
7383 view_records_to_entities(&q.name, &inner_result.result.records);
7384 let row_count = entities.len() as u64;
7385 let store = self.inner.db.store();
7386 let serialized_records = match store.refresh_collection(&q.name, entities) {
7387 Ok(records) => records,
7388 Err(err) => {
7389 let duration_ms = started.elapsed().as_millis() as u64;
7390 let msg = err.to_string();
7391 self.inner
7392 .materialized_views
7393 .write()
7394 .record_refresh_failure(
7395 &q.name,
7396 msg.clone(),
7397 duration_ms,
7398 now_ms,
7399 );
7400 return Err(RedDBError::Internal(format!(
7401 "REFRESH MATERIALIZED VIEW {}: {msg}",
7402 q.name
7403 )));
7404 }
7405 };
7406
7407 if let Some(ref primary) = self.inner.db.replication {
7413 let lsn = self.inner.cdc.emit(
7414 crate::replication::cdc::ChangeOperation::Refresh,
7415 &q.name,
7416 0,
7417 "refresh",
7418 );
7419 self.invalidate_result_cache_for_table(&q.name);
7420 let timestamp = std::time::SystemTime::now()
7421 .duration_since(std::time::UNIX_EPOCH)
7422 .unwrap_or_default()
7423 .as_millis() as u64;
7424 let record = ChangeRecord::for_refresh(
7425 lsn,
7426 timestamp,
7427 q.name.clone(),
7428 serialized_records,
7429 )
7430 .with_term(self.current_replication_term());
7431 let encoded = record.encode();
7432 primary.append_logical_record(record.lsn, encoded);
7433 }
7434
7435 let duration_ms = started.elapsed().as_millis() as u64;
7436 let serialized = format!("{:?}", inner_result.result);
7437 self.inner
7438 .materialized_views
7439 .write()
7440 .record_refresh_success(
7441 &q.name,
7442 serialized.into_bytes(),
7443 row_count,
7444 duration_ms,
7445 now_ms,
7446 );
7447 self.invalidate_result_cache();
7452 Ok(RuntimeQueryResult::ok_message(
7453 query.to_string(),
7454 &format!("materialized view {} refreshed", q.name),
7455 "refresh_materialized_view",
7456 ))
7457 }
7458 Err(err) => {
7459 let duration_ms = started.elapsed().as_millis() as u64;
7460 let msg = err.to_string();
7461 self.inner
7462 .materialized_views
7463 .write()
7464 .record_refresh_failure(&q.name, msg.clone(), duration_ms, now_ms);
7465 Err(err)
7466 }
7467 }
7468 }
7469 QueryExpr::CreatePolicy(ref q) => {
7476 let key = (q.table.clone(), q.name.clone());
7477 self.inner
7478 .rls_policies
7479 .write()
7480 .insert(key, Arc::new(q.clone()));
7481 self.invalidate_plan_cache();
7482 self.schema_vocabulary_apply(
7486 crate::runtime::schema_vocabulary::DdlEvent::CreatePolicy {
7487 collection: q.table.clone(),
7488 policy: q.name.clone(),
7489 },
7490 );
7491 Ok(RuntimeQueryResult::ok_message(
7492 query.to_string(),
7493 &format!("policy {} on {} created", q.name, q.table),
7494 "create_policy",
7495 ))
7496 }
7497 QueryExpr::DropPolicy(ref q) => {
7498 let removed = self
7499 .inner
7500 .rls_policies
7501 .write()
7502 .remove(&(q.table.clone(), q.name.clone()))
7503 .is_some();
7504 if !removed && !q.if_exists {
7505 return Err(RedDBError::Internal(format!(
7506 "policy {} on {} does not exist",
7507 q.name, q.table
7508 )));
7509 }
7510 self.invalidate_plan_cache();
7511 self.schema_vocabulary_apply(
7514 crate::runtime::schema_vocabulary::DdlEvent::DropPolicy {
7515 collection: q.table.clone(),
7516 policy: q.name.clone(),
7517 },
7518 );
7519 Ok(RuntimeQueryResult::ok_message(
7520 query.to_string(),
7521 &format!("policy {} on {} dropped", q.name, q.table),
7522 "drop_policy",
7523 ))
7524 }
7525 QueryExpr::CreateServer(ref q) => {
7536 use crate::storage::fdw::FdwOptions;
7537 let registry = Arc::clone(&self.inner.foreign_tables);
7538 if registry.server(&q.name).is_some() {
7539 if q.if_not_exists {
7540 return Ok(RuntimeQueryResult::ok_message(
7541 query.to_string(),
7542 &format!("server {} already exists — skipped", q.name),
7543 "create_server",
7544 ));
7545 }
7546 return Err(RedDBError::Internal(format!(
7547 "server {} already exists",
7548 q.name
7549 )));
7550 }
7551 let mut opts = FdwOptions::new();
7552 for (k, v) in &q.options {
7553 opts.values.insert(k.clone(), v.clone());
7554 }
7555 registry
7556 .create_server(&q.name, &q.wrapper, opts)
7557 .map_err(|e| RedDBError::Internal(e.to_string()))?;
7558 Ok(RuntimeQueryResult::ok_message(
7559 query.to_string(),
7560 &format!("server {} created (wrapper {})", q.name, q.wrapper),
7561 "create_server",
7562 ))
7563 }
7564 QueryExpr::DropServer(ref q) => {
7565 let existed = self.inner.foreign_tables.drop_server(&q.name);
7566 if !existed && !q.if_exists {
7567 return Err(RedDBError::Internal(format!(
7568 "server {} does not exist",
7569 q.name
7570 )));
7571 }
7572 Ok(RuntimeQueryResult::ok_message(
7573 query.to_string(),
7574 &format!(
7575 "server {} dropped{}",
7576 q.name,
7577 if q.cascade { " (cascade)" } else { "" }
7578 ),
7579 "drop_server",
7580 ))
7581 }
7582 QueryExpr::CreateForeignTable(ref q) => {
7583 use crate::storage::fdw::{FdwOptions, ForeignColumn, ForeignTable};
7584 let registry = Arc::clone(&self.inner.foreign_tables);
7585 if registry.foreign_table(&q.name).is_some() {
7586 if q.if_not_exists {
7587 return Ok(RuntimeQueryResult::ok_message(
7588 query.to_string(),
7589 &format!("foreign table {} already exists — skipped", q.name),
7590 "create_foreign_table",
7591 ));
7592 }
7593 return Err(RedDBError::Internal(format!(
7594 "foreign table {} already exists",
7595 q.name
7596 )));
7597 }
7598 let mut opts = FdwOptions::new();
7599 for (k, v) in &q.options {
7600 opts.values.insert(k.clone(), v.clone());
7601 }
7602 let columns: Vec<ForeignColumn> = q
7603 .columns
7604 .iter()
7605 .map(|c| ForeignColumn {
7606 name: c.name.clone(),
7607 data_type: c.data_type.clone(),
7608 not_null: c.not_null,
7609 })
7610 .collect();
7611 registry
7612 .create_foreign_table(ForeignTable {
7613 name: q.name.clone(),
7614 server_name: q.server.clone(),
7615 columns,
7616 options: opts,
7617 })
7618 .map_err(|e| RedDBError::Internal(e.to_string()))?;
7619 self.invalidate_plan_cache();
7620 Ok(RuntimeQueryResult::ok_message(
7621 query.to_string(),
7622 &format!("foreign table {} created (server {})", q.name, q.server),
7623 "create_foreign_table",
7624 ))
7625 }
7626 QueryExpr::DropForeignTable(ref q) => {
7627 let existed = self.inner.foreign_tables.drop_foreign_table(&q.name);
7628 if !existed && !q.if_exists {
7629 return Err(RedDBError::Internal(format!(
7630 "foreign table {} does not exist",
7631 q.name
7632 )));
7633 }
7634 self.invalidate_plan_cache();
7635 Ok(RuntimeQueryResult::ok_message(
7636 query.to_string(),
7637 &format!("foreign table {} dropped", q.name),
7638 "drop_foreign_table",
7639 ))
7640 }
7641 QueryExpr::CopyFrom(ref q) => {
7647 use crate::storage::import::{CsvConfig, CsvImporter};
7648 let store = self.inner.db.store();
7649 let cfg = CsvConfig {
7650 collection: q.table.clone(),
7651 has_header: q.has_header,
7652 delimiter: q.delimiter.map(|c| c as u8).unwrap_or(b','),
7653 ..CsvConfig::default()
7654 };
7655 let importer = CsvImporter::new(cfg);
7656 let stats = importer
7657 .import_file(&q.path, store.as_ref())
7658 .map_err(|e| RedDBError::Internal(format!("COPY failed: {e}")))?;
7659 self.note_table_write(&q.table);
7661 Ok(RuntimeQueryResult::ok_message(
7662 query.to_string(),
7663 &format!(
7664 "COPY imported {} rows into {} ({} errors skipped, {}ms)",
7665 stats.records_imported, q.table, stats.errors_skipped, stats.duration_ms
7666 ),
7667 "copy_from",
7668 ))
7669 }
7670 QueryExpr::MaintenanceCommand(ref cmd) => {
7686 use crate::storage::query::ast::MaintenanceCommand as Mc;
7687 let store = self.inner.db.store();
7688 let (kind, msg) = match cmd {
7689 Mc::Analyze { target } => {
7690 let targets: Vec<String> = match target {
7691 Some(t) => vec![t.clone()],
7692 None => store.list_collections(),
7693 };
7694 for t in &targets {
7695 self.refresh_table_planner_stats(t);
7696 }
7697 (
7698 "analyze",
7699 format!("ANALYZE refreshed stats for {} table(s)", targets.len()),
7700 )
7701 }
7702 Mc::Vacuum { target, full } => {
7703 let targets: Vec<String> = match target {
7704 Some(t) => vec![t.clone()],
7705 None => store.list_collections(),
7706 };
7707 let cutoff_xid = self.mvcc_vacuum_cutoff_xid();
7708 let mut vacuum_stats =
7709 crate::storage::unified::store::MvccVacuumStats::default();
7710 for t in &targets {
7711 let stats = store.vacuum_mvcc_history(t, cutoff_xid).map_err(|e| {
7712 RedDBError::Internal(format!(
7713 "VACUUM MVCC history failed for {t}: {e}"
7714 ))
7715 })?;
7716 if stats.reclaimed_versions > 0 {
7717 self.rebuild_runtime_indexes_for_table(t)?;
7718 }
7719 vacuum_stats.add(&stats);
7720 }
7721 self.inner.snapshot_manager.prune_aborted(cutoff_xid);
7722 for t in &targets {
7724 self.refresh_table_planner_stats(t);
7725 }
7726 let persisted = if *full {
7730 match store.persist() {
7731 Ok(()) => true,
7732 Err(e) => {
7733 return Err(RedDBError::Internal(format!(
7734 "VACUUM FULL persist failed: {e:?}"
7735 )));
7736 }
7737 }
7738 } else {
7739 false
7740 };
7741 self.invalidate_result_cache();
7743 (
7744 "vacuum",
7745 format!(
7746 "VACUUM{} processed {} table(s): scanned_versions={}, retained_versions={}, reclaimed_versions={}, retained_history_versions={}, reclaimed_history_versions={}, retained_tombstones={}, reclaimed_tombstones={}{}",
7747 if *full { " FULL" } else { "" },
7748 targets.len(),
7749 vacuum_stats.scanned_versions,
7750 vacuum_stats.retained_versions,
7751 vacuum_stats.reclaimed_versions,
7752 vacuum_stats.retained_history_versions,
7753 vacuum_stats.reclaimed_history_versions,
7754 vacuum_stats.retained_tombstones,
7755 vacuum_stats.reclaimed_tombstones,
7756 if persisted {
7757 " (pages flushed to disk)"
7758 } else {
7759 ""
7760 }
7761 ),
7762 )
7763 }
7764 };
7765 Ok(RuntimeQueryResult::ok_message(
7766 query.to_string(),
7767 &msg,
7768 kind,
7769 ))
7770 }
7771 QueryExpr::Grant(ref g) => self.execute_grant_statement(query, g),
7778 QueryExpr::Revoke(ref r) => self.execute_revoke_statement(query, r),
7779 QueryExpr::AlterUser(ref a) => self.execute_alter_user_statement(query, a),
7780 QueryExpr::CreateIamPolicy { ref id, ref json } => {
7781 self.execute_create_iam_policy(query, id, json)
7782 }
7783 QueryExpr::DropIamPolicy { ref id } => self.execute_drop_iam_policy(query, id),
7784 QueryExpr::AttachPolicy {
7785 ref policy_id,
7786 ref principal,
7787 } => self.execute_attach_policy(query, policy_id, principal),
7788 QueryExpr::DetachPolicy {
7789 ref policy_id,
7790 ref principal,
7791 } => self.execute_detach_policy(query, policy_id, principal),
7792 QueryExpr::ShowPolicies { ref filter } => {
7793 self.execute_show_policies(query, filter.as_ref())
7794 }
7795 QueryExpr::ShowEffectivePermissions {
7796 ref user,
7797 ref resource,
7798 } => self.execute_show_effective_permissions(query, user, resource.as_ref()),
7799 QueryExpr::SimulatePolicy {
7800 ref user,
7801 ref action,
7802 ref resource,
7803 } => self.execute_simulate_policy(query, user, action, resource),
7804 QueryExpr::LintPolicy { ref source } => self.execute_lint_policy(query, source),
7805 QueryExpr::MigratePolicyMode {
7806 ref target,
7807 dry_run,
7808 } => self.execute_migrate_policy_mode(query, target, dry_run),
7809 QueryExpr::CreateMigration(ref q) => self.execute_create_migration(query, q),
7810 QueryExpr::ApplyMigration(ref q) => self.execute_apply_migration(query, q),
7811 QueryExpr::RollbackMigration(ref q) => self.execute_rollback_migration(query, q),
7812 QueryExpr::ExplainMigration(ref q) => self.execute_explain_migration(query, q),
7813 };
7814
7815 if !control_event_specs.is_empty() {
7816 let (outcome, reason) = match &query_result {
7817 Ok(_) => (crate::runtime::control_events::Outcome::Allowed, None),
7818 Err(err) => (control_event_outcome_for_error(err), Some(err.to_string())),
7819 };
7820 for spec in &control_event_specs {
7821 self.emit_control_event(
7822 spec.kind,
7823 outcome,
7824 spec.action,
7825 spec.resource.clone(),
7826 reason.clone(),
7827 spec.fields.clone(),
7828 )?;
7829 }
7830 }
7831
7832 if let (Some(plan), Ok(result)) = (&query_audit_plan, &query_result) {
7833 self.emit_query_audit(
7834 query,
7835 plan,
7836 query_audit_started.elapsed().as_millis() as u64,
7837 result,
7838 );
7839 }
7840
7841 let mut query_result = query_result;
7845 if let Ok(ref mut result) = query_result {
7846 if result.statement_type == "select" {
7847 self.apply_secret_decryption(result);
7848 }
7849 }
7850
7851 if let Ok(ref result) = query_result {
7858 frame.write_result_cache(self, result, result_cache_scopes);
7859 }
7860
7861 query_result
7862 }
7863
7864 pub fn materialized_view_metadata(
7868 &self,
7869 ) -> Vec<crate::storage::cache::result::MaterializedViewMetadata> {
7870 let store = self.inner.db.store();
7877 let mut entries = self.inner.materialized_views.read().metadata();
7878 for entry in &mut entries {
7879 if let Some(manager) = store.get_collection(&entry.name) {
7880 entry.current_row_count = manager.count() as u64;
7881 }
7882 }
7883 entries
7884 }
7885
7886 pub(crate) fn retention_sweeper_snapshot(
7897 &self,
7898 ) -> Vec<(String, crate::runtime::retention_sweeper::SweeperState)> {
7899 self.inner.retention_sweeper.read().snapshot()
7900 }
7901
7902 pub fn sweep_retention_tick(&self, batch_size: usize) {
7924 if batch_size == 0 {
7925 return;
7926 }
7927 let now_ms = std::time::SystemTime::now()
7928 .duration_since(std::time::UNIX_EPOCH)
7929 .map(|d| d.as_millis() as u64)
7930 .unwrap_or(0);
7931
7932 let store = self.inner.db.store();
7933 let collections = store.list_collections();
7934 for name in collections {
7935 let Some(contract) = self.inner.db.collection_contract(&name) else {
7936 continue;
7937 };
7938 let Some(retention_ms) = contract.retention_duration_ms else {
7939 continue;
7940 };
7941 let Some(ts_column) =
7942 crate::runtime::retention_filter::resolve_timestamp_column(&contract)
7943 else {
7944 continue;
7945 };
7946 let Some(manager) = store.get_collection(&name) else {
7947 continue;
7948 };
7949 let cutoff = (now_ms as i64).saturating_sub(retention_ms as i64);
7950
7951 let mut expired_ts: Vec<i64> = Vec::new();
7959 manager.for_each_entity(|entity| {
7960 let ts = match ts_column.as_str() {
7961 "created_at" => Some(entity.created_at as i64),
7962 "updated_at" => Some(entity.updated_at as i64),
7963 other => entity
7964 .data
7965 .as_row()
7966 .and_then(|row| row.get_field(other))
7967 .and_then(|v| match v {
7968 crate::storage::schema::Value::TimestampMs(t) => Some(*t),
7969 crate::storage::schema::Value::Timestamp(t) => {
7970 Some(t.saturating_mul(1_000))
7971 }
7972 crate::storage::schema::Value::BigInt(t) => Some(*t),
7973 crate::storage::schema::Value::UnsignedInteger(t) => {
7974 i64::try_from(*t).ok()
7975 }
7976 crate::storage::schema::Value::Integer(t) => Some(*t),
7977 _ => None,
7978 }),
7979 };
7980 if let Some(t) = ts {
7981 if t < cutoff {
7982 expired_ts.push(t);
7983 }
7984 }
7985 true
7986 });
7987
7988 let total_expired = expired_ts.len() as u64;
7989 if total_expired == 0 {
7990 self.inner
7991 .retention_sweeper
7992 .write()
7993 .record_tick(&name, 0, 0, now_ms);
7994 continue;
7995 }
7996
7997 let (effective_cutoff, pending) = if (total_expired as usize) <= batch_size {
7998 (cutoff, 0u64)
7999 } else {
8000 expired_ts.sort_unstable();
8004 let nth = expired_ts[batch_size - 1];
8005 (
8006 nth.saturating_add(1),
8007 total_expired.saturating_sub(batch_size as u64),
8008 )
8009 };
8010
8011 let stmt = format!(
8012 "DELETE FROM {} WHERE {} < {}",
8013 name, ts_column, effective_cutoff
8014 );
8015 let deleted = match self.execute_query(&stmt) {
8016 Ok(r) => r.affected_rows,
8017 Err(_) => 0,
8018 };
8019
8020 self.inner
8021 .retention_sweeper
8022 .write()
8023 .record_tick(&name, deleted, pending, now_ms);
8024 }
8025 }
8026
8027 pub fn refresh_due_materialized_views(&self) {
8028 let due = {
8029 let mut cache = self.inner.materialized_views.write();
8030 cache.claim_due_at(std::time::Instant::now())
8031 };
8032 for name in due {
8033 let stmt = format!("REFRESH MATERIALIZED VIEW {}", name);
8040 let _ = self.execute_query(&stmt);
8041 }
8042 }
8043
8044 pub fn execute_query_expr(&self, expr: QueryExpr) -> RedDBResult<RuntimeQueryResult> {
8050 let _config_snapshot_guard = ConfigSnapshotGuard::install(Arc::clone(&self.inner.db));
8051 let _secret_store_guard = SecretStoreGuard::install(self.inner.auth_store.read().clone());
8052 let expr = self.rewrite_view_refs(expr);
8056
8057 self.validate_model_operations_before_auth(&expr)?;
8058 if let Err(err) = self.check_query_privilege(&expr) {
8062 return Err(RedDBError::Query(format!("permission denied: {err}")));
8063 }
8064
8065 let statement = query_expr_name(&expr);
8066 let mode = detect_mode(statement);
8067 let query_str = statement;
8068
8069 let result = self.dispatch_expr(expr, query_str, mode)?;
8070 let mut r = result;
8071 if r.statement_type == "select" {
8072 self.apply_secret_decryption(&mut r);
8073 }
8074 Ok(r)
8075 }
8076
8077 pub(super) fn validate_model_operations_before_auth(
8078 &self,
8079 expr: &QueryExpr,
8080 ) -> RedDBResult<()> {
8081 use crate::catalog::CollectionModel;
8082 use crate::runtime::ddl::polymorphic_resolver;
8083 use crate::storage::query::ast::KvCommand;
8084
8085 let system_schema_target = match expr {
8086 QueryExpr::DropTable(q) => Some(q.name.as_str()),
8087 QueryExpr::DropGraph(q) => Some(q.name.as_str()),
8088 QueryExpr::DropVector(q) => Some(q.name.as_str()),
8089 QueryExpr::DropDocument(q) => Some(q.name.as_str()),
8090 QueryExpr::DropKv(q) => Some(q.name.as_str()),
8091 QueryExpr::DropCollection(q) => Some(q.name.as_str()),
8092 QueryExpr::Truncate(q) => Some(q.name.as_str()),
8093 _ => None,
8094 };
8095 if system_schema_target.is_some_and(crate::runtime::impl_ddl::is_system_schema_name) {
8096 return Err(RedDBError::Query("system schema is read-only".to_string()));
8097 }
8098
8099 let expected = match expr {
8100 QueryExpr::DropTable(q) => Some((q.name.as_str(), CollectionModel::Table)),
8101 QueryExpr::DropGraph(q) => Some((q.name.as_str(), CollectionModel::Graph)),
8102 QueryExpr::DropVector(q) => Some((q.name.as_str(), CollectionModel::Vector)),
8103 QueryExpr::DropDocument(q) => Some((q.name.as_str(), CollectionModel::Document)),
8104 QueryExpr::DropKv(q) => Some((q.name.as_str(), q.model)),
8105 QueryExpr::DropCollection(q) => q.model.map(|model| (q.name.as_str(), model)),
8106 QueryExpr::Truncate(q) => q.model.map(|model| (q.name.as_str(), model)),
8107 QueryExpr::KvCommand(cmd) => {
8108 let (collection, model) = match cmd {
8109 KvCommand::Put {
8110 collection, model, ..
8111 }
8112 | KvCommand::Get {
8113 collection, model, ..
8114 }
8115 | KvCommand::Incr {
8116 collection, model, ..
8117 }
8118 | KvCommand::Cas {
8119 collection, model, ..
8120 }
8121 | KvCommand::Delete {
8122 collection, model, ..
8123 } => (collection.as_str(), *model),
8124 KvCommand::Rotate { collection, .. }
8125 | KvCommand::History { collection, .. }
8126 | KvCommand::List { collection, .. }
8127 | KvCommand::Purge { collection, .. } => {
8128 (collection.as_str(), CollectionModel::Vault)
8129 }
8130 KvCommand::InvalidateTags { collection, .. } => {
8131 (collection.as_str(), CollectionModel::Kv)
8132 }
8133 KvCommand::Watch {
8134 collection, model, ..
8135 } => (collection.as_str(), *model),
8136 KvCommand::Unseal { collection, .. } => {
8137 (collection.as_str(), CollectionModel::Vault)
8138 }
8139 };
8140 Some((collection, model))
8141 }
8142 QueryExpr::ConfigCommand(cmd) => {
8143 self.validate_config_command_before_auth(cmd)?;
8144 None
8145 }
8146 _ => None,
8147 };
8148
8149 let Some((name, expected_model)) = expected else {
8150 return Ok(());
8151 };
8152 let snapshot = self.inner.db.catalog_model_snapshot();
8153 let Some(actual_model) = snapshot
8154 .collections
8155 .iter()
8156 .find(|collection| collection.name == name)
8157 .map(|collection| collection.declared_model.unwrap_or(collection.model))
8158 else {
8159 return Ok(());
8160 };
8161 polymorphic_resolver::ensure_model_match(expected_model, actual_model)
8162 }
8163
8164 pub(super) fn rewrite_view_refs(&self, expr: QueryExpr) -> QueryExpr {
8169 if self.inner.views.read().is_empty() {
8171 return expr;
8172 }
8173 self.rewrite_view_refs_inner(expr)
8174 }
8175
8176 fn rewrite_view_refs_inner(&self, expr: QueryExpr) -> QueryExpr {
8177 use crate::storage::query::ast::{Filter, TableSource};
8178 match expr {
8179 QueryExpr::Table(mut tq) => {
8180 if let Some(TableSource::Subquery(body)) = tq.source.take() {
8186 tq.source = Some(TableSource::Subquery(Box::new(
8187 self.rewrite_view_refs_inner(*body),
8188 )));
8189 return QueryExpr::Table(tq);
8190 }
8191
8192 let maybe_view = {
8196 let views = self.inner.views.read();
8197 views.get(&tq.table).cloned()
8198 };
8199 let Some(view) = maybe_view else {
8200 return QueryExpr::Table(tq);
8201 };
8202
8203 if view.materialized {
8209 return QueryExpr::Table(tq);
8210 }
8211
8212 let inner_expr = self.rewrite_view_refs_inner((*view.query).clone());
8216
8217 match inner_expr {
8225 QueryExpr::Table(mut inner_tq) => {
8226 if let Some(outer_filter) = tq.filter.take() {
8227 inner_tq.filter = Some(match inner_tq.filter.take() {
8228 Some(existing) => {
8229 Filter::And(Box::new(existing), Box::new(outer_filter))
8230 }
8231 None => outer_filter,
8232 });
8233 inner_tq.where_expr = inner_tq
8241 .filter
8242 .as_ref()
8243 .map(crate::storage::query::sql_lowering::filter_to_expr);
8244 }
8245 if let Some(outer_limit) = tq.limit {
8246 inner_tq.limit = Some(match inner_tq.limit {
8247 Some(existing) => existing.min(outer_limit),
8248 None => outer_limit,
8249 });
8250 }
8251 if let Some(outer_offset) = tq.offset {
8252 inner_tq.offset = Some(match inner_tq.offset {
8253 Some(existing) => existing + outer_offset,
8254 None => outer_offset,
8255 });
8256 }
8257 QueryExpr::Table(inner_tq)
8258 }
8259 other => other,
8260 }
8261 }
8262 QueryExpr::Join(mut jq) => {
8263 jq.left = Box::new(self.rewrite_view_refs_inner(*jq.left));
8264 jq.right = Box::new(self.rewrite_view_refs_inner(*jq.right));
8265 QueryExpr::Join(jq)
8266 }
8267 other => other,
8270 }
8271 }
8272
8273 fn authorize_relational_table_select(
8277 &self,
8278 mut table: TableQuery,
8279 frame: &dyn super::statement_frame::ReadFrame,
8280 ) -> RedDBResult<Option<TableQuery>> {
8281 if let Some(TableSource::Subquery(inner)) = table.source.take() {
8282 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
8283 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
8284 return Ok(Some(table));
8285 }
8286
8287 self.check_table_column_projection_authz(&table, frame)?;
8288
8289 if self.inner.rls_enabled_tables.read().contains(&table.table) {
8290 return Ok(inject_rls_filters(self, frame, table));
8291 }
8292
8293 Ok(Some(table))
8294 }
8295
8296 fn authorize_relational_join_select(
8297 &self,
8298 mut join: JoinQuery,
8299 frame: &dyn super::statement_frame::ReadFrame,
8300 ) -> RedDBResult<Option<JoinQuery>> {
8301 self.check_join_column_projection_authz(&join, frame)?;
8302 join.left = Box::new(self.authorize_relational_join_child(*join.left, frame)?);
8303 join.right = Box::new(self.authorize_relational_join_child(*join.right, frame)?);
8304 Ok(inject_rls_into_join(self, frame, join))
8305 }
8306
8307 fn authorize_relational_join_child(
8308 &self,
8309 expr: QueryExpr,
8310 frame: &dyn super::statement_frame::ReadFrame,
8311 ) -> RedDBResult<QueryExpr> {
8312 match expr {
8313 QueryExpr::Table(mut table) => {
8314 if let Some(TableSource::Subquery(inner)) = table.source.take() {
8315 let authorized_inner = self.authorize_relational_select_expr(*inner, frame)?;
8316 table.source = Some(TableSource::Subquery(Box::new(authorized_inner)));
8317 }
8318 Ok(QueryExpr::Table(table))
8319 }
8320 QueryExpr::Join(join) => self
8321 .authorize_relational_join_select(join, frame)?
8322 .map(QueryExpr::Join)
8323 .ok_or_else(|| {
8324 RedDBError::Query("permission denied: RLS denied relational subquery".into())
8325 }),
8326 other => Ok(other),
8327 }
8328 }
8329
8330 fn authorize_relational_select_expr(
8331 &self,
8332 expr: QueryExpr,
8333 frame: &dyn super::statement_frame::ReadFrame,
8334 ) -> RedDBResult<QueryExpr> {
8335 match expr {
8336 QueryExpr::Table(table) => self
8337 .authorize_relational_table_select(table, frame)?
8338 .map(QueryExpr::Table)
8339 .ok_or_else(|| {
8340 RedDBError::Query("permission denied: RLS denied relational subquery".into())
8341 }),
8342 QueryExpr::Join(join) => self
8343 .authorize_relational_join_select(join, frame)?
8344 .map(QueryExpr::Join)
8345 .ok_or_else(|| {
8346 RedDBError::Query("permission denied: RLS denied relational subquery".into())
8347 }),
8348 other => Ok(other),
8349 }
8350 }
8351
8352 fn check_table_column_projection_authz(
8353 &self,
8354 table: &TableQuery,
8355 frame: &dyn super::statement_frame::ReadFrame,
8356 ) -> RedDBResult<()> {
8357 let Some((username, role)) = frame.identity() else {
8358 return Ok(());
8359 };
8360 let Some(auth_store) = self.inner.auth_store.read().clone() else {
8361 return Ok(());
8362 };
8363
8364 let columns = self.resolved_table_projection_columns(table)?;
8365 let request = ColumnAccessRequest::select(table.table.clone(), columns);
8366 let principal = UserId::from_parts(frame.effective_scope(), username);
8367 let ctx = runtime_iam_context(
8368 role,
8369 frame.effective_scope(),
8370 auth_store.principal_is_system_owned(&principal),
8371 );
8372 let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
8373 if outcome.allowed() {
8374 return Ok(());
8375 }
8376
8377 if let Some(denied) = outcome.first_denied_column() {
8378 return Err(RedDBError::Query(format!(
8379 "permission denied: principal=`{username}` cannot select column `{}`",
8380 denied.resource.name
8381 )));
8382 }
8383 Err(RedDBError::Query(format!(
8384 "permission denied: principal=`{username}` cannot select table `{}`",
8385 table.table
8386 )))
8387 }
8388
8389 fn check_join_column_projection_authz(
8390 &self,
8391 join: &JoinQuery,
8392 frame: &dyn super::statement_frame::ReadFrame,
8393 ) -> RedDBResult<()> {
8394 let mut by_table: HashMap<String, BTreeSet<String>> = HashMap::new();
8395 let projections = crate::storage::query::sql_lowering::effective_join_projections(join);
8396 self.collect_join_projection_columns(join, &projections, &mut by_table)?;
8397
8398 for (table, columns) in by_table {
8399 let query = TableQuery {
8400 table,
8401 source: None,
8402 alias: None,
8403 select_items: Vec::new(),
8404 columns: columns.into_iter().map(Projection::Column).collect(),
8405 where_expr: None,
8406 filter: None,
8407 group_by_exprs: Vec::new(),
8408 group_by: Vec::new(),
8409 having_expr: None,
8410 having: None,
8411 order_by: Vec::new(),
8412 limit: None,
8413 limit_param: None,
8414 offset: None,
8415 offset_param: None,
8416 expand: None,
8417 as_of: None,
8418 sessionize: None,
8419 };
8420 self.check_table_column_projection_authz(&query, frame)?;
8421 }
8422 Ok(())
8423 }
8424
8425 fn collect_join_projection_columns(
8426 &self,
8427 join: &JoinQuery,
8428 projections: &[Projection],
8429 out: &mut HashMap<String, BTreeSet<String>>,
8430 ) -> RedDBResult<()> {
8431 let left = table_side_context(join.left.as_ref());
8432 let right = table_side_context(join.right.as_ref());
8433
8434 if projections
8435 .iter()
8436 .any(|projection| matches!(projection, Projection::All))
8437 {
8438 for side in [left.as_ref(), right.as_ref()].into_iter().flatten() {
8439 out.entry(side.table.clone())
8440 .or_default()
8441 .extend(self.table_all_projection_columns(&side.table)?);
8442 }
8443 return Ok(());
8444 }
8445
8446 for projection in projections {
8447 collect_projection_columns_for_join_side(
8448 projection,
8449 left.as_ref(),
8450 right.as_ref(),
8451 out,
8452 )?;
8453 }
8454 Ok(())
8455 }
8456
8457 fn resolved_table_projection_columns(&self, table: &TableQuery) -> RedDBResult<Vec<String>> {
8458 let projections = crate::storage::query::sql_lowering::effective_table_projections(table);
8459 if projections
8460 .iter()
8461 .any(|projection| matches!(projection, Projection::All))
8462 {
8463 return self.table_all_projection_columns(&table.table);
8464 }
8465
8466 let mut columns = BTreeSet::new();
8467 for projection in &projections {
8468 collect_projection_columns_for_table(
8469 projection,
8470 &table.table,
8471 table.alias.as_deref(),
8472 &mut columns,
8473 );
8474 }
8475 Ok(columns.into_iter().collect())
8476 }
8477
8478 fn table_all_projection_columns(&self, table: &str) -> RedDBResult<Vec<String>> {
8479 if let Some(contract) = self.inner.db.collection_contract_arc(table) {
8480 let columns: Vec<String> = contract
8481 .declared_columns
8482 .iter()
8483 .map(|column| column.name.clone())
8484 .collect();
8485 if !columns.is_empty() {
8486 return Ok(columns);
8487 }
8488 }
8489
8490 let records = scan_runtime_table_source_records_limited(&self.inner.db, table, Some(1))?;
8491 Ok(records
8492 .first()
8493 .map(|record| {
8494 record
8495 .column_names()
8496 .into_iter()
8497 .map(|column| column.to_string())
8498 .collect()
8499 })
8500 .unwrap_or_default())
8501 }
8502
8503 fn resolve_table_expr_subqueries(
8504 &self,
8505 mut table: TableQuery,
8506 frame: &dyn super::statement_frame::ReadFrame,
8507 ) -> RedDBResult<TableQuery> {
8508 match table.source.take() {
8515 Some(TableSource::Subquery(inner)) => {
8516 let inner = self.resolve_select_expr_subqueries(*inner, frame)?;
8517 table.source = Some(TableSource::Subquery(Box::new(inner)));
8518 }
8519 other => table.source = other,
8520 }
8521
8522 let outer_scopes = relation_scopes_for_query(&QueryExpr::Table(table.clone()));
8523 for item in &mut table.select_items {
8524 if let crate::storage::query::ast::SelectItem::Expr { expr, .. } = item {
8525 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
8526 }
8527 }
8528 if let Some(where_expr) = table.where_expr.take() {
8529 table.where_expr =
8530 Some(self.resolve_expr_subqueries(where_expr, &outer_scopes, frame)?);
8531 table.filter = None;
8532 }
8533 if let Some(having_expr) = table.having_expr.take() {
8534 table.having_expr =
8535 Some(self.resolve_expr_subqueries(having_expr, &outer_scopes, frame)?);
8536 table.having = None;
8537 }
8538 for expr in &mut table.group_by_exprs {
8539 *expr = self.resolve_expr_subqueries(expr.clone(), &outer_scopes, frame)?;
8540 }
8541 for clause in &mut table.order_by {
8542 if let Some(expr) = clause.expr.take() {
8543 clause.expr = Some(self.resolve_expr_subqueries(expr, &outer_scopes, frame)?);
8544 }
8545 }
8546 Ok(table)
8547 }
8548
8549 fn resolve_select_expr_subqueries(
8550 &self,
8551 expr: QueryExpr,
8552 frame: &dyn super::statement_frame::ReadFrame,
8553 ) -> RedDBResult<QueryExpr> {
8554 match expr {
8555 QueryExpr::Table(table) => self
8556 .resolve_table_expr_subqueries(table, frame)
8557 .map(QueryExpr::Table),
8558 QueryExpr::Join(mut join) => {
8559 join.left = Box::new(self.resolve_select_expr_subqueries(*join.left, frame)?);
8560 join.right = Box::new(self.resolve_select_expr_subqueries(*join.right, frame)?);
8561 Ok(QueryExpr::Join(join))
8562 }
8563 other => Ok(other),
8564 }
8565 }
8566
8567 fn resolve_expr_subqueries(
8568 &self,
8569 expr: crate::storage::query::ast::Expr,
8570 outer_scopes: &[String],
8571 frame: &dyn super::statement_frame::ReadFrame,
8572 ) -> RedDBResult<crate::storage::query::ast::Expr> {
8573 use crate::storage::query::ast::Expr;
8574
8575 match expr {
8576 Expr::Subquery { query, span } => {
8577 let values = self.execute_expr_subquery_values(query, outer_scopes, frame)?;
8578 if values.len() > 1 {
8579 return Err(RedDBError::Query(
8580 "scalar subquery returned more than one row".to_string(),
8581 ));
8582 }
8583 Ok(Expr::Literal {
8584 value: values.into_iter().next().unwrap_or(Value::Null),
8585 span,
8586 })
8587 }
8588 Expr::BinaryOp { op, lhs, rhs, span } => Ok(Expr::BinaryOp {
8589 op,
8590 lhs: Box::new(self.resolve_expr_subqueries(*lhs, outer_scopes, frame)?),
8591 rhs: Box::new(self.resolve_expr_subqueries(*rhs, outer_scopes, frame)?),
8592 span,
8593 }),
8594 Expr::UnaryOp { op, operand, span } => Ok(Expr::UnaryOp {
8595 op,
8596 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
8597 span,
8598 }),
8599 Expr::Cast {
8600 inner,
8601 target,
8602 span,
8603 } => Ok(Expr::Cast {
8604 inner: Box::new(self.resolve_expr_subqueries(*inner, outer_scopes, frame)?),
8605 target,
8606 span,
8607 }),
8608 Expr::FunctionCall { name, args, span } => {
8609 let args = args
8610 .into_iter()
8611 .map(|arg| self.resolve_expr_subqueries(arg, outer_scopes, frame))
8612 .collect::<RedDBResult<Vec<_>>>()?;
8613 Ok(Expr::FunctionCall { name, args, span })
8614 }
8615 Expr::Case {
8616 branches,
8617 else_,
8618 span,
8619 } => {
8620 let branches = branches
8621 .into_iter()
8622 .map(|(cond, value)| {
8623 Ok((
8624 self.resolve_expr_subqueries(cond, outer_scopes, frame)?,
8625 self.resolve_expr_subqueries(value, outer_scopes, frame)?,
8626 ))
8627 })
8628 .collect::<RedDBResult<Vec<_>>>()?;
8629 let else_ = else_
8630 .map(|expr| self.resolve_expr_subqueries(*expr, outer_scopes, frame))
8631 .transpose()?
8632 .map(Box::new);
8633 Ok(Expr::Case {
8634 branches,
8635 else_,
8636 span,
8637 })
8638 }
8639 Expr::IsNull {
8640 operand,
8641 negated,
8642 span,
8643 } => Ok(Expr::IsNull {
8644 operand: Box::new(self.resolve_expr_subqueries(*operand, outer_scopes, frame)?),
8645 negated,
8646 span,
8647 }),
8648 Expr::InList {
8649 target,
8650 values,
8651 negated,
8652 span,
8653 } => {
8654 let target =
8655 Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?);
8656 let mut resolved = Vec::new();
8657 for value in values {
8658 if let Expr::Subquery { query, .. } = value {
8659 resolved.extend(
8660 self.execute_expr_subquery_values(query, outer_scopes, frame)?
8661 .into_iter()
8662 .map(Expr::lit),
8663 );
8664 } else {
8665 resolved.push(self.resolve_expr_subqueries(value, outer_scopes, frame)?);
8666 }
8667 }
8668 Ok(Expr::InList {
8669 target,
8670 values: resolved,
8671 negated,
8672 span,
8673 })
8674 }
8675 Expr::Between {
8676 target,
8677 low,
8678 high,
8679 negated,
8680 span,
8681 } => Ok(Expr::Between {
8682 target: Box::new(self.resolve_expr_subqueries(*target, outer_scopes, frame)?),
8683 low: Box::new(self.resolve_expr_subqueries(*low, outer_scopes, frame)?),
8684 high: Box::new(self.resolve_expr_subqueries(*high, outer_scopes, frame)?),
8685 negated,
8686 span,
8687 }),
8688 other => Ok(other),
8689 }
8690 }
8691
8692 fn execute_expr_subquery_values(
8693 &self,
8694 subquery: crate::storage::query::ast::ExprSubquery,
8695 outer_scopes: &[String],
8696 frame: &dyn super::statement_frame::ReadFrame,
8697 ) -> RedDBResult<Vec<Value>> {
8698 let query = *subquery.query;
8699 if query_references_outer_scope(&query, outer_scopes) {
8700 return Err(RedDBError::Query(
8701 "NOT_YET_SUPPORTED: correlated subqueries are not supported yet; track follow-up issue #470-correlated-subqueries".to_string(),
8702 ));
8703 }
8704 let query = self.rewrite_view_refs(query);
8705 let query = self.resolve_select_expr_subqueries(query, frame)?;
8706 let query = self.authorize_relational_select_expr(query, frame)?;
8707 let result = match query {
8708 QueryExpr::Table(table) => {
8709 execute_runtime_table_query(&self.inner.db, &table, Some(&self.inner.index_store))?
8710 }
8711 QueryExpr::Join(join) => execute_runtime_join_query(&self.inner.db, &join)?,
8712 other => {
8713 return Err(RedDBError::Query(format!(
8714 "expression subquery must be a SELECT query, got {}",
8715 query_expr_name(&other)
8716 )))
8717 }
8718 };
8719 first_column_values(result)
8720 }
8721
8722 fn dispatch_expr(
8723 &self,
8724 expr: QueryExpr,
8725 query_str: &str,
8726 mode: QueryMode,
8727 ) -> RedDBResult<RuntimeQueryResult> {
8728 let statement = query_expr_name(&expr);
8729 match expr {
8730 QueryExpr::Graph(_) | QueryExpr::Path(_) => {
8731 Err(RedDBError::Query(
8733 "graph queries cannot be used as prepared statements".to_string(),
8734 ))
8735 }
8736 QueryExpr::Table(table) => {
8737 let scope = self.ai_scope();
8738 let table = self.resolve_table_expr_subqueries(
8739 table,
8740 &scope as &dyn super::statement_frame::ReadFrame,
8741 )?;
8742 if let Some(TableSource::Function {
8746 name,
8747 args,
8748 named_args,
8749 }) = table.source.clone()
8750 {
8751 return Ok(RuntimeQueryResult {
8752 query: query_str.to_string(),
8753 mode,
8754 statement,
8755 engine: "runtime-graph-tvf",
8756 result: self.execute_table_function(&name, &args, &named_args)?,
8757 affected_rows: 0,
8758 statement_type: "select",
8759 bookmark: None,
8760 });
8761 }
8762 if let Some(TableSource::InlineGraphFunction {
8766 name,
8767 nodes,
8768 edges,
8769 named_args,
8770 }) = table.source.clone()
8771 {
8772 return Ok(RuntimeQueryResult {
8773 query: query_str.to_string(),
8774 mode,
8775 statement,
8776 engine: "runtime-graph-tvf-inline",
8777 result: self.execute_inline_graph_function(
8778 &name,
8779 &nodes,
8780 &edges,
8781 &named_args,
8782 )?,
8783 affected_rows: 0,
8784 statement_type: "select",
8785 bookmark: None,
8786 });
8787 }
8788 if super::red_schema::is_virtual_table(&table.table) {
8789 return Ok(RuntimeQueryResult {
8790 query: query_str.to_string(),
8791 mode,
8792 statement,
8793 engine: "runtime-red-schema",
8794 result: super::red_schema::red_query(
8795 self,
8796 &table.table,
8797 &table,
8798 &scope as &dyn super::statement_frame::ReadFrame,
8799 )?,
8800 affected_rows: 0,
8801 statement_type: "select",
8802 bookmark: None,
8803 });
8804 }
8805 if let Some(view_result) = self.try_resolve_analytics_view(
8807 &table,
8808 &scope as &dyn super::statement_frame::ReadFrame,
8809 )? {
8810 return Ok(RuntimeQueryResult {
8811 query: query_str.to_string(),
8812 mode,
8813 statement,
8814 engine: "runtime-graph-analytics-view",
8815 result: view_result,
8816 affected_rows: 0,
8817 statement_type: "select",
8818 bookmark: None,
8819 });
8820 }
8821 let Some(table_with_rls) = self.authorize_relational_table_select(
8822 table,
8823 &scope as &dyn super::statement_frame::ReadFrame,
8824 )?
8825 else {
8826 return Ok(RuntimeQueryResult {
8827 query: query_str.to_string(),
8828 mode,
8829 statement,
8830 engine: "runtime-table-rls",
8831 result: crate::storage::query::unified::UnifiedResult::empty(),
8832 affected_rows: 0,
8833 statement_type: "select",
8834 bookmark: None,
8835 });
8836 };
8837 Ok(RuntimeQueryResult {
8838 query: query_str.to_string(),
8839 mode,
8840 statement,
8841 engine: "runtime-table",
8842 result: execute_runtime_table_query(
8843 &self.inner.db,
8844 &table_with_rls,
8845 Some(&self.inner.index_store),
8846 )?,
8847 affected_rows: 0,
8848 statement_type: "select",
8849 bookmark: None,
8850 })
8851 }
8852 QueryExpr::Join(join) => {
8853 let scope = self.ai_scope();
8854 let Some(join_with_rls) = self.authorize_relational_join_select(
8855 join,
8856 &scope as &dyn super::statement_frame::ReadFrame,
8857 )?
8858 else {
8859 return Ok(RuntimeQueryResult {
8860 query: query_str.to_string(),
8861 mode,
8862 statement,
8863 engine: "runtime-join-rls",
8864 result: crate::storage::query::unified::UnifiedResult::empty(),
8865 affected_rows: 0,
8866 statement_type: "select",
8867 bookmark: None,
8868 });
8869 };
8870 Ok(RuntimeQueryResult {
8871 query: query_str.to_string(),
8872 mode,
8873 statement,
8874 engine: "runtime-join",
8875 result: execute_runtime_join_query(&self.inner.db, &join_with_rls)?,
8876 affected_rows: 0,
8877 statement_type: "select",
8878 bookmark: None,
8879 })
8880 }
8881 QueryExpr::Vector(vector) => Ok(RuntimeQueryResult {
8882 query: query_str.to_string(),
8883 mode,
8884 statement,
8885 engine: "runtime-vector",
8886 result: execute_runtime_vector_query(&self.inner.db, &vector)?,
8887 affected_rows: 0,
8888 statement_type: "select",
8889 bookmark: None,
8890 }),
8891 QueryExpr::Hybrid(hybrid) => Ok(RuntimeQueryResult {
8892 query: query_str.to_string(),
8893 mode,
8894 statement,
8895 engine: "runtime-hybrid",
8896 result: execute_runtime_hybrid_query(&self.inner.db, &hybrid)?,
8897 affected_rows: 0,
8898 statement_type: "select",
8899 bookmark: None,
8900 }),
8901 QueryExpr::Insert(ref insert) if super::red_schema::is_virtual_table(&insert.table) => {
8902 Err(RedDBError::Query(
8903 super::red_schema::READ_ONLY_ERROR.to_string(),
8904 ))
8905 }
8906 QueryExpr::Update(ref update) if super::red_schema::is_virtual_table(&update.table) => {
8907 Err(RedDBError::Query(
8908 super::red_schema::READ_ONLY_ERROR.to_string(),
8909 ))
8910 }
8911 QueryExpr::Delete(ref delete) if super::red_schema::is_virtual_table(&delete.table) => {
8912 Err(RedDBError::Query(
8913 super::red_schema::READ_ONLY_ERROR.to_string(),
8914 ))
8915 }
8916 QueryExpr::Insert(ref insert) => self
8917 .with_deferred_store_wal_for_dml(self.insert_may_emit_events(insert), || {
8918 self.execute_insert(query_str, insert)
8919 }),
8920 QueryExpr::Update(ref update) => self
8921 .with_deferred_store_wal_for_dml(self.update_may_emit_events(update), || {
8922 self.execute_update(query_str, update)
8923 }),
8924 QueryExpr::Delete(ref delete) => self
8925 .with_deferred_store_wal_for_dml(self.delete_may_emit_events(delete), || {
8926 self.execute_delete(query_str, delete)
8927 }),
8928 QueryExpr::SearchCommand(ref cmd) => self.execute_search_command(query_str, cmd),
8929 QueryExpr::Ask(ref ask) => self.execute_ask(query_str, ask),
8930 _ => Err(RedDBError::Query(format!(
8931 "prepared-statement execution does not support {statement} statements"
8932 ))),
8933 }
8934 }
8935
8936 fn execute_table_function(
8943 &self,
8944 name: &str,
8945 args: &[String],
8946 named_args: &[(String, f64)],
8947 ) -> RedDBResult<crate::storage::query::unified::UnifiedResult> {
8948 if !is_graph_tvf_name(name) {
8949 return Err(RedDBError::Query(format!("unknown table function: {name}")));
8950 }
8951 if args.len() != 1 {
8953 return Err(RedDBError::Query(format!(
8954 "table function '{name}' takes exactly 1 graph argument, got {}",
8955 args.len()
8956 )));
8957 }
8958
8959 let (nodes, edges) = self.materialize_whole_graph_abstract()?;
8964 self.dispatch_graph_algorithm(name, nodes, edges, named_args)
8965 }
8966
8967 fn execute_inline_graph_function(
8977 &self,
8978 name: &str,
8979 nodes_query: &QueryExpr,
8980 edges_query: &QueryExpr,
8981 named_args: &[(String, f64)],
8982 ) -> RedDBResult<crate::storage::query::unified::UnifiedResult> {
8983 if !is_graph_tvf_name(name) {
8984 return Err(RedDBError::Query(format!("unknown table function: {name}")));
8985 }
8986
8987 let node_result = self.execute_query_expr(nodes_query.clone())?.result;
8988 let nodes = inline_node_ids(name, &node_result)?;
8989
8990 let edge_result = self.execute_query_expr(edges_query.clone())?.result;
8991 let edges = inline_edges(name, &edge_result)?;
8992
8993 self.dispatch_graph_algorithm(name, nodes, edges, named_args)
8994 }
8995
8996 fn materialize_whole_graph_abstract(
8999 &self,
9000 ) -> RedDBResult<(
9001 Vec<String>,
9002 Vec<(
9003 String,
9004 String,
9005 crate::storage::engine::graph_algorithms::Weight,
9006 )>,
9007 )> {
9008 use crate::storage::engine::graph_algorithms;
9009
9010 let graph = super::graph_dsl::materialize_graph_with_projection(
9011 self.inner.db.store().as_ref(),
9012 None,
9013 )?;
9014 let nodes: Vec<String> = graph.iter_nodes().map(|n| n.id.clone()).collect();
9015 let edges: Vec<(String, String, graph_algorithms::Weight)> = graph
9016 .iter_all_edges()
9017 .into_iter()
9018 .map(|e| (e.source_id, e.target_id, e.weight))
9019 .collect();
9020 Ok((nodes, edges))
9021 }
9022
9023 fn try_resolve_analytics_view(
9038 &self,
9039 table: &TableQuery,
9040 frame: &dyn super::statement_frame::ReadFrame,
9041 ) -> RedDBResult<Option<crate::storage::query::unified::UnifiedResult>> {
9042 let full = table.table.as_str();
9043 let Some(dot) = full.rfind('.') else {
9044 return Ok(None);
9045 };
9046 if self.inner.db.store().get_collection(full).is_some() {
9048 return Ok(None);
9049 }
9050 let graph_name = &full[..dot];
9051 let output_name = &full[dot + 1..];
9052 let Some(output) = crate::catalog::AnalyticsOutput::from_str(output_name) else {
9053 return Ok(None);
9054 };
9055
9056 let contracts = self.inner.db.collection_contracts();
9057 let Some(contract) = contracts.iter().find(|c| c.name == graph_name) else {
9058 return Ok(None);
9059 };
9060 if contract.declared_model != crate::catalog::CollectionModel::Graph {
9061 return Ok(None);
9062 }
9063 let Some(view) = contract
9064 .analytics_config
9065 .iter()
9066 .find(|view| view.output == output)
9067 else {
9068 return Err(RedDBError::Query(format!(
9071 "analytics output '{output_name}' is not enabled on graph '{graph_name}'; declare it with WITH ANALYTICS (...)"
9072 )));
9073 };
9074
9075 let parent_query = TableQuery::new(graph_name);
9079 if self
9080 .authorize_relational_table_select(parent_query, frame)?
9081 .is_none()
9082 {
9083 return Err(RedDBError::Query(format!(
9084 "permission denied: policy on graph '{graph_name}' denies analytics view '{output_name}'"
9085 )));
9086 }
9087
9088 let (algorithm, named_args) = analytics_view_algorithm(graph_name, view)?;
9089 let (nodes, edges) = self.materialize_whole_graph_abstract()?;
9090 let result = self.dispatch_graph_algorithm(&algorithm, nodes, edges, &named_args)?;
9091 Ok(Some(result))
9092 }
9093
9094 fn dispatch_graph_algorithm(
9101 &self,
9102 name: &str,
9103 nodes: Vec<String>,
9104 edges: Vec<(
9105 String,
9106 String,
9107 crate::storage::engine::graph_algorithms::Weight,
9108 )>,
9109 named_args: &[(String, f64)],
9110 ) -> RedDBResult<crate::storage::query::unified::UnifiedResult> {
9111 use crate::storage::engine::graph_algorithms;
9112 use crate::storage::query::unified::UnifiedResult;
9113 use crate::storage::schema::Value;
9114
9115 if name.eq_ignore_ascii_case("components") {
9116 reject_named_args(name, named_args)?;
9117 let assignment = graph_algorithms::connected_components(&nodes, &edges);
9118 let mut result =
9119 UnifiedResult::with_columns(vec!["node_id".into(), "island_id".into()]);
9120 for (node_id, island_id) in assignment {
9121 let mut record = UnifiedRecord::new();
9122 record.set("node_id", Value::text(node_id));
9123 record.set("island_id", Value::Integer(island_id as i64));
9124 result.push(record);
9125 }
9126 return Ok(result);
9127 }
9128
9129 if name.eq_ignore_ascii_case("louvain") {
9130 let resolution = louvain_resolution(named_args)?;
9135 let assignment = graph_algorithms::louvain(&nodes, &edges, resolution);
9136 let mut result =
9137 UnifiedResult::with_columns(vec!["node_id".into(), "community_id".into()]);
9138 for (node_id, community_id) in assignment {
9139 let mut record = UnifiedRecord::new();
9140 record.set("node_id", Value::text(node_id));
9141 record.set("community_id", Value::Integer(community_id as i64));
9142 result.push(record);
9143 }
9144 return Ok(result);
9145 }
9146
9147 if name.eq_ignore_ascii_case("degree_centrality") {
9148 reject_named_args(name, named_args)?;
9149 let assignment = abstract_degree_centrality(&nodes, &edges);
9150 let mut result = UnifiedResult::with_columns(vec!["node_id".into(), "degree".into()]);
9151 for (node_id, degree) in assignment {
9152 let mut record = UnifiedRecord::new();
9153 record.set("node_id", Value::text(node_id));
9154 record.set("degree", Value::Integer(degree as i64));
9155 result.push(record);
9156 }
9157 return Ok(result);
9158 }
9159
9160 if name.eq_ignore_ascii_case("shortest_path") {
9161 let mut src: Option<String> = None;
9167 let mut dst: Option<String> = None;
9168 let mut max_hops: Option<usize> = None;
9169 let as_node_id = |key: &str, value: f64| -> RedDBResult<String> {
9170 if !value.is_finite() || value < 0.0 || value.fract() != 0.0 {
9171 return Err(RedDBError::Query(format!(
9172 "table function 'shortest_path' argument '{key}' must be a non-negative integer node id, got {value}"
9173 )));
9174 }
9175 Ok((value as i64).to_string())
9176 };
9177 for (key, value) in named_args {
9178 if key.eq_ignore_ascii_case("src") {
9179 src = Some(as_node_id("src", *value)?);
9180 } else if key.eq_ignore_ascii_case("dst") {
9181 dst = Some(as_node_id("dst", *value)?);
9182 } else if key.eq_ignore_ascii_case("max_hops") {
9183 if !value.is_finite() || *value < 0.0 || value.fract() != 0.0 {
9184 return Err(RedDBError::Query(format!(
9185 "table function 'shortest_path' max_hops must be a non-negative integer, got {value}"
9186 )));
9187 }
9188 max_hops = Some(*value as usize);
9189 } else {
9190 return Err(RedDBError::Query(format!(
9191 "table function 'shortest_path' has no named argument '{key}' (expected 'src', 'dst', 'max_hops')"
9192 )));
9193 }
9194 }
9195 let src = src.ok_or_else(|| {
9196 RedDBError::Query(
9197 "table function 'shortest_path' requires named argument 'src'".to_string(),
9198 )
9199 })?;
9200 let dst = dst.ok_or_else(|| {
9201 RedDBError::Query(
9202 "table function 'shortest_path' requires named argument 'dst'".to_string(),
9203 )
9204 })?;
9205
9206 let mut result = UnifiedResult::with_columns(vec![
9213 "hop".into(),
9214 "node_id".into(),
9215 "cumulative_weight".into(),
9216 ]);
9217 if let Some(path) =
9218 graph_algorithms::shortest_path(&nodes, &edges, &src, &dst, max_hops)
9219 {
9220 for (hop, (node_id, cumulative_weight)) in path.into_iter().enumerate() {
9221 let mut record = UnifiedRecord::new();
9222 record.set("hop", Value::Integer(hop as i64));
9223 record.set("node_id", Value::text(node_id));
9224 record.set("cumulative_weight", Value::Float(cumulative_weight));
9225 result.push(record);
9226 }
9227 }
9228 return Ok(result);
9229 }
9230 if name.eq_ignore_ascii_case("betweenness") {
9235 reject_named_args(name, named_args)?;
9236 return Ok(Self::centrality_result(graph_algorithms::betweenness(
9237 &nodes, &edges,
9238 )));
9239 }
9240 if name.eq_ignore_ascii_case("eigenvector") {
9241 let mut max_iterations = 100_usize;
9244 let mut tolerance = 1e-6_f64;
9245 for (key, value) in named_args {
9246 if key.eq_ignore_ascii_case("max_iterations") {
9247 max_iterations = parse_positive_iterations("eigenvector", value)?;
9248 } else if key.eq_ignore_ascii_case("tolerance") {
9249 if !value.is_finite() || *value <= 0.0 {
9250 return Err(RedDBError::Query(format!(
9251 "table function 'eigenvector' tolerance must be > 0, got {value}"
9252 )));
9253 }
9254 tolerance = *value;
9255 } else {
9256 return Err(RedDBError::Query(format!(
9257 "table function 'eigenvector' has no named argument '{key}' (expected 'max_iterations' or 'tolerance')"
9258 )));
9259 }
9260 }
9261 return Ok(Self::centrality_result(graph_algorithms::eigenvector(
9262 &nodes,
9263 &edges,
9264 max_iterations,
9265 tolerance,
9266 )));
9267 }
9268 if name.eq_ignore_ascii_case("pagerank") {
9269 let mut damping = 0.85_f64;
9272 let mut max_iterations = 100_usize;
9273 for (key, value) in named_args {
9274 if key.eq_ignore_ascii_case("damping") {
9275 if !value.is_finite() || *value <= 0.0 || *value >= 1.0 {
9276 return Err(RedDBError::Query(format!(
9277 "table function 'pagerank' damping must be in (0, 1), got {value}"
9278 )));
9279 }
9280 damping = *value;
9281 } else if key.eq_ignore_ascii_case("max_iterations") {
9282 max_iterations = parse_positive_iterations("pagerank", value)?;
9283 } else {
9284 return Err(RedDBError::Query(format!(
9285 "table function 'pagerank' has no named argument '{key}' (expected 'damping' or 'max_iterations')"
9286 )));
9287 }
9288 }
9289 return Ok(Self::centrality_result(graph_algorithms::pagerank(
9290 &nodes,
9291 &edges,
9292 damping,
9293 max_iterations,
9294 )));
9295 }
9296 Err(RedDBError::Query(format!("unknown table function: {name}")))
9297 }
9298
9299 fn execute_components_tvf(
9306 &self,
9307 _collection: &str,
9308 ) -> RedDBResult<crate::storage::query::unified::UnifiedResult> {
9309 use crate::storage::engine::graph_algorithms;
9310 use crate::storage::query::unified::UnifiedResult;
9311 use crate::storage::schema::Value;
9312
9313 let graph = super::graph_dsl::materialize_graph_with_projection(
9319 self.inner.db.store().as_ref(),
9320 None,
9321 )?;
9322
9323 let nodes: Vec<String> = graph.iter_nodes().map(|n| n.id.clone()).collect();
9325 let edges: Vec<(String, String, graph_algorithms::Weight)> = graph
9326 .iter_all_edges()
9327 .into_iter()
9328 .map(|e| (e.source_id, e.target_id, e.weight))
9329 .collect();
9330
9331 let assignment = graph_algorithms::connected_components(&nodes, &edges);
9332
9333 let mut result = UnifiedResult::with_columns(vec!["node_id".into(), "island_id".into()]);
9335 for (node_id, island_id) in assignment {
9336 let mut record = UnifiedRecord::new();
9337 record.set("node_id", Value::text(node_id));
9338 record.set("island_id", Value::Integer(island_id as i64));
9339 result.push(record);
9340 }
9341 Ok(result)
9342 }
9343
9344 fn execute_louvain_tvf(
9354 &self,
9355 _collection: &str,
9356 resolution: f64,
9357 ) -> RedDBResult<crate::storage::query::unified::UnifiedResult> {
9358 use crate::storage::engine::graph_algorithms;
9359 use crate::storage::query::unified::UnifiedResult;
9360 use crate::storage::schema::Value;
9361
9362 let graph = super::graph_dsl::materialize_graph_with_projection(
9363 self.inner.db.store().as_ref(),
9364 None,
9365 )?;
9366
9367 let nodes: Vec<String> = graph.iter_nodes().map(|n| n.id.clone()).collect();
9368 let edges: Vec<(String, String, graph_algorithms::Weight)> = graph
9369 .iter_all_edges()
9370 .into_iter()
9371 .map(|e| (e.source_id, e.target_id, e.weight))
9372 .collect();
9373
9374 let assignment = graph_algorithms::louvain(&nodes, &edges, resolution);
9375
9376 let mut result = UnifiedResult::with_columns(vec!["node_id".into(), "community_id".into()]);
9378 for (node_id, community_id) in assignment {
9379 let mut record = UnifiedRecord::new();
9380 record.set("node_id", Value::text(node_id));
9381 record.set("community_id", Value::Integer(community_id as i64));
9382 result.push(record);
9383 }
9384 Ok(result)
9385 }
9386
9387 fn centrality_result(
9390 rows: Vec<(String, f64)>,
9391 ) -> crate::storage::query::unified::UnifiedResult {
9392 use crate::storage::query::unified::UnifiedResult;
9393 use crate::storage::schema::Value;
9394 let mut result = UnifiedResult::with_columns(vec!["node_id".into(), "score".into()]);
9395 for (node_id, score) in rows {
9396 let mut record = UnifiedRecord::new();
9397 record.set("node_id", Value::text(node_id));
9398 record.set("score", Value::Float(score));
9399 result.push(record);
9400 }
9401 result
9402 }
9403
9404 fn try_fast_entity_lookup(&self, query: &str) -> Option<RedDBResult<RuntimeQueryResult>> {
9407 let q = query.trim();
9410 if !q.starts_with("SELECT") && !q.starts_with("select") {
9411 return None;
9412 }
9413
9414 let where_pos = q
9416 .find("WHERE _entity_id")
9417 .or_else(|| q.find("where _entity_id"))?;
9418 let after_field = &q[where_pos + 16..].trim_start(); let after_eq = after_field.strip_prefix('=')?.trim_start();
9420
9421 let id_str = after_eq.trim();
9423 let entity_id: u64 = id_str.parse().ok()?;
9424
9425 let from_pos = q.find("FROM ").or_else(|| q.find("from "))? + 5;
9427 let table = q[from_pos..where_pos].trim();
9428 if table.is_empty()
9429 || table.contains(' ') && !table.contains(" AS ") && !table.contains(" as ")
9430 {
9431 return None; }
9433 let table_name = table.split_whitespace().next()?;
9434
9435 let store = self.inner.db.store();
9441 let entity = store
9442 .get(
9443 table_name,
9444 crate::storage::unified::EntityId::new(entity_id),
9445 )
9446 .filter(entity_visible_under_current_snapshot)
9447 .filter(|entity| {
9448 self.inner
9449 .db
9450 .replica_allows_entity_at_read(table_name, entity)
9451 });
9452
9453 let count = if entity.is_some() { 1u64 } else { 0 };
9454
9455 let records: Vec<crate::storage::query::unified::UnifiedRecord> = entity
9461 .as_ref()
9462 .and_then(|e| runtime_table_record_from_entity(e.clone()))
9463 .into_iter()
9464 .collect();
9465
9466 let json = match entity {
9467 Some(ref e) => execute_runtime_serialize_single_entity(e),
9468 None => r#"{"columns":[],"record_count":0,"selection":{"scope":"any"},"records":[]}"#
9469 .to_string(),
9470 };
9471
9472 Some(Ok(RuntimeQueryResult {
9473 query: query.to_string(),
9474 mode: crate::storage::query::modes::QueryMode::Sql,
9475 statement: "select",
9476 engine: "fast-entity-lookup",
9477 result: crate::storage::query::unified::UnifiedResult {
9478 columns: Vec::new(),
9479 records,
9480 stats: crate::storage::query::unified::QueryStats {
9481 rows_scanned: count,
9482 ..Default::default()
9483 },
9484 pre_serialized_json: Some(json),
9485 },
9486 affected_rows: 0,
9487 statement_type: "select",
9488 bookmark: None,
9489 }))
9490 }
9491
9492 fn result_cache_backend(&self) -> RuntimeResultCacheBackend {
9493 match self
9494 .config_string(RESULT_CACHE_BACKEND_KEY, RESULT_CACHE_DEFAULT_BACKEND)
9495 .as_str()
9496 {
9497 "blob_cache" => RuntimeResultCacheBackend::BlobCache,
9498 "shadow" => RuntimeResultCacheBackend::Shadow,
9499 _ => RuntimeResultCacheBackend::Legacy,
9500 }
9501 }
9502
9503 fn result_cache_enabled(&self) -> bool {
9507 self.config_bool(RESULT_CACHE_ENABLED_KEY, true)
9508 }
9509
9510 fn result_cache_ttl_secs(&self) -> u64 {
9513 self.config_u64(RESULT_CACHE_TTL_KEY, RESULT_CACHE_TTL_SECS)
9514 }
9515
9516 fn result_cache_capacity(&self) -> usize {
9520 self.config_u64(RESULT_CACHE_CAPACITY_KEY, RESULT_CACHE_MAX_ENTRIES as u64) as usize
9521 }
9522
9523 pub fn result_cache_metrics(&self) -> (u64, u64, u64) {
9526 use std::sync::atomic::Ordering::Relaxed;
9527 (
9528 self.inner.result_cache_hits.load(Relaxed),
9529 self.inner.result_cache_misses.load(Relaxed),
9530 self.inner.result_cache_evictions.load(Relaxed),
9531 )
9532 }
9533
9534 fn record_result_cache_evictions(&self, evicted: u64) {
9535 if evicted > 0 {
9536 self.inner
9537 .result_cache_evictions
9538 .fetch_add(evicted, std::sync::atomic::Ordering::Relaxed);
9539 }
9540 }
9541
9542 pub(super) fn get_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
9543 if !self.result_cache_enabled() {
9544 return None;
9545 }
9546 let hit = self.get_result_cache_entry_inner(key);
9547 let counter = if hit.is_some() {
9548 &self.inner.result_cache_hits
9549 } else {
9550 &self.inner.result_cache_misses
9551 };
9552 counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
9553 hit
9554 }
9555
9556 fn get_result_cache_entry_inner(&self, key: &str) -> Option<RuntimeQueryResult> {
9557 match self.result_cache_backend() {
9558 RuntimeResultCacheBackend::Legacy => self.get_legacy_result_cache_entry(key),
9559 RuntimeResultCacheBackend::BlobCache => self.get_blob_result_cache_entry(key),
9560 RuntimeResultCacheBackend::Shadow => {
9561 let legacy = self.get_legacy_result_cache_entry(key);
9562 let blob = self.get_blob_result_cache_entry(key);
9563 if let (Some(ref legacy), Some(ref blob)) = (&legacy, &blob) {
9564 if result_cache_fingerprint(legacy) != result_cache_fingerprint(blob) {
9565 self.inner
9566 .result_cache_shadow_divergences
9567 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
9568 tracing::warn!(
9569 key,
9570 metric = crate::runtime::METRIC_CACHE_SHADOW_DIVERGENCE_TOTAL,
9571 "result cache shadow backend diverged from legacy"
9572 );
9573 }
9574 }
9575 legacy
9576 }
9577 }
9578 }
9579
9580 fn get_legacy_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
9581 let ttl = self.result_cache_ttl_secs();
9582 let cache = self.inner.result_cache.read();
9583 cache.0.get(key).and_then(|entry| {
9584 if entry.cached_at.elapsed().as_secs() < ttl {
9585 Some(entry.result.clone())
9586 } else {
9587 None
9588 }
9589 })
9590 }
9591
9592 fn get_blob_result_cache_entry(&self, key: &str) -> Option<RuntimeQueryResult> {
9593 let hit = self
9594 .inner
9595 .result_blob_cache
9596 .get(RESULT_CACHE_BLOB_NAMESPACE, key)?;
9597 {
9598 let cache = self.inner.result_blob_entries.read();
9599 if let Some(entry) = cache.0.get(key) {
9600 return Some(entry.result.clone());
9601 }
9602 }
9603
9604 let (result, scopes) = decode_result_cache_payload(hit.value())?;
9605 let mut cache = self.inner.result_blob_entries.write();
9606 let (ref mut map, ref mut order) = *cache;
9607 if !map.contains_key(key) {
9608 order.push_back(key.to_string());
9609 }
9610 map.insert(
9611 key.to_string(),
9612 RuntimeResultCacheEntry {
9613 result: result.clone(),
9614 cached_at: std::time::Instant::now(),
9615 scopes,
9616 },
9617 );
9618 let evicted = trim_result_cache(map, order, self.result_cache_capacity());
9619 drop(cache);
9620 self.record_result_cache_evictions(evicted);
9621 Some(result)
9622 }
9623
9624 pub(super) fn put_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
9625 if !self.result_cache_enabled() {
9626 return;
9627 }
9628 match self.result_cache_backend() {
9629 RuntimeResultCacheBackend::Legacy => self.put_legacy_result_cache_entry(key, entry),
9630 RuntimeResultCacheBackend::BlobCache => self.put_blob_result_cache_entry(key, entry),
9631 RuntimeResultCacheBackend::Shadow => {
9632 self.put_legacy_result_cache_entry(key, entry.clone());
9633 self.put_blob_result_cache_entry(key, entry);
9634 }
9635 }
9636 }
9637
9638 fn put_legacy_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
9639 let capacity = self.result_cache_capacity();
9640 let mut cache = self.inner.result_cache.write();
9641 let (ref mut map, ref mut order) = *cache;
9642 if !map.contains_key(key) {
9643 order.push_back(key.to_string());
9644 }
9645 map.insert(key.to_string(), entry);
9646 let evicted = trim_result_cache(map, order, capacity);
9647 drop(cache);
9648 self.record_result_cache_evictions(evicted);
9649 }
9650
9651 fn put_blob_result_cache_entry(&self, key: &str, entry: RuntimeResultCacheEntry) {
9652 let policy = crate::storage::cache::BlobCachePolicy::default()
9653 .ttl_ms(self.result_cache_ttl_secs() * 1000)
9654 .priority(200);
9655 let dependencies = entry.scopes.iter().cloned().collect::<Vec<_>>();
9656 let bytes = encode_result_cache_payload(&entry)
9657 .unwrap_or_else(|| result_cache_fingerprint(&entry.result).into_bytes());
9658 let put = crate::storage::cache::BlobCachePut::new(bytes)
9659 .with_dependencies(dependencies)
9660 .with_policy(policy);
9661 if self
9662 .inner
9663 .result_blob_cache
9664 .put(RESULT_CACHE_BLOB_NAMESPACE, key, put)
9665 .is_err()
9666 {
9667 return;
9668 }
9669
9670 let capacity = self.result_cache_capacity();
9671 let mut cache = self.inner.result_blob_entries.write();
9672 let (ref mut map, ref mut order) = *cache;
9673 if !map.contains_key(key) {
9674 order.push_back(key.to_string());
9675 }
9676 map.insert(key.to_string(), entry);
9677 let evicted = trim_result_cache(map, order, capacity);
9678 drop(cache);
9679 self.record_result_cache_evictions(evicted);
9680 }
9681
9682 pub fn result_cache_shadow_divergences(&self) -> u64 {
9683 self.inner
9684 .result_cache_shadow_divergences
9685 .load(std::sync::atomic::Ordering::Relaxed)
9686 }
9687
9688 pub fn invalidate_result_cache(&self) {
9691 let mut cache = self.inner.result_cache.write();
9692 cache.0.clear();
9693 cache.1.clear();
9694 let mut blob_entries = self.inner.result_blob_entries.write();
9695 blob_entries.0.clear();
9696 blob_entries.1.clear();
9697 self.inner
9698 .result_blob_cache
9699 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
9700 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
9701 ask_entries.0.clear();
9702 ask_entries.1.clear();
9703 self.inner
9704 .result_blob_cache
9705 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
9706 }
9707
9708 pub(crate) fn invalidate_result_cache_for_table(&self, table: &str) {
9711 let legacy_has_match = {
9714 let cache = self.inner.result_cache.read();
9715 let (ref map, _) = *cache;
9716 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
9717 };
9718 let blob_has_match = {
9719 let cache = self.inner.result_blob_entries.read();
9720 let (ref map, _) = *cache;
9721 !map.is_empty() && map.values().any(|entry| entry.scopes.contains(table))
9722 };
9723 if legacy_has_match {
9724 let mut cache = self.inner.result_cache.write();
9725 let (ref mut map, ref mut order) = *cache;
9726 map.retain(|_, entry| !entry.scopes.contains(table));
9727 order.retain(|key| map.contains_key(key));
9728 }
9729
9730 if matches!(
9731 self.result_cache_backend(),
9732 RuntimeResultCacheBackend::BlobCache | RuntimeResultCacheBackend::Shadow
9733 ) {
9734 let mut blob_entries = self.inner.result_blob_entries.write();
9735 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
9736 blob_map.clear();
9737 blob_order.clear();
9738 self.inner
9739 .result_blob_cache
9740 .invalidate_namespace(RESULT_CACHE_BLOB_NAMESPACE);
9741 } else if blob_has_match {
9742 let mut blob_entries = self.inner.result_blob_entries.write();
9743 let (ref mut blob_map, ref mut blob_order) = *blob_entries;
9744 blob_map.retain(|_, entry| !entry.scopes.contains(table));
9745 blob_order.retain(|key| blob_map.contains_key(key));
9746 }
9747 let mut ask_entries = self.inner.ask_answer_cache_entries.write();
9748 ask_entries.0.clear();
9749 ask_entries.1.clear();
9750 self.inner
9751 .result_blob_cache
9752 .invalidate_namespace(ASK_ANSWER_CACHE_NAMESPACE);
9753 }
9754
9755 pub(crate) fn invalidate_plan_cache(&self) {
9756 self.inner.query_cache.write().clear();
9757 self.inner
9758 .ddl_epoch
9759 .fetch_add(1, std::sync::atomic::Ordering::Release);
9760 }
9761
9762 pub fn ddl_epoch(&self) -> u64 {
9766 self.inner
9767 .ddl_epoch
9768 .load(std::sync::atomic::Ordering::Acquire)
9769 }
9770
9771 pub(crate) fn clear_table_planner_stats(&self, table: &str) {
9772 let store = self.inner.db.store();
9773 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
9774 self.invalidate_plan_cache();
9775 }
9776
9777 pub(crate) fn rehydrate_tenant_tables(&self) {
9786 let store = self.inner.db.store();
9787 let Some(manager) = store.get_collection("red_config") else {
9788 return;
9789 };
9790 for entity in manager.query_all(|_| true) {
9795 let crate::storage::unified::entity::EntityData::Row(row) = &entity.data else {
9796 continue;
9797 };
9798 let Some(named) = &row.named else { continue };
9799 let Some(crate::storage::schema::Value::Text(key)) = named.get("key") else {
9800 continue;
9801 };
9802 let Some(rest) = key.strip_prefix("tenant_tables.") else {
9804 continue;
9805 };
9806 let Some((table, suffix)) = rest.rsplit_once('.') else {
9807 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
9813 collection: "red_config".to_string(),
9814 detail: format!("malformed tenant_tables key: {key}"),
9815 }
9816 .emit_global();
9817 continue;
9818 };
9819 if suffix != "column" {
9820 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
9821 collection: "red_config".to_string(),
9822 detail: format!("unexpected tenant_tables suffix: {key}"),
9823 }
9824 .emit_global();
9825 continue;
9826 }
9827 match named.get("value") {
9828 Some(crate::storage::schema::Value::Text(column)) => {
9829 self.register_tenant_table(table, column);
9830 }
9831 Some(crate::storage::schema::Value::Null) | None => {
9833 self.unregister_tenant_table(table);
9834 }
9835 _ => {}
9836 }
9837 }
9838 }
9839
9840 pub(crate) fn rehydrate_materialized_view_descriptors(&self) {
9852 let store = self.inner.db.store();
9853 let descriptors = crate::runtime::continuous_materialized_view::load_all(store.as_ref());
9854 for descriptor in descriptors {
9855 let parsed = match crate::storage::query::parser::parse(&descriptor.source_sql) {
9856 Ok(qc) => qc,
9857 Err(err) => {
9858 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
9859 collection:
9860 crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
9861 .to_string(),
9862 detail: format!(
9863 "failed to re-parse materialized-view source for {}: {err}",
9864 descriptor.name
9865 ),
9866 }
9867 .emit_global();
9868 continue;
9869 }
9870 };
9871 let crate::storage::query::ast::QueryExpr::CreateView(create) = parsed.query else {
9872 crate::telemetry::operator_event::OperatorEvent::SchemaCorruption {
9873 collection: crate::runtime::continuous_materialized_view::CATALOG_COLLECTION
9874 .to_string(),
9875 detail: format!(
9876 "materialized-view source for {} did not re-parse as CREATE VIEW",
9877 descriptor.name
9878 ),
9879 }
9880 .emit_global();
9881 continue;
9882 };
9883 let view_name = create.name.clone();
9885 self.inner
9886 .views
9887 .write()
9888 .insert(view_name.clone(), Arc::new(create));
9889 use crate::storage::cache::result::{MaterializedViewDef, RefreshPolicy};
9891 let refresh = match descriptor.refresh_every_ms {
9892 Some(ms) => RefreshPolicy::Periodic(std::time::Duration::from_millis(ms)),
9893 None => RefreshPolicy::Manual,
9894 };
9895 let def = MaterializedViewDef {
9896 name: view_name.clone(),
9897 query: format!("<parsed view {}>", view_name),
9898 dependencies: descriptor.source_collections.clone(),
9899 refresh,
9900 retention_duration_ms: descriptor.retention_duration_ms,
9901 };
9902 self.inner.materialized_views.write().register(def);
9903 }
9904 self.invalidate_plan_cache();
9907 }
9908
9909 pub(crate) fn rehydrate_declared_column_schemas(&self) {
9910 let store = self.inner.db.store();
9911 for contract in self.inner.db.collection_contracts() {
9912 let columns: Vec<String> = contract
9913 .declared_columns
9914 .iter()
9915 .map(|column| column.name.clone())
9916 .collect();
9917 let Some(manager) = store.get_collection(&contract.name) else {
9918 continue;
9919 };
9920 manager.set_column_schema_if_empty(columns);
9921 }
9922 }
9923
9924 pub fn register_tenant_table(&self, table: &str, column: &str) {
9929 use crate::storage::query::ast::{
9930 CompareOp, CreatePolicyQuery, Expr, FieldRef, Filter, Span,
9931 };
9932 self.inner
9933 .tenant_tables
9934 .write()
9935 .insert(table.to_string(), column.to_string());
9936
9937 let lhs = Expr::Column {
9943 field: FieldRef::TableColumn {
9944 table: table.to_string(),
9945 column: column.to_string(),
9946 },
9947 span: Span::synthetic(),
9948 };
9949 let rhs = Expr::FunctionCall {
9950 name: "CURRENT_TENANT".to_string(),
9951 args: Vec::new(),
9952 span: Span::synthetic(),
9953 };
9954 let policy_filter = Filter::CompareExpr {
9955 lhs,
9956 op: CompareOp::Eq,
9957 rhs,
9958 };
9959
9960 let policy = CreatePolicyQuery {
9961 name: "__tenant_iso".to_string(),
9962 table: table.to_string(),
9963 action: None, role: None, using: Box::new(policy_filter),
9966 target_kind: crate::storage::query::ast::PolicyTargetKind::Table,
9973 };
9974
9975 self.inner.rls_policies.write().insert(
9977 (table.to_string(), "__tenant_iso".to_string()),
9978 Arc::new(policy),
9979 );
9980 self.inner
9981 .rls_enabled_tables
9982 .write()
9983 .insert(table.to_string());
9984
9985 self.ensure_tenant_index(table, column);
9991 }
9992
9993 fn ensure_tenant_index(&self, table: &str, column: &str) {
10001 if column.contains('.') {
10002 return;
10003 }
10004 let index_name = format!("__tenant_idx_{table}");
10005 let registry = self.inner.index_store.list_indices(table);
10006 if registry.iter().any(|idx| idx.name == index_name) {
10007 return;
10008 }
10009 if registry
10010 .iter()
10011 .any(|idx| idx.columns.first().map(|c| c.as_str()) == Some(column))
10012 {
10013 return;
10014 }
10015
10016 let store = self.inner.db.store();
10017 let Some(manager) = store.get_collection(table) else {
10018 return;
10019 };
10020 let entities = manager.query_all(|_| true);
10021 let entity_fields: Vec<(
10022 crate::storage::unified::EntityId,
10023 Vec<(String, crate::storage::schema::Value)>,
10024 )> = entities
10025 .iter()
10026 .map(|e| {
10027 let fields = match &e.data {
10028 crate::storage::EntityData::Row(row) => {
10029 if let Some(ref named) = row.named {
10030 named.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
10031 } else if let Some(ref schema) = row.schema {
10032 schema
10033 .iter()
10034 .zip(row.columns.iter())
10035 .map(|(k, v)| (k.clone(), v.clone()))
10036 .collect()
10037 } else {
10038 Vec::new()
10039 }
10040 }
10041 crate::storage::EntityData::Node(node) => node
10042 .properties
10043 .iter()
10044 .map(|(k, v)| (k.clone(), v.clone()))
10045 .collect(),
10046 _ => Vec::new(),
10047 };
10048 (e.id, fields)
10049 })
10050 .collect();
10051
10052 let columns = vec![column.to_string()];
10053 if self
10054 .inner
10055 .index_store
10056 .create_index(
10057 &index_name,
10058 table,
10059 &columns,
10060 super::index_store::IndexMethodKind::Hash,
10061 false,
10062 &entity_fields,
10063 )
10064 .is_err()
10065 {
10066 return;
10067 }
10068 self.inner
10069 .index_store
10070 .register(super::index_store::RegisteredIndex {
10071 name: index_name,
10072 collection: table.to_string(),
10073 columns,
10074 method: super::index_store::IndexMethodKind::Hash,
10075 unique: false,
10076 });
10077 self.invalidate_plan_cache();
10078 }
10079
10080 fn drop_tenant_index(&self, table: &str) {
10083 let index_name = format!("__tenant_idx_{table}");
10084 self.inner.index_store.drop_index(&index_name, table);
10085 }
10086
10087 pub fn tenant_column(&self, table: &str) -> Option<String> {
10091 self.inner.tenant_tables.read().get(table).cloned()
10092 }
10093
10094 pub fn unregister_tenant_table(&self, table: &str) {
10098 self.inner.tenant_tables.write().remove(table);
10099 self.inner
10100 .rls_policies
10101 .write()
10102 .remove(&(table.to_string(), "__tenant_iso".to_string()));
10103 self.drop_tenant_index(table);
10104 let has_other_policies = self
10106 .inner
10107 .rls_policies
10108 .read()
10109 .keys()
10110 .any(|(t, _)| t == table);
10111 if !has_other_policies {
10112 self.inner.rls_enabled_tables.write().remove(table);
10113 }
10114 }
10115
10116 pub(crate) fn record_pending_tombstone(
10122 &self,
10123 conn_id: u64,
10124 collection: &str,
10125 id: crate::storage::unified::entity::EntityId,
10126 stamper_xid: crate::storage::transaction::snapshot::Xid,
10127 previous_xmax: crate::storage::transaction::snapshot::Xid,
10128 ) {
10129 self.inner
10130 .pending_tombstones
10131 .write()
10132 .entry(conn_id)
10133 .or_default()
10134 .push((collection.to_string(), id, stamper_xid, previous_xmax));
10135 }
10136
10137 pub(crate) fn record_pending_versioned_update(
10138 &self,
10139 conn_id: u64,
10140 collection: &str,
10141 old_id: crate::storage::unified::entity::EntityId,
10142 new_id: crate::storage::unified::entity::EntityId,
10143 stamper_xid: crate::storage::transaction::snapshot::Xid,
10144 previous_xmax: crate::storage::transaction::snapshot::Xid,
10145 ) {
10146 self.inner
10147 .pending_versioned_updates
10148 .write()
10149 .entry(conn_id)
10150 .or_default()
10151 .push((
10152 collection.to_string(),
10153 old_id,
10154 new_id,
10155 stamper_xid,
10156 previous_xmax,
10157 ));
10158 }
10159
10160 fn with_deferred_store_wal_if_transaction<T>(
10161 &self,
10162 f: impl FnOnce() -> RedDBResult<T>,
10163 ) -> RedDBResult<T> {
10164 let conn_id = current_connection_id();
10165 if !self.inner.tx_contexts.read().contains_key(&conn_id) {
10166 return f();
10167 }
10168
10169 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
10170 let result = f();
10171 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
10172 match result {
10173 Ok(value) => {
10174 self.record_pending_store_wal_actions(conn_id, captured);
10175 Ok(value)
10176 }
10177 Err(err) => Err(err),
10178 }
10179 }
10180
10181 fn with_deferred_store_wal_for_dml<T>(
10182 &self,
10183 capture_autocommit_events: bool,
10184 f: impl FnOnce() -> RedDBResult<T>,
10185 ) -> RedDBResult<T> {
10186 let conn_id = current_connection_id();
10187 if self.inner.tx_contexts.read().contains_key(&conn_id) {
10188 return self.with_deferred_store_wal_if_transaction(f);
10189 }
10190 if !capture_autocommit_events {
10191 return f();
10192 }
10193
10194 crate::storage::UnifiedStore::begin_deferred_store_wal_capture();
10195 let result = f();
10196 let captured = crate::storage::UnifiedStore::take_deferred_store_wal_capture();
10197 self.inner
10198 .db
10199 .store()
10200 .append_deferred_store_wal_actions(captured)
10201 .map_err(|err| RedDBError::Internal(err.to_string()))?;
10202 result
10203 }
10204
10205 fn insert_may_emit_events(&self, query: &InsertQuery) -> bool {
10206 !query.suppress_events
10207 && self.collection_has_event_subscriptions_for_operation(
10208 &query.table,
10209 crate::catalog::SubscriptionOperation::Insert,
10210 )
10211 }
10212
10213 fn update_may_emit_events(&self, query: &UpdateQuery) -> bool {
10214 !query.suppress_events
10215 && self.collection_has_event_subscriptions_for_operation(
10216 &query.table,
10217 crate::catalog::SubscriptionOperation::Update,
10218 )
10219 }
10220
10221 fn delete_may_emit_events(&self, query: &DeleteQuery) -> bool {
10222 !query.suppress_events
10223 && self.collection_has_event_subscriptions_for_operation(
10224 &query.table,
10225 crate::catalog::SubscriptionOperation::Delete,
10226 )
10227 }
10228
10229 fn collection_has_event_subscriptions_for_operation(
10230 &self,
10231 collection: &str,
10232 operation: crate::catalog::SubscriptionOperation,
10233 ) -> bool {
10234 let Some(contract) = self.db().collection_contract_arc(collection) else {
10235 return false;
10236 };
10237 contract.subscriptions.iter().any(|subscription| {
10238 subscription.enabled
10239 && (subscription.ops_filter.is_empty()
10240 || subscription.ops_filter.contains(&operation))
10241 })
10242 }
10243
10244 fn record_pending_store_wal_actions(
10245 &self,
10246 conn_id: u64,
10247 actions: crate::storage::unified::DeferredStoreWalActions,
10248 ) {
10249 if actions.is_empty() {
10250 return;
10251 }
10252 let mut guard = self.inner.pending_store_wal_actions.write();
10253 guard.entry(conn_id).or_default().extend(actions);
10254 }
10255
10256 fn flush_pending_store_wal_actions(&self, conn_id: u64) -> RedDBResult<()> {
10257 let Some(actions) = self
10258 .inner
10259 .pending_store_wal_actions
10260 .write()
10261 .remove(&conn_id)
10262 else {
10263 return Ok(());
10264 };
10265 self.inner
10266 .db
10267 .store()
10268 .append_deferred_store_wal_actions(actions)
10269 .map_err(|err| RedDBError::Internal(err.to_string()))
10270 }
10271
10272 fn discard_pending_store_wal_actions(&self, conn_id: u64) {
10273 self.inner
10274 .pending_store_wal_actions
10275 .write()
10276 .remove(&conn_id);
10277 }
10278
10279 fn xid_conflicts_with_snapshot(
10280 &self,
10281 xid: crate::storage::transaction::snapshot::Xid,
10282 snapshot: &crate::storage::transaction::snapshot::Snapshot,
10283 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
10284 ) -> bool {
10285 xid != 0
10286 && !own_xids.contains(&xid)
10287 && !self.inner.snapshot_manager.is_aborted(xid)
10288 && !self.inner.snapshot_manager.is_active(xid)
10289 && (xid > snapshot.xid || snapshot.in_progress.contains(&xid))
10290 }
10291
10292 fn conflict_error(
10293 collection: &str,
10294 logical_id: crate::storage::unified::entity::EntityId,
10295 xid: crate::storage::transaction::snapshot::Xid,
10296 ) -> RedDBError {
10297 RedDBError::Query(format!(
10298 "serialization conflict: table row {collection}/{} was modified by concurrent transaction {xid}",
10299 logical_id.raw()
10300 ))
10301 }
10302
10303 fn check_logical_row_conflict(
10304 &self,
10305 collection: &str,
10306 logical_id: crate::storage::unified::entity::EntityId,
10307 excluded_ids: &[crate::storage::unified::entity::EntityId],
10308 snapshot: &crate::storage::transaction::snapshot::Snapshot,
10309 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
10310 ) -> RedDBResult<()> {
10311 let store = self.inner.db.store();
10312 let Some(manager) = store.get_collection(collection) else {
10313 return Ok(());
10314 };
10315
10316 for candidate in manager.query_all(|_| true) {
10317 if excluded_ids.contains(&candidate.id) || candidate.logical_id() != logical_id {
10318 continue;
10319 }
10320 if self.xid_conflicts_with_snapshot(candidate.xmin, snapshot, own_xids) {
10321 return Err(Self::conflict_error(collection, logical_id, candidate.xmin));
10322 }
10323 if self.xid_conflicts_with_snapshot(candidate.xmax, snapshot, own_xids) {
10324 return Err(Self::conflict_error(collection, logical_id, candidate.xmax));
10325 }
10326 }
10327 Ok(())
10328 }
10329
10330 pub(crate) fn check_table_row_write_conflicts(
10331 &self,
10332 conn_id: u64,
10333 snapshot: &crate::storage::transaction::snapshot::Snapshot,
10334 own_xids: &std::collections::HashSet<crate::storage::transaction::snapshot::Xid>,
10335 ) -> RedDBResult<()> {
10336 let versioned_updates = self
10337 .inner
10338 .pending_versioned_updates
10339 .read()
10340 .get(&conn_id)
10341 .cloned()
10342 .unwrap_or_default();
10343 let tombstones = self
10344 .inner
10345 .pending_tombstones
10346 .read()
10347 .get(&conn_id)
10348 .cloned()
10349 .unwrap_or_default();
10350
10351 let store = self.inner.db.store();
10352 for (collection, old_id, new_id, xid, previous_xmax) in versioned_updates {
10353 let Some(manager) = store.get_collection(&collection) else {
10354 continue;
10355 };
10356 let Some(old) = manager.get(old_id) else {
10357 continue;
10358 };
10359 let logical_id = old.logical_id();
10360 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
10361 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
10362 }
10363 if old.xmax != xid && self.xid_conflicts_with_snapshot(old.xmax, snapshot, own_xids) {
10364 return Err(Self::conflict_error(&collection, logical_id, old.xmax));
10365 }
10366 self.check_logical_row_conflict(
10367 &collection,
10368 logical_id,
10369 &[old_id, new_id],
10370 snapshot,
10371 own_xids,
10372 )?;
10373 }
10374
10375 for (collection, id, xid, previous_xmax) in tombstones {
10376 let Some(manager) = store.get_collection(&collection) else {
10377 continue;
10378 };
10379 let Some(entity) = manager.get(id) else {
10380 continue;
10381 };
10382 let logical_id = entity.logical_id();
10383 if self.xid_conflicts_with_snapshot(previous_xmax, snapshot, own_xids) {
10384 return Err(Self::conflict_error(&collection, logical_id, previous_xmax));
10385 }
10386 if entity.xmax != xid
10387 && self.xid_conflicts_with_snapshot(entity.xmax, snapshot, own_xids)
10388 {
10389 return Err(Self::conflict_error(&collection, logical_id, entity.xmax));
10390 }
10391 self.check_logical_row_conflict(&collection, logical_id, &[id], snapshot, own_xids)?;
10392 }
10393
10394 Ok(())
10395 }
10396
10397 pub(crate) fn restore_pending_write_stamps(&self, conn_id: u64) {
10398 let versioned_updates = self
10399 .inner
10400 .pending_versioned_updates
10401 .read()
10402 .get(&conn_id)
10403 .cloned()
10404 .unwrap_or_default();
10405 let tombstones = self
10406 .inner
10407 .pending_tombstones
10408 .read()
10409 .get(&conn_id)
10410 .cloned()
10411 .unwrap_or_default();
10412
10413 let store = self.inner.db.store();
10414 for (collection, old_id, _new_id, xid, _previous_xmax) in versioned_updates {
10415 if let Some(manager) = store.get_collection(&collection) {
10416 if let Some(mut entity) = manager.get(old_id) {
10417 entity.set_xmax(xid);
10418 let _ = manager.update(entity);
10419 }
10420 }
10421 }
10422 for (collection, id, xid, _previous_xmax) in tombstones {
10423 if let Some(manager) = store.get_collection(&collection) {
10424 if let Some(mut entity) = manager.get(id) {
10425 entity.set_xmax(xid);
10426 let _ = manager.update(entity);
10427 }
10428 }
10429 }
10430 }
10431
10432 pub(crate) fn finalize_pending_versioned_updates(&self, conn_id: u64) {
10433 self.inner
10434 .pending_versioned_updates
10435 .write()
10436 .remove(&conn_id);
10437 }
10438
10439 pub(crate) fn revive_pending_versioned_updates(&self, conn_id: u64) {
10440 let Some(pending) = self
10441 .inner
10442 .pending_versioned_updates
10443 .write()
10444 .remove(&conn_id)
10445 else {
10446 return;
10447 };
10448
10449 let store = self.inner.db.store();
10450 for (collection, old_id, new_id, xid, previous_xmax) in pending {
10451 if let Some(manager) = store.get_collection(&collection) {
10452 if let Some(mut old) = manager.get(old_id) {
10453 if old.xmax == xid {
10454 old.set_xmax(previous_xmax);
10455 let _ = manager.update(old);
10456 }
10457 }
10458 }
10459 let _ = store.delete_batch(&collection, &[new_id]);
10460 }
10461 }
10462
10463 pub(crate) fn revive_versioned_updates_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
10464 let mut guard = self.inner.pending_versioned_updates.write();
10465 let Some(pending) = guard.get_mut(&conn_id) else {
10466 return 0;
10467 };
10468
10469 let store = self.inner.db.store();
10470 let mut reverted = 0usize;
10471 pending.retain(|(collection, old_id, new_id, xid, previous_xmax)| {
10472 if *xid < stamper_xid {
10473 return true;
10474 }
10475 if let Some(manager) = store.get_collection(collection) {
10476 if let Some(mut old) = manager.get(*old_id) {
10477 if old.xmax == *xid {
10478 old.set_xmax(*previous_xmax);
10479 let _ = manager.update(old);
10480 }
10481 }
10482 }
10483 let _ = store.delete_batch(collection, &[*new_id]);
10484 reverted += 1;
10485 false
10486 });
10487 if pending.is_empty() {
10488 guard.remove(&conn_id);
10489 }
10490 reverted
10491 }
10492
10493 pub(crate) fn finalize_pending_tombstones(&self, conn_id: u64) {
10498 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
10499 return;
10500 };
10501 if pending.is_empty() {
10502 return;
10503 }
10504
10505 let store = self.inner.db.store();
10506 for (collection, id, _xid, _previous_xmax) in pending {
10507 store.context_index().remove_entity(id);
10508 self.cdc_emit(
10509 crate::replication::cdc::ChangeOperation::Delete,
10510 &collection,
10511 id.raw(),
10512 "entity",
10513 );
10514 }
10515 }
10516
10517 pub(crate) fn revive_pending_tombstones(&self, conn_id: u64) {
10524 let Some(pending) = self.inner.pending_tombstones.write().remove(&conn_id) else {
10525 return;
10526 };
10527
10528 let store = self.inner.db.store();
10529 for (collection, id, xid, previous_xmax) in pending {
10530 let Some(manager) = store.get_collection(&collection) else {
10531 continue;
10532 };
10533 if let Some(mut entity) = manager.get(id) {
10534 if entity.xmax == xid {
10535 entity.set_xmax(previous_xmax);
10536 let _ = manager.update(entity);
10537 }
10538 }
10539 }
10540 }
10541
10542 pub fn queue_wait_registry(
10544 &self,
10545 ) -> std::sync::Arc<crate::runtime::queue_wait_registry::QueueWaitRegistry> {
10546 self.inner.queue_wait_registry.clone()
10547 }
10548
10549 pub(crate) fn record_queue_wake(&self, scope: &str, queue: &str) {
10554 if self.current_xid().is_some() {
10555 let conn_id = current_connection_id();
10556 self.inner
10557 .pending_queue_wakes
10558 .write()
10559 .entry(conn_id)
10560 .or_default()
10561 .push((scope.to_string(), queue.to_string()));
10562 return;
10563 }
10564 self.inner.queue_wait_registry.notify(scope, queue);
10565 }
10566
10567 pub(crate) fn finalize_pending_queue_wakes(&self, conn_id: u64) {
10568 let Some(pending) = self.inner.pending_queue_wakes.write().remove(&conn_id) else {
10569 return;
10570 };
10571 for (scope, queue) in pending {
10572 self.inner.queue_wait_registry.notify(&scope, &queue);
10573 }
10574 }
10575
10576 pub(crate) fn discard_pending_queue_wakes(&self, conn_id: u64) {
10577 self.inner.pending_queue_wakes.write().remove(&conn_id);
10578 }
10579
10580 pub(crate) fn finalize_pending_kv_watch_events(&self, conn_id: u64) {
10581 let Some(pending) = self.inner.pending_kv_watch_events.write().remove(&conn_id) else {
10582 return;
10583 };
10584 for event in pending {
10585 self.cdc_emit_kv(
10586 event.op,
10587 &event.collection,
10588 &event.key,
10589 0,
10590 event.before,
10591 event.after,
10592 );
10593 }
10594 }
10595
10596 pub(crate) fn discard_pending_kv_watch_events(&self, conn_id: u64) {
10597 self.inner.pending_kv_watch_events.write().remove(&conn_id);
10598 }
10599
10600 fn materialize_graph_with_rls(
10609 &self,
10610 ) -> RedDBResult<(
10611 crate::storage::engine::GraphStore,
10612 std::collections::HashMap<
10613 String,
10614 std::collections::HashMap<String, crate::storage::schema::Value>,
10615 >,
10616 crate::storage::query::unified::EdgeProperties,
10617 )> {
10618 use crate::storage::engine::GraphStore;
10619 use crate::storage::query::ast::{PolicyAction, PolicyTargetKind};
10620 use crate::storage::unified::entity::{EntityData, EntityKind};
10621 use std::collections::{HashMap, HashSet};
10622
10623 let store = self.inner.db.store();
10624 let snap_ctx = capture_current_snapshot();
10625 let role = current_auth_identity().map(|(_, r)| r.as_str().to_string());
10626
10627 let graph = GraphStore::new();
10628 let mut node_properties: HashMap<String, HashMap<String, crate::storage::schema::Value>> =
10629 HashMap::new();
10630 let mut edge_properties: crate::storage::query::unified::EdgeProperties = HashMap::new();
10631 let mut allowed_nodes: HashSet<String> = HashSet::new();
10632
10633 let mut node_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
10637 HashMap::new();
10638 let mut edge_rls: HashMap<String, Option<crate::storage::query::ast::Filter>> =
10639 HashMap::new();
10640
10641 let collections = store.list_collections();
10642
10643 for collection in &collections {
10645 let Some(manager) = store.get_collection(collection) else {
10646 continue;
10647 };
10648 let entities = manager.query_all(|_| true);
10649 for entity in entities {
10650 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
10651 continue;
10652 }
10653 let EntityKind::GraphNode(ref node) = entity.kind else {
10654 continue;
10655 };
10656 if !node_passes_rls(self, collection, role.as_deref(), &mut node_rls, &entity) {
10657 continue;
10658 }
10659 let id_str = entity.id.raw().to_string();
10660 graph
10661 .add_node_with_label(
10662 &id_str,
10663 &node.label,
10664 &super::graph_node_label(&node.node_type),
10665 )
10666 .map_err(|err| RedDBError::Query(err.to_string()))?;
10667 allowed_nodes.insert(id_str.clone());
10668 if let EntityData::Node(node_data) = &entity.data {
10669 node_properties.insert(id_str, node_data.properties.clone());
10670 }
10671 }
10672 }
10673
10674 for collection in &collections {
10678 let Some(manager) = store.get_collection(collection) else {
10679 continue;
10680 };
10681 let entities = manager.query_all(|_| true);
10682 for entity in entities {
10683 if !entity_visible_with_context(snap_ctx.as_ref(), &entity) {
10684 continue;
10685 }
10686 let EntityKind::GraphEdge(ref edge) = entity.kind else {
10687 continue;
10688 };
10689 if !allowed_nodes.contains(&edge.from_node)
10690 || !allowed_nodes.contains(&edge.to_node)
10691 {
10692 continue;
10693 }
10694 if !edge_passes_rls(self, collection, role.as_deref(), &mut edge_rls, &entity) {
10695 continue;
10696 }
10697 let weight = match &entity.data {
10698 EntityData::Edge(e) => e.weight,
10699 _ => edge.weight as f32 / 1000.0,
10700 };
10701 let edge_label = super::graph_edge_label(&edge.label);
10702 graph
10703 .add_edge_with_label(&edge.from_node, &edge.to_node, &edge_label, weight)
10704 .map_err(|err| RedDBError::Query(err.to_string()))?;
10705 if let EntityData::Edge(edge_data) = &entity.data {
10706 edge_properties.insert(
10707 (edge.from_node.clone(), edge_label, edge.to_node.clone()),
10708 edge_data.properties.clone(),
10709 );
10710 }
10711 }
10712 }
10713
10714 let _ = (PolicyAction::Select, PolicyTargetKind::Nodes);
10718
10719 Ok((graph, node_properties, edge_properties))
10720 }
10721
10722 pub(crate) fn stamp_xmin_if_in_txn(
10737 &self,
10738 collection: &str,
10739 id: crate::storage::unified::entity::EntityId,
10740 ) {
10741 let Some(xid) = self.current_xid() else {
10742 return;
10743 };
10744 let store = self.inner.db.store();
10745 let Some(manager) = store.get_collection(collection) else {
10746 return;
10747 };
10748 if let Some(mut entity) = manager.get(id) {
10749 entity.set_xmin(xid);
10750 let _ = manager.update(entity);
10751 }
10752 }
10753
10754 pub(crate) fn revive_tombstones_since(&self, conn_id: u64, stamper_xid: u64) -> usize {
10762 let mut guard = self.inner.pending_tombstones.write();
10763 let Some(pending) = guard.get_mut(&conn_id) else {
10764 return 0;
10765 };
10766
10767 let store = self.inner.db.store();
10768 let mut revived = 0usize;
10769 pending.retain(|(collection, id, xid, previous_xmax)| {
10770 if *xid < stamper_xid {
10771 return true;
10773 }
10774 if let Some(manager) = store.get_collection(collection) {
10775 if let Some(mut entity) = manager.get(*id) {
10776 if entity.xmax == *xid {
10777 entity.set_xmax(*previous_xmax);
10778 let _ = manager.update(entity);
10779 revived += 1;
10780 }
10781 }
10782 }
10783 false
10784 });
10785 if pending.is_empty() {
10786 guard.remove(&conn_id);
10787 }
10788 revived
10789 }
10790
10791 pub fn current_snapshot(&self) -> crate::storage::transaction::snapshot::Snapshot {
10800 let conn_id = current_connection_id();
10801 if let Some(ctx) = self.inner.tx_contexts.read().get(&conn_id).cloned() {
10802 return ctx.snapshot;
10803 }
10804 let high_water = self.inner.snapshot_manager.peek_next_xid();
10810 self.inner.snapshot_manager.snapshot(high_water)
10811 }
10812
10813 pub fn current_xid(&self) -> Option<crate::storage::transaction::snapshot::Xid> {
10823 let conn_id = current_connection_id();
10824 self.inner
10825 .tx_contexts
10826 .read()
10827 .get(&conn_id)
10828 .map(|ctx| ctx.writer_xid())
10829 }
10830
10831 pub fn connection_in_transaction(&self, conn_id: u64) -> bool {
10838 self.inner.tx_contexts.read().contains_key(&conn_id)
10839 }
10840
10841 pub fn snapshot_manager(&self) -> Arc<crate::storage::transaction::snapshot::SnapshotManager> {
10844 Arc::clone(&self.inner.snapshot_manager)
10845 }
10846
10847 fn mvcc_vacuum_cutoff_xid(&self) -> crate::storage::transaction::snapshot::Xid {
10848 let manager = &self.inner.snapshot_manager;
10849 let next_xid = manager.peek_next_xid();
10850 let mut cutoff = next_xid;
10851 if let Some(oldest_active) = manager.oldest_active_xid() {
10852 cutoff = cutoff.min(oldest_active);
10853 }
10854 if let Some(oldest_pinned) = manager.oldest_pinned_xid() {
10855 cutoff = cutoff.min(oldest_pinned);
10856 }
10857 let retention_xids = self.config_u64("runtime.mvcc.vacuum_retention_xids", 0);
10858 if retention_xids > 0 {
10859 cutoff = cutoff.min(next_xid.saturating_sub(retention_xids));
10860 }
10861 cutoff
10862 }
10863
10864 fn rebuild_runtime_indexes_for_table(&self, table: &str) -> RedDBResult<()> {
10865 let registered = self.inner.index_store.list_indices(table);
10866 if registered.is_empty() {
10867 return Ok(());
10868 }
10869 let store = self.inner.db.store();
10870 let Some(manager) = store.get_collection(table) else {
10871 return Ok(());
10872 };
10873 let entity_fields = manager
10874 .query_all(|entity| matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }))
10875 .into_iter()
10876 .map(|entity| (entity.id, table_row_index_fields(&entity)))
10877 .collect::<Vec<_>>();
10878
10879 for index in registered {
10880 self.inner.index_store.drop_index(&index.name, table);
10881 self.inner
10882 .index_store
10883 .create_index(
10884 &index.name,
10885 table,
10886 &index.columns,
10887 index.method,
10888 index.unique,
10889 &entity_fields,
10890 )
10891 .map_err(RedDBError::Internal)?;
10892 self.inner.index_store.register(index);
10893 }
10894 self.invalidate_plan_cache();
10895 Ok(())
10896 }
10897
10898 pub fn current_txn_own_xids(
10903 &self,
10904 ) -> std::collections::HashSet<crate::storage::transaction::snapshot::Xid> {
10905 let mut set = std::collections::HashSet::new();
10906 if let Some(ctx) = self.inner.tx_contexts.read().get(¤t_connection_id()) {
10907 set.insert(ctx.xid);
10908 for (_, sub) in &ctx.savepoints {
10909 set.insert(*sub);
10910 }
10911 for sub in &ctx.released_sub_xids {
10912 set.insert(*sub);
10913 }
10914 }
10915 set
10916 }
10917
10918 pub fn foreign_tables(&self) -> Arc<crate::storage::fdw::ForeignTableRegistry> {
10925 Arc::clone(&self.inner.foreign_tables)
10926 }
10927
10928 pub fn is_rls_enabled(&self, table: &str) -> bool {
10930 self.inner.rls_enabled_tables.read().contains(table)
10931 }
10932
10933 pub fn matching_rls_policies(
10940 &self,
10941 table: &str,
10942 role: Option<&str>,
10943 action: crate::storage::query::ast::PolicyAction,
10944 ) -> Vec<crate::storage::query::ast::Filter> {
10945 self.matching_rls_policies_for_kind(
10950 table,
10951 role,
10952 action,
10953 crate::storage::query::ast::PolicyTargetKind::Table,
10954 )
10955 }
10956
10957 pub fn matching_rls_policies_for_kind(
10965 &self,
10966 table: &str,
10967 role: Option<&str>,
10968 action: crate::storage::query::ast::PolicyAction,
10969 kind: crate::storage::query::ast::PolicyTargetKind,
10970 ) -> Vec<crate::storage::query::ast::Filter> {
10971 if !self.is_rls_enabled(table) {
10972 return Vec::new();
10973 }
10974 let policies = self.inner.rls_policies.read();
10975 policies
10976 .iter()
10977 .filter_map(|((t, _), p)| {
10978 if t != table {
10979 return None;
10980 }
10981 if p.target_kind != kind
10990 && p.target_kind != crate::storage::query::ast::PolicyTargetKind::Table
10991 {
10992 return None;
10993 }
10994 if let Some(a) = p.action {
10996 if a != action {
10997 return None;
10998 }
10999 }
11000 if let Some(p_role) = p.role.as_deref() {
11002 match role {
11003 Some(r) if r == p_role => {}
11004 _ => return None,
11005 }
11006 }
11007 Some((*p.using).clone())
11008 })
11009 .collect()
11010 }
11011
11012 pub(crate) fn refresh_table_planner_stats(&self, table: &str) {
11013 let store = self.inner.db.store();
11014 if let Some(stats) =
11015 crate::storage::query::planner::stats_catalog::analyze_collection(store.as_ref(), table)
11016 {
11017 crate::storage::query::planner::stats_catalog::persist_table_stats(
11018 store.as_ref(),
11019 &stats,
11020 );
11021 } else {
11022 crate::storage::query::planner::stats_catalog::clear_table_stats(store.as_ref(), table);
11023 }
11024 self.invalidate_plan_cache();
11025 }
11026
11027 pub(crate) fn note_table_write(&self, table: &str) {
11028 let already_dirty = self.inner.planner_dirty_tables.read().contains(table);
11033 if !already_dirty {
11034 self.inner
11035 .planner_dirty_tables
11036 .write()
11037 .insert(table.to_string());
11038 }
11039 self.invalidate_result_cache_for_table(table);
11040 }
11041
11042 fn explain_as_rows(&self, raw_query: &str, inner_sql: &str) -> RedDBResult<RuntimeQueryResult> {
11050 let explain = self.explain_query(inner_sql)?;
11051
11052 let columns = vec![
11053 "op".to_string(),
11054 "source".to_string(),
11055 "est_rows".to_string(),
11056 "est_cost".to_string(),
11057 "depth".to_string(),
11058 ];
11059
11060 let mut records: Vec<crate::storage::query::unified::UnifiedRecord> = Vec::new();
11061
11062 for name in &explain.cte_materializations {
11068 use std::sync::Arc;
11069 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
11070 rec.set_arc(Arc::from("op"), Value::text("CteScan".to_string()));
11071 rec.set_arc(Arc::from("source"), Value::text(name.clone()));
11072 rec.set_arc(Arc::from("est_rows"), Value::Float(0.0));
11073 rec.set_arc(Arc::from("est_cost"), Value::Float(0.0));
11074 rec.set_arc(Arc::from("depth"), Value::Integer(0));
11075 records.push(rec);
11076 }
11077
11078 walk_plan_node(&explain.logical_plan.root, 0, &mut records);
11079
11080 let result = crate::storage::query::unified::UnifiedResult {
11081 columns,
11082 records,
11083 stats: Default::default(),
11084 pre_serialized_json: None,
11085 };
11086
11087 Ok(RuntimeQueryResult {
11088 query: raw_query.to_string(),
11089 mode: explain.mode,
11090 statement: "explain",
11091 engine: "runtime-explain",
11092 result,
11093 affected_rows: 0,
11094 statement_type: "select",
11095 bookmark: None,
11096 })
11097 }
11098
    /// Authorizes `expr` for the identity bound to the current thread.
    ///
    /// Returns `Ok(())` without checking anything when no auth store is
    /// installed or when no authenticated identity is set. Otherwise the
    /// expression is dispatched by kind:
    ///
    /// - queue, graph, vector/search, policy-management and DDL expressions
    ///   are delegated to their dedicated `check_*` helpers (or to a plain
    ///   admin-role test) and return directly from the `match`;
    /// - `Table`, `Insert`, `Update`, `Delete` and `Join` produce an
    ///   `(action, resource)` pair that is checked after the `match`, either
    ///   through IAM policies (plus column-level checks for `Table` and
    ///   `Update`) or, when IAM authorization is disabled, through
    ///   `check_grant`;
    /// - every other expression kind is allowed.
    ///
    /// # Errors
    ///
    /// Returns the denial message as `Err(String)`.
    pub(super) fn check_query_privilege(
        &self,
        expr: &crate::storage::query::ast::QueryExpr,
    ) -> Result<(), String> {
        use crate::auth::privileges::{Action, AuthzContext, Resource};
        use crate::auth::UserId;
        use crate::storage::query::ast::QueryExpr;

        // No auth store installed: nothing to enforce.
        let auth_store = match self.inner.auth_store.read().clone() {
            Some(s) => s,
            None => return Ok(()),
        };

        // No identity bound to this thread: the check is skipped.
        let (username, role) = match current_auth_identity() {
            Some(p) => p,
            None => return Ok(()),
        };
        let tenant = current_tenant();

        let ctx = AuthzContext {
            principal: &username,
            effective_role: role,
            tenant: tenant.as_deref(),
        };
        let principal_id = UserId::from_parts(tenant.as_deref(), &username);

        // Arms either `return` the outcome of a dedicated check or evaluate to
        // the `(action, resource)` pair handled below the `match`.
        let (action, resource) = match expr {
            QueryExpr::Table(t) => (Action::Select, Resource::table_from_name(&t.table)),
            QueryExpr::QueueSelect(q) => {
                return self.check_queue_op_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "queue:peek",
                    &q.queue,
                );
            }
            QueryExpr::QueueCommand(cmd) => {
                use crate::storage::query::ast::QueueCommand;
                // Map each queue command to the queue it targets and its IAM verb.
                let (queue, action_verb) = match cmd {
                    QueueCommand::Push { queue, .. } => (queue.as_str(), "queue:enqueue"),
                    QueueCommand::Pop { queue, .. }
                    | QueueCommand::GroupRead { queue, .. }
                    | QueueCommand::Claim { queue, .. } => (queue.as_str(), "queue:read"),
                    QueueCommand::Peek { queue, .. }
                    | QueueCommand::Len { queue }
                    | QueueCommand::Pending { queue, .. } => (queue.as_str(), "queue:peek"),
                    QueueCommand::Ack { queue, .. } => (queue.as_str(), "queue:ack"),
                    QueueCommand::Nack {
                        queue, delay_ms, ..
                    } => {
                        // A NACK carrying a delay is authorized as `queue:retry`.
                        let verb = if delay_ms.is_some() {
                            "queue:retry"
                        } else {
                            "queue:nack"
                        };
                        (queue.as_str(), verb)
                    }
                    QueueCommand::Purge { queue } => (queue.as_str(), "queue:purge"),
                    QueueCommand::GroupCreate { queue, .. } => (queue.as_str(), "queue:read"),
                    // MOVE is checked against the source queue only.
                    QueueCommand::Move { source, .. } => (source.as_str(), "queue:dlq:move"),
                };
                return self.check_queue_op_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    action_verb,
                    queue,
                );
            }
            QueryExpr::Graph(g) => {
                // The traverse check runs first; the property-projection check
                // is only performed when IAM authorization is enabled.
                self.check_graph_op_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "graph:traverse",
                )?;
                if auth_store.iam_authorization_enabled() {
                    self.check_graph_property_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        g,
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::Path(_) => {
                return self.check_graph_op_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "graph:traverse",
                );
            }
            QueryExpr::GraphCommand(cmd) => {
                use crate::storage::query::ast::GraphCommand;
                let action_verb = match cmd {
                    GraphCommand::Properties { .. } => "graph:read",
                    GraphCommand::Neighborhood { .. }
                    | GraphCommand::Traverse { .. }
                    | GraphCommand::ShortestPath { .. } => "graph:traverse",
                    GraphCommand::Centrality { .. }
                    | GraphCommand::Community { .. }
                    | GraphCommand::Components { .. }
                    | GraphCommand::Cycles { .. }
                    | GraphCommand::Clustering
                    | GraphCommand::TopologicalSort => "graph:algorithm:run",
                };
                return self.check_graph_op_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    action_verb,
                );
            }
            QueryExpr::Vector(v) => {
                // Only enforced under IAM authorization: `vector:search` on the
                // collection, then column access to `content`.
                if auth_store.iam_authorization_enabled() {
                    self.check_vector_op_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        "vector:search",
                        &v.collection,
                    )?;
                    self.check_table_like_column_projection_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        &v.collection,
                        &["content".to_string()],
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::SearchCommand(cmd) => {
                use crate::storage::query::ast::SearchCommand;
                if auth_store.iam_authorization_enabled() {
                    // Only `Similar` and `Hybrid` yield a collection to check;
                    // every other search command passes through unchecked.
                    let collection = match cmd {
                        SearchCommand::Similar { collection, .. }
                        | SearchCommand::Hybrid { collection, .. } => Some(collection.as_str()),
                        _ => None,
                    };
                    if let Some(c) = collection {
                        self.check_vector_op_privilege(
                            &auth_store,
                            &principal_id,
                            role,
                            tenant.as_deref(),
                            "vector:search",
                            c,
                        )?;
                        return Ok(());
                    }
                }
                return Ok(());
            }
            QueryExpr::Hybrid(h) => {
                // Only the vector side's collection is checked, and only
                // under IAM authorization.
                if auth_store.iam_authorization_enabled() {
                    self.check_vector_op_privilege(
                        &auth_store,
                        &principal_id,
                        role,
                        tenant.as_deref(),
                        "vector:search",
                        &h.vector.collection,
                    )?;
                    return Ok(());
                }
                return Ok(());
            }
            QueryExpr::Insert(i) => (Action::Insert, Resource::table_from_name(&i.table)),
            QueryExpr::Update(u) => (Action::Update, Resource::table_from_name(&u.table)),
            QueryExpr::Delete(d) => (Action::Delete, Resource::table_from_name(&d.table)),
            // Joins are checked as a `Select` on the database resource.
            QueryExpr::Join(_) => (Action::Select, Resource::Database),
            // ACL/auth DDL is restricted to the Admin role.
            QueryExpr::Grant(_) | QueryExpr::Revoke(_) | QueryExpr::AlterUser(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
                        username, role
                    ))
                };
            }
            QueryExpr::CreateIamPolicy { id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:put",
                    "policy",
                    id,
                );
            }
            QueryExpr::DropIamPolicy { id } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:drop",
                    "policy",
                    id,
                );
            }
            QueryExpr::AttachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:attach",
                    "policy",
                    policy_id,
                );
            }
            QueryExpr::DetachPolicy { policy_id, .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:detach",
                    "policy",
                    policy_id,
                );
            }
            // Listing policies and effective permissions is not gated here.
            QueryExpr::ShowPolicies { .. } | QueryExpr::ShowEffectivePermissions { .. } => {
                return Ok(());
            }
            QueryExpr::SimulatePolicy { .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:simulate",
                    "policy",
                    "*",
                );
            }
            // Linting uses the same `policy:simulate` verb as simulation.
            QueryExpr::LintPolicy { .. } => {
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    "policy:simulate",
                    "policy",
                    "*",
                );
            }
            QueryExpr::MigratePolicyMode { dry_run, .. } => {
                // A dry run needs `policy:simulate`; a real migration needs `policy:put`.
                let action = if *dry_run {
                    "policy:simulate"
                } else {
                    "policy:put"
                };
                return self.check_policy_management_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    action,
                    "policy",
                    "*",
                );
            }
            // DROP of any collection flavour and TRUNCATE go through
            // `check_ddl_collection_privilege`.
            QueryExpr::DropTable(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropGraph(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropVector(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropDocument(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropKv(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::DropCollection(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    &q.name,
                );
            }
            QueryExpr::Truncate(q) => {
                return self.check_ddl_collection_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "truncate",
                    &q.name,
                );
            }
            // The remaining DDL arms call `check_ddl_object_privilege` with an
            // action verb, a resource kind, the object name and a role
            // (`Write` or `Admin`) as the last argument.
            QueryExpr::CreateTable(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateCollection(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateVector(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::AlterTable(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "alter",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // Index DDL is authorized against the indexed table.
            QueryExpr::CreateIndex(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.table,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropIndex(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.table,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateSchema(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:admin",
                    "schema",
                    &q.name,
                    crate::auth::Role::Admin,
                );
            }
            QueryExpr::DropSchema(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:admin",
                    "schema",
                    &q.name,
                    crate::auth::Role::Admin,
                );
            }
            QueryExpr::CreateSequence(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropSequence(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateView(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropView(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // Refreshing a materialized view is authorized as `alter`.
            QueryExpr::RefreshMaterializedView(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "alter",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // RLS policy DDL is authorized against the policy's table.
            QueryExpr::CreatePolicy(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.table,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropPolicy(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.table,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateServer(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:admin",
                    "schema",
                    &q.name,
                    crate::auth::Role::Admin,
                );
            }
            QueryExpr::DropServer(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:admin",
                    "schema",
                    &q.name,
                    crate::auth::Role::Admin,
                );
            }
            QueryExpr::CreateForeignTable(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:write",
                    "schema",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropForeignTable(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:write",
                    "schema",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateTimeSeries(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // Metric and SLO DDL use `q.path` as the object name.
            QueryExpr::CreateMetric(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.path,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::AlterMetric(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "alter",
                    "collection",
                    &q.path,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateSlo(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.path,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropTimeSeries(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateQueue(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::AlterQueue(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "alter",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropQueue(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // Tree DDL is authorized against the owning collection.
            QueryExpr::CreateTree(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "create",
                    "collection",
                    &q.collection,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::DropTree(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "drop",
                    "collection",
                    &q.collection,
                    crate::auth::Role::Write,
                );
            }
            QueryExpr::CreateMigration(q) => {
                return self.check_ddl_object_privilege(
                    &auth_store,
                    &principal_id,
                    role,
                    tenant.as_deref(),
                    &username,
                    "schema:write",
                    "schema",
                    &q.name,
                    crate::auth::Role::Write,
                );
            }
            // Applying or rolling back a migration is restricted to the Admin role.
            QueryExpr::ApplyMigration(_) | QueryExpr::RollbackMigration(_) => {
                return if role == crate::auth::Role::Admin {
                    Ok(())
                } else {
                    Err(format!(
                        "principal=`{}` role=`{:?}` cannot issue APPLY/ROLLBACK MIGRATION",
                        username, role
                    ))
                };
            }
            QueryExpr::ExplainMigration(_) => return Ok(()),
            // Every expression kind not listed above is allowed without a check.
            _ => return Ok(()),
        };

        if auth_store.iam_authorization_enabled() {
            // IAM mode: translate the legacy action/resource pair and evaluate
            // it against the principal's policies.
            let iam_action = legacy_action_to_iam(action);
            let iam_resource = legacy_resource_to_iam(&resource, tenant.as_deref());
            let iam_ctx = runtime_iam_context(
                role,
                tenant.as_deref(),
                auth_store.principal_is_system_owned(&principal_id),
            );
            if !auth_store.check_policy_authz_with_role(
                &principal_id,
                iam_action,
                &iam_resource,
                &iam_ctx,
                role,
            ) {
                return Err(format!(
                    "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                    username, iam_action, iam_resource.kind, iam_resource.name
                ));
            }

            // Table reads additionally get a column-projection check.
            if let QueryExpr::Table(table) = expr {
                self.check_table_column_projection_privilege(
                    &auth_store,
                    &principal_id,
                    &iam_ctx,
                    table,
                )?;
            }

            if let QueryExpr::Update(update) = expr {
                // Columns targeted by SET, checked with the request built by
                // `column_access_request_for_table_update`.
                let columns = update_set_target_columns(update);
                if !columns.is_empty() {
                    let request = column_access_request_for_table_update(&update.table, columns);
                    let outcome =
                        auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
                    if let Some(denied) = outcome.first_denied_column() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM column policy",
                            username, iam_action, denied.resource.kind, denied.resource.name
                        ));
                    }
                    if !outcome.allowed() {
                        return Err(format!(
                            "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
                            username,
                            iam_action,
                            outcome.table_resource.kind,
                            outcome.table_resource.name
                        ));
                    }
                }

                // Columns from `update_returning_columns_for_policy`, checked
                // with a select request; denials report `action=select`.
                if let Some(columns) = update_returning_columns_for_policy(self, update) {
                    let request = column_access_request_for_table_select(&update.table, columns);
                    let outcome =
                        auth_store.check_column_projection_authz(&principal_id, &request, &iam_ctx);
                    if let Some(denied) = outcome.first_denied_column() {
                        return Err(format!(
                            "principal=`{}` action=`select` resource=`{}:{}` denied by IAM column policy",
                            username, denied.resource.kind, denied.resource.name
                        ));
                    }
                    if !outcome.allowed() {
                        return Err(format!(
                            "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
                            username, outcome.table_resource.kind, outcome.table_resource.name
                        ));
                    }
                }
            }

            Ok(())
        } else {
            // IAM authorization disabled: fall back to the grant check.
            auth_store
                .check_grant(&ctx, action, &resource)
                .map_err(|e| e.to_string())
        }
    }
12017
12018 fn check_table_column_projection_privilege(
12019 &self,
12020 auth_store: &Arc<crate::auth::store::AuthStore>,
12021 principal: &crate::auth::UserId,
12022 ctx: &crate::auth::policies::EvalContext,
12023 table: &crate::storage::query::ast::TableQuery,
12024 ) -> Result<(), String> {
12025 use crate::auth::{ColumnAccessRequest, ColumnDecisionEffect};
12026
12027 let columns = requested_table_columns_for_policy(table);
12028 if columns.is_empty() {
12029 return Ok(());
12030 }
12031
12032 let request = ColumnAccessRequest::select(table.table.clone(), columns);
12033 let outcome = auth_store.check_column_projection_authz(principal, &request, ctx);
12034 if outcome.allowed() {
12035 return Ok(());
12036 }
12037
12038 if !matches!(
12039 outcome.table_decision,
12040 crate::auth::policies::Decision::Allow { .. }
12041 | crate::auth::policies::Decision::AdminBypass
12042 ) {
12043 return Err(format!(
12044 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
12045 principal, outcome.table_resource.kind, outcome.table_resource.name
12046 ));
12047 }
12048
12049 let denied = outcome
12050 .first_denied_column()
12051 .filter(|decision| decision.effective == ColumnDecisionEffect::Denied);
12052 match denied {
12053 Some(decision) => Err(format!(
12054 "principal=`{}` action=`select` resource=`{}:{}` denied by IAM policy",
12055 principal, decision.resource.kind, decision.resource.name
12056 )),
12057 None => Ok(()),
12058 }
12059 }
12060
12061 fn check_graph_property_projection_privilege(
12062 &self,
12063 auth_store: &Arc<crate::auth::store::AuthStore>,
12064 principal: &crate::auth::UserId,
12065 role: crate::auth::Role,
12066 tenant: Option<&str>,
12067 query: &crate::storage::query::ast::GraphQuery,
12068 ) -> Result<(), String> {
12069 let columns = explicit_graph_projection_properties(query);
12070 if columns.is_empty() {
12071 return Ok(());
12072 }
12073 self.check_table_like_column_projection_privilege(
12074 auth_store, principal, role, tenant, "graph", &columns,
12075 )
12076 }
12077
12078 fn check_table_like_column_projection_privilege(
12079 &self,
12080 auth_store: &Arc<crate::auth::store::AuthStore>,
12081 principal: &crate::auth::UserId,
12082 role: crate::auth::Role,
12083 tenant: Option<&str>,
12084 table: &str,
12085 columns: &[String],
12086 ) -> Result<(), String> {
12087 let iam_ctx = runtime_iam_context(
12088 role,
12089 tenant,
12090 auth_store.principal_is_system_owned(principal),
12091 );
12092 let request =
12093 crate::auth::ColumnAccessRequest::select(table.to_string(), columns.iter().cloned());
12094 let outcome = auth_store.check_column_projection_authz(principal, &request, &iam_ctx);
12095 if outcome.allowed() {
12096 return Ok(());
12097 }
12098 let denied = outcome
12099 .first_denied_column()
12100 .map(|d| d.resource.name.clone())
12101 .unwrap_or_else(|| format!("{table}.<unknown>"));
12102 Err(format!(
12103 "principal=`{}` action=`select` resource=`column:{}` denied by IAM policy",
12104 principal, denied
12105 ))
12106 }
12107
12108 fn check_policy_management_privilege(
12109 &self,
12110 auth_store: &Arc<crate::auth::store::AuthStore>,
12111 principal: &crate::auth::UserId,
12112 role: crate::auth::Role,
12113 tenant: Option<&str>,
12114 action: &str,
12115 resource_kind: &str,
12116 resource_name: &str,
12117 ) -> Result<(), String> {
12118 let ctx = runtime_iam_context(
12119 role,
12120 tenant,
12121 auth_store.principal_is_system_owned(principal),
12122 );
12123
12124 if !auth_store.iam_authorization_enabled() {
12125 return if role == crate::auth::Role::Admin {
12126 Ok(())
12127 } else {
12128 Err(format!(
12129 "principal=`{}` role=`{:?}` cannot issue ACL/auth DDL",
12130 principal, role
12131 ))
12132 };
12133 }
12134
12135 let mut resource = crate::auth::policies::ResourceRef::new(
12136 resource_kind.to_string(),
12137 resource_name.to_string(),
12138 );
12139 if let Some(t) = tenant {
12140 resource = resource.with_tenant(t.to_string());
12141 }
12142 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
12143 Ok(())
12144 } else {
12145 Err(format!(
12146 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
12147 principal, action, resource.kind, resource.name
12148 ))
12149 }
12150 }
12151
12152 fn check_managed_config_write_for_set_config(&self, key: &str) -> RedDBResult<()> {
12153 let Some(auth_store) = self.inner.auth_store.read().clone() else {
12154 return Ok(());
12155 };
12156 let (username, role) = current_auth_identity()
12157 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
12158 let tenant = current_tenant();
12159 let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
12160 let ctx = runtime_iam_context(
12161 role,
12162 tenant.as_deref(),
12163 auth_store.principal_is_system_owned(&principal),
12164 );
12165 let gate = crate::auth::managed_config::ManagedConfigGate::new(
12166 self.inner.config_registry.as_ref(),
12167 );
12168 match gate.check_write(&auth_store, &principal, &ctx, key) {
12169 crate::auth::managed_config::ManagedConfigDecision::PassThrough { .. }
12170 | crate::auth::managed_config::ManagedConfigDecision::Allow { .. } => Ok(()),
12171 crate::auth::managed_config::ManagedConfigDecision::Deny { reason, .. } => {
12172 Err(RedDBError::Query(format!(
12173 "permission denied: managed config mutation blocked for `{key}`: {reason}"
12174 )))
12175 }
12176 }
12177 }
12178
12179 fn check_queue_op_privilege(
12195 &self,
12196 auth_store: &Arc<crate::auth::store::AuthStore>,
12197 principal: &crate::auth::UserId,
12198 role: crate::auth::Role,
12199 tenant: Option<&str>,
12200 action: &str,
12201 queue: &str,
12202 ) -> Result<(), String> {
12203 if !auth_store.iam_authorization_enabled() {
12204 return Ok(());
12205 }
12206 let mut resource =
12207 crate::auth::policies::ResourceRef::new("queue".to_string(), queue.to_string());
12208 if let Some(t) = tenant {
12209 resource = resource.with_tenant(t.to_string());
12210 }
12211 let ctx = runtime_iam_context(
12212 role,
12213 tenant,
12214 auth_store.principal_is_system_owned(principal),
12215 );
12216 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
12217 Ok(())
12218 } else {
12219 Err(format!(
12220 "principal=`{}` action=`{}` resource=`queue:{}` denied by IAM policy",
12221 principal, action, queue
12222 ))
12223 }
12224 }
12225
12226 fn check_graph_op_privilege(
12246 &self,
12247 auth_store: &Arc<crate::auth::store::AuthStore>,
12248 principal: &crate::auth::UserId,
12249 role: crate::auth::Role,
12250 tenant: Option<&str>,
12251 action: &str,
12252 ) -> Result<(), String> {
12253 if !auth_store.iam_authorization_enabled() {
12254 return Ok(());
12255 }
12256 let mut resource =
12257 crate::auth::policies::ResourceRef::new("graph".to_string(), "*".to_string());
12258 if let Some(t) = tenant {
12259 resource = resource.with_tenant(t.to_string());
12260 }
12261 let ctx = runtime_iam_context(
12262 role,
12263 tenant,
12264 auth_store.principal_is_system_owned(principal),
12265 );
12266 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
12267 Ok(())
12268 } else {
12269 Err(format!(
12270 "principal=`{}` action=`{}` resource=`graph:*` denied by IAM policy",
12271 principal, action
12272 ))
12273 }
12274 }
12275
12276 fn check_vector_op_privilege(
12291 &self,
12292 auth_store: &Arc<crate::auth::store::AuthStore>,
12293 principal: &crate::auth::UserId,
12294 role: crate::auth::Role,
12295 tenant: Option<&str>,
12296 action: &str,
12297 collection: &str,
12298 ) -> Result<(), String> {
12299 if !auth_store.iam_authorization_enabled() {
12300 return Ok(());
12301 }
12302 let mut resource =
12303 crate::auth::policies::ResourceRef::new("vector".to_string(), collection.to_string());
12304 if let Some(t) = tenant {
12305 resource = resource.with_tenant(t.to_string());
12306 }
12307 let ctx = runtime_iam_context(
12308 role,
12309 tenant,
12310 auth_store.principal_is_system_owned(principal),
12311 );
12312 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
12313 Ok(())
12314 } else {
12315 Err(format!(
12316 "principal=`{}` action=`{}` resource=`vector:{}` denied by IAM policy",
12317 principal, action, collection
12318 ))
12319 }
12320 }
12321
12322 fn check_ddl_collection_privilege(
12328 &self,
12329 auth_store: &Arc<crate::auth::store::AuthStore>,
12330 principal: &crate::auth::UserId,
12331 role: crate::auth::Role,
12332 tenant: Option<&str>,
12333 username: &str,
12334 action: &str,
12335 collection: &str,
12336 ) -> Result<(), String> {
12337 self.check_ddl_object_privilege(
12338 auth_store,
12339 principal,
12340 role,
12341 tenant,
12342 username,
12343 action,
12344 "collection",
12345 collection,
12346 crate::auth::Role::Write,
12347 )
12348 }
12349
12350 #[allow(clippy::too_many_arguments)]
12368 fn check_ddl_object_privilege(
12369 &self,
12370 auth_store: &Arc<crate::auth::store::AuthStore>,
12371 principal: &crate::auth::UserId,
12372 role: crate::auth::Role,
12373 tenant: Option<&str>,
12374 username: &str,
12375 action: &str,
12376 resource_kind: &str,
12377 resource_name: &str,
12378 min_role: crate::auth::Role,
12379 ) -> Result<(), String> {
12380 if role < min_role {
12381 let msg = format!(
12382 "principal=`{}` role=`{:?}` cannot issue DDL action=`{}` resource=`{}:{}`",
12383 username, role, action, resource_kind, resource_name
12384 );
12385 self.inner.audit_log.record(
12386 action,
12387 username,
12388 resource_name,
12389 "denied",
12390 crate::json::Value::Null,
12391 );
12392 return Err(msg);
12393 }
12394
12395 if !auth_store.iam_authorization_enabled() {
12396 self.inner.audit_log.record(
12397 action,
12398 username,
12399 resource_name,
12400 "ok",
12401 crate::json::Value::Null,
12402 );
12403 return Ok(());
12404 }
12405
12406 let mut resource = crate::auth::policies::ResourceRef::new(
12407 resource_kind.to_string(),
12408 resource_name.to_string(),
12409 );
12410 if let Some(t) = tenant {
12411 resource = resource.with_tenant(t.to_string());
12412 }
12413 let ctx = runtime_iam_context(
12414 role,
12415 tenant,
12416 auth_store.principal_is_system_owned(principal),
12417 );
12418 if auth_store.check_policy_authz_with_role(principal, action, &resource, &ctx, role) {
12419 self.inner.audit_log.record(
12420 action,
12421 username,
12422 resource_name,
12423 "ok",
12424 crate::json::Value::Null,
12425 );
12426 Ok(())
12427 } else {
12428 self.inner.audit_log.record(
12429 action,
12430 username,
12431 resource_name,
12432 "denied",
12433 crate::json::Value::Null,
12434 );
12435 Err(format!(
12436 "principal=`{}` action=`{}` resource=`{}:{}` denied by IAM policy",
12437 username, action, resource_kind, resource_name
12438 ))
12439 }
12440 }
12441
12442 fn execute_grant_statement(
12444 &self,
12445 query: &str,
12446 stmt: &crate::storage::query::ast::GrantStmt,
12447 ) -> RedDBResult<RuntimeQueryResult> {
12448 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
12449 use crate::auth::UserId;
12450 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
12451
12452 let auth_store = self
12453 .inner
12454 .auth_store
12455 .read()
12456 .clone()
12457 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12458
12459 let (gname, grole) = current_auth_identity().ok_or_else(|| {
12461 RedDBError::Query("GRANT requires an authenticated principal".to_string())
12462 })?;
12463 let granter = UserId::from_parts(current_tenant().as_deref(), &gname);
12464 let granter_role = grole;
12465
12466 let mut actions: Vec<Action> = Vec::new();
12468 if stmt.all {
12469 actions.push(Action::All);
12470 } else {
12471 for kw in &stmt.actions {
12472 let a = Action::from_keyword(kw).ok_or_else(|| {
12473 RedDBError::Query(format!("unknown privilege keyword `{}`", kw))
12474 })?;
12475 actions.push(a);
12476 }
12477 }
12478
12479 let mut applied = 0usize;
12481 for obj in &stmt.objects {
12482 let resource = match stmt.object_kind {
12483 GrantObjectKind::Table => Resource::Table {
12484 schema: obj.schema.clone(),
12485 table: obj.name.clone(),
12486 },
12487 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
12488 GrantObjectKind::Database => Resource::Database,
12489 GrantObjectKind::Function => Resource::Function {
12490 schema: obj.schema.clone(),
12491 name: obj.name.clone(),
12492 },
12493 };
12494 for principal in &stmt.principals {
12495 let p = match principal {
12496 GrantPrincipalRef::Public => GrantPrincipal::Public,
12497 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
12498 GrantPrincipalRef::User { tenant, name } => {
12499 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
12500 }
12501 };
12502 let tenant = granter.tenant.clone();
12505 auth_store
12506 .grant(
12507 &granter,
12508 granter_role,
12509 p.clone(),
12510 resource.clone(),
12511 actions.clone(),
12512 stmt.with_grant_option,
12513 tenant.clone(),
12514 )
12515 .map_err(|e| RedDBError::Query(e.to_string()))?;
12516
12517 if let Some(policy) =
12521 grant_to_iam_policy(&p, &resource, &actions, tenant.as_deref())
12522 {
12523 let pid = policy.id.clone();
12524 auth_store
12525 .put_policy_internal(policy)
12526 .map_err(|e| RedDBError::Query(e.to_string()))?;
12527 let attachment = match &p {
12528 GrantPrincipal::User(uid) => {
12529 crate::auth::store::PrincipalRef::User(uid.clone())
12530 }
12531 GrantPrincipal::Group(group) => {
12532 crate::auth::store::PrincipalRef::Group(group.clone())
12533 }
12534 GrantPrincipal::Public => crate::auth::store::PrincipalRef::Group(
12535 crate::auth::store::PUBLIC_IAM_GROUP.to_string(),
12536 ),
12537 };
12538 auth_store
12539 .attach_policy(attachment, &pid)
12540 .map_err(|e| RedDBError::Query(e.to_string()))?;
12541 }
12542 applied += 1;
12543 tracing::info!(
12544 target: "audit",
12545 principal = %granter,
12546 action = "grant",
12547 "GRANT applied"
12548 );
12549 }
12550 }
12551
12552 self.invalidate_result_cache();
12553 Ok(RuntimeQueryResult::ok_message(
12554 query.to_string(),
12555 &format!("GRANT applied to {} target(s)", applied),
12556 "grant",
12557 ))
12558 }
12559
12560 fn execute_revoke_statement(
12562 &self,
12563 query: &str,
12564 stmt: &crate::storage::query::ast::RevokeStmt,
12565 ) -> RedDBResult<RuntimeQueryResult> {
12566 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
12567 use crate::auth::UserId;
12568 use crate::storage::query::ast::{GrantObjectKind, GrantPrincipalRef};
12569
12570 let auth_store = self
12571 .inner
12572 .auth_store
12573 .read()
12574 .clone()
12575 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12576
12577 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
12578 RedDBError::Query("REVOKE requires an authenticated principal".to_string())
12579 })?;
12580 let granter_role = grole;
12581
12582 let actions: Vec<Action> = if stmt.all {
12583 vec![Action::All]
12584 } else {
12585 stmt.actions
12586 .iter()
12587 .map(|kw| Action::from_keyword(kw).unwrap_or(Action::Select))
12588 .collect()
12589 };
12590
12591 let mut total_removed = 0usize;
12592 for obj in &stmt.objects {
12593 let resource = match stmt.object_kind {
12594 GrantObjectKind::Table => Resource::Table {
12595 schema: obj.schema.clone(),
12596 table: obj.name.clone(),
12597 },
12598 GrantObjectKind::Schema => Resource::Schema(obj.name.clone()),
12599 GrantObjectKind::Database => Resource::Database,
12600 GrantObjectKind::Function => Resource::Function {
12601 schema: obj.schema.clone(),
12602 name: obj.name.clone(),
12603 },
12604 };
12605 for principal in &stmt.principals {
12606 let p = match principal {
12607 GrantPrincipalRef::Public => GrantPrincipal::Public,
12608 GrantPrincipalRef::Group(g) => GrantPrincipal::Group(g.clone()),
12609 GrantPrincipalRef::User { tenant, name } => {
12610 GrantPrincipal::User(UserId::from_parts(tenant.as_deref(), name))
12611 }
12612 };
12613 let removed = auth_store
12614 .revoke(granter_role, &p, &resource, &actions)
12615 .map_err(|e| RedDBError::Query(e.to_string()))?;
12616 let _removed_policies =
12617 auth_store.delete_synthetic_grant_policies(&p, &resource, &actions);
12618 total_removed += removed;
12619 }
12620 }
12621
12622 self.invalidate_result_cache();
12623 Ok(RuntimeQueryResult::ok_message(
12624 query.to_string(),
12625 &format!("REVOKE removed {} grant(s)", total_removed),
12626 "revoke",
12627 ))
12628 }
12629
12630 fn execute_alter_user_statement(
12632 &self,
12633 query: &str,
12634 stmt: &crate::storage::query::ast::AlterUserStmt,
12635 ) -> RedDBResult<RuntimeQueryResult> {
12636 use crate::auth::privileges::UserAttributes;
12637 use crate::auth::UserId;
12638 use crate::storage::query::ast::AlterUserAttribute;
12639
12640 let auth_store = self
12641 .inner
12642 .auth_store
12643 .read()
12644 .clone()
12645 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12646
12647 let (_gname, grole) = current_auth_identity().ok_or_else(|| {
12648 RedDBError::Query("ALTER USER requires an authenticated principal".to_string())
12649 })?;
12650 if grole != crate::auth::Role::Admin {
12651 return Err(RedDBError::Query(
12652 "ALTER USER requires Admin role".to_string(),
12653 ));
12654 }
12655
12656 let target = UserId::from_parts(stmt.tenant.as_deref(), &stmt.username);
12657
12658 let mut attrs = auth_store.user_attributes(&target);
12661 let mut enable_change: Option<bool> = None;
12662
12663 for a in &stmt.attributes {
12664 match a {
12665 AlterUserAttribute::ValidUntil(ts) => {
12666 let ms = parse_timestamp_to_ms(ts).ok_or_else(|| {
12670 RedDBError::Query(format!("invalid VALID UNTIL timestamp `{ts}`"))
12671 })?;
12672 attrs.valid_until = Some(ms);
12673 }
12674 AlterUserAttribute::ConnectionLimit(n) => {
12675 if *n < 0 {
12676 return Err(RedDBError::Query(
12677 "CONNECTION LIMIT must be non-negative".to_string(),
12678 ));
12679 }
12680 attrs.connection_limit = Some(*n as u32);
12681 }
12682 AlterUserAttribute::SetSearchPath(p) => {
12683 attrs.search_path = Some(p.clone());
12684 }
12685 AlterUserAttribute::AddGroup(g) => {
12686 if !attrs.groups.iter().any(|existing| existing == g) {
12687 attrs.groups.push(g.clone());
12688 attrs.groups.sort();
12689 }
12690 }
12691 AlterUserAttribute::DropGroup(g) => {
12692 attrs.groups.retain(|existing| existing != g);
12693 }
12694 AlterUserAttribute::Enable => enable_change = Some(true),
12695 AlterUserAttribute::Disable => enable_change = Some(false),
12696 AlterUserAttribute::Password(_) => {
12697 }
12701 }
12702 }
12703
12704 auth_store
12705 .set_user_attributes(&target, attrs)
12706 .map_err(|e| RedDBError::Query(e.to_string()))?;
12707 if let Some(en) = enable_change {
12708 auth_store
12709 .set_user_enabled(&target, en)
12710 .map_err(|e| RedDBError::Query(e.to_string()))?;
12711 }
12712 self.invalidate_result_cache();
12713 tracing::info!(
12714 target: "audit",
12715 principal = %target,
12716 action = "alter_user",
12717 "ALTER USER applied"
12718 );
12719
12720 Ok(RuntimeQueryResult::ok_message(
12721 query.to_string(),
12722 &format!("ALTER USER {} applied", target),
12723 "alter_user",
12724 ))
12725 }
12726
12727 fn execute_create_iam_policy(
12732 &self,
12733 query: &str,
12734 id: &str,
12735 json: &str,
12736 ) -> RedDBResult<RuntimeQueryResult> {
12737 use crate::auth::policies::Policy;
12738
12739 let auth_store = self
12740 .inner
12741 .auth_store
12742 .read()
12743 .clone()
12744 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12745
12746 let mut policy = Policy::from_json_str(json)
12751 .map_err(|e| RedDBError::Query(format!("policy parse: {e}")))?;
12752 if policy.id != id {
12753 policy.id = id.to_string();
12754 }
12755 let pid = policy.id.clone();
12756 let tenant = current_tenant();
12757 let (actor_name, actor_role) = current_auth_identity()
12758 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
12759 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
12760 let eval_ctx = runtime_iam_context(
12761 actor_role,
12762 tenant.as_deref(),
12763 auth_store.principal_is_system_owned(&actor),
12764 );
12765 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
12766 let ledger = self.inner.control_event_ledger.read();
12767 let control = crate::auth::store::PolicyMutationControl {
12768 ctx: &event_ctx,
12769 ledger: ledger.as_ref(),
12770 config: self.inner.control_event_config,
12771 registry: Some(self.inner.config_registry.as_ref()),
12772 actor: &actor,
12773 eval_ctx: &eval_ctx,
12774 };
12775 auth_store
12776 .put_policy_with_control_events(policy, &control)
12777 .map_err(|e| RedDBError::Query(e.to_string()))?;
12778
12779 let principal = actor_name;
12780 tracing::info!(
12781 target: "audit",
12782 principal = %principal,
12783 action = "iam:policy.put",
12784 matched_policy_id = %pid,
12785 "CREATE POLICY applied"
12786 );
12787 self.inner.audit_log.record(
12788 "iam/policy.put",
12789 &principal,
12790 &pid,
12791 "ok",
12792 crate::json::Value::Null,
12793 );
12794
12795 self.invalidate_result_cache();
12796 Ok(RuntimeQueryResult::ok_message(
12797 query.to_string(),
12798 &format!("policy `{pid}` stored"),
12799 "create_iam_policy",
12800 ))
12801 }
12802
12803 fn execute_drop_iam_policy(&self, query: &str, id: &str) -> RedDBResult<RuntimeQueryResult> {
12804 let auth_store = self
12805 .inner
12806 .auth_store
12807 .read()
12808 .clone()
12809 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12810 let tenant = current_tenant();
12811 let (actor_name, actor_role) = current_auth_identity()
12812 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
12813 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
12814 let eval_ctx = runtime_iam_context(
12815 actor_role,
12816 tenant.as_deref(),
12817 auth_store.principal_is_system_owned(&actor),
12818 );
12819 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
12820 let ledger = self.inner.control_event_ledger.read();
12821 let control = crate::auth::store::PolicyMutationControl {
12822 ctx: &event_ctx,
12823 ledger: ledger.as_ref(),
12824 config: self.inner.control_event_config,
12825 registry: Some(self.inner.config_registry.as_ref()),
12826 actor: &actor,
12827 eval_ctx: &eval_ctx,
12828 };
12829 auth_store
12830 .delete_policy_with_control_events(id, &control)
12831 .map_err(|e| RedDBError::Query(e.to_string()))?;
12832
12833 let principal = actor_name;
12834 tracing::info!(
12835 target: "audit",
12836 principal = %principal,
12837 action = "iam:policy.drop",
12838 matched_policy_id = %id,
12839 "DROP POLICY applied"
12840 );
12841 self.inner.audit_log.record(
12842 "iam/policy.drop",
12843 &principal,
12844 id,
12845 "ok",
12846 crate::json::Value::Null,
12847 );
12848
12849 self.invalidate_result_cache();
12850 Ok(RuntimeQueryResult::ok_message(
12851 query.to_string(),
12852 &format!("policy `{id}` dropped"),
12853 "drop_iam_policy",
12854 ))
12855 }
12856
12857 fn execute_attach_policy(
12858 &self,
12859 query: &str,
12860 policy_id: &str,
12861 principal: &crate::storage::query::ast::PolicyPrincipalRef,
12862 ) -> RedDBResult<RuntimeQueryResult> {
12863 use crate::auth::store::PrincipalRef;
12864 use crate::auth::UserId;
12865 use crate::storage::query::ast::PolicyPrincipalRef;
12866
12867 let auth_store = self
12868 .inner
12869 .auth_store
12870 .read()
12871 .clone()
12872 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12873 let p = match principal {
12874 PolicyPrincipalRef::User(u) => {
12875 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
12876 }
12877 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
12878 };
12879 let pretty_target = principal_label(principal);
12880 let tenant = current_tenant();
12881 let (actor_name, actor_role) = current_auth_identity()
12882 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
12883 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
12884 let eval_ctx = runtime_iam_context(
12885 actor_role,
12886 tenant.as_deref(),
12887 auth_store.principal_is_system_owned(&actor),
12888 );
12889 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
12890 let ledger = self.inner.control_event_ledger.read();
12891 let control = crate::auth::store::PolicyMutationControl {
12892 ctx: &event_ctx,
12893 ledger: ledger.as_ref(),
12894 config: self.inner.control_event_config,
12895 registry: Some(self.inner.config_registry.as_ref()),
12896 actor: &actor,
12897 eval_ctx: &eval_ctx,
12898 };
12899 auth_store
12900 .attach_policy_with_control_events(p, policy_id, &control)
12901 .map_err(|e| RedDBError::Query(e.to_string()))?;
12902
12903 let principal_str = actor_name;
12904 tracing::info!(
12905 target: "audit",
12906 principal = %principal_str,
12907 action = "iam:policy.attach",
12908 matched_policy_id = %policy_id,
12909 target = %pretty_target,
12910 "ATTACH POLICY applied"
12911 );
12912 self.inner.audit_log.record(
12913 "iam/policy.attach",
12914 &principal_str,
12915 &pretty_target,
12916 "ok",
12917 crate::json::Value::Null,
12918 );
12919
12920 self.invalidate_result_cache();
12921 Ok(RuntimeQueryResult::ok_message(
12922 query.to_string(),
12923 &format!("policy `{policy_id}` attached to {pretty_target}"),
12924 "attach_policy",
12925 ))
12926 }
12927
12928 fn execute_detach_policy(
12929 &self,
12930 query: &str,
12931 policy_id: &str,
12932 principal: &crate::storage::query::ast::PolicyPrincipalRef,
12933 ) -> RedDBResult<RuntimeQueryResult> {
12934 use crate::auth::store::PrincipalRef;
12935 use crate::auth::UserId;
12936 use crate::storage::query::ast::PolicyPrincipalRef;
12937
12938 let auth_store = self
12939 .inner
12940 .auth_store
12941 .read()
12942 .clone()
12943 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
12944 let p = match principal {
12945 PolicyPrincipalRef::User(u) => {
12946 PrincipalRef::User(UserId::from_parts(u.tenant.as_deref(), &u.username))
12947 }
12948 PolicyPrincipalRef::Group(g) => PrincipalRef::Group(g.clone()),
12949 };
12950 let pretty_target = principal_label(principal);
12951 let tenant = current_tenant();
12952 let (actor_name, actor_role) = current_auth_identity()
12953 .unwrap_or_else(|| ("anonymous".to_string(), crate::auth::Role::Read));
12954 let actor = crate::auth::UserId::from_parts(tenant.as_deref(), &actor_name);
12955 let eval_ctx = runtime_iam_context(
12956 actor_role,
12957 tenant.as_deref(),
12958 auth_store.principal_is_system_owned(&actor),
12959 );
12960 let event_ctx = self.policy_mutation_control_ctx(&actor, tenant.as_deref());
12961 let ledger = self.inner.control_event_ledger.read();
12962 let control = crate::auth::store::PolicyMutationControl {
12963 ctx: &event_ctx,
12964 ledger: ledger.as_ref(),
12965 config: self.inner.control_event_config,
12966 registry: Some(self.inner.config_registry.as_ref()),
12967 actor: &actor,
12968 eval_ctx: &eval_ctx,
12969 };
12970 auth_store
12971 .detach_policy_with_control_events(p, policy_id, &control)
12972 .map_err(|e| RedDBError::Query(e.to_string()))?;
12973
12974 let principal_str = actor_name;
12975 tracing::info!(
12976 target: "audit",
12977 principal = %principal_str,
12978 action = "iam:policy.detach",
12979 matched_policy_id = %policy_id,
12980 target = %pretty_target,
12981 "DETACH POLICY applied"
12982 );
12983 self.inner.audit_log.record(
12984 "iam/policy.detach",
12985 &principal_str,
12986 &pretty_target,
12987 "ok",
12988 crate::json::Value::Null,
12989 );
12990
12991 self.invalidate_result_cache();
12992 Ok(RuntimeQueryResult::ok_message(
12993 query.to_string(),
12994 &format!("policy `{policy_id}` detached from {pretty_target}"),
12995 "detach_policy",
12996 ))
12997 }
12998
12999 fn execute_show_policies(
13000 &self,
13001 query: &str,
13002 filter: Option<&crate::storage::query::ast::PolicyPrincipalRef>,
13003 ) -> RedDBResult<RuntimeQueryResult> {
13004 use crate::auth::UserId;
13005 use crate::storage::query::ast::PolicyPrincipalRef;
13006 use crate::storage::query::unified::UnifiedRecord;
13007 use crate::storage::schema::Value as SchemaValue;
13008 use std::sync::Arc;
13009
13010 let auth_store = self
13011 .inner
13012 .auth_store
13013 .read()
13014 .clone()
13015 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
13016
13017 let pols = match filter {
13018 None => auth_store.list_policies(),
13019 Some(PolicyPrincipalRef::User(u)) => {
13020 let id = UserId::from_parts(u.tenant.as_deref(), &u.username);
13021 auth_store.effective_policies(&id)
13022 }
13023 Some(PolicyPrincipalRef::Group(g)) => auth_store.group_policies(g),
13024 };
13025
13026 let mut records = Vec::with_capacity(pols.len() + 1);
13027
13028 let mode = auth_store.enforcement_mode();
13033 let mut header = UnifiedRecord::default();
13034 header.set_arc(
13035 Arc::from("id"),
13036 SchemaValue::text("<enforcement_mode>".to_string()),
13037 );
13038 header.set_arc(Arc::from("statements"), SchemaValue::Integer(0));
13039 header.set_arc(Arc::from("tenant"), SchemaValue::Null);
13040 let header_json = format!(
13041 r#"{{"enforcement_mode":"{}","policy_only_hard_version":"{}"}}"#,
13042 mode.as_str(),
13043 crate::auth::enforcement_mode::POLICY_ONLY_HARD_VERSION
13044 );
13045 header.set_arc(Arc::from("json"), SchemaValue::text(header_json));
13046 records.push(header);
13047
13048 for p in pols.iter() {
13049 let mut rec = UnifiedRecord::default();
13050 rec.set_arc(Arc::from("id"), SchemaValue::text(p.id.clone()));
13051 rec.set_arc(
13052 Arc::from("statements"),
13053 SchemaValue::Integer(p.statements.len() as i64),
13054 );
13055 rec.set_arc(
13056 Arc::from("tenant"),
13057 p.tenant
13058 .as_deref()
13059 .map(|t| SchemaValue::text(t.to_string()))
13060 .unwrap_or(SchemaValue::Null),
13061 );
13062 rec.set_arc(Arc::from("json"), SchemaValue::text(p.to_json_string()));
13063 records.push(rec);
13064 }
13065 let mut result = crate::storage::query::unified::UnifiedResult::empty();
13066 result.records = records;
13067 Ok(RuntimeQueryResult {
13068 query: query.to_string(),
13069 mode: crate::storage::query::modes::QueryMode::Sql,
13070 statement: "show_policies",
13071 engine: "iam-policies",
13072 result,
13073 affected_rows: 0,
13074 statement_type: "select",
13075 bookmark: None,
13076 })
13077 }
13078
13079 fn execute_show_effective_permissions(
13080 &self,
13081 query: &str,
13082 user: &crate::storage::query::ast::PolicyUserRef,
13083 resource: Option<&crate::storage::query::ast::PolicyResourceRef>,
13084 ) -> RedDBResult<RuntimeQueryResult> {
13085 use crate::auth::UserId;
13086 use crate::storage::query::unified::UnifiedRecord;
13087 use crate::storage::schema::Value as SchemaValue;
13088 use std::sync::Arc;
13089
13090 let auth_store = self
13091 .inner
13092 .auth_store
13093 .read()
13094 .clone()
13095 .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
13096 let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
13097 let pols = auth_store.effective_policies(&id);
13098
13099 let mut records = Vec::new();
13102 for p in pols.iter() {
13103 for (idx, st) in p.statements.iter().enumerate() {
13104 if let Some(_r) = resource {
13105 }
13109 let mut rec = UnifiedRecord::default();
13110 rec.set_arc(Arc::from("policy_id"), SchemaValue::text(p.id.clone()));
13111 rec.set_arc(
13112 Arc::from("statement_index"),
13113 SchemaValue::Integer(idx as i64),
13114 );
13115 rec.set_arc(
13116 Arc::from("sid"),
13117 st.sid
13118 .as_deref()
13119 .map(|s| SchemaValue::text(s.to_string()))
13120 .unwrap_or(SchemaValue::Null),
13121 );
13122 rec.set_arc(
13123 Arc::from("effect"),
13124 SchemaValue::text(match st.effect {
13125 crate::auth::policies::Effect::Allow => "allow",
13126 crate::auth::policies::Effect::Deny => "deny",
13127 }),
13128 );
13129 rec.set_arc(
13130 Arc::from("actions"),
13131 SchemaValue::Integer(st.actions.len() as i64),
13132 );
13133 rec.set_arc(
13134 Arc::from("resources"),
13135 SchemaValue::Integer(st.resources.len() as i64),
13136 );
13137 records.push(rec);
13138 }
13139 }
13140 let mut result = crate::storage::query::unified::UnifiedResult::empty();
13141 result.records = records;
13142 Ok(RuntimeQueryResult {
13143 query: query.to_string(),
13144 mode: crate::storage::query::modes::QueryMode::Sql,
13145 statement: "show_effective_permissions",
13146 engine: "iam-policies",
13147 result,
13148 affected_rows: 0,
13149 statement_type: "select",
13150 bookmark: None,
13151 })
13152 }
13153
13154 fn execute_lint_policy(
13155 &self,
13156 query: &str,
13157 source: &crate::storage::query::ast::LintPolicySource,
13158 ) -> RedDBResult<RuntimeQueryResult> {
13159 use crate::auth::policy_linter::lint;
13160 use crate::storage::query::ast::LintPolicySource;
13161 use crate::storage::query::unified::UnifiedRecord;
13162 use crate::storage::schema::Value as SchemaValue;
13163 use std::sync::Arc;
13164
13165 let policy_text = match source {
13170 LintPolicySource::Json(text) => text.clone(),
13171 LintPolicySource::Id(id) => {
13172 let auth_store =
13173 self.inner.auth_store.read().clone().ok_or_else(|| {
13174 RedDBError::Query("auth store not configured".to_string())
13175 })?;
13176 let policy = auth_store
13177 .get_policy(id)
13178 .ok_or_else(|| RedDBError::Query(format!("policy `{id}` not found")))?;
13179 policy.to_json_string()
13180 }
13181 };
13182 let diagnostics = lint(&policy_text);
13183
13184 let principal_str = current_auth_identity()
13185 .map(|(u, _)| u)
13186 .unwrap_or_else(|| "anonymous".into());
13187 tracing::info!(
13188 target: "audit",
13189 principal = %principal_str,
13190 action = "iam:policy.lint",
13191 diagnostic_count = diagnostics.len(),
13192 "LINT POLICY issued"
13193 );
13194 self.inner.audit_log.record(
13195 "iam/policy.lint",
13196 &principal_str,
13197 match source {
13198 LintPolicySource::Id(id) => id.as_str(),
13199 LintPolicySource::Json(_) => "<json>",
13200 },
13201 "ok",
13202 crate::json::Value::Null,
13203 );
13204
13205 const COLUMNS: [&str; 5] = ["severity", "code", "message", "suggested_fix", "location"];
13208 let schema = Arc::new(
13209 COLUMNS
13210 .iter()
13211 .map(|name| Arc::<str>::from(*name))
13212 .collect::<Vec<_>>(),
13213 );
13214 let records: Vec<UnifiedRecord> = diagnostics
13215 .iter()
13216 .map(|d| {
13217 UnifiedRecord::with_schema(
13218 Arc::clone(&schema),
13219 vec![
13220 SchemaValue::text(d.severity.as_str()),
13221 SchemaValue::text(d.code.as_str()),
13222 SchemaValue::text(d.message.clone()),
13223 d.suggested_fix
13224 .as_deref()
13225 .map(SchemaValue::text)
13226 .unwrap_or(SchemaValue::Null),
13227 d.location
13228 .as_deref()
13229 .map(SchemaValue::text)
13230 .unwrap_or(SchemaValue::Null),
13231 ],
13232 )
13233 })
13234 .collect();
13235 let mut result = crate::storage::query::unified::UnifiedResult::with_columns(
13236 COLUMNS.iter().map(|c| c.to_string()).collect(),
13237 );
13238 result.records = records;
13239 Ok(RuntimeQueryResult {
13240 query: query.to_string(),
13241 mode: crate::storage::query::modes::QueryMode::Sql,
13242 statement: "lint_policy",
13243 engine: "iam-policies",
13244 result,
13245 affected_rows: 0,
13246 statement_type: "select",
13247 bookmark: None,
13248 })
13249 }
13250
/// Executes `MIGRATE POLICY MODE TO '<target>' [DRY RUN]`.
///
/// Only `policy_only` is accepted as a target. Every collection in the
/// catalog snapshot is turned into a `table` resource and handed to
/// `simulate_migration_delta`; each returned delta becomes one result row.
/// Per the refusal message below, a delta is a principal/action/resource
/// pair that would lose access under `policy_only`.
///
/// The audit event and the audit-log record are written before the refusal
/// check, so refused attempts are audited too (outcome `refused`).
///
/// # Errors
/// Returns `RedDBError::Query` when `target` does not parse, when it parses
/// to a mode other than `PolicyOnly`, when no auth store is configured, or
/// when this is not a dry run and at least one delta exists.
fn execute_migrate_policy_mode(
    &self,
    query: &str,
    target: &str,
    dry_run: bool,
) -> RedDBResult<RuntimeQueryResult> {
    use crate::auth::enforcement_mode::PolicyEnforcementMode;
    use crate::auth::migrate_policy_mode::{
        principal_label, simulate_migration_delta, MigratePolicyDelta,
    };
    use crate::auth::policies::ResourceRef;
    use crate::storage::query::unified::UnifiedRecord;
    use crate::storage::schema::Value as SchemaValue;
    use std::sync::Arc;

    // Reject targets that do not parse at all.
    let parsed = PolicyEnforcementMode::parse(target).ok_or_else(|| {
        RedDBError::Query(format!(
            "MIGRATE POLICY MODE: invalid target `{target}` (expected `policy_only`)"
        ))
    })?;
    // A valid mode other than `PolicyOnly` is still refused by this command.
    if parsed != PolicyEnforcementMode::PolicyOnly {
        return Err(RedDBError::Query(format!(
            "MIGRATE POLICY MODE: target `{target}` is not supported — only `policy_only` may be migrated to via this command"
        )));
    }

    let auth_store = self
        .inner
        .auth_store
        .read()
        .clone()
        .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;

    // Every collection in the catalog snapshot is simulated as a `table` resource.
    let snapshot = self.inner.db.catalog_model_snapshot();
    let resources: Vec<ResourceRef> = snapshot
        .collections
        .iter()
        .map(|c| ResourceRef::new("table", c.name.clone()))
        .collect();

    let now_ms = crate::utils::now_unix_millis() as u128;
    let deltas: Vec<MigratePolicyDelta> =
        simulate_migration_delta(auth_store.as_ref(), &resources, now_ms);

    // Falls back to "anonymous" when no identity is set for this call.
    let principal_str = current_auth_identity()
        .map(|(u, _)| u)
        .unwrap_or_else(|| "anonymous".into());

    // The outcome is decided up front so both audit sinks record the same value.
    let outcome_str = if dry_run {
        "dry_run"
    } else if deltas.is_empty() {
        "applied"
    } else {
        "refused"
    };
    tracing::info!(
        target: "audit",
        principal = %principal_str,
        action = "iam:policy.migrate_mode",
        target = %target,
        dry_run,
        delta_count = deltas.len(),
        outcome = outcome_str,
        "MIGRATE POLICY MODE issued"
    );
    self.inner.audit_log.record(
        "iam/policy.migrate_mode",
        &principal_str,
        target,
        outcome_str,
        crate::json::Value::Null,
    );

    // Refuse a real migration while any delta exists; the error message
    // carries at most five sample entries plus a count of the rest.
    if !dry_run && !deltas.is_empty() {
        let summary = deltas
            .iter()
            .take(5)
            .map(|d| {
                format!(
                    "{}:{}/{}:{}",
                    principal_label(&d.principal),
                    d.action,
                    d.resource_kind,
                    d.resource_name
                )
            })
            .collect::<Vec<_>>()
            .join(", ");
        let more = if deltas.len() > 5 {
            format!(" (and {} more)", deltas.len() - 5)
        } else {
            String::new()
        };
        return Err(RedDBError::Query(format!(
            "MIGRATE POLICY MODE refused: {n} principal/action/resource pair(s) would lose access under `policy_only`. Run `MIGRATE POLICY MODE TO '{target}' DRY RUN` to inspect. Sample: {summary}{more}",
            n = deltas.len(),
        )));
    }

    // Reaching this point on a real run means `deltas` is empty: apply the switch.
    if !dry_run {
        auth_store.set_enforcement_mode(parsed);
    }

    const COLUMNS: [&str; 5] = [
        "principal",
        "role",
        "action",
        "resource_kind",
        "resource_name",
    ];
    // One shared schema for all records.
    let schema = Arc::new(
        COLUMNS
            .iter()
            .map(|name| Arc::<str>::from(*name))
            .collect::<Vec<_>>(),
    );
    // One row per delta (empty on a successful real run).
    let records: Vec<UnifiedRecord> = deltas
        .iter()
        .map(|d| {
            UnifiedRecord::with_schema(
                Arc::clone(&schema),
                vec![
                    SchemaValue::text(principal_label(&d.principal)),
                    SchemaValue::text(d.role.as_str()),
                    SchemaValue::text(d.action.clone()),
                    SchemaValue::text(d.resource_kind.clone()),
                    SchemaValue::text(d.resource_name.clone()),
                ],
            )
        })
        .collect();
    let mut result = crate::storage::query::unified::UnifiedResult::with_columns(
        COLUMNS.iter().map(|c| c.to_string()).collect(),
    );
    result.records = records;
    Ok(RuntimeQueryResult {
        query: query.to_string(),
        mode: crate::storage::query::modes::QueryMode::Sql,
        statement: "migrate_policy_mode",
        engine: "iam-policies",
        result,
        affected_rows: 0,
        statement_type: "select",
        bookmark: None,
    })
}
13421
/// Executes a policy simulation for `user` performing `action` on `resource`.
///
/// The auth store's `simulate` is called with a default `SimCtx`, and the
/// outcome is returned as a single record with the fields `decision`,
/// `matched_policy_id`, `matched_sid`, `reason` and `trail_len`.
///
/// The call is audited through both `tracing` (target `audit`) and the audit
/// log. The audit-log outcome is always `"ok"`, whatever the decision was.
///
/// # Errors
/// Returns `RedDBError::Query` when no auth store is configured.
fn execute_simulate_policy(
    &self,
    query: &str,
    user: &crate::storage::query::ast::PolicyUserRef,
    action: &str,
    resource: &crate::storage::query::ast::PolicyResourceRef,
) -> RedDBResult<RuntimeQueryResult> {
    use crate::auth::policies::ResourceRef;
    use crate::auth::store::SimCtx;
    use crate::auth::UserId;
    use crate::storage::query::unified::UnifiedRecord;
    use crate::storage::schema::Value as SchemaValue;
    use std::sync::Arc;

    let auth_store = self
        .inner
        .auth_store
        .read()
        .clone()
        .ok_or_else(|| RedDBError::Query("auth store not configured".to_string()))?;
    // `id` is the simulated subject, not the caller issuing the statement.
    let id = UserId::from_parts(user.tenant.as_deref(), &user.username);
    let r = ResourceRef::new(resource.kind.clone(), resource.name.clone());
    let outcome = auth_store.simulate(&id, action, &r, SimCtx::default());

    // The caller's identity, falling back to "anonymous" when none is set.
    let principal_str = current_auth_identity()
        .map(|(u, _)| u)
        .unwrap_or_else(|| "anonymous".into());
    let (decision_str, matched_pid, matched_sid) = decision_to_strings(&outcome.decision);
    tracing::info!(
        target: "audit",
        principal = %principal_str,
        action = "iam:policy.simulate",
        decision = %decision_str,
        matched_policy_id = ?matched_pid,
        matched_sid = ?matched_sid,
        "SIMULATE issued"
    );
    // The audit-log target is the simulated user id.
    self.inner.audit_log.record(
        "iam/policy.simulate",
        &principal_str,
        &id.to_string(),
        "ok",
        crate::json::Value::Null,
    );

    // Build the single result record; missing ids become NULL.
    let mut rec = UnifiedRecord::default();
    rec.set_arc(Arc::from("decision"), SchemaValue::text(decision_str));
    rec.set_arc(
        Arc::from("matched_policy_id"),
        matched_pid
            .map(SchemaValue::text)
            .unwrap_or(SchemaValue::Null),
    );
    rec.set_arc(
        Arc::from("matched_sid"),
        matched_sid
            .map(SchemaValue::text)
            .unwrap_or(SchemaValue::Null),
    );
    rec.set_arc(Arc::from("reason"), SchemaValue::text(outcome.reason));
    // Only the length of the evaluation trail is surfaced, not its entries.
    rec.set_arc(
        Arc::from("trail_len"),
        SchemaValue::Integer(outcome.trail.len() as i64),
    );
    let mut result = crate::storage::query::unified::UnifiedResult::empty();
    result.records = vec![rec];
    Ok(RuntimeQueryResult {
        query: query.to_string(),
        mode: crate::storage::query::modes::QueryMode::Sql,
        statement: "simulate_policy",
        engine: "iam-policies",
        result,
        affected_rows: 0,
        statement_type: "select",
        bookmark: None,
    })
}
13499}
13500
13501fn grant_to_iam_policy(
13506 principal: &crate::auth::privileges::GrantPrincipal,
13507 resource: &crate::auth::privileges::Resource,
13508 actions: &[crate::auth::privileges::Action],
13509 tenant: Option<&str>,
13510) -> Option<crate::auth::policies::Policy> {
13511 use crate::auth::policies::{
13512 compile_action, ActionPattern, Effect, Policy, ResourcePattern, Statement,
13513 };
13514 use crate::auth::privileges::{Action, GrantPrincipal, Resource};
13515
13516 if matches!(principal, GrantPrincipal::Group(_)) {
13517 return None;
13518 }
13519
13520 let now = crate::auth::now_ms();
13521 let id = format!("_grant_{:x}_{:x}", now, std::process::id());
13522
13523 let resource_str = match resource {
13524 Resource::Database => "table:*".to_string(),
13525 Resource::Schema(s) => format!("table:{s}.*"),
13526 Resource::Table { schema, table } => match schema {
13527 Some(s) => format!("table:{s}.{table}"),
13528 None => format!("table:{table}"),
13529 },
13530 Resource::Function { schema, name } => match schema {
13531 Some(s) => format!("function:{s}.{name}"),
13532 None => format!("function:{name}"),
13533 },
13534 };
13535
13536 let action_patterns: Vec<ActionPattern> = if actions.contains(&Action::All) {
13540 vec![ActionPattern::Wildcard]
13541 } else {
13542 actions
13543 .iter()
13544 .map(|a| compile_action(&a.as_str().to_ascii_lowercase()))
13545 .collect()
13546 };
13547 if action_patterns.is_empty() {
13548 return None;
13549 }
13550
13551 let resource_patterns = if resource_str == "*" {
13556 vec![ResourcePattern::Wildcard]
13557 } else if resource_str.contains('*') {
13558 vec![ResourcePattern::Glob(resource_str.clone())]
13559 } else if let Some((kind, name)) = resource_str.split_once(':') {
13560 vec![ResourcePattern::Exact {
13561 kind: kind.to_string(),
13562 name: name.to_string(),
13563 }]
13564 } else {
13565 vec![ResourcePattern::Wildcard]
13566 };
13567
13568 let policy = Policy {
13569 id,
13570 version: 1,
13571 tenant: tenant.map(|t| t.to_string()),
13572 created_at: now,
13573 updated_at: now,
13574 statements: vec![Statement {
13575 sid: None,
13576 effect: Effect::Allow,
13577 actions: action_patterns,
13578 resources: resource_patterns,
13579 condition: None,
13580 }],
13581 };
13582 if policy.validate().is_err() {
13583 return None;
13584 }
13585 Some(policy)
13586}
13587
13588fn parse_positive_iterations(func: &str, value: &f64) -> RedDBResult<usize> {
13594 if !value.is_finite() || *value < 1.0 || value.fract() != 0.0 {
13595 return Err(RedDBError::Query(format!(
13596 "table function '{func}' max_iterations must be a positive integer, got {value}"
13597 )));
13598 }
13599 Ok(*value as usize)
13600}
13601
13602fn legacy_action_to_iam(action: crate::auth::privileges::Action) -> &'static str {
13603 use crate::auth::privileges::Action;
13604 match action {
13605 Action::Select => "select",
13606 Action::Insert => "insert",
13607 Action::Update => "update",
13608 Action::Delete => "delete",
13609 Action::Truncate => "truncate",
13610 Action::References => "references",
13611 Action::Execute => "execute",
13612 Action::Usage => "usage",
13613 Action::All => "*",
13614 }
13615}
13616
13617fn update_set_target_columns(query: &crate::storage::query::ast::UpdateQuery) -> Vec<String> {
13618 let mut columns = Vec::new();
13619 for (column, _) in &query.assignment_exprs {
13620 if !columns.iter().any(|seen| seen == column) {
13621 columns.push(column.clone());
13622 }
13623 }
13624 columns
13625}
13626
13627fn column_access_request_for_table_update(
13628 table_name: &str,
13629 columns: Vec<String>,
13630) -> crate::auth::ColumnAccessRequest {
13631 match table_name.split_once('.') {
13632 Some((schema, table)) => {
13633 crate::auth::ColumnAccessRequest::update(table.to_string(), columns)
13634 .with_schema(schema.to_string())
13635 }
13636 None => crate::auth::ColumnAccessRequest::update(table_name.to_string(), columns),
13637 }
13638}
13639
13640fn column_access_request_for_table_select(
13641 table_name: &str,
13642 columns: Vec<String>,
13643) -> crate::auth::ColumnAccessRequest {
13644 match table_name.split_once('.') {
13645 Some((schema, table)) => {
13646 crate::auth::ColumnAccessRequest::select(table.to_string(), columns)
13647 .with_schema(schema.to_string())
13648 }
13649 None => crate::auth::ColumnAccessRequest::select(table_name.to_string(), columns),
13650 }
13651}
13652
13653fn update_returning_columns_for_policy(
13654 runtime: &RedDBRuntime,
13655 query: &crate::storage::query::ast::UpdateQuery,
13656) -> Option<Vec<String>> {
13657 let items = query.returning.as_ref()?;
13658 let mut columns = Vec::new();
13659 let project_all = items
13660 .iter()
13661 .any(|item| matches!(item, crate::storage::query::ast::ReturningItem::All));
13662 if project_all {
13663 collect_returning_star_columns(runtime, query, &mut columns);
13664 } else {
13665 for item in items {
13666 let crate::storage::query::ast::ReturningItem::Column(column) = item else {
13667 continue;
13668 };
13669 push_returning_policy_column(&mut columns, column);
13670 }
13671 }
13672 (!columns.is_empty()).then_some(columns)
13673}
13674
/// Expands `RETURNING *` for an UPDATE into concrete column names, appended
/// to `columns` via `push_returning_policy_column`.
///
/// Does nothing when the target collection does not exist. Otherwise the
/// declared column schema (if any) is added first, followed by the field and
/// property names of every stored entity that matches the update target.
fn collect_returning_star_columns(
    runtime: &RedDBRuntime,
    query: &crate::storage::query::ast::UpdateQuery,
    columns: &mut Vec<String>,
) {
    let store = runtime.db().store();
    let Some(manager) = store.get_collection(&query.table) else {
        return;
    };
    // Declared schema columns come first, when the collection has a schema.
    if let Some(schema) = manager.column_schema() {
        for column in schema.iter() {
            push_returning_policy_column(columns, column);
        }
    }
    // All entities are fetched (`|_| true`) and filtered by target kind here.
    for entity in manager.query_all(|_| true) {
        if !returning_entity_matches_update_target(&entity, query.target) {
            continue;
        }
        match &entity.data {
            crate::storage::EntityData::Row(row) => {
                for (column, _) in row.iter_fields() {
                    push_returning_policy_column(columns, column);
                }
            }
            // Nodes contribute two fixed names plus their property keys.
            crate::storage::EntityData::Node(node) => {
                push_returning_policy_column(columns, "label");
                push_returning_policy_column(columns, "node_type");
                for column in node.properties.keys() {
                    push_returning_policy_column(columns, column);
                }
            }
            // Edges contribute four fixed names plus their property keys.
            crate::storage::EntityData::Edge(edge) => {
                push_returning_policy_column(columns, "label");
                push_returning_policy_column(columns, "from_rid");
                push_returning_policy_column(columns, "to_rid");
                push_returning_policy_column(columns, "weight");
                for column in edge.properties.keys() {
                    push_returning_policy_column(columns, column);
                }
            }
            // Any other entity payload contributes no columns.
            _ => {}
        }
    }
}
13719
13720fn push_returning_policy_column(columns: &mut Vec<String>, column: &str) {
13721 if returning_public_envelope_column(column) {
13722 return;
13723 }
13724 if !columns.iter().any(|seen| seen == column) {
13725 columns.push(column.to_string());
13726 }
13727}
13728
/// Reports whether `column` is one of the fixed envelope names (`rid`,
/// `collection`, `kind`, `tenant`, `created_at`, `updated_at`,
/// `red_entity_id`), compared ASCII case-insensitively.
fn returning_public_envelope_column(column: &str) -> bool {
    const ENVELOPE_COLUMNS: [&str; 7] = [
        "rid",
        "collection",
        "kind",
        "tenant",
        "created_at",
        "updated_at",
        "red_entity_id",
    ];
    ENVELOPE_COLUMNS
        .iter()
        .any(|reserved| column.eq_ignore_ascii_case(reserved))
}
13735
13736fn returning_entity_matches_update_target(
13737 entity: &crate::storage::UnifiedEntity,
13738 target: crate::storage::query::ast::UpdateTarget,
13739) -> bool {
13740 use crate::storage::query::ast::UpdateTarget;
13741 match target {
13742 UpdateTarget::Rows => {
13743 matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Row))
13744 }
13745 UpdateTarget::Documents => {
13746 matches!(
13747 returning_row_item_kind(entity),
13748 Some(ReturningRowKind::Document)
13749 )
13750 }
13751 UpdateTarget::Kv => matches!(returning_row_item_kind(entity), Some(ReturningRowKind::Kv)),
13752 UpdateTarget::Nodes => matches!(
13753 (&entity.kind, &entity.data),
13754 (
13755 crate::storage::EntityKind::GraphNode(_),
13756 crate::storage::EntityData::Node(_)
13757 )
13758 ),
13759 UpdateTarget::Edges => matches!(
13760 (&entity.kind, &entity.data),
13761 (
13762 crate::storage::EntityKind::GraphEdge(_),
13763 crate::storage::EntityData::Edge(_)
13764 )
13765 ),
13766 }
13767}
13768
/// Classification of a row-backed entity, as computed by
/// `returning_row_item_kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReturningRowKind {
    /// Not key/value-only and without any `Value::Json` field.
    Row,
    /// Not key/value-only and with at least one `Value::Json` field.
    Document,
    /// Every field is named `key` or `value` (ASCII case-insensitive).
    Kv,
}
13775
13776fn returning_row_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<ReturningRowKind> {
13777 let row = entity.data.as_row()?;
13778 let is_kv = row.iter_fields().all(|(column, _)| {
13779 column.eq_ignore_ascii_case("key") || column.eq_ignore_ascii_case("value")
13780 });
13781 if is_kv {
13782 return Some(ReturningRowKind::Kv);
13783 }
13784 let is_document = row
13785 .iter_fields()
13786 .any(|(_, value)| matches!(value, crate::storage::schema::Value::Json(_)));
13787 if is_document {
13788 Some(ReturningRowKind::Document)
13789 } else {
13790 Some(ReturningRowKind::Row)
13791 }
13792}
13793
13794fn requested_table_columns_for_policy(
13795 table: &crate::storage::query::ast::TableQuery,
13796) -> Vec<String> {
13797 use crate::storage::query::sql_lowering::{
13798 effective_table_filter, effective_table_group_by_exprs, effective_table_having_filter,
13799 effective_table_projections,
13800 };
13801
13802 let table_name = table.table.as_str();
13803 let table_alias = table.alias.as_deref();
13804 let mut columns = std::collections::BTreeSet::new();
13805
13806 for projection in effective_table_projections(table) {
13807 collect_projection_columns(&projection, table_name, table_alias, &mut columns);
13808 }
13809 if let Some(filter) = effective_table_filter(table) {
13810 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
13811 }
13812 for expr in effective_table_group_by_exprs(table) {
13813 collect_expr_columns(&expr, table_name, table_alias, &mut columns);
13814 }
13815 if let Some(filter) = effective_table_having_filter(table) {
13816 collect_filter_columns(&filter, table_name, table_alias, &mut columns);
13817 }
13818 for order in &table.order_by {
13819 if let Some(expr) = order.expr.as_ref() {
13820 collect_expr_columns(expr, table_name, table_alias, &mut columns);
13821 } else {
13822 collect_field_ref_column(&order.field, table_name, table_alias, &mut columns);
13823 }
13824 }
13825
13826 columns.into_iter().collect()
13827}
13828
13829fn collect_projection_columns(
13830 projection: &crate::storage::query::ast::Projection,
13831 table_name: &str,
13832 table_alias: Option<&str>,
13833 columns: &mut std::collections::BTreeSet<String>,
13834) {
13835 use crate::storage::query::ast::Projection;
13836 match projection {
13837 Projection::All => {
13838 columns.insert("*".to_string());
13839 }
13840 Projection::Column(column) | Projection::Alias(column, _) => {
13841 if column != "*" {
13842 columns.insert(column.clone());
13843 }
13844 }
13845 Projection::Function(_, args) => {
13846 for arg in args {
13847 collect_projection_columns(arg, table_name, table_alias, columns);
13848 }
13849 }
13850 Projection::Expression(filter, _) => {
13851 collect_filter_columns(filter, table_name, table_alias, columns);
13852 }
13853 Projection::Field(field, _) => {
13854 collect_field_ref_column(field, table_name, table_alias, columns);
13855 }
13856 Projection::Window { args, .. } => {
13860 for arg in args {
13861 collect_projection_columns(arg, table_name, table_alias, columns);
13862 }
13863 }
13864 }
13865}
13866
13867fn collect_filter_columns(
13868 filter: &crate::storage::query::ast::Filter,
13869 table_name: &str,
13870 table_alias: Option<&str>,
13871 columns: &mut std::collections::BTreeSet<String>,
13872) {
13873 use crate::storage::query::ast::Filter;
13874 match filter {
13875 Filter::Compare { field, .. }
13876 | Filter::IsNull(field)
13877 | Filter::IsNotNull(field)
13878 | Filter::In { field, .. }
13879 | Filter::Between { field, .. }
13880 | Filter::Like { field, .. }
13881 | Filter::StartsWith { field, .. }
13882 | Filter::EndsWith { field, .. }
13883 | Filter::Contains { field, .. } => {
13884 collect_field_ref_column(field, table_name, table_alias, columns);
13885 }
13886 Filter::CompareFields { left, right, .. } => {
13887 collect_field_ref_column(left, table_name, table_alias, columns);
13888 collect_field_ref_column(right, table_name, table_alias, columns);
13889 }
13890 Filter::CompareExpr { lhs, rhs, .. } => {
13891 collect_expr_columns(lhs, table_name, table_alias, columns);
13892 collect_expr_columns(rhs, table_name, table_alias, columns);
13893 }
13894 Filter::And(left, right) | Filter::Or(left, right) => {
13895 collect_filter_columns(left, table_name, table_alias, columns);
13896 collect_filter_columns(right, table_name, table_alias, columns);
13897 }
13898 Filter::Not(inner) => collect_filter_columns(inner, table_name, table_alias, columns),
13899 }
13900}
13901
/// Adds the columns referenced by an expression tree to `columns`.
///
/// Column references are resolved through `collect_field_ref_column`; every
/// compound expression recurses into its operands. Literals, parameters and
/// subqueries contribute nothing here.
fn collect_expr_columns(
    expr: &crate::storage::query::ast::Expr,
    table_name: &str,
    table_alias: Option<&str>,
    columns: &mut std::collections::BTreeSet<String>,
) {
    use crate::storage::query::ast::Expr;
    match expr {
        Expr::Column { field, .. } => {
            collect_field_ref_column(field, table_name, table_alias, columns);
        }
        // Leaves without column references.
        Expr::Literal { .. } | Expr::Parameter { .. } => {}
        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
            collect_expr_columns(operand, table_name, table_alias, columns);
        }
        Expr::BinaryOp { lhs, rhs, .. } => {
            collect_expr_columns(lhs, table_name, table_alias, columns);
            collect_expr_columns(rhs, table_name, table_alias, columns);
        }
        Expr::FunctionCall { args, .. } => {
            for arg in args {
                collect_expr_columns(arg, table_name, table_alias, columns);
            }
        }
        // Both the condition and the value of every branch are walked, plus the ELSE value.
        Expr::Case {
            branches, else_, ..
        } => {
            for (condition, value) in branches {
                collect_expr_columns(condition, table_name, table_alias, columns);
                collect_expr_columns(value, table_name, table_alias, columns);
            }
            if let Some(value) = else_ {
                collect_expr_columns(value, table_name, table_alias, columns);
            }
        }
        Expr::IsNull { operand, .. } => {
            collect_expr_columns(operand, table_name, table_alias, columns);
        }
        Expr::InList { target, values, .. } => {
            collect_expr_columns(target, table_name, table_alias, columns);
            for value in values {
                collect_expr_columns(value, table_name, table_alias, columns);
            }
        }
        Expr::Between {
            target, low, high, ..
        } => {
            collect_expr_columns(target, table_name, table_alias, columns);
            collect_expr_columns(low, table_name, table_alias, columns);
            collect_expr_columns(high, table_name, table_alias, columns);
        }
        // The subquery's own columns are not collected by this walker.
        Expr::Subquery { .. } => {}
        // Window calls contribute their arguments, PARTITION BY and ORDER BY expressions.
        Expr::WindowFunctionCall { args, window, .. } => {
            for arg in args {
                collect_expr_columns(arg, table_name, table_alias, columns);
            }
            for e in &window.partition_by {
                collect_expr_columns(e, table_name, table_alias, columns);
            }
            for o in &window.order_by {
                collect_expr_columns(&o.expr, table_name, table_alias, columns);
            }
        }
    }
}
13967
13968fn collect_field_ref_column(
13969 field: &crate::storage::query::ast::FieldRef,
13970 table_name: &str,
13971 table_alias: Option<&str>,
13972 columns: &mut std::collections::BTreeSet<String>,
13973) {
13974 if let Some(column) = policy_column_name_from_field_ref(field, table_name, table_alias) {
13975 if column != "*" {
13976 columns.insert(column);
13977 }
13978 }
13979}
13980
13981fn policy_column_name_from_field_ref(
13982 field: &crate::storage::query::ast::FieldRef,
13983 table_name: &str,
13984 table_alias: Option<&str>,
13985) -> Option<String> {
13986 match field {
13987 crate::storage::query::ast::FieldRef::TableColumn { table, column } => {
13988 if column == "*" {
13989 return Some("*".to_string());
13990 }
13991 if table.is_empty() || table == table_name || Some(table.as_str()) == table_alias {
13992 Some(column.clone())
13993 } else {
13994 Some(format!("{table}.{column}"))
13995 }
13996 }
13997 _ => None,
13998 }
13999}
14000
14001fn legacy_resource_to_iam(
14002 resource: &crate::auth::privileges::Resource,
14003 tenant: Option<&str>,
14004) -> crate::auth::policies::ResourceRef {
14005 use crate::auth::privileges::Resource;
14006
14007 let (kind, name) = match resource {
14008 Resource::Database => ("database".to_string(), "*".to_string()),
14009 Resource::Schema(s) => ("schema".to_string(), format!("{s}.*")),
14010 Resource::Table { schema, table } => (
14011 "table".to_string(),
14012 match schema {
14013 Some(s) => format!("{s}.{table}"),
14014 None => table.clone(),
14015 },
14016 ),
14017 Resource::Function { schema, name } => (
14018 "function".to_string(),
14019 match schema {
14020 Some(s) => format!("{s}.{name}"),
14021 None => name.clone(),
14022 },
14023 ),
14024 };
14025
14026 let mut out = crate::auth::policies::ResourceRef::new(kind, name);
14027 if let Some(t) = tenant {
14028 out = out.with_tenant(t.to_string());
14029 }
14030 out
14031}
14032
/// Table name and alias for one side of a join, as built by
/// `table_side_context`.
#[derive(Debug)]
struct JoinTableSide {
    /// Name of the underlying table.
    table: String,
    /// Alias from the query; `table_side_context` fills in the table name
    /// when the query gave no alias.
    alias: String,
}
14038
14039fn table_side_context(expr: &QueryExpr) -> Option<JoinTableSide> {
14040 match expr {
14041 QueryExpr::Table(table) => Some(JoinTableSide {
14042 table: table.table.clone(),
14043 alias: table.alias.clone().unwrap_or_else(|| table.table.clone()),
14044 }),
14045 _ => None,
14046 }
14047}
14048
/// Collects into `out` the projected columns that belong to `table`
/// (optionally known under `alias`).
///
/// Names rejected by `is_policy_column_name` are never inserted. Function and
/// window projections recurse into their arguments. Expression projections,
/// `*`, and any other field-reference kind contribute nothing.
fn collect_projection_columns_for_table(
    projection: &Projection,
    table: &str,
    alias: Option<&str>,
    out: &mut BTreeSet<String>,
) {
    match projection {
        Projection::Column(column) | Projection::Alias(column, _) => {
            match split_qualified_column(column) {
                // `qualifier.column` that names this table or its alias.
                Some((qualifier, column))
                    if qualifier == table || alias.is_some_and(|alias| qualifier == alias) =>
                {
                    push_policy_column(column, out);
                }
                // Qualified with some other relation: not ours.
                Some(_) => {}
                // Not a simple `qualifier.column` form: the whole name is used.
                None => push_policy_column(column, out),
            }
        }
        Projection::Field(
            FieldRef::TableColumn {
                table: qualifier,
                column,
            },
            _,
        ) => {
            // An empty qualifier is treated as referring to this table.
            if qualifier.is_empty()
                || qualifier == table
                || alias.is_some_and(|alias| qualifier == alias)
            {
                push_policy_column(column, out);
            }
        }
        Projection::Field(
            FieldRef::NodeProperty {
                alias: qualifier,
                property,
            },
            _,
        )
        | Projection::Field(
            FieldRef::EdgeProperty {
                alias: qualifier,
                property,
            },
            _,
        ) => {
            // Unlike table columns, an empty qualifier is not special-cased here.
            if qualifier == table || alias.is_some_and(|alias| qualifier == alias) {
                push_policy_column(property, out);
            }
        }
        Projection::Function(_, args) => {
            for arg in args {
                collect_projection_columns_for_table(arg, table, alias, out);
            }
        }
        // Must stay after the specific `Field` arms above: `Field(_, _)` is
        // the catch-all for the remaining field-reference kinds.
        Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
        Projection::Window { args, .. } => {
            for arg in args {
                collect_projection_columns_for_table(arg, table, alias, out);
            }
        }
    }
}
14112
14113fn collect_projection_columns_for_join_side(
14114 projection: &Projection,
14115 left: Option<&JoinTableSide>,
14116 right: Option<&JoinTableSide>,
14117 out: &mut HashMap<String, BTreeSet<String>>,
14118) -> RedDBResult<()> {
14119 match projection {
14120 Projection::Column(column) | Projection::Alias(column, _) => {
14121 if let Some((qualifier, column)) = split_qualified_column(column) {
14122 push_qualified_join_column(qualifier, column, left, right, out);
14123 } else {
14124 push_unqualified_join_column(column, left, right, out);
14125 }
14126 }
14127 Projection::Field(FieldRef::TableColumn { table, column }, _) => {
14128 if table.is_empty() {
14129 push_unqualified_join_column(column, left, right, out);
14130 } else if let Some(side) = [left, right]
14131 .into_iter()
14132 .flatten()
14133 .find(|side| table == side.table.as_str() || table == side.alias.as_str())
14134 {
14135 push_join_column(&side.table, column, out);
14136 }
14137 }
14138 Projection::Field(FieldRef::NodeProperty { alias, property }, _)
14139 | Projection::Field(FieldRef::EdgeProperty { alias, property }, _) => {
14140 push_qualified_join_column(alias, property, left, right, out);
14141 }
14142 Projection::Function(_, args) => {
14143 for arg in args {
14144 collect_projection_columns_for_join_side(arg, left, right, out)?;
14145 }
14146 }
14147 Projection::Expression(_, _) | Projection::All | Projection::Field(_, _) => {}
14148 Projection::Window { args, .. } => {
14149 for arg in args {
14150 collect_projection_columns_for_join_side(arg, left, right, out)?;
14151 }
14152 }
14153 }
14154 Ok(())
14155}
14156
/// Splits `qualifier.column` into its two parts.
///
/// Returns `None` when there is no dot, when either part is empty, or when
/// the part after the first dot contains another dot.
fn split_qualified_column(column: &str) -> Option<(&str, &str)> {
    match column.split_once('.') {
        Some((qualifier, name))
            if !qualifier.is_empty() && !name.is_empty() && !name.contains('.') =>
        {
            Some((qualifier, name))
        }
        _ => None,
    }
}
14164
14165fn push_qualified_join_column(
14166 qualifier: &str,
14167 column: &str,
14168 left: Option<&JoinTableSide>,
14169 right: Option<&JoinTableSide>,
14170 out: &mut HashMap<String, BTreeSet<String>>,
14171) {
14172 if let Some(side) = [left, right]
14173 .into_iter()
14174 .flatten()
14175 .find(|side| qualifier == side.table.as_str() || qualifier == side.alias.as_str())
14176 {
14177 push_join_column(&side.table, column, out);
14178 }
14179}
14180
14181fn push_unqualified_join_column(
14182 column: &str,
14183 left: Option<&JoinTableSide>,
14184 right: Option<&JoinTableSide>,
14185 out: &mut HashMap<String, BTreeSet<String>>,
14186) {
14187 for side in [left, right].into_iter().flatten() {
14188 push_join_column(&side.table, column, out);
14189 }
14190}
14191
14192fn push_join_column(table: &str, column: &str, out: &mut HashMap<String, BTreeSet<String>>) {
14193 if is_policy_column_name(column) {
14194 out.entry(table.to_string())
14195 .or_default()
14196 .insert(column.to_string());
14197 }
14198}
14199
14200fn push_policy_column(column: &str, out: &mut BTreeSet<String>) {
14201 if is_policy_column_name(column) {
14202 out.insert(column.to_string());
14203 }
14204}
14205
/// Reports whether `column` is a usable policy column name: non-empty, not
/// `*`, and not starting with `LIT:` or `TYPE:` (case-sensitive prefixes).
fn is_policy_column_name(column: &str) -> bool {
    if column.is_empty() || column == "*" {
        return false;
    }
    let has_marker_prefix = ["LIT:", "TYPE:"]
        .iter()
        .any(|prefix| column.starts_with(prefix));
    !has_marker_prefix
}
14212
14213fn runtime_iam_context(
14214 role: crate::auth::Role,
14215 tenant: Option<&str>,
14216 principal_is_system_owned: bool,
14217) -> crate::auth::policies::EvalContext {
14218 crate::auth::policies::EvalContext {
14219 principal_tenant: tenant.map(|t| t.to_string()),
14220 current_tenant: tenant.map(|t| t.to_string()),
14221 peer_ip: None,
14222 mfa_present: false,
14223 now_ms: crate::auth::now_ms(),
14224 principal_is_admin_role: role == crate::auth::Role::Admin,
14225 principal_is_system_owned,
14226 principal_is_platform_scoped: tenant.is_none(),
14227 }
14228}
14229
14230fn explicit_table_projection_columns(
14231 query: &crate::storage::query::ast::TableQuery,
14232) -> Vec<String> {
14233 use crate::storage::query::ast::{FieldRef, Projection};
14234
14235 let mut columns = Vec::new();
14236 for projection in crate::storage::query::sql_lowering::effective_table_projections(query) {
14237 match projection {
14238 Projection::Column(column) | Projection::Alias(column, _) => {
14239 push_unique(&mut columns, column)
14240 }
14241 Projection::Field(FieldRef::TableColumn { column, .. }, _) => {
14242 push_unique(&mut columns, column)
14243 }
14244 _ => {}
14248 }
14249 }
14250 columns
14251}
14252
14253fn explicit_graph_projection_properties(
14254 query: &crate::storage::query::ast::GraphQuery,
14255) -> Vec<String> {
14256 use crate::storage::query::ast::{FieldRef, Projection};
14257
14258 let mut columns = Vec::new();
14259 for projection in &query.return_ {
14260 match projection {
14261 Projection::Field(FieldRef::NodeProperty { property, .. }, _)
14262 | Projection::Field(FieldRef::EdgeProperty { property, .. }, _) => {
14263 push_unique(&mut columns, property.clone())
14264 }
14265 _ => {}
14266 }
14267 }
14268 columns
14269}
14270
/// Appends `column` to `columns` unless an equal entry is already present.
fn push_unique(columns: &mut Vec<String>, column: String) {
    if columns.contains(&column) {
        return;
    }
    columns.push(column);
}
14276
14277fn principal_label(p: &crate::storage::query::ast::PolicyPrincipalRef) -> String {
14278 use crate::storage::query::ast::PolicyPrincipalRef;
14279 match p {
14280 PolicyPrincipalRef::User(u) => match &u.tenant {
14281 Some(t) => format!("user:{t}/{}", u.username),
14282 None => format!("user:{}", u.username),
14283 },
14284 PolicyPrincipalRef::Group(g) => format!("group:{g}"),
14285 }
14286}
14287
14288pub(crate) fn decision_to_strings(
14291 d: &crate::auth::policies::Decision,
14292) -> (String, Option<String>, Option<String>) {
14293 use crate::auth::policies::Decision;
14294 match d {
14295 Decision::Allow {
14296 matched_policy_id,
14297 matched_sid,
14298 } => (
14299 "allow".into(),
14300 Some(matched_policy_id.clone()),
14301 matched_sid.clone(),
14302 ),
14303 Decision::Deny {
14304 matched_policy_id,
14305 matched_sid,
14306 } => (
14307 "deny".into(),
14308 Some(matched_policy_id.clone()),
14309 matched_sid.clone(),
14310 ),
14311 Decision::DefaultDeny => ("default_deny".into(), None, None),
14312 Decision::AdminBypass => ("admin_bypass".into(), None, None),
14313 }
14314}
14315
14316fn relation_scopes_for_query(query: &QueryExpr) -> Vec<String> {
14317 let mut scopes = Vec::new();
14318 collect_relation_scopes(query, &mut scopes);
14319 scopes.sort();
14320 scopes.dedup();
14321 scopes
14322}
14323
14324fn collect_relation_scopes(query: &QueryExpr, scopes: &mut Vec<String>) {
14325 match query {
14326 QueryExpr::Table(table) => {
14327 if !table.table.is_empty() {
14328 scopes.push(table.table.clone());
14329 }
14330 if let Some(alias) = &table.alias {
14331 scopes.push(alias.clone());
14332 }
14333 }
14334 QueryExpr::Join(join) => {
14335 collect_relation_scopes(&join.left, scopes);
14336 collect_relation_scopes(&join.right, scopes);
14337 }
14338 _ => {}
14339 }
14340}
14341
14342fn query_references_outer_scope(query: &QueryExpr, outer_scopes: &[String]) -> bool {
14343 let inner_scopes = relation_scopes_for_query(query);
14344 query_expr_references_outer_scope(query, outer_scopes, &inner_scopes)
14345}
14346
/// Walks a query expression and reports whether any part of it references an
/// outer scope, as judged by `expr_references_outer_scope` and
/// `filter_references_outer_scope`.
///
/// For tables this checks select items, both WHERE forms, both HAVING forms,
/// GROUP BY expressions and ORDER BY expressions. For joins it checks both
/// sides, the join filter and the return items. Wildcard select items never
/// count, and other query kinds always yield `false`.
fn query_expr_references_outer_scope(
    query: &QueryExpr,
    outer_scopes: &[String],
    inner_scopes: &[String],
) -> bool {
    match query {
        QueryExpr::Table(table) => {
            table.select_items.iter().any(|item| match item {
                crate::storage::query::ast::SelectItem::Wildcard => false,
                crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
                    expr_references_outer_scope(expr, outer_scopes, inner_scopes)
                }
            }) || table
                .where_expr
                .as_ref()
                .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
                || table.filter.as_ref().is_some_and(|filter| {
                    filter_references_outer_scope(filter, outer_scopes, inner_scopes)
                })
                || table.having_expr.as_ref().is_some_and(|expr| {
                    expr_references_outer_scope(expr, outer_scopes, inner_scopes)
                })
                || table.having.as_ref().is_some_and(|filter| {
                    filter_references_outer_scope(filter, outer_scopes, inner_scopes)
                })
                || table
                    .group_by_exprs
                    .iter()
                    .any(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
                // Only ORDER BY clauses that carry an expression are inspected.
                || table.order_by.iter().any(|clause| {
                    clause.expr.as_ref().is_some_and(|expr| {
                        expr_references_outer_scope(expr, outer_scopes, inner_scopes)
                    })
                })
        }
        QueryExpr::Join(join) => {
            // Both sides are checked against the same `inner_scopes` slice.
            query_expr_references_outer_scope(&join.left, outer_scopes, inner_scopes)
                || query_expr_references_outer_scope(&join.right, outer_scopes, inner_scopes)
                || join.filter.as_ref().is_some_and(|filter| {
                    filter_references_outer_scope(filter, outer_scopes, inner_scopes)
                })
                || join.return_items.iter().any(|item| match item {
                    crate::storage::query::ast::SelectItem::Wildcard => false,
                    crate::storage::query::ast::SelectItem::Expr { expr, .. } => {
                        expr_references_outer_scope(expr, outer_scopes, inner_scopes)
                    }
                })
        }
        _ => false,
    }
}
14398
14399fn filter_references_outer_scope(
14400 filter: &crate::storage::query::ast::Filter,
14401 outer_scopes: &[String],
14402 inner_scopes: &[String],
14403) -> bool {
14404 use crate::storage::query::ast::Filter;
14405 match filter {
14406 Filter::Compare { field, .. }
14407 | Filter::IsNull(field)
14408 | Filter::IsNotNull(field)
14409 | Filter::In { field, .. }
14410 | Filter::Between { field, .. }
14411 | Filter::Like { field, .. }
14412 | Filter::StartsWith { field, .. }
14413 | Filter::EndsWith { field, .. }
14414 | Filter::Contains { field, .. } => {
14415 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
14416 }
14417 Filter::CompareFields { left, right, .. } => {
14418 field_ref_references_outer_scope(left, outer_scopes, inner_scopes)
14419 || field_ref_references_outer_scope(right, outer_scopes, inner_scopes)
14420 }
14421 Filter::CompareExpr { lhs, rhs, .. } => {
14422 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
14423 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
14424 }
14425 Filter::And(left, right) | Filter::Or(left, right) => {
14426 filter_references_outer_scope(left, outer_scopes, inner_scopes)
14427 || filter_references_outer_scope(right, outer_scopes, inner_scopes)
14428 }
14429 Filter::Not(inner) => filter_references_outer_scope(inner, outer_scopes, inner_scopes),
14430 }
14431}
14432
14433fn expr_references_outer_scope(
14434 expr: &crate::storage::query::ast::Expr,
14435 outer_scopes: &[String],
14436 inner_scopes: &[String],
14437) -> bool {
14438 use crate::storage::query::ast::Expr;
14439 match expr {
14440 Expr::Column { field, .. } => {
14441 field_ref_references_outer_scope(field, outer_scopes, inner_scopes)
14442 }
14443 Expr::BinaryOp { lhs, rhs, .. } => {
14444 expr_references_outer_scope(lhs, outer_scopes, inner_scopes)
14445 || expr_references_outer_scope(rhs, outer_scopes, inner_scopes)
14446 }
14447 Expr::UnaryOp { operand, .. }
14448 | Expr::Cast { inner: operand, .. }
14449 | Expr::IsNull { operand, .. } => {
14450 expr_references_outer_scope(operand, outer_scopes, inner_scopes)
14451 }
14452 Expr::FunctionCall { args, .. } => args
14453 .iter()
14454 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes)),
14455 Expr::Case {
14456 branches, else_, ..
14457 } => {
14458 branches.iter().any(|(cond, value)| {
14459 expr_references_outer_scope(cond, outer_scopes, inner_scopes)
14460 || expr_references_outer_scope(value, outer_scopes, inner_scopes)
14461 }) || else_
14462 .as_ref()
14463 .is_some_and(|expr| expr_references_outer_scope(expr, outer_scopes, inner_scopes))
14464 }
14465 Expr::InList { target, values, .. } => {
14466 expr_references_outer_scope(target, outer_scopes, inner_scopes)
14467 || values
14468 .iter()
14469 .any(|value| expr_references_outer_scope(value, outer_scopes, inner_scopes))
14470 }
14471 Expr::Between {
14472 target, low, high, ..
14473 } => {
14474 expr_references_outer_scope(target, outer_scopes, inner_scopes)
14475 || expr_references_outer_scope(low, outer_scopes, inner_scopes)
14476 || expr_references_outer_scope(high, outer_scopes, inner_scopes)
14477 }
14478 Expr::Subquery { query, .. } => query_references_outer_scope(&query.query, inner_scopes),
14479 Expr::Literal { .. } | Expr::Parameter { .. } => false,
14480 Expr::WindowFunctionCall { args, window, .. } => {
14481 args.iter()
14482 .any(|arg| expr_references_outer_scope(arg, outer_scopes, inner_scopes))
14483 || window
14484 .partition_by
14485 .iter()
14486 .any(|e| expr_references_outer_scope(e, outer_scopes, inner_scopes))
14487 || window
14488 .order_by
14489 .iter()
14490 .any(|o| expr_references_outer_scope(&o.expr, outer_scopes, inner_scopes))
14491 }
14492 }
14493}
14494
14495fn field_ref_references_outer_scope(
14496 field: &crate::storage::query::ast::FieldRef,
14497 outer_scopes: &[String],
14498 inner_scopes: &[String],
14499) -> bool {
14500 match field {
14501 crate::storage::query::ast::FieldRef::TableColumn { table, .. } if !table.is_empty() => {
14502 outer_scopes.iter().any(|scope| scope == table)
14503 && !inner_scopes.iter().any(|scope| scope == table)
14504 }
14505 _ => false,
14506 }
14507}
14508
14509fn first_column_values(
14510 result: crate::storage::query::unified::UnifiedResult,
14511) -> RedDBResult<Vec<Value>> {
14512 if result.columns.len() > 1 {
14513 return Err(RedDBError::Query(
14514 "expression subquery must return exactly one column".to_string(),
14515 ));
14516 }
14517 let fallback_column = result
14518 .records
14519 .first()
14520 .and_then(|record| record.column_names().into_iter().next())
14521 .map(|name| name.to_string());
14522 let column = result.columns.first().cloned().or(fallback_column);
14523 let Some(column) = column else {
14524 return Ok(Vec::new());
14525 };
14526 Ok(result
14527 .records
14528 .iter()
14529 .map(|record| record.get(column.as_str()).cloned().unwrap_or(Value::Null))
14530 .collect())
14531}
14532
/// Parses a timestamp literal into a millisecond count.
///
/// Two input forms are accepted:
/// * a bare non-negative integer, which is returned unchanged;
/// * a `YYYY-MM-DD` calendar date, optionally followed by whitespace and further
///   text (ignored), which is converted to the number of days since 1970-01-01
///   multiplied by 86,400,000.
///
/// Returns `None` when the input matches neither form, when the date does not
/// exist (month outside `1..=12`, day outside the month's length), when the date
/// lies before 1970-01-01 (an unsigned result cannot represent it), or when the
/// year is too large for the day count to fit in an `i64`.
fn parse_timestamp_to_ms(s: &str) -> Option<u128> {
    const MS_PER_DAY: u128 = 86_400_000;
    const MIN_YEAR: i64 = 1970;
    // Every year has at most 366 days, so any year up to this bound keeps the
    // day count computed by `days_from_civil` inside `i64`.
    const MAX_YEAR: i64 = i64::MAX / 366;

    if let Ok(ms) = s.parse::<u128>() {
        return Some(ms);
    }

    // Only the first whitespace-delimited token is treated as the date.
    let date = s.split_whitespace().next()?;
    let mut fields = date.split('-');
    let (Some(year), Some(month), Some(day), None) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return None;
    };
    let (Ok(y), Ok(m), Ok(d)) = (
        year.parse::<i64>(),
        month.parse::<u32>(),
        day.parse::<u32>(),
    ) else {
        return None;
    };

    // Reject dates the unsigned result cannot represent or that would overflow.
    if !(MIN_YEAR..=MAX_YEAR).contains(&y) {
        return None;
    }

    // Reject months and days that do not exist instead of silently rolling
    // them over into a different date.
    let is_leap_year = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
    let month_len: u32 = match m {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return None,
    };
    if d == 0 || d > month_len {
        return None;
    }

    let days = days_from_civil(y, m, d);
    if days < 0 {
        // Defensive: cannot happen for a validated date on or after 1970-01-01.
        return None;
    }
    // Lossless: `days` is non-negative here.
    Some(days as u128 * MS_PER_DAY)
}

/// Converts a proleptic Gregorian civil date to the number of days since
/// 1970-01-01 (negative for earlier dates).
///
/// `m` is expected in `1..=12` and `d` in `1..=31`; the function does not validate
/// them. All arithmetic is done in `i64`, so an out-of-range day such as `0`
/// yields a neighbouring day rather than an unsigned underflow. Years whose day
/// count does not fit in `i64` still overflow.
fn days_from_civil(y: i64, m: u32, d: u32) -> i64 {
    // Treat March as the first month so the leap day falls at the end of the year.
    let y = if m <= 2 { y - 1 } else { y };
    // 400-year era (floor division) and the year's position within it, 0..=399.
    let era = y.div_euclid(400);
    let year_of_era = y.rem_euclid(400);
    let shifted_month = i64::from(if m > 2 { m - 3 } else { m + 9 });
    let day_of_year = (153 * shifted_month + 2) / 5 + i64::from(d) - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    // 146_097 days per era; 719_468 shifts the origin from 0000-03-01 to 1970-01-01.
    era * 146_097 + day_of_era - 719_468
}
14567
14568fn walk_plan_node(
14569 node: &crate::storage::query::planner::CanonicalLogicalNode,
14570 depth: usize,
14571 out: &mut Vec<crate::storage::query::unified::UnifiedRecord>,
14572) {
14573 use std::sync::Arc;
14574 let mut rec = crate::storage::query::unified::UnifiedRecord::default();
14575 rec.set_arc(Arc::from("op"), Value::text(node.operator.clone()));
14576 rec.set_arc(
14577 Arc::from("source"),
14578 node.source.clone().map(Value::text).unwrap_or(Value::Null),
14579 );
14580 rec.set_arc(Arc::from("est_rows"), Value::Float(node.estimated_rows));
14581 rec.set_arc(Arc::from("est_cost"), Value::Float(node.operator_cost));
14582 rec.set_arc(Arc::from("depth"), Value::Integer(depth as i64));
14583 out.push(rec);
14584 for child in &node.children {
14585 walk_plan_node(child, depth + 1, out);
14586 }
14587}
14588
#[cfg(test)]
mod inline_graph_tvf_tests {
    use super::*;

    /// Parses `sql` and returns the result-cache scopes computed for the parsed query.
    fn scopes_for(sql: &str) -> HashSet<String> {
        let parsed = crate::storage::query::parser::parse(sql).expect("parse");
        query_expr_result_cache_scopes(&parsed.query)
    }

    /// Inline `nodes => (...)` / `edges => (...)` subqueries must contribute the
    /// collections they read from to the cache scopes.
    #[test]
    fn inline_tvf_cache_scopes_include_source_collections() {
        let sql = "SELECT * FROM components(nodes => (SELECT id FROM hosts), edges => (SELECT src, dst FROM links))";
        let scopes = scopes_for(sql);
        assert!(scopes.contains("hosts"), "nodes source scoped: {scopes:?}");
        assert!(scopes.contains("links"), "edges source scoped: {scopes:?}");
    }

    /// The single-argument collection form yields no cache scopes.
    #[test]
    fn graph_collection_tvf_has_no_cache_scope() {
        let sql = "SELECT * FROM components(g)";
        let scopes = scopes_for(sql);
        assert!(scopes.is_empty(), "collection form unscoped: {scopes:?}");
    }

    /// On the path a - b - c each edge counts towards both of its endpoints.
    #[test]
    fn abstract_degree_centrality_counts_undirected_endpoints() {
        let nodes: Vec<String> = ["a", "b", "c"].iter().map(|id| id.to_string()).collect();
        let edges: Vec<(String, String, f32)> = [("a", "b"), ("b", "c")]
            .iter()
            .map(|(from, to)| (from.to_string(), to.to_string(), 1.0_f32))
            .collect();

        assert_eq!(
            abstract_degree_centrality(&nodes, &edges),
            vec![
                ("a".to_string(), 1),
                ("b".to_string(), 2),
                ("c".to_string(), 1),
            ]
        );
    }
}