Skip to main content

reddb_server/runtime/
impl_dml.rs

1//! DML execution: INSERT, UPDATE, DELETE via SQL AST
2//!
3//! Implements `execute_insert`, `execute_update`, and `execute_delete` on
4//! `RedDBRuntime`.  Each method translates the parsed AST into entity-level
5//! operations through the existing `RuntimeEntityPort` trait so that all
6//! cross-cutting concerns (WAL, indexing, replication) are automatically
7//! applied.
8
9use crate::application::entity::{
10    metadata_from_json, AppliedEntityMutation, CreateDocumentInput, CreateEdgeInput,
11    CreateEntityOutput, CreateKvInput, CreateNodeInput, CreateRowInput, CreateRowsBatchInput,
12    CreateVectorInput, DeleteEntityInput, PatchEntityOperation, PatchEntityOperationType,
13    RowUpdateColumnRule, RowUpdateContractPlan,
14};
15use crate::application::ports::{
16    build_row_update_contract_plan, entity_row_fields_snapshot,
17    normalize_row_update_assignment_with_plan, normalize_row_update_value_for_rule,
18    RuntimeEntityPort,
19};
20use crate::application::ttl_payload::has_internal_ttl_metadata;
21use crate::presentation::entity_json::storage_value_to_json;
22use crate::storage::query::ast::{BinOp, Expr, FieldRef, ReturningItem, UpdateTarget};
23use crate::storage::query::sql_lowering::{
24    effective_delete_filter, effective_insert_rows, effective_update_filter, fold_expr_to_value,
25};
26use crate::storage::query::unified::{
27    sys_key_collection, sys_key_created_at, sys_key_kind, sys_key_red_entity_id, sys_key_rid,
28    sys_key_tenant, sys_key_updated_at, UnifiedRecord, UnifiedResult,
29};
30use crate::storage::unified::MetadataValue;
31use crate::storage::Metadata;
32use std::collections::HashMap;
33use std::sync::Arc;
34
35use super::*;
36
/// Chunk size (2048) for the UPDATE apply path.
///
/// NOTE(review): the use sites are outside this section — presumably the
/// maximum number of mutations applied per chunk; confirm there.
const UPDATE_APPLY_CHUNK_SIZE: usize = 2048;
/// The literal edge label `TREE_CHILD`.
///
/// NOTE(review): presumably the label of parent-to-child tree edges; confirm
/// against the code that reads it.
const TREE_CHILD_EDGE_LABEL: &str = "TREE_CHILD";
/// The literal key prefix `red.tree.`.
///
/// NOTE(review): presumably the namespace of tree-related metadata keys;
/// confirm against the code that reads it.
const TREE_METADATA_PREFIX: &str = "red.tree.";
40
41#[derive(Clone)]
42struct CompiledUpdateAssignment {
43    column: String,
44    expr: Expr,
45    compound_op: Option<BinOp>,
46    metadata_key: Option<&'static str>,
47    row_rule: Option<RowUpdateColumnRule>,
48}
49
50struct CompiledUpdatePlan {
51    static_field_assignments: Vec<(String, Value)>,
52    static_metadata_assignments: Vec<(String, MetadataValue)>,
53    dynamic_assignments: Vec<CompiledUpdateAssignment>,
54    row_contract_plan: Option<RowUpdateContractPlan>,
55    row_modified_columns: Vec<String>,
56    row_touches_unique_columns: bool,
57}
58
59#[derive(Default)]
60struct MaterializedUpdateAssignments {
61    dynamic_field_assignments: Vec<(String, Value)>,
62    dynamic_metadata_assignments: Vec<(String, MetadataValue)>,
63}
64
65impl RedDBRuntime {
66    /// Issue #524 — public read of the in-memory chain tip. Returns `None`
67    /// when the collection is not a chain or has no rows (pre-genesis). On a
68    /// cold cache the first call falls back to a one-time scan so the HTTP
69    /// `GET /collections/:name/chain-tip` handler stays consistent with the
70    /// INSERT path after a restart.
71    pub fn chain_tip_for_collection(
72        &self,
73        collection: &str,
74    ) -> Option<crate::runtime::blockchain_kind::ChainTipFull> {
75        let store = self.inner.db.store();
76        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
77            return None;
78        }
79        let mut cache = self.inner.chain_tip_cache.lock();
80        if let Some(existing) = cache.get(collection) {
81            return Some(existing.clone());
82        }
83        let scanned = crate::runtime::blockchain_kind::chain_tip_full(&store, collection)?;
84        cache.insert(collection.to_string(), scanned.clone());
85        Some(scanned)
86    }
87
88    /// Issue #525 — walks the chain end-to-end, recomputes each block's hash
89    /// against the stored fields, and returns the verification outcome.  On
90    /// `ok == false` the integrity flag is persisted and the in-memory cache
91    /// is updated so subsequent INSERTs surface `ChainIntegrityBroken`.
92    ///
93    /// Returns `None` when the collection is absent or not a `KIND blockchain`.
94    pub fn verify_chain_for_collection(
95        &self,
96        collection: &str,
97    ) -> Option<crate::runtime::blockchain_kind::VerifyChainOutcome> {
98        let store = self.inner.db.store();
99        let outcome = crate::runtime::blockchain_kind::verify_chain_outcome(&store, collection)?;
100        if !outcome.ok {
101            crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, true);
102            self.inner
103                .chain_integrity_broken
104                .lock()
105                .insert(collection.to_string(), true);
106        }
107        Some(outcome)
108    }
109
110    /// Issue #525 — admin clears the `ChainIntegrityBroken` flag so the chain
111    /// accepts INSERTs again.  Returns `false` when the collection is not a
112    /// chain.
113    pub fn clear_chain_integrity_flag(&self, collection: &str) -> bool {
114        let store = self.inner.db.store();
115        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
116            return false;
117        }
118        crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, false);
119        self.inner
120            .chain_integrity_broken
121            .lock()
122            .insert(collection.to_string(), false);
123        true
124    }
125
126    /// Issue #525 — INSERT-time check.  Combines in-memory cache (fast path)
127    /// with a one-time scan of `red_config` on cold start so the flag survives
128    /// restart.
129    fn is_chain_integrity_broken(&self, collection: &str) -> bool {
130        {
131            let cache = self.inner.chain_integrity_broken.lock();
132            if let Some(v) = cache.get(collection) {
133                return *v;
134            }
135        }
136        let store = self.inner.db.store();
137        let persisted =
138            crate::runtime::blockchain_kind::is_integrity_broken_persisted(&store, collection)
139                .unwrap_or(false);
140        self.inner
141            .chain_integrity_broken
142            .lock()
143            .insert(collection.to_string(), persisted);
144        persisted
145    }
146
147    /// Issue #765 / S6 — lazily hydrate the integrity-tombstone cache from
148    /// `red_config` on first access. Returns `true` when at least one
149    /// tombstone range is present. Subsequent calls observe the cached state
150    /// flag (`1` empty / `2` present) and skip the store scan.
151    fn ensure_integrity_tombstones_loaded(&self) -> bool {
152        use std::sync::atomic::Ordering;
153        match self
154            .inner
155            .integrity_tombstones_state
156            .load(Ordering::Relaxed)
157        {
158            1 => return false,
159            2 => return true,
160            _ => {}
161        }
162        // Cold: load under the cache lock so a concurrent reader cannot
163        // observe a half-populated vector.
164        let mut guard = self.inner.integrity_tombstones.lock();
165        if self
166            .inner
167            .integrity_tombstones_state
168            .load(Ordering::Relaxed)
169            == 0
170        {
171            let ranges = crate::runtime::integrity_tombstone::load_ranges(&self.inner.db.store());
172            let present = !ranges.is_empty();
173            *guard = ranges;
174            self.inner
175                .integrity_tombstones_state
176                .store(if present { 2 } else { 1 }, Ordering::Relaxed);
177        }
178        self.inner
179            .integrity_tombstones_state
180            .load(Ordering::Relaxed)
181            == 2
182    }
183
184    /// Issue #765 / S6 — durably record an integrity tombstone over the
185    /// inclusive RID range `[lo, hi]` of `table` (the committed rows of an
186    /// input stream whose end-to-end SHA-256 digest did not match). The range
187    /// is persisted to `red_config` (survives restart) and folded into the
188    /// in-memory cache so the same process filters it immediately.
189    pub fn record_integrity_tombstone(&self, table: &str, lo: u64, hi: u64) {
190        use std::sync::atomic::Ordering;
191        self.ensure_integrity_tombstones_loaded();
192        let mut guard = self.inner.integrity_tombstones.lock();
193        guard.push(crate::runtime::integrity_tombstone::TombstoneRange::new(
194            table.to_string(),
195            lo,
196            hi,
197        ));
198        crate::runtime::integrity_tombstone::persist_ranges(&self.inner.db.store(), &guard);
199        self.inner
200            .integrity_tombstones_state
201            .store(2, Ordering::Relaxed);
202    }
203
204    /// Issue #765 / S6 — snapshot of the currently-cached tombstone ranges.
205    /// Intended for tests and forensic surfaces; the read path uses
206    /// [`Self::filter_integrity_tombstoned`] which avoids the clone.
207    pub fn integrity_tombstone_ranges(
208        &self,
209    ) -> Vec<crate::runtime::integrity_tombstone::TombstoneRange> {
210        self.ensure_integrity_tombstones_loaded();
211        self.inner.integrity_tombstones.lock().clone()
212    }
213
214    /// Issue #765 / S6 — drop tombstoned rows from a SELECT result in place.
215    /// Fast no-op (one relaxed atomic load) when no tombstone has ever been
216    /// recorded. Clears `pre_serialized_json` when any row is removed so the
217    /// fast-path JSON cannot leak a filtered row back onto the wire.
218    pub fn filter_integrity_tombstoned(&self, result: &mut UnifiedResult) {
219        if !self.ensure_integrity_tombstones_loaded() {
220            return;
221        }
222        let guard = self.inner.integrity_tombstones.lock();
223        if guard.is_empty() {
224            return;
225        }
226        let before = result.records.len();
227        result.records.retain(|record| {
228            !crate::runtime::integrity_tombstone::record_tombstoned(&guard, record)
229        });
230        if result.records.len() != before {
231            result.pre_serialized_json = None;
232        }
233    }
234
235    /// Phase 2.5.4: inject `CURRENT_TENANT()` into an INSERT when the
236    /// target table is tenant-scoped and the user's column list does
237    /// not already name the tenant column.
238    ///
239    /// Returns:
240    /// * `Ok(None)` — no injection needed (non-tenant table, or user
241    ///   supplied the column explicitly). Caller uses the original
242    ///   query unchanged.
243    /// * `Ok(Some(augmented))` — a cloned query with the tenant column
244    ///   + literal value appended to every row.
245    /// * `Err(..)` — table is tenant-scoped but no tenant is bound to
246    ///   the current session. Fails loudly so callers don't produce
247    ///   rows that RLS would then hide on read.
248    fn maybe_inject_tenant_column(&self, query: &InsertQuery) -> RedDBResult<Option<InsertQuery>> {
249        let Some(tenant_col) = self.tenant_column(&query.table) else {
250            return Ok(None);
251        };
252        // User already named the column (literal match) — trust them.
253        if query
254            .columns
255            .iter()
256            .any(|c| c.eq_ignore_ascii_case(&tenant_col))
257        {
258            return Ok(None);
259        }
260
261        // Phase 2 PG parity: dotted-path tenancy. When `tenant_col` is a
262        // nested key like `headers.tenant` we operate on the root
263        // column (`headers`) and set / add the nested path inside its
264        // JSON value. If the user named the root column we mutate in
265        // place; otherwise we create a fresh JSON column for every row.
266        if let Some(dot_pos) = tenant_col.find('.') {
267            let (root, tail) = tenant_col.split_at(dot_pos);
268            let tail = &tail[1..]; // drop leading '.'
269            return self.inject_dotted_tenant(query, root, tail);
270        }
271
272        let Some(tenant_id) = crate::runtime::impl_core::current_tenant() else {
273            return Err(RedDBError::Query(format!(
274                "INSERT into tenant-scoped table '{}' requires an active tenant — \
275                 run SET TENANT '<id>' first or name column '{}' explicitly",
276                query.table, tenant_col
277            )));
278        };
279
280        let mut augmented = query.clone();
281        augmented.columns.push(tenant_col);
282        let lit = Value::text(tenant_id.clone());
283        for row in augmented.values.iter_mut() {
284            row.push(lit.clone());
285        }
286        for row in augmented.value_exprs.iter_mut() {
287            row.push(crate::storage::query::ast::Expr::Literal {
288                value: lit.clone(),
289                span: crate::storage::query::ast::Span::synthetic(),
290            });
291        }
292        Ok(Some(augmented))
293    }
294
295    /// Dotted-path auto-fill — set `root.tail` to `CURRENT_TENANT()` on
296    /// every row. Mirrors `maybe_inject_tenant_column` but mutates
297    /// nested JSON instead of appending a flat column.
298    ///
299    /// Cases:
300    /// * Root column already in the INSERT list → mutate per-row JSON
301    ///   (parse, set path, re-serialize).
302    /// * Root column absent → create a fresh `{tail: tenant}` JSON
303    ///   object and append the root column to the INSERT.
304    fn inject_dotted_tenant(
305        &self,
306        query: &InsertQuery,
307        root: &str,
308        tail: &str,
309    ) -> RedDBResult<Option<InsertQuery>> {
310        let active_tenant = crate::runtime::impl_core::current_tenant();
311        let mut augmented = query.clone();
312        let root_idx = augmented
313            .columns
314            .iter()
315            .position(|c| c.eq_ignore_ascii_case(root));
316
317        if let Some(idx) = root_idx {
318            // User supplied the root column. Per-row: if the dotted
319            // tail is already present we trust the user (admin / bulk
320            // loader scenario); otherwise fill from the active
321            // tenant. An unbound tenant is only an error when some
322            // row actually needs filling.
323            for row in augmented.values.iter_mut() {
324                let Some(slot) = row.get_mut(idx) else {
325                    continue;
326                };
327                if dotted_tail_already_set(slot, tail) {
328                    continue;
329                }
330                let Some(tenant_id) = &active_tenant else {
331                    return Err(RedDBError::Query(format!(
332                        "INSERT into tenant-scoped table '{}' requires an active tenant — \
333                         run SET TENANT '<id>' first or set '{}.{}' explicitly in each row",
334                        query.table, root, tail
335                    )));
336                };
337                *slot = merge_dotted_tenant(slot.clone(), tail, tenant_id)?;
338            }
339            // Expression row is kept in sync by re-wrapping the
340            // mutated literal; the canonical path will re-evaluate
341            // against the same JSON shape.
342            for (row_idx, row) in augmented.value_exprs.iter_mut().enumerate() {
343                if let Some(slot) = row.get_mut(idx) {
344                    let new_value = augmented
345                        .values
346                        .get(row_idx)
347                        .and_then(|v| v.get(idx))
348                        .cloned()
349                        .unwrap_or(Value::Null);
350                    *slot = crate::storage::query::ast::Expr::Literal {
351                        value: new_value,
352                        span: crate::storage::query::ast::Span::synthetic(),
353                    };
354                }
355            }
356        } else {
357            // No root column in the INSERT list — auto-fill needs a
358            // bound tenant to synthesise one. Error loud so we never
359            // create a tenant-less row that RLS would then hide.
360            let Some(tenant_id) = &active_tenant else {
361                return Err(RedDBError::Query(format!(
362                    "INSERT into tenant-scoped table '{}' requires an active tenant — \
363                     run SET TENANT '<id>' first or name path '{}.{}' explicitly",
364                    query.table, root, tail
365                )));
366            };
367            // Create a fresh JSON column with only the tenant path set.
368            augmented.columns.push(root.to_string());
369            let fresh = merge_dotted_tenant(Value::Null, tail, tenant_id)?;
370            for row in augmented.values.iter_mut() {
371                row.push(fresh.clone());
372            }
373            for row in augmented.value_exprs.iter_mut() {
374                row.push(crate::storage::query::ast::Expr::Literal {
375                    value: fresh.clone(),
376                    span: crate::storage::query::ast::Span::synthetic(),
377                });
378            }
379        }
380
381        Ok(Some(augmented))
382    }
383
384    /// Returns `(affected_count, lsns)`. For the txn (xmax-stamp) path,
385    /// `lsns` is empty because events fire at commit time.
386    fn delete_entities_batch(
387        &self,
388        collection: &str,
389        ids: &[EntityId],
390    ) -> RedDBResult<(u64, Vec<u64>)> {
391        if ids.is_empty() {
392            return Ok((0, vec![]));
393        }
394
395        let store = self.db().store();
396        let Some(manager) = store.get_collection(collection) else {
397            return Ok((0, vec![]));
398        };
399
400        let active_xid = self.current_xid();
401        let conn_id = crate::runtime::impl_core::current_connection_id();
402        let mut autocommit_xid = None;
403        let mut tombstoned_ids = Vec::new();
404        let mut tombstoned_entities = Vec::new();
405        let mut physical_delete_ids = Vec::new();
406        let table_row_resolver =
407            crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement();
408
409        for &id in ids {
410            let Some(mut entity) = manager.get(id) else {
411                continue;
412            };
413            if matches!(entity.data, EntityData::Row(_)) {
414                let previous_xmax = entity.xmax;
415                if matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }) {
416                    if table_row_resolver.resolve_candidate(&entity).is_none() {
417                        continue;
418                    }
419                } else if entity.xmax != 0 {
420                    continue;
421                }
422
423                let xid = match active_xid {
424                    Some(xid) => xid,
425                    None => match autocommit_xid {
426                        Some(xid) => xid,
427                        None => {
428                            let mgr = self.snapshot_manager();
429                            let xid = mgr.begin();
430                            autocommit_xid = Some(xid);
431                            xid
432                        }
433                    },
434                };
435                entity.set_xmax(xid);
436                if manager.update(entity.clone()).is_ok() {
437                    if active_xid.is_some() {
438                        self.record_pending_tombstone(conn_id, collection, id, xid, previous_xmax);
439                    }
440                    tombstoned_entities.push(entity);
441                    tombstoned_ids.push(id);
442                }
443            } else {
444                physical_delete_ids.push(id);
445            }
446        }
447
448        if let Some(xid) = autocommit_xid {
449            self.snapshot_manager().commit(xid);
450        }
451
452        let mut affected = tombstoned_ids.len() as u64;
453        let mut lsns = Vec::with_capacity(tombstoned_ids.len() + physical_delete_ids.len());
454        if active_xid.is_some() {
455            store
456                .persist_entities_to_pager(collection, &tombstoned_entities)
457                .map_err(|err| RedDBError::Internal(err.to_string()))?;
458        } else {
459            store
460                .persist_entities_to_pager(collection, &tombstoned_entities)
461                .map_err(|err| RedDBError::Internal(err.to_string()))?;
462            for id in &tombstoned_ids {
463                store.context_index().remove_entity(*id);
464                let lsn = self.cdc_emit(
465                    crate::replication::cdc::ChangeOperation::Delete,
466                    collection,
467                    id.raw(),
468                    "entity",
469                );
470                lsns.push(lsn);
471            }
472        }
473
474        let deleted_ids = store
475            .delete_batch(collection, &physical_delete_ids)
476            .map_err(|err| RedDBError::Internal(err.to_string()))?;
477        affected += deleted_ids.len() as u64;
478        for id in &deleted_ids {
479            store.context_index().remove_entity(*id);
480            let lsn = self.cdc_emit(
481                crate::replication::cdc::ChangeOperation::Delete,
482                collection,
483                id.raw(),
484                "entity",
485            );
486            lsns.push(lsn);
487        }
488
489        Ok((affected, lsns))
490    }
491
492    /// Flushes context-index updates and CDC for each applied mutation.
493    /// Returns one LSN per entity in the same order as `applied`.
494    fn flush_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<Vec<u64>> {
495        if applied.is_empty() {
496            return Ok(Vec::new());
497        }
498
499        let store = self.db().store();
500        if applied.iter().any(|item| item.context_index_dirty) {
501            store.context_index().index_entities(
502                &applied[0].collection,
503                applied
504                    .iter()
505                    .filter(|item| item.context_index_dirty)
506                    .map(|item| &item.entity),
507            );
508        }
509
510        for item in applied {
511            self.refresh_update_secondary_indexes(item)?;
512        }
513
514        let mut lsns = Vec::with_capacity(applied.len());
515        for item in applied {
516            let lsn = self.cdc_emit_prebuilt(
517                crate::replication::cdc::ChangeOperation::Update,
518                &item.collection,
519                &item.entity,
520                update_cdc_item_kind(self, &item.collection, &item.entity),
521                item.metadata.as_ref(),
522                false,
523            );
524            lsns.push(lsn);
525        }
526        Ok(lsns)
527    }
528
529    fn persist_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<()> {
530        self.persist_applied_entity_mutations(applied)
531    }
532
533    fn refresh_update_secondary_indexes(&self, applied: &AppliedEntityMutation) -> RedDBResult<()> {
534        if applied.pre_mutation_fields.is_empty() {
535            return Ok(());
536        }
537        let post = entity_row_fields_snapshot(&applied.entity);
538        if post.is_empty() {
539            return Ok(());
540        }
541
542        let indexed_cols = self
543            .index_store_ref()
544            .indexed_columns_set(&applied.collection);
545        if indexed_cols.is_empty() {
546            return Ok(());
547        }
548
549        if let Some(old_version) = applied.replaced_entity.as_ref() {
550            let old_index_fields: Vec<(String, crate::storage::schema::Value)> = applied
551                .pre_mutation_fields
552                .iter()
553                .filter(|(col, _)| indexed_cols.contains(col))
554                .cloned()
555                .collect();
556            let new_index_fields: Vec<(String, crate::storage::schema::Value)> = post
557                .iter()
558                .filter(|(col, _)| indexed_cols.contains(col))
559                .cloned()
560                .collect();
561            if !old_index_fields.is_empty() {
562                self.index_store_ref()
563                    .index_entity_delete(&applied.collection, old_version.id, &old_index_fields)
564                    .map_err(crate::RedDBError::Internal)?;
565            }
566            if !new_index_fields.is_empty() {
567                self.index_store_ref()
568                    .index_entity_insert(&applied.collection, applied.entity.id, &new_index_fields)
569                    .map_err(crate::RedDBError::Internal)?;
570            }
571            return Ok(());
572        }
573
574        let damage =
575            crate::application::entity::row_damage_vector(&applied.pre_mutation_fields, &post);
576        if damage
577            .touched_columns()
578            .into_iter()
579            .any(|col| indexed_cols.contains(col))
580        {
581            self.index_store_ref()
582                .index_entity_update(
583                    &applied.collection,
584                    applied.id,
585                    &applied.pre_mutation_fields,
586                    &post,
587                )
588                .map_err(crate::RedDBError::Internal)?;
589        }
590        Ok(())
591    }
592
593    /// Execute INSERT INTO table [entity_type] (cols) VALUES (vals), ...
594    ///
595    /// Each row in `query.values` is zipped with `query.columns` to produce a
596    /// set of named fields, which is then dispatched based on entity_type.
597    pub fn execute_insert(
598        &self,
599        raw_query: &str,
600        query: &InsertQuery,
601    ) -> RedDBResult<RuntimeQueryResult> {
602        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
603        // CollectionContract gate (#49): single entry point for the
604        // operator's collection-level write rules. Today this is a
605        // no-op for INSERT (APPEND ONLY permits insert); routing
606        // through the gate now means future contract bits — versioned,
607        // vault-only writes — plug in once instead of per verb.
608        crate::runtime::collection_contract::CollectionContractGate::check(
609            self,
610            &query.table,
611            crate::runtime::collection_contract::MutationKind::Insert,
612        )?;
613        // Phase 2.5.4 table-scoped tenancy: if the target table is
614        // tenant-scoped and the user didn't name the tenant column,
615        // auto-inject it with the thread-local `CURRENT_TENANT()`
616        // value. When the column is named explicitly we trust the
617        // caller (useful for admin tooling that writes on behalf of
618        // specific tenants). An unbound tenant on an implicit-fill
619        // path errors up front rather than producing a row the RLS
620        // policy would silently hide.
621        let augmented_owned;
622        let query = match self.maybe_inject_tenant_column(query)? {
623            Some(new_q) => {
624                augmented_owned = new_q;
625                &augmented_owned
626            }
627            None => query,
628        };
629        self.check_insert_column_policy(query)?;
630        if let Some(ref embed_config) = query.auto_embed {
631            let provider = crate::ai::parse_provider(&embed_config.provider)?;
632            // S3 / #711: planner-level provider gate. Runs before the
633            // local-model preflight and the API-key resolver so neither
634            // side-effect fires when policy denies.
635            crate::runtime::ai::provider_gate::enforce(self, &provider)?;
636            if matches!(provider, crate::ai::AiProvider::Local) {
637                crate::runtime::ai::local_embedding::ensure_local_embedding_available()?;
638                // Issue #682 — pre-flight the local model registry before
639                // any row write. Missing model, uninstalled artifacts,
640                // wrong task, and disabled-feature failures surface as
641                // deterministic errors that leave the target collection
642                // untouched, satisfying the "no partial writes on
643                // embedding failure" criterion for the failure modes
644                // owned by the local provider.
645                let model_name = embed_config.model.as_deref().map(str::trim).unwrap_or("");
646                if model_name.is_empty() {
647                    return Err(RedDBError::Query(
648                        "AUTO EMBED with provider=local requires MODEL '<registered-model-name>'; \
649                         the local provider does not have an implicit default model"
650                            .to_string(),
651                    ));
652                }
653                crate::runtime::ai::local_embedding::preflight_local_embedding(
654                    &self.inner.db,
655                    model_name,
656                )?;
657            }
658        }
659
660        let mut inserted_count: u64 = 0;
661        let effective_rows =
662            effective_insert_rows(query).map_err(|msg| RedDBError::Query(msg.to_string()))?;
663
664        // Ensure the collection exists (auto-create on first insert).
665        let store = self.inner.db.store();
666        let _ = store.get_or_create_collection(&query.table);
667        let declared_model = self
668            .db()
669            .collection_contract_arc(&query.table)
670            .map(|contract| contract.declared_model);
671
672        let mut returning_snapshots: Option<Vec<Vec<(String, Value)>>> =
673            if query.returning.is_some() {
674                Some(Vec::with_capacity(effective_rows.len()))
675            } else {
676                None
677            };
678        let mut returning_result: Option<UnifiedResult> = None;
679
680        if matches!(query.entity_type, InsertEntityType::Row)
681            && !matches!(
682                declared_model,
683                Some(crate::catalog::CollectionModel::TimeSeries)
684            )
685        {
686            // Issue #523 + #524: blockchain collections seal each row into the
687            // chain. When the caller omits the reserved columns, the engine
688            // auto-fills (#523). When the caller supplies any reserved column,
689            // the values are validated against the current tip and a mismatch
690            // surfaces a `BlockchainConflict:` error mapped to HTTP 409 (#524).
691            //
692            // The whole batch runs under a per-collection chain lock so two
693            // concurrent submitters can't both bind to the same prev_hash —
694            // the loser observes the advanced tip and gets 409 with the new
695            // tip so it can retry.
696            let chain_mode = crate::runtime::blockchain_kind::is_chain(&store, &query.table);
697            let _chain_lock_arc: Option<Arc<parking_lot::Mutex<()>>> = if chain_mode {
698                Some(self.inner.rmw_locks.lock_for(&query.table, "__chain__"))
699            } else {
700                None
701            };
702            let _chain_guard = _chain_lock_arc.as_ref().map(|m| m.lock());
703
704            // Issue #525 — refuse new blocks if the chain has been marked
705            // `integrity = broken` until an admin clears the flag.
706            if chain_mode && self.is_chain_integrity_broken(&query.table) {
707                return Err(RedDBError::InvalidOperation(format!(
708                    "ChainIntegrityBroken: collection '{}' is locked until \
709                     POST /collections/{}/clear-integrity-flag is called by an admin",
710                    query.table, query.table
711                )));
712            }
713
714            // Pull the tip from the in-memory cache; fall back to a one-time
715            // scan if the cache hasn't seen this collection yet (cold start
716            // after restart). Cache is updated below as rows are sealed.
717            let mut chain_tip_full: Option<crate::runtime::blockchain_kind::ChainTipFull> =
718                if chain_mode {
719                    let mut cache = self.inner.chain_tip_cache.lock();
720                    if let Some(existing) = cache.get(&query.table) {
721                        Some(existing.clone())
722                    } else if let Some(scanned) =
723                        crate::runtime::blockchain_kind::chain_tip_full(&store, &query.table)
724                    {
725                        cache.insert(query.table.clone(), scanned.clone());
726                        Some(scanned)
727                    } else {
728                        None
729                    }
730                } else {
731                    None
732                };
733
734            let mut rows = Vec::with_capacity(effective_rows.len());
735            for row_values in &effective_rows {
736                if row_values.len() != query.columns.len() {
737                    return Err(RedDBError::Query(format!(
738                        "INSERT column count ({}) does not match value count ({})",
739                        query.columns.len(),
740                        row_values.len()
741                    )));
742                }
743                let (mut fields, mut metadata) =
744                    split_insert_metadata(self, &query.columns, row_values)?;
745                if chain_mode {
746                    use crate::runtime::blockchain_kind::{
747                        chain_conflict_error, COL_BLOCK_HEIGHT, COL_HASH, COL_PREV_HASH,
748                        COL_TIMESTAMP, RESERVED_COLUMNS,
749                    };
750                    let supplied_height = fields
751                        .iter()
752                        .find(|(k, _)| k == COL_BLOCK_HEIGHT)
753                        .map(|(_, v)| v.clone());
754                    let supplied_prev = fields
755                        .iter()
756                        .find(|(k, _)| k == COL_PREV_HASH)
757                        .map(|(_, v)| v.clone());
758                    let supplied_ts = fields
759                        .iter()
760                        .find(|(k, _)| k == COL_TIMESTAMP)
761                        .map(|(_, v)| v.clone());
762                    let supplied_hash = fields.iter().any(|(k, _)| k == COL_HASH);
763                    let user_supplied_any = supplied_height.is_some()
764                        || supplied_prev.is_some()
765                        || supplied_ts.is_some()
766                        || supplied_hash;
767
768                    fields.retain(|(k, _)| !RESERVED_COLUMNS.contains(&k.as_str()));
769                    let payload = crate::runtime::blockchain_kind::canonical_payload(&fields);
770
771                    let (tip_prev_hash, tip_next_height) = match &chain_tip_full {
772                        Some(t) => (t.hash, t.height + 1),
773                        None => (crate::storage::blockchain::GENESIS_PREV_HASH, 0u64),
774                    };
775                    let server_now = crate::runtime::blockchain_kind::now_ms();
776
777                    let (use_prev, use_height, use_ts) = if user_supplied_any {
778                        // Caller is participating in the chain protocol —
779                        // every field must be supplied AND match the tip.
780                        if supplied_hash {
781                            return Err(chain_conflict_error(
782                                tip_next_height.saturating_sub(1),
783                                tip_prev_hash,
784                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
785                                server_now,
786                                "hash column is engine-computed and cannot be supplied",
787                            ));
788                        }
789                        let caller_prev = match &supplied_prev {
790                            Some(Value::Blob(b)) if b.len() == 32 => {
791                                let mut a = [0u8; 32];
792                                a.copy_from_slice(b);
793                                a
794                            }
795                            Some(Value::Text(s)) if s.len() == 64 => {
796                                // Accept hex-encoded prev_hash so JSON / SQL
797                                // callers without literal-blob syntax can
798                                // still participate in the chain protocol.
799                                let mut a = [0u8; 32];
800                                let mut ok = true;
801                                for (i, slot) in a.iter_mut().enumerate() {
802                                    let pair = &s.as_ref()[i * 2..i * 2 + 2];
803                                    match u8::from_str_radix(pair, 16) {
804                                        Ok(byte) => *slot = byte,
805                                        Err(_) => {
806                                            ok = false;
807                                            break;
808                                        }
809                                    }
810                                }
811                                if !ok {
812                                    return Err(chain_conflict_error(
813                                        tip_next_height.saturating_sub(1),
814                                        tip_prev_hash,
815                                        chain_tip_full
816                                            .as_ref()
817                                            .map(|t| t.timestamp_ms)
818                                            .unwrap_or(0),
819                                        server_now,
820                                        "prev_hash is not valid hex",
821                                    ));
822                                }
823                                a
824                            }
825                            _ => {
826                                return Err(chain_conflict_error(
827                                    tip_next_height.saturating_sub(1),
828                                    tip_prev_hash,
829                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
830                                    server_now,
831                                    "prev_hash missing or not a 32-byte Blob",
832                                ));
833                            }
834                        };
835                        if caller_prev != tip_prev_hash {
836                            return Err(chain_conflict_error(
837                                tip_next_height.saturating_sub(1),
838                                tip_prev_hash,
839                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
840                                server_now,
841                                "prev_hash does not match current tip",
842                            ));
843                        }
844                        let caller_height = match &supplied_height {
845                            Some(Value::UnsignedInteger(v)) => *v,
846                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
847                            _ => {
848                                return Err(chain_conflict_error(
849                                    tip_next_height.saturating_sub(1),
850                                    tip_prev_hash,
851                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
852                                    server_now,
853                                    "block_height missing or not an unsigned integer",
854                                ));
855                            }
856                        };
857                        if caller_height != tip_next_height {
858                            return Err(chain_conflict_error(
859                                tip_next_height.saturating_sub(1),
860                                tip_prev_hash,
861                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
862                                server_now,
863                                "block_height does not match tip+1",
864                            ));
865                        }
866                        let caller_ts = match &supplied_ts {
867                            Some(Value::UnsignedInteger(v)) => *v,
868                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
869                            _ => {
870                                return Err(chain_conflict_error(
871                                    tip_next_height.saturating_sub(1),
872                                    tip_prev_hash,
873                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
874                                    server_now,
875                                    "timestamp missing or not an unsigned integer",
876                                ));
877                            }
878                        };
879                        let drift = (caller_ts as i128) - (server_now as i128);
880                        if drift.abs() > 60_000 {
881                            return Err(chain_conflict_error(
882                                tip_next_height.saturating_sub(1),
883                                tip_prev_hash,
884                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
885                                server_now,
886                                "timestamp outside ±60s of server_time",
887                            ));
888                        }
889                        (caller_prev, caller_height, caller_ts)
890                    } else {
891                        (tip_prev_hash, tip_next_height, server_now)
892                    };
893
894                    let (reserved, new_hash) =
895                        crate::runtime::blockchain_kind::make_block_reserved_fields(
896                            use_prev, use_height, use_ts, &payload,
897                        );
898                    fields.extend(reserved);
899                    chain_tip_full = Some(crate::runtime::blockchain_kind::ChainTipFull {
900                        height: use_height,
901                        hash: new_hash,
902                        timestamp_ms: use_ts,
903                    });
904                }
905                // Issue #522 — signed-writes verification. On collections
906                // created with `SIGNED_BY (...)` the row must carry valid
907                // `signer_pubkey` + `signature` reserved columns. Runs
908                // after chain_mode so canonical payload covers user-supplied
909                // fields only (blockchain reserved columns are filtered by
910                // `canonical_payload`; the two signed-writes reserved
911                // columns are split out before payload computation, then
912                // re-attached for storage). The blockchain + SIGNED_BY
913                // composition is owned by issue #526; we keep #522 to the
914                // non-chain path and let chain_mode collections punt to that
915                // slice rather than half-wire it here.
916                if crate::runtime::signed_writes_kind::is_signed(&store, &query.table) {
917                    let (pk_col, sig_col, residual) =
918                        crate::runtime::signed_writes_kind::split_signature_fields(fields);
919                    let payload = crate::runtime::blockchain_kind::canonical_payload(&residual);
920                    let reg = crate::runtime::signed_writes_kind::registry(&store, &query.table);
921                    crate::runtime::signed_writes_kind::verify_row(
922                        &reg,
923                        pk_col.as_ref().map(|c| c.bytes.as_slice()),
924                        sig_col.as_ref().map(|c| c.bytes.as_slice()),
925                        &payload,
926                    )
927                    .map_err(crate::runtime::signed_writes_kind::map_error)?;
928                    fields = residual;
929                    // Round-trip the reserved columns with the value
930                    // type the caller supplied (Text/hex on the SQL path,
931                    // Blob on the binary path). Keeps SELECT and WHERE
932                    // predicates symmetric with the INSERT shape.
933                    if let Some(col) = pk_col {
934                        fields.push((
935                            crate::storage::signed_writes::RESERVED_SIGNER_PUBKEY_COL.to_string(),
936                            col.raw_value,
937                        ));
938                    }
939                    if let Some(col) = sig_col {
940                        fields.push((
941                            crate::storage::signed_writes::RESERVED_SIGNATURE_COL.to_string(),
942                            col.raw_value,
943                        ));
944                    }
945                }
946                merge_with_clauses(
947                    &mut metadata,
948                    query.ttl_ms,
949                    query.expires_at_ms,
950                    &query.with_metadata,
951                );
952                if let Some(snaps) = returning_snapshots.as_mut() {
953                    snaps.push(fields.clone());
954                }
955                rows.push(CreateRowInput {
956                    collection: query.table.clone(),
957                    fields,
958                    metadata,
959                    node_links: Vec::new(),
960                    vector_links: Vec::new(),
961                });
962            }
963            let outputs = self.create_rows_batch(CreateRowsBatchInput {
964                collection: query.table.clone(),
965                rows,
966                suppress_events: query.suppress_events,
967            })?;
968            inserted_count = outputs.len() as u64;
969
970            // Chain mode: commit the new tip to the in-memory cache only after
971            // the batch persisted successfully. If the batch threw mid-way the
972            // cache stays on the previous tip and the chain lock releases.
973            if chain_mode {
974                if let Some(new_tip) = chain_tip_full.as_ref() {
975                    self.inner
976                        .chain_tip_cache
977                        .lock()
978                        .insert(query.table.clone(), new_tip.clone());
979                }
980            }
981
982            // Hypertable chunk routing: if this table was declared via
983            // CREATE HYPERTABLE, register each row's time-column value
984            // with the registry so chunk metadata (bounds, row counts,
985            // TTL eligibility) stays current. This is what lets
986            // HYPERTABLE_PRUNE_CHUNKS answer real questions + lets the
987            // retention daemon sweep expired chunks without scanning
988            // every row.
989            if let Some(spec) = self.inner.db.hypertables().get(&query.table) {
990                let time_col = &spec.time_column;
991                // Find the column's index in the INSERT column list.
992                if let Some(idx) = query.columns.iter().position(|c| c == time_col) {
993                    for row in &effective_rows {
994                        if let Some(Value::Integer(n) | Value::BigInt(n)) = row.get(idx) {
995                            if *n >= 0 {
996                                let _ = self.inner.db.hypertables().route(&query.table, *n as u64);
997                            }
998                        } else if let Some(Value::UnsignedInteger(n)) = row.get(idx) {
999                            let _ = self.inner.db.hypertables().route(&query.table, *n);
1000                        }
1001                    }
1002                }
1003            }
1004
1005            if let (Some(items), Some(snaps)) =
1006                (query.returning.as_ref(), returning_snapshots.take())
1007            {
1008                let snaps = row_insert_returning_snapshots(&outputs, snaps);
1009                returning_result = Some(build_returning_result(items, &snaps, Some(&outputs)));
1010            }
1011        } else {
1012            // Issue #419: surface the inserted entity id on every INSERT path.
1013            // For Node/Edge/Vector/Document/Kv we now keep each CreateEntityOutput
1014            // so a RETURNING clause (and the unconditional inserted_ids list,
1015            // below) can expose the engine-assigned id. TimeSeries (the row
1016            // branch in this else) still returns the not-supported error
1017            // because create_timeseries_point isn't plumbed through this fn.
1018            let mut entity_outputs: Vec<crate::application::entity::CreateEntityOutput> =
1019                Vec::with_capacity(effective_rows.len());
1020            let mut returning_field_snaps: Vec<Vec<(String, Value)>> = if query.returning.is_some()
1021            {
1022                Vec::with_capacity(effective_rows.len())
1023            } else {
1024                Vec::new()
1025            };
1026            if matches!(
1027                query.entity_type,
1028                InsertEntityType::Node | InsertEntityType::Edge
1029            ) {
1030                enum PreparedGraphInsert {
                        // One staged NODE or EDGE row. Every input row is converted
                        // into one of these variants first; only after the whole
                        // statement has been prepared are the staged items fed into
                        // the batch builder, so a failure while preparing any row
                        // returns before anything is added to the batch.
                        //
                        // Staged form of an `INSERT ... NODE` row.
1031                    Node {
                            // Column/value pairs for this row: the first element
                            // returned by `split_insert_metadata`. Pushed into the
                            // RETURNING snapshot list when the query has a
                            // RETURNING clause.
1032                        fields: Vec<(String, Value)>,
                            // Collection, label, optional node type, properties and
                            // metadata that are later unpacked into
                            // `batch.add_node_with_type`.
1033                        input: CreateNodeInput,
1034                    },
                        // Staged form of an `INSERT ... EDGE` row.
1035                    Edge {
                            // Column/value pairs for this row: the first element
                            // returned by `split_insert_metadata`. Pushed into the
                            // RETURNING snapshot list when the query has a
                            // RETURNING clause.
1036                        fields: Vec<(String, Value)>,
                            // Collection, label, resolved endpoints, optional weight,
                            // properties and metadata that are later unpacked into
                            // `batch.add_edge` (a missing weight becomes 1.0 there).
1037                        input: CreateEdgeInput,
1038                    },
1039                }
1040
1041                let mut prepared = Vec::with_capacity(effective_rows.len());
1042                for row_values in &effective_rows {
1043                    if row_values.len() != query.columns.len() {
1044                        return Err(RedDBError::Query(format!(
1045                            "INSERT column count ({}) does not match value count ({})",
1046                            query.columns.len(),
1047                            row_values.len()
1048                        )));
1049                    }
1050
1051                    match query.entity_type {
1052                        InsertEntityType::Node => {
1053                            let (node_values, mut metadata) =
1054                                split_insert_metadata(self, &query.columns, row_values)?;
1055                            merge_with_clauses(
1056                                &mut metadata,
1057                                query.ttl_ms,
1058                                query.expires_at_ms,
1059                                &query.with_metadata,
1060                            );
1061                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1062                            apply_collection_default_ttl_metadata(
1063                                self,
1064                                &query.table,
1065                                &mut metadata,
1066                            );
1067                            let (columns, values) = pairwise_columns_values(&node_values);
1068                            let label = find_column_value_string(&columns, &values, "label")?;
1069                            let node_type =
1070                                find_column_value_opt_string(&columns, &values, "node_type");
1071                            let properties = extract_remaining_properties(
1072                                &columns,
1073                                &values,
1074                                &["label", "node_type"],
1075                            );
1076                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1077                                properties.iter().map(|(key, _)| key.as_str()),
1078                                &format!("node '{}'", query.table),
1079                            )?;
1080                            prepared.push(PreparedGraphInsert::Node {
1081                                fields: node_values,
1082                                input: CreateNodeInput {
1083                                    collection: query.table.clone(),
1084                                    label,
1085                                    node_type,
1086                                    properties,
1087                                    metadata,
1088                                    embeddings: Vec::new(),
1089                                    table_links: Vec::new(),
1090                                    node_links: Vec::new(),
1091                                },
1092                            });
1093                        }
1094                        InsertEntityType::Edge => {
1095                            let (edge_values, mut metadata) =
1096                                split_insert_metadata(self, &query.columns, row_values)?;
1097                            merge_with_clauses(
1098                                &mut metadata,
1099                                query.ttl_ms,
1100                                query.expires_at_ms,
1101                                &query.with_metadata,
1102                            );
1103                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1104                            apply_collection_default_ttl_metadata(
1105                                self,
1106                                &query.table,
1107                                &mut metadata,
1108                            );
1109                            let (columns, values) = pairwise_columns_values(&edge_values);
1110                            let label = find_column_value_string(&columns, &values, "label")?;
1111                            ensure_non_tree_structural_edge_label(&label)?;
1112                            let from_id = resolve_edge_endpoint_any(
1113                                self.inner.db.store().as_ref(),
1114                                &query.table,
1115                                &columns,
1116                                &values,
1117                                &["from_rid", "from"],
1118                            )?;
1119                            let to_id = resolve_edge_endpoint_any(
1120                                self.inner.db.store().as_ref(),
1121                                &query.table,
1122                                &columns,
1123                                &values,
1124                                &["to_rid", "to"],
1125                            )?;
1126                            let weight = find_column_value_f32_opt(&columns, &values, "weight");
1127                            let properties = extract_remaining_properties(
1128                                &columns,
1129                                &values,
1130                                &["label", "from_rid", "to_rid", "from", "to", "weight"],
1131                            );
1132                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1133                                properties.iter().map(|(key, _)| key.as_str()),
1134                                &format!("edge '{}'", query.table),
1135                            )?;
1136                            prepared.push(PreparedGraphInsert::Edge {
1137                                fields: edge_values,
1138                                input: CreateEdgeInput {
1139                                    collection: query.table.clone(),
1140                                    label,
1141                                    from: EntityId::new(from_id),
1142                                    to: EntityId::new(to_id),
1143                                    weight,
1144                                    properties,
1145                                    metadata,
1146                                },
1147                            });
1148                        }
1149                        _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1150                    }
1151                }
1152
1153                ensure_graph_insert_contract(self, &query.table)?;
1154                let mut batch = self.inner.db.batch();
1155                for item in prepared {
1156                    match item {
1157                        PreparedGraphInsert::Node { fields, input } => {
1158                            if query.returning.is_some() {
1159                                returning_field_snaps.push(fields);
1160                            }
1161                            let node_type = input.node_type.unwrap_or_else(|| input.label.clone());
1162                            batch = batch.add_node_with_type(
1163                                input.collection,
1164                                input.label,
1165                                node_type,
1166                                input.properties.into_iter().collect(),
1167                                input.metadata.into_iter().collect(),
1168                            );
1169                        }
1170                        PreparedGraphInsert::Edge { fields, input } => {
1171                            if query.returning.is_some() {
1172                                returning_field_snaps.push(fields);
1173                            }
1174                            batch = batch.add_edge(
1175                                input.collection,
1176                                input.label,
1177                                input.from,
1178                                input.to,
1179                                input.weight.unwrap_or(1.0),
1180                                input.properties.into_iter().collect(),
1181                                input.metadata.into_iter().collect(),
1182                            );
1183                        }
1184                    }
1185                }
1186                let batch_result = batch
1187                    .execute()
1188                    .map_err(|err| RedDBError::Internal(format!("{err:?}")))?;
1189                let (ids, entity_kind) = match query.entity_type {
1190                    InsertEntityType::Node => (batch_result.nodes, "graph_node"),
1191                    InsertEntityType::Edge => (batch_result.edges, "graph_edge"),
1192                    _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1193                };
1194                for id in &ids {
1195                    self.stamp_xmin_if_in_txn(&query.table, *id);
1196                }
1197                if query.returning.is_some() {
1198                    returning_field_snaps = graph_insert_returning_snapshots(
1199                        self.inner.db.store().as_ref(),
1200                        &query.table,
1201                        &ids,
1202                    );
1203                }
1204                self.cdc_emit_insert_batch_no_cache_invalidate(&query.table, &ids, entity_kind);
1205                let store = self.inner.db.store();
1206                entity_outputs.extend(ids.iter().map(|id| {
1207                    crate::application::entity::CreateEntityOutput {
1208                        id: *id,
1209                        entity: store.get(&query.table, *id),
1210                    }
1211                }));
1212                inserted_count = ids.len() as u64;
1213            } else {
1214                for row_values in &effective_rows {
1215                    if row_values.len() != query.columns.len() {
1216                        return Err(RedDBError::Query(format!(
1217                            "INSERT column count ({}) does not match value count ({})",
1218                            query.columns.len(),
1219                            row_values.len()
1220                        )));
1221                    }
1222
1223                    match query.entity_type {
1224                        InsertEntityType::Row => {
1225                            if query.returning.is_some() {
1226                                return Err(RedDBError::Query(
1227                                "RETURNING is not yet supported for this INSERT path (TimeSeries)"
1228                                    .to_string(),
1229                            ));
1230                            }
1231                            let (fields, mut metadata) =
1232                                split_insert_metadata(self, &query.columns, row_values)?;
1233                            merge_with_clauses(
1234                                &mut metadata,
1235                                query.ttl_ms,
1236                                query.expires_at_ms,
1237                                &query.with_metadata,
1238                            );
1239                            self.insert_timeseries_point(&query.table, fields, metadata)?;
1240                        }
1241                        InsertEntityType::Node | InsertEntityType::Edge => {
1242                            unreachable!("NODE and EDGE are handled by the prepared graph path")
1243                        }
1244                        InsertEntityType::Vector => {
1245                            let (vector_values, mut metadata) =
1246                                split_insert_metadata(self, &query.columns, row_values)?;
1247                            merge_with_clauses(
1248                                &mut metadata,
1249                                query.ttl_ms,
1250                                query.expires_at_ms,
1251                                &query.with_metadata,
1252                            );
1253                            let (columns, values) = pairwise_columns_values(&vector_values);
1254                            let dense = find_column_value_vec_f32_any(
1255                                &columns,
1256                                &values,
1257                                &["dense", "embedding"],
1258                            )?;
1259                            merge_vector_metadata_column(&mut metadata, &columns, &values)?;
1260                            let content =
1261                                find_column_value_opt_string(&columns, &values, "content");
1262                            if query.returning.is_some() {
1263                                returning_field_snaps.push(vector_values.clone());
1264                            }
1265                            let input = CreateVectorInput {
1266                                collection: query.table.clone(),
1267                                dense,
1268                                content,
1269                                metadata,
1270                                link_row: None,
1271                                link_node: None,
1272                            };
1273                            entity_outputs.push(self.create_vector(input)?);
1274                        }
1275                        InsertEntityType::Document => {
1276                            let (document_values, mut metadata) =
1277                                split_insert_metadata(self, &query.columns, row_values)?;
1278                            merge_with_clauses(
1279                                &mut metadata,
1280                                query.ttl_ms,
1281                                query.expires_at_ms,
1282                                &query.with_metadata,
1283                            );
1284                            let (columns, values) = pairwise_columns_values(&document_values);
1285                            let body = find_document_body_json(&columns, &values)?;
1286                            let input = CreateDocumentInput {
1287                                collection: query.table.clone(),
1288                                body,
1289                                metadata,
1290                                node_links: Vec::new(),
1291                                vector_links: Vec::new(),
1292                            };
1293                            let output = self.create_document(input)?;
1294                            if query.returning.is_some() {
1295                                let fields = output
1296                                    .entity
1297                                    .as_ref()
1298                                    .map(entity_row_fields_snapshot)
1299                                    .filter(|fields| !fields.is_empty())
1300                                    .unwrap_or(document_values);
1301                                returning_field_snaps.push(fields);
1302                            }
1303                            entity_outputs.push(output);
1304                        }
1305                        InsertEntityType::Kv => {
1306                            let (kv_values, mut metadata) =
1307                                split_insert_metadata(self, &query.columns, row_values)?;
1308                            merge_with_clauses(
1309                                &mut metadata,
1310                                query.ttl_ms,
1311                                query.expires_at_ms,
1312                                &query.with_metadata,
1313                            );
1314                            let (columns, values) = pairwise_columns_values(&kv_values);
1315                            let key = find_column_value_string(&columns, &values, "key")?;
1316                            let value = find_column_value(&columns, &values, "value")?;
1317                            if query.returning.is_some() {
1318                                returning_field_snaps.push(kv_values.clone());
1319                            }
1320                            let input = CreateKvInput {
1321                                collection: query.table.clone(),
1322                                key,
1323                                value,
1324                                metadata,
1325                            };
1326                            entity_outputs.push(self.create_kv(input)?);
1327                        }
1328                    }
1329
1330                    inserted_count += 1;
1331                }
1332            }
1333
1334            if let Some(items) = query.returning.as_ref() {
1335                if !entity_outputs.is_empty() {
1336                    returning_result = Some(build_returning_result(
1337                        items,
1338                        &returning_field_snaps,
1339                        Some(&entity_outputs),
1340                    ));
1341                }
1342            }
1343        }
1344
1345        // Auto-embed pipeline: batch-embed fields across all inserted rows via AiBatchClient.
1346        if let Some(ref embed_config) = query.auto_embed {
1347            let store = self.inner.db.store();
1348            let provider = crate::ai::parse_provider(&embed_config.provider)?;
1349            let is_local_provider = matches!(provider, crate::ai::AiProvider::Local);
1350            // Local provider runs in-process — no API key path applies.
1351            // The pre-flight above already required `MODEL '<name>'`
1352            // for the local case, so the unwrap_or default below only
1353            // ever fires for OpenAI-compatible providers.
1354            let api_key = if is_local_provider {
1355                String::new()
1356            } else {
1357                crate::ai::resolve_api_key_from_runtime(&provider, None, self)?
1358            };
1359            let model = embed_config.model.clone().unwrap_or_else(|| {
1360                std::env::var("REDDB_OPENAI_EMBEDDING_MODEL")
1361                    .ok()
1362                    .unwrap_or_else(|| crate::ai::DEFAULT_OPENAI_EMBEDDING_MODEL.to_string())
1363            });
1364
1365            // Collect the just-inserted rows (most-recently appended, reversed back to insert order).
1366            let manager = store
1367                .get_collection(&query.table)
1368                .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1369            let entities = manager.query_all(|_| true);
1370            let recent: Vec<_> = entities
1371                .into_iter()
1372                .rev()
1373                .take(effective_rows.len())
1374                .collect();
1375
1376            // Collector phase: (entity_index, combined_text) for rows that have non-empty fields.
1377            let entity_combos: Vec<(usize, String)> = recent
1378                .iter()
1379                .enumerate()
1380                .filter_map(|(i, entity)| {
1381                    if let EntityData::Row(ref row) = entity.data {
1382                        if let Some(ref named) = row.named {
1383                            let texts: Vec<String> = embed_config
1384                                .fields
1385                                .iter()
1386                                .filter_map(|field| match named.get(field) {
1387                                    Some(Value::Text(t)) if !t.is_empty() => Some(t.to_string()),
1388                                    _ => None,
1389                                })
1390                                .collect();
1391                            if !texts.is_empty() {
1392                                return Some((i, texts.join(" ")));
1393                            }
1394                        }
1395                    }
1396                    None
1397                })
1398                .collect();
1399
1400            if !entity_combos.is_empty() {
1401                // Batch phase: single provider round-trip for all rows.
1402                let batch_texts: Vec<String> =
1403                    entity_combos.iter().map(|(_, t)| t.clone()).collect();
1404
1405                // Issue #682 — when the provider is `local`, bypass
1406                // AiBatchClient (which is HTTP-only) and dispatch
1407                // directly through the in-process local embedding
1408                // backend. All texts go in one call, mirroring the
1409                // single-round-trip shape of the remote path. The
1410                // local backend does not perform intra-batch dedup —
1411                // each input position gets its own row in the output
1412                // — which keeps the per-row "create_vector" loop
1413                // below correct without additional fan-out logic.
1414                let embeddings = if is_local_provider {
1415                    let response = crate::runtime::ai::local_embedding::embed_local_with_db(
1416                        &self.inner.db,
1417                        &model,
1418                        batch_texts,
1419                    )?;
1420                    response.embeddings
1421                } else {
1422                    let batch_client =
1423                        crate::runtime::ai::batch_client::AiBatchClient::from_runtime(self);
1424
1425                    match tokio::runtime::Handle::try_current() {
1426                        Ok(handle) => tokio::task::block_in_place(|| {
1427                            handle.block_on(batch_client.embed_batch(
1428                                &provider,
1429                                &model,
1430                                &api_key,
1431                                batch_texts,
1432                            ))
1433                        }),
1434                        Err(_) => {
1435                            return Err(RedDBError::Query(
1436                                "AUTO EMBED requires a Tokio runtime context".to_string(),
1437                            ));
1438                        }
1439                    }
1440                    .map_err(|e| RedDBError::Query(e.to_string()))?
1441                };
1442
1443                // Distribute phase: persist one vector per non-empty embedding.
1444                for ((_, combined), dense) in entity_combos.iter().zip(embeddings) {
1445                    if dense.is_empty() {
1446                        continue;
1447                    }
1448                    self.create_vector(CreateVectorInput {
1449                        collection: query.table.clone(),
1450                        dense,
1451                        content: Some(combined.clone()),
1452                        metadata: Vec::new(),
1453                        link_row: None,
1454                        link_node: None,
1455                    })?;
1456                }
1457            }
1458        }
1459
1460        if inserted_count > 0 {
1461            self.note_table_write(&query.table);
1462        }
1463
1464        let mut result = RuntimeQueryResult::dml_result(
1465            raw_query.to_string(),
1466            inserted_count,
1467            "insert",
1468            "runtime-dml",
1469        );
1470        if let Some(returning) = returning_result {
1471            result.result = returning;
1472        }
1473        Ok(result)
1474    }
1475
1476    fn check_insert_column_policy(&self, query: &InsertQuery) -> RedDBResult<()> {
1477        let Some(auth_store) = self.inner.auth_store.read().clone() else {
1478            return Ok(());
1479        };
1480        if !auth_store.iam_authorization_enabled() {
1481            return Ok(());
1482        }
1483        let Some((username, role)) = crate::runtime::impl_core::current_auth_identity() else {
1484            return Ok(());
1485        };
1486
1487        let tenant = crate::runtime::impl_core::current_tenant();
1488        let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
1489        let request = crate::auth::ColumnAccessRequest {
1490            action: "insert".to_string(),
1491            schema: None,
1492            table: query.table.clone(),
1493            columns: query.columns.clone(),
1494        };
1495        let ctx = crate::auth::policies::EvalContext {
1496            principal_tenant: tenant.clone(),
1497            current_tenant: tenant,
1498            peer_ip: None,
1499            mfa_present: false,
1500            now_ms: crate::auth::now_ms(),
1501            principal_is_admin_role: role == crate::auth::Role::Admin,
1502            principal_is_platform_scoped: principal.tenant.is_none(),
1503        };
1504
1505        let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
1506        let table_allowed = matches!(
1507            outcome.table_decision,
1508            crate::auth::policies::Decision::Allow { .. }
1509                | crate::auth::policies::Decision::AdminBypass
1510        );
1511        if !table_allowed {
1512            return Err(RedDBError::Query(format!(
1513                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1514                outcome.table_resource.kind, outcome.table_resource.name
1515            )));
1516        }
1517        if let Some(denied) = outcome.first_denied_column() {
1518            return Err(RedDBError::Query(format!(
1519                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1520                denied.resource.kind, denied.resource.name
1521            )));
1522        }
1523
1524        Ok(())
1525    }
1526
1527    pub(crate) fn insert_timeseries_point(
1528        &self,
1529        collection: &str,
1530        fields: Vec<(String, Value)>,
1531        mut metadata: Vec<(String, MetadataValue)>,
1532    ) -> RedDBResult<EntityId> {
1533        apply_collection_default_ttl_metadata(self, collection, &mut metadata);
1534
1535        let (columns, values) = pairwise_columns_values(&fields);
1536        validate_timeseries_insert_columns(&columns)?;
1537
1538        // Issue #577 — AnalyticsSchemaRegistry hook. If the row carries
1539        // an `event_name` whose schema is registered, validate the
1540        // `payload` JSON against it BEFORE any write side-effect. On
1541        // failure we return a typed error and the row is not
1542        // persisted. When no schema is registered for the event name
1543        // (or no `event_name` column is supplied at all) we fall
1544        // through to the normal write path for back-compat with
1545        // existing timeseries rows.
1546        let event_name_opt = find_column_value_opt_string(&columns, &values, "event_name");
1547        let payload_opt = find_column_value_opt_string(&columns, &values, "payload");
1548        if let Some(event_name) = event_name_opt.as_deref() {
1549            let store_for_schema = self.inner.db.store();
1550            if super::analytics_schema_registry::latest(store_for_schema.as_ref(), event_name)
1551                .is_some()
1552            {
1553                let payload_json = payload_opt.as_deref().unwrap_or("{}");
1554                super::analytics_schema_registry::validate(
1555                    store_for_schema.as_ref(),
1556                    event_name,
1557                    payload_json,
1558                )
1559                .map_err(super::analytics_schema_registry::validation_error_to_reddb)?;
1560            }
1561        }
1562
1563        // `metric` is required by the existing timeseries write path;
1564        // when an analytics-style row supplies `event_name` but not
1565        // `metric`, fall back to the event name so the storage path
1566        // still has a non-empty metric tag.
1567        let metric = match find_column_value_opt_string(&columns, &values, "metric") {
1568            Some(m) => m,
1569            None => event_name_opt.clone().ok_or_else(|| {
1570                RedDBError::Query(
1571                    "timeseries INSERT requires either `metric` or `event_name`".to_string(),
1572                )
1573            })?,
1574        };
1575        // `value` is optional for analytics-event rows (which are
1576        // semantically counts of 1); default to 1.0 when missing so
1577        // analytics inserts don't have to fabricate a metric value.
1578        let value = match find_column_value_opt_string(&columns, &values, "value") {
1579            Some(s) => s.parse::<f64>().unwrap_or(1.0),
1580            None => columns
1581                .iter()
1582                .position(|c| c.eq_ignore_ascii_case("value"))
1583                .and_then(|i| match &values[i] {
1584                    Value::Float(f) => Some(*f),
1585                    Value::Integer(n) | Value::BigInt(n) => Some(*n as f64),
1586                    Value::UnsignedInteger(n) => Some(*n as f64),
1587                    _ => None,
1588                })
1589                .unwrap_or(1.0),
1590        };
1591        let timestamp_ns =
1592            find_timeseries_timestamp_ns(&columns, &values)?.unwrap_or_else(current_unix_ns);
1593        let mut tags = find_timeseries_tags(&columns, &values)?;
1594        if let Some(ref name) = event_name_opt {
1595            tags.entry("event_name".to_string())
1596                .or_insert_with(|| name.clone());
1597        }
1598        if let Some(ref payload) = payload_opt {
1599            tags.entry("payload".to_string())
1600                .or_insert_with(|| payload.clone());
1601        }
1602
1603        let mut entity = UnifiedEntity::new(
1604            EntityId::new(0),
1605            EntityKind::TimeSeriesPoint(Box::new(crate::storage::TimeSeriesPointKind {
1606                series: collection.to_string(),
1607                metric: metric.clone(),
1608            })),
1609            EntityData::TimeSeries(crate::storage::TimeSeriesData {
1610                metric,
1611                timestamp_ns,
1612                value,
1613                tags,
1614            }),
1615        );
1616        // MVCC #30: stamp xmin with the active tx xid (inside a tx)
1617        // or an autocommit xid (allocated and committed up-front so
1618        // future snapshots see the row as soon as it lands).
1619        let writer_xid = match self.current_xid() {
1620            Some(xid) => xid,
1621            None => {
1622                let mgr = self.snapshot_manager();
1623                let xid = mgr.begin();
1624                mgr.commit(xid);
1625                xid
1626            }
1627        };
1628        entity.set_xmin(writer_xid);
1629
1630        let store = self.inner.db.store();
1631        let id = store
1632            .insert_auto(collection, entity)
1633            .map_err(|err| RedDBError::Internal(err.to_string()))?;
1634
1635        if !metadata.is_empty() {
1636            let _ = store.set_metadata(
1637                collection,
1638                id,
1639                Metadata::with_fields(metadata.into_iter().collect()),
1640            );
1641        }
1642
1643        self.cdc_emit(
1644            crate::replication::cdc::ChangeOperation::Insert,
1645            collection,
1646            id.raw(),
1647            "timeseries",
1648        );
1649
1650        Ok(id)
1651    }
1652
1653    /// Execute UPDATE table SET col=val, ... WHERE filter
1654    ///
1655    /// Scans the target collection, evaluates the WHERE filter against each
1656    /// record, and patches every matching entity.
1657    pub fn execute_update(
1658        &self,
1659        raw_query: &str,
1660        query: &UpdateQuery,
1661    ) -> RedDBResult<RuntimeQueryResult> {
1662        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
1663        // Issue #523 — blockchain collections are immutable. Reject before
1664        // RLS / RETURNING work so the operator sees a clean 409-mapped
1665        // error instead of a partially-applied mutation surface.
1666        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
1667            return Err(RedDBError::InvalidOperation(format!(
1668                "BlockchainCollectionImmutable: UPDATE not allowed on '{}'",
1669                query.table
1670            )));
1671        }
1672        // CollectionContract gate (#50): runs the APPEND ONLY guard
1673        // (and any future contract bits) before RLS / RETURNING work
1674        // so the operator's immutability declaration is honoured
1675        // uniformly and the error message points at the DDL rather
1676        // than at a downstream symptom.
1677        crate::runtime::collection_contract::CollectionContractGate::check(
1678            self,
1679            &query.table,
1680            crate::runtime::collection_contract::MutationKind::Update,
1681        )?;
1682        ensure_update_target_contract(self, &query.table, query.target)?;
1683        ensure_graph_identity_update_target_allowed(query)?;
1684
1685        // Apply RLS augmentation first so every downstream path — plain
1686        // UPDATE, UPDATE...RETURNING, the inner scan — observes the
1687        // same policy-filtered target set. This prevents RETURNING
1688        // from ever exposing rows the UPDATE policy would have
1689        // denied.
1690        let rls_gated = crate::runtime::impl_core::rls_is_enabled(self, &query.table);
1691        let augmented_query: UpdateQuery;
1692        let effective_query: &UpdateQuery = if rls_gated {
1693            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
1694                self,
1695                &query.table,
1696                crate::storage::query::ast::PolicyAction::Update,
1697            );
1698            let Some(policy) = rls_filter else {
1699                // No admitting policy: zero rows affected, empty
1700                // RETURNING (never leak rows the caller can't touch).
1701                let mut response = RuntimeQueryResult::dml_result(
1702                    raw_query.to_string(),
1703                    0,
1704                    "update",
1705                    "runtime-dml-rls",
1706                );
1707                if let Some(items) = query.returning.clone() {
1708                    response.result = build_returning_result(&items, &[], None);
1709                }
1710                return Ok(response);
1711            };
1712            let mut augmented = query.clone();
1713            augmented.filter = Some(match augmented.filter.take() {
1714                Some(existing) => {
1715                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
1716                }
1717                None => policy,
1718            });
1719            augmented_query = augmented;
1720            &augmented_query
1721        } else {
1722            query
1723        };
1724
1725        // RETURNING wraps the inner executor and uses the touched-id
1726        // list the inner reports so the post-image reflects exactly
1727        // the rows the UPDATE actually mutated (not whatever a
1728        // separate SELECT might have observed).
1729        if let Some(items) = effective_query.returning.clone() {
1730            let mut inner_query = effective_query.clone();
1731            inner_query.returning = None;
1732            let (mut response, touched_ids) =
1733                self.execute_update_inner_tracked(raw_query, &inner_query)?;
1734
1735            let snapshots = if matches!(
1736                effective_query.target,
1737                UpdateTarget::Nodes | UpdateTarget::Edges
1738            ) {
1739                graph_update_returning_snapshots(self, &effective_query.table, &touched_ids)
1740            } else {
1741                super::dml_target_scan::DmlTargetScan::new(self, &effective_query.table, None, None)
1742                    .row_snapshots(&touched_ids)
1743            };
1744
1745            response.result = build_returning_result(&items, &snapshots, None);
1746            response.engine = "runtime-dml-returning";
1747            return Ok(response);
1748        }
1749
1750        self.execute_update_inner(raw_query, effective_query)
1751    }
1752
1753    /// Back-compat shim: the older entry point ignored touched ids.
1754    fn execute_update_inner(
1755        &self,
1756        raw_query: &str,
1757        query: &UpdateQuery,
1758    ) -> RedDBResult<RuntimeQueryResult> {
1759        self.execute_update_inner_tracked(raw_query, query)
1760            .map(|(res, _)| res)
1761    }
1762
1763    fn execute_update_inner_tracked(
1764        &self,
1765        raw_query: &str,
1766        query: &UpdateQuery,
1767    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1768        let store = self.inner.db.store();
1769        let effective_filter = effective_update_filter(query);
1770        let compiled_plan = self.compile_update_plan(query)?;
1771        let needs_rmw_lock = update_needs_rmw_lock(query);
1772        let table_rmw_lock = if needs_rmw_lock {
1773            Some(
1774                self.inner
1775                    .rmw_locks
1776                    .lock_for(&query.table, "__table_rmw_update__"),
1777            )
1778        } else {
1779            None
1780        };
1781        let _table_rmw_guard = table_rmw_lock.as_ref().map(|lock| lock.lock());
1782        let mut touched_ids: Vec<EntityId> = Vec::new();
1783        let limit_cap = query.limit.map(|l| l as usize);
1784        let manager = store
1785            .get_collection(&query.table)
1786            .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1787        let scan_limit = if query.order_by.is_empty() {
1788            limit_cap
1789        } else {
1790            None
1791        };
1792        let mut target_scan = super::dml_target_scan::DmlTargetScan::with_update_target(
1793            self,
1794            &query.table,
1795            effective_filter.as_ref(),
1796            scan_limit,
1797            query.target,
1798        );
1799        if needs_rmw_lock {
1800            target_scan = target_scan.with_live_table_rows();
1801        }
1802        let ids_to_update = target_scan.find_target_ids()?;
1803        let ids_to_update = if query.order_by.is_empty() {
1804            ids_to_update
1805        } else {
1806            ordered_update_target_ids(&manager, &ids_to_update, &query.order_by, limit_cap)
1807        };
1808
1809        if needs_rmw_lock {
1810            return self.execute_update_inner_tracked_locked(
1811                raw_query,
1812                query,
1813                &compiled_plan,
1814                &ids_to_update,
1815                effective_filter.as_ref(),
1816            );
1817        }
1818
1819        let mut affected: u64 = 0;
1820        for chunk in ids_to_update.chunks(UPDATE_APPLY_CHUNK_SIZE) {
1821            let mut applied_chunk = Vec::with_capacity(chunk.len());
1822            for entity in manager.get_many(chunk).into_iter().flatten() {
1823                let assignments =
1824                    self.materialize_update_assignments_for_entity(query, &entity, &compiled_plan)?;
1825                let applied = self.apply_materialized_update_for_entity(
1826                    query.table.clone(),
1827                    entity,
1828                    &compiled_plan,
1829                    assignments,
1830                )?;
1831                touched_ids.push(applied.id);
1832                applied_chunk.push(applied);
1833            }
1834            self.persist_update_chunk(&applied_chunk)?;
1835            affected += applied_chunk.len() as u64;
1836            let lsns = self.flush_update_chunk(&applied_chunk)?;
1837            if !query.suppress_events {
1838                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1839            }
1840        }
1841
1842        if affected > 0 {
1843            self.note_table_write(&query.table);
1844        }
1845
1846        Ok((
1847            RuntimeQueryResult::dml_result(
1848                raw_query.to_string(),
1849                affected,
1850                "update",
1851                "runtime-dml",
1852            ),
1853            touched_ids,
1854        ))
1855    }
1856
1857    fn execute_update_inner_tracked_locked(
1858        &self,
1859        raw_query: &str,
1860        query: &UpdateQuery,
1861        compiled_plan: &CompiledUpdatePlan,
1862        ids_to_update: &[EntityId],
1863        effective_filter: Option<&Filter>,
1864    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1865        let store = self.inner.db.store();
1866        let mut touched_ids = Vec::new();
1867        let mut lock_entries = Vec::new();
1868
1869        for id in ids_to_update {
1870            let Some(candidate) = store.get(&query.table, *id) else {
1871                continue;
1872            };
1873            let logical_id = candidate.logical_id();
1874            let lock_key = format!("row:{}", logical_id.raw());
1875            let rmw_lock = self.inner.rmw_locks.lock_for(&query.table, &lock_key);
1876            lock_entries.push((lock_key, logical_id, rmw_lock));
1877        }
1878
1879        lock_entries.sort_by(|left, right| left.0.cmp(&right.0));
1880        lock_entries.dedup_by(|left, right| left.0 == right.0);
1881        let _rmw_guards: Vec<_> = lock_entries.iter().map(|entry| entry.2.lock()).collect();
1882
1883        let mut applied_chunk = Vec::new();
1884        for (_, logical_id, _) in &lock_entries {
1885            let Some(entity) = resolve_update_entity_by_logical_id(self, &query.table, *logical_id)
1886            else {
1887                continue;
1888            };
1889            if let Some(filter) = effective_filter {
1890                if !crate::runtime::query_exec::evaluate_entity_filter_with_db(
1891                    Some(self.inner.db.as_ref()),
1892                    &entity,
1893                    filter,
1894                    &query.table,
1895                    &query.table,
1896                ) {
1897                    continue;
1898                }
1899            }
1900
1901            let assignments =
1902                self.materialize_update_assignments_for_entity(query, &entity, compiled_plan)?;
1903            let applied = self.apply_materialized_update_for_entity(
1904                query.table.clone(),
1905                entity,
1906                compiled_plan,
1907                assignments,
1908            )?;
1909            touched_ids.push(applied.id);
1910            applied_chunk.push(applied);
1911        }
1912
1913        let affected = applied_chunk.len() as u64;
1914        if !applied_chunk.is_empty() {
1915            self.persist_update_chunk(&applied_chunk)?;
1916            let lsns = self.flush_update_chunk(&applied_chunk)?;
1917            if !query.suppress_events {
1918                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1919            }
1920        }
1921
1922        if affected > 0 {
1923            self.note_table_write(&query.table);
1924        }
1925
1926        Ok((
1927            RuntimeQueryResult::dml_result(
1928                raw_query.to_string(),
1929                affected,
1930                "update",
1931                "runtime-dml",
1932            ),
1933            touched_ids,
1934        ))
1935    }
1936
1937    fn compile_update_plan(&self, query: &UpdateQuery) -> RedDBResult<CompiledUpdatePlan> {
1938        let mut static_field_assignments = Vec::new();
1939        let mut static_metadata_assignments = Vec::new();
1940        let mut dynamic_assignments = Vec::new();
1941        let row_contract_plan = build_row_update_contract_plan(&self.db(), &query.table)?;
1942        let mut row_modified_columns = Vec::new();
1943
1944        for (idx, (column, expr)) in query.assignment_exprs.iter().enumerate() {
1945            let compound_op = query.compound_assignment_ops.get(idx).copied().flatten();
1946            let metadata_key = resolve_sql_ttl_metadata_key(column);
1947            if compound_op.is_some() && metadata_key.is_some() {
1948                return Err(RedDBError::Query(format!(
1949                    "compound assignment is only supported for row fields: {column}"
1950                )));
1951            }
1952            if compound_op.is_none() {
1953                if let Ok(value) = fold_expr_to_value(expr.clone()) {
1954                    if let Some(metadata_key) = metadata_key {
1955                        let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
1956                        let (canonical_key, canonical_value) =
1957                            canonicalize_sql_ttl_metadata(metadata_key, raw_value);
1958                        static_metadata_assignments
1959                            .push((canonical_key.to_string(), canonical_value));
1960                    } else {
1961                        let value = self.resolve_crypto_sentinel(value)?;
1962                        static_field_assignments.push((
1963                            column.clone(),
1964                            normalize_row_update_assignment_with_plan(
1965                                &query.table,
1966                                column,
1967                                value,
1968                                row_contract_plan.as_ref(),
1969                            )?,
1970                        ));
1971                        row_modified_columns.push(column.clone());
1972                    }
1973                    continue;
1974                }
1975            }
1976
1977            dynamic_assignments.push(CompiledUpdateAssignment {
1978                column: column.clone(),
1979                expr: expr.clone(),
1980                compound_op,
1981                metadata_key,
1982                row_rule: if metadata_key.is_none() {
1983                    if let Some(plan) = row_contract_plan.as_ref() {
1984                        if plan.timestamps_enabled
1985                            && (column == "created_at" || column == "updated_at")
1986                        {
1987                            return Err(RedDBError::Query(format!(
1988                                "collection '{}' manages '{}' automatically — do not set it in UPDATE",
1989                                query.table, column
1990                            )));
1991                        }
1992                        if let Some(rule) = plan.declared_rules.get(column) {
1993                            Some(rule.clone())
1994                        } else if plan.strict_schema {
1995                            return Err(RedDBError::Query(format!(
1996                                "collection '{}' is strict and does not allow undeclared fields: {}",
1997                                query.table, column
1998                            )));
1999                        } else {
2000                            None
2001                        }
2002                    } else {
2003                        None
2004                    }
2005                } else {
2006                    None
2007                },
2008            });
2009            if metadata_key.is_none() {
2010                row_modified_columns.push(column.clone());
2011            }
2012        }
2013
2014        let row_modified_columns = dedupe_update_columns(row_modified_columns);
2015        let row_touches_unique_columns = row_contract_plan.as_ref().is_some_and(|plan| {
2016            row_modified_columns.iter().any(|column| {
2017                plan.unique_columns
2018                    .keys()
2019                    .any(|unique| unique.eq_ignore_ascii_case(column))
2020            })
2021        });
2022
2023        if let Some(ttl_ms) = query.ttl_ms {
2024            static_metadata_assignments
2025                .push(("_ttl_ms".to_string(), metadata_u64_to_value(ttl_ms)));
2026        }
2027        if let Some(expires_at_ms) = query.expires_at_ms {
2028            static_metadata_assignments.push((
2029                "_expires_at".to_string(),
2030                metadata_u64_to_value(expires_at_ms),
2031            ));
2032        }
2033        for (key, val) in &query.with_metadata {
2034            static_metadata_assignments.push((key.clone(), storage_value_to_metadata_value(val)));
2035        }
2036
2037        Ok(CompiledUpdatePlan {
2038            static_field_assignments,
2039            static_metadata_assignments,
2040            dynamic_assignments,
2041            row_contract_plan,
2042            row_modified_columns,
2043            row_touches_unique_columns,
2044        })
2045    }
2046
2047    fn materialize_update_assignments_for_entity(
2048        &self,
2049        query: &UpdateQuery,
2050        entity: &UnifiedEntity,
2051        compiled_plan: &CompiledUpdatePlan,
2052    ) -> RedDBResult<MaterializedUpdateAssignments> {
2053        let mut assignments = MaterializedUpdateAssignments::default();
2054        let mut record: Option<UnifiedRecord> = None;
2055
2056        for assignment in &compiled_plan.dynamic_assignments {
2057            if assignment.compound_op.is_some()
2058                && !matches!(
2059                    entity.data,
2060                    EntityData::Row(_) | EntityData::Node(_) | EntityData::Edge(_)
2061                )
2062            {
2063                return Err(RedDBError::Query(format!(
2064                    "compound assignment is only supported for row or graph UPDATE column '{}'",
2065                    assignment.column
2066                )));
2067            }
2068            if record.is_none() {
2069                record = runtime_any_record_from_entity_ref(entity);
2070            }
2071            let Some(record) = record.as_ref() else {
2072                return Err(RedDBError::Query(format!(
2073                    "UPDATE could not materialize runtime record for entity {} in '{}'",
2074                    entity.id.raw(),
2075                    query.table
2076                )));
2077            };
2078            let rhs = super::expr_eval::evaluate_runtime_expr_with_db(
2079                Some(self.inner.db.as_ref()),
2080                &assignment.expr,
2081                record,
2082                Some(query.table.as_str()),
2083                Some(query.table.as_str()),
2084            )
2085            .ok_or_else(|| {
2086                RedDBError::Query(format!(
2087                    "failed to evaluate UPDATE expression for column '{}'",
2088                    assignment.column
2089                ))
2090            })?;
2091            let value = if let Some(op) = assignment.compound_op {
2092                evaluate_compound_update_assignment(&assignment.column, record, op, rhs)?
2093            } else {
2094                rhs
2095            };
2096
2097            if let Some(metadata_key) = assignment.metadata_key {
2098                let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
2099                let (canonical_key, canonical_value) =
2100                    canonicalize_sql_ttl_metadata(metadata_key, raw_value);
2101                assignments
2102                    .dynamic_metadata_assignments
2103                    .push((canonical_key.to_string(), canonical_value));
2104            } else {
2105                assignments.dynamic_field_assignments.push((
2106                    assignment.column.clone(),
2107                    normalize_row_update_value_for_rule(
2108                        &query.table,
2109                        self.resolve_crypto_sentinel(value)?,
2110                        assignment.row_rule.as_ref(),
2111                    )?,
2112                ));
2113            }
2114        }
2115
2116        Ok(assignments)
2117    }
2118
2119    fn apply_materialized_update_for_entity(
2120        &self,
2121        collection: String,
2122        entity: UnifiedEntity,
2123        compiled_plan: &CompiledUpdatePlan,
2124        assignments: MaterializedUpdateAssignments,
2125    ) -> RedDBResult<AppliedEntityMutation> {
2126        if matches!(entity.data, EntityData::Row(_)) {
2127            return self.apply_loaded_sql_update_row_core(
2128                collection,
2129                entity,
2130                &compiled_plan.static_field_assignments,
2131                assignments.dynamic_field_assignments,
2132                &compiled_plan.static_metadata_assignments,
2133                assignments.dynamic_metadata_assignments,
2134                compiled_plan.row_contract_plan.as_ref(),
2135                &compiled_plan.row_modified_columns,
2136                compiled_plan.row_touches_unique_columns,
2137            );
2138        }
2139
2140        ensure_graph_identity_update_allowed(&entity, compiled_plan, &assignments)?;
2141
2142        let operations = build_patch_operations_from_materialized_assignments(
2143            &entity,
2144            compiled_plan,
2145            assignments,
2146        );
2147        self.apply_loaded_patch_entity_core(
2148            collection,
2149            entity,
2150            crate::json::Value::Null,
2151            operations,
2152        )
2153    }
2154
2155    /// Execute DELETE FROM table WHERE filter
2156    pub fn execute_delete(
2157        &self,
2158        raw_query: &str,
2159        query: &DeleteQuery,
2160    ) -> RedDBResult<RuntimeQueryResult> {
2161        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
2162        // Issue #523 — blockchain collections are immutable; see
2163        // execute_update for the same gate.
2164        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
2165            return Err(RedDBError::InvalidOperation(format!(
2166                "BlockchainCollectionImmutable: DELETE not allowed on '{}'",
2167                query.table
2168            )));
2169        }
2170        // CollectionContract gate (#50) — see execute_update for
2171        // rationale. The gate handles APPEND ONLY rejection and is
2172        // the single point where future contract bits land.
2173        crate::runtime::collection_contract::CollectionContractGate::check(
2174            self,
2175            &query.table,
2176            crate::runtime::collection_contract::MutationKind::Delete,
2177        )?;
2178
2179        // RETURNING on DELETE: capture the pre-image via an internal
2180        // SELECT that reuses the same WHERE, then run the delete with
2181        // the RETURNING clause stripped, then project the captured
2182        // rows through the requested items. The extra SELECT is a
2183        // pragmatic MVP — a future pass can fuse the scan with the
2184        // delete to avoid the second pass over the heap.
2185        if let Some(items) = query.returning.clone() {
2186            let select_sql = delete_to_select_sql(raw_query).ok_or_else(|| {
2187                RedDBError::Query(
2188                    "DELETE ... RETURNING: cannot rewrite query for pre-image scan".to_string(),
2189                )
2190            })?;
2191            let captured = self.execute_query(&select_sql)?;
2192
2193            let mut inner_query = query.clone();
2194            inner_query.returning = None;
2195            let _ = self.execute_delete(raw_query, &inner_query)?;
2196
2197            let snapshots: Vec<Vec<(String, Value)>> = captured
2198                .result
2199                .records
2200                .iter()
2201                .map(|rec| {
2202                    rec.iter_fields()
2203                        .map(|(k, v)| (k.as_ref().to_string(), v.clone()))
2204                        .collect()
2205                })
2206                .collect();
2207            let affected = snapshots.len() as u64;
2208            let result = build_returning_result(&items, &snapshots, None);
2209
2210            let mut response = RuntimeQueryResult::dml_result(
2211                raw_query.to_string(),
2212                affected,
2213                "delete",
2214                "runtime-dml-returning",
2215            );
2216            response.result = result;
2217            return Ok(response);
2218        }
2219        // Row-Level Security enforcement (Phase 2.5.2 PG parity).
2220        //
2221        // When the table has RLS enabled, gate the DELETE by the
2222        // per-role policy set: mutations only touch rows that *every*
2223        // matching `FOR DELETE` policy would accept. No policies =>
2224        // zero rows affected (PG restrictive-default).
2225        if crate::runtime::impl_core::rls_is_enabled(self, &query.table) {
2226            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
2227                self,
2228                &query.table,
2229                crate::storage::query::ast::PolicyAction::Delete,
2230            );
2231            let Some(policy) = rls_filter else {
2232                return Ok(RuntimeQueryResult::dml_result(
2233                    raw_query.to_string(),
2234                    0,
2235                    "delete",
2236                    "runtime-dml-rls",
2237                ));
2238            };
2239            // Fold the policy predicate into the user's WHERE before
2240            // dispatching — the remainder of this function reads the
2241            // filter from `query` via `effective_delete_filter`, which
2242            // respects the updated value.
2243            let mut augmented = query.clone();
2244            augmented.filter = Some(match augmented.filter.take() {
2245                Some(existing) => {
2246                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
2247                }
2248                None => policy,
2249            });
2250            return self.execute_delete_inner(raw_query, &augmented);
2251        }
2252        self.execute_delete_inner(raw_query, query)
2253    }
2254
2255    fn execute_delete_inner(
2256        &self,
2257        raw_query: &str,
2258        query: &DeleteQuery,
2259    ) -> RedDBResult<RuntimeQueryResult> {
2260        let effective_filter = effective_delete_filter(query);
2261
2262        // Find the rows that match the WHERE clause. The "find target
2263        // rows" loop lives in DmlTargetScan so UPDATE (#52) can reuse
2264        // the same scan strategy.
2265        let scan = super::dml_target_scan::DmlTargetScan::new(
2266            self,
2267            &query.table,
2268            effective_filter.as_ref(),
2269            None,
2270        );
2271        let ids_to_delete = scan.find_target_ids()?;
2272
2273        // For event-enabled collections, snapshot the pre-delete state
2274        // before rows are physically removed.
2275        let needs_delete_events =
2276            !query.suppress_events && self.collection_has_delete_subscriptions(&query.table);
2277        let mut pre_images: HashMap<u64, crate::json::Value> = if needs_delete_events {
2278            scan.row_json_pre_images(&ids_to_delete)
2279        } else {
2280            HashMap::new()
2281        };
2282
2283        let mut affected: u64 = 0;
2284        for chunk in ids_to_delete.chunks(UPDATE_APPLY_CHUNK_SIZE) {
2285            let (count, lsns) = self.delete_entities_batch(&query.table, chunk)?;
2286            affected += count;
2287            if needs_delete_events && !lsns.is_empty() {
2288                // lsns.len() == actually-deleted entities; align with chunk ids.
2289                // `delete_batch` may skip missing entities, so we correlate by
2290                // the number returned (they're emitted in chunk order).
2291                let deleted_chunk = &chunk[..lsns.len().min(chunk.len())];
2292                self.emit_delete_events_for_collection(
2293                    &query.table,
2294                    deleted_chunk,
2295                    &lsns,
2296                    &pre_images,
2297                )?;
2298            }
2299        }
2300        pre_images.clear();
2301
2302        if affected > 0 {
2303            self.note_table_write(&query.table);
2304        }
2305
2306        Ok(RuntimeQueryResult::dml_result(
2307            raw_query.to_string(),
2308            affected,
2309            "delete",
2310            "runtime-dml",
2311        ))
2312    }
2313}
2314
2315/// Reject UPDATE … NODES/EDGES that assign to graph identity/topology
2316/// columns regardless of whether any row matches the WHERE clause. The
2317/// per-entity guard below covers only the matched-rows case, but ADR 0019
2318/// declares these columns immutable on the surface itself, so a zero-row
2319/// UPDATE should still surface the same error to operators and SDKs.
2320fn ensure_graph_identity_update_target_allowed(query: &UpdateQuery) -> RedDBResult<()> {
2321    if !matches!(query.target, UpdateTarget::Nodes | UpdateTarget::Edges) {
2322        return Ok(());
2323    }
2324    for (column, _) in &query.assignment_exprs {
2325        if is_immutable_graph_identity_field(column) {
2326            return Err(RedDBError::Query(format!(
2327                "immutable graph field '{column}' cannot be updated"
2328            )));
2329        }
2330    }
2331    Ok(())
2332}
2333
2334fn ensure_graph_identity_update_allowed(
2335    entity: &UnifiedEntity,
2336    compiled_plan: &CompiledUpdatePlan,
2337    assignments: &MaterializedUpdateAssignments,
2338) -> RedDBResult<()> {
2339    if !matches!(entity.data, EntityData::Node(_) | EntityData::Edge(_)) {
2340        return Ok(());
2341    }
2342
2343    for (column, _) in compiled_plan
2344        .static_field_assignments
2345        .iter()
2346        .chain(assignments.dynamic_field_assignments.iter())
2347    {
2348        if is_immutable_graph_identity_field(column) {
2349            return Err(RedDBError::Query(format!(
2350                "immutable graph field '{column}' cannot be updated"
2351            )));
2352        }
2353    }
2354
2355    Ok(())
2356}
2357
/// Returns `true` when `column` names one of the graph identity/topology
/// columns that UPDATE may never assign. The comparison ignores ASCII case.
fn is_immutable_graph_identity_field(column: &str) -> bool {
    const IMMUTABLE_GRAPH_FIELDS: [&str; 6] =
        ["rid", "label", "from_rid", "to_rid", "from", "to"];

    for reserved in IMMUTABLE_GRAPH_FIELDS.iter() {
        if reserved.eq_ignore_ascii_case(column) {
            return true;
        }
    }
    false
}
2363
2364fn build_patch_operations_from_materialized_assignments(
2365    entity: &UnifiedEntity,
2366    compiled_plan: &CompiledUpdatePlan,
2367    assignments: MaterializedUpdateAssignments,
2368) -> Vec<PatchEntityOperation> {
2369    let mut operations = Vec::with_capacity(
2370        compiled_plan.static_field_assignments.len()
2371            + compiled_plan.static_metadata_assignments.len()
2372            + assignments.dynamic_field_assignments.len()
2373            + assignments.dynamic_metadata_assignments.len(),
2374    );
2375
2376    for (column, value) in &compiled_plan.static_field_assignments {
2377        operations.push(PatchEntityOperation {
2378            op: PatchEntityOperationType::Set,
2379            path: update_patch_path_for_entity(entity, column),
2380            value: Some(storage_value_to_json(value)),
2381        });
2382    }
2383
2384    for (column, value) in assignments.dynamic_field_assignments {
2385        operations.push(PatchEntityOperation {
2386            op: PatchEntityOperationType::Set,
2387            path: update_patch_path_for_entity(entity, &column),
2388            value: Some(storage_value_to_json(&value)),
2389        });
2390    }
2391
2392    for (key, value) in &compiled_plan.static_metadata_assignments {
2393        operations.push(PatchEntityOperation {
2394            op: PatchEntityOperationType::Set,
2395            path: vec!["metadata".to_string(), key.clone()],
2396            value: Some(metadata_value_to_json(value)),
2397        });
2398    }
2399
2400    for (key, value) in assignments.dynamic_metadata_assignments {
2401        operations.push(PatchEntityOperation {
2402            op: PatchEntityOperationType::Set,
2403            path: vec!["metadata".to_string(), key],
2404            value: Some(metadata_value_to_json(&value)),
2405        });
2406    }
2407
2408    operations
2409}
2410
2411fn update_patch_path_for_entity(entity: &UnifiedEntity, column: &str) -> Vec<String> {
2412    if matches!(
2413        (&entity.kind, &entity.data),
2414        (
2415            crate::storage::EntityKind::GraphNode(_),
2416            EntityData::Node(_)
2417        )
2418    ) && column.eq_ignore_ascii_case("node_type")
2419    {
2420        return vec!["node_type".to_string()];
2421    }
2422    if matches!(
2423        (&entity.kind, &entity.data),
2424        (
2425            crate::storage::EntityKind::GraphEdge(_),
2426            EntityData::Edge(_)
2427        )
2428    ) && column.eq_ignore_ascii_case("weight")
2429    {
2430        return vec!["weight".to_string()];
2431    }
2432    vec!["fields".to_string(), column.to_string()]
2433}
2434
/// Rewrite `DELETE FROM <table> [WHERE …] [RETURNING …]` as
/// `SELECT * FROM <table> [WHERE …]` so the delete executor can
/// capture the pre-image before actually removing the rows.
///
/// Returns `None` when the input does not start with the `DELETE`
/// keyword, when no top-level `FROM` keyword follows it, or when nothing
/// follows `FROM`.
///
/// Case-insensitive on the keywords. Any ASCII whitespace (spaces, tabs,
/// newlines) may separate `DELETE`, `FROM` and the table name. Everything
/// between `FROM` and the RETURNING clause is preserved, so WHERE /
/// ORDER BY / LIMIT survive untouched apart from surrounding whitespace
/// being trimmed. The RETURNING tail — if present — is truncated at the
/// first top-level `RETURNING` token.
fn delete_to_select_sql(sql: &str) -> Option<String> {
    let trimmed = sql.trim_start();
    // ASCII lowercasing keeps byte offsets identical to `trimmed`, so
    // offsets found in `lowered` can be used to slice `trimmed`.
    let lowered = trimmed.to_ascii_lowercase();

    // `DELETE` must be the leading keyword, terminated by whitespace
    // (this keeps e.g. `DELETED …` from matching).
    let after_delete = lowered.strip_prefix("delete")?;
    if !after_delete.starts_with(|c: char| c.is_ascii_whitespace()) {
        return None;
    }

    // Locate `FROM` with the same whitespace-bounded scan used for
    // RETURNING, so tabs and newlines around it are accepted too.
    let from_idx = find_top_level_keyword(&lowered, "from")?;
    let body_start = from_idx + "from".len();
    let after_from = &trimmed[body_start..];
    let after_from_lc = &lowered[body_start..];

    // Cut off the RETURNING tail (the clause appears at most once per
    // statement at top level in our grammar).
    let body_end =
        find_top_level_keyword(after_from_lc, "returning").unwrap_or(after_from.len());
    let body = after_from[..body_end].trim();
    if body.is_empty() {
        // `DELETE FROM` with no table: nothing meaningful to select from.
        return None;
    }
    Some(format!("SELECT * FROM {body}"))
}

/// Find the byte offset of a whitespace-bounded keyword in a
/// lowercased haystack, skipping matches inside single-quoted
/// string literals. Naive — no escape handling — but enough for
/// the shapes the DML parser emits.
///
/// A match must be preceded by ASCII whitespace or the start of the
/// haystack, and followed by ASCII whitespace or the end of the haystack.
fn find_top_level_keyword(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let needle = needle.as_bytes();
    let mut in_string = false;

    for (i, &byte) in bytes.iter().enumerate() {
        if byte == b'\'' {
            // Every quote toggles literal state; a doubled quote (`''`)
            // toggles twice and therefore stays inside the literal.
            in_string = !in_string;
            continue;
        }
        if in_string || !bytes[i..].starts_with(needle) {
            continue;
        }
        let bounded_before = i == 0 || bytes[i - 1].is_ascii_whitespace();
        let bounded_after = bytes
            .get(i + needle.len())
            .map_or(true, |next| next.is_ascii_whitespace());
        if bounded_before && bounded_after {
            return Some(i);
        }
    }
    None
}
2494
2495/// Build a `UnifiedResult` from the rows affected by a DML statement plus
2496/// its `RETURNING` clause. Each snapshot is a list of (column, value) pairs
2497/// for one affected row; `outputs`, when provided, supplies the engine-
2498/// assigned entity id for the same row (INSERT path). Projection honours
2499/// the RETURNING items: `*` expands to every snapshot column plus
2500/// the public row envelope when available.
2501fn build_returning_result(
2502    items: &[ReturningItem],
2503    snapshots: &[Vec<(String, Value)>],
2504    outputs: Option<&[CreateEntityOutput]>,
2505) -> UnifiedResult {
2506    let project_all = items.iter().any(|it| matches!(it, ReturningItem::All));
2507    let public_item_outputs = outputs.is_some_and(|outs| {
2508        outs.first()
2509            .and_then(|out| out.entity.as_ref())
2510            .is_some_and(|entity| public_returning_item_kind(entity).is_some())
2511    });
2512
2513    let mut columns: Vec<String> = if project_all {
2514        let mut cols: Vec<String> = Vec::new();
2515        if public_item_outputs {
2516            cols.extend(
2517                [
2518                    "rid",
2519                    "collection",
2520                    "kind",
2521                    "tenant",
2522                    "created_at",
2523                    "updated_at",
2524                ]
2525                .into_iter()
2526                .map(str::to_string),
2527            );
2528        } else if outputs.is_some() {
2529            cols.push("red_entity_id".to_string());
2530        }
2531        if let Some(first) = snapshots.first() {
2532            for (name, _) in first {
2533                cols.push(name.clone());
2534            }
2535        }
2536        cols
2537    } else {
2538        items
2539            .iter()
2540            .filter_map(|it| match it {
2541                ReturningItem::Column(c) => Some(c.clone()),
2542                ReturningItem::All => None,
2543            })
2544            .collect()
2545    };
2546    // Guarantee unique order-preserving column list.
2547    {
2548        let mut seen = std::collections::HashSet::new();
2549        columns.retain(|c| seen.insert(c.clone()));
2550    }
2551
2552    let mut records: Vec<UnifiedRecord> = Vec::with_capacity(snapshots.len());
2553    for (idx, snap) in snapshots.iter().enumerate() {
2554        let mut values: HashMap<Arc<str>, Value> = HashMap::with_capacity(columns.len());
2555        if let Some(outs) = outputs {
2556            if let Some(out) = outs.get(idx) {
2557                if let Some(entity) = out.entity.as_ref() {
2558                    if let Some(kind) = public_returning_item_kind(entity) {
2559                        values.insert(
2560                            Arc::clone(&sys_key_rid()),
2561                            Value::UnsignedInteger(out.id.raw()),
2562                        );
2563                        values.insert(
2564                            Arc::clone(&sys_key_collection()),
2565                            Value::text(entity.kind.collection().to_string()),
2566                        );
2567                        values.insert(Arc::clone(&sys_key_kind()), Value::text(kind.to_string()));
2568                        values.insert(
2569                            Arc::clone(&sys_key_created_at()),
2570                            Value::UnsignedInteger(entity.created_at),
2571                        );
2572                        values.insert(
2573                            Arc::clone(&sys_key_updated_at()),
2574                            Value::UnsignedInteger(entity.updated_at),
2575                        );
2576                        // Legacy alias: an explicit `RETURNING red_entity_id`
2577                        // still resolves to the row's rid. Only surfaces when
2578                        // the projected column list names it — `RETURNING *`
2579                        // keeps the envelope clean (rid, not red_entity_id).
2580                        values.insert(
2581                            Arc::clone(&sys_key_red_entity_id()),
2582                            Value::UnsignedInteger(out.id.raw()),
2583                        );
2584                    } else {
2585                        values.insert(
2586                            Arc::clone(&sys_key_red_entity_id()),
2587                            Value::Integer(out.id.raw() as i64),
2588                        );
2589                    }
2590                } else {
2591                    values.insert(
2592                        Arc::clone(&sys_key_red_entity_id()),
2593                        Value::Integer(out.id.raw() as i64),
2594                    );
2595                }
2596            }
2597        }
2598        for (name, val) in snap {
2599            values.insert(Arc::from(name.as_str()), val.clone());
2600        }
2601        if !values.contains_key("tenant") {
2602            let tenant = values.get("tenant_id").cloned().unwrap_or(Value::Null);
2603            values.insert(Arc::clone(&sys_key_tenant()), tenant);
2604        }
2605        let mut rec = UnifiedRecord::default();
2606        // Only keep projected columns on the record.
2607        for col in &columns {
2608            if let Some(v) = values.get(col.as_str()) {
2609                rec.set_arc(Arc::from(col.as_str()), v.clone());
2610            }
2611        }
2612        records.push(rec);
2613    }
2614
2615    UnifiedResult {
2616        columns,
2617        records,
2618        stats: Default::default(),
2619        pre_serialized_json: None,
2620    }
2621}
2622
2623fn public_returning_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<&'static str> {
2624    match (&entity.kind, &entity.data) {
2625        (crate::storage::EntityKind::GraphNode(_), crate::storage::EntityData::Node(_)) => {
2626            Some("node")
2627        }
2628        (crate::storage::EntityKind::GraphEdge(_), crate::storage::EntityData::Edge(_)) => {
2629            Some("edge")
2630        }
2631        (_, crate::storage::EntityData::Row(_)) => Some(public_returning_row_kind(entity)),
2632        _ => None,
2633    }
2634}
2635
2636fn public_returning_row_kind(entity: &crate::storage::UnifiedEntity) -> &'static str {
2637    let Some(row) = entity.data.as_row() else {
2638        return "row";
2639    };
2640
2641    let is_kv = row.named.as_ref().is_some_and(|named| {
2642        (named.len() == 2 && named.contains_key("key") && named.contains_key("value"))
2643            || (named.len() == 1 && (named.contains_key("key") || named.contains_key("value")))
2644    });
2645    if is_kv {
2646        return "kv";
2647    }
2648
2649    let is_document = row
2650        .named
2651        .as_ref()
2652        .is_some_and(|named| named.values().any(runtime_returning_documentish_value))
2653        || row.columns.iter().any(runtime_returning_documentish_value);
2654    if is_document {
2655        "document"
2656    } else {
2657        "row"
2658    }
2659}
2660
2661fn runtime_returning_documentish_value(value: &Value) -> bool {
2662    matches!(value, Value::Json(_) | Value::Blob(_))
2663}
2664
2665fn row_insert_returning_snapshots(
2666    outputs: &[CreateEntityOutput],
2667    fallback: Vec<Vec<(String, Value)>>,
2668) -> Vec<Vec<(String, Value)>> {
2669    outputs
2670        .iter()
2671        .enumerate()
2672        .map(|(idx, out)| {
2673            out.entity
2674                .as_ref()
2675                .map(entity_row_fields_snapshot)
2676                .filter(|snap| !snap.is_empty())
2677                .unwrap_or_else(|| fallback.get(idx).cloned().unwrap_or_default())
2678        })
2679        .collect()
2680}
2681
2682fn graph_insert_returning_snapshots(
2683    store: &crate::storage::unified::UnifiedStore,
2684    collection: &str,
2685    ids: &[EntityId],
2686) -> Vec<Vec<(String, Value)>> {
2687    let Some(manager) = store.get_collection(collection) else {
2688        return Vec::new();
2689    };
2690
2691    ids.iter()
2692        .filter_map(|id| manager.get(*id))
2693        .filter_map(|entity| {
2694            let mut record = runtime_any_record_from_entity_ref(&entity)?;
2695            record.set_arc(sys_key_collection(), Value::text(collection.to_string()));
2696            Some(record)
2697        })
2698        .map(|record| {
2699            record
2700                .iter_fields()
2701                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2702                .collect()
2703        })
2704        .collect()
2705}
2706
2707fn graph_update_returning_snapshots(
2708    runtime: &RedDBRuntime,
2709    collection: &str,
2710    ids: &[EntityId],
2711) -> Vec<Vec<(String, Value)>> {
2712    let store = runtime.db().store();
2713    let Some(manager) = store.get_collection(collection) else {
2714        return Vec::new();
2715    };
2716
2717    manager
2718        .get_many(ids)
2719        .into_iter()
2720        .flatten()
2721        .filter_map(|entity| runtime_any_record_from_entity_ref(&entity))
2722        .map(|record| {
2723            record
2724                .iter_fields()
2725                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2726                .collect()
2727        })
2728        .collect()
2729}
2730
2731fn ensure_update_target_contract(
2732    runtime: &RedDBRuntime,
2733    collection: &str,
2734    target: UpdateTarget,
2735) -> RedDBResult<()> {
2736    let Some(contract) = runtime.db().collection_contract(collection) else {
2737        return Ok(());
2738    };
2739    if update_target_contract_is_advisory(&contract)
2740        || update_target_allows_model(contract.declared_model, update_target_model(target))
2741    {
2742        return Ok(());
2743    }
2744    Err(RedDBError::InvalidOperation(format!(
2745        "collection '{}' is declared as '{}' and does not allow '{}' updates",
2746        collection,
2747        update_model_name(contract.declared_model),
2748        update_model_name(update_target_model(target))
2749    )))
2750}
2751
2752fn update_target_contract_is_advisory(contract: &crate::physical::CollectionContract) -> bool {
2753    matches!(
2754        (&contract.origin, &contract.schema_mode),
2755        (
2756            crate::physical::ContractOrigin::Implicit,
2757            crate::catalog::SchemaMode::Dynamic,
2758        )
2759    )
2760}
2761
2762fn update_target_model(target: UpdateTarget) -> crate::catalog::CollectionModel {
2763    match target {
2764        UpdateTarget::Rows => crate::catalog::CollectionModel::Table,
2765        UpdateTarget::Documents => crate::catalog::CollectionModel::Document,
2766        UpdateTarget::Kv => crate::catalog::CollectionModel::Kv,
2767        UpdateTarget::Nodes | UpdateTarget::Edges => crate::catalog::CollectionModel::Graph,
2768    }
2769}
2770
2771fn update_target_allows_model(
2772    declared_model: crate::catalog::CollectionModel,
2773    requested_model: crate::catalog::CollectionModel,
2774) -> bool {
2775    declared_model == requested_model || declared_model == crate::catalog::CollectionModel::Mixed
2776}
2777
2778fn update_model_name(model: crate::catalog::CollectionModel) -> &'static str {
2779    match model {
2780        crate::catalog::CollectionModel::Table => "table",
2781        crate::catalog::CollectionModel::Document => "document",
2782        crate::catalog::CollectionModel::Graph => "graph",
2783        crate::catalog::CollectionModel::Vector => "vector",
2784        crate::catalog::CollectionModel::Hll => "hll",
2785        crate::catalog::CollectionModel::Sketch => "sketch",
2786        crate::catalog::CollectionModel::Filter => "filter",
2787        crate::catalog::CollectionModel::Kv => "kv",
2788        crate::catalog::CollectionModel::Config => "config",
2789        crate::catalog::CollectionModel::Vault => "vault",
2790        crate::catalog::CollectionModel::Mixed => "mixed",
2791        crate::catalog::CollectionModel::TimeSeries => "timeseries",
2792        crate::catalog::CollectionModel::Queue => "queue",
2793        crate::catalog::CollectionModel::Metrics => "metrics",
2794    }
2795}
2796
2797fn ensure_graph_insert_contract(runtime: &RedDBRuntime, collection: &str) -> RedDBResult<()> {
2798    let db = runtime.db();
2799    if let Some(contract) = db.collection_contract(collection) {
2800        let advisory_implicit_dynamic = matches!(
2801            (&contract.origin, &contract.schema_mode),
2802            (
2803                crate::physical::ContractOrigin::Implicit,
2804                crate::catalog::SchemaMode::Dynamic,
2805            )
2806        );
2807        if advisory_implicit_dynamic
2808            || matches!(
2809                contract.declared_model,
2810                crate::catalog::CollectionModel::Graph | crate::catalog::CollectionModel::Mixed
2811            )
2812        {
2813            return Ok(());
2814        }
2815        return Err(RedDBError::InvalidOperation(format!(
2816            "collection '{}' is declared as '{:?}' and does not allow 'Graph' writes",
2817            collection, contract.declared_model
2818        )));
2819    }
2820
2821    let now = std::time::SystemTime::now()
2822        .duration_since(std::time::UNIX_EPOCH)
2823        .unwrap_or_default()
2824        .as_millis();
2825    db.save_collection_contract(crate::physical::CollectionContract {
2826        name: collection.to_string(),
2827        declared_model: crate::catalog::CollectionModel::Graph,
2828        schema_mode: crate::catalog::SchemaMode::Dynamic,
2829        origin: crate::physical::ContractOrigin::Implicit,
2830        version: 1,
2831        created_at_unix_ms: now,
2832        updated_at_unix_ms: now,
2833        default_ttl_ms: db.collection_default_ttl_ms(collection),
2834        vector_dimension: None,
2835        vector_metric: None,
2836        context_index_fields: Vec::new(),
2837        declared_columns: Vec::new(),
2838        table_def: None,
2839        timestamps_enabled: false,
2840        context_index_enabled: false,
2841        metrics_raw_retention_ms: None,
2842        metrics_rollup_policies: Vec::new(),
2843        metrics_tenant_identity: None,
2844        metrics_namespace: None,
2845        append_only: false,
2846        subscriptions: Vec::new(),
2847        analytics_config: Vec::new(),
2848        session_key: None,
2849        session_gap_ms: None,
2850        retention_duration_ms: None,
2851        analytical_storage: None,
2852
2853        ai_policy: None,
2854    })
2855    .map(|_| ())
2856    .map_err(|err| RedDBError::Internal(err.to_string()))
2857}
2858
2859fn update_needs_rmw_lock(query: &UpdateQuery) -> bool {
2860    query
2861        .assignment_exprs
2862        .iter()
2863        .enumerate()
2864        .any(|(idx, (column, expr))| {
2865            query
2866                .compound_assignment_ops
2867                .get(idx)
2868                .is_some_and(|op| op.is_some())
2869                || expr_references_update_column(expr, &query.table, column)
2870        })
2871}
2872
2873fn evaluate_compound_update_assignment(
2874    column: &str,
2875    record: &UnifiedRecord,
2876    op: BinOp,
2877    rhs: Value,
2878) -> RedDBResult<Value> {
2879    let lhs = record.get(column).ok_or_else(|| {
2880        RedDBError::Query(format!(
2881            "compound assignment requires existing numeric field '{column}'"
2882        ))
2883    })?;
2884    if matches!(lhs, Value::Null) {
2885        return Err(RedDBError::Query(format!(
2886            "compound assignment requires non-null numeric field '{column}'"
2887        )));
2888    }
2889    apply_compound_numeric_op(column, op, lhs, &rhs)
2890}
2891
2892fn apply_compound_numeric_op(
2893    column: &str,
2894    op: BinOp,
2895    lhs: &Value,
2896    rhs: &Value,
2897) -> RedDBResult<Value> {
2898    let Some(lhs_number) = CompoundNumber::from_value(lhs) else {
2899        return Err(RedDBError::Query(format!(
2900            "compound assignment requires numeric field '{column}'"
2901        )));
2902    };
2903    let Some(rhs_number) = CompoundNumber::from_value(rhs) else {
2904        return Err(RedDBError::Query(format!(
2905            "compound assignment requires numeric right-hand value for field '{column}'"
2906        )));
2907    };
2908
2909    if lhs_number.is_float() || rhs_number.is_float() || matches!(op, BinOp::Div) {
2910        let a = lhs_number.as_f64();
2911        let b = rhs_number.as_f64();
2912        let out = match op {
2913            BinOp::Add => a + b,
2914            BinOp::Sub => a - b,
2915            BinOp::Mul => a * b,
2916            BinOp::Div => {
2917                if b == 0.0 {
2918                    return Err(RedDBError::Query(format!(
2919                        "division by zero in compound assignment for field '{column}'"
2920                    )));
2921                }
2922                a / b
2923            }
2924            BinOp::Mod => {
2925                if b == 0.0 {
2926                    return Err(RedDBError::Query(format!(
2927                        "modulo by zero in compound assignment for field '{column}'"
2928                    )));
2929                }
2930                a % b
2931            }
2932            _ => {
2933                return Err(RedDBError::Query(format!(
2934                    "unsupported compound assignment operator for field '{column}'"
2935                )));
2936            }
2937        };
2938        if !out.is_finite() {
2939            return Err(RedDBError::Query(format!(
2940                "numeric overflow in compound assignment for field '{column}'"
2941            )));
2942        }
2943        return Ok(Value::Float(out));
2944    }
2945
2946    let a = lhs_number.as_i128();
2947    let b = rhs_number.as_i128();
2948    let out = match op {
2949        BinOp::Add => a.checked_add(b),
2950        BinOp::Sub => a.checked_sub(b),
2951        BinOp::Mul => a.checked_mul(b),
2952        BinOp::Mod => {
2953            if b == 0 {
2954                return Err(RedDBError::Query(format!(
2955                    "modulo by zero in compound assignment for field '{column}'"
2956                )));
2957            }
2958            a.checked_rem(b)
2959        }
2960        BinOp::Div => unreachable!("integer division is handled by the float branch"),
2961        _ => None,
2962    }
2963    .ok_or_else(|| {
2964        RedDBError::Query(format!(
2965            "numeric overflow in compound assignment for field '{column}'"
2966        ))
2967    })?;
2968
2969    if matches!(lhs, Value::UnsignedInteger(_)) {
2970        let value = u64::try_from(out).map_err(|_| {
2971            RedDBError::Query(format!(
2972                "numeric overflow in compound assignment for field '{column}'"
2973            ))
2974        })?;
2975        Ok(Value::UnsignedInteger(value))
2976    } else {
2977        let value = i64::try_from(out).map_err(|_| {
2978            RedDBError::Query(format!(
2979                "numeric overflow in compound assignment for field '{column}'"
2980            ))
2981        })?;
2982        Ok(Value::Integer(value))
2983    }
2984}
2985
2986#[derive(Clone, Copy)]
2987enum CompoundNumber {
2988    Integer(i128),
2989    Float(f64),
2990}
2991
2992impl CompoundNumber {
2993    fn from_value(value: &Value) -> Option<Self> {
2994        match value {
2995            Value::Integer(value) | Value::BigInt(value) => Some(Self::Integer(*value as i128)),
2996            Value::UnsignedInteger(value) => Some(Self::Integer(*value as i128)),
2997            Value::Float(value) => value.is_finite().then_some(Self::Float(*value)),
2998            Value::Decimal(value) => Some(Self::Float(*value as f64 / 10_000.0)),
2999            _ => None,
3000        }
3001    }
3002
3003    fn is_float(self) -> bool {
3004        matches!(self, Self::Float(_))
3005    }
3006
3007    fn as_f64(self) -> f64 {
3008        match self {
3009            Self::Integer(value) => value as f64,
3010            Self::Float(value) => value,
3011        }
3012    }
3013
3014    fn as_i128(self) -> i128 {
3015        match self {
3016            Self::Integer(value) => value,
3017            Self::Float(_) => unreachable!("float compound number used as integer"),
3018        }
3019    }
3020}
3021
3022fn expr_references_update_column(expr: &Expr, table_name: &str, target_column: &str) -> bool {
3023    match expr {
3024        Expr::Literal { .. } | Expr::Parameter { .. } | Expr::Subquery { .. } => false,
3025        Expr::Column { field, .. } => {
3026            field_ref_matches_update_column(field, table_name, target_column)
3027        }
3028        Expr::BinaryOp { lhs, rhs, .. } => {
3029            expr_references_update_column(lhs, table_name, target_column)
3030                || expr_references_update_column(rhs, table_name, target_column)
3031        }
3032        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
3033            expr_references_update_column(operand, table_name, target_column)
3034        }
3035        Expr::FunctionCall { args, .. } => args
3036            .iter()
3037            .any(|arg| expr_references_update_column(arg, table_name, target_column)),
3038        Expr::Case {
3039            branches, else_, ..
3040        } => {
3041            branches.iter().any(|(cond, value)| {
3042                expr_references_update_column(cond, table_name, target_column)
3043                    || expr_references_update_column(value, table_name, target_column)
3044            }) || else_
3045                .as_deref()
3046                .is_some_and(|expr| expr_references_update_column(expr, table_name, target_column))
3047        }
3048        Expr::IsNull { operand, .. } => {
3049            expr_references_update_column(operand, table_name, target_column)
3050        }
3051        Expr::InList { target, values, .. } => {
3052            expr_references_update_column(target, table_name, target_column)
3053                || values
3054                    .iter()
3055                    .any(|value| expr_references_update_column(value, table_name, target_column))
3056        }
3057        Expr::Between {
3058            target, low, high, ..
3059        } => {
3060            expr_references_update_column(target, table_name, target_column)
3061                || expr_references_update_column(low, table_name, target_column)
3062                || expr_references_update_column(high, table_name, target_column)
3063        }
3064        Expr::WindowFunctionCall { args, window, .. } => {
3065            args.iter()
3066                .any(|arg| expr_references_update_column(arg, table_name, target_column))
3067                || window
3068                    .partition_by
3069                    .iter()
3070                    .any(|e| expr_references_update_column(e, table_name, target_column))
3071                || window
3072                    .order_by
3073                    .iter()
3074                    .any(|o| expr_references_update_column(&o.expr, table_name, target_column))
3075        }
3076    }
3077}
3078
3079fn field_ref_matches_update_column(
3080    field: &FieldRef,
3081    table_name: &str,
3082    target_column: &str,
3083) -> bool {
3084    match field {
3085        FieldRef::TableColumn { table, column } => {
3086            column.eq_ignore_ascii_case(target_column)
3087                && (table.is_empty() || table.eq_ignore_ascii_case(table_name))
3088        }
3089        FieldRef::NodeProperty { .. } | FieldRef::EdgeProperty { .. } | FieldRef::NodeId { .. } => {
3090            false
3091        }
3092    }
3093}
3094
3095fn resolve_update_entity_by_logical_id(
3096    runtime: &RedDBRuntime,
3097    table: &str,
3098    logical_id: EntityId,
3099) -> Option<UnifiedEntity> {
3100    let store = runtime.inner.db.store();
3101    if let Some(entity) = store.get_table_row_by_logical_id(table, logical_id) {
3102        return Some(entity);
3103    }
3104    // Fallback for non-table-row entities (graph nodes/edges, etc.) where
3105    // entity_id == logical_id and the MVCC table-row resolver doesn't apply.
3106    store.get(table, logical_id)
3107}
3108
3109fn update_cdc_item_kind(
3110    runtime: &RedDBRuntime,
3111    collection: &str,
3112    entity: &UnifiedEntity,
3113) -> &'static str {
3114    match &entity.data {
3115        EntityData::Node(_) => return "node",
3116        EntityData::Edge(_) => return "edge",
3117        _ => {}
3118    }
3119
3120    match runtime
3121        .db()
3122        .collection_contract(collection)
3123        .map(|contract| contract.declared_model)
3124    {
3125        Some(crate::catalog::CollectionModel::Document) => "document",
3126        Some(crate::catalog::CollectionModel::Kv)
3127        | Some(crate::catalog::CollectionModel::Vault) => "kv",
3128        _ => "row",
3129    }
3130}
3131
3132fn ordered_update_target_ids(
3133    manager: &Arc<crate::storage::SegmentManager>,
3134    entity_ids: &[EntityId],
3135    order_by: &[OrderByClause],
3136    limit: Option<usize>,
3137) -> Vec<EntityId> {
3138    let mut entities: Vec<UnifiedEntity> =
3139        manager.get_many(entity_ids).into_iter().flatten().collect();
3140    entities.sort_by(|left, right| compare_update_order(left, right, order_by));
3141    if let Some(limit) = limit {
3142        entities.truncate(limit);
3143    }
3144    entities.into_iter().map(|entity| entity.id).collect()
3145}
3146
3147fn compare_update_order(
3148    left: &UnifiedEntity,
3149    right: &UnifiedEntity,
3150    order_by: &[OrderByClause],
3151) -> Ordering {
3152    for clause in order_by {
3153        let left_value = update_order_value(left, &clause.field);
3154        let right_value = update_order_value(right, &clause.field);
3155        let ordering = compare_update_order_values(
3156            left_value.as_ref(),
3157            right_value.as_ref(),
3158            clause.nulls_first,
3159        );
3160        if ordering != Ordering::Equal {
3161            return if clause.ascending {
3162                ordering
3163            } else {
3164                ordering.reverse()
3165            };
3166        }
3167    }
3168    left.logical_id().raw().cmp(&right.logical_id().raw())
3169}
3170
3171fn compare_update_order_values(
3172    left: Option<&Value>,
3173    right: Option<&Value>,
3174    nulls_first: bool,
3175) -> Ordering {
3176    match (left, right) {
3177        (None, None) => Ordering::Equal,
3178        (None, Some(_)) => {
3179            if nulls_first {
3180                Ordering::Less
3181            } else {
3182                Ordering::Greater
3183            }
3184        }
3185        (Some(_), None) => {
3186            if nulls_first {
3187                Ordering::Greater
3188            } else {
3189                Ordering::Less
3190            }
3191        }
3192        (Some(left), Some(right)) => {
3193            crate::storage::query::value_compare::total_compare_values(left, right)
3194        }
3195    }
3196}
3197
3198fn update_order_value(entity: &UnifiedEntity, field: &FieldRef) -> Option<Value> {
3199    let FieldRef::TableColumn { table, column } = field else {
3200        return None;
3201    };
3202    if !table.is_empty() {
3203        return None;
3204    }
3205    if column.eq_ignore_ascii_case("rid") {
3206        return Some(Value::UnsignedInteger(entity.logical_id().raw()));
3207    }
3208    match &entity.data {
3209        EntityData::Row(row) => row.get_field(column).cloned(),
3210        EntityData::Node(_) | EntityData::Edge(_) => runtime_any_record_from_entity_ref(entity)
3211            .and_then(|record| record.get(column).cloned()),
3212        _ => None,
3213    }
3214}
3215
/// Removes duplicate column names, comparing ASCII case-insensitively.
///
/// The first spelling of each name is kept and the original relative order is
/// preserved.
fn dedupe_update_columns(columns: Vec<String>) -> Vec<String> {
    let mut kept: Vec<String> = Vec::with_capacity(columns.len());
    for candidate in columns {
        let already_kept = kept
            .iter()
            .any(|seen| seen.eq_ignore_ascii_case(&candidate));
        if !already_kept {
            kept.push(candidate);
        }
    }
    kept
}
3232
3233// =============================================================================
3234// Helper functions for extracting typed values from column/value pairs
3235// =============================================================================
3236
/// Column names that SQL INSERT treats as TTL metadata rather than row fields.
const SQL_TTL_METADATA_COLUMNS: [&str; 3] = ["_ttl", "_ttl_ms", "_expires_at"];

/// Maps a column name to its canonical TTL metadata key.
///
/// The comparison ignores ASCII case; the returned key is the canonical
/// lowercase spelling from `SQL_TTL_METADATA_COLUMNS`. Returns `None` for
/// any other column.
fn resolve_sql_ttl_metadata_key(column: &str) -> Option<&'static str> {
    SQL_TTL_METADATA_COLUMNS
        .iter()
        .copied()
        .find(|known| column.eq_ignore_ascii_case(known))
}
3250
3251/// Canonicalize a SQL TTL metadata `(key, value)` pair so the retention
3252/// sweeper sees a single key (`_ttl_ms`) regardless of which legacy form
3253/// the operator wrote. `_ttl` is scaled from seconds to milliseconds;
3254/// `_ttl_ms` and `_expires_at` are passed through.
3255fn canonicalize_sql_ttl_metadata(
3256    key: &'static str,
3257    value: MetadataValue,
3258) -> (&'static str, MetadataValue) {
3259    if key != "_ttl" {
3260        return (key, value);
3261    }
3262    let scaled = match value {
3263        MetadataValue::Int(s) => MetadataValue::Int(s.saturating_mul(1_000)),
3264        MetadataValue::Timestamp(ms_or_s) => {
3265            // Timestamp is already chosen for very large values; treat as
3266            // already-ms to avoid silent overflow.
3267            MetadataValue::Timestamp(ms_or_s)
3268        }
3269        MetadataValue::Float(f) => MetadataValue::Float(f * 1_000.0),
3270        other => other,
3271    };
3272    ("_ttl_ms", scaled)
3273}
3274
3275/// Sentinel prefix produced by the parser for `PASSWORD('...')` and
3276/// `SECRET('...')` literals. The runtime strips this marker and
3277/// applies the actual crypto transform during INSERT execution.
3278pub(crate) const PLAINTEXT_SENTINEL: &str = "@@plain@@";
3279
3280impl RedDBRuntime {
3281    /// Strip the plaintext sentinel from a `Value::Password` or
3282    /// `Value::Secret` produced by the parser and apply the real
3283    /// crypto transform. `Password` is always hashed with argon2id.
3284    /// `Secret` is encrypted with AES-256-GCM keyed by the vault
3285    /// when `red.config.secret.auto_encrypt = true` (default).
3286    pub(crate) fn resolve_crypto_sentinel(&self, value: Value) -> RedDBResult<Value> {
3287        match value {
3288            Value::Password(marked) => {
3289                if let Some(plain) = marked.strip_prefix(PLAINTEXT_SENTINEL) {
3290                    Ok(Value::Password(crate::auth::store::hash_password(plain)))
3291                } else {
3292                    Ok(Value::Password(marked))
3293                }
3294            }
3295            Value::Secret(bytes) => {
3296                if bytes.starts_with(PLAINTEXT_SENTINEL.as_bytes()) {
3297                    if !self.secret_auto_encrypt() {
3298                        return Err(RedDBError::Query(
3299                            "SECRET() literal rejected: red.config.secret.auto_encrypt \
3300                             is false. Insert pre-encrypted bytes directly instead."
3301                                .to_string(),
3302                        ));
3303                    }
3304                    let key = self.secret_aes_key().ok_or_else(|| {
3305                        RedDBError::Query(
3306                            "SECRET() column encryption requires a bootstrapped \
3307                             vault (red.secret.aes_key is missing). Start the server \
3308                             with --vault to enable."
3309                                .to_string(),
3310                        )
3311                    })?;
3312                    let plain = &bytes[PLAINTEXT_SENTINEL.len()..];
3313                    Ok(Value::Secret(encrypt_secret_payload(&key, plain)))
3314                } else {
3315                    Ok(Value::Secret(bytes))
3316                }
3317            }
3318            other => Ok(other),
3319        }
3320    }
3321}
3322
3323/// Encode an AES-256-GCM ciphertext as `[12-byte nonce][ciphertext||tag]`.
3324/// This is the on-disk representation of `Value::Secret`.
3325fn encrypt_secret_payload(key: &[u8; 32], plaintext: &[u8]) -> Vec<u8> {
3326    let nonce_bytes = crate::auth::store::random_bytes(12);
3327    let mut nonce = [0u8; 12];
3328    nonce.copy_from_slice(&nonce_bytes[..12]);
3329    let ct = crate::crypto::aes_gcm::aes256_gcm_encrypt(key, &nonce, b"reddb.secret", plaintext);
3330    let mut out = Vec::with_capacity(12 + ct.len());
3331    out.extend_from_slice(&nonce);
3332    out.extend_from_slice(&ct);
3333    out
3334}
3335
3336/// Decode a `Value::Secret` payload back to plaintext. Returns
3337/// `None` when the payload is too short or AES-GCM authentication
3338/// fails (tampered or wrong key).
3339pub(crate) fn decrypt_secret_payload(key: &[u8; 32], payload: &[u8]) -> Option<Vec<u8>> {
3340    if payload.len() < 12 {
3341        return None;
3342    }
3343    let mut nonce = [0u8; 12];
3344    nonce.copy_from_slice(&payload[..12]);
3345    crate::crypto::aes_gcm::aes256_gcm_decrypt(key, &nonce, b"reddb.secret", &payload[12..]).ok()
3346}
3347
3348fn split_insert_metadata(
3349    runtime: &RedDBRuntime,
3350    columns: &[String],
3351    values: &[Value],
3352) -> RedDBResult<(Vec<(String, Value)>, Vec<(String, MetadataValue)>)> {
3353    let mut fields = Vec::new();
3354    let mut metadata = Vec::new();
3355
3356    for (column, value) in columns.iter().zip(values.iter()) {
3357        // Still support legacy _ttl columns for backward compat
3358        if let Some(metadata_key) = resolve_sql_ttl_metadata_key(column) {
3359            let raw_value = sql_literal_to_metadata_value(metadata_key, value)?;
3360            let (canonical_key, canonical_value) =
3361                canonicalize_sql_ttl_metadata(metadata_key, raw_value);
3362            metadata.push((canonical_key.to_string(), canonical_value));
3363            continue;
3364        }
3365        fields.push((
3366            column.clone(),
3367            runtime.resolve_crypto_sentinel(value.clone())?,
3368        ));
3369    }
3370
3371    Ok((fields, metadata))
3372}
3373
3374/// Merge structured WITH TTL, WITH EXPIRES AT, and WITH METADATA clauses into metadata entries.
3375fn merge_with_clauses(
3376    metadata: &mut Vec<(String, MetadataValue)>,
3377    ttl_ms: Option<u64>,
3378    expires_at_ms: Option<u64>,
3379    with_metadata: &[(String, Value)],
3380) {
3381    if let Some(ms) = ttl_ms {
3382        metadata.push((
3383            "_ttl_ms".to_string(),
3384            if ms <= i64::MAX as u64 {
3385                MetadataValue::Int(ms as i64)
3386            } else {
3387                MetadataValue::Timestamp(ms)
3388            },
3389        ));
3390    }
3391    if let Some(ms) = expires_at_ms {
3392        metadata.push(("_expires_at".to_string(), MetadataValue::Timestamp(ms)));
3393    }
3394    for (key, value) in with_metadata {
3395        let meta_value = match value {
3396            Value::Text(s) => MetadataValue::String(s.to_string()),
3397            Value::Integer(n) => MetadataValue::Int(*n),
3398            Value::Float(n) => MetadataValue::Float(*n),
3399            Value::Boolean(b) => MetadataValue::Bool(*b),
3400            _ => MetadataValue::String(value.to_string()),
3401        };
3402        metadata.push((key.clone(), meta_value));
3403    }
3404}
3405
3406fn merge_vector_metadata_column(
3407    metadata: &mut Vec<(String, MetadataValue)>,
3408    columns: &[String],
3409    values: &[Value],
3410) -> RedDBResult<()> {
3411    let Some(value) = columns
3412        .iter()
3413        .position(|column| column.eq_ignore_ascii_case("metadata"))
3414        .map(|index| &values[index])
3415    else {
3416        return Ok(());
3417    };
3418    let json = match value {
3419        Value::Null => return Ok(()),
3420        Value::Json(bytes) => crate::json::from_slice(bytes).map_err(|err| {
3421            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3422        })?,
3423        Value::Text(text) => crate::json::from_str(text).map_err(|err| {
3424            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3425        })?,
3426        other => {
3427            return Err(RedDBError::Query(format!(
3428                "column 'metadata' expected JSON object, got {other:?}"
3429            )))
3430        }
3431    };
3432    let parsed = metadata_from_json(&json)?;
3433    for (key, value) in parsed.iter() {
3434        metadata.push((key.clone(), value.clone()));
3435    }
3436    Ok(())
3437}
3438
3439fn apply_collection_default_ttl_metadata(
3440    runtime: &RedDBRuntime,
3441    collection: &str,
3442    metadata: &mut Vec<(String, MetadataValue)>,
3443) {
3444    if has_internal_ttl_metadata(metadata) {
3445        return;
3446    }
3447
3448    let Some(default_ttl_ms) = runtime.db().collection_default_ttl_ms(collection) else {
3449        return;
3450    };
3451
3452    metadata.push((
3453        "_ttl_ms".to_string(),
3454        if default_ttl_ms <= i64::MAX as u64 {
3455            MetadataValue::Int(default_ttl_ms as i64)
3456        } else {
3457            MetadataValue::Timestamp(default_ttl_ms)
3458        },
3459    ));
3460}
3461
3462fn ensure_non_tree_reserved_metadata_entries(
3463    metadata: &[(String, MetadataValue)],
3464) -> RedDBResult<()> {
3465    for (key, _) in metadata {
3466        ensure_non_tree_reserved_metadata_key(key)?;
3467    }
3468    Ok(())
3469}
3470
3471fn ensure_non_tree_reserved_metadata_key(key: &str) -> RedDBResult<()> {
3472    if key.starts_with(TREE_METADATA_PREFIX) {
3473        return Err(RedDBError::Query(format!(
3474            "metadata key '{}' is reserved for managed trees",
3475            key
3476        )));
3477    }
3478    Ok(())
3479}
3480
3481fn ensure_non_tree_structural_edge_label(label: &str) -> RedDBResult<()> {
3482    if label.eq_ignore_ascii_case(TREE_CHILD_EDGE_LABEL) {
3483        return Err(RedDBError::Query(format!(
3484            "edge label '{}' is reserved for managed trees",
3485            TREE_CHILD_EDGE_LABEL
3486        )));
3487    }
3488    Ok(())
3489}
3490
3491fn pairwise_columns_values(pairs: &[(String, Value)]) -> (Vec<String>, Vec<Value>) {
3492    let mut columns = Vec::with_capacity(pairs.len());
3493    let mut values = Vec::with_capacity(pairs.len());
3494
3495    for (column, value) in pairs {
3496        columns.push(column.clone());
3497        values.push(value.clone());
3498    }
3499
3500    (columns, values)
3501}
3502
3503/// Find a required column value and return it as-is.
3504fn find_column_value(columns: &[String], values: &[Value], name: &str) -> RedDBResult<Value> {
3505    for (i, col) in columns.iter().enumerate() {
3506        if col.eq_ignore_ascii_case(name) {
3507            return Ok(values[i].clone());
3508        }
3509    }
3510    Err(RedDBError::Query(format!(
3511        "required column '{name}' not found in INSERT"
3512    )))
3513}
3514
3515/// Find a required column value and coerce to String.
3516fn find_column_value_string(
3517    columns: &[String],
3518    values: &[Value],
3519    name: &str,
3520) -> RedDBResult<String> {
3521    let val = find_column_value(columns, values, name)?;
3522    match val {
3523        Value::Text(s) => Ok(s.to_string()),
3524        Value::Integer(n) => Ok(n.to_string()),
3525        Value::Float(n) => Ok(n.to_string()),
3526        other => Err(RedDBError::Query(format!(
3527            "column '{name}' expected text, got {other:?}"
3528        ))),
3529    }
3530}
3531
3532fn find_document_body_json(
3533    columns: &[String],
3534    values: &[Value],
3535) -> RedDBResult<crate::json::Value> {
3536    let val = find_column_value(columns, values, "body")?;
3537    match val {
3538        Value::Json(bytes) | Value::Blob(bytes) => crate::json::from_slice(&bytes)
3539            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3540        Value::Text(text) => crate::json::from_str(text.as_ref())
3541            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3542        Value::Integer(value) => crate::json::from_str(&value.to_string())
3543            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3544        Value::UnsignedInteger(value) => crate::json::from_str(&value.to_string())
3545            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3546        Value::Float(value) => crate::json::from_str(&value.to_string())
3547            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3548        other => Err(RedDBError::Query(format!(
3549            "column 'body' expected JSON body, got {other:?}"
3550        ))),
3551    }
3552}
3553
3554fn find_column_value_f64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<f64> {
3555    let val = find_column_value(columns, values, name)?;
3556    match val {
3557        Value::Float(n) => Ok(n),
3558        Value::Integer(n) => Ok(n as f64),
3559        Value::UnsignedInteger(n) => Ok(n as f64),
3560        Value::Text(s) => s
3561            .parse::<f64>()
3562            .map_err(|_| RedDBError::Query(format!("column '{name}' expected number, got '{s}'"))),
3563        other => Err(RedDBError::Query(format!(
3564            "column '{name}' expected number, got {other:?}"
3565        ))),
3566    }
3567}
3568
3569/// Find an optional column value as String.
3570fn find_column_value_opt_string(
3571    columns: &[String],
3572    values: &[Value],
3573    name: &str,
3574) -> Option<String> {
3575    for (i, col) in columns.iter().enumerate() {
3576        if col.eq_ignore_ascii_case(name) {
3577            return match &values[i] {
3578                Value::Null => None,
3579                Value::Text(s) => Some(s.to_string()),
3580                Value::Integer(n) => Some(n.to_string()),
3581                Value::Float(n) => Some(n.to_string()),
3582                _ => None,
3583            };
3584        }
3585    }
3586    None
3587}
3588
3589/// Resolve an EDGE endpoint (`from`/`to`) to a numeric entity id.
3590///
3591/// Accepts integer literals, decimal strings, and node labels resolved via
3592/// the per-collection graph label index (same source of truth that
3593/// `GRAPH NEIGHBORHOOD` / `GRAPH TRAVERSE` use at query time). Ambiguous
3594/// labels error so callers can fall back to the numeric id form.
3595fn resolve_edge_endpoint(
3596    store: &crate::storage::unified::UnifiedStore,
3597    collection: &str,
3598    columns: &[String],
3599    values: &[Value],
3600    name: &str,
3601) -> RedDBResult<u64> {
3602    let val = find_column_value(columns, values, name)?;
3603    match val {
3604        Value::Integer(n) => Ok(n as u64),
3605        Value::UnsignedInteger(n) => Ok(n),
3606        Value::Text(s) => {
3607            if let Ok(n) = s.parse::<u64>() {
3608                return Ok(n);
3609            }
3610            let matches = store.lookup_graph_nodes_by_label_in(collection, &s);
3611            match matches.len() {
3612                0 => Err(RedDBError::Query(format!(
3613                    "column '{name}': no graph node with label '{s}' in collection '{collection}'"
3614                ))),
3615                1 => Ok(matches[0].raw()),
3616                n => Err(RedDBError::Query(format!(
3617                    "column '{name}': ambiguous label '{s}' matches {n} nodes in collection '{collection}'; use the numeric id"
3618                ))),
3619            }
3620        }
3621        other => Err(RedDBError::Query(format!(
3622            "column '{name}' expected integer or node label, got {other:?}"
3623        ))),
3624    }
3625}
3626
3627fn resolve_edge_endpoint_any(
3628    store: &crate::storage::unified::UnifiedStore,
3629    collection: &str,
3630    columns: &[String],
3631    values: &[Value],
3632    names: &[&str],
3633) -> RedDBResult<u64> {
3634    for name in names {
3635        if columns
3636            .iter()
3637            .any(|column| column.eq_ignore_ascii_case(name))
3638        {
3639            return resolve_edge_endpoint(store, collection, columns, values, name);
3640        }
3641    }
3642
3643    Err(RedDBError::Query(format!(
3644        "required column '{}' not found in INSERT",
3645        names.first().copied().unwrap_or("from_rid")
3646    )))
3647}
3648
3649/// Find a required column value and coerce to u64.
3650fn find_column_value_u64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<u64> {
3651    let val = find_column_value(columns, values, name)?;
3652    match val {
3653        Value::Integer(n) => Ok(n as u64),
3654        Value::UnsignedInteger(n) => Ok(n),
3655        Value::Text(s) => s
3656            .parse::<u64>()
3657            .map_err(|_| RedDBError::Query(format!("column '{name}' expected integer, got '{s}'"))),
3658        other => Err(RedDBError::Query(format!(
3659            "column '{name}' expected integer, got {other:?}"
3660        ))),
3661    }
3662}
3663
3664/// Find an optional column value as f32.
3665fn find_column_value_f32_opt(columns: &[String], values: &[Value], name: &str) -> Option<f32> {
3666    for (i, col) in columns.iter().enumerate() {
3667        if col.eq_ignore_ascii_case(name) {
3668            return match &values[i] {
3669                Value::Float(n) => Some(*n as f32),
3670                Value::Integer(n) => Some(*n as f32),
3671                Value::Null => None,
3672                _ => None,
3673            };
3674        }
3675    }
3676    None
3677}
3678
3679/// Find a required column value and coerce to Vec<f32> (from Value::Vector).
3680fn find_column_value_vec_f32(
3681    columns: &[String],
3682    values: &[Value],
3683    name: &str,
3684) -> RedDBResult<Vec<f32>> {
3685    let val = find_column_value(columns, values, name)?;
3686    match val {
3687        Value::Vector(v) => Ok(v),
3688        Value::Json(bytes) => {
3689            // Try to parse as JSON array of numbers
3690            let s = std::str::from_utf8(&bytes).map_err(|_| {
3691                RedDBError::Query(format!("column '{name}' contains invalid UTF-8"))
3692            })?;
3693            let arr: Vec<f32> = crate::json::from_str(s).map_err(|e| {
3694                RedDBError::Query(format!("column '{name}' invalid vector JSON: {e}"))
3695            })?;
3696            Ok(arr)
3697        }
3698        other => Err(RedDBError::Query(format!(
3699            "column '{name}' expected vector, got {other:?}"
3700        ))),
3701    }
3702}
3703
3704fn find_column_value_vec_f32_any(
3705    columns: &[String],
3706    values: &[Value],
3707    names: &[&str],
3708) -> RedDBResult<Vec<f32>> {
3709    for name in names {
3710        if columns
3711            .iter()
3712            .any(|column| column.eq_ignore_ascii_case(name))
3713        {
3714            return find_column_value_vec_f32(columns, values, name);
3715        }
3716    }
3717    Err(RedDBError::Query(format!(
3718        "required vector column '{}' not found in INSERT",
3719        names.join("' or '")
3720    )))
3721}
3722
3723/// Extract remaining properties (all columns not in the exclusion list).
3724fn extract_remaining_properties(
3725    columns: &[String],
3726    values: &[Value],
3727    exclude: &[&str],
3728) -> Vec<(String, Value)> {
3729    columns
3730        .iter()
3731        .zip(values.iter())
3732        .filter(|(col, _)| !exclude.iter().any(|e| col.eq_ignore_ascii_case(e)))
3733        .map(|(col, val)| (col.clone(), val.clone()))
3734        .collect()
3735}
3736
3737fn validate_timeseries_insert_columns(columns: &[String]) -> RedDBResult<()> {
3738    let mut invalid = Vec::new();
3739    for column in columns {
3740        if !is_timeseries_insert_column(column) && resolve_sql_ttl_metadata_key(column).is_none() {
3741            invalid.push(column.clone());
3742        }
3743    }
3744
3745    if invalid.is_empty() {
3746        Ok(())
3747    } else {
3748        Err(RedDBError::Query(format!(
3749            "timeseries INSERT only accepts metric, value, tags, timestamp, timestamp_ns, or time columns; got {}",
3750            invalid.join(", ")
3751        )))
3752    }
3753}
3754
/// Report whether `column` is one of the column names a timeseries INSERT
/// understands. The comparison is ASCII case-insensitive.
fn is_timeseries_insert_column(column: &str) -> bool {
    const ACCEPTED_COLUMNS: [&str; 8] = [
        "metric",
        "value",
        "tags",
        "timestamp",
        "timestamp_ns",
        "time",
        // Analytics-event extension (#577): an analytics row carries
        // an `event_name` + JSON `payload`. The payload is validated
        // against the AnalyticsSchemaRegistry inside
        // `insert_timeseries_point` before the row lands.
        "event_name",
        "payload",
    ];

    ACCEPTED_COLUMNS
        .iter()
        .any(|accepted| column.eq_ignore_ascii_case(accepted))
}
3772
3773fn find_timeseries_timestamp_ns(columns: &[String], values: &[Value]) -> RedDBResult<Option<u64>> {
3774    let mut found = None;
3775
3776    for alias in ["timestamp_ns", "timestamp", "time"] {
3777        for (index, column) in columns.iter().enumerate() {
3778            if !column.eq_ignore_ascii_case(alias) {
3779                continue;
3780            }
3781
3782            if found.is_some() {
3783                return Err(RedDBError::Query(
3784                    "timeseries INSERT accepts only one timestamp column".to_string(),
3785                ));
3786            }
3787
3788            found = Some(coerce_value_to_non_negative_u64(&values[index], alias)?);
3789        }
3790    }
3791
3792    Ok(found)
3793}
3794
3795fn find_timeseries_tags(
3796    columns: &[String],
3797    values: &[Value],
3798) -> RedDBResult<std::collections::HashMap<String, String>> {
3799    for (index, column) in columns.iter().enumerate() {
3800        if column.eq_ignore_ascii_case("tags") {
3801            return parse_timeseries_tags(&values[index]);
3802        }
3803    }
3804    Ok(std::collections::HashMap::new())
3805}
3806
3807fn parse_timeseries_tags(value: &Value) -> RedDBResult<std::collections::HashMap<String, String>> {
3808    match value {
3809        Value::Null => Ok(std::collections::HashMap::new()),
3810        Value::Json(bytes) => parse_timeseries_tags_json(bytes),
3811        Value::Text(text) => parse_timeseries_tags_json(text.as_bytes()),
3812        other => Err(RedDBError::Query(format!(
3813            "timeseries tags must be a JSON object or JSON text, got {other:?}"
3814        ))),
3815    }
3816}
3817
3818fn parse_timeseries_tags_json(
3819    bytes: &[u8],
3820) -> RedDBResult<std::collections::HashMap<String, String>> {
3821    let json: crate::json::Value = crate::json::from_slice(bytes)
3822        .map_err(|err| RedDBError::Query(format!("timeseries tags must be valid JSON: {err}")))?;
3823
3824    let object = match json {
3825        crate::json::Value::Object(object) => object,
3826        other => {
3827            return Err(RedDBError::Query(format!(
3828                "timeseries tags must be a JSON object, got {other:?}"
3829            )))
3830        }
3831    };
3832
3833    let mut tags = std::collections::HashMap::with_capacity(object.len());
3834    for (key, value) in object {
3835        tags.insert(key, json_tag_value_to_string(&value));
3836    }
3837    Ok(tags)
3838}
3839
3840/// Encode a tag value for storage so the original JSON type can be
3841/// recovered on read (issue #543).
3842///
3843/// Time-series tags are stored as `HashMap<String, String>` on the
3844/// physical record (see [`crate::storage::TimeSeriesData`]) so that
3845/// the segment codec, WAL and gRPC mirrors don't need a new value
3846/// variant. To preserve the original JSON type across that
3847/// string-only channel we prepend the
3848/// [`crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX`] marker
3849/// and serialize the value as compact JSON text. The read paths
3850/// (`timeseries_tags_json_value` / `timeseries_tags_value`) detect
3851/// the marker, parse the suffix, and recover a real JSON value.
3852/// Tags written through other channels (Prometheus remote write,
3853/// metrics handlers, legacy on-disk data) lack the marker and are
3854/// returned as `JsonValue::String(raw)` exactly as before.
3855fn json_tag_value_to_string(value: &crate::json::Value) -> String {
3856    let mut buf = String::with_capacity(value.to_string_compact().len() + 1);
3857    buf.push(crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX);
3858    buf.push_str(&value.to_string_compact());
3859    buf
3860}
3861
3862fn coerce_value_to_non_negative_u64(value: &Value, column: &str) -> RedDBResult<u64> {
3863    match value {
3864        Value::UnsignedInteger(value) => Ok(*value),
3865        Value::Integer(value) if *value >= 0 => Ok(*value as u64),
3866        Value::Float(value) if *value >= 0.0 => Ok(*value as u64),
3867        Value::Text(value) => value.parse::<u64>().map_err(|_| {
3868            RedDBError::Query(format!(
3869                "column '{column}' expected a non-negative integer timestamp, got '{value}'"
3870            ))
3871        }),
3872        other => Err(RedDBError::Query(format!(
3873            "column '{column}' expected a non-negative integer timestamp, got {other:?}"
3874        ))),
3875    }
3876}
3877
/// Current wall-clock time as nanoseconds since the Unix epoch.
///
/// Yields `0` if the system clock reads earlier than the epoch, and clamps to
/// `u64::MAX` if the nanosecond count does not fit in 64 bits.
fn current_unix_ns() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX)
}
3885
3886fn metadata_value_to_json(value: &MetadataValue) -> crate::json::Value {
3887    use crate::json::{Map, Value as JV};
3888    match value {
3889        MetadataValue::Null => JV::Null,
3890        MetadataValue::Bool(value) => JV::Bool(*value),
3891        MetadataValue::Int(value) => JV::Number(*value as f64),
3892        MetadataValue::Float(value) => JV::Number(*value),
3893        MetadataValue::String(value) => JV::String(value.clone()),
3894        MetadataValue::Bytes(value) => JV::Array(
3895            value
3896                .iter()
3897                .map(|value| JV::Number(*value as f64))
3898                .collect(),
3899        ),
3900        MetadataValue::Timestamp(value) => JV::Number(*value as f64),
3901        MetadataValue::Array(values) => {
3902            JV::Array(values.iter().map(metadata_value_to_json).collect())
3903        }
3904        MetadataValue::Object(object) => {
3905            let entries = object
3906                .iter()
3907                .map(|(key, value)| (key.clone(), metadata_value_to_json(value)))
3908                .collect();
3909            JV::Object(entries)
3910        }
3911        MetadataValue::Geo { lat, lon } => {
3912            let mut object = Map::new();
3913            object.insert("lat".to_string(), JV::Number(*lat));
3914            object.insert("lon".to_string(), JV::Number(*lon));
3915            JV::Object(object)
3916        }
3917        MetadataValue::Reference(target) => {
3918            let mut object = Map::new();
3919            object.insert(
3920                "collection".to_string(),
3921                JV::String(target.collection().to_string()),
3922            );
3923            object.insert(
3924                "entity_id".to_string(),
3925                JV::Number(target.entity_id().raw() as f64),
3926            );
3927            JV::Object(object)
3928        }
3929        MetadataValue::References(values) => {
3930            let refs = values
3931                .iter()
3932                .map(|target| {
3933                    let mut object = Map::new();
3934                    object.insert(
3935                        "collection".to_string(),
3936                        JV::String(target.collection().to_string()),
3937                    );
3938                    object.insert(
3939                        "entity_id".to_string(),
3940                        JV::Number(target.entity_id().raw() as f64),
3941                    );
3942                    JV::Object(object)
3943                })
3944                .collect();
3945            JV::Array(refs)
3946        }
3947    }
3948}
3949
3950fn storage_value_to_metadata_value(value: &Value) -> MetadataValue {
3951    match value {
3952        Value::Null => MetadataValue::Null,
3953        Value::Boolean(value) => MetadataValue::Bool(*value),
3954        Value::Integer(value) => MetadataValue::Int(*value),
3955        Value::UnsignedInteger(value) => metadata_u64_to_value(*value),
3956        Value::Float(value) => MetadataValue::Float(*value),
3957        Value::Text(value) => MetadataValue::String(value.to_string()),
3958        Value::Blob(value) => MetadataValue::Bytes(value.clone()),
3959        Value::Timestamp(value) => {
3960            if *value >= 0 {
3961                metadata_u64_to_value(*value as u64)
3962            } else {
3963                MetadataValue::Int(*value)
3964            }
3965        }
3966        Value::TimestampMs(value) => {
3967            if *value >= 0 {
3968                metadata_u64_to_value(*value as u64)
3969            } else {
3970                MetadataValue::Int(*value)
3971            }
3972        }
3973        Value::Json(value) => MetadataValue::String(String::from_utf8_lossy(value).into_owned()),
3974        Value::Uuid(value) => MetadataValue::String(format!("{value:?}")),
3975        Value::Date(value) => MetadataValue::String(value.to_string()),
3976        Value::Time(value) => MetadataValue::String(value.to_string()),
3977        Value::Decimal(value) => MetadataValue::String(value.to_string()),
3978        Value::Ipv4(value) => MetadataValue::String(format!(
3979            "{}.{}.{}.{}",
3980            (value >> 24) & 0xFF,
3981            (value >> 16) & 0xFF,
3982            (value >> 8) & 0xFF,
3983            value & 0xFF
3984        )),
3985        Value::Port(value) => MetadataValue::Int(i64::from(*value)),
3986        Value::Latitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3987        Value::Longitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3988        Value::GeoPoint(lat, lon) => MetadataValue::Geo {
3989            lat: *lat as f64 / 1_000_000.0,
3990            lon: *lon as f64 / 1_000_000.0,
3991        },
3992        Value::BigInt(value) => MetadataValue::String(value.to_string()),
3993        Value::TableRef(value) => MetadataValue::String(value.clone()),
3994        Value::PageRef(value) => MetadataValue::Int(*value as i64),
3995        Value::Password(value) => MetadataValue::String(value.clone()),
3996        Value::Array(values) => {
3997            MetadataValue::Array(values.iter().map(storage_value_to_metadata_value).collect())
3998        }
3999        _ => MetadataValue::String(value.to_string()),
4000    }
4001}
4002
4003fn sql_literal_to_metadata_value(field: &str, value: &Value) -> RedDBResult<MetadataValue> {
4004    match value {
4005        Value::Null => Ok(MetadataValue::Null),
4006        Value::Integer(value) if *value >= 0 => Ok(metadata_u64_to_value(*value as u64)),
4007        Value::Integer(_) => Err(RedDBError::Query(format!(
4008            "column '{field}' must be non-negative for TTL metadata"
4009        ))),
4010        Value::UnsignedInteger(value) => Ok(metadata_u64_to_value(*value)),
4011        Value::Float(value) if value.is_finite() => {
4012            if value.fract().abs() >= f64::EPSILON {
4013                return Err(RedDBError::Query(format!(
4014                    "column '{field}' must be an integer (TTL metadata must be an integer)"
4015                )));
4016            }
4017            if *value < 0.0 {
4018                return Err(RedDBError::Query(format!(
4019                    "column '{field}' must be non-negative for TTL metadata"
4020                )));
4021            }
4022            if *value > u64::MAX as f64 {
4023                return Err(RedDBError::Query(format!(
4024                    "column '{field}' value is too large"
4025                )));
4026            }
4027            Ok(metadata_u64_to_value(*value as u64))
4028        }
4029        Value::Float(_) => Err(RedDBError::Query(format!(
4030            "column '{field}' must be a finite number"
4031        ))),
4032        Value::Text(value) => {
4033            let value = value.trim();
4034            if let Ok(value) = value.parse::<u64>() {
4035                Ok(metadata_u64_to_value(value))
4036            } else if let Ok(value) = value.parse::<i64>() {
4037                if value < 0 {
4038                    return Err(RedDBError::Query(format!(
4039                        "column '{field}' must be non-negative for TTL metadata"
4040                    )));
4041                }
4042                Ok(metadata_u64_to_value(value as u64))
4043            } else if let Ok(value) = value.parse::<f64>() {
4044                if !value.is_finite() {
4045                    return Err(RedDBError::Query(format!(
4046                        "column '{field}' must be a finite number"
4047                    )));
4048                }
4049                if value.fract().abs() >= f64::EPSILON {
4050                    return Err(RedDBError::Query(format!(
4051                        "column '{field}' must be an integer (TTL metadata must be an integer)"
4052                    )));
4053                }
4054                if value < 0.0 {
4055                    return Err(RedDBError::Query(format!(
4056                        "column '{field}' must be non-negative for TTL metadata"
4057                    )));
4058                }
4059                if value > u64::MAX as f64 {
4060                    return Err(RedDBError::Query(format!(
4061                        "column '{field}' value is too large"
4062                    )));
4063                }
4064                Ok(metadata_u64_to_value(value as u64))
4065            } else {
4066                Err(RedDBError::Query(format!(
4067                    "column '{field}' expects a numeric value for TTL metadata"
4068                )))
4069            }
4070        }
4071        _ => Err(RedDBError::Query(format!(
4072            "column '{field}' expects a numeric value for TTL metadata"
4073        ))),
4074    }
4075}
4076
4077fn metadata_u64_to_value(value: u64) -> MetadataValue {
4078    if value <= i64::MAX as u64 {
4079        MetadataValue::Int(value as i64)
4080    } else {
4081        MetadataValue::Timestamp(value)
4082    }
4083}
4084
4085/// Phase 2 PG parity: inspect a column value and return `true` when
4086/// the dotted `tail` path is already present under it. Used by the
4087/// tenant auto-fill so rows that already carry an explicit value
4088/// (bulk import, admin insert on behalf of a tenant) are not
4089/// double-stamped with the session's current_tenant().
4090fn dotted_tail_already_set(value: &Value, tail: &str) -> bool {
4091    let json = match value {
4092        Value::Null => return false,
4093        Value::Json(bytes) | Value::Blob(bytes) => {
4094            match crate::json::from_slice::<crate::json::Value>(bytes) {
4095                Ok(v) => v,
4096                Err(_) => return false,
4097            }
4098        }
4099        Value::Text(s) => {
4100            let trimmed = s.trim_start();
4101            if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
4102                return false;
4103            }
4104            match crate::json::from_str::<crate::json::Value>(s) {
4105                Ok(v) => v,
4106                Err(_) => return false,
4107            }
4108        }
4109        _ => return false,
4110    };
4111    let mut cursor = &json;
4112    for seg in tail.split('.') {
4113        match cursor {
4114            crate::json::Value::Object(map) => match map.iter().find(|(k, _)| *k == seg) {
4115                Some((_, v)) => cursor = v,
4116                None => return false,
4117            },
4118            _ => return false,
4119        }
4120    }
4121    !matches!(cursor, crate::json::Value::Null)
4122}
4123
4124/// Phase 2 PG parity: take a column value (possibly Null / Text /
4125/// Json) and return a `Value::Json` with the dotted `tail` path set
4126/// to `tenant_id`. Preserves every pre-existing key.
4127///
4128/// Accepts:
4129/// * `Value::Null`  → fresh `{tail: tenant_id}` object
4130/// * `Value::Json(bytes)` → parse, navigate / create path, re-serialize
4131/// * `Value::text(s)` if `s` is valid JSON → same as Json
4132/// * anything else → error (user supplied a scalar where we need
4133///   a JSON container)
4134fn merge_dotted_tenant(current: Value, tail: &str, tenant_id: &str) -> RedDBResult<Value> {
4135    let mut root = match current {
4136        Value::Null => crate::json::Value::Object(Default::default()),
4137        Value::Json(bytes) | Value::Blob(bytes) => {
4138            crate::json::from_slice(&bytes).map_err(|err| {
4139                RedDBError::Query(format!(
4140                    "tenant auto-fill: root column is not valid JSON ({err})"
4141                ))
4142            })?
4143        }
4144        Value::Text(s) => {
4145            if s.trim().is_empty() {
4146                crate::json::Value::Object(Default::default())
4147            } else {
4148                crate::json::from_str::<crate::json::Value>(&s).map_err(|err| {
4149                    RedDBError::Query(format!(
4150                        "tenant auto-fill: text root is not valid JSON ({err})"
4151                    ))
4152                })?
4153            }
4154        }
4155        other => {
4156            return Err(RedDBError::Query(format!(
4157                "tenant auto-fill: root column must be JSON / NULL, got {other:?}"
4158            )));
4159        }
4160    };
4161
4162    // Navigate path segments, creating intermediate objects on demand.
4163    let segments: Vec<&str> = tail.split('.').collect();
4164    let mut cursor: &mut crate::json::Value = &mut root;
4165    for (i, seg) in segments.iter().enumerate() {
4166        let is_last = i + 1 == segments.len();
4167        let map = match cursor {
4168            crate::json::Value::Object(m) => m,
4169            _ => {
4170                return Err(RedDBError::Query(format!(
4171                    "tenant auto-fill: segment '{seg}' is not inside an object"
4172                )));
4173            }
4174        };
4175        if is_last {
4176            map.insert(
4177                seg.to_string(),
4178                crate::json::Value::String(tenant_id.to_string()),
4179            );
4180            break;
4181        }
4182        cursor = map
4183            .entry(seg.to_string())
4184            .or_insert_with(|| crate::json::Value::Object(Default::default()));
4185    }
4186
4187    let bytes = crate::json::to_vec(&root).map_err(|err| {
4188        RedDBError::Query(format!(
4189            "tenant auto-fill: failed to re-serialize JSON ({err})"
4190        ))
4191    })?;
4192    Ok(Value::Json(bytes))
4193}
4194
4195#[cfg(test)]
4196mod tests {
4197    use crate::storage::schema::Value;
4198    use crate::storage::wal::{WalReader, WalRecord};
4199    use crate::{RedDBOptions, RedDBRuntime};
4200    use std::path::Path;
4201
4202    fn store_commit_batches(wal_path: &Path) -> Vec<Vec<Vec<u8>>> {
4203        WalReader::open(wal_path)
4204            .expect("wal opens")
4205            .iter()
4206            .map(|record| record.expect("wal record decodes").1)
4207            .filter_map(|record| match record {
4208                WalRecord::TxCommitBatch { actions, .. } => Some(actions),
4209                _ => None,
4210            })
4211            .collect()
4212    }
4213
4214    fn action_contains_text(action: &[u8], needle: &str) -> bool {
4215        action
4216            .windows(needle.len())
4217            .any(|window| window == needle.as_bytes())
4218    }
4219
4220    fn assert_statement_writes_collections_in_one_new_wal_batch(
4221        rt: &RedDBRuntime,
4222        wal_path: &Path,
4223        statement: &str,
4224        source: &str,
4225        event_queue: &str,
4226    ) {
4227        let before_batches = store_commit_batches(wal_path).len();
4228
4229        rt.execute_query(statement).unwrap();
4230
4231        let batches = store_commit_batches(wal_path);
4232        let statement_batches = &batches[before_batches..];
4233        let source_batch = statement_batches
4234            .iter()
4235            .position(|actions| {
4236                actions.iter().any(|action| {
4237                    action_contains_text(action, source)
4238                        && !action_contains_text(action, event_queue)
4239                })
4240            })
4241            .expect("source collection write batch is present");
4242        let event_batch = statement_batches
4243            .iter()
4244            .position(|actions| {
4245                actions
4246                    .iter()
4247                    .any(|action| action_contains_text(action, event_queue))
4248            })
4249            .expect("event queue write batch is present");
4250
4251        assert_eq!(
4252            source_batch, event_batch,
4253            "WITH EVENTS must persist the source write and queue event in the same WAL batch"
4254        );
4255    }
4256
    /// An autocommit INSERT into a `WITH EVENTS` table must write the row and
    /// its queue event in the same WAL commit batch.
    #[test]
    fn with_events_autocommit_persists_mutation_and_event_in_one_wal_batch() {
        // Persistent runtime in a temp dir so there is a WAL file to inspect.
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("events_dual_write.rdb");
        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();

        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
            .unwrap();
        // No explicit queue is named in the DDL; the helper looks for
        // `users_events` in the WAL actions.
        assert_statement_writes_collections_in_one_new_wal_batch(
            &rt,
            &wal_path,
            "INSERT INTO users (id, email) VALUES (1, 'a@example.test')",
            "users",
            "users_events",
        );
    }
4274
    /// An autocommit UPDATE on a table declared `WITH EVENTS (UPDATE) TO
    /// user_updates` must write the row change and its queue event in the same
    /// WAL commit batch.
    #[test]
    fn with_events_autocommit_update_persists_mutation_and_event_in_one_wal_batch() {
        // Persistent runtime in a temp dir so there is a WAL file to inspect.
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("events_update_atomic.rdb");
        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();

        rt.execute_query(
            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (UPDATE) TO user_updates",
        )
        .unwrap();
        // Seed the row before the helper snapshots the WAL, so only the UPDATE
        // below is examined.
        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
            .unwrap();

        assert_statement_writes_collections_in_one_new_wal_batch(
            &rt,
            &wal_path,
            "UPDATE users SET email = 'b@example.test' WHERE id = 1",
            "users",
            "user_updates",
        );
    }
4297
    /// An autocommit DELETE on a table declared `WITH EVENTS (DELETE) TO
    /// user_deletes` must write the row removal and its queue event in the
    /// same WAL commit batch.
    #[test]
    fn with_events_autocommit_delete_persists_mutation_and_event_in_one_wal_batch() {
        // Persistent runtime in a temp dir so there is a WAL file to inspect.
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("events_delete_atomic.rdb");
        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();

        rt.execute_query(
            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (DELETE) TO user_deletes",
        )
        .unwrap();
        // Seed the row before the helper snapshots the WAL, so only the DELETE
        // below is examined.
        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
            .unwrap();

        assert_statement_writes_collections_in_one_new_wal_batch(
            &rt,
            &wal_path,
            "DELETE FROM users WHERE id = 1",
            "users",
            "user_deletes",
        );
    }
4320
    /// `UPDATE … WHERE id IN (…)` on a column with a HASH index must touch
    /// exactly the listed rows and leave the others unchanged.
    #[test]
    fn update_where_id_in_with_hash_index_updates_expected_rows() {
        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
        rt.execute_query("CREATE TABLE users (id INT, score INT)")
            .unwrap();
        // Five rows, ids 0..=4, all starting with score 0.
        for id in 0..5 {
            rt.execute_query(&format!("INSERT INTO users (id, score) VALUES ({id}, 0)"))
                .unwrap();
        }
        // The index is created after the rows already exist.
        rt.execute_query("CREATE INDEX idx_id ON users (id) USING HASH")
            .unwrap();

        let updated = rt
            .execute_query("UPDATE users SET score = 42 WHERE id IN (1,3,4)")
            .unwrap();
        assert_eq!(updated.affected_rows, 3);

        // Read everything back in id order and compare (id, score) pairs.
        let selected = rt
            .execute_query("SELECT id, score FROM users ORDER BY id")
            .unwrap();
        let scores: Vec<(i64, i64)> = selected
            .result
            .records
            .iter()
            .map(|record| {
                let id = match record.get("id").unwrap() {
                    Value::Integer(value) => *value,
                    other => panic!("expected integer id, got {other:?}"),
                };
                let score = match record.get("score").unwrap() {
                    Value::Integer(value) => *value,
                    other => panic!("expected integer score, got {other:?}"),
                };
                (id, score)
            })
            .collect();
        assert_eq!(scores, vec![(0, 0), (1, 42), (2, 0), (3, 42), (4, 42)]);
    }
4359
4360    /// Drives UPDATE through the shared `DmlTargetScan` module — the
4361    /// same code path DELETE uses (#51, #52). Exercises the indexed
4362    /// equality fast-path (WHERE id = N with a HASH index), the
4363    /// unindexed range scan (WHERE score > N), and the no-WHERE
4364    /// full-scan branch to confirm the extracted "find target rows"
4365    /// loop preserves affected-row counts and the resulting row state.
4366    #[test]
4367    fn update_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4368        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4369        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4370            .unwrap();
4371        for id in 0..5 {
4372            rt.execute_query(&format!(
4373                "INSERT INTO items (id, score) VALUES ({id}, {})",
4374                id * 10
4375            ))
4376            .unwrap();
4377        }
4378        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4379            .unwrap();
4380
4381        // Indexed equality UPDATE — hits the hash fast-path inside
4382        // DmlTargetScan::find_target_ids. id=2 has score=20, drop it
4383        // below the score>25 cutoff so the next assertion stays clean.
4384        let updated_one = rt
4385            .execute_query("UPDATE items SET score = 5 WHERE id = 2")
4386            .unwrap();
4387        assert_eq!(updated_one.affected_rows, 1);
4388
4389        // Unindexed scan UPDATE — bumps everyone with score > 25,
4390        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4391        // zoned/full-scan branch.
4392        let updated_many = rt
4393            .execute_query("UPDATE items SET score = 7 WHERE score > 25")
4394            .unwrap();
4395        assert_eq!(updated_many.affected_rows, 2);
4396
4397        let snapshot = rt
4398            .execute_query("SELECT id, score FROM items ORDER BY id")
4399            .unwrap();
4400        let pairs: Vec<(i64, i64)> = snapshot
4401            .result
4402            .records
4403            .iter()
4404            .map(|record| {
4405                let id = match record.get("id").unwrap() {
4406                    Value::Integer(value) => *value,
4407                    other => panic!("expected integer id, got {other:?}"),
4408                };
4409                let score = match record.get("score").unwrap() {
4410                    Value::Integer(value) => *value,
4411                    other => panic!("expected integer score, got {other:?}"),
4412                };
4413                (id, score)
4414            })
4415            .collect();
4416        assert_eq!(pairs, vec![(0, 0), (1, 10), (2, 5), (3, 7), (4, 7)]);
4417
4418        // Full-scan UPDATE with no WHERE rewrites every remaining row.
4419        let updated_all = rt.execute_query("UPDATE items SET score = 1").unwrap();
4420        assert_eq!(updated_all.affected_rows, 5);
4421        let after = rt
4422            .execute_query("SELECT score FROM items ORDER BY id")
4423            .unwrap();
4424        let scores: Vec<i64> = after
4425            .result
4426            .records
4427            .iter()
4428            .map(|record| match record.get("score").unwrap() {
4429                Value::Integer(value) => *value,
4430                other => panic!("expected integer score, got {other:?}"),
4431            })
4432            .collect();
4433        assert_eq!(scores, vec![1, 1, 1, 1, 1]);
4434    }
4435
4436    /// Drives DELETE through the new `DmlTargetScan` module. Exercises
4437    /// both the index fast-path (WHERE id = N with a HASH index) and
4438    /// the unindexed scan path (WHERE score > N) to confirm the
4439    /// extracted "find target rows" loop preserves the affected-row
4440    /// count and which rows survive.
4441    #[test]
4442    fn delete_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4443        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4444        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4445            .unwrap();
4446        for id in 0..5 {
4447            rt.execute_query(&format!(
4448                "INSERT INTO items (id, score) VALUES ({id}, {})",
4449                id * 10
4450            ))
4451            .unwrap();
4452        }
4453        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4454            .unwrap();
4455
4456        // Indexed equality DELETE — hits the hash fast-path inside
4457        // DmlTargetScan::find_target_ids.
4458        let deleted_one = rt.execute_query("DELETE FROM items WHERE id = 2").unwrap();
4459        assert_eq!(deleted_one.affected_rows, 1);
4460
4461        // Unindexed scan DELETE — drops everyone with score > 25,
4462        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4463        // zoned/full-scan branch.
4464        let deleted_many = rt
4465            .execute_query("DELETE FROM items WHERE score > 25")
4466            .unwrap();
4467        assert_eq!(deleted_many.affected_rows, 2);
4468
4469        let surviving = rt
4470            .execute_query("SELECT id FROM items ORDER BY id")
4471            .unwrap();
4472        let ids: Vec<i64> = surviving
4473            .result
4474            .records
4475            .iter()
4476            .map(|record| match record.get("id").unwrap() {
4477                Value::Integer(value) => *value,
4478                other => panic!("expected integer id, got {other:?}"),
4479            })
4480            .collect();
4481        assert_eq!(ids, vec![0, 1]);
4482
4483        // Sanity: full-scan DELETE with no WHERE clears the rest.
4484        let deleted_rest = rt.execute_query("DELETE FROM items").unwrap();
4485        assert_eq!(deleted_rest.affected_rows, 2);
4486        let empty = rt.execute_query("SELECT id FROM items").unwrap();
4487        assert!(empty.result.records.is_empty());
4488    }
4489
4490    /// CollectionContract gate (#49 + #50): APPEND ONLY tables accept
4491    /// INSERT but reject UPDATE and DELETE with the documented
4492    /// operator-facing error strings. Drives all three DML verbs so
4493    /// the centralized gate is exercised end-to-end.
4494    #[test]
4495    fn collection_contract_gate_blocks_update_and_delete_on_append_only() {
4496        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4497        rt.execute_query("CREATE TABLE events (id INT, payload TEXT) APPEND ONLY")
4498            .unwrap();
4499
4500        // INSERT must succeed — APPEND ONLY exists precisely to allow
4501        // appends. The gate should be a no-op for INSERT.
4502        let inserted = rt
4503            .execute_query("INSERT INTO events (id, payload) VALUES (1, 'hello')")
4504            .unwrap();
4505        assert_eq!(inserted.affected_rows, 1);
4506
4507        // UPDATE is rejected with the gate's UPDATE-specific message.
4508        let update_err = rt
4509            .execute_query("UPDATE events SET payload = 'mut' WHERE id = 1")
4510            .unwrap_err();
4511        let msg = format!("{update_err}");
4512        assert!(
4513            msg.contains("APPEND ONLY") && msg.contains("UPDATE is rejected"),
4514            "expected UPDATE rejection message, got: {msg}"
4515        );
4516
4517        // DELETE is rejected with the gate's DELETE-specific message.
4518        let delete_err = rt
4519            .execute_query("DELETE FROM events WHERE id = 1")
4520            .unwrap_err();
4521        let msg = format!("{delete_err}");
4522        assert!(
4523            msg.contains("APPEND ONLY") && msg.contains("DELETE is rejected"),
4524            "expected DELETE rejection message, got: {msg}"
4525        );
4526
4527        // Row should still be present — neither rejected mutation
4528        // touched storage.
4529        let surviving = rt.execute_query("SELECT id FROM events").unwrap();
4530        assert_eq!(surviving.result.records.len(), 1);
4531    }
4532
4533    /// CollectionContract gate: tables without an APPEND ONLY contract
4534    /// permit INSERT, UPDATE, and DELETE — the gate's default branch
4535    /// is a true pass-through, not an accidental block.
4536    #[test]
4537    fn collection_contract_gate_allows_all_verbs_on_unrestricted_table() {
4538        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4539        rt.execute_query("CREATE TABLE notes (id INT, body TEXT)")
4540            .unwrap();
4541
4542        rt.execute_query("INSERT INTO notes (id, body) VALUES (1, 'a')")
4543            .unwrap();
4544        let updated = rt
4545            .execute_query("UPDATE notes SET body = 'b' WHERE id = 1")
4546            .unwrap();
4547        assert_eq!(updated.affected_rows, 1);
4548        let deleted = rt.execute_query("DELETE FROM notes WHERE id = 1").unwrap();
4549        assert_eq!(deleted.affected_rows, 1);
4550    }
4551
4552    #[test]
4553    fn insert_into_event_enabled_table_emits_event_to_configured_queue() {
4554        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4555        rt.execute_query(
4556            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (INSERT) TO audit_log",
4557        )
4558        .unwrap();
4559
4560        let inserted = rt
4561            .execute_query("INSERT INTO users (id, email) VALUES (7, 'a@example.com')")
4562            .unwrap();
4563        assert_eq!(inserted.affected_rows, 1);
4564
4565        let events = queue_payloads(&rt, "audit_log");
4566        assert_eq!(events.len(), 1);
4567        let event = events[0].as_object().expect("event payload object");
4568        assert!(event
4569            .get("event_id")
4570            .and_then(crate::json::Value::as_str)
4571            .is_some_and(|value| !value.is_empty()));
4572        assert_eq!(
4573            event.get("op").and_then(crate::json::Value::as_str),
4574            Some("insert")
4575        );
4576        assert_eq!(
4577            event.get("collection").and_then(crate::json::Value::as_str),
4578            Some("users")
4579        );
4580        assert_eq!(
4581            event.get("id").and_then(crate::json::Value::as_u64),
4582            Some(7)
4583        );
4584        assert!(event
4585            .get("ts")
4586            .and_then(crate::json::Value::as_u64)
4587            .is_some());
4588        assert!(event
4589            .get("lsn")
4590            .and_then(crate::json::Value::as_u64)
4591            .is_some());
4592        assert!(matches!(
4593            event.get("tenant"),
4594            Some(crate::json::Value::Null)
4595        ));
4596        assert!(matches!(
4597            event.get("before"),
4598            Some(crate::json::Value::Null)
4599        ));
4600        let after = event
4601            .get("after")
4602            .and_then(crate::json::Value::as_object)
4603            .expect("after object");
4604        assert_eq!(
4605            after.get("id").and_then(crate::json::Value::as_u64),
4606            Some(7)
4607        );
4608        assert_eq!(
4609            after.get("email").and_then(crate::json::Value::as_str),
4610            Some("a@example.com")
4611        );
4612    }
4613
4614    #[test]
4615    fn multi_row_insert_emits_one_insert_event_per_row_in_order() {
4616        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4617        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4618            .unwrap();
4619
4620        rt.execute_query(
4621            "INSERT INTO users (id, email) VALUES (1, 'a@example.com'), (2, 'b@example.com')",
4622        )
4623        .unwrap();
4624
4625        let events = queue_payloads(&rt, "users_events");
4626        assert_eq!(events.len(), 2);
4627        let mut previous_lsn = 0;
4628        for (event, expected_id) in events.iter().zip([1_u64, 2]) {
4629            let object = event.as_object().expect("event payload object");
4630            assert_eq!(
4631                object.get("op").and_then(crate::json::Value::as_str),
4632                Some("insert")
4633            );
4634            assert_eq!(
4635                object.get("id").and_then(crate::json::Value::as_u64),
4636                Some(expected_id)
4637            );
4638            let lsn = object
4639                .get("lsn")
4640                .and_then(crate::json::Value::as_u64)
4641                .expect("event lsn");
4642            assert!(
4643                lsn > previous_lsn,
4644                "event LSNs should increase in row order"
4645            );
4646            previous_lsn = lsn;
4647            let after = object
4648                .get("after")
4649                .and_then(crate::json::Value::as_object)
4650                .expect("after object");
4651            assert_eq!(
4652                after.get("id").and_then(crate::json::Value::as_u64),
4653                Some(expected_id)
4654            );
4655        }
4656    }
4657
4658    fn queue_payloads(rt: &RedDBRuntime, queue: &str) -> Vec<crate::json::Value> {
4659        let result = rt
4660            .execute_query(&format!("QUEUE PEEK {queue} 10"))
4661            .expect("peek queue");
4662        result
4663            .result
4664            .records
4665            .iter()
4666            .map(
4667                |record| match record.get("payload").expect("payload column") {
4668                    Value::Json(bytes) => crate::json::from_slice(bytes).expect("json payload"),
4669                    other => panic!("expected JSON queue payload, got {other:?}"),
4670                },
4671            )
4672            .collect()
4673    }
4674
4675    // ── #112: auto-index user `id` on first insert ─────────────────────
4676
4677    /// First insert into a fresh collection that carries a column named
4678    /// `id` registers an implicit HASH index on `id`. Subsequent inserts
4679    /// populate it transparently, and `WHERE id = N` lookups exercise
4680    /// the hash-index fast path in `DmlTargetScan::find_target_ids`.
4681    ///
4682    /// This is the load-bearing acceptance test for #112 — without the
4683    /// hook, `find_index_for_column` returns `None` and DELETE/UPDATE
4684    /// fall through to a full segment scan (the 4× perf gap documented
4685    /// in `docs/perf/delete-sequential-2026-05-06.md`).
4686    #[test]
4687    fn auto_index_id_fires_on_first_insert() {
4688        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4689        rt.execute_query("CREATE TABLE bench_users (id INT, score INT)")
4690            .unwrap();
4691
4692        // Pre-condition: no index on `id` yet.
4693        assert!(
4694            rt.index_store_ref()
4695                .find_index_for_column("bench_users", "id")
4696                .is_none(),
4697            "freshly created collection should not have an `id` index"
4698        );
4699
4700        // Single-row INSERT — drives `MutationEngine::append_one`.
4701        rt.execute_query("INSERT INTO bench_users (id, score) VALUES (1, 10)")
4702            .unwrap();
4703
4704        // Post-condition: hash index registered on `id`.
4705        let registered = rt
4706            .index_store_ref()
4707            .find_index_for_column("bench_users", "id")
4708            .expect("auto-index hook should have registered idx_id on first insert");
4709        assert_eq!(registered.name, "idx_id");
4710        assert_eq!(registered.collection, "bench_users");
4711        assert_eq!(registered.columns, vec!["id".to_string()]);
4712        assert!(matches!(
4713            registered.method,
4714            super::super::index_store::IndexMethodKind::Hash
4715        ));
4716
4717        // Subsequent inserts populate the index; `WHERE id = N` should
4718        // resolve via the hash fast path and round-trip every row.
4719        for id in 2..=5 {
4720            rt.execute_query(&format!(
4721                "INSERT INTO bench_users (id, score) VALUES ({id}, {})",
4722                id * 10
4723            ))
4724            .unwrap();
4725        }
4726        for id in 1..=5 {
4727            let result = rt
4728                .execute_query(&format!("SELECT score FROM bench_users WHERE id = {id}"))
4729                .unwrap();
4730            assert_eq!(
4731                result.result.records.len(),
4732                1,
4733                "id={id} should match one row"
4734            );
4735        }
4736
4737        // Delete via the hash fast-path — exactly the bench scenario the
4738        // perf doc identified as the 4× regression. With the index
4739        // present, `find_target_ids` short-circuits before
4740        // `for_each_entity_zoned` runs.
4741        let deleted = rt
4742            .execute_query("DELETE FROM bench_users WHERE id = 3")
4743            .unwrap();
4744        assert_eq!(deleted.affected_rows, 1);
4745    }
4746
4747    /// Bulk INSERT (the multi-row VALUES path) drives
4748    /// `MutationEngine::append_batch`. The hook must fire there too —
4749    /// otherwise the batch entry points (gRPC binary bulk, HTTP bulk,
4750    /// wire bulk INSERT) skip auto-indexing entirely.
4751    #[test]
4752    fn auto_index_id_fires_on_first_bulk_insert() {
4753        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4754        rt.execute_query("CREATE TABLE bench_bulk (id INT, score INT)")
4755            .unwrap();
4756
4757        rt.execute_query("INSERT INTO bench_bulk (id, score) VALUES (1, 10), (2, 20), (3, 30)")
4758            .unwrap();
4759
4760        let registered = rt
4761            .index_store_ref()
4762            .find_index_for_column("bench_bulk", "id")
4763            .expect("auto-index hook should fire on first bulk insert");
4764        assert_eq!(registered.name, "idx_id");
4765
4766        // Every row populated via `index_entity_insert_batch`.
4767        for id in 1..=3 {
4768            let result = rt
4769                .execute_query(&format!("SELECT score FROM bench_bulk WHERE id = {id}"))
4770                .unwrap();
4771            assert_eq!(result.result.records.len(), 1);
4772        }
4773    }
4774
4775    /// Hook is a no-op when the row carries no `id` column. Conservative
4776    /// match (case-sensitive `id`) — `Id`, `ID`, and `red_entity_id`
4777    /// don't trigger it.
4778    #[test]
4779    fn auto_index_id_skips_when_no_id_column() {
4780        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4781        rt.execute_query("CREATE TABLE plain (uid INT, label TEXT)")
4782            .unwrap();
4783        rt.execute_query("INSERT INTO plain (uid, label) VALUES (1, 'a')")
4784            .unwrap();
4785
4786        assert!(rt
4787            .index_store_ref()
4788            .find_index_for_column("plain", "id")
4789            .is_none());
4790        assert!(rt
4791            .index_store_ref()
4792            .find_index_for_column("plain", "uid")
4793            .is_none());
4794    }
4795
4796    /// Hook only fires once per collection. If an explicit
4797    /// `CREATE INDEX ... USING BTREE` already covers `id`, the hook
4798    /// detects it via `find_index_for_column` and does NOT clobber it
4799    /// with a HASH index on the next insert.
4800    #[test]
4801    fn auto_index_id_skips_when_index_already_exists() {
4802        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4803        rt.execute_query("CREATE TABLE pre (id INT, score INT)")
4804            .unwrap();
4805        // User-declared BTREE index on `id` before any insert.
4806        rt.execute_query("CREATE INDEX user_idx ON pre (id) USING BTREE")
4807            .unwrap();
4808        rt.execute_query("INSERT INTO pre (id, score) VALUES (1, 10)")
4809            .unwrap();
4810
4811        let registered = rt
4812            .index_store_ref()
4813            .find_index_for_column("pre", "id")
4814            .expect("user index should still be there");
4815        assert_eq!(
4816            registered.name, "user_idx",
4817            "auto-index hook must not overwrite an existing index"
4818        );
4819    }
4820
4821    /// Implicit `idx_id` is reaped when the collection drops. The
4822    /// existing `execute_drop_table` walks `list_indices` and drops every
4823    /// entry — confirm the auto-created index participates.
4824    #[test]
4825    fn auto_index_id_dropped_with_collection() {
4826        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4827        rt.execute_query("CREATE TABLE ephemeral (id INT, score INT)")
4828            .unwrap();
4829        rt.execute_query("INSERT INTO ephemeral (id, score) VALUES (1, 10)")
4830            .unwrap();
4831        assert!(rt
4832            .index_store_ref()
4833            .find_index_for_column("ephemeral", "id")
4834            .is_some());
4835
4836        rt.execute_query("DROP TABLE ephemeral").unwrap();
4837
4838        assert!(
4839            rt.index_store_ref()
4840                .find_index_for_column("ephemeral", "id")
4841                .is_none(),
4842            "implicit `idx_id` must be reaped when its collection drops"
4843        );
4844    }
4845
4846    /// Opt-out via `RedDBOptions::with_auto_index_id(false)` (which
4847    /// forwards to `UnifiedStoreConfig::auto_index_id`). With the knob
4848    /// off, first insert leaves the collection without an `id` index —
4849    /// DELETE/UPDATE fall back to the scan path.
4850    #[test]
4851    fn auto_index_id_disabled_by_config() {
4852        let opts = RedDBOptions::in_memory().with_auto_index_id(false);
4853        let rt = RedDBRuntime::with_options(opts).unwrap();
4854
4855        rt.execute_query("CREATE TABLE off (id INT, score INT)")
4856            .unwrap();
4857        rt.execute_query("INSERT INTO off (id, score) VALUES (1, 10)")
4858            .unwrap();
4859
4860        assert!(
4861            rt.index_store_ref()
4862                .find_index_for_column("off", "id")
4863                .is_none(),
4864            "with auto_index_id=false, no implicit index should be created"
4865        );
4866    }
4867
4868    // ── #293: UPDATE / DELETE events ─────────────────────────────────────
4869
4870    #[test]
4871    fn update_single_row_emits_update_event() {
4872        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4873        rt.execute_query(
4874            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO audit_log",
4875        )
4876        .unwrap();
4877        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
4878            .unwrap();
4879
4880        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4881            .unwrap();
4882
4883        let events = queue_payloads(&rt, "audit_log");
4884        assert_eq!(events.len(), 1, "expected exactly 1 update event");
4885        let event = events[0].as_object().expect("event payload object");
4886        assert_eq!(
4887            event.get("op").and_then(crate::json::Value::as_str),
4888            Some("update")
4889        );
4890        assert_eq!(
4891            event.get("collection").and_then(crate::json::Value::as_str),
4892            Some("users")
4893        );
4894        assert!(event
4895            .get("event_id")
4896            .and_then(crate::json::Value::as_str)
4897            .is_some_and(|v| !v.is_empty()));
4898        let before = event
4899            .get("before")
4900            .and_then(crate::json::Value::as_object)
4901            .expect("before must be an object");
4902        let after = event
4903            .get("after")
4904            .and_then(crate::json::Value::as_object)
4905            .expect("after must be an object");
4906        assert_eq!(
4907            before.get("name").and_then(crate::json::Value::as_str),
4908            Some("Alice"),
4909            "before.name should be the old value"
4910        );
4911        assert_eq!(
4912            after.get("name").and_then(crate::json::Value::as_str),
4913            Some("Bob"),
4914            "after.name should be the new value"
4915        );
4916    }
4917
4918    #[test]
4919    fn update_event_only_includes_changed_fields() {
4920        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4921        rt.execute_query(
4922            "CREATE TABLE users (id INT, name TEXT, email TEXT) WITH EVENTS (UPDATE) TO evts",
4923        )
4924        .unwrap();
4925        rt.execute_query("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'a@x.com')")
4926            .unwrap();
4927
4928        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4929            .unwrap();
4930
4931        let events = queue_payloads(&rt, "evts");
4932        assert_eq!(events.len(), 1);
4933        let event = events[0].as_object().unwrap();
4934        let before = event
4935            .get("before")
4936            .and_then(crate::json::Value::as_object)
4937            .unwrap();
4938        let after = event
4939            .get("after")
4940            .and_then(crate::json::Value::as_object)
4941            .unwrap();
4942        // Only changed field included.
4943        assert!(
4944            before.contains_key("name"),
4945            "before must include changed field"
4946        );
4947        assert!(
4948            after.contains_key("name"),
4949            "after must include changed field"
4950        );
4951        // Unchanged fields must not appear.
4952        assert!(
4953            !before.contains_key("email"),
4954            "before must not include unchanged email"
4955        );
4956        assert!(
4957            !after.contains_key("email"),
4958            "after must not include unchanged email"
4959        );
4960    }
4961
4962    #[test]
4963    fn multi_row_update_emits_one_event_per_row() {
4964        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4965        rt.execute_query("CREATE TABLE items (id INT, status TEXT) WITH EVENTS (UPDATE) TO evts")
4966            .unwrap();
4967        rt.execute_query(
4968            "INSERT INTO items (id, status) VALUES (1, 'new'), (2, 'new'), (3, 'new')",
4969        )
4970        .unwrap();
4971
4972        rt.execute_query("UPDATE items SET status = 'done'")
4973            .unwrap();
4974
4975        let events = queue_payloads(&rt, "evts");
4976        assert_eq!(events.len(), 3, "expected one update event per row");
4977        for event in &events {
4978            let obj = event.as_object().unwrap();
4979            assert_eq!(
4980                obj.get("op").and_then(crate::json::Value::as_str),
4981                Some("update")
4982            );
4983        }
4984    }
4985
4986    #[test]
4987    fn delete_single_row_emits_delete_event() {
4988        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4989        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (DELETE) TO del_log")
4990            .unwrap();
4991        rt.execute_query("INSERT INTO users (id, name) VALUES (42, 'Alice')")
4992            .unwrap();
4993
4994        rt.execute_query("DELETE FROM users WHERE id = 42").unwrap();
4995
4996        let events = queue_payloads(&rt, "del_log");
4997        assert_eq!(events.len(), 1);
4998        let event = events[0].as_object().expect("event payload object");
4999        assert_eq!(
5000            event.get("op").and_then(crate::json::Value::as_str),
5001            Some("delete")
5002        );
5003        assert_eq!(
5004            event.get("collection").and_then(crate::json::Value::as_str),
5005            Some("users")
5006        );
5007        assert!(event
5008            .get("event_id")
5009            .and_then(crate::json::Value::as_str)
5010            .is_some_and(|v| !v.is_empty()));
5011        let before = event
5012            .get("before")
5013            .and_then(crate::json::Value::as_object)
5014            .expect("before must be an object for delete");
5015        assert_eq!(
5016            before.get("id").and_then(crate::json::Value::as_u64),
5017            Some(42)
5018        );
5019        assert_eq!(
5020            before.get("name").and_then(crate::json::Value::as_str),
5021            Some("Alice")
5022        );
5023        assert!(matches!(event.get("after"), Some(crate::json::Value::Null)));
5024    }
5025
5026    #[test]
5027    fn multi_row_delete_emits_one_event_per_row() {
5028        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5029        rt.execute_query("CREATE TABLE items (id INT, val INT) WITH EVENTS (DELETE) TO del_log")
5030            .unwrap();
5031        rt.execute_query("INSERT INTO items (id, val) VALUES (1, 10), (2, 20), (3, 30)")
5032            .unwrap();
5033
5034        rt.execute_query("DELETE FROM items").unwrap();
5035
5036        let events = queue_payloads(&rt, "del_log");
5037        assert_eq!(events.len(), 3, "expected one delete event per deleted row");
5038        for event in &events {
5039            let obj = event.as_object().unwrap();
5040            assert_eq!(
5041                obj.get("op").and_then(crate::json::Value::as_str),
5042                Some("delete")
5043            );
5044            assert!(matches!(obj.get("after"), Some(crate::json::Value::Null)));
5045        }
5046    }
5047
5048    #[test]
5049    fn ops_filter_update_does_not_emit_on_insert_or_delete() {
5050        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5051        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO evts")
5052            .unwrap();
5053
5054        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5055            .unwrap();
5056        rt.execute_query("DELETE FROM users WHERE id = 1").unwrap();
5057
5058        let events = queue_payloads(&rt, "evts");
5059        assert!(
5060            events.is_empty(),
5061            "UPDATE-only filter must not emit INSERT or DELETE events"
5062        );
5063    }
5064
5065    // ── SUPPRESS EVENTS ────────────────────────────────────────────────────
5066
5067    #[test]
5068    fn suppress_events_on_insert_emits_no_events() {
5069        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5070        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5071            .unwrap();
5072
5073        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5074            .unwrap();
5075
5076        let events = queue_payloads(&rt, "evts");
5077        assert!(
5078            events.is_empty(),
5079            "SUPPRESS EVENTS must prevent INSERT events"
5080        );
5081    }
5082
5083    #[test]
5084    fn suppress_events_on_update_emits_no_events() {
5085        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5086        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5087            .unwrap();
5088        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5089            .unwrap();
5090        // drain the INSERT event
5091        let _ = queue_payloads(&rt, "evts");
5092        // Force pop to drain; simpler: just check new count after UPDATE
5093        rt.execute_query("QUEUE PURGE evts").unwrap();
5094
5095        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1 SUPPRESS EVENTS")
5096            .unwrap();
5097
5098        let events = queue_payloads(&rt, "evts");
5099        assert!(
5100            events.is_empty(),
5101            "SUPPRESS EVENTS must prevent UPDATE events"
5102        );
5103    }
5104
5105    #[test]
5106    fn suppress_events_on_delete_emits_no_events() {
5107        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5108        rt.execute_query(
5109            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (INSERT, DELETE) TO evts",
5110        )
5111        .unwrap();
5112        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5113            .unwrap();
5114
5115        rt.execute_query("DELETE FROM users WHERE id = 1 SUPPRESS EVENTS")
5116            .unwrap();
5117
5118        let events = queue_payloads(&rt, "evts");
5119        assert!(
5120            events.is_empty(),
5121            "SUPPRESS EVENTS must prevent DELETE events"
5122        );
5123    }
5124
5125    #[test]
5126    fn normal_insert_after_suppress_still_emits() {
5127        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5128        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5129            .unwrap();
5130
5131        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5132            .unwrap();
5133        rt.execute_query("INSERT INTO users (id, name) VALUES (2, 'Bob')")
5134            .unwrap();
5135
5136        let events = queue_payloads(&rt, "evts");
5137        assert_eq!(
5138            events.len(),
5139            1,
5140            "only the non-suppressed INSERT should emit"
5141        );
5142        assert_eq!(
5143            events[0].get("id").and_then(crate::json::Value::as_u64),
5144            Some(2)
5145        );
5146    }
5147}