Skip to main content

reddb_server/runtime/
impl_dml.rs

1//! DML execution: INSERT, UPDATE, DELETE via SQL AST
2//!
3//! Implements `execute_insert`, `execute_update`, and `execute_delete` on
4//! `RedDBRuntime`.  Each method translates the parsed AST into entity-level
5//! operations through the existing `RuntimeEntityPort` trait so that all
6//! cross-cutting concerns (WAL, indexing, replication) are automatically
7//! applied.
8
9use crate::application::entity::{
10    metadata_from_json, AppliedEntityMutation, CreateDocumentInput, CreateEdgeInput,
11    CreateEntityOutput, CreateKvInput, CreateNodeInput, CreateRowInput, CreateRowsBatchInput,
12    CreateVectorInput, DeleteEntityInput, PatchEntityOperation, PatchEntityOperationType,
13    RowUpdateColumnRule, RowUpdateContractPlan,
14};
15use crate::application::ports::{
16    build_row_update_contract_plan, entity_row_fields_snapshot,
17    normalize_row_update_assignment_with_plan, normalize_row_update_value_for_rule,
18    RuntimeEntityPort,
19};
20use crate::application::ttl_payload::has_internal_ttl_metadata;
21use crate::presentation::entity_json::storage_value_to_json;
22use crate::storage::query::ast::{BinOp, Expr, FieldRef, ReturningItem, UpdateTarget};
23use crate::storage::query::sql_lowering::{
24    effective_delete_filter, effective_insert_rows, effective_update_filter, fold_expr_to_value,
25};
26use crate::storage::query::unified::{
27    sys_key_collection, sys_key_created_at, sys_key_kind, sys_key_red_entity_id, sys_key_rid,
28    sys_key_tenant, sys_key_updated_at, UnifiedRecord, UnifiedResult,
29};
30use crate::storage::unified::MetadataValue;
31use crate::storage::Metadata;
32use std::collections::HashMap;
33use std::sync::Arc;
34
35use super::*;
36
// NOTE(review): the use sites of these three constants are outside this view;
// the descriptions below are inferred from the names — confirm before relying
// on them.

/// Presumably the maximum number of mutations applied per chunk by the UPDATE
/// path — TODO confirm against the consumer.
const UPDATE_APPLY_CHUNK_SIZE: usize = 2048;
/// Edge label `"TREE_CHILD"`; presumably tags tree parent→child edges — TODO confirm.
const TREE_CHILD_EDGE_LABEL: &str = "TREE_CHILD";
/// Key prefix `"red.tree."`; presumably prefixes tree-related metadata keys — TODO confirm.
const TREE_METADATA_PREFIX: &str = "red.tree.";
40
/// A single UPDATE assignment kept in expression form.
///
/// NOTE(review): this struct is built and consumed outside this view; the
/// per-field notes are inferred from names and types — confirm at the use site.
#[derive(Clone)]
struct CompiledUpdateAssignment {
    /// Name of the assigned column.
    column: String,
    /// Expression supplying the new value.
    expr: Expr,
    /// Operator of a compound assignment; presumably `None` for a plain `=` — TODO confirm.
    compound_op: Option<BinOp>,
    /// Presumably set when the column maps onto a metadata key rather than a
    /// row field — TODO confirm.
    metadata_key: Option<&'static str>,
    /// Presumably the row-contract rule used to normalise the value — TODO confirm.
    row_rule: Option<RowUpdateColumnRule>,
}
49
/// Pre-compiled form of an UPDATE's assignment list.
///
/// NOTE(review): construction and consumption are outside this view; the
/// per-field notes are inferred from names and types — confirm at the use site.
struct CompiledUpdatePlan {
    /// Field assignments held as ready values (presumably row-independent — TODO confirm).
    static_field_assignments: Vec<(String, Value)>,
    /// Metadata assignments held as ready values (presumably row-independent — TODO confirm).
    static_metadata_assignments: Vec<(String, MetadataValue)>,
    /// Assignments still held as expressions (presumably evaluated per row — TODO confirm).
    dynamic_assignments: Vec<CompiledUpdateAssignment>,
    /// Row update contract plan, when one applies.
    row_contract_plan: Option<RowUpdateContractPlan>,
    /// Names of the row columns the statement modifies.
    row_modified_columns: Vec<String>,
    /// Whether any modified column is a unique column (per the field name — TODO confirm).
    row_touches_unique_columns: bool,
}
58
/// Value-level results for the dynamic part of an UPDATE plan.
///
/// NOTE(review): the producer is outside this view; presumably these are the
/// evaluated outputs of `CompiledUpdatePlan::dynamic_assignments` for one
/// entity — confirm at the use site.
#[derive(Default)]
struct MaterializedUpdateAssignments {
    /// `(column, value)` pairs destined for row fields.
    dynamic_field_assignments: Vec<(String, Value)>,
    /// `(key, value)` pairs destined for entity metadata.
    dynamic_metadata_assignments: Vec<(String, MetadataValue)>,
}
64
65impl RedDBRuntime {
66    /// Issue #524 — public read of the in-memory chain tip. Returns `None`
67    /// when the collection is not a chain or has no rows (pre-genesis). On a
68    /// cold cache the first call falls back to a one-time scan so the HTTP
69    /// `GET /collections/:name/chain-tip` handler stays consistent with the
70    /// INSERT path after a restart.
71    pub fn chain_tip_for_collection(
72        &self,
73        collection: &str,
74    ) -> Option<crate::runtime::blockchain_kind::ChainTipFull> {
75        let store = self.inner.db.store();
76        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
77            return None;
78        }
79        let mut cache = self.inner.chain_tip_cache.lock();
80        if let Some(existing) = cache.get(collection) {
81            return Some(existing.clone());
82        }
83        let scanned = crate::runtime::blockchain_kind::chain_tip_full(&store, collection)?;
84        cache.insert(collection.to_string(), scanned.clone());
85        Some(scanned)
86    }
87
88    /// Issue #525 — walks the chain end-to-end, recomputes each block's hash
89    /// against the stored fields, and returns the verification outcome.  On
90    /// `ok == false` the integrity flag is persisted and the in-memory cache
91    /// is updated so subsequent INSERTs surface `ChainIntegrityBroken`.
92    ///
93    /// Returns `None` when the collection is absent or not a `KIND blockchain`.
94    pub fn verify_chain_for_collection(
95        &self,
96        collection: &str,
97    ) -> Option<crate::runtime::blockchain_kind::VerifyChainOutcome> {
98        let store = self.inner.db.store();
99        let outcome = crate::runtime::blockchain_kind::verify_chain_outcome(&store, collection)?;
100        if !outcome.ok {
101            crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, true);
102            self.inner
103                .chain_integrity_broken
104                .lock()
105                .insert(collection.to_string(), true);
106        }
107        Some(outcome)
108    }
109
110    /// Issue #525 — admin clears the `ChainIntegrityBroken` flag so the chain
111    /// accepts INSERTs again.  Returns `false` when the collection is not a
112    /// chain.
113    pub fn clear_chain_integrity_flag(&self, collection: &str) -> bool {
114        let store = self.inner.db.store();
115        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
116            return false;
117        }
118        crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, false);
119        self.inner
120            .chain_integrity_broken
121            .lock()
122            .insert(collection.to_string(), false);
123        true
124    }
125
126    /// Issue #525 — INSERT-time check.  Combines in-memory cache (fast path)
127    /// with a one-time scan of `red_config` on cold start so the flag survives
128    /// restart.
129    fn is_chain_integrity_broken(&self, collection: &str) -> bool {
130        {
131            let cache = self.inner.chain_integrity_broken.lock();
132            if let Some(v) = cache.get(collection) {
133                return *v;
134            }
135        }
136        let store = self.inner.db.store();
137        let persisted =
138            crate::runtime::blockchain_kind::is_integrity_broken_persisted(&store, collection)
139                .unwrap_or(false);
140        self.inner
141            .chain_integrity_broken
142            .lock()
143            .insert(collection.to_string(), persisted);
144        persisted
145    }
146
147    /// Issue #765 / S6 — lazily hydrate the integrity-tombstone cache from
148    /// `red_config` on first access. Returns `true` when at least one
149    /// tombstone range is present. Subsequent calls observe the cached state
150    /// flag (`1` empty / `2` present) and skip the store scan.
151    fn ensure_integrity_tombstones_loaded(&self) -> bool {
152        use std::sync::atomic::Ordering;
153        match self
154            .inner
155            .integrity_tombstones_state
156            .load(Ordering::Relaxed)
157        {
158            1 => return false,
159            2 => return true,
160            _ => {}
161        }
162        // Cold: load under the cache lock so a concurrent reader cannot
163        // observe a half-populated vector.
164        let mut guard = self.inner.integrity_tombstones.lock();
165        if self
166            .inner
167            .integrity_tombstones_state
168            .load(Ordering::Relaxed)
169            == 0
170        {
171            let ranges = crate::runtime::integrity_tombstone::load_ranges(&self.inner.db.store());
172            let present = !ranges.is_empty();
173            *guard = ranges;
174            self.inner
175                .integrity_tombstones_state
176                .store(if present { 2 } else { 1 }, Ordering::Relaxed);
177        }
178        self.inner
179            .integrity_tombstones_state
180            .load(Ordering::Relaxed)
181            == 2
182    }
183
184    /// Issue #765 / S6 — durably record an integrity tombstone over the
185    /// inclusive RID range `[lo, hi]` of `table` (the committed rows of an
186    /// input stream whose end-to-end SHA-256 digest did not match). The range
187    /// is persisted to `red_config` (survives restart) and folded into the
188    /// in-memory cache so the same process filters it immediately.
189    pub fn record_integrity_tombstone(&self, table: &str, lo: u64, hi: u64) {
190        use std::sync::atomic::Ordering;
191        self.ensure_integrity_tombstones_loaded();
192        let mut guard = self.inner.integrity_tombstones.lock();
193        guard.push(crate::runtime::integrity_tombstone::TombstoneRange::new(
194            table.to_string(),
195            lo,
196            hi,
197        ));
198        crate::runtime::integrity_tombstone::persist_ranges(&self.inner.db.store(), &guard);
199        self.inner
200            .integrity_tombstones_state
201            .store(2, Ordering::Relaxed);
202    }
203
204    /// Issue #765 / S6 — snapshot of the currently-cached tombstone ranges.
205    /// Intended for tests and forensic surfaces; the read path uses
206    /// [`Self::filter_integrity_tombstoned`] which avoids the clone.
207    pub fn integrity_tombstone_ranges(
208        &self,
209    ) -> Vec<crate::runtime::integrity_tombstone::TombstoneRange> {
210        self.ensure_integrity_tombstones_loaded();
211        self.inner.integrity_tombstones.lock().clone()
212    }
213
214    /// Issue #765 / S6 — drop tombstoned rows from a SELECT result in place.
215    /// Fast no-op (one relaxed atomic load) when no tombstone has ever been
216    /// recorded. Clears `pre_serialized_json` when any row is removed so the
217    /// fast-path JSON cannot leak a filtered row back onto the wire.
218    pub fn filter_integrity_tombstoned(&self, result: &mut UnifiedResult) {
219        if !self.ensure_integrity_tombstones_loaded() {
220            return;
221        }
222        let guard = self.inner.integrity_tombstones.lock();
223        if guard.is_empty() {
224            return;
225        }
226        let before = result.records.len();
227        result.records.retain(|record| {
228            !crate::runtime::integrity_tombstone::record_tombstoned(&guard, record)
229        });
230        if result.records.len() != before {
231            result.pre_serialized_json = None;
232        }
233    }
234
235    /// Phase 2.5.4: inject `CURRENT_TENANT()` into an INSERT when the
236    /// target table is tenant-scoped and the user's column list does
237    /// not already name the tenant column.
238    ///
239    /// Returns:
240    /// * `Ok(None)` — no injection needed (non-tenant table, or user
241    ///   supplied the column explicitly). Caller uses the original
242    ///   query unchanged.
243    /// * `Ok(Some(augmented))` — a cloned query with the tenant column
244    ///   + literal value appended to every row.
245    /// * `Err(..)` — table is tenant-scoped but no tenant is bound to
246    ///   the current session. Fails loudly so callers don't produce
247    ///   rows that RLS would then hide on read.
248    fn maybe_inject_tenant_column(&self, query: &InsertQuery) -> RedDBResult<Option<InsertQuery>> {
249        let Some(tenant_col) = self.tenant_column(&query.table) else {
250            return Ok(None);
251        };
252        // User already named the column (literal match) — trust them.
253        if query
254            .columns
255            .iter()
256            .any(|c| c.eq_ignore_ascii_case(&tenant_col))
257        {
258            return Ok(None);
259        }
260
261        // Phase 2 PG parity: dotted-path tenancy. When `tenant_col` is a
262        // nested key like `headers.tenant` we operate on the root
263        // column (`headers`) and set / add the nested path inside its
264        // JSON value. If the user named the root column we mutate in
265        // place; otherwise we create a fresh JSON column for every row.
266        if let Some(dot_pos) = tenant_col.find('.') {
267            let (root, tail) = tenant_col.split_at(dot_pos);
268            let tail = &tail[1..]; // drop leading '.'
269            return self.inject_dotted_tenant(query, root, tail);
270        }
271
272        let Some(tenant_id) = crate::runtime::impl_core::current_tenant() else {
273            return Err(RedDBError::Query(format!(
274                "INSERT into tenant-scoped table '{}' requires an active tenant — \
275                 run SET TENANT '<id>' first or name column '{}' explicitly",
276                query.table, tenant_col
277            )));
278        };
279
280        let mut augmented = query.clone();
281        augmented.columns.push(tenant_col);
282        let lit = Value::text(tenant_id.clone());
283        for row in augmented.values.iter_mut() {
284            row.push(lit.clone());
285        }
286        for row in augmented.value_exprs.iter_mut() {
287            row.push(crate::storage::query::ast::Expr::Literal {
288                value: lit.clone(),
289                span: crate::storage::query::ast::Span::synthetic(),
290            });
291        }
292        Ok(Some(augmented))
293    }
294
295    /// Dotted-path auto-fill — set `root.tail` to `CURRENT_TENANT()` on
296    /// every row. Mirrors `maybe_inject_tenant_column` but mutates
297    /// nested JSON instead of appending a flat column.
298    ///
299    /// Cases:
300    /// * Root column already in the INSERT list → mutate per-row JSON
301    ///   (parse, set path, re-serialize).
302    /// * Root column absent → create a fresh `{tail: tenant}` JSON
303    ///   object and append the root column to the INSERT.
304    fn inject_dotted_tenant(
305        &self,
306        query: &InsertQuery,
307        root: &str,
308        tail: &str,
309    ) -> RedDBResult<Option<InsertQuery>> {
310        let active_tenant = crate::runtime::impl_core::current_tenant();
311        let mut augmented = query.clone();
312        let root_idx = augmented
313            .columns
314            .iter()
315            .position(|c| c.eq_ignore_ascii_case(root));
316
317        if let Some(idx) = root_idx {
318            // User supplied the root column. Per-row: if the dotted
319            // tail is already present we trust the user (admin / bulk
320            // loader scenario); otherwise fill from the active
321            // tenant. An unbound tenant is only an error when some
322            // row actually needs filling.
323            for row in augmented.values.iter_mut() {
324                let Some(slot) = row.get_mut(idx) else {
325                    continue;
326                };
327                if dotted_tail_already_set(slot, tail) {
328                    continue;
329                }
330                let Some(tenant_id) = &active_tenant else {
331                    return Err(RedDBError::Query(format!(
332                        "INSERT into tenant-scoped table '{}' requires an active tenant — \
333                         run SET TENANT '<id>' first or set '{}.{}' explicitly in each row",
334                        query.table, root, tail
335                    )));
336                };
337                *slot = merge_dotted_tenant(slot.clone(), tail, tenant_id)?;
338            }
339            // Expression row is kept in sync by re-wrapping the
340            // mutated literal; the canonical path will re-evaluate
341            // against the same JSON shape.
342            for (row_idx, row) in augmented.value_exprs.iter_mut().enumerate() {
343                if let Some(slot) = row.get_mut(idx) {
344                    let new_value = augmented
345                        .values
346                        .get(row_idx)
347                        .and_then(|v| v.get(idx))
348                        .cloned()
349                        .unwrap_or(Value::Null);
350                    *slot = crate::storage::query::ast::Expr::Literal {
351                        value: new_value,
352                        span: crate::storage::query::ast::Span::synthetic(),
353                    };
354                }
355            }
356        } else {
357            // No root column in the INSERT list — auto-fill needs a
358            // bound tenant to synthesise one. Error loud so we never
359            // create a tenant-less row that RLS would then hide.
360            let Some(tenant_id) = &active_tenant else {
361                return Err(RedDBError::Query(format!(
362                    "INSERT into tenant-scoped table '{}' requires an active tenant — \
363                     run SET TENANT '<id>' first or name path '{}.{}' explicitly",
364                    query.table, root, tail
365                )));
366            };
367            // Create a fresh JSON column with only the tenant path set.
368            augmented.columns.push(root.to_string());
369            let fresh = merge_dotted_tenant(Value::Null, tail, tenant_id)?;
370            for row in augmented.values.iter_mut() {
371                row.push(fresh.clone());
372            }
373            for row in augmented.value_exprs.iter_mut() {
374                row.push(crate::storage::query::ast::Expr::Literal {
375                    value: fresh.clone(),
376                    span: crate::storage::query::ast::Span::synthetic(),
377                });
378            }
379        }
380
381        Ok(Some(augmented))
382    }
383
384    /// Returns `(affected_count, lsns)`. For the txn (xmax-stamp) path,
385    /// `lsns` is empty because events fire at commit time.
386    fn delete_entities_batch(
387        &self,
388        collection: &str,
389        ids: &[EntityId],
390    ) -> RedDBResult<(u64, Vec<u64>)> {
391        if ids.is_empty() {
392            return Ok((0, vec![]));
393        }
394
395        let store = self.db().store();
396        let Some(manager) = store.get_collection(collection) else {
397            return Ok((0, vec![]));
398        };
399
400        let active_xid = self.current_xid();
401        let conn_id = crate::runtime::impl_core::current_connection_id();
402        let mut autocommit_xid = None;
403        let mut tombstoned_ids = Vec::new();
404        let mut tombstoned_entities = Vec::new();
405        let mut physical_delete_ids = Vec::new();
406        let table_row_resolver =
407            crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement();
408
409        for &id in ids {
410            let Some(mut entity) = manager.get(id) else {
411                continue;
412            };
413            if matches!(entity.data, EntityData::Row(_)) {
414                let previous_xmax = entity.xmax;
415                if matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }) {
416                    if table_row_resolver.resolve_candidate(&entity).is_none() {
417                        continue;
418                    }
419                } else if entity.xmax != 0 {
420                    continue;
421                }
422
423                let xid = match active_xid {
424                    Some(xid) => xid,
425                    None => match autocommit_xid {
426                        Some(xid) => xid,
427                        None => {
428                            let mgr = self.snapshot_manager();
429                            let xid = mgr.begin();
430                            autocommit_xid = Some(xid);
431                            xid
432                        }
433                    },
434                };
435                entity.set_xmax(xid);
436                if manager.update(entity.clone()).is_ok() {
437                    if active_xid.is_some() {
438                        self.record_pending_tombstone(conn_id, collection, id, xid, previous_xmax);
439                    }
440                    tombstoned_entities.push(entity);
441                    tombstoned_ids.push(id);
442                }
443            } else {
444                physical_delete_ids.push(id);
445            }
446        }
447
448        if let Some(xid) = autocommit_xid {
449            self.snapshot_manager().commit(xid);
450        }
451
452        let mut affected = tombstoned_ids.len() as u64;
453        let mut lsns = Vec::with_capacity(tombstoned_ids.len() + physical_delete_ids.len());
454        if active_xid.is_some() {
455            store
456                .persist_entities_to_pager(collection, &tombstoned_entities)
457                .map_err(|err| RedDBError::Internal(err.to_string()))?;
458        } else {
459            store
460                .persist_entities_to_pager(collection, &tombstoned_entities)
461                .map_err(|err| RedDBError::Internal(err.to_string()))?;
462            for id in &tombstoned_ids {
463                store.context_index().remove_entity(*id);
464                let lsn = self.cdc_emit(
465                    crate::replication::cdc::ChangeOperation::Delete,
466                    collection,
467                    id.raw(),
468                    "entity",
469                );
470                lsns.push(lsn);
471            }
472        }
473
474        let deleted_ids = store
475            .delete_batch(collection, &physical_delete_ids)
476            .map_err(|err| RedDBError::Internal(err.to_string()))?;
477        affected += deleted_ids.len() as u64;
478        for id in &deleted_ids {
479            store.context_index().remove_entity(*id);
480            let lsn = self.cdc_emit(
481                crate::replication::cdc::ChangeOperation::Delete,
482                collection,
483                id.raw(),
484                "entity",
485            );
486            lsns.push(lsn);
487        }
488
489        Ok((affected, lsns))
490    }
491
492    /// Flushes context-index updates and CDC for each applied mutation.
493    /// Returns one LSN per entity in the same order as `applied`.
494    fn flush_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<Vec<u64>> {
495        if applied.is_empty() {
496            return Ok(Vec::new());
497        }
498
499        let store = self.db().store();
500        if applied.iter().any(|item| item.context_index_dirty) {
501            store.context_index().index_entities(
502                &applied[0].collection,
503                applied
504                    .iter()
505                    .filter(|item| item.context_index_dirty)
506                    .map(|item| &item.entity),
507            );
508        }
509
510        for item in applied {
511            self.refresh_update_secondary_indexes(item)?;
512        }
513
514        let mut lsns = Vec::with_capacity(applied.len());
515        for item in applied {
516            let lsn = self.cdc_emit_prebuilt(
517                crate::replication::cdc::ChangeOperation::Update,
518                &item.collection,
519                &item.entity,
520                update_cdc_item_kind(self, &item.collection, &item.entity),
521                item.metadata.as_ref(),
522                false,
523            );
524            lsns.push(lsn);
525        }
526        Ok(lsns)
527    }
528
    /// Persists one chunk of applied update mutations by delegating to
    /// `persist_applied_entity_mutations`; its result is returned unchanged.
    fn persist_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<()> {
        self.persist_applied_entity_mutations(applied)
    }
532
533    fn refresh_update_secondary_indexes(&self, applied: &AppliedEntityMutation) -> RedDBResult<()> {
534        if applied.pre_mutation_fields.is_empty() {
535            return Ok(());
536        }
537        let post = entity_row_fields_snapshot(&applied.entity);
538        if post.is_empty() {
539            return Ok(());
540        }
541
542        let indexed_cols = self
543            .index_store_ref()
544            .indexed_columns_set(&applied.collection);
545        if indexed_cols.is_empty() {
546            return Ok(());
547        }
548
549        if let Some(old_version) = applied.replaced_entity.as_ref() {
550            let old_index_fields: Vec<(String, crate::storage::schema::Value)> = applied
551                .pre_mutation_fields
552                .iter()
553                .filter(|(col, _)| indexed_cols.contains(col))
554                .cloned()
555                .collect();
556            let new_index_fields: Vec<(String, crate::storage::schema::Value)> = post
557                .iter()
558                .filter(|(col, _)| indexed_cols.contains(col))
559                .cloned()
560                .collect();
561            if !old_index_fields.is_empty() {
562                self.index_store_ref()
563                    .index_entity_delete(&applied.collection, old_version.id, &old_index_fields)
564                    .map_err(crate::RedDBError::Internal)?;
565            }
566            if !new_index_fields.is_empty() {
567                self.index_store_ref()
568                    .index_entity_insert(&applied.collection, applied.entity.id, &new_index_fields)
569                    .map_err(crate::RedDBError::Internal)?;
570            }
571            return Ok(());
572        }
573
574        let damage =
575            crate::application::entity::row_damage_vector(&applied.pre_mutation_fields, &post);
576        if damage
577            .touched_columns()
578            .into_iter()
579            .any(|col| indexed_cols.contains(col))
580        {
581            self.index_store_ref()
582                .index_entity_update(
583                    &applied.collection,
584                    applied.id,
585                    &applied.pre_mutation_fields,
586                    &post,
587                )
588                .map_err(crate::RedDBError::Internal)?;
589        }
590        Ok(())
591    }
592
593    /// Execute INSERT INTO table [entity_type] (cols) VALUES (vals), ...
594    ///
595    /// Each row in `query.values` is zipped with `query.columns` to produce a
596    /// set of named fields, which is then dispatched based on entity_type.
597    pub fn execute_insert(
598        &self,
599        raw_query: &str,
600        query: &InsertQuery,
601    ) -> RedDBResult<RuntimeQueryResult> {
602        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
603        // CollectionContract gate (#49): single entry point for the
604        // operator's collection-level write rules. Today this is a
605        // no-op for INSERT (APPEND ONLY permits insert); routing
606        // through the gate now means future contract bits — versioned,
607        // vault-only writes — plug in once instead of per verb.
608        crate::runtime::collection_contract::CollectionContractGate::check(
609            self,
610            &query.table,
611            crate::runtime::collection_contract::MutationKind::Insert,
612        )?;
613        // Phase 2.5.4 table-scoped tenancy: if the target table is
614        // tenant-scoped and the user didn't name the tenant column,
615        // auto-inject it with the thread-local `CURRENT_TENANT()`
616        // value. When the column is named explicitly we trust the
617        // caller (useful for admin tooling that writes on behalf of
618        // specific tenants). An unbound tenant on an implicit-fill
619        // path errors up front rather than producing a row the RLS
620        // policy would silently hide.
621        let augmented_owned;
622        let query = match self.maybe_inject_tenant_column(query)? {
623            Some(new_q) => {
624                augmented_owned = new_q;
625                &augmented_owned
626            }
627            None => query,
628        };
629        self.check_insert_column_policy(query)?;
630        if let Some(ref embed_config) = query.auto_embed {
631            let provider = crate::ai::parse_provider(&embed_config.provider)?;
632            // S3 / #711: planner-level provider gate. Runs before the
633            // local-model preflight and the API-key resolver so neither
634            // side-effect fires when policy denies.
635            crate::runtime::ai::provider_gate::enforce(self, &provider)?;
636            if matches!(provider, crate::ai::AiProvider::Local) {
637                // Issue #682 — pre-flight the local model registry before
638                // any row write. Missing model, uninstalled artifacts,
639                // wrong task, and disabled-feature failures surface as
640                // deterministic errors that leave the target collection
641                // untouched, satisfying the "no partial writes on
642                // embedding failure" criterion for the failure modes
643                // owned by the local provider.
644                let model_name = embed_config.model.as_deref().map(str::trim).unwrap_or("");
645                if model_name.is_empty() {
646                    return Err(RedDBError::Query(
647                        "AUTO EMBED with provider=local requires MODEL '<registered-model-name>'; \
648                         the local provider does not have an implicit default model"
649                            .to_string(),
650                    ));
651                }
652                crate::runtime::ai::local_embedding::preflight_local_embedding(
653                    &self.inner.db,
654                    model_name,
655                )?;
656            }
657        }
658
659        let mut inserted_count: u64 = 0;
660        let effective_rows =
661            effective_insert_rows(query).map_err(|msg| RedDBError::Query(msg.to_string()))?;
662
663        // Ensure the collection exists (auto-create on first insert).
664        let store = self.inner.db.store();
665        let _ = store.get_or_create_collection(&query.table);
666        let declared_model = self
667            .db()
668            .collection_contract_arc(&query.table)
669            .map(|contract| contract.declared_model);
670
671        let mut returning_snapshots: Option<Vec<Vec<(String, Value)>>> =
672            if query.returning.is_some() {
673                Some(Vec::with_capacity(effective_rows.len()))
674            } else {
675                None
676            };
677        let mut returning_result: Option<UnifiedResult> = None;
678
679        if matches!(query.entity_type, InsertEntityType::Row)
680            && !matches!(
681                declared_model,
682                Some(crate::catalog::CollectionModel::TimeSeries)
683            )
684        {
685            // Issue #523 + #524: blockchain collections seal each row into the
686            // chain. When the caller omits the reserved columns, the engine
687            // auto-fills (#523). When the caller supplies any reserved column,
688            // the values are validated against the current tip and a mismatch
689            // surfaces a `BlockchainConflict:` error mapped to HTTP 409 (#524).
690            //
691            // The whole batch runs under a per-collection chain lock so two
692            // concurrent submitters can't both bind to the same prev_hash —
693            // the loser observes the advanced tip and gets 409 with the new
694            // tip so it can retry.
695            let chain_mode = crate::runtime::blockchain_kind::is_chain(&store, &query.table);
696            let _chain_lock_arc: Option<Arc<parking_lot::Mutex<()>>> = if chain_mode {
697                Some(self.inner.rmw_locks.lock_for(&query.table, "__chain__"))
698            } else {
699                None
700            };
701            let _chain_guard = _chain_lock_arc.as_ref().map(|m| m.lock());
702
703            // Issue #525 — refuse new blocks if the chain has been marked
704            // `integrity = broken` until an admin clears the flag.
705            if chain_mode && self.is_chain_integrity_broken(&query.table) {
706                return Err(RedDBError::InvalidOperation(format!(
707                    "ChainIntegrityBroken: collection '{}' is locked until \
708                     POST /collections/{}/clear-integrity-flag is called by an admin",
709                    query.table, query.table
710                )));
711            }
712
713            // Pull the tip from the in-memory cache; fall back to a one-time
714            // scan if the cache hasn't seen this collection yet (cold start
715            // after restart). Cache is updated below as rows are sealed.
716            let mut chain_tip_full: Option<crate::runtime::blockchain_kind::ChainTipFull> =
717                if chain_mode {
718                    let mut cache = self.inner.chain_tip_cache.lock();
719                    if let Some(existing) = cache.get(&query.table) {
720                        Some(existing.clone())
721                    } else if let Some(scanned) =
722                        crate::runtime::blockchain_kind::chain_tip_full(&store, &query.table)
723                    {
724                        cache.insert(query.table.clone(), scanned.clone());
725                        Some(scanned)
726                    } else {
727                        None
728                    }
729                } else {
730                    None
731                };
732
733            let mut rows = Vec::with_capacity(effective_rows.len());
734            for row_values in &effective_rows {
735                if row_values.len() != query.columns.len() {
736                    return Err(RedDBError::Query(format!(
737                        "INSERT column count ({}) does not match value count ({})",
738                        query.columns.len(),
739                        row_values.len()
740                    )));
741                }
742                let (mut fields, mut metadata) =
743                    split_insert_metadata(self, &query.columns, row_values)?;
744                if chain_mode {
745                    use crate::runtime::blockchain_kind::{
746                        chain_conflict_error, COL_BLOCK_HEIGHT, COL_HASH, COL_PREV_HASH,
747                        COL_TIMESTAMP, RESERVED_COLUMNS,
748                    };
749                    let supplied_height = fields
750                        .iter()
751                        .find(|(k, _)| k == COL_BLOCK_HEIGHT)
752                        .map(|(_, v)| v.clone());
753                    let supplied_prev = fields
754                        .iter()
755                        .find(|(k, _)| k == COL_PREV_HASH)
756                        .map(|(_, v)| v.clone());
757                    let supplied_ts = fields
758                        .iter()
759                        .find(|(k, _)| k == COL_TIMESTAMP)
760                        .map(|(_, v)| v.clone());
761                    let supplied_hash = fields.iter().any(|(k, _)| k == COL_HASH);
762                    let user_supplied_any = supplied_height.is_some()
763                        || supplied_prev.is_some()
764                        || supplied_ts.is_some()
765                        || supplied_hash;
766
767                    fields.retain(|(k, _)| !RESERVED_COLUMNS.contains(&k.as_str()));
768                    let payload = crate::runtime::blockchain_kind::canonical_payload(&fields);
769
770                    let (tip_prev_hash, tip_next_height) = match &chain_tip_full {
771                        Some(t) => (t.hash, t.height + 1),
772                        None => (crate::storage::blockchain::GENESIS_PREV_HASH, 0u64),
773                    };
774                    let server_now = crate::runtime::blockchain_kind::now_ms();
775
776                    let (use_prev, use_height, use_ts) = if user_supplied_any {
777                        // Caller is participating in the chain protocol —
778                        // every field must be supplied AND match the tip.
779                        if supplied_hash {
780                            return Err(chain_conflict_error(
781                                tip_next_height.saturating_sub(1),
782                                tip_prev_hash,
783                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
784                                server_now,
785                                "hash column is engine-computed and cannot be supplied",
786                            ));
787                        }
788                        let caller_prev = match &supplied_prev {
789                            Some(Value::Blob(b)) if b.len() == 32 => {
790                                let mut a = [0u8; 32];
791                                a.copy_from_slice(b);
792                                a
793                            }
794                            Some(Value::Text(s)) if s.len() == 64 => {
795                                // Accept hex-encoded prev_hash so JSON / SQL
796                                // callers without literal-blob syntax can
797                                // still participate in the chain protocol.
798                                let mut a = [0u8; 32];
799                                let mut ok = true;
800                                for (i, slot) in a.iter_mut().enumerate() {
801                                    let pair = &s.as_ref()[i * 2..i * 2 + 2];
802                                    match u8::from_str_radix(pair, 16) {
803                                        Ok(byte) => *slot = byte,
804                                        Err(_) => {
805                                            ok = false;
806                                            break;
807                                        }
808                                    }
809                                }
810                                if !ok {
811                                    return Err(chain_conflict_error(
812                                        tip_next_height.saturating_sub(1),
813                                        tip_prev_hash,
814                                        chain_tip_full
815                                            .as_ref()
816                                            .map(|t| t.timestamp_ms)
817                                            .unwrap_or(0),
818                                        server_now,
819                                        "prev_hash is not valid hex",
820                                    ));
821                                }
822                                a
823                            }
824                            _ => {
825                                return Err(chain_conflict_error(
826                                    tip_next_height.saturating_sub(1),
827                                    tip_prev_hash,
828                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
829                                    server_now,
830                                    "prev_hash missing or not a 32-byte Blob",
831                                ));
832                            }
833                        };
834                        if caller_prev != tip_prev_hash {
835                            return Err(chain_conflict_error(
836                                tip_next_height.saturating_sub(1),
837                                tip_prev_hash,
838                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
839                                server_now,
840                                "prev_hash does not match current tip",
841                            ));
842                        }
843                        let caller_height = match &supplied_height {
844                            Some(Value::UnsignedInteger(v)) => *v,
845                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
846                            _ => {
847                                return Err(chain_conflict_error(
848                                    tip_next_height.saturating_sub(1),
849                                    tip_prev_hash,
850                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
851                                    server_now,
852                                    "block_height missing or not an unsigned integer",
853                                ));
854                            }
855                        };
856                        if caller_height != tip_next_height {
857                            return Err(chain_conflict_error(
858                                tip_next_height.saturating_sub(1),
859                                tip_prev_hash,
860                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
861                                server_now,
862                                "block_height does not match tip+1",
863                            ));
864                        }
865                        let caller_ts = match &supplied_ts {
866                            Some(Value::UnsignedInteger(v)) => *v,
867                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
868                            _ => {
869                                return Err(chain_conflict_error(
870                                    tip_next_height.saturating_sub(1),
871                                    tip_prev_hash,
872                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
873                                    server_now,
874                                    "timestamp missing or not an unsigned integer",
875                                ));
876                            }
877                        };
878                        let drift = (caller_ts as i128) - (server_now as i128);
879                        if drift.abs() > 60_000 {
880                            return Err(chain_conflict_error(
881                                tip_next_height.saturating_sub(1),
882                                tip_prev_hash,
883                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
884                                server_now,
885                                "timestamp outside ±60s of server_time",
886                            ));
887                        }
888                        (caller_prev, caller_height, caller_ts)
889                    } else {
890                        (tip_prev_hash, tip_next_height, server_now)
891                    };
892
893                    let (reserved, new_hash) =
894                        crate::runtime::blockchain_kind::make_block_reserved_fields(
895                            use_prev, use_height, use_ts, &payload,
896                        );
897                    fields.extend(reserved);
898                    chain_tip_full = Some(crate::runtime::blockchain_kind::ChainTipFull {
899                        height: use_height,
900                        hash: new_hash,
901                        timestamp_ms: use_ts,
902                    });
903                }
904                // Issue #522 — signed-writes verification. On collections
905                // created with `SIGNED_BY (...)` the row must carry valid
906                // `signer_pubkey` + `signature` reserved columns. Runs
907                // after chain_mode so canonical payload covers user-supplied
908                // fields only (blockchain reserved columns are filtered by
909                // `canonical_payload`; the two signed-writes reserved
910                // columns are split out before payload computation, then
911                // re-attached for storage). The blockchain + SIGNED_BY
912                // composition is owned by issue #526; we keep #522 to the
913                // non-chain path and let chain_mode collections punt to that
914                // slice rather than half-wire it here.
915                if crate::runtime::signed_writes_kind::is_signed(&store, &query.table) {
916                    let (pk_col, sig_col, residual) =
917                        crate::runtime::signed_writes_kind::split_signature_fields(fields);
918                    let payload = crate::runtime::blockchain_kind::canonical_payload(&residual);
919                    let reg = crate::runtime::signed_writes_kind::registry(&store, &query.table);
920                    crate::runtime::signed_writes_kind::verify_row(
921                        &reg,
922                        pk_col.as_ref().map(|c| c.bytes.as_slice()),
923                        sig_col.as_ref().map(|c| c.bytes.as_slice()),
924                        &payload,
925                    )
926                    .map_err(crate::runtime::signed_writes_kind::map_error)?;
927                    fields = residual;
928                    // Round-trip the reserved columns with the value
929                    // type the caller supplied (Text/hex on the SQL path,
930                    // Blob on the binary path). Keeps SELECT and WHERE
931                    // predicates symmetric with the INSERT shape.
932                    if let Some(col) = pk_col {
933                        fields.push((
934                            crate::storage::signed_writes::RESERVED_SIGNER_PUBKEY_COL.to_string(),
935                            col.raw_value,
936                        ));
937                    }
938                    if let Some(col) = sig_col {
939                        fields.push((
940                            crate::storage::signed_writes::RESERVED_SIGNATURE_COL.to_string(),
941                            col.raw_value,
942                        ));
943                    }
944                }
945                merge_with_clauses(
946                    &mut metadata,
947                    query.ttl_ms,
948                    query.expires_at_ms,
949                    &query.with_metadata,
950                );
951                if let Some(snaps) = returning_snapshots.as_mut() {
952                    snaps.push(fields.clone());
953                }
954                rows.push(CreateRowInput {
955                    collection: query.table.clone(),
956                    fields,
957                    metadata,
958                    node_links: Vec::new(),
959                    vector_links: Vec::new(),
960                });
961            }
962            let outputs = self.create_rows_batch(CreateRowsBatchInput {
963                collection: query.table.clone(),
964                rows,
965                suppress_events: query.suppress_events,
966            })?;
967            inserted_count = outputs.len() as u64;
968
969            // Chain mode: commit the new tip to the in-memory cache only after
970            // the batch persisted successfully. If the batch threw mid-way the
971            // cache stays on the previous tip and the chain lock releases.
972            if chain_mode {
973                if let Some(new_tip) = chain_tip_full.as_ref() {
974                    self.inner
975                        .chain_tip_cache
976                        .lock()
977                        .insert(query.table.clone(), new_tip.clone());
978                }
979            }
980
981            // Hypertable chunk routing: if this table was declared via
982            // CREATE HYPERTABLE, register each row's time-column value
983            // with the registry so chunk metadata (bounds, row counts,
984            // TTL eligibility) stays current. This is what lets
985            // HYPERTABLE_PRUNE_CHUNKS answer real questions + lets the
986            // retention daemon sweep expired chunks without scanning
987            // every row.
988            if let Some(spec) = self.inner.db.hypertables().get(&query.table) {
989                let time_col = &spec.time_column;
990                // Find the column's index in the INSERT column list.
991                if let Some(idx) = query.columns.iter().position(|c| c == time_col) {
992                    for row in &effective_rows {
993                        if let Some(Value::Integer(n) | Value::BigInt(n)) = row.get(idx) {
994                            if *n >= 0 {
995                                let _ = self.inner.db.hypertables().route(&query.table, *n as u64);
996                            }
997                        } else if let Some(Value::UnsignedInteger(n)) = row.get(idx) {
998                            let _ = self.inner.db.hypertables().route(&query.table, *n);
999                        }
1000                    }
1001                }
1002            }
1003
1004            if let (Some(items), Some(snaps)) =
1005                (query.returning.as_ref(), returning_snapshots.take())
1006            {
1007                let snaps = row_insert_returning_snapshots(&outputs, snaps);
1008                returning_result = Some(build_returning_result(items, &snaps, Some(&outputs)));
1009            }
1010        } else {
1011            // Issue #419: surface the inserted entity id on every INSERT path.
1012            // For Node/Edge/Vector/Document/Kv we now keep each CreateEntityOutput
1013            // so a RETURNING clause (and the unconditional inserted_ids list,
1014            // below) can expose the engine-assigned id. TimeSeries (the row
1015            // branch in this else) still rejects RETURNING with the not-supported
1016            // error: insert_timeseries_point's result is not captured as an output here.
1017            let mut entity_outputs: Vec<crate::application::entity::CreateEntityOutput> =
1018                Vec::with_capacity(effective_rows.len());
1019            let mut returning_field_snaps: Vec<Vec<(String, Value)>> = if query.returning.is_some()
1020            {
1021                Vec::with_capacity(effective_rows.len())
1022            } else {
1023                Vec::new()
1024            };
1025            if matches!(
1026                query.entity_type,
1027                InsertEntityType::Node | InsertEntityType::Edge
1028            ) {
1029                enum PreparedGraphInsert { // One validated NODE or EDGE row, staged so the whole statement is checked before the batch is built.
1030                    Node {
1031                        fields: Vec<(String, Value)>, // column/value pairs returned by split_insert_metadata for this row
1032                        input: CreateNodeInput, // collection, label, node_type, properties and metadata for the node
1033                    },
1034                    Edge {
1035                        fields: Vec<(String, Value)>, // column/value pairs returned by split_insert_metadata for this row
1036                        input: CreateEdgeInput, // collection, label, resolved from/to ids, weight, properties and metadata
1037                    },
1038                }
1039
1040                let mut prepared = Vec::with_capacity(effective_rows.len());
1041                for row_values in &effective_rows {
1042                    if row_values.len() != query.columns.len() {
1043                        return Err(RedDBError::Query(format!(
1044                            "INSERT column count ({}) does not match value count ({})",
1045                            query.columns.len(),
1046                            row_values.len()
1047                        )));
1048                    }
1049
1050                    match query.entity_type {
1051                        InsertEntityType::Node => {
1052                            let (node_values, mut metadata) =
1053                                split_insert_metadata(self, &query.columns, row_values)?;
1054                            merge_with_clauses(
1055                                &mut metadata,
1056                                query.ttl_ms,
1057                                query.expires_at_ms,
1058                                &query.with_metadata,
1059                            );
1060                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1061                            apply_collection_default_ttl_metadata(
1062                                self,
1063                                &query.table,
1064                                &mut metadata,
1065                            );
1066                            let (columns, values) = pairwise_columns_values(&node_values);
1067                            let label = find_column_value_string(&columns, &values, "label")?;
1068                            let node_type =
1069                                find_column_value_opt_string(&columns, &values, "node_type");
1070                            let properties = extract_remaining_properties(
1071                                &columns,
1072                                &values,
1073                                &["label", "node_type"],
1074                            );
1075                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1076                                properties.iter().map(|(key, _)| key.as_str()),
1077                                &format!("node '{}'", query.table),
1078                            )?;
1079                            prepared.push(PreparedGraphInsert::Node {
1080                                fields: node_values,
1081                                input: CreateNodeInput {
1082                                    collection: query.table.clone(),
1083                                    label,
1084                                    node_type,
1085                                    properties,
1086                                    metadata,
1087                                    embeddings: Vec::new(),
1088                                    table_links: Vec::new(),
1089                                    node_links: Vec::new(),
1090                                },
1091                            });
1092                        }
1093                        InsertEntityType::Edge => {
1094                            let (edge_values, mut metadata) =
1095                                split_insert_metadata(self, &query.columns, row_values)?;
1096                            merge_with_clauses(
1097                                &mut metadata,
1098                                query.ttl_ms,
1099                                query.expires_at_ms,
1100                                &query.with_metadata,
1101                            );
1102                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1103                            apply_collection_default_ttl_metadata(
1104                                self,
1105                                &query.table,
1106                                &mut metadata,
1107                            );
1108                            let (columns, values) = pairwise_columns_values(&edge_values);
1109                            let label = find_column_value_string(&columns, &values, "label")?;
1110                            ensure_non_tree_structural_edge_label(&label)?;
1111                            let from_id = resolve_edge_endpoint_any(
1112                                self.inner.db.store().as_ref(),
1113                                &query.table,
1114                                &columns,
1115                                &values,
1116                                &["from_rid", "from"],
1117                            )?;
1118                            let to_id = resolve_edge_endpoint_any(
1119                                self.inner.db.store().as_ref(),
1120                                &query.table,
1121                                &columns,
1122                                &values,
1123                                &["to_rid", "to"],
1124                            )?;
1125                            let weight = find_column_value_f32_opt(&columns, &values, "weight");
1126                            let properties = extract_remaining_properties(
1127                                &columns,
1128                                &values,
1129                                &["label", "from_rid", "to_rid", "from", "to", "weight"],
1130                            );
1131                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1132                                properties.iter().map(|(key, _)| key.as_str()),
1133                                &format!("edge '{}'", query.table),
1134                            )?;
1135                            prepared.push(PreparedGraphInsert::Edge {
1136                                fields: edge_values,
1137                                input: CreateEdgeInput {
1138                                    collection: query.table.clone(),
1139                                    label,
1140                                    from: EntityId::new(from_id),
1141                                    to: EntityId::new(to_id),
1142                                    weight,
1143                                    properties,
1144                                    metadata,
1145                                },
1146                            });
1147                        }
1148                        _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1149                    }
1150                }
1151
1152                ensure_graph_insert_contract(self, &query.table)?;
1153                let mut batch = self.inner.db.batch();
1154                for item in prepared {
1155                    match item {
1156                        PreparedGraphInsert::Node { fields, input } => {
1157                            if query.returning.is_some() {
1158                                returning_field_snaps.push(fields);
1159                            }
1160                            let node_type = input.node_type.unwrap_or_else(|| input.label.clone());
1161                            batch = batch.add_node_with_type(
1162                                input.collection,
1163                                input.label,
1164                                node_type,
1165                                input.properties.into_iter().collect(),
1166                                input.metadata.into_iter().collect(),
1167                            );
1168                        }
1169                        PreparedGraphInsert::Edge { fields, input } => {
1170                            if query.returning.is_some() {
1171                                returning_field_snaps.push(fields);
1172                            }
1173                            batch = batch.add_edge(
1174                                input.collection,
1175                                input.label,
1176                                input.from,
1177                                input.to,
1178                                input.weight.unwrap_or(1.0),
1179                                input.properties.into_iter().collect(),
1180                                input.metadata.into_iter().collect(),
1181                            );
1182                        }
1183                    }
1184                }
1185                let batch_result = batch
1186                    .execute()
1187                    .map_err(|err| RedDBError::Internal(format!("{err:?}")))?;
1188                let (ids, entity_kind) = match query.entity_type {
1189                    InsertEntityType::Node => (batch_result.nodes, "graph_node"),
1190                    InsertEntityType::Edge => (batch_result.edges, "graph_edge"),
1191                    _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1192                };
1193                for id in &ids {
1194                    self.stamp_xmin_if_in_txn(&query.table, *id);
1195                }
1196                if query.returning.is_some() {
1197                    returning_field_snaps = graph_insert_returning_snapshots(
1198                        self.inner.db.store().as_ref(),
1199                        &query.table,
1200                        &ids,
1201                    );
1202                }
1203                self.cdc_emit_insert_batch_no_cache_invalidate(&query.table, &ids, entity_kind);
1204                let store = self.inner.db.store();
1205                entity_outputs.extend(ids.iter().map(|id| {
1206                    crate::application::entity::CreateEntityOutput {
1207                        id: *id,
1208                        entity: store.get(&query.table, *id),
1209                    }
1210                }));
1211                inserted_count = ids.len() as u64;
1212            } else {
1213                for row_values in &effective_rows {
1214                    if row_values.len() != query.columns.len() {
1215                        return Err(RedDBError::Query(format!(
1216                            "INSERT column count ({}) does not match value count ({})",
1217                            query.columns.len(),
1218                            row_values.len()
1219                        )));
1220                    }
1221
1222                    match query.entity_type {
1223                        InsertEntityType::Row => {
1224                            if query.returning.is_some() {
1225                                return Err(RedDBError::Query(
1226                                "RETURNING is not yet supported for this INSERT path (TimeSeries)"
1227                                    .to_string(),
1228                            ));
1229                            }
1230                            let (fields, mut metadata) =
1231                                split_insert_metadata(self, &query.columns, row_values)?;
1232                            merge_with_clauses(
1233                                &mut metadata,
1234                                query.ttl_ms,
1235                                query.expires_at_ms,
1236                                &query.with_metadata,
1237                            );
1238                            self.insert_timeseries_point(&query.table, fields, metadata)?;
1239                        }
1240                        InsertEntityType::Node | InsertEntityType::Edge => {
1241                            unreachable!("NODE and EDGE are handled by the prepared graph path")
1242                        }
1243                        InsertEntityType::Vector => {
1244                            let (vector_values, mut metadata) =
1245                                split_insert_metadata(self, &query.columns, row_values)?;
1246                            merge_with_clauses(
1247                                &mut metadata,
1248                                query.ttl_ms,
1249                                query.expires_at_ms,
1250                                &query.with_metadata,
1251                            );
1252                            let (columns, values) = pairwise_columns_values(&vector_values);
1253                            let dense = find_column_value_vec_f32_any(
1254                                &columns,
1255                                &values,
1256                                &["dense", "embedding"],
1257                            )?;
1258                            merge_vector_metadata_column(&mut metadata, &columns, &values)?;
1259                            let content =
1260                                find_column_value_opt_string(&columns, &values, "content");
1261                            if query.returning.is_some() {
1262                                returning_field_snaps.push(vector_values.clone());
1263                            }
1264                            let input = CreateVectorInput {
1265                                collection: query.table.clone(),
1266                                dense,
1267                                content,
1268                                metadata,
1269                                link_row: None,
1270                                link_node: None,
1271                            };
1272                            entity_outputs.push(self.create_vector(input)?);
1273                        }
1274                        InsertEntityType::Document => {
1275                            let (document_values, mut metadata) =
1276                                split_insert_metadata(self, &query.columns, row_values)?;
1277                            merge_with_clauses(
1278                                &mut metadata,
1279                                query.ttl_ms,
1280                                query.expires_at_ms,
1281                                &query.with_metadata,
1282                            );
1283                            let (columns, values) = pairwise_columns_values(&document_values);
1284                            let body_str = find_column_value_string(&columns, &values, "body")?;
1285                            let body: crate::json::Value = crate::json::from_str(&body_str)
1286                                .map_err(|e| {
1287                                    RedDBError::Query(format!("invalid JSON body: {e}"))
1288                                })?;
1289                            let input = CreateDocumentInput {
1290                                collection: query.table.clone(),
1291                                body,
1292                                metadata,
1293                                node_links: Vec::new(),
1294                                vector_links: Vec::new(),
1295                            };
1296                            let output = self.create_document(input)?;
1297                            if query.returning.is_some() {
1298                                let fields = output
1299                                    .entity
1300                                    .as_ref()
1301                                    .map(entity_row_fields_snapshot)
1302                                    .filter(|fields| !fields.is_empty())
1303                                    .unwrap_or(document_values);
1304                                returning_field_snaps.push(fields);
1305                            }
1306                            entity_outputs.push(output);
1307                        }
1308                        InsertEntityType::Kv => {
1309                            let (kv_values, mut metadata) =
1310                                split_insert_metadata(self, &query.columns, row_values)?;
1311                            merge_with_clauses(
1312                                &mut metadata,
1313                                query.ttl_ms,
1314                                query.expires_at_ms,
1315                                &query.with_metadata,
1316                            );
1317                            let (columns, values) = pairwise_columns_values(&kv_values);
1318                            let key = find_column_value_string(&columns, &values, "key")?;
1319                            let value = find_column_value(&columns, &values, "value")?;
1320                            if query.returning.is_some() {
1321                                returning_field_snaps.push(kv_values.clone());
1322                            }
1323                            let input = CreateKvInput {
1324                                collection: query.table.clone(),
1325                                key,
1326                                value,
1327                                metadata,
1328                            };
1329                            entity_outputs.push(self.create_kv(input)?);
1330                        }
1331                    }
1332
1333                    inserted_count += 1;
1334                }
1335            }
1336
1337            if let Some(items) = query.returning.as_ref() {
1338                if !entity_outputs.is_empty() {
1339                    returning_result = Some(build_returning_result(
1340                        items,
1341                        &returning_field_snaps,
1342                        Some(&entity_outputs),
1343                    ));
1344                }
1345            }
1346        }
1347
1348        // Auto-embed pipeline: batch-embed fields across all inserted rows via AiBatchClient.
1349        if let Some(ref embed_config) = query.auto_embed {
1350            let store = self.inner.db.store();
1351            let provider = crate::ai::parse_provider(&embed_config.provider)?;
1352            let is_local_provider = matches!(provider, crate::ai::AiProvider::Local);
1353            // Local provider runs in-process — no API key path applies.
1354            // The pre-flight above already required `MODEL '<name>'`
1355            // for the local case, so the unwrap_or default below only
1356            // ever fires for OpenAI-compatible providers.
1357            let api_key = if is_local_provider {
1358                String::new()
1359            } else {
1360                crate::ai::resolve_api_key_from_runtime(&provider, None, self)?
1361            };
1362            let model = embed_config.model.clone().unwrap_or_else(|| {
1363                std::env::var("REDDB_OPENAI_EMBEDDING_MODEL")
1364                    .ok()
1365                    .unwrap_or_else(|| crate::ai::DEFAULT_OPENAI_EMBEDDING_MODEL.to_string())
1366            });
1367
1368            // Collect the just-inserted rows (most-recently appended, reversed back to insert order).
1369            let manager = store
1370                .get_collection(&query.table)
1371                .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1372            let entities = manager.query_all(|_| true);
1373            let recent: Vec<_> = entities
1374                .into_iter()
1375                .rev()
1376                .take(effective_rows.len())
1377                .collect();
1378
1379            // Collector phase: (entity_index, combined_text) for rows that have non-empty fields.
1380            let entity_combos: Vec<(usize, String)> = recent
1381                .iter()
1382                .enumerate()
1383                .filter_map(|(i, entity)| {
1384                    if let EntityData::Row(ref row) = entity.data {
1385                        if let Some(ref named) = row.named {
1386                            let texts: Vec<String> = embed_config
1387                                .fields
1388                                .iter()
1389                                .filter_map(|field| match named.get(field) {
1390                                    Some(Value::Text(t)) if !t.is_empty() => Some(t.to_string()),
1391                                    _ => None,
1392                                })
1393                                .collect();
1394                            if !texts.is_empty() {
1395                                return Some((i, texts.join(" ")));
1396                            }
1397                        }
1398                    }
1399                    None
1400                })
1401                .collect();
1402
1403            if !entity_combos.is_empty() {
1404                // Batch phase: single provider round-trip for all rows.
1405                let batch_texts: Vec<String> =
1406                    entity_combos.iter().map(|(_, t)| t.clone()).collect();
1407
1408                // Issue #682 — when the provider is `local`, bypass
1409                // AiBatchClient (which is HTTP-only) and dispatch
1410                // directly through the in-process local embedding
1411                // backend. All texts go in one call, mirroring the
1412                // single-round-trip shape of the remote path. The
1413                // local backend does not perform intra-batch dedup —
1414                // each input position gets its own row in the output
1415                // — which keeps the per-row "create_vector" loop
1416                // below correct without additional fan-out logic.
1417                let embeddings = if is_local_provider {
1418                    let response = crate::runtime::ai::local_embedding::embed_local_with_db(
1419                        &self.inner.db,
1420                        &model,
1421                        batch_texts,
1422                    )?;
1423                    response.embeddings
1424                } else {
1425                    let batch_client =
1426                        crate::runtime::ai::batch_client::AiBatchClient::from_runtime(self);
1427
1428                    match tokio::runtime::Handle::try_current() {
1429                        Ok(handle) => tokio::task::block_in_place(|| {
1430                            handle.block_on(batch_client.embed_batch(
1431                                &provider,
1432                                &model,
1433                                &api_key,
1434                                batch_texts,
1435                            ))
1436                        }),
1437                        Err(_) => {
1438                            return Err(RedDBError::Query(
1439                                "AUTO EMBED requires a Tokio runtime context".to_string(),
1440                            ));
1441                        }
1442                    }
1443                    .map_err(|e| RedDBError::Query(e.to_string()))?
1444                };
1445
1446                // Distribute phase: persist one vector per non-empty embedding.
1447                for ((_, combined), dense) in entity_combos.iter().zip(embeddings) {
1448                    if dense.is_empty() {
1449                        continue;
1450                    }
1451                    self.create_vector(CreateVectorInput {
1452                        collection: query.table.clone(),
1453                        dense,
1454                        content: Some(combined.clone()),
1455                        metadata: Vec::new(),
1456                        link_row: None,
1457                        link_node: None,
1458                    })?;
1459                }
1460            }
1461        }
1462
1463        if inserted_count > 0 {
1464            self.note_table_write(&query.table);
1465        }
1466
1467        let mut result = RuntimeQueryResult::dml_result(
1468            raw_query.to_string(),
1469            inserted_count,
1470            "insert",
1471            "runtime-dml",
1472        );
1473        if let Some(returning) = returning_result {
1474            result.result = returning;
1475        }
1476        Ok(result)
1477    }
1478
1479    fn check_insert_column_policy(&self, query: &InsertQuery) -> RedDBResult<()> {
1480        let Some(auth_store) = self.inner.auth_store.read().clone() else {
1481            return Ok(());
1482        };
1483        if !auth_store.iam_authorization_enabled() {
1484            return Ok(());
1485        }
1486        let Some((username, role)) = crate::runtime::impl_core::current_auth_identity() else {
1487            return Ok(());
1488        };
1489
1490        let tenant = crate::runtime::impl_core::current_tenant();
1491        let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
1492        let request = crate::auth::ColumnAccessRequest {
1493            action: "insert".to_string(),
1494            schema: None,
1495            table: query.table.clone(),
1496            columns: query.columns.clone(),
1497        };
1498        let ctx = crate::auth::policies::EvalContext {
1499            principal_tenant: tenant.clone(),
1500            current_tenant: tenant,
1501            peer_ip: None,
1502            mfa_present: false,
1503            now_ms: crate::auth::now_ms(),
1504            principal_is_admin_role: role == crate::auth::Role::Admin,
1505            principal_is_system_owned: auth_store.principal_is_system_owned(&principal),
1506            principal_is_platform_scoped: principal.tenant.is_none(),
1507        };
1508
1509        let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
1510        let table_allowed = matches!(
1511            outcome.table_decision,
1512            crate::auth::policies::Decision::Allow { .. }
1513                | crate::auth::policies::Decision::AdminBypass
1514        );
1515        if !table_allowed {
1516            return Err(RedDBError::Query(format!(
1517                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1518                outcome.table_resource.kind, outcome.table_resource.name
1519            )));
1520        }
1521        if let Some(denied) = outcome.first_denied_column() {
1522            return Err(RedDBError::Query(format!(
1523                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1524                denied.resource.kind, denied.resource.name
1525            )));
1526        }
1527
1528        Ok(())
1529    }
1530
1531    pub(crate) fn insert_timeseries_point(
1532        &self,
1533        collection: &str,
1534        fields: Vec<(String, Value)>,
1535        mut metadata: Vec<(String, MetadataValue)>,
1536    ) -> RedDBResult<EntityId> {
1537        apply_collection_default_ttl_metadata(self, collection, &mut metadata);
1538
1539        let (columns, values) = pairwise_columns_values(&fields);
1540        validate_timeseries_insert_columns(&columns)?;
1541
1542        // Issue #577 — AnalyticsSchemaRegistry hook. If the row carries
1543        // an `event_name` whose schema is registered, validate the
1544        // `payload` JSON against it BEFORE any write side-effect. On
1545        // failure we return a typed error and the row is not
1546        // persisted. When no schema is registered for the event name
1547        // (or no `event_name` column is supplied at all) we fall
1548        // through to the normal write path for back-compat with
1549        // existing timeseries rows.
1550        let event_name_opt = find_column_value_opt_string(&columns, &values, "event_name");
1551        let payload_opt = find_column_value_opt_string(&columns, &values, "payload");
1552        if let Some(event_name) = event_name_opt.as_deref() {
1553            let store_for_schema = self.inner.db.store();
1554            if super::analytics_schema_registry::latest(store_for_schema.as_ref(), event_name)
1555                .is_some()
1556            {
1557                let payload_json = payload_opt.as_deref().unwrap_or("{}");
1558                super::analytics_schema_registry::validate(
1559                    store_for_schema.as_ref(),
1560                    event_name,
1561                    payload_json,
1562                )
1563                .map_err(super::analytics_schema_registry::validation_error_to_reddb)?;
1564            }
1565        }
1566
1567        // `metric` is required by the existing timeseries write path;
1568        // when an analytics-style row supplies `event_name` but not
1569        // `metric`, fall back to the event name so the storage path
1570        // still has a non-empty metric tag.
1571        let metric = match find_column_value_opt_string(&columns, &values, "metric") {
1572            Some(m) => m,
1573            None => event_name_opt.clone().ok_or_else(|| {
1574                RedDBError::Query(
1575                    "timeseries INSERT requires either `metric` or `event_name`".to_string(),
1576                )
1577            })?,
1578        };
1579        // `value` is optional for analytics-event rows (which are
1580        // semantically counts of 1); default to 1.0 when missing so
1581        // analytics inserts don't have to fabricate a metric value.
1582        let value = match find_column_value_opt_string(&columns, &values, "value") {
1583            Some(s) => s.parse::<f64>().unwrap_or(1.0),
1584            None => columns
1585                .iter()
1586                .position(|c| c.eq_ignore_ascii_case("value"))
1587                .and_then(|i| match &values[i] {
1588                    Value::Float(f) => Some(*f),
1589                    Value::Integer(n) | Value::BigInt(n) => Some(*n as f64),
1590                    Value::UnsignedInteger(n) => Some(*n as f64),
1591                    _ => None,
1592                })
1593                .unwrap_or(1.0),
1594        };
1595        let timestamp_ns =
1596            find_timeseries_timestamp_ns(&columns, &values)?.unwrap_or_else(current_unix_ns);
1597        let mut tags = find_timeseries_tags(&columns, &values)?;
1598        if let Some(ref name) = event_name_opt {
1599            tags.entry("event_name".to_string())
1600                .or_insert_with(|| name.clone());
1601        }
1602        if let Some(ref payload) = payload_opt {
1603            tags.entry("payload".to_string())
1604                .or_insert_with(|| payload.clone());
1605        }
1606
1607        let mut entity = UnifiedEntity::new(
1608            EntityId::new(0),
1609            EntityKind::TimeSeriesPoint(Box::new(crate::storage::TimeSeriesPointKind {
1610                series: collection.to_string(),
1611                metric: metric.clone(),
1612            })),
1613            EntityData::TimeSeries(crate::storage::TimeSeriesData {
1614                metric,
1615                timestamp_ns,
1616                value,
1617                tags,
1618            }),
1619        );
1620        // MVCC #30: stamp xmin with the active tx xid (inside a tx)
1621        // or an autocommit xid (allocated and committed up-front so
1622        // future snapshots see the row as soon as it lands).
1623        let writer_xid = match self.current_xid() {
1624            Some(xid) => xid,
1625            None => {
1626                let mgr = self.snapshot_manager();
1627                let xid = mgr.begin();
1628                mgr.commit(xid);
1629                xid
1630            }
1631        };
1632        entity.set_xmin(writer_xid);
1633
1634        let store = self.inner.db.store();
1635        let id = store
1636            .insert_auto(collection, entity)
1637            .map_err(|err| RedDBError::Internal(err.to_string()))?;
1638
1639        if !metadata.is_empty() {
1640            let _ = store.set_metadata(
1641                collection,
1642                id,
1643                Metadata::with_fields(metadata.into_iter().collect()),
1644            );
1645        }
1646
1647        self.cdc_emit(
1648            crate::replication::cdc::ChangeOperation::Insert,
1649            collection,
1650            id.raw(),
1651            "timeseries",
1652        );
1653
1654        Ok(id)
1655    }
1656
1657    /// Execute UPDATE table SET col=val, ... WHERE filter
1658    ///
1659    /// Scans the target collection, evaluates the WHERE filter against each
1660    /// record, and patches every matching entity.
1661    pub fn execute_update(
1662        &self,
1663        raw_query: &str,
1664        query: &UpdateQuery,
1665    ) -> RedDBResult<RuntimeQueryResult> {
1666        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
1667        // Issue #523 — blockchain collections are immutable. Reject before
1668        // RLS / RETURNING work so the operator sees a clean 409-mapped
1669        // error instead of a partially-applied mutation surface.
1670        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
1671            return Err(RedDBError::InvalidOperation(format!(
1672                "BlockchainCollectionImmutable: UPDATE not allowed on '{}'",
1673                query.table
1674            )));
1675        }
1676        // CollectionContract gate (#50): runs the APPEND ONLY guard
1677        // (and any future contract bits) before RLS / RETURNING work
1678        // so the operator's immutability declaration is honoured
1679        // uniformly and the error message points at the DDL rather
1680        // than at a downstream symptom.
1681        crate::runtime::collection_contract::CollectionContractGate::check(
1682            self,
1683            &query.table,
1684            crate::runtime::collection_contract::MutationKind::Update,
1685        )?;
1686        ensure_update_target_contract(self, &query.table, query.target)?;
1687        ensure_graph_identity_update_target_allowed(query)?;
1688
1689        // Apply RLS augmentation first so every downstream path — plain
1690        // UPDATE, UPDATE...RETURNING, the inner scan — observes the
1691        // same policy-filtered target set. This prevents RETURNING
1692        // from ever exposing rows the UPDATE policy would have
1693        // denied.
1694        let rls_gated = crate::runtime::impl_core::rls_is_enabled(self, &query.table);
1695        let augmented_query: UpdateQuery;
1696        let effective_query: &UpdateQuery = if rls_gated {
1697            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
1698                self,
1699                &query.table,
1700                crate::storage::query::ast::PolicyAction::Update,
1701            );
1702            let Some(policy) = rls_filter else {
1703                // No admitting policy: zero rows affected, empty
1704                // RETURNING (never leak rows the caller can't touch).
1705                let mut response = RuntimeQueryResult::dml_result(
1706                    raw_query.to_string(),
1707                    0,
1708                    "update",
1709                    "runtime-dml-rls",
1710                );
1711                if let Some(items) = query.returning.clone() {
1712                    response.result = build_returning_result(&items, &[], None);
1713                }
1714                return Ok(response);
1715            };
1716            let mut augmented = query.clone();
1717            augmented.filter = Some(match augmented.filter.take() {
1718                Some(existing) => {
1719                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
1720                }
1721                None => policy,
1722            });
1723            augmented_query = augmented;
1724            &augmented_query
1725        } else {
1726            query
1727        };
1728
1729        // RETURNING wraps the inner executor and uses the touched-id
1730        // list the inner reports so the post-image reflects exactly
1731        // the rows the UPDATE actually mutated (not whatever a
1732        // separate SELECT might have observed).
1733        if let Some(items) = effective_query.returning.clone() {
1734            let mut inner_query = effective_query.clone();
1735            inner_query.returning = None;
1736            let (mut response, touched_ids) =
1737                self.execute_update_inner_tracked(raw_query, &inner_query)?;
1738
1739            let snapshots = if matches!(
1740                effective_query.target,
1741                UpdateTarget::Nodes | UpdateTarget::Edges
1742            ) {
1743                graph_update_returning_snapshots(self, &effective_query.table, &touched_ids)
1744            } else {
1745                super::dml_target_scan::DmlTargetScan::new(self, &effective_query.table, None, None)
1746                    .row_snapshots(&touched_ids)
1747            };
1748
1749            response.result = build_returning_result(&items, &snapshots, None);
1750            response.engine = "runtime-dml-returning";
1751            return Ok(response);
1752        }
1753
1754        self.execute_update_inner(raw_query, effective_query)
1755    }
1756
1757    /// Back-compat shim: the older entry point ignored touched ids.
1758    fn execute_update_inner(
1759        &self,
1760        raw_query: &str,
1761        query: &UpdateQuery,
1762    ) -> RedDBResult<RuntimeQueryResult> {
1763        self.execute_update_inner_tracked(raw_query, query)
1764            .map(|(res, _)| res)
1765    }
1766
1767    fn execute_update_inner_tracked(
1768        &self,
1769        raw_query: &str,
1770        query: &UpdateQuery,
1771    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1772        let store = self.inner.db.store();
1773        let effective_filter = effective_update_filter(query);
1774        let compiled_plan = self.compile_update_plan(query)?;
1775        let mut touched_ids: Vec<EntityId> = Vec::new();
1776        let limit_cap = query.limit.map(|l| l as usize);
1777        let manager = store
1778            .get_collection(&query.table)
1779            .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1780        let scan_limit = if query.order_by.is_empty() {
1781            limit_cap
1782        } else {
1783            None
1784        };
1785        let ids_to_update = super::dml_target_scan::DmlTargetScan::with_update_target(
1786            self,
1787            &query.table,
1788            effective_filter.as_ref(),
1789            scan_limit,
1790            query.target,
1791        )
1792        .find_target_ids()?;
1793        let ids_to_update = if query.order_by.is_empty() {
1794            ids_to_update
1795        } else {
1796            ordered_update_target_ids(&manager, &ids_to_update, &query.order_by, limit_cap)
1797        };
1798
1799        if update_needs_rmw_lock(query) {
1800            return self.execute_update_inner_tracked_locked(
1801                raw_query,
1802                query,
1803                &compiled_plan,
1804                &ids_to_update,
1805                effective_filter.as_ref(),
1806            );
1807        }
1808
1809        let mut affected: u64 = 0;
1810        for chunk in ids_to_update.chunks(UPDATE_APPLY_CHUNK_SIZE) {
1811            let mut applied_chunk = Vec::with_capacity(chunk.len());
1812            for entity in manager.get_many(chunk).into_iter().flatten() {
1813                let assignments =
1814                    self.materialize_update_assignments_for_entity(query, &entity, &compiled_plan)?;
1815                let applied = self.apply_materialized_update_for_entity(
1816                    query.table.clone(),
1817                    entity,
1818                    &compiled_plan,
1819                    assignments,
1820                )?;
1821                touched_ids.push(applied.id);
1822                applied_chunk.push(applied);
1823            }
1824            self.persist_update_chunk(&applied_chunk)?;
1825            affected += applied_chunk.len() as u64;
1826            let lsns = self.flush_update_chunk(&applied_chunk)?;
1827            if !query.suppress_events {
1828                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1829            }
1830        }
1831
1832        if affected > 0 {
1833            self.note_table_write(&query.table);
1834        }
1835
1836        Ok((
1837            RuntimeQueryResult::dml_result(
1838                raw_query.to_string(),
1839                affected,
1840                "update",
1841                "runtime-dml",
1842            ),
1843            touched_ids,
1844        ))
1845    }
1846
1847    fn execute_update_inner_tracked_locked(
1848        &self,
1849        raw_query: &str,
1850        query: &UpdateQuery,
1851        compiled_plan: &CompiledUpdatePlan,
1852        ids_to_update: &[EntityId],
1853        effective_filter: Option<&Filter>,
1854    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1855        let store = self.inner.db.store();
1856        let mut touched_ids = Vec::new();
1857        let mut lock_entries = Vec::new();
1858
1859        for id in ids_to_update {
1860            let Some(candidate) = store.get(&query.table, *id) else {
1861                continue;
1862            };
1863            let logical_id = candidate.logical_id();
1864            let lock_key = format!("row:{}", logical_id.raw());
1865            let rmw_lock = self.inner.rmw_locks.lock_for(&query.table, &lock_key);
1866            lock_entries.push((lock_key, logical_id, rmw_lock));
1867        }
1868
1869        lock_entries.sort_by(|left, right| left.0.cmp(&right.0));
1870        lock_entries.dedup_by(|left, right| left.0 == right.0);
1871        let _rmw_guards: Vec<_> = lock_entries.iter().map(|entry| entry.2.lock()).collect();
1872
1873        let mut applied_chunk = Vec::new();
1874        for (_, logical_id, _) in &lock_entries {
1875            let Some(entity) = resolve_update_entity_by_logical_id(self, &query.table, *logical_id)
1876            else {
1877                continue;
1878            };
1879            if let Some(filter) = effective_filter {
1880                if !crate::runtime::query_exec::evaluate_entity_filter_with_db(
1881                    Some(self.inner.db.as_ref()),
1882                    &entity,
1883                    filter,
1884                    &query.table,
1885                    &query.table,
1886                ) {
1887                    continue;
1888                }
1889            }
1890
1891            let assignments =
1892                self.materialize_update_assignments_for_entity(query, &entity, compiled_plan)?;
1893            let applied = self.apply_materialized_update_for_entity(
1894                query.table.clone(),
1895                entity,
1896                compiled_plan,
1897                assignments,
1898            )?;
1899            touched_ids.push(applied.id);
1900            applied_chunk.push(applied);
1901        }
1902
1903        let affected = applied_chunk.len() as u64;
1904        if !applied_chunk.is_empty() {
1905            self.persist_update_chunk(&applied_chunk)?;
1906            let lsns = self.flush_update_chunk(&applied_chunk)?;
1907            if !query.suppress_events {
1908                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1909            }
1910        }
1911
1912        if affected > 0 {
1913            self.note_table_write(&query.table);
1914        }
1915
1916        Ok((
1917            RuntimeQueryResult::dml_result(
1918                raw_query.to_string(),
1919                affected,
1920                "update",
1921                "runtime-dml",
1922            ),
1923            touched_ids,
1924        ))
1925    }
1926
1927    fn compile_update_plan(&self, query: &UpdateQuery) -> RedDBResult<CompiledUpdatePlan> {
1928        let mut static_field_assignments = Vec::new();
1929        let mut static_metadata_assignments = Vec::new();
1930        let mut dynamic_assignments = Vec::new();
1931        let row_contract_plan = build_row_update_contract_plan(&self.db(), &query.table)?;
1932        let mut row_modified_columns = Vec::new();
1933
1934        for (idx, (column, expr)) in query.assignment_exprs.iter().enumerate() {
1935            let compound_op = query.compound_assignment_ops.get(idx).copied().flatten();
1936            let metadata_key = resolve_sql_ttl_metadata_key(column);
1937            if compound_op.is_some() && metadata_key.is_some() {
1938                return Err(RedDBError::Query(format!(
1939                    "compound assignment is only supported for row fields: {column}"
1940                )));
1941            }
1942            if compound_op.is_none() {
1943                if let Ok(value) = fold_expr_to_value(expr.clone()) {
1944                    if let Some(metadata_key) = metadata_key {
1945                        let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
1946                        let (canonical_key, canonical_value) =
1947                            canonicalize_sql_ttl_metadata(metadata_key, raw_value);
1948                        static_metadata_assignments
1949                            .push((canonical_key.to_string(), canonical_value));
1950                    } else {
1951                        let value = self.resolve_crypto_sentinel(value)?;
1952                        static_field_assignments.push((
1953                            column.clone(),
1954                            normalize_row_update_assignment_with_plan(
1955                                &query.table,
1956                                column,
1957                                value,
1958                                row_contract_plan.as_ref(),
1959                            )?,
1960                        ));
1961                        row_modified_columns.push(column.clone());
1962                    }
1963                    continue;
1964                }
1965            }
1966
1967            dynamic_assignments.push(CompiledUpdateAssignment {
1968                column: column.clone(),
1969                expr: expr.clone(),
1970                compound_op,
1971                metadata_key,
1972                row_rule: if metadata_key.is_none() {
1973                    if let Some(plan) = row_contract_plan.as_ref() {
1974                        if plan.timestamps_enabled
1975                            && (column == "created_at" || column == "updated_at")
1976                        {
1977                            return Err(RedDBError::Query(format!(
1978                                "collection '{}' manages '{}' automatically — do not set it in UPDATE",
1979                                query.table, column
1980                            )));
1981                        }
1982                        if let Some(rule) = plan.declared_rules.get(column) {
1983                            Some(rule.clone())
1984                        } else if plan.strict_schema {
1985                            return Err(RedDBError::Query(format!(
1986                                "collection '{}' is strict and does not allow undeclared fields: {}",
1987                                query.table, column
1988                            )));
1989                        } else {
1990                            None
1991                        }
1992                    } else {
1993                        None
1994                    }
1995                } else {
1996                    None
1997                },
1998            });
1999            if metadata_key.is_none() {
2000                row_modified_columns.push(column.clone());
2001            }
2002        }
2003
2004        let row_modified_columns = dedupe_update_columns(row_modified_columns);
2005        let row_touches_unique_columns = row_contract_plan.as_ref().is_some_and(|plan| {
2006            row_modified_columns.iter().any(|column| {
2007                plan.unique_columns
2008                    .keys()
2009                    .any(|unique| unique.eq_ignore_ascii_case(column))
2010            })
2011        });
2012
2013        if let Some(ttl_ms) = query.ttl_ms {
2014            static_metadata_assignments
2015                .push(("_ttl_ms".to_string(), metadata_u64_to_value(ttl_ms)));
2016        }
2017        if let Some(expires_at_ms) = query.expires_at_ms {
2018            static_metadata_assignments.push((
2019                "_expires_at".to_string(),
2020                metadata_u64_to_value(expires_at_ms),
2021            ));
2022        }
2023        for (key, val) in &query.with_metadata {
2024            static_metadata_assignments.push((key.clone(), storage_value_to_metadata_value(val)));
2025        }
2026
2027        Ok(CompiledUpdatePlan {
2028            static_field_assignments,
2029            static_metadata_assignments,
2030            dynamic_assignments,
2031            row_contract_plan,
2032            row_modified_columns,
2033            row_touches_unique_columns,
2034        })
2035    }
2036
2037    fn materialize_update_assignments_for_entity(
2038        &self,
2039        query: &UpdateQuery,
2040        entity: &UnifiedEntity,
2041        compiled_plan: &CompiledUpdatePlan,
2042    ) -> RedDBResult<MaterializedUpdateAssignments> {
2043        let mut assignments = MaterializedUpdateAssignments::default();
2044        let mut record: Option<UnifiedRecord> = None;
2045
2046        for assignment in &compiled_plan.dynamic_assignments {
2047            if assignment.compound_op.is_some()
2048                && !matches!(
2049                    entity.data,
2050                    EntityData::Row(_) | EntityData::Node(_) | EntityData::Edge(_)
2051                )
2052            {
2053                return Err(RedDBError::Query(format!(
2054                    "compound assignment is only supported for row or graph UPDATE column '{}'",
2055                    assignment.column
2056                )));
2057            }
2058            if record.is_none() {
2059                record = runtime_any_record_from_entity_ref(entity);
2060            }
2061            let Some(record) = record.as_ref() else {
2062                return Err(RedDBError::Query(format!(
2063                    "UPDATE could not materialize runtime record for entity {} in '{}'",
2064                    entity.id.raw(),
2065                    query.table
2066                )));
2067            };
2068            let rhs = super::expr_eval::evaluate_runtime_expr_with_db(
2069                Some(self.inner.db.as_ref()),
2070                &assignment.expr,
2071                record,
2072                Some(query.table.as_str()),
2073                Some(query.table.as_str()),
2074            )
2075            .ok_or_else(|| {
2076                RedDBError::Query(format!(
2077                    "failed to evaluate UPDATE expression for column '{}'",
2078                    assignment.column
2079                ))
2080            })?;
2081            let value = if let Some(op) = assignment.compound_op {
2082                evaluate_compound_update_assignment(&assignment.column, record, op, rhs)?
2083            } else {
2084                rhs
2085            };
2086
2087            if let Some(metadata_key) = assignment.metadata_key {
2088                let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
2089                let (canonical_key, canonical_value) =
2090                    canonicalize_sql_ttl_metadata(metadata_key, raw_value);
2091                assignments
2092                    .dynamic_metadata_assignments
2093                    .push((canonical_key.to_string(), canonical_value));
2094            } else {
2095                assignments.dynamic_field_assignments.push((
2096                    assignment.column.clone(),
2097                    normalize_row_update_value_for_rule(
2098                        &query.table,
2099                        self.resolve_crypto_sentinel(value)?,
2100                        assignment.row_rule.as_ref(),
2101                    )?,
2102                ));
2103            }
2104        }
2105
2106        Ok(assignments)
2107    }
2108
2109    fn apply_materialized_update_for_entity(
2110        &self,
2111        collection: String,
2112        entity: UnifiedEntity,
2113        compiled_plan: &CompiledUpdatePlan,
2114        assignments: MaterializedUpdateAssignments,
2115    ) -> RedDBResult<AppliedEntityMutation> {
2116        if matches!(entity.data, EntityData::Row(_)) {
2117            return self.apply_loaded_sql_update_row_core(
2118                collection,
2119                entity,
2120                &compiled_plan.static_field_assignments,
2121                assignments.dynamic_field_assignments,
2122                &compiled_plan.static_metadata_assignments,
2123                assignments.dynamic_metadata_assignments,
2124                compiled_plan.row_contract_plan.as_ref(),
2125                &compiled_plan.row_modified_columns,
2126                compiled_plan.row_touches_unique_columns,
2127            );
2128        }
2129
2130        ensure_graph_identity_update_allowed(&entity, compiled_plan, &assignments)?;
2131
2132        let operations = build_patch_operations_from_materialized_assignments(
2133            &entity,
2134            compiled_plan,
2135            assignments,
2136        );
2137        self.apply_loaded_patch_entity_core(
2138            collection,
2139            entity,
2140            crate::json::Value::Null,
2141            operations,
2142        )
2143    }
2144
    /// Execute `DELETE FROM table WHERE filter`.
    ///
    /// Gates run in this order before any row is touched: the DML write
    /// gate (`check_write`), the blockchain-immutability check, and the
    /// collection contract gate for `MutationKind::Delete`.
    ///
    /// After the gates, one of three paths is taken:
    /// - `RETURNING` present: the statement is rewritten to a `SELECT`
    ///   to capture pre-images, this method is re-entered with the
    ///   `RETURNING` clause stripped, and the captured rows are projected
    ///   through the requested items.
    /// - RLS enabled on the table: the `FOR DELETE` policy filter is
    ///   AND-ed into the user's filter (or zero rows are reported when no
    ///   policy filter exists) before delegating to `execute_delete_inner`.
    /// - otherwise: delegates directly to `execute_delete_inner`.
    ///
    /// # Errors
    ///
    /// Returns `RedDBError::InvalidOperation` for blockchain collections
    /// and `RedDBError::Query` when the `RETURNING` rewrite fails; errors
    /// from the gates, the pre-image query and the inner delete are
    /// propagated.
    pub fn execute_delete(
        &self,
        raw_query: &str,
        query: &DeleteQuery,
    ) -> RedDBResult<RuntimeQueryResult> {
        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
        // Issue #523 — blockchain collections are immutable; see
        // execute_update for the same gate.
        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
            return Err(RedDBError::InvalidOperation(format!(
                "BlockchainCollectionImmutable: DELETE not allowed on '{}'",
                query.table
            )));
        }
        // CollectionContract gate (#50) — see execute_update for
        // rationale. The gate handles APPEND ONLY rejection and is
        // the single point where future contract bits land.
        crate::runtime::collection_contract::CollectionContractGate::check(
            self,
            &query.table,
            crate::runtime::collection_contract::MutationKind::Delete,
        )?;

        // RETURNING on DELETE: capture the pre-image via an internal
        // SELECT that reuses the same WHERE, then run the delete with
        // the RETURNING clause stripped, then project the captured
        // rows through the requested items. The extra SELECT is a
        // pragmatic MVP — a future pass can fuse the scan with the
        // delete to avoid the second pass over the heap.
        if let Some(items) = query.returning.clone() {
            let select_sql = delete_to_select_sql(raw_query).ok_or_else(|| {
                RedDBError::Query(
                    "DELETE ... RETURNING: cannot rewrite query for pre-image scan".to_string(),
                )
            })?;
            let captured = self.execute_query(&select_sql)?;

            // Re-enter with RETURNING cleared so the recursion takes the
            // plain (or RLS) path below; the gates above run again.
            let mut inner_query = query.clone();
            inner_query.returning = None;
            let _ = self.execute_delete(raw_query, &inner_query)?;

            // Flatten each captured record into owned (column, value) pairs.
            let snapshots: Vec<Vec<(String, Value)>> = captured
                .result
                .records
                .iter()
                .map(|rec| {
                    rec.iter_fields()
                        .map(|(k, v)| (k.as_ref().to_string(), v.clone()))
                        .collect()
                })
                .collect();
            // NOTE(review): `affected` is the number of captured pre-image
            // rows; the count reported by the inner delete is discarded
            // above. If the SELECT and the DELETE can see different row
            // sets (e.g. differing RLS policies or a concurrent writer),
            // the two may disagree — confirm this is intended.
            let affected = snapshots.len() as u64;
            let result = build_returning_result(&items, &snapshots, None);

            let mut response = RuntimeQueryResult::dml_result(
                raw_query.to_string(),
                affected,
                "delete",
                "runtime-dml-returning",
            );
            response.result = result;
            return Ok(response);
        }
        // Row-Level Security enforcement (Phase 2.5.2 PG parity).
        //
        // When the table has RLS enabled, gate the DELETE by the
        // per-role policy set: mutations only touch rows that *every*
        // matching `FOR DELETE` policy would accept. No policies =>
        // zero rows affected (PG restrictive-default).
        if crate::runtime::impl_core::rls_is_enabled(self, &query.table) {
            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
                self,
                &query.table,
                crate::storage::query::ast::PolicyAction::Delete,
            );
            let Some(policy) = rls_filter else {
                return Ok(RuntimeQueryResult::dml_result(
                    raw_query.to_string(),
                    0,
                    "delete",
                    "runtime-dml-rls",
                ));
            };
            // Fold the policy predicate into the user's WHERE before
            // dispatching — the remainder of this function reads the
            // filter from `query` via `effective_delete_filter`, which
            // respects the updated value.
            let mut augmented = query.clone();
            augmented.filter = Some(match augmented.filter.take() {
                Some(existing) => {
                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
                }
                None => policy,
            });
            return self.execute_delete_inner(raw_query, &augmented);
        }
        self.execute_delete_inner(raw_query, query)
    }
2244
2245    fn execute_delete_inner(
2246        &self,
2247        raw_query: &str,
2248        query: &DeleteQuery,
2249    ) -> RedDBResult<RuntimeQueryResult> {
2250        let effective_filter = effective_delete_filter(query);
2251
2252        // Find the rows that match the WHERE clause. The "find target
2253        // rows" loop lives in DmlTargetScan so UPDATE (#52) can reuse
2254        // the same scan strategy.
2255        let scan = super::dml_target_scan::DmlTargetScan::new(
2256            self,
2257            &query.table,
2258            effective_filter.as_ref(),
2259            None,
2260        );
2261        let ids_to_delete = scan.find_target_ids()?;
2262
2263        // For event-enabled collections, snapshot the pre-delete state
2264        // before rows are physically removed.
2265        let needs_delete_events =
2266            !query.suppress_events && self.collection_has_delete_subscriptions(&query.table);
2267        let mut pre_images: HashMap<u64, crate::json::Value> = if needs_delete_events {
2268            scan.row_json_pre_images(&ids_to_delete)
2269        } else {
2270            HashMap::new()
2271        };
2272
2273        let mut affected: u64 = 0;
2274        for chunk in ids_to_delete.chunks(UPDATE_APPLY_CHUNK_SIZE) {
2275            let (count, lsns) = self.delete_entities_batch(&query.table, chunk)?;
2276            affected += count;
2277            if needs_delete_events && !lsns.is_empty() {
2278                // lsns.len() == actually-deleted entities; align with chunk ids.
2279                // `delete_batch` may skip missing entities, so we correlate by
2280                // the number returned (they're emitted in chunk order).
2281                let deleted_chunk = &chunk[..lsns.len().min(chunk.len())];
2282                self.emit_delete_events_for_collection(
2283                    &query.table,
2284                    deleted_chunk,
2285                    &lsns,
2286                    &pre_images,
2287                )?;
2288            }
2289        }
2290        pre_images.clear();
2291
2292        if affected > 0 {
2293            self.note_table_write(&query.table);
2294        }
2295
2296        Ok(RuntimeQueryResult::dml_result(
2297            raw_query.to_string(),
2298            affected,
2299            "delete",
2300            "runtime-dml",
2301        ))
2302    }
2303}
2304
2305/// Reject UPDATE … NODES/EDGES that assign to graph identity/topology
2306/// columns regardless of whether any row matches the WHERE clause. The
2307/// per-entity guard below covers only the matched-rows case, but ADR 0019
2308/// declares these columns immutable on the surface itself, so a zero-row
2309/// UPDATE should still surface the same error to operators and SDKs.
2310fn ensure_graph_identity_update_target_allowed(query: &UpdateQuery) -> RedDBResult<()> {
2311    if !matches!(query.target, UpdateTarget::Nodes | UpdateTarget::Edges) {
2312        return Ok(());
2313    }
2314    for (column, _) in &query.assignment_exprs {
2315        if is_immutable_graph_identity_field(column) {
2316            return Err(RedDBError::Query(format!(
2317                "immutable graph field '{column}' cannot be updated"
2318            )));
2319        }
2320    }
2321    Ok(())
2322}
2323
2324fn ensure_graph_identity_update_allowed(
2325    entity: &UnifiedEntity,
2326    compiled_plan: &CompiledUpdatePlan,
2327    assignments: &MaterializedUpdateAssignments,
2328) -> RedDBResult<()> {
2329    if !matches!(entity.data, EntityData::Node(_) | EntityData::Edge(_)) {
2330        return Ok(());
2331    }
2332
2333    for (column, _) in compiled_plan
2334        .static_field_assignments
2335        .iter()
2336        .chain(assignments.dynamic_field_assignments.iter())
2337    {
2338        if is_immutable_graph_identity_field(column) {
2339            return Err(RedDBError::Query(format!(
2340                "immutable graph field '{column}' cannot be updated"
2341            )));
2342        }
2343    }
2344
2345    Ok(())
2346}
2347
/// Whether `column` names one of the graph identity/topology columns
/// (`rid`, `label`, `from_rid`, `to_rid`, `from`, `to`). The comparison
/// ignores ASCII case.
fn is_immutable_graph_identity_field(column: &str) -> bool {
    const IMMUTABLE_FIELDS: [&str; 6] = ["rid", "label", "from_rid", "to_rid", "from", "to"];

    for reserved in IMMUTABLE_FIELDS {
        if reserved.eq_ignore_ascii_case(column) {
            return true;
        }
    }
    false
}
2353
2354fn build_patch_operations_from_materialized_assignments(
2355    entity: &UnifiedEntity,
2356    compiled_plan: &CompiledUpdatePlan,
2357    assignments: MaterializedUpdateAssignments,
2358) -> Vec<PatchEntityOperation> {
2359    let mut operations = Vec::with_capacity(
2360        compiled_plan.static_field_assignments.len()
2361            + compiled_plan.static_metadata_assignments.len()
2362            + assignments.dynamic_field_assignments.len()
2363            + assignments.dynamic_metadata_assignments.len(),
2364    );
2365
2366    for (column, value) in &compiled_plan.static_field_assignments {
2367        operations.push(PatchEntityOperation {
2368            op: PatchEntityOperationType::Set,
2369            path: update_patch_path_for_entity(entity, column),
2370            value: Some(storage_value_to_json(value)),
2371        });
2372    }
2373
2374    for (column, value) in assignments.dynamic_field_assignments {
2375        operations.push(PatchEntityOperation {
2376            op: PatchEntityOperationType::Set,
2377            path: update_patch_path_for_entity(entity, &column),
2378            value: Some(storage_value_to_json(&value)),
2379        });
2380    }
2381
2382    for (key, value) in &compiled_plan.static_metadata_assignments {
2383        operations.push(PatchEntityOperation {
2384            op: PatchEntityOperationType::Set,
2385            path: vec!["metadata".to_string(), key.clone()],
2386            value: Some(metadata_value_to_json(value)),
2387        });
2388    }
2389
2390    for (key, value) in assignments.dynamic_metadata_assignments {
2391        operations.push(PatchEntityOperation {
2392            op: PatchEntityOperationType::Set,
2393            path: vec!["metadata".to_string(), key],
2394            value: Some(metadata_value_to_json(&value)),
2395        });
2396    }
2397
2398    operations
2399}
2400
2401fn update_patch_path_for_entity(entity: &UnifiedEntity, column: &str) -> Vec<String> {
2402    if matches!(
2403        (&entity.kind, &entity.data),
2404        (
2405            crate::storage::EntityKind::GraphNode(_),
2406            EntityData::Node(_)
2407        )
2408    ) && column.eq_ignore_ascii_case("node_type")
2409    {
2410        return vec!["node_type".to_string()];
2411    }
2412    if matches!(
2413        (&entity.kind, &entity.data),
2414        (
2415            crate::storage::EntityKind::GraphEdge(_),
2416            EntityData::Edge(_)
2417        )
2418    ) && column.eq_ignore_ascii_case("weight")
2419    {
2420        return vec!["weight".to_string()];
2421    }
2422    vec!["fields".to_string(), column.to_string()]
2423}
2424
/// Rewrite `DELETE FROM <table> [WHERE …] [RETURNING …]` as
/// `SELECT * FROM <table> [WHERE …]` so the delete executor can
/// capture the pre-image before actually removing the rows.
///
/// Returns `None` when the input does not start with the `DELETE`
/// keyword, or when no whitespace-delimited `FROM` followed by more
/// text comes after it.
///
/// Keywords are matched case-insensitively and may be separated by any
/// ASCII whitespace (spaces, tabs, newlines), so multi-line statements
/// are rewritten too. Everything between the table name and the
/// RETURNING clause is preserved verbatim, so WHERE / ORDER BY / LIMIT
/// survive untouched. The RETURNING tail — if present — is truncated at
/// the first top-level `RETURNING` token.
fn delete_to_select_sql(sql: &str) -> Option<String> {
    const DELETE_KW: &str = "delete";
    const FROM_KW: &str = "from";

    let trimmed = sql.trim_start();
    // ASCII lowercasing never changes byte lengths, so offsets found in
    // `lowered` address the same positions in `trimmed`.
    let lowered = trimmed.to_ascii_lowercase();

    // `DELETE` must be a whole token: `deleted …` is not a DELETE.
    let after_delete_lc = lowered.strip_prefix(DELETE_KW)?;
    if !after_delete_lc.starts_with(|c: char| c.is_ascii_whitespace()) {
        return None;
    }

    // Locate the whitespace-bounded `FROM` that follows DELETE.
    let from_pos = find_top_level_keyword(after_delete_lc, FROM_KW)?;
    let from_end = DELETE_KW.len() + from_pos + FROM_KW.len();
    if from_end >= trimmed.len() {
        // `FROM` is the last token: there is no table to select from.
        return None;
    }
    // The byte right after `FROM` is a single ASCII whitespace separator
    // (guaranteed by `find_top_level_keyword`); the body starts after it.
    let body_start = from_end + 1;
    let after_from = &trimmed[body_start..];
    let after_from_lc = &lowered[body_start..];

    // Cut off the RETURNING tail. The clause appears at most once per
    // statement at top level in our grammar; the keyword search skips
    // single-quoted string literals so a literal containing the word
    // `returning` is not clipped.
    let body = match find_top_level_keyword(after_from_lc, "returning") {
        Some(pos) => &after_from[..pos],
        None => after_from,
    };
    Some(format!("SELECT * FROM {}", body.trim_end()))
}

/// Find the byte offset of the first whitespace-bounded occurrence of
/// `needle` in `haystack`, skipping matches inside single-quoted string
/// literals.
///
/// The comparison is byte-exact, so callers pass an already-lowercased
/// haystack together with a lowercase needle. A match must start at
/// offset 0 or right after ASCII whitespace, and must end at the end of
/// the haystack or right before ASCII whitespace.
///
/// Naive — every `'` simply toggles the in-string state and there is no
/// other escape handling — but enough for the shapes the DML parser
/// emits.
fn find_top_level_keyword(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let needle = needle.as_bytes();
    let mut in_string = false;

    for (offset, &byte) in bytes.iter().enumerate() {
        if byte == b'\'' {
            in_string = !in_string;
            continue;
        }
        if in_string {
            continue;
        }
        let Some(rest) = bytes[offset..].strip_prefix(needle) else {
            continue;
        };
        let starts_token = offset == 0 || bytes[offset - 1].is_ascii_whitespace();
        let ends_token = rest.first().map_or(true, |next| next.is_ascii_whitespace());
        if starts_token && ends_token {
            return Some(offset);
        }
    }
    None
}
2484
/// Build a `UnifiedResult` from the rows affected by a DML statement plus
/// its `RETURNING` clause. Each snapshot is a list of (column, value) pairs
/// for one affected row; `outputs`, when provided, supplies the engine-
/// assigned entity id for the same row (INSERT path). Projection honours
/// the RETURNING items: `*` expands to every snapshot column plus
/// the public row envelope when available.
///
/// Column list:
/// - with `*`: the envelope columns (`rid`, `collection`, `kind`,
///   `tenant`, `created_at`, `updated_at`) when the first output carries
///   an entity with a public kind, otherwise `red_entity_id` when
///   `outputs` is provided at all; followed by the column names of the
///   FIRST snapshot only.
/// - without `*`: the explicitly named columns, in order.
/// Duplicates are removed while preserving first-seen order.
///
/// Records: one per snapshot, matched to `outputs` by index. A projected
/// column with no value for a row is omitted from that record rather than
/// set to null.
fn build_returning_result(
    items: &[ReturningItem],
    snapshots: &[Vec<(String, Value)>],
    outputs: Option<&[CreateEntityOutput]>,
) -> UnifiedResult {
    let project_all = items.iter().any(|it| matches!(it, ReturningItem::All));
    // Only the first output is inspected to decide whether the public
    // envelope columns are part of a `RETURNING *` projection.
    let public_item_outputs = outputs.is_some_and(|outs| {
        outs.first()
            .and_then(|out| out.entity.as_ref())
            .is_some_and(|entity| public_returning_item_kind(entity).is_some())
    });

    let mut columns: Vec<String> = if project_all {
        let mut cols: Vec<String> = Vec::new();
        if public_item_outputs {
            cols.extend(
                [
                    "rid",
                    "collection",
                    "kind",
                    "tenant",
                    "created_at",
                    "updated_at",
                ]
                .into_iter()
                .map(str::to_string),
            );
        } else if outputs.is_some() {
            cols.push("red_entity_id".to_string());
        }
        // Snapshot columns are taken from the first row only.
        if let Some(first) = snapshots.first() {
            for (name, _) in first {
                cols.push(name.clone());
            }
        }
        cols
    } else {
        items
            .iter()
            .filter_map(|it| match it {
                ReturningItem::Column(c) => Some(c.clone()),
                ReturningItem::All => None,
            })
            .collect()
    };
    // Guarantee unique order-preserving column list.
    {
        let mut seen = std::collections::HashSet::new();
        columns.retain(|c| seen.insert(c.clone()));
    }

    let mut records: Vec<UnifiedRecord> = Vec::with_capacity(snapshots.len());
    for (idx, snap) in snapshots.iter().enumerate() {
        // Candidate values for this row; seeded with envelope values and
        // then overlaid with the snapshot, so a snapshot column with the
        // same name wins.
        let mut values: HashMap<Arc<str>, Value> = HashMap::with_capacity(columns.len());
        if let Some(outs) = outputs {
            if let Some(out) = outs.get(idx) {
                if let Some(entity) = out.entity.as_ref() {
                    if let Some(kind) = public_returning_item_kind(entity) {
                        values.insert(
                            Arc::clone(&sys_key_rid()),
                            Value::UnsignedInteger(out.id.raw()),
                        );
                        values.insert(
                            Arc::clone(&sys_key_collection()),
                            Value::text(entity.kind.collection().to_string()),
                        );
                        values.insert(Arc::clone(&sys_key_kind()), Value::text(kind.to_string()));
                        values.insert(
                            Arc::clone(&sys_key_created_at()),
                            Value::UnsignedInteger(entity.created_at),
                        );
                        values.insert(
                            Arc::clone(&sys_key_updated_at()),
                            Value::UnsignedInteger(entity.updated_at),
                        );
                        // Legacy alias: an explicit `RETURNING red_entity_id`
                        // still resolves to the row's rid. Only surfaces when
                        // the projected column list names it — `RETURNING *`
                        // keeps the envelope clean (rid, not red_entity_id).
                        values.insert(
                            Arc::clone(&sys_key_red_entity_id()),
                            Value::UnsignedInteger(out.id.raw()),
                        );
                    } else {
                        // Entity without a public kind: expose only the
                        // id, as a signed integer.
                        values.insert(
                            Arc::clone(&sys_key_red_entity_id()),
                            Value::Integer(out.id.raw() as i64),
                        );
                    }
                } else {
                    // No entity attached to the output: same id-only form.
                    values.insert(
                        Arc::clone(&sys_key_red_entity_id()),
                        Value::Integer(out.id.raw() as i64),
                    );
                }
            }
        }
        for (name, val) in snap {
            values.insert(Arc::from(name.as_str()), val.clone());
        }
        // Fall back to the row's `tenant_id` value (or null) when no
        // `tenant` value was supplied.
        // NOTE(review): presumably `sys_key_tenant()` is the key "tenant",
        // matching the literal checked here — confirm.
        if !values.contains_key("tenant") {
            let tenant = values.get("tenant_id").cloned().unwrap_or(Value::Null);
            values.insert(Arc::clone(&sys_key_tenant()), tenant);
        }
        let mut rec = UnifiedRecord::default();
        // Only keep projected columns on the record.
        for col in &columns {
            if let Some(v) = values.get(col.as_str()) {
                rec.set_arc(Arc::from(col.as_str()), v.clone());
            }
        }
        records.push(rec);
    }

    UnifiedResult {
        columns,
        records,
        stats: Default::default(),
        pre_serialized_json: None,
    }
}
2612
2613fn public_returning_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<&'static str> {
2614    match (&entity.kind, &entity.data) {
2615        (crate::storage::EntityKind::GraphNode(_), crate::storage::EntityData::Node(_)) => {
2616            Some("node")
2617        }
2618        (crate::storage::EntityKind::GraphEdge(_), crate::storage::EntityData::Edge(_)) => {
2619            Some("edge")
2620        }
2621        (_, crate::storage::EntityData::Row(_)) => Some(public_returning_row_kind(entity)),
2622        _ => None,
2623    }
2624}
2625
2626fn public_returning_row_kind(entity: &crate::storage::UnifiedEntity) -> &'static str {
2627    let Some(row) = entity.data.as_row() else {
2628        return "row";
2629    };
2630
2631    let is_kv = row.named.as_ref().is_some_and(|named| {
2632        (named.len() == 2 && named.contains_key("key") && named.contains_key("value"))
2633            || (named.len() == 1 && (named.contains_key("key") || named.contains_key("value")))
2634    });
2635    if is_kv {
2636        return "kv";
2637    }
2638
2639    let is_document = row
2640        .named
2641        .as_ref()
2642        .is_some_and(|named| named.values().any(runtime_returning_documentish_value))
2643        || row.columns.iter().any(runtime_returning_documentish_value);
2644    if is_document {
2645        "document"
2646    } else {
2647        "row"
2648    }
2649}
2650
2651fn runtime_returning_documentish_value(value: &Value) -> bool {
2652    matches!(value, Value::Json(_) | Value::Blob(_))
2653}
2654
2655fn row_insert_returning_snapshots(
2656    outputs: &[CreateEntityOutput],
2657    fallback: Vec<Vec<(String, Value)>>,
2658) -> Vec<Vec<(String, Value)>> {
2659    outputs
2660        .iter()
2661        .enumerate()
2662        .map(|(idx, out)| {
2663            out.entity
2664                .as_ref()
2665                .map(entity_row_fields_snapshot)
2666                .filter(|snap| !snap.is_empty())
2667                .unwrap_or_else(|| fallback.get(idx).cloned().unwrap_or_default())
2668        })
2669        .collect()
2670}
2671
2672fn graph_insert_returning_snapshots(
2673    store: &crate::storage::unified::UnifiedStore,
2674    collection: &str,
2675    ids: &[EntityId],
2676) -> Vec<Vec<(String, Value)>> {
2677    let Some(manager) = store.get_collection(collection) else {
2678        return Vec::new();
2679    };
2680
2681    ids.iter()
2682        .filter_map(|id| manager.get(*id))
2683        .filter_map(|entity| {
2684            let mut record = runtime_any_record_from_entity_ref(&entity)?;
2685            record.set_arc(sys_key_collection(), Value::text(collection.to_string()));
2686            Some(record)
2687        })
2688        .map(|record| {
2689            record
2690                .iter_fields()
2691                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2692                .collect()
2693        })
2694        .collect()
2695}
2696
2697fn graph_update_returning_snapshots(
2698    runtime: &RedDBRuntime,
2699    collection: &str,
2700    ids: &[EntityId],
2701) -> Vec<Vec<(String, Value)>> {
2702    let store = runtime.db().store();
2703    let Some(manager) = store.get_collection(collection) else {
2704        return Vec::new();
2705    };
2706
2707    manager
2708        .get_many(ids)
2709        .into_iter()
2710        .flatten()
2711        .filter_map(|entity| runtime_any_record_from_entity_ref(&entity))
2712        .map(|record| {
2713            record
2714                .iter_fields()
2715                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2716                .collect()
2717        })
2718        .collect()
2719}
2720
2721fn ensure_update_target_contract(
2722    runtime: &RedDBRuntime,
2723    collection: &str,
2724    target: UpdateTarget,
2725) -> RedDBResult<()> {
2726    let Some(contract) = runtime.db().collection_contract(collection) else {
2727        return Ok(());
2728    };
2729    if update_target_contract_is_advisory(&contract)
2730        || update_target_allows_model(contract.declared_model, update_target_model(target))
2731    {
2732        return Ok(());
2733    }
2734    Err(RedDBError::InvalidOperation(format!(
2735        "collection '{}' is declared as '{}' and does not allow '{}' updates",
2736        collection,
2737        update_model_name(contract.declared_model),
2738        update_model_name(update_target_model(target))
2739    )))
2740}
2741
2742fn update_target_contract_is_advisory(contract: &crate::physical::CollectionContract) -> bool {
2743    matches!(
2744        (&contract.origin, &contract.schema_mode),
2745        (
2746            crate::physical::ContractOrigin::Implicit,
2747            crate::catalog::SchemaMode::Dynamic,
2748        )
2749    )
2750}
2751
2752fn update_target_model(target: UpdateTarget) -> crate::catalog::CollectionModel {
2753    match target {
2754        UpdateTarget::Rows => crate::catalog::CollectionModel::Table,
2755        UpdateTarget::Documents => crate::catalog::CollectionModel::Document,
2756        UpdateTarget::Kv => crate::catalog::CollectionModel::Kv,
2757        UpdateTarget::Nodes | UpdateTarget::Edges => crate::catalog::CollectionModel::Graph,
2758    }
2759}
2760
2761fn update_target_allows_model(
2762    declared_model: crate::catalog::CollectionModel,
2763    requested_model: crate::catalog::CollectionModel,
2764) -> bool {
2765    declared_model == requested_model || declared_model == crate::catalog::CollectionModel::Mixed
2766}
2767
2768fn update_model_name(model: crate::catalog::CollectionModel) -> &'static str {
2769    match model {
2770        crate::catalog::CollectionModel::Table => "table",
2771        crate::catalog::CollectionModel::Document => "document",
2772        crate::catalog::CollectionModel::Graph => "graph",
2773        crate::catalog::CollectionModel::Vector => "vector",
2774        crate::catalog::CollectionModel::Hll => "hll",
2775        crate::catalog::CollectionModel::Sketch => "sketch",
2776        crate::catalog::CollectionModel::Filter => "filter",
2777        crate::catalog::CollectionModel::Kv => "kv",
2778        crate::catalog::CollectionModel::Config => "config",
2779        crate::catalog::CollectionModel::Vault => "vault",
2780        crate::catalog::CollectionModel::Mixed => "mixed",
2781        crate::catalog::CollectionModel::TimeSeries => "timeseries",
2782        crate::catalog::CollectionModel::Queue => "queue",
2783        crate::catalog::CollectionModel::Metrics => "metrics",
2784    }
2785}
2786
2787fn ensure_graph_insert_contract(runtime: &RedDBRuntime, collection: &str) -> RedDBResult<()> {
2788    let db = runtime.db();
2789    if let Some(contract) = db.collection_contract(collection) {
2790        let advisory_implicit_dynamic = matches!(
2791            (&contract.origin, &contract.schema_mode),
2792            (
2793                crate::physical::ContractOrigin::Implicit,
2794                crate::catalog::SchemaMode::Dynamic,
2795            )
2796        );
2797        if advisory_implicit_dynamic
2798            || matches!(
2799                contract.declared_model,
2800                crate::catalog::CollectionModel::Graph | crate::catalog::CollectionModel::Mixed
2801            )
2802        {
2803            return Ok(());
2804        }
2805        return Err(RedDBError::InvalidOperation(format!(
2806            "collection '{}' is declared as '{:?}' and does not allow 'Graph' writes",
2807            collection, contract.declared_model
2808        )));
2809    }
2810
2811    let now = std::time::SystemTime::now()
2812        .duration_since(std::time::UNIX_EPOCH)
2813        .unwrap_or_default()
2814        .as_millis();
2815    db.save_collection_contract(crate::physical::CollectionContract {
2816        name: collection.to_string(),
2817        declared_model: crate::catalog::CollectionModel::Graph,
2818        schema_mode: crate::catalog::SchemaMode::Dynamic,
2819        origin: crate::physical::ContractOrigin::Implicit,
2820        version: 1,
2821        created_at_unix_ms: now,
2822        updated_at_unix_ms: now,
2823        default_ttl_ms: db.collection_default_ttl_ms(collection),
2824        vector_dimension: None,
2825        vector_metric: None,
2826        context_index_fields: Vec::new(),
2827        declared_columns: Vec::new(),
2828        table_def: None,
2829        timestamps_enabled: false,
2830        context_index_enabled: false,
2831        metrics_raw_retention_ms: None,
2832        metrics_rollup_policies: Vec::new(),
2833        metrics_tenant_identity: None,
2834        metrics_namespace: None,
2835        append_only: false,
2836        subscriptions: Vec::new(),
2837        analytics_config: Vec::new(),
2838        session_key: None,
2839        session_gap_ms: None,
2840        retention_duration_ms: None,
2841        analytical_storage: None,
2842    })
2843    .map(|_| ())
2844    .map_err(|err| RedDBError::Internal(err.to_string()))
2845}
2846
2847fn update_needs_rmw_lock(query: &UpdateQuery) -> bool {
2848    query
2849        .assignment_exprs
2850        .iter()
2851        .enumerate()
2852        .any(|(idx, (column, expr))| {
2853            query
2854                .compound_assignment_ops
2855                .get(idx)
2856                .is_some_and(|op| op.is_some())
2857                || expr_references_update_column(expr, &query.table, column)
2858        })
2859}
2860
2861fn evaluate_compound_update_assignment(
2862    column: &str,
2863    record: &UnifiedRecord,
2864    op: BinOp,
2865    rhs: Value,
2866) -> RedDBResult<Value> {
2867    let lhs = record.get(column).ok_or_else(|| {
2868        RedDBError::Query(format!(
2869            "compound assignment requires existing numeric field '{column}'"
2870        ))
2871    })?;
2872    if matches!(lhs, Value::Null) {
2873        return Err(RedDBError::Query(format!(
2874            "compound assignment requires non-null numeric field '{column}'"
2875        )));
2876    }
2877    apply_compound_numeric_op(column, op, lhs, &rhs)
2878}
2879
2880fn apply_compound_numeric_op(
2881    column: &str,
2882    op: BinOp,
2883    lhs: &Value,
2884    rhs: &Value,
2885) -> RedDBResult<Value> {
2886    let Some(lhs_number) = CompoundNumber::from_value(lhs) else {
2887        return Err(RedDBError::Query(format!(
2888            "compound assignment requires numeric field '{column}'"
2889        )));
2890    };
2891    let Some(rhs_number) = CompoundNumber::from_value(rhs) else {
2892        return Err(RedDBError::Query(format!(
2893            "compound assignment requires numeric right-hand value for field '{column}'"
2894        )));
2895    };
2896
2897    if lhs_number.is_float() || rhs_number.is_float() || matches!(op, BinOp::Div) {
2898        let a = lhs_number.as_f64();
2899        let b = rhs_number.as_f64();
2900        let out = match op {
2901            BinOp::Add => a + b,
2902            BinOp::Sub => a - b,
2903            BinOp::Mul => a * b,
2904            BinOp::Div => {
2905                if b == 0.0 {
2906                    return Err(RedDBError::Query(format!(
2907                        "division by zero in compound assignment for field '{column}'"
2908                    )));
2909                }
2910                a / b
2911            }
2912            BinOp::Mod => {
2913                if b == 0.0 {
2914                    return Err(RedDBError::Query(format!(
2915                        "modulo by zero in compound assignment for field '{column}'"
2916                    )));
2917                }
2918                a % b
2919            }
2920            _ => {
2921                return Err(RedDBError::Query(format!(
2922                    "unsupported compound assignment operator for field '{column}'"
2923                )));
2924            }
2925        };
2926        if !out.is_finite() {
2927            return Err(RedDBError::Query(format!(
2928                "numeric overflow in compound assignment for field '{column}'"
2929            )));
2930        }
2931        return Ok(Value::Float(out));
2932    }
2933
2934    let a = lhs_number.as_i128();
2935    let b = rhs_number.as_i128();
2936    let out = match op {
2937        BinOp::Add => a.checked_add(b),
2938        BinOp::Sub => a.checked_sub(b),
2939        BinOp::Mul => a.checked_mul(b),
2940        BinOp::Mod => {
2941            if b == 0 {
2942                return Err(RedDBError::Query(format!(
2943                    "modulo by zero in compound assignment for field '{column}'"
2944                )));
2945            }
2946            a.checked_rem(b)
2947        }
2948        BinOp::Div => unreachable!("integer division is handled by the float branch"),
2949        _ => None,
2950    }
2951    .ok_or_else(|| {
2952        RedDBError::Query(format!(
2953            "numeric overflow in compound assignment for field '{column}'"
2954        ))
2955    })?;
2956
2957    if matches!(lhs, Value::UnsignedInteger(_)) {
2958        let value = u64::try_from(out).map_err(|_| {
2959            RedDBError::Query(format!(
2960                "numeric overflow in compound assignment for field '{column}'"
2961            ))
2962        })?;
2963        Ok(Value::UnsignedInteger(value))
2964    } else {
2965        let value = i64::try_from(out).map_err(|_| {
2966            RedDBError::Query(format!(
2967                "numeric overflow in compound assignment for field '{column}'"
2968            ))
2969        })?;
2970        Ok(Value::Integer(value))
2971    }
2972}
2973
2974#[derive(Clone, Copy)]
2975enum CompoundNumber {
2976    Integer(i128),
2977    Float(f64),
2978}
2979
2980impl CompoundNumber {
2981    fn from_value(value: &Value) -> Option<Self> {
2982        match value {
2983            Value::Integer(value) | Value::BigInt(value) => Some(Self::Integer(*value as i128)),
2984            Value::UnsignedInteger(value) => Some(Self::Integer(*value as i128)),
2985            Value::Float(value) => value.is_finite().then_some(Self::Float(*value)),
2986            Value::Decimal(value) => Some(Self::Float(*value as f64 / 10_000.0)),
2987            _ => None,
2988        }
2989    }
2990
2991    fn is_float(self) -> bool {
2992        matches!(self, Self::Float(_))
2993    }
2994
2995    fn as_f64(self) -> f64 {
2996        match self {
2997            Self::Integer(value) => value as f64,
2998            Self::Float(value) => value,
2999        }
3000    }
3001
3002    fn as_i128(self) -> i128 {
3003        match self {
3004            Self::Integer(value) => value,
3005            Self::Float(_) => unreachable!("float compound number used as integer"),
3006        }
3007    }
3008}
3009
3010fn expr_references_update_column(expr: &Expr, table_name: &str, target_column: &str) -> bool {
3011    match expr {
3012        Expr::Literal { .. } | Expr::Parameter { .. } | Expr::Subquery { .. } => false,
3013        Expr::Column { field, .. } => {
3014            field_ref_matches_update_column(field, table_name, target_column)
3015        }
3016        Expr::BinaryOp { lhs, rhs, .. } => {
3017            expr_references_update_column(lhs, table_name, target_column)
3018                || expr_references_update_column(rhs, table_name, target_column)
3019        }
3020        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
3021            expr_references_update_column(operand, table_name, target_column)
3022        }
3023        Expr::FunctionCall { args, .. } => args
3024            .iter()
3025            .any(|arg| expr_references_update_column(arg, table_name, target_column)),
3026        Expr::Case {
3027            branches, else_, ..
3028        } => {
3029            branches.iter().any(|(cond, value)| {
3030                expr_references_update_column(cond, table_name, target_column)
3031                    || expr_references_update_column(value, table_name, target_column)
3032            }) || else_
3033                .as_deref()
3034                .is_some_and(|expr| expr_references_update_column(expr, table_name, target_column))
3035        }
3036        Expr::IsNull { operand, .. } => {
3037            expr_references_update_column(operand, table_name, target_column)
3038        }
3039        Expr::InList { target, values, .. } => {
3040            expr_references_update_column(target, table_name, target_column)
3041                || values
3042                    .iter()
3043                    .any(|value| expr_references_update_column(value, table_name, target_column))
3044        }
3045        Expr::Between {
3046            target, low, high, ..
3047        } => {
3048            expr_references_update_column(target, table_name, target_column)
3049                || expr_references_update_column(low, table_name, target_column)
3050                || expr_references_update_column(high, table_name, target_column)
3051        }
3052        Expr::WindowFunctionCall { args, window, .. } => {
3053            args.iter()
3054                .any(|arg| expr_references_update_column(arg, table_name, target_column))
3055                || window
3056                    .partition_by
3057                    .iter()
3058                    .any(|e| expr_references_update_column(e, table_name, target_column))
3059                || window
3060                    .order_by
3061                    .iter()
3062                    .any(|o| expr_references_update_column(&o.expr, table_name, target_column))
3063        }
3064    }
3065}
3066
3067fn field_ref_matches_update_column(
3068    field: &FieldRef,
3069    table_name: &str,
3070    target_column: &str,
3071) -> bool {
3072    match field {
3073        FieldRef::TableColumn { table, column } => {
3074            column.eq_ignore_ascii_case(target_column)
3075                && (table.is_empty() || table.eq_ignore_ascii_case(table_name))
3076        }
3077        FieldRef::NodeProperty { .. } | FieldRef::EdgeProperty { .. } | FieldRef::NodeId { .. } => {
3078            false
3079        }
3080    }
3081}
3082
3083fn resolve_update_entity_by_logical_id(
3084    runtime: &RedDBRuntime,
3085    table: &str,
3086    logical_id: EntityId,
3087) -> Option<UnifiedEntity> {
3088    let store = runtime.inner.db.store();
3089    if let Some(entity) =
3090        crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement()
3091            .resolve_logical_id(&store, table, logical_id)
3092    {
3093        return Some(entity);
3094    }
3095    // Fallback for non-table-row entities (graph nodes/edges, etc.) where
3096    // entity_id == logical_id and the MVCC table-row resolver doesn't apply.
3097    store.get(table, logical_id)
3098}
3099
3100fn update_cdc_item_kind(
3101    runtime: &RedDBRuntime,
3102    collection: &str,
3103    entity: &UnifiedEntity,
3104) -> &'static str {
3105    match &entity.data {
3106        EntityData::Node(_) => return "node",
3107        EntityData::Edge(_) => return "edge",
3108        _ => {}
3109    }
3110
3111    match runtime
3112        .db()
3113        .collection_contract(collection)
3114        .map(|contract| contract.declared_model)
3115    {
3116        Some(crate::catalog::CollectionModel::Document) => "document",
3117        Some(crate::catalog::CollectionModel::Kv)
3118        | Some(crate::catalog::CollectionModel::Vault) => "kv",
3119        _ => "row",
3120    }
3121}
3122
3123fn ordered_update_target_ids(
3124    manager: &Arc<crate::storage::SegmentManager>,
3125    entity_ids: &[EntityId],
3126    order_by: &[OrderByClause],
3127    limit: Option<usize>,
3128) -> Vec<EntityId> {
3129    let mut entities: Vec<UnifiedEntity> =
3130        manager.get_many(entity_ids).into_iter().flatten().collect();
3131    entities.sort_by(|left, right| compare_update_order(left, right, order_by));
3132    if let Some(limit) = limit {
3133        entities.truncate(limit);
3134    }
3135    entities.into_iter().map(|entity| entity.id).collect()
3136}
3137
3138fn compare_update_order(
3139    left: &UnifiedEntity,
3140    right: &UnifiedEntity,
3141    order_by: &[OrderByClause],
3142) -> Ordering {
3143    for clause in order_by {
3144        let left_value = update_order_value(left, &clause.field);
3145        let right_value = update_order_value(right, &clause.field);
3146        let ordering = compare_update_order_values(
3147            left_value.as_ref(),
3148            right_value.as_ref(),
3149            clause.nulls_first,
3150        );
3151        if ordering != Ordering::Equal {
3152            return if clause.ascending {
3153                ordering
3154            } else {
3155                ordering.reverse()
3156            };
3157        }
3158    }
3159    left.logical_id().raw().cmp(&right.logical_id().raw())
3160}
3161
3162fn compare_update_order_values(
3163    left: Option<&Value>,
3164    right: Option<&Value>,
3165    nulls_first: bool,
3166) -> Ordering {
3167    match (left, right) {
3168        (None, None) => Ordering::Equal,
3169        (None, Some(_)) => {
3170            if nulls_first {
3171                Ordering::Less
3172            } else {
3173                Ordering::Greater
3174            }
3175        }
3176        (Some(_), None) => {
3177            if nulls_first {
3178                Ordering::Greater
3179            } else {
3180                Ordering::Less
3181            }
3182        }
3183        (Some(left), Some(right)) => {
3184            crate::storage::query::value_compare::total_compare_values(left, right)
3185        }
3186    }
3187}
3188
3189fn update_order_value(entity: &UnifiedEntity, field: &FieldRef) -> Option<Value> {
3190    let FieldRef::TableColumn { table, column } = field else {
3191        return None;
3192    };
3193    if !table.is_empty() {
3194        return None;
3195    }
3196    if column.eq_ignore_ascii_case("rid") {
3197        return Some(Value::UnsignedInteger(entity.logical_id().raw()));
3198    }
3199    match &entity.data {
3200        EntityData::Row(row) => row.get_field(column).cloned(),
3201        EntityData::Node(_) | EntityData::Edge(_) => runtime_any_record_from_entity_ref(entity)
3202            .and_then(|record| record.get(column).cloned()),
3203        _ => None,
3204    }
3205}
3206
/// Removes duplicate column names, comparing ASCII case-insensitively.
///
/// The first spelling of each name is kept and the original relative order
/// is preserved.
fn dedupe_update_columns(columns: Vec<String>) -> Vec<String> {
    let mut unique: Vec<String> = Vec::with_capacity(columns.len());
    for candidate in columns {
        let already_seen = unique
            .iter()
            .any(|kept| kept.eq_ignore_ascii_case(&candidate));
        if !already_seen {
            unique.push(candidate);
        }
    }
    unique
}
3223
3224// =============================================================================
3225// Helper functions for extracting typed values from column/value pairs
3226// =============================================================================
3227
/// SQL column names that carry TTL metadata rather than row data.
const SQL_TTL_METADATA_COLUMNS: [&str; 3] = ["_ttl", "_ttl_ms", "_expires_at"];

/// Maps a SQL column name to its TTL metadata key, comparing ASCII
/// case-insensitively. Returns `None` for any non-TTL column.
fn resolve_sql_ttl_metadata_key(column: &str) -> Option<&'static str> {
    SQL_TTL_METADATA_COLUMNS
        .iter()
        .copied()
        .find(|known| column.eq_ignore_ascii_case(known))
}
3241
3242/// Canonicalize a SQL TTL metadata `(key, value)` pair so the retention
3243/// sweeper sees a single key (`_ttl_ms`) regardless of which legacy form
3244/// the operator wrote. `_ttl` is scaled from seconds to milliseconds;
3245/// `_ttl_ms` and `_expires_at` are passed through.
3246fn canonicalize_sql_ttl_metadata(
3247    key: &'static str,
3248    value: MetadataValue,
3249) -> (&'static str, MetadataValue) {
3250    if key != "_ttl" {
3251        return (key, value);
3252    }
3253    let scaled = match value {
3254        MetadataValue::Int(s) => MetadataValue::Int(s.saturating_mul(1_000)),
3255        MetadataValue::Timestamp(ms_or_s) => {
3256            // Timestamp is already chosen for very large values; treat as
3257            // already-ms to avoid silent overflow.
3258            MetadataValue::Timestamp(ms_or_s)
3259        }
3260        MetadataValue::Float(f) => MetadataValue::Float(f * 1_000.0),
3261        other => other,
3262    };
3263    ("_ttl_ms", scaled)
3264}
3265
3266/// Sentinel prefix produced by the parser for `PASSWORD('...')` and
3267/// `SECRET('...')` literals. The runtime strips this marker and
3268/// applies the actual crypto transform during INSERT execution.
3269pub(crate) const PLAINTEXT_SENTINEL: &str = "@@plain@@";
3270
3271impl RedDBRuntime {
3272    /// Strip the plaintext sentinel from a `Value::Password` or
3273    /// `Value::Secret` produced by the parser and apply the real
3274    /// crypto transform. `Password` is always hashed with argon2id.
3275    /// `Secret` is encrypted with AES-256-GCM keyed by the vault
3276    /// when `red.config.secret.auto_encrypt = true` (default).
3277    pub(crate) fn resolve_crypto_sentinel(&self, value: Value) -> RedDBResult<Value> {
3278        match value {
3279            Value::Password(marked) => {
3280                if let Some(plain) = marked.strip_prefix(PLAINTEXT_SENTINEL) {
3281                    Ok(Value::Password(crate::auth::store::hash_password(plain)))
3282                } else {
3283                    Ok(Value::Password(marked))
3284                }
3285            }
3286            Value::Secret(bytes) => {
3287                if bytes.starts_with(PLAINTEXT_SENTINEL.as_bytes()) {
3288                    if !self.secret_auto_encrypt() {
3289                        return Err(RedDBError::Query(
3290                            "SECRET() literal rejected: red.config.secret.auto_encrypt \
3291                             is false. Insert pre-encrypted bytes directly instead."
3292                                .to_string(),
3293                        ));
3294                    }
3295                    let key = self.secret_aes_key().ok_or_else(|| {
3296                        RedDBError::Query(
3297                            "SECRET() column encryption requires a bootstrapped \
3298                             vault (red.secret.aes_key is missing). Start the server \
3299                             with --vault to enable."
3300                                .to_string(),
3301                        )
3302                    })?;
3303                    let plain = &bytes[PLAINTEXT_SENTINEL.len()..];
3304                    Ok(Value::Secret(encrypt_secret_payload(&key, plain)))
3305                } else {
3306                    Ok(Value::Secret(bytes))
3307                }
3308            }
3309            other => Ok(other),
3310        }
3311    }
3312}
3313
3314/// Encode an AES-256-GCM ciphertext as `[12-byte nonce][ciphertext||tag]`.
3315/// This is the on-disk representation of `Value::Secret`.
3316fn encrypt_secret_payload(key: &[u8; 32], plaintext: &[u8]) -> Vec<u8> {
3317    let nonce_bytes = crate::auth::store::random_bytes(12);
3318    let mut nonce = [0u8; 12];
3319    nonce.copy_from_slice(&nonce_bytes[..12]);
3320    let ct = crate::crypto::aes_gcm::aes256_gcm_encrypt(key, &nonce, b"reddb.secret", plaintext);
3321    let mut out = Vec::with_capacity(12 + ct.len());
3322    out.extend_from_slice(&nonce);
3323    out.extend_from_slice(&ct);
3324    out
3325}
3326
3327/// Decode a `Value::Secret` payload back to plaintext. Returns
3328/// `None` when the payload is too short or AES-GCM authentication
3329/// fails (tampered or wrong key).
3330pub(crate) fn decrypt_secret_payload(key: &[u8; 32], payload: &[u8]) -> Option<Vec<u8>> {
3331    if payload.len() < 12 {
3332        return None;
3333    }
3334    let mut nonce = [0u8; 12];
3335    nonce.copy_from_slice(&payload[..12]);
3336    crate::crypto::aes_gcm::aes256_gcm_decrypt(key, &nonce, b"reddb.secret", &payload[12..]).ok()
3337}
3338
3339fn split_insert_metadata(
3340    runtime: &RedDBRuntime,
3341    columns: &[String],
3342    values: &[Value],
3343) -> RedDBResult<(Vec<(String, Value)>, Vec<(String, MetadataValue)>)> {
3344    let mut fields = Vec::new();
3345    let mut metadata = Vec::new();
3346
3347    for (column, value) in columns.iter().zip(values.iter()) {
3348        // Still support legacy _ttl columns for backward compat
3349        if let Some(metadata_key) = resolve_sql_ttl_metadata_key(column) {
3350            let raw_value = sql_literal_to_metadata_value(metadata_key, value)?;
3351            let (canonical_key, canonical_value) =
3352                canonicalize_sql_ttl_metadata(metadata_key, raw_value);
3353            metadata.push((canonical_key.to_string(), canonical_value));
3354            continue;
3355        }
3356        fields.push((
3357            column.clone(),
3358            runtime.resolve_crypto_sentinel(value.clone())?,
3359        ));
3360    }
3361
3362    Ok((fields, metadata))
3363}
3364
3365/// Merge structured WITH TTL, WITH EXPIRES AT, and WITH METADATA clauses into metadata entries.
3366fn merge_with_clauses(
3367    metadata: &mut Vec<(String, MetadataValue)>,
3368    ttl_ms: Option<u64>,
3369    expires_at_ms: Option<u64>,
3370    with_metadata: &[(String, Value)],
3371) {
3372    if let Some(ms) = ttl_ms {
3373        metadata.push((
3374            "_ttl_ms".to_string(),
3375            if ms <= i64::MAX as u64 {
3376                MetadataValue::Int(ms as i64)
3377            } else {
3378                MetadataValue::Timestamp(ms)
3379            },
3380        ));
3381    }
3382    if let Some(ms) = expires_at_ms {
3383        metadata.push(("_expires_at".to_string(), MetadataValue::Timestamp(ms)));
3384    }
3385    for (key, value) in with_metadata {
3386        let meta_value = match value {
3387            Value::Text(s) => MetadataValue::String(s.to_string()),
3388            Value::Integer(n) => MetadataValue::Int(*n),
3389            Value::Float(n) => MetadataValue::Float(*n),
3390            Value::Boolean(b) => MetadataValue::Bool(*b),
3391            _ => MetadataValue::String(value.to_string()),
3392        };
3393        metadata.push((key.clone(), meta_value));
3394    }
3395}
3396
3397fn merge_vector_metadata_column(
3398    metadata: &mut Vec<(String, MetadataValue)>,
3399    columns: &[String],
3400    values: &[Value],
3401) -> RedDBResult<()> {
3402    let Some(value) = columns
3403        .iter()
3404        .position(|column| column.eq_ignore_ascii_case("metadata"))
3405        .map(|index| &values[index])
3406    else {
3407        return Ok(());
3408    };
3409    let json = match value {
3410        Value::Null => return Ok(()),
3411        Value::Json(bytes) => crate::json::from_slice(bytes).map_err(|err| {
3412            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3413        })?,
3414        Value::Text(text) => crate::json::from_str(text).map_err(|err| {
3415            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3416        })?,
3417        other => {
3418            return Err(RedDBError::Query(format!(
3419                "column 'metadata' expected JSON object, got {other:?}"
3420            )))
3421        }
3422    };
3423    let parsed = metadata_from_json(&json)?;
3424    for (key, value) in parsed.iter() {
3425        metadata.push((key.clone(), value.clone()));
3426    }
3427    Ok(())
3428}
3429
3430fn apply_collection_default_ttl_metadata(
3431    runtime: &RedDBRuntime,
3432    collection: &str,
3433    metadata: &mut Vec<(String, MetadataValue)>,
3434) {
3435    if has_internal_ttl_metadata(metadata) {
3436        return;
3437    }
3438
3439    let Some(default_ttl_ms) = runtime.db().collection_default_ttl_ms(collection) else {
3440        return;
3441    };
3442
3443    metadata.push((
3444        "_ttl_ms".to_string(),
3445        if default_ttl_ms <= i64::MAX as u64 {
3446            MetadataValue::Int(default_ttl_ms as i64)
3447        } else {
3448            MetadataValue::Timestamp(default_ttl_ms)
3449        },
3450    ));
3451}
3452
3453fn ensure_non_tree_reserved_metadata_entries(
3454    metadata: &[(String, MetadataValue)],
3455) -> RedDBResult<()> {
3456    for (key, _) in metadata {
3457        ensure_non_tree_reserved_metadata_key(key)?;
3458    }
3459    Ok(())
3460}
3461
3462fn ensure_non_tree_reserved_metadata_key(key: &str) -> RedDBResult<()> {
3463    if key.starts_with(TREE_METADATA_PREFIX) {
3464        return Err(RedDBError::Query(format!(
3465            "metadata key '{}' is reserved for managed trees",
3466            key
3467        )));
3468    }
3469    Ok(())
3470}
3471
3472fn ensure_non_tree_structural_edge_label(label: &str) -> RedDBResult<()> {
3473    if label.eq_ignore_ascii_case(TREE_CHILD_EDGE_LABEL) {
3474        return Err(RedDBError::Query(format!(
3475            "edge label '{}' is reserved for managed trees",
3476            TREE_CHILD_EDGE_LABEL
3477        )));
3478    }
3479    Ok(())
3480}
3481
3482fn pairwise_columns_values(pairs: &[(String, Value)]) -> (Vec<String>, Vec<Value>) {
3483    let mut columns = Vec::with_capacity(pairs.len());
3484    let mut values = Vec::with_capacity(pairs.len());
3485
3486    for (column, value) in pairs {
3487        columns.push(column.clone());
3488        values.push(value.clone());
3489    }
3490
3491    (columns, values)
3492}
3493
3494/// Find a required column value and return it as-is.
3495fn find_column_value(columns: &[String], values: &[Value], name: &str) -> RedDBResult<Value> {
3496    for (i, col) in columns.iter().enumerate() {
3497        if col.eq_ignore_ascii_case(name) {
3498            return Ok(values[i].clone());
3499        }
3500    }
3501    Err(RedDBError::Query(format!(
3502        "required column '{name}' not found in INSERT"
3503    )))
3504}
3505
3506/// Find a required column value and coerce to String.
3507fn find_column_value_string(
3508    columns: &[String],
3509    values: &[Value],
3510    name: &str,
3511) -> RedDBResult<String> {
3512    let val = find_column_value(columns, values, name)?;
3513    match val {
3514        Value::Text(s) => Ok(s.to_string()),
3515        Value::Integer(n) => Ok(n.to_string()),
3516        Value::Float(n) => Ok(n.to_string()),
3517        other => Err(RedDBError::Query(format!(
3518            "column '{name}' expected text, got {other:?}"
3519        ))),
3520    }
3521}
3522
3523fn find_column_value_f64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<f64> {
3524    let val = find_column_value(columns, values, name)?;
3525    match val {
3526        Value::Float(n) => Ok(n),
3527        Value::Integer(n) => Ok(n as f64),
3528        Value::UnsignedInteger(n) => Ok(n as f64),
3529        Value::Text(s) => s
3530            .parse::<f64>()
3531            .map_err(|_| RedDBError::Query(format!("column '{name}' expected number, got '{s}'"))),
3532        other => Err(RedDBError::Query(format!(
3533            "column '{name}' expected number, got {other:?}"
3534        ))),
3535    }
3536}
3537
3538/// Find an optional column value as String.
3539fn find_column_value_opt_string(
3540    columns: &[String],
3541    values: &[Value],
3542    name: &str,
3543) -> Option<String> {
3544    for (i, col) in columns.iter().enumerate() {
3545        if col.eq_ignore_ascii_case(name) {
3546            return match &values[i] {
3547                Value::Null => None,
3548                Value::Text(s) => Some(s.to_string()),
3549                Value::Integer(n) => Some(n.to_string()),
3550                Value::Float(n) => Some(n.to_string()),
3551                _ => None,
3552            };
3553        }
3554    }
3555    None
3556}
3557
3558/// Resolve an EDGE endpoint (`from`/`to`) to a numeric entity id.
3559///
3560/// Accepts integer literals, decimal strings, and node labels resolved via
3561/// the per-collection graph label index (same source of truth that
3562/// `GRAPH NEIGHBORHOOD` / `GRAPH TRAVERSE` use at query time). Ambiguous
3563/// labels error so callers can fall back to the numeric id form.
3564fn resolve_edge_endpoint(
3565    store: &crate::storage::unified::UnifiedStore,
3566    collection: &str,
3567    columns: &[String],
3568    values: &[Value],
3569    name: &str,
3570) -> RedDBResult<u64> {
3571    let val = find_column_value(columns, values, name)?;
3572    match val {
3573        Value::Integer(n) => Ok(n as u64),
3574        Value::UnsignedInteger(n) => Ok(n),
3575        Value::Text(s) => {
3576            if let Ok(n) = s.parse::<u64>() {
3577                return Ok(n);
3578            }
3579            let matches = store.lookup_graph_nodes_by_label_in(collection, &s);
3580            match matches.len() {
3581                0 => Err(RedDBError::Query(format!(
3582                    "column '{name}': no graph node with label '{s}' in collection '{collection}'"
3583                ))),
3584                1 => Ok(matches[0].raw()),
3585                n => Err(RedDBError::Query(format!(
3586                    "column '{name}': ambiguous label '{s}' matches {n} nodes in collection '{collection}'; use the numeric id"
3587                ))),
3588            }
3589        }
3590        other => Err(RedDBError::Query(format!(
3591            "column '{name}' expected integer or node label, got {other:?}"
3592        ))),
3593    }
3594}
3595
3596fn resolve_edge_endpoint_any(
3597    store: &crate::storage::unified::UnifiedStore,
3598    collection: &str,
3599    columns: &[String],
3600    values: &[Value],
3601    names: &[&str],
3602) -> RedDBResult<u64> {
3603    for name in names {
3604        if columns
3605            .iter()
3606            .any(|column| column.eq_ignore_ascii_case(name))
3607        {
3608            return resolve_edge_endpoint(store, collection, columns, values, name);
3609        }
3610    }
3611
3612    Err(RedDBError::Query(format!(
3613        "required column '{}' not found in INSERT",
3614        names.first().copied().unwrap_or("from_rid")
3615    )))
3616}
3617
3618/// Find a required column value and coerce to u64.
3619fn find_column_value_u64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<u64> {
3620    let val = find_column_value(columns, values, name)?;
3621    match val {
3622        Value::Integer(n) => Ok(n as u64),
3623        Value::UnsignedInteger(n) => Ok(n),
3624        Value::Text(s) => s
3625            .parse::<u64>()
3626            .map_err(|_| RedDBError::Query(format!("column '{name}' expected integer, got '{s}'"))),
3627        other => Err(RedDBError::Query(format!(
3628            "column '{name}' expected integer, got {other:?}"
3629        ))),
3630    }
3631}
3632
3633/// Find an optional column value as f32.
3634fn find_column_value_f32_opt(columns: &[String], values: &[Value], name: &str) -> Option<f32> {
3635    for (i, col) in columns.iter().enumerate() {
3636        if col.eq_ignore_ascii_case(name) {
3637            return match &values[i] {
3638                Value::Float(n) => Some(*n as f32),
3639                Value::Integer(n) => Some(*n as f32),
3640                Value::Null => None,
3641                _ => None,
3642            };
3643        }
3644    }
3645    None
3646}
3647
3648/// Find a required column value and coerce to Vec<f32> (from Value::Vector).
3649fn find_column_value_vec_f32(
3650    columns: &[String],
3651    values: &[Value],
3652    name: &str,
3653) -> RedDBResult<Vec<f32>> {
3654    let val = find_column_value(columns, values, name)?;
3655    match val {
3656        Value::Vector(v) => Ok(v),
3657        Value::Json(bytes) => {
3658            // Try to parse as JSON array of numbers
3659            let s = std::str::from_utf8(&bytes).map_err(|_| {
3660                RedDBError::Query(format!("column '{name}' contains invalid UTF-8"))
3661            })?;
3662            let arr: Vec<f32> = crate::json::from_str(s).map_err(|e| {
3663                RedDBError::Query(format!("column '{name}' invalid vector JSON: {e}"))
3664            })?;
3665            Ok(arr)
3666        }
3667        other => Err(RedDBError::Query(format!(
3668            "column '{name}' expected vector, got {other:?}"
3669        ))),
3670    }
3671}
3672
3673fn find_column_value_vec_f32_any(
3674    columns: &[String],
3675    values: &[Value],
3676    names: &[&str],
3677) -> RedDBResult<Vec<f32>> {
3678    for name in names {
3679        if columns
3680            .iter()
3681            .any(|column| column.eq_ignore_ascii_case(name))
3682        {
3683            return find_column_value_vec_f32(columns, values, name);
3684        }
3685    }
3686    Err(RedDBError::Query(format!(
3687        "required vector column '{}' not found in INSERT",
3688        names.join("' or '")
3689    )))
3690}
3691
3692/// Extract remaining properties (all columns not in the exclusion list).
3693fn extract_remaining_properties(
3694    columns: &[String],
3695    values: &[Value],
3696    exclude: &[&str],
3697) -> Vec<(String, Value)> {
3698    columns
3699        .iter()
3700        .zip(values.iter())
3701        .filter(|(col, _)| !exclude.iter().any(|e| col.eq_ignore_ascii_case(e)))
3702        .map(|(col, val)| (col.clone(), val.clone()))
3703        .collect()
3704}
3705
3706fn validate_timeseries_insert_columns(columns: &[String]) -> RedDBResult<()> {
3707    let mut invalid = Vec::new();
3708    for column in columns {
3709        if !is_timeseries_insert_column(column) && resolve_sql_ttl_metadata_key(column).is_none() {
3710            invalid.push(column.clone());
3711        }
3712    }
3713
3714    if invalid.is_empty() {
3715        Ok(())
3716    } else {
3717        Err(RedDBError::Query(format!(
3718            "timeseries INSERT only accepts metric, value, tags, timestamp, timestamp_ns, or time columns; got {}",
3719            invalid.join(", ")
3720        )))
3721    }
3722}
3723
/// Report whether `column` is one of the names a timeseries INSERT accepts.
///
/// The comparison is ASCII case-insensitive.
fn is_timeseries_insert_column(column: &str) -> bool {
    const ACCEPTED: [&str; 8] = [
        "metric",
        "value",
        "tags",
        "timestamp",
        "timestamp_ns",
        "time",
        // Analytics-event extension (#577): an analytics row carries
        // an `event_name` + JSON `payload`. The payload is validated
        // against the AnalyticsSchemaRegistry inside
        // `insert_timeseries_point` before the row lands.
        "event_name",
        "payload",
    ];

    ACCEPTED
        .iter()
        .any(|accepted| column.eq_ignore_ascii_case(accepted))
}
3741
3742fn find_timeseries_timestamp_ns(columns: &[String], values: &[Value]) -> RedDBResult<Option<u64>> {
3743    let mut found = None;
3744
3745    for alias in ["timestamp_ns", "timestamp", "time"] {
3746        for (index, column) in columns.iter().enumerate() {
3747            if !column.eq_ignore_ascii_case(alias) {
3748                continue;
3749            }
3750
3751            if found.is_some() {
3752                return Err(RedDBError::Query(
3753                    "timeseries INSERT accepts only one timestamp column".to_string(),
3754                ));
3755            }
3756
3757            found = Some(coerce_value_to_non_negative_u64(&values[index], alias)?);
3758        }
3759    }
3760
3761    Ok(found)
3762}
3763
3764fn find_timeseries_tags(
3765    columns: &[String],
3766    values: &[Value],
3767) -> RedDBResult<std::collections::HashMap<String, String>> {
3768    for (index, column) in columns.iter().enumerate() {
3769        if column.eq_ignore_ascii_case("tags") {
3770            return parse_timeseries_tags(&values[index]);
3771        }
3772    }
3773    Ok(std::collections::HashMap::new())
3774}
3775
3776fn parse_timeseries_tags(value: &Value) -> RedDBResult<std::collections::HashMap<String, String>> {
3777    match value {
3778        Value::Null => Ok(std::collections::HashMap::new()),
3779        Value::Json(bytes) => parse_timeseries_tags_json(bytes),
3780        Value::Text(text) => parse_timeseries_tags_json(text.as_bytes()),
3781        other => Err(RedDBError::Query(format!(
3782            "timeseries tags must be a JSON object or JSON text, got {other:?}"
3783        ))),
3784    }
3785}
3786
3787fn parse_timeseries_tags_json(
3788    bytes: &[u8],
3789) -> RedDBResult<std::collections::HashMap<String, String>> {
3790    let json: crate::json::Value = crate::json::from_slice(bytes)
3791        .map_err(|err| RedDBError::Query(format!("timeseries tags must be valid JSON: {err}")))?;
3792
3793    let object = match json {
3794        crate::json::Value::Object(object) => object,
3795        other => {
3796            return Err(RedDBError::Query(format!(
3797                "timeseries tags must be a JSON object, got {other:?}"
3798            )))
3799        }
3800    };
3801
3802    let mut tags = std::collections::HashMap::with_capacity(object.len());
3803    for (key, value) in object {
3804        tags.insert(key, json_tag_value_to_string(&value));
3805    }
3806    Ok(tags)
3807}
3808
3809/// Encode a tag value for storage so the original JSON type can be
3810/// recovered on read (issue #543).
3811///
3812/// Time-series tags are stored as `HashMap<String, String>` on the
3813/// physical record (see [`crate::storage::TimeSeriesData`]) so that
3814/// the segment codec, WAL and gRPC mirrors don't need a new value
3815/// variant. To preserve the original JSON type across that
3816/// string-only channel we prepend the
3817/// [`crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX`] marker
3818/// and serialize the value as compact JSON text. The read paths
3819/// (`timeseries_tags_json_value` / `timeseries_tags_value`) detect
3820/// the marker, parse the suffix, and recover a real JSON value.
3821/// Tags written through other channels (Prometheus remote write,
3822/// metrics handlers, legacy on-disk data) lack the marker and are
3823/// returned as `JsonValue::String(raw)` exactly as before.
3824fn json_tag_value_to_string(value: &crate::json::Value) -> String {
3825    let mut buf = String::with_capacity(value.to_string_compact().len() + 1);
3826    buf.push(crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX);
3827    buf.push_str(&value.to_string_compact());
3828    buf
3829}
3830
3831fn coerce_value_to_non_negative_u64(value: &Value, column: &str) -> RedDBResult<u64> {
3832    match value {
3833        Value::UnsignedInteger(value) => Ok(*value),
3834        Value::Integer(value) if *value >= 0 => Ok(*value as u64),
3835        Value::Float(value) if *value >= 0.0 => Ok(*value as u64),
3836        Value::Text(value) => value.parse::<u64>().map_err(|_| {
3837            RedDBError::Query(format!(
3838                "column '{column}' expected a non-negative integer timestamp, got '{value}'"
3839            ))
3840        }),
3841        other => Err(RedDBError::Query(format!(
3842            "column '{column}' expected a non-negative integer timestamp, got {other:?}"
3843        ))),
3844    }
3845}
3846
/// Current wall-clock time as nanoseconds since the Unix epoch.
///
/// A clock set before the epoch reads as `0`, and a nanosecond count that
/// does not fit in `u64` saturates at `u64::MAX`.
fn current_unix_ns() -> u64 {
    let since_epoch = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => std::time::Duration::default(),
    };

    let nanos = since_epoch.as_nanos();
    let ceiling = u128::from(u64::MAX);
    if nanos > ceiling {
        u64::MAX
    } else {
        nanos as u64
    }
}
3854
3855fn metadata_value_to_json(value: &MetadataValue) -> crate::json::Value {
3856    use crate::json::{Map, Value as JV};
3857    match value {
3858        MetadataValue::Null => JV::Null,
3859        MetadataValue::Bool(value) => JV::Bool(*value),
3860        MetadataValue::Int(value) => JV::Number(*value as f64),
3861        MetadataValue::Float(value) => JV::Number(*value),
3862        MetadataValue::String(value) => JV::String(value.clone()),
3863        MetadataValue::Bytes(value) => JV::Array(
3864            value
3865                .iter()
3866                .map(|value| JV::Number(*value as f64))
3867                .collect(),
3868        ),
3869        MetadataValue::Timestamp(value) => JV::Number(*value as f64),
3870        MetadataValue::Array(values) => {
3871            JV::Array(values.iter().map(metadata_value_to_json).collect())
3872        }
3873        MetadataValue::Object(object) => {
3874            let entries = object
3875                .iter()
3876                .map(|(key, value)| (key.clone(), metadata_value_to_json(value)))
3877                .collect();
3878            JV::Object(entries)
3879        }
3880        MetadataValue::Geo { lat, lon } => {
3881            let mut object = Map::new();
3882            object.insert("lat".to_string(), JV::Number(*lat));
3883            object.insert("lon".to_string(), JV::Number(*lon));
3884            JV::Object(object)
3885        }
3886        MetadataValue::Reference(target) => {
3887            let mut object = Map::new();
3888            object.insert(
3889                "collection".to_string(),
3890                JV::String(target.collection().to_string()),
3891            );
3892            object.insert(
3893                "entity_id".to_string(),
3894                JV::Number(target.entity_id().raw() as f64),
3895            );
3896            JV::Object(object)
3897        }
3898        MetadataValue::References(values) => {
3899            let refs = values
3900                .iter()
3901                .map(|target| {
3902                    let mut object = Map::new();
3903                    object.insert(
3904                        "collection".to_string(),
3905                        JV::String(target.collection().to_string()),
3906                    );
3907                    object.insert(
3908                        "entity_id".to_string(),
3909                        JV::Number(target.entity_id().raw() as f64),
3910                    );
3911                    JV::Object(object)
3912                })
3913                .collect();
3914            JV::Array(refs)
3915        }
3916    }
3917}
3918
3919fn storage_value_to_metadata_value(value: &Value) -> MetadataValue {
3920    match value {
3921        Value::Null => MetadataValue::Null,
3922        Value::Boolean(value) => MetadataValue::Bool(*value),
3923        Value::Integer(value) => MetadataValue::Int(*value),
3924        Value::UnsignedInteger(value) => metadata_u64_to_value(*value),
3925        Value::Float(value) => MetadataValue::Float(*value),
3926        Value::Text(value) => MetadataValue::String(value.to_string()),
3927        Value::Blob(value) => MetadataValue::Bytes(value.clone()),
3928        Value::Timestamp(value) => {
3929            if *value >= 0 {
3930                metadata_u64_to_value(*value as u64)
3931            } else {
3932                MetadataValue::Int(*value)
3933            }
3934        }
3935        Value::TimestampMs(value) => {
3936            if *value >= 0 {
3937                metadata_u64_to_value(*value as u64)
3938            } else {
3939                MetadataValue::Int(*value)
3940            }
3941        }
3942        Value::Json(value) => MetadataValue::String(String::from_utf8_lossy(value).into_owned()),
3943        Value::Uuid(value) => MetadataValue::String(format!("{value:?}")),
3944        Value::Date(value) => MetadataValue::String(value.to_string()),
3945        Value::Time(value) => MetadataValue::String(value.to_string()),
3946        Value::Decimal(value) => MetadataValue::String(value.to_string()),
3947        Value::Ipv4(value) => MetadataValue::String(format!(
3948            "{}.{}.{}.{}",
3949            (value >> 24) & 0xFF,
3950            (value >> 16) & 0xFF,
3951            (value >> 8) & 0xFF,
3952            value & 0xFF
3953        )),
3954        Value::Port(value) => MetadataValue::Int(i64::from(*value)),
3955        Value::Latitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3956        Value::Longitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3957        Value::GeoPoint(lat, lon) => MetadataValue::Geo {
3958            lat: *lat as f64 / 1_000_000.0,
3959            lon: *lon as f64 / 1_000_000.0,
3960        },
3961        Value::BigInt(value) => MetadataValue::String(value.to_string()),
3962        Value::TableRef(value) => MetadataValue::String(value.clone()),
3963        Value::PageRef(value) => MetadataValue::Int(*value as i64),
3964        Value::Password(value) => MetadataValue::String(value.clone()),
3965        Value::Array(values) => {
3966            MetadataValue::Array(values.iter().map(storage_value_to_metadata_value).collect())
3967        }
3968        _ => MetadataValue::String(value.to_string()),
3969    }
3970}
3971
3972fn sql_literal_to_metadata_value(field: &str, value: &Value) -> RedDBResult<MetadataValue> {
3973    match value {
3974        Value::Null => Ok(MetadataValue::Null),
3975        Value::Integer(value) if *value >= 0 => Ok(metadata_u64_to_value(*value as u64)),
3976        Value::Integer(_) => Err(RedDBError::Query(format!(
3977            "column '{field}' must be non-negative for TTL metadata"
3978        ))),
3979        Value::UnsignedInteger(value) => Ok(metadata_u64_to_value(*value)),
3980        Value::Float(value) if value.is_finite() => {
3981            if value.fract().abs() >= f64::EPSILON {
3982                return Err(RedDBError::Query(format!(
3983                    "column '{field}' must be an integer (TTL metadata must be an integer)"
3984                )));
3985            }
3986            if *value < 0.0 {
3987                return Err(RedDBError::Query(format!(
3988                    "column '{field}' must be non-negative for TTL metadata"
3989                )));
3990            }
3991            if *value > u64::MAX as f64 {
3992                return Err(RedDBError::Query(format!(
3993                    "column '{field}' value is too large"
3994                )));
3995            }
3996            Ok(metadata_u64_to_value(*value as u64))
3997        }
3998        Value::Float(_) => Err(RedDBError::Query(format!(
3999            "column '{field}' must be a finite number"
4000        ))),
4001        Value::Text(value) => {
4002            let value = value.trim();
4003            if let Ok(value) = value.parse::<u64>() {
4004                Ok(metadata_u64_to_value(value))
4005            } else if let Ok(value) = value.parse::<i64>() {
4006                if value < 0 {
4007                    return Err(RedDBError::Query(format!(
4008                        "column '{field}' must be non-negative for TTL metadata"
4009                    )));
4010                }
4011                Ok(metadata_u64_to_value(value as u64))
4012            } else if let Ok(value) = value.parse::<f64>() {
4013                if !value.is_finite() {
4014                    return Err(RedDBError::Query(format!(
4015                        "column '{field}' must be a finite number"
4016                    )));
4017                }
4018                if value.fract().abs() >= f64::EPSILON {
4019                    return Err(RedDBError::Query(format!(
4020                        "column '{field}' must be an integer (TTL metadata must be an integer)"
4021                    )));
4022                }
4023                if value < 0.0 {
4024                    return Err(RedDBError::Query(format!(
4025                        "column '{field}' must be non-negative for TTL metadata"
4026                    )));
4027                }
4028                if value > u64::MAX as f64 {
4029                    return Err(RedDBError::Query(format!(
4030                        "column '{field}' value is too large"
4031                    )));
4032                }
4033                Ok(metadata_u64_to_value(value as u64))
4034            } else {
4035                Err(RedDBError::Query(format!(
4036                    "column '{field}' expects a numeric value for TTL metadata"
4037                )))
4038            }
4039        }
4040        _ => Err(RedDBError::Query(format!(
4041            "column '{field}' expects a numeric value for TTL metadata"
4042        ))),
4043    }
4044}
4045
4046fn metadata_u64_to_value(value: u64) -> MetadataValue {
4047    if value <= i64::MAX as u64 {
4048        MetadataValue::Int(value as i64)
4049    } else {
4050        MetadataValue::Timestamp(value)
4051    }
4052}
4053
4054/// Phase 2 PG parity: inspect a column value and return `true` when
4055/// the dotted `tail` path is already present under it. Used by the
4056/// tenant auto-fill so rows that already carry an explicit value
4057/// (bulk import, admin insert on behalf of a tenant) are not
4058/// double-stamped with the session's current_tenant().
4059fn dotted_tail_already_set(value: &Value, tail: &str) -> bool {
4060    let json = match value {
4061        Value::Null => return false,
4062        Value::Json(bytes) | Value::Blob(bytes) => {
4063            match crate::json::from_slice::<crate::json::Value>(bytes) {
4064                Ok(v) => v,
4065                Err(_) => return false,
4066            }
4067        }
4068        Value::Text(s) => {
4069            let trimmed = s.trim_start();
4070            if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
4071                return false;
4072            }
4073            match crate::json::from_str::<crate::json::Value>(s) {
4074                Ok(v) => v,
4075                Err(_) => return false,
4076            }
4077        }
4078        _ => return false,
4079    };
4080    let mut cursor = &json;
4081    for seg in tail.split('.') {
4082        match cursor {
4083            crate::json::Value::Object(map) => match map.iter().find(|(k, _)| *k == seg) {
4084                Some((_, v)) => cursor = v,
4085                None => return false,
4086            },
4087            _ => return false,
4088        }
4089    }
4090    !matches!(cursor, crate::json::Value::Null)
4091}
4092
/// Phase 2 PG parity: take a column value (possibly Null / Text /
/// Json / Blob) and return a `Value::Json` with the dotted `tail` path
/// set to `tenant_id`. Preserves every pre-existing key; only the final
/// path segment is (over)written.
///
/// Accepts:
/// * `Value::Null`  → fresh `{tail: tenant_id}` object
/// * `Value::Json(bytes)` / `Value::Blob(bytes)` → parse, navigate /
///   create path, re-serialize
/// * `Value::Text(s)` → blank text is treated like `Null`; otherwise `s`
///   must be valid JSON and is handled the same as Json
/// * anything else → error (user supplied a scalar where we need
///   a JSON container)
///
/// # Errors
///
/// Returns `RedDBError::Query` when the input is not valid JSON, when the
/// root or any value along the path is not a JSON object, or when the
/// merged document cannot be re-serialized.
fn merge_dotted_tenant(current: Value, tail: &str, tenant_id: &str) -> RedDBResult<Value> {
    // Decode the existing column value into a mutable JSON tree.
    let mut root = match current {
        Value::Null => crate::json::Value::Object(Default::default()),
        Value::Json(bytes) | Value::Blob(bytes) => {
            crate::json::from_slice(&bytes).map_err(|err| {
                RedDBError::Query(format!(
                    "tenant auto-fill: root column is not valid JSON ({err})"
                ))
            })?
        }
        Value::Text(s) => {
            // Empty / whitespace-only text starts from an empty object.
            if s.trim().is_empty() {
                crate::json::Value::Object(Default::default())
            } else {
                crate::json::from_str::<crate::json::Value>(&s).map_err(|err| {
                    RedDBError::Query(format!(
                        "tenant auto-fill: text root is not valid JSON ({err})"
                    ))
                })?
            }
        }
        other => {
            return Err(RedDBError::Query(format!(
                "tenant auto-fill: root column must be JSON / NULL, got {other:?}"
            )));
        }
    };

    // Navigate path segments, creating intermediate objects on demand.
    let segments: Vec<&str> = tail.split('.').collect();
    let mut cursor: &mut crate::json::Value = &mut root;
    for (i, seg) in segments.iter().enumerate() {
        let is_last = i + 1 == segments.len();
        // Every level we descend through (including the root) must be an
        // object; an existing scalar or array on the path is an error.
        let map = match cursor {
            crate::json::Value::Object(m) => m,
            _ => {
                return Err(RedDBError::Query(format!(
                    "tenant auto-fill: segment '{seg}' is not inside an object"
                )));
            }
        };
        if is_last {
            // Leaf: write the tenant id, replacing whatever was there.
            map.insert(
                seg.to_string(),
                crate::json::Value::String(tenant_id.to_string()),
            );
            break;
        }
        // Intermediate: reuse the existing child or create an empty object.
        cursor = map
            .entry(seg.to_string())
            .or_insert_with(|| crate::json::Value::Object(Default::default()));
    }

    // The result is always returned as `Value::Json`, whatever the input variant was.
    let bytes = crate::json::to_vec(&root).map_err(|err| {
        RedDBError::Query(format!(
            "tenant auto-fill: failed to re-serialize JSON ({err})"
        ))
    })?;
    Ok(Value::Json(bytes))
}
4163
4164#[cfg(test)]
4165mod tests {
4166    use crate::storage::schema::Value;
4167    use crate::storage::wal::{WalReader, WalRecord};
4168    use crate::{RedDBOptions, RedDBRuntime};
4169    use std::path::Path;
4170
4171    fn store_commit_batches(wal_path: &Path) -> Vec<Vec<Vec<u8>>> {
4172        WalReader::open(wal_path)
4173            .expect("wal opens")
4174            .iter()
4175            .map(|record| record.expect("wal record decodes").1)
4176            .filter_map(|record| match record {
4177                WalRecord::TxCommitBatch { actions, .. } => Some(actions),
4178                _ => None,
4179            })
4180            .collect()
4181    }
4182
4183    fn action_contains_text(action: &[u8], needle: &str) -> bool {
4184        action
4185            .windows(needle.len())
4186            .any(|window| window == needle.as_bytes())
4187    }
4188
4189    fn assert_statement_writes_collections_in_one_new_wal_batch(
4190        rt: &RedDBRuntime,
4191        wal_path: &Path,
4192        statement: &str,
4193        source: &str,
4194        event_queue: &str,
4195    ) {
4196        let before_batches = store_commit_batches(wal_path).len();
4197
4198        rt.execute_query(statement).unwrap();
4199
4200        let batches = store_commit_batches(wal_path);
4201        let statement_batches = &batches[before_batches..];
4202        let source_batch = statement_batches
4203            .iter()
4204            .position(|actions| {
4205                actions.iter().any(|action| {
4206                    action_contains_text(action, source)
4207                        && !action_contains_text(action, event_queue)
4208                })
4209            })
4210            .expect("source collection write batch is present");
4211        let event_batch = statement_batches
4212            .iter()
4213            .position(|actions| {
4214                actions
4215                    .iter()
4216                    .any(|action| action_contains_text(action, event_queue))
4217            })
4218            .expect("event queue write batch is present");
4219
4220        assert_eq!(
4221            source_batch, event_batch,
4222            "WITH EVENTS must persist the source write and queue event in the same WAL batch"
4223        );
4224    }
4225
4226    #[test]
4227    fn with_events_autocommit_persists_mutation_and_event_in_one_wal_batch() {
4228        let dir = tempfile::tempdir().unwrap();
4229        let db_path = dir.path().join("events_dual_write.rdb");
4230        let wal_path = db_path.with_extension("rdb-uwal");
4231        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4232
4233        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4234            .unwrap();
4235        assert_statement_writes_collections_in_one_new_wal_batch(
4236            &rt,
4237            &wal_path,
4238            "INSERT INTO users (id, email) VALUES (1, 'a@example.test')",
4239            "users",
4240            "users_events",
4241        );
4242    }
4243
4244    #[test]
4245    fn with_events_autocommit_update_persists_mutation_and_event_in_one_wal_batch() {
4246        let dir = tempfile::tempdir().unwrap();
4247        let db_path = dir.path().join("events_update_atomic.rdb");
4248        let wal_path = db_path.with_extension("rdb-uwal");
4249        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4250
4251        rt.execute_query(
4252            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (UPDATE) TO user_updates",
4253        )
4254        .unwrap();
4255        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4256            .unwrap();
4257
4258        assert_statement_writes_collections_in_one_new_wal_batch(
4259            &rt,
4260            &wal_path,
4261            "UPDATE users SET email = 'b@example.test' WHERE id = 1",
4262            "users",
4263            "user_updates",
4264        );
4265    }
4266
4267    #[test]
4268    fn with_events_autocommit_delete_persists_mutation_and_event_in_one_wal_batch() {
4269        let dir = tempfile::tempdir().unwrap();
4270        let db_path = dir.path().join("events_delete_atomic.rdb");
4271        let wal_path = db_path.with_extension("rdb-uwal");
4272        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4273
4274        rt.execute_query(
4275            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (DELETE) TO user_deletes",
4276        )
4277        .unwrap();
4278        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4279            .unwrap();
4280
4281        assert_statement_writes_collections_in_one_new_wal_batch(
4282            &rt,
4283            &wal_path,
4284            "DELETE FROM users WHERE id = 1",
4285            "users",
4286            "user_deletes",
4287        );
4288    }
4289
4290    #[test]
4291    fn update_where_id_in_with_hash_index_updates_expected_rows() {
4292        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4293        rt.execute_query("CREATE TABLE users (id INT, score INT)")
4294            .unwrap();
4295        for id in 0..5 {
4296            rt.execute_query(&format!("INSERT INTO users (id, score) VALUES ({id}, 0)"))
4297                .unwrap();
4298        }
4299        rt.execute_query("CREATE INDEX idx_id ON users (id) USING HASH")
4300            .unwrap();
4301
4302        let updated = rt
4303            .execute_query("UPDATE users SET score = 42 WHERE id IN (1,3,4)")
4304            .unwrap();
4305        assert_eq!(updated.affected_rows, 3);
4306
4307        let selected = rt
4308            .execute_query("SELECT id, score FROM users ORDER BY id")
4309            .unwrap();
4310        let scores: Vec<(i64, i64)> = selected
4311            .result
4312            .records
4313            .iter()
4314            .map(|record| {
4315                let id = match record.get("id").unwrap() {
4316                    Value::Integer(value) => *value,
4317                    other => panic!("expected integer id, got {other:?}"),
4318                };
4319                let score = match record.get("score").unwrap() {
4320                    Value::Integer(value) => *value,
4321                    other => panic!("expected integer score, got {other:?}"),
4322                };
4323                (id, score)
4324            })
4325            .collect();
4326        assert_eq!(scores, vec![(0, 0), (1, 42), (2, 0), (3, 42), (4, 42)]);
4327    }
4328
4329    /// Drives UPDATE through the shared `DmlTargetScan` module — the
4330    /// same code path DELETE uses (#51, #52). Exercises the indexed
4331    /// equality fast-path (WHERE id = N with a HASH index), the
4332    /// unindexed range scan (WHERE score > N), and the no-WHERE
4333    /// full-scan branch to confirm the extracted "find target rows"
4334    /// loop preserves affected-row counts and the resulting row state.
4335    #[test]
4336    fn update_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4337        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4338        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4339            .unwrap();
4340        for id in 0..5 {
4341            rt.execute_query(&format!(
4342                "INSERT INTO items (id, score) VALUES ({id}, {})",
4343                id * 10
4344            ))
4345            .unwrap();
4346        }
4347        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4348            .unwrap();
4349
4350        // Indexed equality UPDATE — hits the hash fast-path inside
4351        // DmlTargetScan::find_target_ids. id=2 has score=20, drop it
4352        // below the score>25 cutoff so the next assertion stays clean.
4353        let updated_one = rt
4354            .execute_query("UPDATE items SET score = 5 WHERE id = 2")
4355            .unwrap();
4356        assert_eq!(updated_one.affected_rows, 1);
4357
4358        // Unindexed scan UPDATE — bumps everyone with score > 25,
4359        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4360        // zoned/full-scan branch.
4361        let updated_many = rt
4362            .execute_query("UPDATE items SET score = 7 WHERE score > 25")
4363            .unwrap();
4364        assert_eq!(updated_many.affected_rows, 2);
4365
4366        let snapshot = rt
4367            .execute_query("SELECT id, score FROM items ORDER BY id")
4368            .unwrap();
4369        let pairs: Vec<(i64, i64)> = snapshot
4370            .result
4371            .records
4372            .iter()
4373            .map(|record| {
4374                let id = match record.get("id").unwrap() {
4375                    Value::Integer(value) => *value,
4376                    other => panic!("expected integer id, got {other:?}"),
4377                };
4378                let score = match record.get("score").unwrap() {
4379                    Value::Integer(value) => *value,
4380                    other => panic!("expected integer score, got {other:?}"),
4381                };
4382                (id, score)
4383            })
4384            .collect();
4385        assert_eq!(pairs, vec![(0, 0), (1, 10), (2, 5), (3, 7), (4, 7)]);
4386
4387        // Full-scan UPDATE with no WHERE rewrites every remaining row.
4388        let updated_all = rt.execute_query("UPDATE items SET score = 1").unwrap();
4389        assert_eq!(updated_all.affected_rows, 5);
4390        let after = rt
4391            .execute_query("SELECT score FROM items ORDER BY id")
4392            .unwrap();
4393        let scores: Vec<i64> = after
4394            .result
4395            .records
4396            .iter()
4397            .map(|record| match record.get("score").unwrap() {
4398                Value::Integer(value) => *value,
4399                other => panic!("expected integer score, got {other:?}"),
4400            })
4401            .collect();
4402        assert_eq!(scores, vec![1, 1, 1, 1, 1]);
4403    }
4404
4405    /// Drives DELETE through the new `DmlTargetScan` module. Exercises
4406    /// both the index fast-path (WHERE id = N with a HASH index) and
4407    /// the unindexed scan path (WHERE score > N) to confirm the
4408    /// extracted "find target rows" loop preserves the affected-row
4409    /// count and which rows survive.
4410    #[test]
4411    fn delete_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4412        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4413        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4414            .unwrap();
4415        for id in 0..5 {
4416            rt.execute_query(&format!(
4417                "INSERT INTO items (id, score) VALUES ({id}, {})",
4418                id * 10
4419            ))
4420            .unwrap();
4421        }
4422        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4423            .unwrap();
4424
4425        // Indexed equality DELETE — hits the hash fast-path inside
4426        // DmlTargetScan::find_target_ids.
4427        let deleted_one = rt.execute_query("DELETE FROM items WHERE id = 2").unwrap();
4428        assert_eq!(deleted_one.affected_rows, 1);
4429
4430        // Unindexed scan DELETE — drops everyone with score > 25,
4431        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4432        // zoned/full-scan branch.
4433        let deleted_many = rt
4434            .execute_query("DELETE FROM items WHERE score > 25")
4435            .unwrap();
4436        assert_eq!(deleted_many.affected_rows, 2);
4437
4438        let surviving = rt
4439            .execute_query("SELECT id FROM items ORDER BY id")
4440            .unwrap();
4441        let ids: Vec<i64> = surviving
4442            .result
4443            .records
4444            .iter()
4445            .map(|record| match record.get("id").unwrap() {
4446                Value::Integer(value) => *value,
4447                other => panic!("expected integer id, got {other:?}"),
4448            })
4449            .collect();
4450        assert_eq!(ids, vec![0, 1]);
4451
4452        // Sanity: full-scan DELETE with no WHERE clears the rest.
4453        let deleted_rest = rt.execute_query("DELETE FROM items").unwrap();
4454        assert_eq!(deleted_rest.affected_rows, 2);
4455        let empty = rt.execute_query("SELECT id FROM items").unwrap();
4456        assert!(empty.result.records.is_empty());
4457    }
4458
4459    /// CollectionContract gate (#49 + #50): APPEND ONLY tables accept
4460    /// INSERT but reject UPDATE and DELETE with the documented
4461    /// operator-facing error strings. Drives all three DML verbs so
4462    /// the centralized gate is exercised end-to-end.
4463    #[test]
4464    fn collection_contract_gate_blocks_update_and_delete_on_append_only() {
4465        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4466        rt.execute_query("CREATE TABLE events (id INT, payload TEXT) APPEND ONLY")
4467            .unwrap();
4468
4469        // INSERT must succeed — APPEND ONLY exists precisely to allow
4470        // appends. The gate should be a no-op for INSERT.
4471        let inserted = rt
4472            .execute_query("INSERT INTO events (id, payload) VALUES (1, 'hello')")
4473            .unwrap();
4474        assert_eq!(inserted.affected_rows, 1);
4475
4476        // UPDATE is rejected with the gate's UPDATE-specific message.
4477        let update_err = rt
4478            .execute_query("UPDATE events SET payload = 'mut' WHERE id = 1")
4479            .unwrap_err();
4480        let msg = format!("{update_err}");
4481        assert!(
4482            msg.contains("APPEND ONLY") && msg.contains("UPDATE is rejected"),
4483            "expected UPDATE rejection message, got: {msg}"
4484        );
4485
4486        // DELETE is rejected with the gate's DELETE-specific message.
4487        let delete_err = rt
4488            .execute_query("DELETE FROM events WHERE id = 1")
4489            .unwrap_err();
4490        let msg = format!("{delete_err}");
4491        assert!(
4492            msg.contains("APPEND ONLY") && msg.contains("DELETE is rejected"),
4493            "expected DELETE rejection message, got: {msg}"
4494        );
4495
4496        // Row should still be present — neither rejected mutation
4497        // touched storage.
4498        let surviving = rt.execute_query("SELECT id FROM events").unwrap();
4499        assert_eq!(surviving.result.records.len(), 1);
4500    }
4501
4502    /// CollectionContract gate: tables without an APPEND ONLY contract
4503    /// permit INSERT, UPDATE, and DELETE — the gate's default branch
4504    /// is a true pass-through, not an accidental block.
4505    #[test]
4506    fn collection_contract_gate_allows_all_verbs_on_unrestricted_table() {
4507        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4508        rt.execute_query("CREATE TABLE notes (id INT, body TEXT)")
4509            .unwrap();
4510
4511        rt.execute_query("INSERT INTO notes (id, body) VALUES (1, 'a')")
4512            .unwrap();
4513        let updated = rt
4514            .execute_query("UPDATE notes SET body = 'b' WHERE id = 1")
4515            .unwrap();
4516        assert_eq!(updated.affected_rows, 1);
4517        let deleted = rt.execute_query("DELETE FROM notes WHERE id = 1").unwrap();
4518        assert_eq!(deleted.affected_rows, 1);
4519    }
4520
4521    #[test]
4522    fn insert_into_event_enabled_table_emits_event_to_configured_queue() {
4523        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4524        rt.execute_query(
4525            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (INSERT) TO audit_log",
4526        )
4527        .unwrap();
4528
4529        let inserted = rt
4530            .execute_query("INSERT INTO users (id, email) VALUES (7, 'a@example.com')")
4531            .unwrap();
4532        assert_eq!(inserted.affected_rows, 1);
4533
4534        let events = queue_payloads(&rt, "audit_log");
4535        assert_eq!(events.len(), 1);
4536        let event = events[0].as_object().expect("event payload object");
4537        assert!(event
4538            .get("event_id")
4539            .and_then(crate::json::Value::as_str)
4540            .is_some_and(|value| !value.is_empty()));
4541        assert_eq!(
4542            event.get("op").and_then(crate::json::Value::as_str),
4543            Some("insert")
4544        );
4545        assert_eq!(
4546            event.get("collection").and_then(crate::json::Value::as_str),
4547            Some("users")
4548        );
4549        assert_eq!(
4550            event.get("id").and_then(crate::json::Value::as_u64),
4551            Some(7)
4552        );
4553        assert!(event
4554            .get("ts")
4555            .and_then(crate::json::Value::as_u64)
4556            .is_some());
4557        assert!(event
4558            .get("lsn")
4559            .and_then(crate::json::Value::as_u64)
4560            .is_some());
4561        assert!(matches!(
4562            event.get("tenant"),
4563            Some(crate::json::Value::Null)
4564        ));
4565        assert!(matches!(
4566            event.get("before"),
4567            Some(crate::json::Value::Null)
4568        ));
4569        let after = event
4570            .get("after")
4571            .and_then(crate::json::Value::as_object)
4572            .expect("after object");
4573        assert_eq!(
4574            after.get("id").and_then(crate::json::Value::as_u64),
4575            Some(7)
4576        );
4577        assert_eq!(
4578            after.get("email").and_then(crate::json::Value::as_str),
4579            Some("a@example.com")
4580        );
4581    }
4582
4583    #[test]
4584    fn multi_row_insert_emits_one_insert_event_per_row_in_order() {
4585        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4586        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4587            .unwrap();
4588
4589        rt.execute_query(
4590            "INSERT INTO users (id, email) VALUES (1, 'a@example.com'), (2, 'b@example.com')",
4591        )
4592        .unwrap();
4593
4594        let events = queue_payloads(&rt, "users_events");
4595        assert_eq!(events.len(), 2);
4596        let mut previous_lsn = 0;
4597        for (event, expected_id) in events.iter().zip([1_u64, 2]) {
4598            let object = event.as_object().expect("event payload object");
4599            assert_eq!(
4600                object.get("op").and_then(crate::json::Value::as_str),
4601                Some("insert")
4602            );
4603            assert_eq!(
4604                object.get("id").and_then(crate::json::Value::as_u64),
4605                Some(expected_id)
4606            );
4607            let lsn = object
4608                .get("lsn")
4609                .and_then(crate::json::Value::as_u64)
4610                .expect("event lsn");
4611            assert!(
4612                lsn > previous_lsn,
4613                "event LSNs should increase in row order"
4614            );
4615            previous_lsn = lsn;
4616            let after = object
4617                .get("after")
4618                .and_then(crate::json::Value::as_object)
4619                .expect("after object");
4620            assert_eq!(
4621                after.get("id").and_then(crate::json::Value::as_u64),
4622                Some(expected_id)
4623            );
4624        }
4625    }
4626
4627    fn queue_payloads(rt: &RedDBRuntime, queue: &str) -> Vec<crate::json::Value> {
4628        let result = rt
4629            .execute_query(&format!("QUEUE PEEK {queue} 10"))
4630            .expect("peek queue");
4631        result
4632            .result
4633            .records
4634            .iter()
4635            .map(
4636                |record| match record.get("payload").expect("payload column") {
4637                    Value::Json(bytes) => crate::json::from_slice(bytes).expect("json payload"),
4638                    other => panic!("expected JSON queue payload, got {other:?}"),
4639                },
4640            )
4641            .collect()
4642    }
4643
4644    // ── #112: auto-index user `id` on first insert ─────────────────────
4645
4646    /// First insert into a fresh collection that carries a column named
4647    /// `id` registers an implicit HASH index on `id`. Subsequent inserts
4648    /// populate it transparently, and `WHERE id = N` lookups exercise
4649    /// the hash-index fast path in `DmlTargetScan::find_target_ids`.
4650    ///
4651    /// This is the load-bearing acceptance test for #112 — without the
4652    /// hook, `find_index_for_column` returns `None` and DELETE/UPDATE
4653    /// fall through to a full segment scan (the 4× perf gap documented
4654    /// in `docs/perf/delete-sequential-2026-05-06.md`).
4655    #[test]
4656    fn auto_index_id_fires_on_first_insert() {
4657        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4658        rt.execute_query("CREATE TABLE bench_users (id INT, score INT)")
4659            .unwrap();
4660
4661        // Pre-condition: no index on `id` yet.
4662        assert!(
4663            rt.index_store_ref()
4664                .find_index_for_column("bench_users", "id")
4665                .is_none(),
4666            "freshly created collection should not have an `id` index"
4667        );
4668
4669        // Single-row INSERT — drives `MutationEngine::append_one`.
4670        rt.execute_query("INSERT INTO bench_users (id, score) VALUES (1, 10)")
4671            .unwrap();
4672
4673        // Post-condition: hash index registered on `id`.
4674        let registered = rt
4675            .index_store_ref()
4676            .find_index_for_column("bench_users", "id")
4677            .expect("auto-index hook should have registered idx_id on first insert");
4678        assert_eq!(registered.name, "idx_id");
4679        assert_eq!(registered.collection, "bench_users");
4680        assert_eq!(registered.columns, vec!["id".to_string()]);
4681        assert!(matches!(
4682            registered.method,
4683            super::super::index_store::IndexMethodKind::Hash
4684        ));
4685
4686        // Subsequent inserts populate the index; `WHERE id = N` should
4687        // resolve via the hash fast path and round-trip every row.
4688        for id in 2..=5 {
4689            rt.execute_query(&format!(
4690                "INSERT INTO bench_users (id, score) VALUES ({id}, {})",
4691                id * 10
4692            ))
4693            .unwrap();
4694        }
4695        for id in 1..=5 {
4696            let result = rt
4697                .execute_query(&format!("SELECT score FROM bench_users WHERE id = {id}"))
4698                .unwrap();
4699            assert_eq!(
4700                result.result.records.len(),
4701                1,
4702                "id={id} should match one row"
4703            );
4704        }
4705
4706        // Delete via the hash fast-path — exactly the bench scenario the
4707        // perf doc identified as the 4× regression. With the index
4708        // present, `find_target_ids` short-circuits before
4709        // `for_each_entity_zoned` runs.
4710        let deleted = rt
4711            .execute_query("DELETE FROM bench_users WHERE id = 3")
4712            .unwrap();
4713        assert_eq!(deleted.affected_rows, 1);
4714    }
4715
4716    /// Bulk INSERT (the multi-row VALUES path) drives
4717    /// `MutationEngine::append_batch`. The hook must fire there too —
4718    /// otherwise the batch entry points (gRPC binary bulk, HTTP bulk,
4719    /// wire bulk INSERT) skip auto-indexing entirely.
4720    #[test]
4721    fn auto_index_id_fires_on_first_bulk_insert() {
4722        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4723        rt.execute_query("CREATE TABLE bench_bulk (id INT, score INT)")
4724            .unwrap();
4725
4726        rt.execute_query("INSERT INTO bench_bulk (id, score) VALUES (1, 10), (2, 20), (3, 30)")
4727            .unwrap();
4728
4729        let registered = rt
4730            .index_store_ref()
4731            .find_index_for_column("bench_bulk", "id")
4732            .expect("auto-index hook should fire on first bulk insert");
4733        assert_eq!(registered.name, "idx_id");
4734
4735        // Every row populated via `index_entity_insert_batch`.
4736        for id in 1..=3 {
4737            let result = rt
4738                .execute_query(&format!("SELECT score FROM bench_bulk WHERE id = {id}"))
4739                .unwrap();
4740            assert_eq!(result.result.records.len(), 1);
4741        }
4742    }
4743
4744    /// Hook is a no-op when the row carries no `id` column. Conservative
4745    /// match (case-sensitive `id`) — `Id`, `ID`, and `red_entity_id`
4746    /// don't trigger it.
4747    #[test]
4748    fn auto_index_id_skips_when_no_id_column() {
4749        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4750        rt.execute_query("CREATE TABLE plain (uid INT, label TEXT)")
4751            .unwrap();
4752        rt.execute_query("INSERT INTO plain (uid, label) VALUES (1, 'a')")
4753            .unwrap();
4754
4755        assert!(rt
4756            .index_store_ref()
4757            .find_index_for_column("plain", "id")
4758            .is_none());
4759        assert!(rt
4760            .index_store_ref()
4761            .find_index_for_column("plain", "uid")
4762            .is_none());
4763    }
4764
4765    /// Hook only fires once per collection. If an explicit
4766    /// `CREATE INDEX ... USING BTREE` already covers `id`, the hook
4767    /// detects it via `find_index_for_column` and does NOT clobber it
4768    /// with a HASH index on the next insert.
4769    #[test]
4770    fn auto_index_id_skips_when_index_already_exists() {
4771        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4772        rt.execute_query("CREATE TABLE pre (id INT, score INT)")
4773            .unwrap();
4774        // User-declared BTREE index on `id` before any insert.
4775        rt.execute_query("CREATE INDEX user_idx ON pre (id) USING BTREE")
4776            .unwrap();
4777        rt.execute_query("INSERT INTO pre (id, score) VALUES (1, 10)")
4778            .unwrap();
4779
4780        let registered = rt
4781            .index_store_ref()
4782            .find_index_for_column("pre", "id")
4783            .expect("user index should still be there");
4784        assert_eq!(
4785            registered.name, "user_idx",
4786            "auto-index hook must not overwrite an existing index"
4787        );
4788    }
4789
4790    /// Implicit `idx_id` is reaped when the collection drops. The
4791    /// existing `execute_drop_table` walks `list_indices` and drops every
4792    /// entry — confirm the auto-created index participates.
4793    #[test]
4794    fn auto_index_id_dropped_with_collection() {
4795        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4796        rt.execute_query("CREATE TABLE ephemeral (id INT, score INT)")
4797            .unwrap();
4798        rt.execute_query("INSERT INTO ephemeral (id, score) VALUES (1, 10)")
4799            .unwrap();
4800        assert!(rt
4801            .index_store_ref()
4802            .find_index_for_column("ephemeral", "id")
4803            .is_some());
4804
4805        rt.execute_query("DROP TABLE ephemeral").unwrap();
4806
4807        assert!(
4808            rt.index_store_ref()
4809                .find_index_for_column("ephemeral", "id")
4810                .is_none(),
4811            "implicit `idx_id` must be reaped when its collection drops"
4812        );
4813    }
4814
4815    /// Opt-out via `RedDBOptions::with_auto_index_id(false)` (which
4816    /// forwards to `UnifiedStoreConfig::auto_index_id`). With the knob
4817    /// off, first insert leaves the collection without an `id` index —
4818    /// DELETE/UPDATE fall back to the scan path.
4819    #[test]
4820    fn auto_index_id_disabled_by_config() {
4821        let opts = RedDBOptions::in_memory().with_auto_index_id(false);
4822        let rt = RedDBRuntime::with_options(opts).unwrap();
4823
4824        rt.execute_query("CREATE TABLE off (id INT, score INT)")
4825            .unwrap();
4826        rt.execute_query("INSERT INTO off (id, score) VALUES (1, 10)")
4827            .unwrap();
4828
4829        assert!(
4830            rt.index_store_ref()
4831                .find_index_for_column("off", "id")
4832                .is_none(),
4833            "with auto_index_id=false, no implicit index should be created"
4834        );
4835    }
4836
4837    // ── #293: UPDATE / DELETE events ─────────────────────────────────────
4838
4839    #[test]
4840    fn update_single_row_emits_update_event() {
4841        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4842        rt.execute_query(
4843            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO audit_log",
4844        )
4845        .unwrap();
4846        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
4847            .unwrap();
4848
4849        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4850            .unwrap();
4851
4852        let events = queue_payloads(&rt, "audit_log");
4853        assert_eq!(events.len(), 1, "expected exactly 1 update event");
4854        let event = events[0].as_object().expect("event payload object");
4855        assert_eq!(
4856            event.get("op").and_then(crate::json::Value::as_str),
4857            Some("update")
4858        );
4859        assert_eq!(
4860            event.get("collection").and_then(crate::json::Value::as_str),
4861            Some("users")
4862        );
4863        assert!(event
4864            .get("event_id")
4865            .and_then(crate::json::Value::as_str)
4866            .is_some_and(|v| !v.is_empty()));
4867        let before = event
4868            .get("before")
4869            .and_then(crate::json::Value::as_object)
4870            .expect("before must be an object");
4871        let after = event
4872            .get("after")
4873            .and_then(crate::json::Value::as_object)
4874            .expect("after must be an object");
4875        assert_eq!(
4876            before.get("name").and_then(crate::json::Value::as_str),
4877            Some("Alice"),
4878            "before.name should be the old value"
4879        );
4880        assert_eq!(
4881            after.get("name").and_then(crate::json::Value::as_str),
4882            Some("Bob"),
4883            "after.name should be the new value"
4884        );
4885    }
4886
4887    #[test]
4888    fn update_event_only_includes_changed_fields() {
4889        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4890        rt.execute_query(
4891            "CREATE TABLE users (id INT, name TEXT, email TEXT) WITH EVENTS (UPDATE) TO evts",
4892        )
4893        .unwrap();
4894        rt.execute_query("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'a@x.com')")
4895            .unwrap();
4896
4897        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4898            .unwrap();
4899
4900        let events = queue_payloads(&rt, "evts");
4901        assert_eq!(events.len(), 1);
4902        let event = events[0].as_object().unwrap();
4903        let before = event
4904            .get("before")
4905            .and_then(crate::json::Value::as_object)
4906            .unwrap();
4907        let after = event
4908            .get("after")
4909            .and_then(crate::json::Value::as_object)
4910            .unwrap();
4911        // Only changed field included.
4912        assert!(
4913            before.contains_key("name"),
4914            "before must include changed field"
4915        );
4916        assert!(
4917            after.contains_key("name"),
4918            "after must include changed field"
4919        );
4920        // Unchanged fields must not appear.
4921        assert!(
4922            !before.contains_key("email"),
4923            "before must not include unchanged email"
4924        );
4925        assert!(
4926            !after.contains_key("email"),
4927            "after must not include unchanged email"
4928        );
4929    }
4930
4931    #[test]
4932    fn multi_row_update_emits_one_event_per_row() {
4933        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4934        rt.execute_query("CREATE TABLE items (id INT, status TEXT) WITH EVENTS (UPDATE) TO evts")
4935            .unwrap();
4936        rt.execute_query(
4937            "INSERT INTO items (id, status) VALUES (1, 'new'), (2, 'new'), (3, 'new')",
4938        )
4939        .unwrap();
4940
4941        rt.execute_query("UPDATE items SET status = 'done'")
4942            .unwrap();
4943
4944        let events = queue_payloads(&rt, "evts");
4945        assert_eq!(events.len(), 3, "expected one update event per row");
4946        for event in &events {
4947            let obj = event.as_object().unwrap();
4948            assert_eq!(
4949                obj.get("op").and_then(crate::json::Value::as_str),
4950                Some("update")
4951            );
4952        }
4953    }
4954
4955    #[test]
4956    fn delete_single_row_emits_delete_event() {
4957        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4958        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (DELETE) TO del_log")
4959            .unwrap();
4960        rt.execute_query("INSERT INTO users (id, name) VALUES (42, 'Alice')")
4961            .unwrap();
4962
4963        rt.execute_query("DELETE FROM users WHERE id = 42").unwrap();
4964
4965        let events = queue_payloads(&rt, "del_log");
4966        assert_eq!(events.len(), 1);
4967        let event = events[0].as_object().expect("event payload object");
4968        assert_eq!(
4969            event.get("op").and_then(crate::json::Value::as_str),
4970            Some("delete")
4971        );
4972        assert_eq!(
4973            event.get("collection").and_then(crate::json::Value::as_str),
4974            Some("users")
4975        );
4976        assert!(event
4977            .get("event_id")
4978            .and_then(crate::json::Value::as_str)
4979            .is_some_and(|v| !v.is_empty()));
4980        let before = event
4981            .get("before")
4982            .and_then(crate::json::Value::as_object)
4983            .expect("before must be an object for delete");
4984        assert_eq!(
4985            before.get("id").and_then(crate::json::Value::as_u64),
4986            Some(42)
4987        );
4988        assert_eq!(
4989            before.get("name").and_then(crate::json::Value::as_str),
4990            Some("Alice")
4991        );
4992        assert!(matches!(event.get("after"), Some(crate::json::Value::Null)));
4993    }
4994
4995    #[test]
4996    fn multi_row_delete_emits_one_event_per_row() {
4997        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4998        rt.execute_query("CREATE TABLE items (id INT, val INT) WITH EVENTS (DELETE) TO del_log")
4999            .unwrap();
5000        rt.execute_query("INSERT INTO items (id, val) VALUES (1, 10), (2, 20), (3, 30)")
5001            .unwrap();
5002
5003        rt.execute_query("DELETE FROM items").unwrap();
5004
5005        let events = queue_payloads(&rt, "del_log");
5006        assert_eq!(events.len(), 3, "expected one delete event per deleted row");
5007        for event in &events {
5008            let obj = event.as_object().unwrap();
5009            assert_eq!(
5010                obj.get("op").and_then(crate::json::Value::as_str),
5011                Some("delete")
5012            );
5013            assert!(matches!(obj.get("after"), Some(crate::json::Value::Null)));
5014        }
5015    }
5016
5017    #[test]
5018    fn ops_filter_update_does_not_emit_on_insert_or_delete() {
5019        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5020        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO evts")
5021            .unwrap();
5022
5023        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5024            .unwrap();
5025        rt.execute_query("DELETE FROM users WHERE id = 1").unwrap();
5026
5027        let events = queue_payloads(&rt, "evts");
5028        assert!(
5029            events.is_empty(),
5030            "UPDATE-only filter must not emit INSERT or DELETE events"
5031        );
5032    }
5033
5034    // ── SUPPRESS EVENTS ────────────────────────────────────────────────────
5035
5036    #[test]
5037    fn suppress_events_on_insert_emits_no_events() {
5038        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5039        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5040            .unwrap();
5041
5042        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5043            .unwrap();
5044
5045        let events = queue_payloads(&rt, "evts");
5046        assert!(
5047            events.is_empty(),
5048            "SUPPRESS EVENTS must prevent INSERT events"
5049        );
5050    }
5051
5052    #[test]
5053    fn suppress_events_on_update_emits_no_events() {
5054        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5055        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5056            .unwrap();
5057        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5058            .unwrap();
5059        // drain the INSERT event
5060        let _ = queue_payloads(&rt, "evts");
5061        // Force pop to drain; simpler: just check new count after UPDATE
5062        rt.execute_query("QUEUE PURGE evts").unwrap();
5063
5064        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1 SUPPRESS EVENTS")
5065            .unwrap();
5066
5067        let events = queue_payloads(&rt, "evts");
5068        assert!(
5069            events.is_empty(),
5070            "SUPPRESS EVENTS must prevent UPDATE events"
5071        );
5072    }
5073
5074    #[test]
5075    fn suppress_events_on_delete_emits_no_events() {
5076        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5077        rt.execute_query(
5078            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (INSERT, DELETE) TO evts",
5079        )
5080        .unwrap();
5081        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5082            .unwrap();
5083
5084        rt.execute_query("DELETE FROM users WHERE id = 1 SUPPRESS EVENTS")
5085            .unwrap();
5086
5087        let events = queue_payloads(&rt, "evts");
5088        assert!(
5089            events.is_empty(),
5090            "SUPPRESS EVENTS must prevent DELETE events"
5091        );
5092    }
5093
5094    #[test]
5095    fn normal_insert_after_suppress_still_emits() {
5096        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5097        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5098            .unwrap();
5099
5100        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5101            .unwrap();
5102        rt.execute_query("INSERT INTO users (id, name) VALUES (2, 'Bob')")
5103            .unwrap();
5104
5105        let events = queue_payloads(&rt, "evts");
5106        assert_eq!(
5107            events.len(),
5108            1,
5109            "only the non-suppressed INSERT should emit"
5110        );
5111        assert_eq!(
5112            events[0].get("id").and_then(crate::json::Value::as_u64),
5113            Some(2)
5114        );
5115    }
5116}