Skip to main content

reddb_server/runtime/
impl_dml.rs

1//! DML execution: INSERT, UPDATE, DELETE via SQL AST
2//!
3//! Implements `execute_insert`, `execute_update`, and `execute_delete` on
4//! `RedDBRuntime`.  Each method translates the parsed AST into entity-level
5//! operations through the existing `RuntimeEntityPort` trait so that all
6//! cross-cutting concerns (WAL, indexing, replication) are automatically
7//! applied.
8
9use crate::application::entity::{
10    metadata_from_json, AppliedEntityMutation, CreateDocumentInput, CreateEdgeInput,
11    CreateEntityOutput, CreateKvInput, CreateNodeInput, CreateRowInput, CreateRowsBatchInput,
12    CreateVectorInput, DeleteEntityInput, PatchEntityOperation, PatchEntityOperationType,
13    RowUpdateColumnRule, RowUpdateContractPlan,
14};
15use crate::application::ports::{
16    build_row_update_contract_plan, entity_row_fields_snapshot,
17    normalize_row_update_assignment_with_plan, normalize_row_update_value_for_rule,
18    RuntimeEntityPort,
19};
20use crate::application::ttl_payload::has_internal_ttl_metadata;
21use crate::presentation::entity_json::storage_value_to_json;
22use crate::storage::query::ast::{BinOp, Expr, FieldRef, ReturningItem, UpdateTarget};
23use crate::storage::query::sql_lowering::{
24    effective_delete_filter, effective_insert_rows, effective_update_filter, fold_expr_to_value,
25};
26use crate::storage::query::unified::{
27    sys_key_collection, sys_key_created_at, sys_key_kind, sys_key_red_entity_id, sys_key_rid,
28    sys_key_tenant, sys_key_updated_at, UnifiedRecord, UnifiedResult,
29};
30use crate::storage::unified::MetadataValue;
31use crate::storage::Metadata;
32use std::collections::HashMap;
33use std::sync::Arc;
34
35use super::*;
36
/// Chunk size (2048) for applying UPDATE mutations.
/// NOTE(review): the consumer is outside this view — presumably the number of
/// entities applied per batch by the UPDATE path; confirm against the caller.
const UPDATE_APPLY_CHUNK_SIZE: usize = 2048;
/// The edge label literal `"TREE_CHILD"`.
/// NOTE(review): presumably labels parent-to-child edges of tree structures;
/// its usage is not visible here.
const TREE_CHILD_EDGE_LABEL: &str = "TREE_CHILD";
/// The key prefix literal `"red.tree."`.
/// NOTE(review): presumably namespaces tree-related metadata keys; its usage
/// is not visible here.
const TREE_METADATA_PREFIX: &str = "red.tree.";
40
/// One UPDATE assignment kept in expression form.
///
/// NOTE(review): construction and evaluation of this type happen outside this
/// view; the field notes below are inferred from names and types only —
/// confirm against the code that builds `CompiledUpdatePlan`.
#[derive(Clone)]
struct CompiledUpdateAssignment {
    /// Name of the assigned column.
    column: String,
    /// Right-hand-side expression of the assignment.
    expr: Expr,
    /// Binary operator attached to the assignment; presumably `None` means a
    /// plain `=` and `Some(op)` a compound form — TODO confirm.
    compound_op: Option<BinOp>,
    /// Presumably set when the target is a metadata key rather than a row
    /// field — TODO confirm.
    metadata_key: Option<&'static str>,
    /// Presumably the row-contract rule used to normalize the assigned value
    /// — TODO confirm.
    row_rule: Option<RowUpdateColumnRule>,
}
49
/// Pre-compiled form of an UPDATE statement's assignments.
///
/// NOTE(review): the builder and consumers are outside this view; the field
/// notes below are inferred from names and types — confirm before relying on
/// them.
struct CompiledUpdatePlan {
    /// `(column, value)` pairs; presumably assignments whose value is known
    /// without looking at the target row.
    static_field_assignments: Vec<(String, Value)>,
    /// `(key, value)` metadata pairs; presumably row-independent as well.
    static_metadata_assignments: Vec<(String, MetadataValue)>,
    /// Assignments kept as [`CompiledUpdateAssignment`] expressions;
    /// presumably evaluated per row.
    dynamic_assignments: Vec<CompiledUpdateAssignment>,
    /// Optional row-update contract plan for the target collection.
    row_contract_plan: Option<RowUpdateContractPlan>,
    /// Names of row columns; presumably those modified by the statement.
    row_modified_columns: Vec<String>,
    /// Presumably `true` when a modified column carries a uniqueness
    /// constraint — TODO confirm.
    row_touches_unique_columns: bool,
}
58
/// Concrete values for the dynamic part of an UPDATE; `Default` yields two
/// empty vectors.
///
/// NOTE(review): presumably the per-row result of evaluating
/// `CompiledUpdatePlan::dynamic_assignments`; the producer is outside this
/// view — confirm.
#[derive(Default)]
struct MaterializedUpdateAssignments {
    /// `(column, value)` pairs for row fields.
    dynamic_field_assignments: Vec<(String, Value)>,
    /// `(key, value)` pairs for metadata.
    dynamic_metadata_assignments: Vec<(String, MetadataValue)>,
}
64
65impl RedDBRuntime {
66    /// Issue #524 — public read of the in-memory chain tip. Returns `None`
67    /// when the collection is not a chain or has no rows (pre-genesis). On a
68    /// cold cache the first call falls back to a one-time scan so the HTTP
69    /// `GET /collections/:name/chain-tip` handler stays consistent with the
70    /// INSERT path after a restart.
71    pub fn chain_tip_for_collection(
72        &self,
73        collection: &str,
74    ) -> Option<crate::runtime::blockchain_kind::ChainTipFull> {
75        let store = self.inner.db.store();
76        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
77            return None;
78        }
79        let mut cache = self.inner.chain_tip_cache.lock();
80        if let Some(existing) = cache.get(collection) {
81            return Some(existing.clone());
82        }
83        let scanned = crate::runtime::blockchain_kind::chain_tip_full(&store, collection)?;
84        cache.insert(collection.to_string(), scanned.clone());
85        Some(scanned)
86    }
87
88    /// Issue #525 — walks the chain end-to-end, recomputes each block's hash
89    /// against the stored fields, and returns the verification outcome.  On
90    /// `ok == false` the integrity flag is persisted and the in-memory cache
91    /// is updated so subsequent INSERTs surface `ChainIntegrityBroken`.
92    ///
93    /// Returns `None` when the collection is absent or not a `KIND blockchain`.
94    pub fn verify_chain_for_collection(
95        &self,
96        collection: &str,
97    ) -> Option<crate::runtime::blockchain_kind::VerifyChainOutcome> {
98        let store = self.inner.db.store();
99        let outcome = crate::runtime::blockchain_kind::verify_chain_outcome(&store, collection)?;
100        if !outcome.ok {
101            crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, true);
102            self.inner
103                .chain_integrity_broken
104                .lock()
105                .insert(collection.to_string(), true);
106        }
107        Some(outcome)
108    }
109
110    /// Issue #525 — admin clears the `ChainIntegrityBroken` flag so the chain
111    /// accepts INSERTs again.  Returns `false` when the collection is not a
112    /// chain.
113    pub fn clear_chain_integrity_flag(&self, collection: &str) -> bool {
114        let store = self.inner.db.store();
115        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
116            return false;
117        }
118        crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, false);
119        self.inner
120            .chain_integrity_broken
121            .lock()
122            .insert(collection.to_string(), false);
123        true
124    }
125
126    /// Issue #525 — INSERT-time check.  Combines in-memory cache (fast path)
127    /// with a one-time scan of `red_config` on cold start so the flag survives
128    /// restart.
129    fn is_chain_integrity_broken(&self, collection: &str) -> bool {
130        {
131            let cache = self.inner.chain_integrity_broken.lock();
132            if let Some(v) = cache.get(collection) {
133                return *v;
134            }
135        }
136        let store = self.inner.db.store();
137        let persisted =
138            crate::runtime::blockchain_kind::is_integrity_broken_persisted(&store, collection)
139                .unwrap_or(false);
140        self.inner
141            .chain_integrity_broken
142            .lock()
143            .insert(collection.to_string(), persisted);
144        persisted
145    }
146
147    /// Issue #765 / S6 — lazily hydrate the integrity-tombstone cache from
148    /// `red_config` on first access. Returns `true` when at least one
149    /// tombstone range is present. Subsequent calls observe the cached state
150    /// flag (`1` empty / `2` present) and skip the store scan.
151    fn ensure_integrity_tombstones_loaded(&self) -> bool {
152        use std::sync::atomic::Ordering;
153        match self
154            .inner
155            .integrity_tombstones_state
156            .load(Ordering::Relaxed)
157        {
158            1 => return false,
159            2 => return true,
160            _ => {}
161        }
162        // Cold: load under the cache lock so a concurrent reader cannot
163        // observe a half-populated vector.
164        let mut guard = self.inner.integrity_tombstones.lock();
165        if self
166            .inner
167            .integrity_tombstones_state
168            .load(Ordering::Relaxed)
169            == 0
170        {
171            let ranges = crate::runtime::integrity_tombstone::load_ranges(&self.inner.db.store());
172            let present = !ranges.is_empty();
173            *guard = ranges;
174            self.inner
175                .integrity_tombstones_state
176                .store(if present { 2 } else { 1 }, Ordering::Relaxed);
177        }
178        self.inner
179            .integrity_tombstones_state
180            .load(Ordering::Relaxed)
181            == 2
182    }
183
184    /// Issue #765 / S6 — durably record an integrity tombstone over the
185    /// inclusive RID range `[lo, hi]` of `table` (the committed rows of an
186    /// input stream whose end-to-end SHA-256 digest did not match). The range
187    /// is persisted to `red_config` (survives restart) and folded into the
188    /// in-memory cache so the same process filters it immediately.
189    pub fn record_integrity_tombstone(&self, table: &str, lo: u64, hi: u64) {
190        use std::sync::atomic::Ordering;
191        self.ensure_integrity_tombstones_loaded();
192        let mut guard = self.inner.integrity_tombstones.lock();
193        guard.push(crate::runtime::integrity_tombstone::TombstoneRange::new(
194            table.to_string(),
195            lo,
196            hi,
197        ));
198        crate::runtime::integrity_tombstone::persist_ranges(&self.inner.db.store(), &guard);
199        self.inner
200            .integrity_tombstones_state
201            .store(2, Ordering::Relaxed);
202    }
203
204    /// Issue #765 / S6 — snapshot of the currently-cached tombstone ranges.
205    /// Intended for tests and forensic surfaces; the read path uses
206    /// [`Self::filter_integrity_tombstoned`] which avoids the clone.
207    pub fn integrity_tombstone_ranges(
208        &self,
209    ) -> Vec<crate::runtime::integrity_tombstone::TombstoneRange> {
210        self.ensure_integrity_tombstones_loaded();
211        self.inner.integrity_tombstones.lock().clone()
212    }
213
214    /// Issue #765 / S6 — drop tombstoned rows from a SELECT result in place.
215    /// Fast no-op (one relaxed atomic load) when no tombstone has ever been
216    /// recorded. Clears `pre_serialized_json` when any row is removed so the
217    /// fast-path JSON cannot leak a filtered row back onto the wire.
218    pub fn filter_integrity_tombstoned(&self, result: &mut UnifiedResult) {
219        if !self.ensure_integrity_tombstones_loaded() {
220            return;
221        }
222        let guard = self.inner.integrity_tombstones.lock();
223        if guard.is_empty() {
224            return;
225        }
226        let before = result.records.len();
227        result.records.retain(|record| {
228            !crate::runtime::integrity_tombstone::record_tombstoned(&guard, record)
229        });
230        if result.records.len() != before {
231            result.pre_serialized_json = None;
232        }
233    }
234
235    /// Phase 2.5.4: inject `CURRENT_TENANT()` into an INSERT when the
236    /// target table is tenant-scoped and the user's column list does
237    /// not already name the tenant column.
238    ///
239    /// Returns:
240    /// * `Ok(None)` — no injection needed (non-tenant table, or user
241    ///   supplied the column explicitly). Caller uses the original
242    ///   query unchanged.
243    /// * `Ok(Some(augmented))` — a cloned query with the tenant column
244    ///   + literal value appended to every row.
245    /// * `Err(..)` — table is tenant-scoped but no tenant is bound to
246    ///   the current session. Fails loudly so callers don't produce
247    ///   rows that RLS would then hide on read.
248    fn maybe_inject_tenant_column(&self, query: &InsertQuery) -> RedDBResult<Option<InsertQuery>> {
249        let Some(tenant_col) = self.tenant_column(&query.table) else {
250            return Ok(None);
251        };
252        // User already named the column (literal match) — trust them.
253        if query
254            .columns
255            .iter()
256            .any(|c| c.eq_ignore_ascii_case(&tenant_col))
257        {
258            return Ok(None);
259        }
260
261        // Phase 2 PG parity: dotted-path tenancy. When `tenant_col` is a
262        // nested key like `headers.tenant` we operate on the root
263        // column (`headers`) and set / add the nested path inside its
264        // JSON value. If the user named the root column we mutate in
265        // place; otherwise we create a fresh JSON column for every row.
266        if let Some(dot_pos) = tenant_col.find('.') {
267            let (root, tail) = tenant_col.split_at(dot_pos);
268            let tail = &tail[1..]; // drop leading '.'
269            return self.inject_dotted_tenant(query, root, tail);
270        }
271
272        let Some(tenant_id) = crate::runtime::impl_core::current_tenant() else {
273            return Err(RedDBError::Query(format!(
274                "INSERT into tenant-scoped table '{}' requires an active tenant — \
275                 run SET TENANT '<id>' first or name column '{}' explicitly",
276                query.table, tenant_col
277            )));
278        };
279
280        let mut augmented = query.clone();
281        augmented.columns.push(tenant_col);
282        let lit = Value::text(tenant_id.clone());
283        for row in augmented.values.iter_mut() {
284            row.push(lit.clone());
285        }
286        for row in augmented.value_exprs.iter_mut() {
287            row.push(crate::storage::query::ast::Expr::Literal {
288                value: lit.clone(),
289                span: crate::storage::query::ast::Span::synthetic(),
290            });
291        }
292        Ok(Some(augmented))
293    }
294
295    /// Dotted-path auto-fill — set `root.tail` to `CURRENT_TENANT()` on
296    /// every row. Mirrors `maybe_inject_tenant_column` but mutates
297    /// nested JSON instead of appending a flat column.
298    ///
299    /// Cases:
300    /// * Root column already in the INSERT list → mutate per-row JSON
301    ///   (parse, set path, re-serialize).
302    /// * Root column absent → create a fresh `{tail: tenant}` JSON
303    ///   object and append the root column to the INSERT.
304    fn inject_dotted_tenant(
305        &self,
306        query: &InsertQuery,
307        root: &str,
308        tail: &str,
309    ) -> RedDBResult<Option<InsertQuery>> {
310        let active_tenant = crate::runtime::impl_core::current_tenant();
311        let mut augmented = query.clone();
312        let root_idx = augmented
313            .columns
314            .iter()
315            .position(|c| c.eq_ignore_ascii_case(root));
316
317        if let Some(idx) = root_idx {
318            // User supplied the root column. Per-row: if the dotted
319            // tail is already present we trust the user (admin / bulk
320            // loader scenario); otherwise fill from the active
321            // tenant. An unbound tenant is only an error when some
322            // row actually needs filling.
323            for row in augmented.values.iter_mut() {
324                let Some(slot) = row.get_mut(idx) else {
325                    continue;
326                };
327                if dotted_tail_already_set(slot, tail) {
328                    continue;
329                }
330                let Some(tenant_id) = &active_tenant else {
331                    return Err(RedDBError::Query(format!(
332                        "INSERT into tenant-scoped table '{}' requires an active tenant — \
333                         run SET TENANT '<id>' first or set '{}.{}' explicitly in each row",
334                        query.table, root, tail
335                    )));
336                };
337                *slot = merge_dotted_tenant(slot.clone(), tail, tenant_id)?;
338            }
339            // Expression row is kept in sync by re-wrapping the
340            // mutated literal; the canonical path will re-evaluate
341            // against the same JSON shape.
342            for (row_idx, row) in augmented.value_exprs.iter_mut().enumerate() {
343                if let Some(slot) = row.get_mut(idx) {
344                    let new_value = augmented
345                        .values
346                        .get(row_idx)
347                        .and_then(|v| v.get(idx))
348                        .cloned()
349                        .unwrap_or(Value::Null);
350                    *slot = crate::storage::query::ast::Expr::Literal {
351                        value: new_value,
352                        span: crate::storage::query::ast::Span::synthetic(),
353                    };
354                }
355            }
356        } else {
357            // No root column in the INSERT list — auto-fill needs a
358            // bound tenant to synthesise one. Error loud so we never
359            // create a tenant-less row that RLS would then hide.
360            let Some(tenant_id) = &active_tenant else {
361                return Err(RedDBError::Query(format!(
362                    "INSERT into tenant-scoped table '{}' requires an active tenant — \
363                     run SET TENANT '<id>' first or name path '{}.{}' explicitly",
364                    query.table, root, tail
365                )));
366            };
367            // Create a fresh JSON column with only the tenant path set.
368            augmented.columns.push(root.to_string());
369            let fresh = merge_dotted_tenant(Value::Null, tail, tenant_id)?;
370            for row in augmented.values.iter_mut() {
371                row.push(fresh.clone());
372            }
373            for row in augmented.value_exprs.iter_mut() {
374                row.push(crate::storage::query::ast::Expr::Literal {
375                    value: fresh.clone(),
376                    span: crate::storage::query::ast::Span::synthetic(),
377                });
378            }
379        }
380
381        Ok(Some(augmented))
382    }
383
384    /// Returns `(affected_count, lsns)`. For the txn (xmax-stamp) path,
385    /// `lsns` is empty because events fire at commit time.
386    fn delete_entities_batch(
387        &self,
388        collection: &str,
389        ids: &[EntityId],
390    ) -> RedDBResult<(u64, Vec<u64>)> {
391        if ids.is_empty() {
392            return Ok((0, vec![]));
393        }
394
395        let store = self.db().store();
396        let Some(manager) = store.get_collection(collection) else {
397            return Ok((0, vec![]));
398        };
399
400        let active_xid = self.current_xid();
401        let conn_id = crate::runtime::impl_core::current_connection_id();
402        let mut autocommit_xid = None;
403        let mut tombstoned_ids = Vec::new();
404        let mut tombstoned_entities = Vec::new();
405        let mut physical_delete_ids = Vec::new();
406        let table_row_resolver =
407            crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement();
408
409        for &id in ids {
410            let Some(mut entity) = manager.get(id) else {
411                continue;
412            };
413            if matches!(entity.data, EntityData::Row(_)) {
414                let previous_xmax = entity.xmax;
415                if matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }) {
416                    if table_row_resolver.resolve_candidate(&entity).is_none() {
417                        continue;
418                    }
419                } else if entity.xmax != 0 {
420                    continue;
421                }
422
423                let xid = match active_xid {
424                    Some(xid) => xid,
425                    None => match autocommit_xid {
426                        Some(xid) => xid,
427                        None => {
428                            let mgr = self.snapshot_manager();
429                            let xid = mgr.begin();
430                            autocommit_xid = Some(xid);
431                            xid
432                        }
433                    },
434                };
435                entity.set_xmax(xid);
436                if manager.update(entity.clone()).is_ok() {
437                    if active_xid.is_some() {
438                        self.record_pending_tombstone(conn_id, collection, id, xid, previous_xmax);
439                    }
440                    tombstoned_entities.push(entity);
441                    tombstoned_ids.push(id);
442                }
443            } else {
444                physical_delete_ids.push(id);
445            }
446        }
447
448        if let Some(xid) = autocommit_xid {
449            self.snapshot_manager().commit(xid);
450        }
451
452        let mut affected = tombstoned_ids.len() as u64;
453        let mut lsns = Vec::with_capacity(tombstoned_ids.len() + physical_delete_ids.len());
454        if active_xid.is_some() {
455            store
456                .persist_entities_to_pager(collection, &tombstoned_entities)
457                .map_err(|err| RedDBError::Internal(err.to_string()))?;
458        } else {
459            store
460                .persist_entities_to_pager(collection, &tombstoned_entities)
461                .map_err(|err| RedDBError::Internal(err.to_string()))?;
462            for id in &tombstoned_ids {
463                store.context_index().remove_entity(*id);
464                let lsn = self.cdc_emit(
465                    crate::replication::cdc::ChangeOperation::Delete,
466                    collection,
467                    id.raw(),
468                    "entity",
469                );
470                lsns.push(lsn);
471            }
472        }
473
474        let deleted_ids = store
475            .delete_batch(collection, &physical_delete_ids)
476            .map_err(|err| RedDBError::Internal(err.to_string()))?;
477        affected += deleted_ids.len() as u64;
478        for id in &deleted_ids {
479            store.context_index().remove_entity(*id);
480            let lsn = self.cdc_emit(
481                crate::replication::cdc::ChangeOperation::Delete,
482                collection,
483                id.raw(),
484                "entity",
485            );
486            lsns.push(lsn);
487        }
488
489        Ok((affected, lsns))
490    }
491
492    /// Flushes context-index updates and CDC for each applied mutation.
493    /// Returns one LSN per entity in the same order as `applied`.
494    fn flush_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<Vec<u64>> {
495        if applied.is_empty() {
496            return Ok(Vec::new());
497        }
498
499        let store = self.db().store();
500        if applied.iter().any(|item| item.context_index_dirty) {
501            store.context_index().index_entities(
502                &applied[0].collection,
503                applied
504                    .iter()
505                    .filter(|item| item.context_index_dirty)
506                    .map(|item| &item.entity),
507            );
508        }
509
510        for item in applied {
511            self.refresh_update_secondary_indexes(item)?;
512        }
513
514        let mut lsns = Vec::with_capacity(applied.len());
515        for item in applied {
516            let lsn = self.cdc_emit_prebuilt(
517                crate::replication::cdc::ChangeOperation::Update,
518                &item.collection,
519                &item.entity,
520                update_cdc_item_kind(self, &item.collection, &item.entity),
521                item.metadata.as_ref(),
522                false,
523            );
524            lsns.push(lsn);
525        }
526        Ok(lsns)
527    }
528
    /// Persists a chunk of applied UPDATE mutations by delegating to
    /// `persist_applied_entity_mutations`; its result is returned unchanged.
    fn persist_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<()> {
        self.persist_applied_entity_mutations(applied)
    }
532
533    fn refresh_update_secondary_indexes(&self, applied: &AppliedEntityMutation) -> RedDBResult<()> {
534        if applied.pre_mutation_fields.is_empty() {
535            return Ok(());
536        }
537        let post = entity_row_fields_snapshot(&applied.entity);
538        if post.is_empty() {
539            return Ok(());
540        }
541
542        let indexed_cols = self
543            .index_store_ref()
544            .indexed_columns_set(&applied.collection);
545        if indexed_cols.is_empty() {
546            return Ok(());
547        }
548
549        if let Some(old_version) = applied.replaced_entity.as_ref() {
550            let old_index_fields: Vec<(String, crate::storage::schema::Value)> = applied
551                .pre_mutation_fields
552                .iter()
553                .filter(|(col, _)| indexed_cols.contains(col))
554                .cloned()
555                .collect();
556            let new_index_fields: Vec<(String, crate::storage::schema::Value)> = post
557                .iter()
558                .filter(|(col, _)| indexed_cols.contains(col))
559                .cloned()
560                .collect();
561            if !old_index_fields.is_empty() {
562                self.index_store_ref()
563                    .index_entity_delete(&applied.collection, old_version.id, &old_index_fields)
564                    .map_err(crate::RedDBError::Internal)?;
565            }
566            if !new_index_fields.is_empty() {
567                self.index_store_ref()
568                    .index_entity_insert(&applied.collection, applied.entity.id, &new_index_fields)
569                    .map_err(crate::RedDBError::Internal)?;
570            }
571            return Ok(());
572        }
573
574        let damage =
575            crate::application::entity::row_damage_vector(&applied.pre_mutation_fields, &post);
576        if damage
577            .touched_columns()
578            .into_iter()
579            .any(|col| indexed_cols.contains(col))
580        {
581            self.index_store_ref()
582                .index_entity_update(
583                    &applied.collection,
584                    applied.id,
585                    &applied.pre_mutation_fields,
586                    &post,
587                )
588                .map_err(crate::RedDBError::Internal)?;
589        }
590        Ok(())
591    }
592
593    /// Execute INSERT INTO table [entity_type] (cols) VALUES (vals), ...
594    ///
595    /// Each row in `query.values` is zipped with `query.columns` to produce a
596    /// set of named fields, which is then dispatched based on entity_type.
597    pub fn execute_insert(
598        &self,
599        raw_query: &str,
600        query: &InsertQuery,
601    ) -> RedDBResult<RuntimeQueryResult> {
602        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
603        // CollectionContract gate (#49): single entry point for the
604        // operator's collection-level write rules. Today this is a
605        // no-op for INSERT (APPEND ONLY permits insert); routing
606        // through the gate now means future contract bits — versioned,
607        // vault-only writes — plug in once instead of per verb.
608        crate::runtime::collection_contract::CollectionContractGate::check(
609            self,
610            &query.table,
611            crate::runtime::collection_contract::MutationKind::Insert,
612        )?;
613        // Phase 2.5.4 table-scoped tenancy: if the target table is
614        // tenant-scoped and the user didn't name the tenant column,
615        // auto-inject it with the thread-local `CURRENT_TENANT()`
616        // value. When the column is named explicitly we trust the
617        // caller (useful for admin tooling that writes on behalf of
618        // specific tenants). An unbound tenant on an implicit-fill
619        // path errors up front rather than producing a row the RLS
620        // policy would silently hide.
621        let augmented_owned;
622        let query = match self.maybe_inject_tenant_column(query)? {
623            Some(new_q) => {
624                augmented_owned = new_q;
625                &augmented_owned
626            }
627            None => query,
628        };
629        self.check_insert_column_policy(query)?;
630        if let Some(ref embed_config) = query.auto_embed {
631            let provider = crate::ai::parse_provider(&embed_config.provider)?;
632            // S3 / #711: planner-level provider gate. Runs before the
633            // local-model preflight and the API-key resolver so neither
634            // side-effect fires when policy denies.
635            crate::runtime::ai::provider_gate::enforce(self, &provider)?;
636            if matches!(provider, crate::ai::AiProvider::Local) {
637                // Issue #682 — pre-flight the local model registry before
638                // any row write. Missing model, uninstalled artifacts,
639                // wrong task, and disabled-feature failures surface as
640                // deterministic errors that leave the target collection
641                // untouched, satisfying the "no partial writes on
642                // embedding failure" criterion for the failure modes
643                // owned by the local provider.
644                let model_name = embed_config.model.as_deref().map(str::trim).unwrap_or("");
645                if model_name.is_empty() {
646                    return Err(RedDBError::Query(
647                        "AUTO EMBED with provider=local requires MODEL '<registered-model-name>'; \
648                         the local provider does not have an implicit default model"
649                            .to_string(),
650                    ));
651                }
652                crate::runtime::ai::local_embedding::preflight_local_embedding(
653                    &self.inner.db,
654                    model_name,
655                )?;
656            }
657        }
658
659        let mut inserted_count: u64 = 0;
660        let effective_rows =
661            effective_insert_rows(query).map_err(|msg| RedDBError::Query(msg.to_string()))?;
662
663        // Ensure the collection exists (auto-create on first insert).
664        let store = self.inner.db.store();
665        let _ = store.get_or_create_collection(&query.table);
666        let declared_model = self
667            .db()
668            .collection_contract_arc(&query.table)
669            .map(|contract| contract.declared_model);
670
671        let mut returning_snapshots: Option<Vec<Vec<(String, Value)>>> =
672            if query.returning.is_some() {
673                Some(Vec::with_capacity(effective_rows.len()))
674            } else {
675                None
676            };
677        let mut returning_result: Option<UnifiedResult> = None;
678
679        if matches!(query.entity_type, InsertEntityType::Row)
680            && !matches!(
681                declared_model,
682                Some(crate::catalog::CollectionModel::TimeSeries)
683            )
684        {
685            // Issue #523 + #524: blockchain collections seal each row into the
686            // chain. When the caller omits the reserved columns, the engine
687            // auto-fills (#523). When the caller supplies any reserved column,
688            // the values are validated against the current tip and a mismatch
689            // surfaces a `BlockchainConflict:` error mapped to HTTP 409 (#524).
690            //
691            // The whole batch runs under a per-collection chain lock so two
692            // concurrent submitters can't both bind to the same prev_hash —
693            // the loser observes the advanced tip and gets 409 with the new
694            // tip so it can retry.
695            let chain_mode = crate::runtime::blockchain_kind::is_chain(&store, &query.table);
696            let _chain_lock_arc: Option<Arc<parking_lot::Mutex<()>>> = if chain_mode {
697                Some(self.inner.rmw_locks.lock_for(&query.table, "__chain__"))
698            } else {
699                None
700            };
701            let _chain_guard = _chain_lock_arc.as_ref().map(|m| m.lock());
702
703            // Issue #525 — refuse new blocks if the chain has been marked
704            // `integrity = broken` until an admin clears the flag.
705            if chain_mode && self.is_chain_integrity_broken(&query.table) {
706                return Err(RedDBError::InvalidOperation(format!(
707                    "ChainIntegrityBroken: collection '{}' is locked until \
708                     POST /collections/{}/clear-integrity-flag is called by an admin",
709                    query.table, query.table
710                )));
711            }
712
713            // Pull the tip from the in-memory cache; fall back to a one-time
714            // scan if the cache hasn't seen this collection yet (cold start
715            // after restart). Cache is updated below as rows are sealed.
716            let mut chain_tip_full: Option<crate::runtime::blockchain_kind::ChainTipFull> =
717                if chain_mode {
718                    let mut cache = self.inner.chain_tip_cache.lock();
719                    if let Some(existing) = cache.get(&query.table) {
720                        Some(existing.clone())
721                    } else if let Some(scanned) =
722                        crate::runtime::blockchain_kind::chain_tip_full(&store, &query.table)
723                    {
724                        cache.insert(query.table.clone(), scanned.clone());
725                        Some(scanned)
726                    } else {
727                        None
728                    }
729                } else {
730                    None
731                };
732
733            let mut rows = Vec::with_capacity(effective_rows.len());
734            for row_values in &effective_rows {
735                if row_values.len() != query.columns.len() {
736                    return Err(RedDBError::Query(format!(
737                        "INSERT column count ({}) does not match value count ({})",
738                        query.columns.len(),
739                        row_values.len()
740                    )));
741                }
742                let (mut fields, mut metadata) =
743                    split_insert_metadata(self, &query.columns, row_values)?;
744                if chain_mode {
745                    use crate::runtime::blockchain_kind::{
746                        chain_conflict_error, COL_BLOCK_HEIGHT, COL_HASH, COL_PREV_HASH,
747                        COL_TIMESTAMP, RESERVED_COLUMNS,
748                    };
749                    let supplied_height = fields
750                        .iter()
751                        .find(|(k, _)| k == COL_BLOCK_HEIGHT)
752                        .map(|(_, v)| v.clone());
753                    let supplied_prev = fields
754                        .iter()
755                        .find(|(k, _)| k == COL_PREV_HASH)
756                        .map(|(_, v)| v.clone());
757                    let supplied_ts = fields
758                        .iter()
759                        .find(|(k, _)| k == COL_TIMESTAMP)
760                        .map(|(_, v)| v.clone());
761                    let supplied_hash = fields.iter().any(|(k, _)| k == COL_HASH);
762                    let user_supplied_any = supplied_height.is_some()
763                        || supplied_prev.is_some()
764                        || supplied_ts.is_some()
765                        || supplied_hash;
766
767                    fields.retain(|(k, _)| !RESERVED_COLUMNS.contains(&k.as_str()));
768                    let payload = crate::runtime::blockchain_kind::canonical_payload(&fields);
769
770                    let (tip_prev_hash, tip_next_height) = match &chain_tip_full {
771                        Some(t) => (t.hash, t.height + 1),
772                        None => (crate::storage::blockchain::GENESIS_PREV_HASH, 0u64),
773                    };
774                    let server_now = crate::runtime::blockchain_kind::now_ms();
775
776                    let (use_prev, use_height, use_ts) = if user_supplied_any {
777                        // Caller is participating in the chain protocol —
778                        // every field must be supplied AND match the tip.
779                        if supplied_hash {
780                            return Err(chain_conflict_error(
781                                tip_next_height.saturating_sub(1),
782                                tip_prev_hash,
783                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
784                                server_now,
785                                "hash column is engine-computed and cannot be supplied",
786                            ));
787                        }
788                        let caller_prev = match &supplied_prev {
789                            Some(Value::Blob(b)) if b.len() == 32 => {
790                                let mut a = [0u8; 32];
791                                a.copy_from_slice(b);
792                                a
793                            }
794                            Some(Value::Text(s)) if s.len() == 64 => {
795                                // Accept hex-encoded prev_hash so JSON / SQL
796                                // callers without literal-blob syntax can
797                                // still participate in the chain protocol.
798                                let mut a = [0u8; 32];
799                                let mut ok = true;
800                                for (i, slot) in a.iter_mut().enumerate() {
801                                    let pair = &s.as_ref()[i * 2..i * 2 + 2];
802                                    match u8::from_str_radix(pair, 16) {
803                                        Ok(byte) => *slot = byte,
804                                        Err(_) => {
805                                            ok = false;
806                                            break;
807                                        }
808                                    }
809                                }
810                                if !ok {
811                                    return Err(chain_conflict_error(
812                                        tip_next_height.saturating_sub(1),
813                                        tip_prev_hash,
814                                        chain_tip_full
815                                            .as_ref()
816                                            .map(|t| t.timestamp_ms)
817                                            .unwrap_or(0),
818                                        server_now,
819                                        "prev_hash is not valid hex",
820                                    ));
821                                }
822                                a
823                            }
824                            _ => {
825                                return Err(chain_conflict_error(
826                                    tip_next_height.saturating_sub(1),
827                                    tip_prev_hash,
828                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
829                                    server_now,
830                                    "prev_hash missing or not a 32-byte Blob",
831                                ));
832                            }
833                        };
834                        if caller_prev != tip_prev_hash {
835                            return Err(chain_conflict_error(
836                                tip_next_height.saturating_sub(1),
837                                tip_prev_hash,
838                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
839                                server_now,
840                                "prev_hash does not match current tip",
841                            ));
842                        }
843                        let caller_height = match &supplied_height {
844                            Some(Value::UnsignedInteger(v)) => *v,
845                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
846                            _ => {
847                                return Err(chain_conflict_error(
848                                    tip_next_height.saturating_sub(1),
849                                    tip_prev_hash,
850                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
851                                    server_now,
852                                    "block_height missing or not an unsigned integer",
853                                ));
854                            }
855                        };
856                        if caller_height != tip_next_height {
857                            return Err(chain_conflict_error(
858                                tip_next_height.saturating_sub(1),
859                                tip_prev_hash,
860                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
861                                server_now,
862                                "block_height does not match tip+1",
863                            ));
864                        }
865                        let caller_ts = match &supplied_ts {
866                            Some(Value::UnsignedInteger(v)) => *v,
867                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
868                            _ => {
869                                return Err(chain_conflict_error(
870                                    tip_next_height.saturating_sub(1),
871                                    tip_prev_hash,
872                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
873                                    server_now,
874                                    "timestamp missing or not an unsigned integer",
875                                ));
876                            }
877                        };
878                        let drift = (caller_ts as i128) - (server_now as i128);
879                        if drift.abs() > 60_000 {
880                            return Err(chain_conflict_error(
881                                tip_next_height.saturating_sub(1),
882                                tip_prev_hash,
883                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
884                                server_now,
885                                "timestamp outside ±60s of server_time",
886                            ));
887                        }
888                        (caller_prev, caller_height, caller_ts)
889                    } else {
890                        (tip_prev_hash, tip_next_height, server_now)
891                    };
892
893                    let (reserved, new_hash) =
894                        crate::runtime::blockchain_kind::make_block_reserved_fields(
895                            use_prev, use_height, use_ts, &payload,
896                        );
897                    fields.extend(reserved);
898                    chain_tip_full = Some(crate::runtime::blockchain_kind::ChainTipFull {
899                        height: use_height,
900                        hash: new_hash,
901                        timestamp_ms: use_ts,
902                    });
903                }
904                // Issue #522 — signed-writes verification. On collections
905                // created with `SIGNED_BY (...)` the row must carry valid
906                // `signer_pubkey` + `signature` reserved columns. Runs
907                // after chain_mode so canonical payload covers user-supplied
908                // fields only (blockchain reserved columns are filtered by
909                // `canonical_payload`; the two signed-writes reserved
910                // columns are split out before payload computation, then
911                // re-attached for storage). The blockchain + SIGNED_BY
912                // composition is owned by issue #526; we keep #522 to the
913                // non-chain path and let chain_mode collections punt to that
914                // slice rather than half-wire it here.
915                if crate::runtime::signed_writes_kind::is_signed(&store, &query.table) {
916                    let (pk_col, sig_col, residual) =
917                        crate::runtime::signed_writes_kind::split_signature_fields(fields);
918                    let payload = crate::runtime::blockchain_kind::canonical_payload(&residual);
919                    let reg = crate::runtime::signed_writes_kind::registry(&store, &query.table);
920                    crate::runtime::signed_writes_kind::verify_row(
921                        &reg,
922                        pk_col.as_ref().map(|c| c.bytes.as_slice()),
923                        sig_col.as_ref().map(|c| c.bytes.as_slice()),
924                        &payload,
925                    )
926                    .map_err(crate::runtime::signed_writes_kind::map_error)?;
927                    fields = residual;
928                    // Round-trip the reserved columns with the value
929                    // type the caller supplied (Text/hex on the SQL path,
930                    // Blob on the binary path). Keeps SELECT and WHERE
931                    // predicates symmetric with the INSERT shape.
932                    if let Some(col) = pk_col {
933                        fields.push((
934                            crate::storage::signed_writes::RESERVED_SIGNER_PUBKEY_COL.to_string(),
935                            col.raw_value,
936                        ));
937                    }
938                    if let Some(col) = sig_col {
939                        fields.push((
940                            crate::storage::signed_writes::RESERVED_SIGNATURE_COL.to_string(),
941                            col.raw_value,
942                        ));
943                    }
944                }
945                merge_with_clauses(
946                    &mut metadata,
947                    query.ttl_ms,
948                    query.expires_at_ms,
949                    &query.with_metadata,
950                );
951                if let Some(snaps) = returning_snapshots.as_mut() {
952                    snaps.push(fields.clone());
953                }
954                rows.push(CreateRowInput {
955                    collection: query.table.clone(),
956                    fields,
957                    metadata,
958                    node_links: Vec::new(),
959                    vector_links: Vec::new(),
960                });
961            }
962            let outputs = self.create_rows_batch(CreateRowsBatchInput {
963                collection: query.table.clone(),
964                rows,
965                suppress_events: query.suppress_events,
966            })?;
967            inserted_count = outputs.len() as u64;
968
969            // Chain mode: commit the new tip to the in-memory cache only after
970            // the batch persisted successfully. If the batch threw mid-way the
971            // cache stays on the previous tip and the chain lock releases.
972            if chain_mode {
973                if let Some(new_tip) = chain_tip_full.as_ref() {
974                    self.inner
975                        .chain_tip_cache
976                        .lock()
977                        .insert(query.table.clone(), new_tip.clone());
978                }
979            }
980
981            // Hypertable chunk routing: if this table was declared via
982            // CREATE HYPERTABLE, register each row's time-column value
983            // with the registry so chunk metadata (bounds, row counts,
984            // TTL eligibility) stays current. This is what lets
985            // HYPERTABLE_PRUNE_CHUNKS answer real questions + lets the
986            // retention daemon sweep expired chunks without scanning
987            // every row.
988            if let Some(spec) = self.inner.db.hypertables().get(&query.table) {
989                let time_col = &spec.time_column;
990                // Find the column's index in the INSERT column list.
991                if let Some(idx) = query.columns.iter().position(|c| c == time_col) {
992                    for row in &effective_rows {
993                        if let Some(Value::Integer(n) | Value::BigInt(n)) = row.get(idx) {
994                            if *n >= 0 {
995                                let _ = self.inner.db.hypertables().route(&query.table, *n as u64);
996                            }
997                        } else if let Some(Value::UnsignedInteger(n)) = row.get(idx) {
998                            let _ = self.inner.db.hypertables().route(&query.table, *n);
999                        }
1000                    }
1001                }
1002            }
1003
1004            if let (Some(items), Some(snaps)) =
1005                (query.returning.as_ref(), returning_snapshots.take())
1006            {
1007                let snaps = row_insert_returning_snapshots(&outputs, snaps);
1008                returning_result = Some(build_returning_result(items, &snaps, Some(&outputs)));
1009            }
1010        } else {
1011            // Issue #419: surface the inserted entity id on every INSERT path.
1012            // For Node/Edge/Vector/Document/Kv we now keep each CreateEntityOutput
1013            // so a RETURNING clause (and the unconditional inserted_ids list,
1014            // below) can expose the engine-assigned id. TimeSeries (the row
1015            // branch in this else) still returns the not-supported error
1016            // because create_timeseries_point isn't plumbed through this fn.
1017            let mut entity_outputs: Vec<crate::application::entity::CreateEntityOutput> =
1018                Vec::with_capacity(effective_rows.len());
1019            let mut returning_field_snaps: Vec<Vec<(String, Value)>> = if query.returning.is_some()
1020            {
1021                Vec::with_capacity(effective_rows.len())
1022            } else {
1023                Vec::new()
1024            };
1025            if matches!(
1026                query.entity_type,
1027                InsertEntityType::Node | InsertEntityType::Edge
1028            ) {
1029                enum PreparedGraphInsert { // One prepared NODE or EDGE row; all rows are collected in `prepared` before the batch is built.
1030                    Node {
1031                        fields: Vec<(String, Value)>, // First element returned by `split_insert_metadata` for this row; pushed to `returning_field_snaps` when RETURNING is present.
1032                        input: CreateNodeInput, // Consumed field-by-field by `batch.add_node_with_type`.
1033                    },
1034                    Edge {
1035                        fields: Vec<(String, Value)>, // First element returned by `split_insert_metadata` for this row; pushed to `returning_field_snaps` when RETURNING is present.
1036                        input: CreateEdgeInput, // Consumed field-by-field by `batch.add_edge`; a missing `weight` becomes 1.0 there.
1037                    },
1038                }
1039
1040                let mut prepared = Vec::with_capacity(effective_rows.len());
1041                for row_values in &effective_rows {
1042                    if row_values.len() != query.columns.len() {
1043                        return Err(RedDBError::Query(format!(
1044                            "INSERT column count ({}) does not match value count ({})",
1045                            query.columns.len(),
1046                            row_values.len()
1047                        )));
1048                    }
1049
1050                    match query.entity_type {
1051                        InsertEntityType::Node => {
1052                            let (node_values, mut metadata) =
1053                                split_insert_metadata(self, &query.columns, row_values)?;
1054                            merge_with_clauses(
1055                                &mut metadata,
1056                                query.ttl_ms,
1057                                query.expires_at_ms,
1058                                &query.with_metadata,
1059                            );
1060                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1061                            apply_collection_default_ttl_metadata(
1062                                self,
1063                                &query.table,
1064                                &mut metadata,
1065                            );
1066                            let (columns, values) = pairwise_columns_values(&node_values);
1067                            let label = find_column_value_string(&columns, &values, "label")?;
1068                            let node_type =
1069                                find_column_value_opt_string(&columns, &values, "node_type");
1070                            let properties = extract_remaining_properties(
1071                                &columns,
1072                                &values,
1073                                &["label", "node_type"],
1074                            );
1075                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1076                                properties.iter().map(|(key, _)| key.as_str()),
1077                                &format!("node '{}'", query.table),
1078                            )?;
1079                            prepared.push(PreparedGraphInsert::Node {
1080                                fields: node_values,
1081                                input: CreateNodeInput {
1082                                    collection: query.table.clone(),
1083                                    label,
1084                                    node_type,
1085                                    properties,
1086                                    metadata,
1087                                    embeddings: Vec::new(),
1088                                    table_links: Vec::new(),
1089                                    node_links: Vec::new(),
1090                                },
1091                            });
1092                        }
1093                        InsertEntityType::Edge => {
1094                            let (edge_values, mut metadata) =
1095                                split_insert_metadata(self, &query.columns, row_values)?;
1096                            merge_with_clauses(
1097                                &mut metadata,
1098                                query.ttl_ms,
1099                                query.expires_at_ms,
1100                                &query.with_metadata,
1101                            );
1102                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1103                            apply_collection_default_ttl_metadata(
1104                                self,
1105                                &query.table,
1106                                &mut metadata,
1107                            );
1108                            let (columns, values) = pairwise_columns_values(&edge_values);
1109                            let label = find_column_value_string(&columns, &values, "label")?;
1110                            ensure_non_tree_structural_edge_label(&label)?;
1111                            let from_id = resolve_edge_endpoint_any(
1112                                self.inner.db.store().as_ref(),
1113                                &query.table,
1114                                &columns,
1115                                &values,
1116                                &["from_rid", "from"],
1117                            )?;
1118                            let to_id = resolve_edge_endpoint_any(
1119                                self.inner.db.store().as_ref(),
1120                                &query.table,
1121                                &columns,
1122                                &values,
1123                                &["to_rid", "to"],
1124                            )?;
1125                            let weight = find_column_value_f32_opt(&columns, &values, "weight");
1126                            let properties = extract_remaining_properties(
1127                                &columns,
1128                                &values,
1129                                &["label", "from_rid", "to_rid", "from", "to", "weight"],
1130                            );
1131                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1132                                properties.iter().map(|(key, _)| key.as_str()),
1133                                &format!("edge '{}'", query.table),
1134                            )?;
1135                            prepared.push(PreparedGraphInsert::Edge {
1136                                fields: edge_values,
1137                                input: CreateEdgeInput {
1138                                    collection: query.table.clone(),
1139                                    label,
1140                                    from: EntityId::new(from_id),
1141                                    to: EntityId::new(to_id),
1142                                    weight,
1143                                    properties,
1144                                    metadata,
1145                                },
1146                            });
1147                        }
1148                        _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1149                    }
1150                }
1151
1152                ensure_graph_insert_contract(self, &query.table)?;
1153                let mut batch = self.inner.db.batch();
1154                for item in prepared {
1155                    match item {
1156                        PreparedGraphInsert::Node { fields, input } => {
1157                            if query.returning.is_some() {
1158                                returning_field_snaps.push(fields);
1159                            }
1160                            let node_type = input.node_type.unwrap_or_else(|| input.label.clone());
1161                            batch = batch.add_node_with_type(
1162                                input.collection,
1163                                input.label,
1164                                node_type,
1165                                input.properties.into_iter().collect(),
1166                                input.metadata.into_iter().collect(),
1167                            );
1168                        }
1169                        PreparedGraphInsert::Edge { fields, input } => {
1170                            if query.returning.is_some() {
1171                                returning_field_snaps.push(fields);
1172                            }
1173                            batch = batch.add_edge(
1174                                input.collection,
1175                                input.label,
1176                                input.from,
1177                                input.to,
1178                                input.weight.unwrap_or(1.0),
1179                                input.properties.into_iter().collect(),
1180                                input.metadata.into_iter().collect(),
1181                            );
1182                        }
1183                    }
1184                }
1185                let batch_result = batch
1186                    .execute()
1187                    .map_err(|err| RedDBError::Internal(format!("{err:?}")))?;
1188                let (ids, entity_kind) = match query.entity_type {
1189                    InsertEntityType::Node => (batch_result.nodes, "graph_node"),
1190                    InsertEntityType::Edge => (batch_result.edges, "graph_edge"),
1191                    _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1192                };
1193                for id in &ids {
1194                    self.stamp_xmin_if_in_txn(&query.table, *id);
1195                }
1196                if query.returning.is_some() {
1197                    returning_field_snaps = graph_insert_returning_snapshots(
1198                        self.inner.db.store().as_ref(),
1199                        &query.table,
1200                        &ids,
1201                    );
1202                }
1203                self.cdc_emit_insert_batch_no_cache_invalidate(&query.table, &ids, entity_kind);
1204                let store = self.inner.db.store();
1205                entity_outputs.extend(ids.iter().map(|id| {
1206                    crate::application::entity::CreateEntityOutput {
1207                        id: *id,
1208                        entity: store.get(&query.table, *id),
1209                    }
1210                }));
1211                inserted_count = ids.len() as u64;
1212            } else {
1213                for row_values in &effective_rows {
1214                    if row_values.len() != query.columns.len() {
1215                        return Err(RedDBError::Query(format!(
1216                            "INSERT column count ({}) does not match value count ({})",
1217                            query.columns.len(),
1218                            row_values.len()
1219                        )));
1220                    }
1221
1222                    match query.entity_type {
1223                        InsertEntityType::Row => {
1224                            if query.returning.is_some() {
1225                                return Err(RedDBError::Query(
1226                                "RETURNING is not yet supported for this INSERT path (TimeSeries)"
1227                                    .to_string(),
1228                            ));
1229                            }
1230                            let (fields, mut metadata) =
1231                                split_insert_metadata(self, &query.columns, row_values)?;
1232                            merge_with_clauses(
1233                                &mut metadata,
1234                                query.ttl_ms,
1235                                query.expires_at_ms,
1236                                &query.with_metadata,
1237                            );
1238                            self.insert_timeseries_point(&query.table, fields, metadata)?;
1239                        }
1240                        InsertEntityType::Node | InsertEntityType::Edge => {
1241                            unreachable!("NODE and EDGE are handled by the prepared graph path")
1242                        }
1243                        InsertEntityType::Vector => {
1244                            let (vector_values, mut metadata) =
1245                                split_insert_metadata(self, &query.columns, row_values)?;
1246                            merge_with_clauses(
1247                                &mut metadata,
1248                                query.ttl_ms,
1249                                query.expires_at_ms,
1250                                &query.with_metadata,
1251                            );
1252                            let (columns, values) = pairwise_columns_values(&vector_values);
1253                            let dense = find_column_value_vec_f32_any(
1254                                &columns,
1255                                &values,
1256                                &["dense", "embedding"],
1257                            )?;
1258                            merge_vector_metadata_column(&mut metadata, &columns, &values)?;
1259                            let content =
1260                                find_column_value_opt_string(&columns, &values, "content");
1261                            if query.returning.is_some() {
1262                                returning_field_snaps.push(vector_values.clone());
1263                            }
1264                            let input = CreateVectorInput {
1265                                collection: query.table.clone(),
1266                                dense,
1267                                content,
1268                                metadata,
1269                                link_row: None,
1270                                link_node: None,
1271                            };
1272                            entity_outputs.push(self.create_vector(input)?);
1273                        }
1274                        InsertEntityType::Document => {
1275                            let (document_values, mut metadata) =
1276                                split_insert_metadata(self, &query.columns, row_values)?;
1277                            merge_with_clauses(
1278                                &mut metadata,
1279                                query.ttl_ms,
1280                                query.expires_at_ms,
1281                                &query.with_metadata,
1282                            );
1283                            let (columns, values) = pairwise_columns_values(&document_values);
1284                            let body_str = find_column_value_string(&columns, &values, "body")?;
1285                            let body: crate::json::Value = crate::json::from_str(&body_str)
1286                                .map_err(|e| {
1287                                    RedDBError::Query(format!("invalid JSON body: {e}"))
1288                                })?;
1289                            let input = CreateDocumentInput {
1290                                collection: query.table.clone(),
1291                                body,
1292                                metadata,
1293                                node_links: Vec::new(),
1294                                vector_links: Vec::new(),
1295                            };
1296                            let output = self.create_document(input)?;
1297                            if query.returning.is_some() {
1298                                let fields = output
1299                                    .entity
1300                                    .as_ref()
1301                                    .map(entity_row_fields_snapshot)
1302                                    .filter(|fields| !fields.is_empty())
1303                                    .unwrap_or(document_values);
1304                                returning_field_snaps.push(fields);
1305                            }
1306                            entity_outputs.push(output);
1307                        }
1308                        InsertEntityType::Kv => {
1309                            let (kv_values, mut metadata) =
1310                                split_insert_metadata(self, &query.columns, row_values)?;
1311                            merge_with_clauses(
1312                                &mut metadata,
1313                                query.ttl_ms,
1314                                query.expires_at_ms,
1315                                &query.with_metadata,
1316                            );
1317                            let (columns, values) = pairwise_columns_values(&kv_values);
1318                            let key = find_column_value_string(&columns, &values, "key")?;
1319                            let value = find_column_value(&columns, &values, "value")?;
1320                            if query.returning.is_some() {
1321                                returning_field_snaps.push(kv_values.clone());
1322                            }
1323                            let input = CreateKvInput {
1324                                collection: query.table.clone(),
1325                                key,
1326                                value,
1327                                metadata,
1328                            };
1329                            entity_outputs.push(self.create_kv(input)?);
1330                        }
1331                    }
1332
1333                    inserted_count += 1;
1334                }
1335            }
1336
1337            if let Some(items) = query.returning.as_ref() {
1338                if !entity_outputs.is_empty() {
1339                    returning_result = Some(build_returning_result(
1340                        items,
1341                        &returning_field_snaps,
1342                        Some(&entity_outputs),
1343                    ));
1344                }
1345            }
1346        }
1347
1348        // Auto-embed pipeline: batch-embed fields across all inserted rows via AiBatchClient.
1349        if let Some(ref embed_config) = query.auto_embed {
1350            let store = self.inner.db.store();
1351            let provider = crate::ai::parse_provider(&embed_config.provider)?;
1352            let is_local_provider = matches!(provider, crate::ai::AiProvider::Local);
1353            // Local provider runs in-process — no API key path applies.
1354            // The pre-flight above already required `MODEL '<name>'`
1355            // for the local case, so the unwrap_or default below only
1356            // ever fires for OpenAI-compatible providers.
1357            let api_key = if is_local_provider {
1358                String::new()
1359            } else {
1360                crate::ai::resolve_api_key_from_runtime(&provider, None, self)?
1361            };
1362            let model = embed_config.model.clone().unwrap_or_else(|| {
1363                std::env::var("REDDB_OPENAI_EMBEDDING_MODEL")
1364                    .ok()
1365                    .unwrap_or_else(|| crate::ai::DEFAULT_OPENAI_EMBEDDING_MODEL.to_string())
1366            });
1367
1368            // Collect the just-inserted rows (most-recently appended, reversed back to insert order).
1369            let manager = store
1370                .get_collection(&query.table)
1371                .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1372            let entities = manager.query_all(|_| true);
1373            let recent: Vec<_> = entities
1374                .into_iter()
1375                .rev()
1376                .take(effective_rows.len())
1377                .collect();
1378
1379            // Collector phase: (entity_index, combined_text) for rows that have non-empty fields.
1380            let entity_combos: Vec<(usize, String)> = recent
1381                .iter()
1382                .enumerate()
1383                .filter_map(|(i, entity)| {
1384                    if let EntityData::Row(ref row) = entity.data {
1385                        if let Some(ref named) = row.named {
1386                            let texts: Vec<String> = embed_config
1387                                .fields
1388                                .iter()
1389                                .filter_map(|field| match named.get(field) {
1390                                    Some(Value::Text(t)) if !t.is_empty() => Some(t.to_string()),
1391                                    _ => None,
1392                                })
1393                                .collect();
1394                            if !texts.is_empty() {
1395                                return Some((i, texts.join(" ")));
1396                            }
1397                        }
1398                    }
1399                    None
1400                })
1401                .collect();
1402
1403            if !entity_combos.is_empty() {
1404                // Batch phase: single provider round-trip for all rows.
1405                let batch_texts: Vec<String> =
1406                    entity_combos.iter().map(|(_, t)| t.clone()).collect();
1407
1408                // Issue #682 — when the provider is `local`, bypass
1409                // AiBatchClient (which is HTTP-only) and dispatch
1410                // directly through the in-process local embedding
1411                // backend. All texts go in one call, mirroring the
1412                // single-round-trip shape of the remote path. The
1413                // local backend does not perform intra-batch dedup —
1414                // each input position gets its own row in the output
1415                // — which keeps the per-row "create_vector" loop
1416                // below correct without additional fan-out logic.
1417                let embeddings = if is_local_provider {
1418                    let response = crate::runtime::ai::local_embedding::embed_local_with_db(
1419                        &self.inner.db,
1420                        &model,
1421                        batch_texts,
1422                    )?;
1423                    response.embeddings
1424                } else {
1425                    let batch_client =
1426                        crate::runtime::ai::batch_client::AiBatchClient::from_runtime(self);
1427
1428                    match tokio::runtime::Handle::try_current() {
1429                        Ok(handle) => tokio::task::block_in_place(|| {
1430                            handle.block_on(batch_client.embed_batch(
1431                                &provider,
1432                                &model,
1433                                &api_key,
1434                                batch_texts,
1435                            ))
1436                        }),
1437                        Err(_) => {
1438                            return Err(RedDBError::Query(
1439                                "AUTO EMBED requires a Tokio runtime context".to_string(),
1440                            ));
1441                        }
1442                    }
1443                    .map_err(|e| RedDBError::Query(e.to_string()))?
1444                };
1445
1446                // Distribute phase: persist one vector per non-empty embedding.
1447                for ((_, combined), dense) in entity_combos.iter().zip(embeddings) {
1448                    if dense.is_empty() {
1449                        continue;
1450                    }
1451                    self.create_vector(CreateVectorInput {
1452                        collection: query.table.clone(),
1453                        dense,
1454                        content: Some(combined.clone()),
1455                        metadata: Vec::new(),
1456                        link_row: None,
1457                        link_node: None,
1458                    })?;
1459                }
1460            }
1461        }
1462
1463        if inserted_count > 0 {
1464            self.note_table_write(&query.table);
1465        }
1466
1467        let mut result = RuntimeQueryResult::dml_result(
1468            raw_query.to_string(),
1469            inserted_count,
1470            "insert",
1471            "runtime-dml",
1472        );
1473        if let Some(returning) = returning_result {
1474            result.result = returning;
1475        }
1476        Ok(result)
1477    }
1478
1479    fn check_insert_column_policy(&self, query: &InsertQuery) -> RedDBResult<()> {
1480        let Some(auth_store) = self.inner.auth_store.read().clone() else {
1481            return Ok(());
1482        };
1483        if !auth_store.iam_authorization_enabled() {
1484            return Ok(());
1485        }
1486        let Some((username, role)) = crate::runtime::impl_core::current_auth_identity() else {
1487            return Ok(());
1488        };
1489
1490        let tenant = crate::runtime::impl_core::current_tenant();
1491        let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
1492        let request = crate::auth::ColumnAccessRequest {
1493            action: "insert".to_string(),
1494            schema: None,
1495            table: query.table.clone(),
1496            columns: query.columns.clone(),
1497        };
1498        let ctx = crate::auth::policies::EvalContext {
1499            principal_tenant: tenant.clone(),
1500            current_tenant: tenant,
1501            peer_ip: None,
1502            mfa_present: false,
1503            now_ms: crate::auth::now_ms(),
1504            principal_is_admin_role: role == crate::auth::Role::Admin,
1505            principal_is_system_owned: auth_store.principal_is_system_owned(&principal),
1506            principal_is_platform_scoped: principal.tenant.is_none(),
1507        };
1508
1509        let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
1510        let table_allowed = matches!(
1511            outcome.table_decision,
1512            crate::auth::policies::Decision::Allow { .. }
1513                | crate::auth::policies::Decision::AdminBypass
1514        );
1515        if !table_allowed {
1516            return Err(RedDBError::Query(format!(
1517                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1518                outcome.table_resource.kind, outcome.table_resource.name
1519            )));
1520        }
1521        if let Some(denied) = outcome.first_denied_column() {
1522            return Err(RedDBError::Query(format!(
1523                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1524                denied.resource.kind, denied.resource.name
1525            )));
1526        }
1527
1528        Ok(())
1529    }
1530
1531    pub(crate) fn insert_timeseries_point(
1532        &self,
1533        collection: &str,
1534        fields: Vec<(String, Value)>,
1535        mut metadata: Vec<(String, MetadataValue)>,
1536    ) -> RedDBResult<EntityId> {
1537        apply_collection_default_ttl_metadata(self, collection, &mut metadata);
1538
1539        let (columns, values) = pairwise_columns_values(&fields);
1540        validate_timeseries_insert_columns(&columns)?;
1541
1542        // Issue #577 — AnalyticsSchemaRegistry hook. If the row carries
1543        // an `event_name` whose schema is registered, validate the
1544        // `payload` JSON against it BEFORE any write side-effect. On
1545        // failure we return a typed error and the row is not
1546        // persisted. When no schema is registered for the event name
1547        // (or no `event_name` column is supplied at all) we fall
1548        // through to the normal write path for back-compat with
1549        // existing timeseries rows.
1550        let event_name_opt = find_column_value_opt_string(&columns, &values, "event_name");
1551        let payload_opt = find_column_value_opt_string(&columns, &values, "payload");
1552        if let Some(event_name) = event_name_opt.as_deref() {
1553            let store_for_schema = self.inner.db.store();
1554            if super::analytics_schema_registry::latest(store_for_schema.as_ref(), event_name)
1555                .is_some()
1556            {
1557                let payload_json = payload_opt.as_deref().unwrap_or("{}");
1558                super::analytics_schema_registry::validate(
1559                    store_for_schema.as_ref(),
1560                    event_name,
1561                    payload_json,
1562                )
1563                .map_err(super::analytics_schema_registry::validation_error_to_reddb)?;
1564            }
1565        }
1566
1567        // `metric` is required by the existing timeseries write path;
1568        // when an analytics-style row supplies `event_name` but not
1569        // `metric`, fall back to the event name so the storage path
1570        // still has a non-empty metric tag.
1571        let metric = match find_column_value_opt_string(&columns, &values, "metric") {
1572            Some(m) => m,
1573            None => event_name_opt.clone().ok_or_else(|| {
1574                RedDBError::Query(
1575                    "timeseries INSERT requires either `metric` or `event_name`".to_string(),
1576                )
1577            })?,
1578        };
1579        // `value` is optional for analytics-event rows (which are
1580        // semantically counts of 1); default to 1.0 when missing so
1581        // analytics inserts don't have to fabricate a metric value.
1582        let value = match find_column_value_opt_string(&columns, &values, "value") {
1583            Some(s) => s.parse::<f64>().unwrap_or(1.0),
1584            None => columns
1585                .iter()
1586                .position(|c| c.eq_ignore_ascii_case("value"))
1587                .and_then(|i| match &values[i] {
1588                    Value::Float(f) => Some(*f),
1589                    Value::Integer(n) | Value::BigInt(n) => Some(*n as f64),
1590                    Value::UnsignedInteger(n) => Some(*n as f64),
1591                    _ => None,
1592                })
1593                .unwrap_or(1.0),
1594        };
1595        let timestamp_ns =
1596            find_timeseries_timestamp_ns(&columns, &values)?.unwrap_or_else(current_unix_ns);
1597        let mut tags = find_timeseries_tags(&columns, &values)?;
1598        if let Some(ref name) = event_name_opt {
1599            tags.entry("event_name".to_string())
1600                .or_insert_with(|| name.clone());
1601        }
1602        if let Some(ref payload) = payload_opt {
1603            tags.entry("payload".to_string())
1604                .or_insert_with(|| payload.clone());
1605        }
1606
1607        let mut entity = UnifiedEntity::new(
1608            EntityId::new(0),
1609            EntityKind::TimeSeriesPoint(Box::new(crate::storage::TimeSeriesPointKind {
1610                series: collection.to_string(),
1611                metric: metric.clone(),
1612            })),
1613            EntityData::TimeSeries(crate::storage::TimeSeriesData {
1614                metric,
1615                timestamp_ns,
1616                value,
1617                tags,
1618            }),
1619        );
1620        // MVCC #30: stamp xmin with the active tx xid (inside a tx)
1621        // or an autocommit xid (allocated and committed up-front so
1622        // future snapshots see the row as soon as it lands).
1623        let writer_xid = match self.current_xid() {
1624            Some(xid) => xid,
1625            None => {
1626                let mgr = self.snapshot_manager();
1627                let xid = mgr.begin();
1628                mgr.commit(xid);
1629                xid
1630            }
1631        };
1632        entity.set_xmin(writer_xid);
1633
1634        let store = self.inner.db.store();
1635        let id = store
1636            .insert_auto(collection, entity)
1637            .map_err(|err| RedDBError::Internal(err.to_string()))?;
1638
1639        if !metadata.is_empty() {
1640            let _ = store.set_metadata(
1641                collection,
1642                id,
1643                Metadata::with_fields(metadata.into_iter().collect()),
1644            );
1645        }
1646
1647        self.cdc_emit(
1648            crate::replication::cdc::ChangeOperation::Insert,
1649            collection,
1650            id.raw(),
1651            "timeseries",
1652        );
1653
1654        Ok(id)
1655    }
1656
1657    /// Execute UPDATE table SET col=val, ... WHERE filter
1658    ///
1659    /// Scans the target collection, evaluates the WHERE filter against each
1660    /// record, and patches every matching entity.
1661    pub fn execute_update(
1662        &self,
1663        raw_query: &str,
1664        query: &UpdateQuery,
1665    ) -> RedDBResult<RuntimeQueryResult> {
1666        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
1667        // Issue #523 — blockchain collections are immutable. Reject before
1668        // RLS / RETURNING work so the operator sees a clean 409-mapped
1669        // error instead of a partially-applied mutation surface.
1670        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
1671            return Err(RedDBError::InvalidOperation(format!(
1672                "BlockchainCollectionImmutable: UPDATE not allowed on '{}'",
1673                query.table
1674            )));
1675        }
1676        // CollectionContract gate (#50): runs the APPEND ONLY guard
1677        // (and any future contract bits) before RLS / RETURNING work
1678        // so the operator's immutability declaration is honoured
1679        // uniformly and the error message points at the DDL rather
1680        // than at a downstream symptom.
1681        crate::runtime::collection_contract::CollectionContractGate::check(
1682            self,
1683            &query.table,
1684            crate::runtime::collection_contract::MutationKind::Update,
1685        )?;
1686        ensure_update_target_contract(self, &query.table, query.target)?;
1687        ensure_graph_identity_update_target_allowed(query)?;
1688
1689        // Apply RLS augmentation first so every downstream path — plain
1690        // UPDATE, UPDATE...RETURNING, the inner scan — observes the
1691        // same policy-filtered target set. This prevents RETURNING
1692        // from ever exposing rows the UPDATE policy would have
1693        // denied.
1694        let rls_gated = crate::runtime::impl_core::rls_is_enabled(self, &query.table);
1695        let augmented_query: UpdateQuery;
1696        let effective_query: &UpdateQuery = if rls_gated {
1697            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
1698                self,
1699                &query.table,
1700                crate::storage::query::ast::PolicyAction::Update,
1701            );
1702            let Some(policy) = rls_filter else {
1703                // No admitting policy: zero rows affected, empty
1704                // RETURNING (never leak rows the caller can't touch).
1705                let mut response = RuntimeQueryResult::dml_result(
1706                    raw_query.to_string(),
1707                    0,
1708                    "update",
1709                    "runtime-dml-rls",
1710                );
1711                if let Some(items) = query.returning.clone() {
1712                    response.result = build_returning_result(&items, &[], None);
1713                }
1714                return Ok(response);
1715            };
1716            let mut augmented = query.clone();
1717            augmented.filter = Some(match augmented.filter.take() {
1718                Some(existing) => {
1719                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
1720                }
1721                None => policy,
1722            });
1723            augmented_query = augmented;
1724            &augmented_query
1725        } else {
1726            query
1727        };
1728
1729        // RETURNING wraps the inner executor and uses the touched-id
1730        // list the inner reports so the post-image reflects exactly
1731        // the rows the UPDATE actually mutated (not whatever a
1732        // separate SELECT might have observed).
1733        if let Some(items) = effective_query.returning.clone() {
1734            let mut inner_query = effective_query.clone();
1735            inner_query.returning = None;
1736            let (mut response, touched_ids) =
1737                self.execute_update_inner_tracked(raw_query, &inner_query)?;
1738
1739            let snapshots = if matches!(
1740                effective_query.target,
1741                UpdateTarget::Nodes | UpdateTarget::Edges
1742            ) {
1743                graph_update_returning_snapshots(self, &effective_query.table, &touched_ids)
1744            } else {
1745                super::dml_target_scan::DmlTargetScan::new(self, &effective_query.table, None, None)
1746                    .row_snapshots(&touched_ids)
1747            };
1748
1749            response.result = build_returning_result(&items, &snapshots, None);
1750            response.engine = "runtime-dml-returning";
1751            return Ok(response);
1752        }
1753
1754        self.execute_update_inner(raw_query, effective_query)
1755    }
1756
1757    /// Back-compat shim: the older entry point ignored touched ids.
1758    fn execute_update_inner(
1759        &self,
1760        raw_query: &str,
1761        query: &UpdateQuery,
1762    ) -> RedDBResult<RuntimeQueryResult> {
1763        self.execute_update_inner_tracked(raw_query, query)
1764            .map(|(res, _)| res)
1765    }
1766
1767    fn execute_update_inner_tracked(
1768        &self,
1769        raw_query: &str,
1770        query: &UpdateQuery,
1771    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1772        let store = self.inner.db.store();
1773        let effective_filter = effective_update_filter(query);
1774        let compiled_plan = self.compile_update_plan(query)?;
1775        let mut touched_ids: Vec<EntityId> = Vec::new();
1776        let limit_cap = query.limit.map(|l| l as usize);
1777        let manager = store
1778            .get_collection(&query.table)
1779            .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1780        let scan_limit = if query.order_by.is_empty() {
1781            limit_cap
1782        } else {
1783            None
1784        };
1785        let ids_to_update = super::dml_target_scan::DmlTargetScan::with_update_target(
1786            self,
1787            &query.table,
1788            effective_filter.as_ref(),
1789            scan_limit,
1790            query.target,
1791        )
1792        .find_target_ids()?;
1793        let ids_to_update = if query.order_by.is_empty() {
1794            ids_to_update
1795        } else {
1796            ordered_update_target_ids(&manager, &ids_to_update, &query.order_by, limit_cap)
1797        };
1798
1799        if update_needs_rmw_lock(query) {
1800            return self.execute_update_inner_tracked_locked(
1801                raw_query,
1802                query,
1803                &compiled_plan,
1804                &ids_to_update,
1805                effective_filter.as_ref(),
1806            );
1807        }
1808
1809        let mut affected: u64 = 0;
1810        for chunk in ids_to_update.chunks(UPDATE_APPLY_CHUNK_SIZE) {
1811            let mut applied_chunk = Vec::with_capacity(chunk.len());
1812            for entity in manager.get_many(chunk).into_iter().flatten() {
1813                let assignments =
1814                    self.materialize_update_assignments_for_entity(query, &entity, &compiled_plan)?;
1815                let applied = self.apply_materialized_update_for_entity(
1816                    query.table.clone(),
1817                    entity,
1818                    &compiled_plan,
1819                    assignments,
1820                )?;
1821                touched_ids.push(applied.id);
1822                applied_chunk.push(applied);
1823            }
1824            self.persist_update_chunk(&applied_chunk)?;
1825            affected += applied_chunk.len() as u64;
1826            let lsns = self.flush_update_chunk(&applied_chunk)?;
1827            if !query.suppress_events {
1828                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1829            }
1830        }
1831
1832        if affected > 0 {
1833            self.note_table_write(&query.table);
1834        }
1835
1836        Ok((
1837            RuntimeQueryResult::dml_result(
1838                raw_query.to_string(),
1839                affected,
1840                "update",
1841                "runtime-dml",
1842            ),
1843            touched_ids,
1844        ))
1845    }
1846
1847    fn execute_update_inner_tracked_locked(
1848        &self,
1849        raw_query: &str,
1850        query: &UpdateQuery,
1851        compiled_plan: &CompiledUpdatePlan,
1852        ids_to_update: &[EntityId],
1853        effective_filter: Option<&Filter>,
1854    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1855        let store = self.inner.db.store();
1856        let mut touched_ids = Vec::new();
1857        let mut lock_entries = Vec::new();
1858
1859        for id in ids_to_update {
1860            let Some(candidate) = store.get(&query.table, *id) else {
1861                continue;
1862            };
1863            let logical_id = candidate.logical_id();
1864            let lock_key = format!("row:{}", logical_id.raw());
1865            let rmw_lock = self.inner.rmw_locks.lock_for(&query.table, &lock_key);
1866            lock_entries.push((lock_key, logical_id, rmw_lock));
1867        }
1868
1869        lock_entries.sort_by(|left, right| left.0.cmp(&right.0));
1870        lock_entries.dedup_by(|left, right| left.0 == right.0);
1871        let _rmw_guards: Vec<_> = lock_entries.iter().map(|entry| entry.2.lock()).collect();
1872
1873        let mut applied_chunk = Vec::new();
1874        for (_, logical_id, _) in &lock_entries {
1875            let Some(entity) = resolve_update_entity_by_logical_id(self, &query.table, *logical_id)
1876            else {
1877                continue;
1878            };
1879            if let Some(filter) = effective_filter {
1880                if !crate::runtime::query_exec::evaluate_entity_filter_with_db(
1881                    Some(self.inner.db.as_ref()),
1882                    &entity,
1883                    filter,
1884                    &query.table,
1885                    &query.table,
1886                ) {
1887                    continue;
1888                }
1889            }
1890
1891            let assignments =
1892                self.materialize_update_assignments_for_entity(query, &entity, compiled_plan)?;
1893            let applied = self.apply_materialized_update_for_entity(
1894                query.table.clone(),
1895                entity,
1896                compiled_plan,
1897                assignments,
1898            )?;
1899            touched_ids.push(applied.id);
1900            applied_chunk.push(applied);
1901        }
1902
1903        let affected = applied_chunk.len() as u64;
1904        if !applied_chunk.is_empty() {
1905            self.persist_update_chunk(&applied_chunk)?;
1906            let lsns = self.flush_update_chunk(&applied_chunk)?;
1907            if !query.suppress_events {
1908                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1909            }
1910        }
1911
1912        if affected > 0 {
1913            self.note_table_write(&query.table);
1914        }
1915
1916        Ok((
1917            RuntimeQueryResult::dml_result(
1918                raw_query.to_string(),
1919                affected,
1920                "update",
1921                "runtime-dml",
1922            ),
1923            touched_ids,
1924        ))
1925    }
1926
1927    fn compile_update_plan(&self, query: &UpdateQuery) -> RedDBResult<CompiledUpdatePlan> {
1928        let mut static_field_assignments = Vec::new();
1929        let mut static_metadata_assignments = Vec::new();
1930        let mut dynamic_assignments = Vec::new();
1931        let row_contract_plan = build_row_update_contract_plan(&self.db(), &query.table)?;
1932        let mut row_modified_columns = Vec::new();
1933
1934        for (idx, (column, expr)) in query.assignment_exprs.iter().enumerate() {
1935            let compound_op = query.compound_assignment_ops.get(idx).copied().flatten();
1936            let metadata_key = resolve_sql_ttl_metadata_key(column);
1937            if compound_op.is_some() && metadata_key.is_some() {
1938                return Err(RedDBError::Query(format!(
1939                    "compound assignment is only supported for row fields: {column}"
1940                )));
1941            }
1942            if compound_op.is_none() {
1943                if let Ok(value) = fold_expr_to_value(expr.clone()) {
1944                    if let Some(metadata_key) = metadata_key {
1945                        let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
1946                        let (canonical_key, canonical_value) =
1947                            canonicalize_sql_ttl_metadata(metadata_key, raw_value);
1948                        static_metadata_assignments
1949                            .push((canonical_key.to_string(), canonical_value));
1950                    } else {
1951                        let value = self.resolve_crypto_sentinel(value)?;
1952                        static_field_assignments.push((
1953                            column.clone(),
1954                            normalize_row_update_assignment_with_plan(
1955                                &query.table,
1956                                column,
1957                                value,
1958                                row_contract_plan.as_ref(),
1959                            )?,
1960                        ));
1961                        row_modified_columns.push(column.clone());
1962                    }
1963                    continue;
1964                }
1965            }
1966
1967            dynamic_assignments.push(CompiledUpdateAssignment {
1968                column: column.clone(),
1969                expr: expr.clone(),
1970                compound_op,
1971                metadata_key,
1972                row_rule: if metadata_key.is_none() {
1973                    if let Some(plan) = row_contract_plan.as_ref() {
1974                        if plan.timestamps_enabled
1975                            && (column == "created_at" || column == "updated_at")
1976                        {
1977                            return Err(RedDBError::Query(format!(
1978                                "collection '{}' manages '{}' automatically — do not set it in UPDATE",
1979                                query.table, column
1980                            )));
1981                        }
1982                        if let Some(rule) = plan.declared_rules.get(column) {
1983                            Some(rule.clone())
1984                        } else if plan.strict_schema {
1985                            return Err(RedDBError::Query(format!(
1986                                "collection '{}' is strict and does not allow undeclared fields: {}",
1987                                query.table, column
1988                            )));
1989                        } else {
1990                            None
1991                        }
1992                    } else {
1993                        None
1994                    }
1995                } else {
1996                    None
1997                },
1998            });
1999            if metadata_key.is_none() {
2000                row_modified_columns.push(column.clone());
2001            }
2002        }
2003
2004        let row_modified_columns = dedupe_update_columns(row_modified_columns);
2005        let row_touches_unique_columns = row_contract_plan.as_ref().is_some_and(|plan| {
2006            row_modified_columns.iter().any(|column| {
2007                plan.unique_columns
2008                    .keys()
2009                    .any(|unique| unique.eq_ignore_ascii_case(column))
2010            })
2011        });
2012
2013        if let Some(ttl_ms) = query.ttl_ms {
2014            static_metadata_assignments
2015                .push(("_ttl_ms".to_string(), metadata_u64_to_value(ttl_ms)));
2016        }
2017        if let Some(expires_at_ms) = query.expires_at_ms {
2018            static_metadata_assignments.push((
2019                "_expires_at".to_string(),
2020                metadata_u64_to_value(expires_at_ms),
2021            ));
2022        }
2023        for (key, val) in &query.with_metadata {
2024            static_metadata_assignments.push((key.clone(), storage_value_to_metadata_value(val)));
2025        }
2026
2027        Ok(CompiledUpdatePlan {
2028            static_field_assignments,
2029            static_metadata_assignments,
2030            dynamic_assignments,
2031            row_contract_plan,
2032            row_modified_columns,
2033            row_touches_unique_columns,
2034        })
2035    }
2036
2037    fn materialize_update_assignments_for_entity(
2038        &self,
2039        query: &UpdateQuery,
2040        entity: &UnifiedEntity,
2041        compiled_plan: &CompiledUpdatePlan,
2042    ) -> RedDBResult<MaterializedUpdateAssignments> {
2043        let mut assignments = MaterializedUpdateAssignments::default();
2044        let mut record: Option<UnifiedRecord> = None;
2045
2046        for assignment in &compiled_plan.dynamic_assignments {
2047            if assignment.compound_op.is_some()
2048                && !matches!(
2049                    entity.data,
2050                    EntityData::Row(_) | EntityData::Node(_) | EntityData::Edge(_)
2051                )
2052            {
2053                return Err(RedDBError::Query(format!(
2054                    "compound assignment is only supported for row or graph UPDATE column '{}'",
2055                    assignment.column
2056                )));
2057            }
2058            if record.is_none() {
2059                record = runtime_any_record_from_entity_ref(entity);
2060            }
2061            let Some(record) = record.as_ref() else {
2062                return Err(RedDBError::Query(format!(
2063                    "UPDATE could not materialize runtime record for entity {} in '{}'",
2064                    entity.id.raw(),
2065                    query.table
2066                )));
2067            };
2068            let rhs = super::expr_eval::evaluate_runtime_expr_with_db(
2069                Some(self.inner.db.as_ref()),
2070                &assignment.expr,
2071                record,
2072                Some(query.table.as_str()),
2073                Some(query.table.as_str()),
2074            )
2075            .ok_or_else(|| {
2076                RedDBError::Query(format!(
2077                    "failed to evaluate UPDATE expression for column '{}'",
2078                    assignment.column
2079                ))
2080            })?;
2081            let value = if let Some(op) = assignment.compound_op {
2082                evaluate_compound_update_assignment(&assignment.column, record, op, rhs)?
2083            } else {
2084                rhs
2085            };
2086
2087            if let Some(metadata_key) = assignment.metadata_key {
2088                let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
2089                let (canonical_key, canonical_value) =
2090                    canonicalize_sql_ttl_metadata(metadata_key, raw_value);
2091                assignments
2092                    .dynamic_metadata_assignments
2093                    .push((canonical_key.to_string(), canonical_value));
2094            } else {
2095                assignments.dynamic_field_assignments.push((
2096                    assignment.column.clone(),
2097                    normalize_row_update_value_for_rule(
2098                        &query.table,
2099                        self.resolve_crypto_sentinel(value)?,
2100                        assignment.row_rule.as_ref(),
2101                    )?,
2102                ));
2103            }
2104        }
2105
2106        Ok(assignments)
2107    }
2108
2109    fn apply_materialized_update_for_entity(
2110        &self,
2111        collection: String,
2112        entity: UnifiedEntity,
2113        compiled_plan: &CompiledUpdatePlan,
2114        assignments: MaterializedUpdateAssignments,
2115    ) -> RedDBResult<AppliedEntityMutation> {
2116        if matches!(entity.data, EntityData::Row(_)) {
2117            return self.apply_loaded_sql_update_row_core(
2118                collection,
2119                entity,
2120                &compiled_plan.static_field_assignments,
2121                assignments.dynamic_field_assignments,
2122                &compiled_plan.static_metadata_assignments,
2123                assignments.dynamic_metadata_assignments,
2124                compiled_plan.row_contract_plan.as_ref(),
2125                &compiled_plan.row_modified_columns,
2126                compiled_plan.row_touches_unique_columns,
2127            );
2128        }
2129
2130        ensure_graph_identity_update_allowed(&entity, compiled_plan, &assignments)?;
2131
2132        let operations = build_patch_operations_from_materialized_assignments(
2133            &entity,
2134            compiled_plan,
2135            assignments,
2136        );
2137        self.apply_loaded_patch_entity_core(
2138            collection,
2139            entity,
2140            crate::json::Value::Null,
2141            operations,
2142        )
2143    }
2144
2145    /// Execute DELETE FROM table WHERE filter
2146    pub fn execute_delete(
2147        &self,
2148        raw_query: &str,
2149        query: &DeleteQuery,
2150    ) -> RedDBResult<RuntimeQueryResult> {
2151        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
2152        // Issue #523 — blockchain collections are immutable; see
2153        // execute_update for the same gate.
2154        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
2155            return Err(RedDBError::InvalidOperation(format!(
2156                "BlockchainCollectionImmutable: DELETE not allowed on '{}'",
2157                query.table
2158            )));
2159        }
2160        // CollectionContract gate (#50) — see execute_update for
2161        // rationale. The gate handles APPEND ONLY rejection and is
2162        // the single point where future contract bits land.
2163        crate::runtime::collection_contract::CollectionContractGate::check(
2164            self,
2165            &query.table,
2166            crate::runtime::collection_contract::MutationKind::Delete,
2167        )?;
2168
2169        // RETURNING on DELETE: capture the pre-image via an internal
2170        // SELECT that reuses the same WHERE, then run the delete with
2171        // the RETURNING clause stripped, then project the captured
2172        // rows through the requested items. The extra SELECT is a
2173        // pragmatic MVP — a future pass can fuse the scan with the
2174        // delete to avoid the second pass over the heap.
2175        if let Some(items) = query.returning.clone() {
2176            let select_sql = delete_to_select_sql(raw_query).ok_or_else(|| {
2177                RedDBError::Query(
2178                    "DELETE ... RETURNING: cannot rewrite query for pre-image scan".to_string(),
2179                )
2180            })?;
2181            let captured = self.execute_query(&select_sql)?;
2182
2183            let mut inner_query = query.clone();
2184            inner_query.returning = None;
2185            let _ = self.execute_delete(raw_query, &inner_query)?;
2186
2187            let snapshots: Vec<Vec<(String, Value)>> = captured
2188                .result
2189                .records
2190                .iter()
2191                .map(|rec| {
2192                    rec.iter_fields()
2193                        .map(|(k, v)| (k.as_ref().to_string(), v.clone()))
2194                        .collect()
2195                })
2196                .collect();
2197            let affected = snapshots.len() as u64;
2198            let result = build_returning_result(&items, &snapshots, None);
2199
2200            let mut response = RuntimeQueryResult::dml_result(
2201                raw_query.to_string(),
2202                affected,
2203                "delete",
2204                "runtime-dml-returning",
2205            );
2206            response.result = result;
2207            return Ok(response);
2208        }
2209        // Row-Level Security enforcement (Phase 2.5.2 PG parity).
2210        //
2211        // When the table has RLS enabled, gate the DELETE by the
2212        // per-role policy set: mutations only touch rows that *every*
2213        // matching `FOR DELETE` policy would accept. No policies =>
2214        // zero rows affected (PG restrictive-default).
2215        if crate::runtime::impl_core::rls_is_enabled(self, &query.table) {
2216            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
2217                self,
2218                &query.table,
2219                crate::storage::query::ast::PolicyAction::Delete,
2220            );
2221            let Some(policy) = rls_filter else {
2222                return Ok(RuntimeQueryResult::dml_result(
2223                    raw_query.to_string(),
2224                    0,
2225                    "delete",
2226                    "runtime-dml-rls",
2227                ));
2228            };
2229            // Fold the policy predicate into the user's WHERE before
2230            // dispatching — the remainder of this function reads the
2231            // filter from `query` via `effective_delete_filter`, which
2232            // respects the updated value.
2233            let mut augmented = query.clone();
2234            augmented.filter = Some(match augmented.filter.take() {
2235                Some(existing) => {
2236                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
2237                }
2238                None => policy,
2239            });
2240            return self.execute_delete_inner(raw_query, &augmented);
2241        }
2242        self.execute_delete_inner(raw_query, query)
2243    }
2244
2245    fn execute_delete_inner(
2246        &self,
2247        raw_query: &str,
2248        query: &DeleteQuery,
2249    ) -> RedDBResult<RuntimeQueryResult> {
2250        let effective_filter = effective_delete_filter(query);
2251
2252        // Find the rows that match the WHERE clause. The "find target
2253        // rows" loop lives in DmlTargetScan so UPDATE (#52) can reuse
2254        // the same scan strategy.
2255        let scan = super::dml_target_scan::DmlTargetScan::new(
2256            self,
2257            &query.table,
2258            effective_filter.as_ref(),
2259            None,
2260        );
2261        let ids_to_delete = scan.find_target_ids()?;
2262
2263        // For event-enabled collections, snapshot the pre-delete state
2264        // before rows are physically removed.
2265        let needs_delete_events =
2266            !query.suppress_events && self.collection_has_delete_subscriptions(&query.table);
2267        let mut pre_images: HashMap<u64, crate::json::Value> = if needs_delete_events {
2268            scan.row_json_pre_images(&ids_to_delete)
2269        } else {
2270            HashMap::new()
2271        };
2272
2273        let mut affected: u64 = 0;
2274        for chunk in ids_to_delete.chunks(UPDATE_APPLY_CHUNK_SIZE) {
2275            let (count, lsns) = self.delete_entities_batch(&query.table, chunk)?;
2276            affected += count;
2277            if needs_delete_events && !lsns.is_empty() {
2278                // lsns.len() == actually-deleted entities; align with chunk ids.
2279                // `delete_batch` may skip missing entities, so we correlate by
2280                // the number returned (they're emitted in chunk order).
2281                let deleted_chunk = &chunk[..lsns.len().min(chunk.len())];
2282                self.emit_delete_events_for_collection(
2283                    &query.table,
2284                    deleted_chunk,
2285                    &lsns,
2286                    &pre_images,
2287                )?;
2288            }
2289        }
2290        pre_images.clear();
2291
2292        if affected > 0 {
2293            self.note_table_write(&query.table);
2294        }
2295
2296        Ok(RuntimeQueryResult::dml_result(
2297            raw_query.to_string(),
2298            affected,
2299            "delete",
2300            "runtime-dml",
2301        ))
2302    }
2303}
2304
2305/// Reject UPDATE … NODES/EDGES that assign to graph identity/topology
2306/// columns regardless of whether any row matches the WHERE clause. The
2307/// per-entity guard below covers only the matched-rows case, but ADR 0019
2308/// declares these columns immutable on the surface itself, so a zero-row
2309/// UPDATE should still surface the same error to operators and SDKs.
2310fn ensure_graph_identity_update_target_allowed(query: &UpdateQuery) -> RedDBResult<()> {
2311    if !matches!(query.target, UpdateTarget::Nodes | UpdateTarget::Edges) {
2312        return Ok(());
2313    }
2314    for (column, _) in &query.assignment_exprs {
2315        if is_immutable_graph_identity_field(column) {
2316            return Err(RedDBError::Query(format!(
2317                "immutable graph field '{column}' cannot be updated"
2318            )));
2319        }
2320    }
2321    Ok(())
2322}
2323
2324fn ensure_graph_identity_update_allowed(
2325    entity: &UnifiedEntity,
2326    compiled_plan: &CompiledUpdatePlan,
2327    assignments: &MaterializedUpdateAssignments,
2328) -> RedDBResult<()> {
2329    if !matches!(entity.data, EntityData::Node(_) | EntityData::Edge(_)) {
2330        return Ok(());
2331    }
2332
2333    for (column, _) in compiled_plan
2334        .static_field_assignments
2335        .iter()
2336        .chain(assignments.dynamic_field_assignments.iter())
2337    {
2338        if is_immutable_graph_identity_field(column) {
2339            return Err(RedDBError::Query(format!(
2340                "immutable graph field '{column}' cannot be updated"
2341            )));
2342        }
2343    }
2344
2345    Ok(())
2346}
2347
/// Returns `true` when `column` names one of the graph identity/topology
/// fields that UPDATE refuses to touch. The comparison ignores ASCII case.
fn is_immutable_graph_identity_field(column: &str) -> bool {
    const IMMUTABLE_FIELDS: [&str; 6] = ["rid", "label", "from_rid", "to_rid", "from", "to"];

    for reserved in &IMMUTABLE_FIELDS {
        if reserved.eq_ignore_ascii_case(column) {
            return true;
        }
    }
    false
}
2353
2354fn build_patch_operations_from_materialized_assignments(
2355    entity: &UnifiedEntity,
2356    compiled_plan: &CompiledUpdatePlan,
2357    assignments: MaterializedUpdateAssignments,
2358) -> Vec<PatchEntityOperation> {
2359    let mut operations = Vec::with_capacity(
2360        compiled_plan.static_field_assignments.len()
2361            + compiled_plan.static_metadata_assignments.len()
2362            + assignments.dynamic_field_assignments.len()
2363            + assignments.dynamic_metadata_assignments.len(),
2364    );
2365
2366    for (column, value) in &compiled_plan.static_field_assignments {
2367        operations.push(PatchEntityOperation {
2368            op: PatchEntityOperationType::Set,
2369            path: update_patch_path_for_entity(entity, column),
2370            value: Some(storage_value_to_json(value)),
2371        });
2372    }
2373
2374    for (column, value) in assignments.dynamic_field_assignments {
2375        operations.push(PatchEntityOperation {
2376            op: PatchEntityOperationType::Set,
2377            path: update_patch_path_for_entity(entity, &column),
2378            value: Some(storage_value_to_json(&value)),
2379        });
2380    }
2381
2382    for (key, value) in &compiled_plan.static_metadata_assignments {
2383        operations.push(PatchEntityOperation {
2384            op: PatchEntityOperationType::Set,
2385            path: vec!["metadata".to_string(), key.clone()],
2386            value: Some(metadata_value_to_json(value)),
2387        });
2388    }
2389
2390    for (key, value) in assignments.dynamic_metadata_assignments {
2391        operations.push(PatchEntityOperation {
2392            op: PatchEntityOperationType::Set,
2393            path: vec!["metadata".to_string(), key],
2394            value: Some(metadata_value_to_json(&value)),
2395        });
2396    }
2397
2398    operations
2399}
2400
2401fn update_patch_path_for_entity(entity: &UnifiedEntity, column: &str) -> Vec<String> {
2402    if matches!(
2403        (&entity.kind, &entity.data),
2404        (
2405            crate::storage::EntityKind::GraphNode(_),
2406            EntityData::Node(_)
2407        )
2408    ) && column.eq_ignore_ascii_case("node_type")
2409    {
2410        return vec!["node_type".to_string()];
2411    }
2412    if matches!(
2413        (&entity.kind, &entity.data),
2414        (
2415            crate::storage::EntityKind::GraphEdge(_),
2416            EntityData::Edge(_)
2417        )
2418    ) && column.eq_ignore_ascii_case("weight")
2419    {
2420        return vec!["weight".to_string()];
2421    }
2422    vec!["fields".to_string(), column.to_string()]
2423}
2424
/// Rewrite `DELETE FROM <table> [WHERE …] [RETURNING …]` as
/// `SELECT * FROM <table> [WHERE …]` so the delete executor can
/// capture the pre-image before actually removing the rows.
///
/// Keywords are matched case-insensitively and may be separated by any
/// ASCII whitespace (spaces, tabs, newlines), so multi-line statements are
/// rewritten the same way as single-line ones. Everything between the table
/// name and the RETURNING clause is preserved verbatim, so WHERE / ORDER BY /
/// LIMIT survive untouched. The RETURNING tail — if present — is truncated
/// at the first top-level `RETURNING` token.
///
/// Returns `None` when the input does not start with the `DELETE` keyword,
/// has no top-level `FROM`, or has nothing left to select from once the
/// RETURNING tail is removed.
fn delete_to_select_sql(sql: &str) -> Option<String> {
    const DELETE_KEYWORD: &str = "delete";
    const FROM_KEYWORD: &str = "from";

    let trimmed = sql.trim_start();
    // ASCII lowercasing keeps byte offsets identical to `trimmed`, so
    // positions found in `lowered` can be used to slice the original text.
    let lowered = trimmed.to_ascii_lowercase();

    // `DELETE` must be a whole word: `deleted …` is not a DELETE statement.
    let after_delete = lowered.strip_prefix(DELETE_KEYWORD)?;
    if !after_delete.starts_with(|c: char| c.is_ascii_whitespace()) {
        return None;
    }

    let from_pos = find_top_level_keyword(after_delete, FROM_KEYWORD)?;
    let body_start = DELETE_KEYWORD.len() + from_pos + FROM_KEYWORD.len();
    let rest = &trimmed[body_start..];
    let rest_lowered = &lowered[body_start..];

    // Cut off the RETURNING tail. The clause appears at most once per
    // statement at top level in our grammar; matching whitespace-bounded
    // tokens outside quotes avoids clipping `RETURNING` inside a literal.
    let body = match find_top_level_keyword(rest_lowered, "returning") {
        Some(pos) => &rest[..pos],
        None => rest,
    };
    let body = body.trim();
    if body.is_empty() {
        // No table name survived: there is nothing to select from.
        return None;
    }
    Some(format!("SELECT * FROM {body}"))
}

/// Find the byte offset of a whitespace-bounded keyword in a
/// lowercased haystack, skipping matches inside single-quoted
/// string literals.
///
/// A match must be preceded by ASCII whitespace or the start of the
/// haystack and followed by ASCII whitespace or its end. Quote tracking is
/// naive — every `'` toggles the in-string state and there is no
/// backslash-escape handling — but that is enough for the shapes the DML
/// parser emits.
fn find_top_level_keyword(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let needle = needle.as_bytes();
    let mut in_string = false;

    for (start, &byte) in bytes.iter().enumerate() {
        if byte == b'\'' {
            in_string = !in_string;
            continue;
        }
        if in_string || !bytes[start..].starts_with(needle) {
            continue;
        }
        let end = start + needle.len();
        let bounded_left = start == 0 || bytes[start - 1].is_ascii_whitespace();
        let bounded_right = end == bytes.len() || bytes[end].is_ascii_whitespace();
        if bounded_left && bounded_right {
            return Some(start);
        }
    }
    None
}
2484
2485/// Build a `UnifiedResult` from the rows affected by a DML statement plus
2486/// its `RETURNING` clause. Each snapshot is a list of (column, value) pairs
2487/// for one affected row; `outputs`, when provided, supplies the engine-
2488/// assigned entity id for the same row (INSERT path). Projection honours
2489/// the RETURNING items: `*` expands to every snapshot column plus
2490/// the public row envelope when available.
2491fn build_returning_result(
2492    items: &[ReturningItem],
2493    snapshots: &[Vec<(String, Value)>],
2494    outputs: Option<&[CreateEntityOutput]>,
2495) -> UnifiedResult {
2496    let project_all = items.iter().any(|it| matches!(it, ReturningItem::All));
2497    let public_item_outputs = outputs.is_some_and(|outs| {
2498        outs.first()
2499            .and_then(|out| out.entity.as_ref())
2500            .is_some_and(|entity| public_returning_item_kind(entity).is_some())
2501    });
2502
2503    let mut columns: Vec<String> = if project_all {
2504        let mut cols: Vec<String> = Vec::new();
2505        if public_item_outputs {
2506            cols.extend(
2507                [
2508                    "rid",
2509                    "collection",
2510                    "kind",
2511                    "tenant",
2512                    "created_at",
2513                    "updated_at",
2514                ]
2515                .into_iter()
2516                .map(str::to_string),
2517            );
2518        } else if outputs.is_some() {
2519            cols.push("red_entity_id".to_string());
2520        }
2521        if let Some(first) = snapshots.first() {
2522            for (name, _) in first {
2523                cols.push(name.clone());
2524            }
2525        }
2526        cols
2527    } else {
2528        items
2529            .iter()
2530            .filter_map(|it| match it {
2531                ReturningItem::Column(c) => Some(c.clone()),
2532                ReturningItem::All => None,
2533            })
2534            .collect()
2535    };
2536    // Guarantee unique order-preserving column list.
2537    {
2538        let mut seen = std::collections::HashSet::new();
2539        columns.retain(|c| seen.insert(c.clone()));
2540    }
2541
2542    let mut records: Vec<UnifiedRecord> = Vec::with_capacity(snapshots.len());
2543    for (idx, snap) in snapshots.iter().enumerate() {
2544        let mut values: HashMap<Arc<str>, Value> = HashMap::with_capacity(columns.len());
2545        if let Some(outs) = outputs {
2546            if let Some(out) = outs.get(idx) {
2547                if let Some(entity) = out.entity.as_ref() {
2548                    if let Some(kind) = public_returning_item_kind(entity) {
2549                        values.insert(
2550                            Arc::clone(&sys_key_rid()),
2551                            Value::UnsignedInteger(out.id.raw()),
2552                        );
2553                        values.insert(
2554                            Arc::clone(&sys_key_collection()),
2555                            Value::text(entity.kind.collection().to_string()),
2556                        );
2557                        values.insert(Arc::clone(&sys_key_kind()), Value::text(kind.to_string()));
2558                        values.insert(
2559                            Arc::clone(&sys_key_created_at()),
2560                            Value::UnsignedInteger(entity.created_at),
2561                        );
2562                        values.insert(
2563                            Arc::clone(&sys_key_updated_at()),
2564                            Value::UnsignedInteger(entity.updated_at),
2565                        );
2566                        // Legacy alias: an explicit `RETURNING red_entity_id`
2567                        // still resolves to the row's rid. Only surfaces when
2568                        // the projected column list names it — `RETURNING *`
2569                        // keeps the envelope clean (rid, not red_entity_id).
2570                        values.insert(
2571                            Arc::clone(&sys_key_red_entity_id()),
2572                            Value::UnsignedInteger(out.id.raw()),
2573                        );
2574                    } else {
2575                        values.insert(
2576                            Arc::clone(&sys_key_red_entity_id()),
2577                            Value::Integer(out.id.raw() as i64),
2578                        );
2579                    }
2580                } else {
2581                    values.insert(
2582                        Arc::clone(&sys_key_red_entity_id()),
2583                        Value::Integer(out.id.raw() as i64),
2584                    );
2585                }
2586            }
2587        }
2588        for (name, val) in snap {
2589            values.insert(Arc::from(name.as_str()), val.clone());
2590        }
2591        if !values.contains_key("tenant") {
2592            let tenant = values.get("tenant_id").cloned().unwrap_or(Value::Null);
2593            values.insert(Arc::clone(&sys_key_tenant()), tenant);
2594        }
2595        let mut rec = UnifiedRecord::default();
2596        // Only keep projected columns on the record.
2597        for col in &columns {
2598            if let Some(v) = values.get(col.as_str()) {
2599                rec.set_arc(Arc::from(col.as_str()), v.clone());
2600            }
2601        }
2602        records.push(rec);
2603    }
2604
2605    UnifiedResult {
2606        columns,
2607        records,
2608        stats: Default::default(),
2609        pre_serialized_json: None,
2610    }
2611}
2612
2613fn public_returning_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<&'static str> {
2614    match (&entity.kind, &entity.data) {
2615        (crate::storage::EntityKind::GraphNode(_), crate::storage::EntityData::Node(_)) => {
2616            Some("node")
2617        }
2618        (crate::storage::EntityKind::GraphEdge(_), crate::storage::EntityData::Edge(_)) => {
2619            Some("edge")
2620        }
2621        (_, crate::storage::EntityData::Row(_)) => Some(public_returning_row_kind(entity)),
2622        _ => None,
2623    }
2624}
2625
2626fn public_returning_row_kind(entity: &crate::storage::UnifiedEntity) -> &'static str {
2627    let Some(row) = entity.data.as_row() else {
2628        return "row";
2629    };
2630
2631    let is_kv = row.named.as_ref().is_some_and(|named| {
2632        (named.len() == 2 && named.contains_key("key") && named.contains_key("value"))
2633            || (named.len() == 1 && (named.contains_key("key") || named.contains_key("value")))
2634    });
2635    if is_kv {
2636        return "kv";
2637    }
2638
2639    let is_document = row
2640        .named
2641        .as_ref()
2642        .is_some_and(|named| named.values().any(runtime_returning_documentish_value))
2643        || row.columns.iter().any(runtime_returning_documentish_value);
2644    if is_document {
2645        "document"
2646    } else {
2647        "row"
2648    }
2649}
2650
2651fn runtime_returning_documentish_value(value: &Value) -> bool {
2652    matches!(value, Value::Json(_) | Value::Blob(_))
2653}
2654
2655fn row_insert_returning_snapshots(
2656    outputs: &[CreateEntityOutput],
2657    fallback: Vec<Vec<(String, Value)>>,
2658) -> Vec<Vec<(String, Value)>> {
2659    outputs
2660        .iter()
2661        .enumerate()
2662        .map(|(idx, out)| {
2663            out.entity
2664                .as_ref()
2665                .map(entity_row_fields_snapshot)
2666                .filter(|snap| !snap.is_empty())
2667                .unwrap_or_else(|| fallback.get(idx).cloned().unwrap_or_default())
2668        })
2669        .collect()
2670}
2671
2672fn graph_insert_returning_snapshots(
2673    store: &crate::storage::unified::UnifiedStore,
2674    collection: &str,
2675    ids: &[EntityId],
2676) -> Vec<Vec<(String, Value)>> {
2677    let Some(manager) = store.get_collection(collection) else {
2678        return Vec::new();
2679    };
2680
2681    ids.iter()
2682        .filter_map(|id| manager.get(*id))
2683        .filter_map(|entity| {
2684            let mut record = runtime_any_record_from_entity_ref(&entity)?;
2685            record.set_arc(sys_key_collection(), Value::text(collection.to_string()));
2686            Some(record)
2687        })
2688        .map(|record| {
2689            record
2690                .iter_fields()
2691                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2692                .collect()
2693        })
2694        .collect()
2695}
2696
2697fn graph_update_returning_snapshots(
2698    runtime: &RedDBRuntime,
2699    collection: &str,
2700    ids: &[EntityId],
2701) -> Vec<Vec<(String, Value)>> {
2702    let store = runtime.db().store();
2703    let Some(manager) = store.get_collection(collection) else {
2704        return Vec::new();
2705    };
2706
2707    manager
2708        .get_many(ids)
2709        .into_iter()
2710        .flatten()
2711        .filter_map(|entity| runtime_any_record_from_entity_ref(&entity))
2712        .map(|record| {
2713            record
2714                .iter_fields()
2715                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2716                .collect()
2717        })
2718        .collect()
2719}
2720
2721fn ensure_update_target_contract(
2722    runtime: &RedDBRuntime,
2723    collection: &str,
2724    target: UpdateTarget,
2725) -> RedDBResult<()> {
2726    let Some(contract) = runtime.db().collection_contract(collection) else {
2727        return Ok(());
2728    };
2729    if update_target_contract_is_advisory(&contract)
2730        || update_target_allows_model(contract.declared_model, update_target_model(target))
2731    {
2732        return Ok(());
2733    }
2734    Err(RedDBError::InvalidOperation(format!(
2735        "collection '{}' is declared as '{}' and does not allow '{}' updates",
2736        collection,
2737        update_model_name(contract.declared_model),
2738        update_model_name(update_target_model(target))
2739    )))
2740}
2741
2742fn update_target_contract_is_advisory(contract: &crate::physical::CollectionContract) -> bool {
2743    matches!(
2744        (&contract.origin, &contract.schema_mode),
2745        (
2746            crate::physical::ContractOrigin::Implicit,
2747            crate::catalog::SchemaMode::Dynamic,
2748        )
2749    )
2750}
2751
2752fn update_target_model(target: UpdateTarget) -> crate::catalog::CollectionModel {
2753    match target {
2754        UpdateTarget::Rows => crate::catalog::CollectionModel::Table,
2755        UpdateTarget::Documents => crate::catalog::CollectionModel::Document,
2756        UpdateTarget::Kv => crate::catalog::CollectionModel::Kv,
2757        UpdateTarget::Nodes | UpdateTarget::Edges => crate::catalog::CollectionModel::Graph,
2758    }
2759}
2760
2761fn update_target_allows_model(
2762    declared_model: crate::catalog::CollectionModel,
2763    requested_model: crate::catalog::CollectionModel,
2764) -> bool {
2765    declared_model == requested_model || declared_model == crate::catalog::CollectionModel::Mixed
2766}
2767
2768fn update_model_name(model: crate::catalog::CollectionModel) -> &'static str {
2769    match model {
2770        crate::catalog::CollectionModel::Table => "table",
2771        crate::catalog::CollectionModel::Document => "document",
2772        crate::catalog::CollectionModel::Graph => "graph",
2773        crate::catalog::CollectionModel::Vector => "vector",
2774        crate::catalog::CollectionModel::Hll => "hll",
2775        crate::catalog::CollectionModel::Sketch => "sketch",
2776        crate::catalog::CollectionModel::Filter => "filter",
2777        crate::catalog::CollectionModel::Kv => "kv",
2778        crate::catalog::CollectionModel::Config => "config",
2779        crate::catalog::CollectionModel::Vault => "vault",
2780        crate::catalog::CollectionModel::Mixed => "mixed",
2781        crate::catalog::CollectionModel::TimeSeries => "timeseries",
2782        crate::catalog::CollectionModel::Queue => "queue",
2783        crate::catalog::CollectionModel::Metrics => "metrics",
2784    }
2785}
2786
2787fn ensure_graph_insert_contract(runtime: &RedDBRuntime, collection: &str) -> RedDBResult<()> {
2788    let db = runtime.db();
2789    if let Some(contract) = db.collection_contract(collection) {
2790        let advisory_implicit_dynamic = matches!(
2791            (&contract.origin, &contract.schema_mode),
2792            (
2793                crate::physical::ContractOrigin::Implicit,
2794                crate::catalog::SchemaMode::Dynamic,
2795            )
2796        );
2797        if advisory_implicit_dynamic
2798            || matches!(
2799                contract.declared_model,
2800                crate::catalog::CollectionModel::Graph | crate::catalog::CollectionModel::Mixed
2801            )
2802        {
2803            return Ok(());
2804        }
2805        return Err(RedDBError::InvalidOperation(format!(
2806            "collection '{}' is declared as '{:?}' and does not allow 'Graph' writes",
2807            collection, contract.declared_model
2808        )));
2809    }
2810
2811    let now = std::time::SystemTime::now()
2812        .duration_since(std::time::UNIX_EPOCH)
2813        .unwrap_or_default()
2814        .as_millis();
2815    db.save_collection_contract(crate::physical::CollectionContract {
2816        name: collection.to_string(),
2817        declared_model: crate::catalog::CollectionModel::Graph,
2818        schema_mode: crate::catalog::SchemaMode::Dynamic,
2819        origin: crate::physical::ContractOrigin::Implicit,
2820        version: 1,
2821        created_at_unix_ms: now,
2822        updated_at_unix_ms: now,
2823        default_ttl_ms: db.collection_default_ttl_ms(collection),
2824        vector_dimension: None,
2825        vector_metric: None,
2826        context_index_fields: Vec::new(),
2827        declared_columns: Vec::new(),
2828        table_def: None,
2829        timestamps_enabled: false,
2830        context_index_enabled: false,
2831        metrics_raw_retention_ms: None,
2832        metrics_rollup_policies: Vec::new(),
2833        metrics_tenant_identity: None,
2834        metrics_namespace: None,
2835        append_only: false,
2836        subscriptions: Vec::new(),
2837        analytics_config: Vec::new(),
2838        session_key: None,
2839        session_gap_ms: None,
2840        retention_duration_ms: None,
2841    })
2842    .map(|_| ())
2843    .map_err(|err| RedDBError::Internal(err.to_string()))
2844}
2845
2846fn update_needs_rmw_lock(query: &UpdateQuery) -> bool {
2847    query
2848        .assignment_exprs
2849        .iter()
2850        .enumerate()
2851        .any(|(idx, (column, expr))| {
2852            query
2853                .compound_assignment_ops
2854                .get(idx)
2855                .is_some_and(|op| op.is_some())
2856                || expr_references_update_column(expr, &query.table, column)
2857        })
2858}
2859
2860fn evaluate_compound_update_assignment(
2861    column: &str,
2862    record: &UnifiedRecord,
2863    op: BinOp,
2864    rhs: Value,
2865) -> RedDBResult<Value> {
2866    let lhs = record.get(column).ok_or_else(|| {
2867        RedDBError::Query(format!(
2868            "compound assignment requires existing numeric field '{column}'"
2869        ))
2870    })?;
2871    if matches!(lhs, Value::Null) {
2872        return Err(RedDBError::Query(format!(
2873            "compound assignment requires non-null numeric field '{column}'"
2874        )));
2875    }
2876    apply_compound_numeric_op(column, op, lhs, &rhs)
2877}
2878
2879fn apply_compound_numeric_op(
2880    column: &str,
2881    op: BinOp,
2882    lhs: &Value,
2883    rhs: &Value,
2884) -> RedDBResult<Value> {
2885    let Some(lhs_number) = CompoundNumber::from_value(lhs) else {
2886        return Err(RedDBError::Query(format!(
2887            "compound assignment requires numeric field '{column}'"
2888        )));
2889    };
2890    let Some(rhs_number) = CompoundNumber::from_value(rhs) else {
2891        return Err(RedDBError::Query(format!(
2892            "compound assignment requires numeric right-hand value for field '{column}'"
2893        )));
2894    };
2895
2896    if lhs_number.is_float() || rhs_number.is_float() || matches!(op, BinOp::Div) {
2897        let a = lhs_number.as_f64();
2898        let b = rhs_number.as_f64();
2899        let out = match op {
2900            BinOp::Add => a + b,
2901            BinOp::Sub => a - b,
2902            BinOp::Mul => a * b,
2903            BinOp::Div => {
2904                if b == 0.0 {
2905                    return Err(RedDBError::Query(format!(
2906                        "division by zero in compound assignment for field '{column}'"
2907                    )));
2908                }
2909                a / b
2910            }
2911            BinOp::Mod => {
2912                if b == 0.0 {
2913                    return Err(RedDBError::Query(format!(
2914                        "modulo by zero in compound assignment for field '{column}'"
2915                    )));
2916                }
2917                a % b
2918            }
2919            _ => {
2920                return Err(RedDBError::Query(format!(
2921                    "unsupported compound assignment operator for field '{column}'"
2922                )));
2923            }
2924        };
2925        if !out.is_finite() {
2926            return Err(RedDBError::Query(format!(
2927                "numeric overflow in compound assignment for field '{column}'"
2928            )));
2929        }
2930        return Ok(Value::Float(out));
2931    }
2932
2933    let a = lhs_number.as_i128();
2934    let b = rhs_number.as_i128();
2935    let out = match op {
2936        BinOp::Add => a.checked_add(b),
2937        BinOp::Sub => a.checked_sub(b),
2938        BinOp::Mul => a.checked_mul(b),
2939        BinOp::Mod => {
2940            if b == 0 {
2941                return Err(RedDBError::Query(format!(
2942                    "modulo by zero in compound assignment for field '{column}'"
2943                )));
2944            }
2945            a.checked_rem(b)
2946        }
2947        BinOp::Div => unreachable!("integer division is handled by the float branch"),
2948        _ => None,
2949    }
2950    .ok_or_else(|| {
2951        RedDBError::Query(format!(
2952            "numeric overflow in compound assignment for field '{column}'"
2953        ))
2954    })?;
2955
2956    if matches!(lhs, Value::UnsignedInteger(_)) {
2957        let value = u64::try_from(out).map_err(|_| {
2958            RedDBError::Query(format!(
2959                "numeric overflow in compound assignment for field '{column}'"
2960            ))
2961        })?;
2962        Ok(Value::UnsignedInteger(value))
2963    } else {
2964        let value = i64::try_from(out).map_err(|_| {
2965            RedDBError::Query(format!(
2966                "numeric overflow in compound assignment for field '{column}'"
2967            ))
2968        })?;
2969        Ok(Value::Integer(value))
2970    }
2971}
2972
2973#[derive(Clone, Copy)]
2974enum CompoundNumber {
2975    Integer(i128),
2976    Float(f64),
2977}
2978
2979impl CompoundNumber {
2980    fn from_value(value: &Value) -> Option<Self> {
2981        match value {
2982            Value::Integer(value) | Value::BigInt(value) => Some(Self::Integer(*value as i128)),
2983            Value::UnsignedInteger(value) => Some(Self::Integer(*value as i128)),
2984            Value::Float(value) => value.is_finite().then_some(Self::Float(*value)),
2985            Value::Decimal(value) => Some(Self::Float(*value as f64 / 10_000.0)),
2986            _ => None,
2987        }
2988    }
2989
2990    fn is_float(self) -> bool {
2991        matches!(self, Self::Float(_))
2992    }
2993
2994    fn as_f64(self) -> f64 {
2995        match self {
2996            Self::Integer(value) => value as f64,
2997            Self::Float(value) => value,
2998        }
2999    }
3000
3001    fn as_i128(self) -> i128 {
3002        match self {
3003            Self::Integer(value) => value,
3004            Self::Float(_) => unreachable!("float compound number used as integer"),
3005        }
3006    }
3007}
3008
3009fn expr_references_update_column(expr: &Expr, table_name: &str, target_column: &str) -> bool {
3010    match expr {
3011        Expr::Literal { .. } | Expr::Parameter { .. } | Expr::Subquery { .. } => false,
3012        Expr::Column { field, .. } => {
3013            field_ref_matches_update_column(field, table_name, target_column)
3014        }
3015        Expr::BinaryOp { lhs, rhs, .. } => {
3016            expr_references_update_column(lhs, table_name, target_column)
3017                || expr_references_update_column(rhs, table_name, target_column)
3018        }
3019        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
3020            expr_references_update_column(operand, table_name, target_column)
3021        }
3022        Expr::FunctionCall { args, .. } => args
3023            .iter()
3024            .any(|arg| expr_references_update_column(arg, table_name, target_column)),
3025        Expr::Case {
3026            branches, else_, ..
3027        } => {
3028            branches.iter().any(|(cond, value)| {
3029                expr_references_update_column(cond, table_name, target_column)
3030                    || expr_references_update_column(value, table_name, target_column)
3031            }) || else_
3032                .as_deref()
3033                .is_some_and(|expr| expr_references_update_column(expr, table_name, target_column))
3034        }
3035        Expr::IsNull { operand, .. } => {
3036            expr_references_update_column(operand, table_name, target_column)
3037        }
3038        Expr::InList { target, values, .. } => {
3039            expr_references_update_column(target, table_name, target_column)
3040                || values
3041                    .iter()
3042                    .any(|value| expr_references_update_column(value, table_name, target_column))
3043        }
3044        Expr::Between {
3045            target, low, high, ..
3046        } => {
3047            expr_references_update_column(target, table_name, target_column)
3048                || expr_references_update_column(low, table_name, target_column)
3049                || expr_references_update_column(high, table_name, target_column)
3050        }
3051        Expr::WindowFunctionCall { args, window, .. } => {
3052            args.iter()
3053                .any(|arg| expr_references_update_column(arg, table_name, target_column))
3054                || window
3055                    .partition_by
3056                    .iter()
3057                    .any(|e| expr_references_update_column(e, table_name, target_column))
3058                || window
3059                    .order_by
3060                    .iter()
3061                    .any(|o| expr_references_update_column(&o.expr, table_name, target_column))
3062        }
3063    }
3064}
3065
3066fn field_ref_matches_update_column(
3067    field: &FieldRef,
3068    table_name: &str,
3069    target_column: &str,
3070) -> bool {
3071    match field {
3072        FieldRef::TableColumn { table, column } => {
3073            column.eq_ignore_ascii_case(target_column)
3074                && (table.is_empty() || table.eq_ignore_ascii_case(table_name))
3075        }
3076        FieldRef::NodeProperty { .. } | FieldRef::EdgeProperty { .. } | FieldRef::NodeId { .. } => {
3077            false
3078        }
3079    }
3080}
3081
3082fn resolve_update_entity_by_logical_id(
3083    runtime: &RedDBRuntime,
3084    table: &str,
3085    logical_id: EntityId,
3086) -> Option<UnifiedEntity> {
3087    let store = runtime.inner.db.store();
3088    if let Some(entity) =
3089        crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement()
3090            .resolve_logical_id(&store, table, logical_id)
3091    {
3092        return Some(entity);
3093    }
3094    // Fallback for non-table-row entities (graph nodes/edges, etc.) where
3095    // entity_id == logical_id and the MVCC table-row resolver doesn't apply.
3096    store.get(table, logical_id)
3097}
3098
3099fn update_cdc_item_kind(
3100    runtime: &RedDBRuntime,
3101    collection: &str,
3102    entity: &UnifiedEntity,
3103) -> &'static str {
3104    match &entity.data {
3105        EntityData::Node(_) => return "node",
3106        EntityData::Edge(_) => return "edge",
3107        _ => {}
3108    }
3109
3110    match runtime
3111        .db()
3112        .collection_contract(collection)
3113        .map(|contract| contract.declared_model)
3114    {
3115        Some(crate::catalog::CollectionModel::Document) => "document",
3116        Some(crate::catalog::CollectionModel::Kv)
3117        | Some(crate::catalog::CollectionModel::Vault) => "kv",
3118        _ => "row",
3119    }
3120}
3121
3122fn ordered_update_target_ids(
3123    manager: &Arc<crate::storage::SegmentManager>,
3124    entity_ids: &[EntityId],
3125    order_by: &[OrderByClause],
3126    limit: Option<usize>,
3127) -> Vec<EntityId> {
3128    let mut entities: Vec<UnifiedEntity> =
3129        manager.get_many(entity_ids).into_iter().flatten().collect();
3130    entities.sort_by(|left, right| compare_update_order(left, right, order_by));
3131    if let Some(limit) = limit {
3132        entities.truncate(limit);
3133    }
3134    entities.into_iter().map(|entity| entity.id).collect()
3135}
3136
3137fn compare_update_order(
3138    left: &UnifiedEntity,
3139    right: &UnifiedEntity,
3140    order_by: &[OrderByClause],
3141) -> Ordering {
3142    for clause in order_by {
3143        let left_value = update_order_value(left, &clause.field);
3144        let right_value = update_order_value(right, &clause.field);
3145        let ordering = compare_update_order_values(
3146            left_value.as_ref(),
3147            right_value.as_ref(),
3148            clause.nulls_first,
3149        );
3150        if ordering != Ordering::Equal {
3151            return if clause.ascending {
3152                ordering
3153            } else {
3154                ordering.reverse()
3155            };
3156        }
3157    }
3158    left.logical_id().raw().cmp(&right.logical_id().raw())
3159}
3160
3161fn compare_update_order_values(
3162    left: Option<&Value>,
3163    right: Option<&Value>,
3164    nulls_first: bool,
3165) -> Ordering {
3166    match (left, right) {
3167        (None, None) => Ordering::Equal,
3168        (None, Some(_)) => {
3169            if nulls_first {
3170                Ordering::Less
3171            } else {
3172                Ordering::Greater
3173            }
3174        }
3175        (Some(_), None) => {
3176            if nulls_first {
3177                Ordering::Greater
3178            } else {
3179                Ordering::Less
3180            }
3181        }
3182        (Some(left), Some(right)) => {
3183            crate::storage::query::value_compare::total_compare_values(left, right)
3184        }
3185    }
3186}
3187
3188fn update_order_value(entity: &UnifiedEntity, field: &FieldRef) -> Option<Value> {
3189    let FieldRef::TableColumn { table, column } = field else {
3190        return None;
3191    };
3192    if !table.is_empty() {
3193        return None;
3194    }
3195    if column.eq_ignore_ascii_case("rid") {
3196        return Some(Value::UnsignedInteger(entity.logical_id().raw()));
3197    }
3198    match &entity.data {
3199        EntityData::Row(row) => row.get_field(column).cloned(),
3200        EntityData::Node(_) | EntityData::Edge(_) => runtime_any_record_from_entity_ref(entity)
3201            .and_then(|record| record.get(column).cloned()),
3202        _ => None,
3203    }
3204}
3205
/// Removes duplicate column names, comparing ASCII-case-insensitively.
///
/// The first spelling of each name is kept and the relative order of the
/// survivors is preserved.
fn dedupe_update_columns(columns: Vec<String>) -> Vec<String> {
    let mut kept: Vec<String> = Vec::with_capacity(columns.len());
    for candidate in columns {
        let already_seen = kept
            .iter()
            .any(|existing| existing.eq_ignore_ascii_case(&candidate));
        if !already_seen {
            kept.push(candidate);
        }
    }
    kept
}
3222
3223// =============================================================================
3224// Helper functions for extracting typed values from column/value pairs
3225// =============================================================================
3226
/// Canonical spellings of the SQL columns that are treated as TTL metadata.
const SQL_TTL_METADATA_COLUMNS: [&str; 3] = ["_ttl", "_ttl_ms", "_expires_at"];

/// Returns the canonical TTL metadata key for `column`, matching
/// ASCII-case-insensitively, or `None` when the column is not a TTL column.
fn resolve_sql_ttl_metadata_key(column: &str) -> Option<&'static str> {
    SQL_TTL_METADATA_COLUMNS
        .iter()
        .copied()
        .find(|known| column.eq_ignore_ascii_case(known))
}
3240
3241/// Canonicalize a SQL TTL metadata `(key, value)` pair so the retention
3242/// sweeper sees a single key (`_ttl_ms`) regardless of which legacy form
3243/// the operator wrote. `_ttl` is scaled from seconds to milliseconds;
3244/// `_ttl_ms` and `_expires_at` are passed through.
3245fn canonicalize_sql_ttl_metadata(
3246    key: &'static str,
3247    value: MetadataValue,
3248) -> (&'static str, MetadataValue) {
3249    if key != "_ttl" {
3250        return (key, value);
3251    }
3252    let scaled = match value {
3253        MetadataValue::Int(s) => MetadataValue::Int(s.saturating_mul(1_000)),
3254        MetadataValue::Timestamp(ms_or_s) => {
3255            // Timestamp is already chosen for very large values; treat as
3256            // already-ms to avoid silent overflow.
3257            MetadataValue::Timestamp(ms_or_s)
3258        }
3259        MetadataValue::Float(f) => MetadataValue::Float(f * 1_000.0),
3260        other => other,
3261    };
3262    ("_ttl_ms", scaled)
3263}
3264
3265/// Sentinel prefix produced by the parser for `PASSWORD('...')` and
3266/// `SECRET('...')` literals. The runtime strips this marker and
3267/// applies the actual crypto transform during INSERT execution.
3268pub(crate) const PLAINTEXT_SENTINEL: &str = "@@plain@@";
3269
3270impl RedDBRuntime {
3271    /// Strip the plaintext sentinel from a `Value::Password` or
3272    /// `Value::Secret` produced by the parser and apply the real
3273    /// crypto transform. `Password` is always hashed with argon2id.
3274    /// `Secret` is encrypted with AES-256-GCM keyed by the vault
3275    /// when `red.config.secret.auto_encrypt = true` (default).
3276    pub(crate) fn resolve_crypto_sentinel(&self, value: Value) -> RedDBResult<Value> {
3277        match value {
3278            Value::Password(marked) => {
3279                if let Some(plain) = marked.strip_prefix(PLAINTEXT_SENTINEL) {
3280                    Ok(Value::Password(crate::auth::store::hash_password(plain)))
3281                } else {
3282                    Ok(Value::Password(marked))
3283                }
3284            }
3285            Value::Secret(bytes) => {
3286                if bytes.starts_with(PLAINTEXT_SENTINEL.as_bytes()) {
3287                    if !self.secret_auto_encrypt() {
3288                        return Err(RedDBError::Query(
3289                            "SECRET() literal rejected: red.config.secret.auto_encrypt \
3290                             is false. Insert pre-encrypted bytes directly instead."
3291                                .to_string(),
3292                        ));
3293                    }
3294                    let key = self.secret_aes_key().ok_or_else(|| {
3295                        RedDBError::Query(
3296                            "SECRET() column encryption requires a bootstrapped \
3297                             vault (red.secret.aes_key is missing). Start the server \
3298                             with --vault to enable."
3299                                .to_string(),
3300                        )
3301                    })?;
3302                    let plain = &bytes[PLAINTEXT_SENTINEL.len()..];
3303                    Ok(Value::Secret(encrypt_secret_payload(&key, plain)))
3304                } else {
3305                    Ok(Value::Secret(bytes))
3306                }
3307            }
3308            other => Ok(other),
3309        }
3310    }
3311}
3312
3313/// Encode an AES-256-GCM ciphertext as `[12-byte nonce][ciphertext||tag]`.
3314/// This is the on-disk representation of `Value::Secret`.
3315fn encrypt_secret_payload(key: &[u8; 32], plaintext: &[u8]) -> Vec<u8> {
3316    let nonce_bytes = crate::auth::store::random_bytes(12);
3317    let mut nonce = [0u8; 12];
3318    nonce.copy_from_slice(&nonce_bytes[..12]);
3319    let ct = crate::crypto::aes_gcm::aes256_gcm_encrypt(key, &nonce, b"reddb.secret", plaintext);
3320    let mut out = Vec::with_capacity(12 + ct.len());
3321    out.extend_from_slice(&nonce);
3322    out.extend_from_slice(&ct);
3323    out
3324}
3325
3326/// Decode a `Value::Secret` payload back to plaintext. Returns
3327/// `None` when the payload is too short or AES-GCM authentication
3328/// fails (tampered or wrong key).
3329pub(crate) fn decrypt_secret_payload(key: &[u8; 32], payload: &[u8]) -> Option<Vec<u8>> {
3330    if payload.len() < 12 {
3331        return None;
3332    }
3333    let mut nonce = [0u8; 12];
3334    nonce.copy_from_slice(&payload[..12]);
3335    crate::crypto::aes_gcm::aes256_gcm_decrypt(key, &nonce, b"reddb.secret", &payload[12..]).ok()
3336}
3337
3338fn split_insert_metadata(
3339    runtime: &RedDBRuntime,
3340    columns: &[String],
3341    values: &[Value],
3342) -> RedDBResult<(Vec<(String, Value)>, Vec<(String, MetadataValue)>)> {
3343    let mut fields = Vec::new();
3344    let mut metadata = Vec::new();
3345
3346    for (column, value) in columns.iter().zip(values.iter()) {
3347        // Still support legacy _ttl columns for backward compat
3348        if let Some(metadata_key) = resolve_sql_ttl_metadata_key(column) {
3349            let raw_value = sql_literal_to_metadata_value(metadata_key, value)?;
3350            let (canonical_key, canonical_value) =
3351                canonicalize_sql_ttl_metadata(metadata_key, raw_value);
3352            metadata.push((canonical_key.to_string(), canonical_value));
3353            continue;
3354        }
3355        fields.push((
3356            column.clone(),
3357            runtime.resolve_crypto_sentinel(value.clone())?,
3358        ));
3359    }
3360
3361    Ok((fields, metadata))
3362}
3363
3364/// Merge structured WITH TTL, WITH EXPIRES AT, and WITH METADATA clauses into metadata entries.
3365fn merge_with_clauses(
3366    metadata: &mut Vec<(String, MetadataValue)>,
3367    ttl_ms: Option<u64>,
3368    expires_at_ms: Option<u64>,
3369    with_metadata: &[(String, Value)],
3370) {
3371    if let Some(ms) = ttl_ms {
3372        metadata.push((
3373            "_ttl_ms".to_string(),
3374            if ms <= i64::MAX as u64 {
3375                MetadataValue::Int(ms as i64)
3376            } else {
3377                MetadataValue::Timestamp(ms)
3378            },
3379        ));
3380    }
3381    if let Some(ms) = expires_at_ms {
3382        metadata.push(("_expires_at".to_string(), MetadataValue::Timestamp(ms)));
3383    }
3384    for (key, value) in with_metadata {
3385        let meta_value = match value {
3386            Value::Text(s) => MetadataValue::String(s.to_string()),
3387            Value::Integer(n) => MetadataValue::Int(*n),
3388            Value::Float(n) => MetadataValue::Float(*n),
3389            Value::Boolean(b) => MetadataValue::Bool(*b),
3390            _ => MetadataValue::String(value.to_string()),
3391        };
3392        metadata.push((key.clone(), meta_value));
3393    }
3394}
3395
3396fn merge_vector_metadata_column(
3397    metadata: &mut Vec<(String, MetadataValue)>,
3398    columns: &[String],
3399    values: &[Value],
3400) -> RedDBResult<()> {
3401    let Some(value) = columns
3402        .iter()
3403        .position(|column| column.eq_ignore_ascii_case("metadata"))
3404        .map(|index| &values[index])
3405    else {
3406        return Ok(());
3407    };
3408    let json = match value {
3409        Value::Null => return Ok(()),
3410        Value::Json(bytes) => crate::json::from_slice(bytes).map_err(|err| {
3411            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3412        })?,
3413        Value::Text(text) => crate::json::from_str(text).map_err(|err| {
3414            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3415        })?,
3416        other => {
3417            return Err(RedDBError::Query(format!(
3418                "column 'metadata' expected JSON object, got {other:?}"
3419            )))
3420        }
3421    };
3422    let parsed = metadata_from_json(&json)?;
3423    for (key, value) in parsed.iter() {
3424        metadata.push((key.clone(), value.clone()));
3425    }
3426    Ok(())
3427}
3428
3429fn apply_collection_default_ttl_metadata(
3430    runtime: &RedDBRuntime,
3431    collection: &str,
3432    metadata: &mut Vec<(String, MetadataValue)>,
3433) {
3434    if has_internal_ttl_metadata(metadata) {
3435        return;
3436    }
3437
3438    let Some(default_ttl_ms) = runtime.db().collection_default_ttl_ms(collection) else {
3439        return;
3440    };
3441
3442    metadata.push((
3443        "_ttl_ms".to_string(),
3444        if default_ttl_ms <= i64::MAX as u64 {
3445            MetadataValue::Int(default_ttl_ms as i64)
3446        } else {
3447            MetadataValue::Timestamp(default_ttl_ms)
3448        },
3449    ));
3450}
3451
3452fn ensure_non_tree_reserved_metadata_entries(
3453    metadata: &[(String, MetadataValue)],
3454) -> RedDBResult<()> {
3455    for (key, _) in metadata {
3456        ensure_non_tree_reserved_metadata_key(key)?;
3457    }
3458    Ok(())
3459}
3460
3461fn ensure_non_tree_reserved_metadata_key(key: &str) -> RedDBResult<()> {
3462    if key.starts_with(TREE_METADATA_PREFIX) {
3463        return Err(RedDBError::Query(format!(
3464            "metadata key '{}' is reserved for managed trees",
3465            key
3466        )));
3467    }
3468    Ok(())
3469}
3470
3471fn ensure_non_tree_structural_edge_label(label: &str) -> RedDBResult<()> {
3472    if label.eq_ignore_ascii_case(TREE_CHILD_EDGE_LABEL) {
3473        return Err(RedDBError::Query(format!(
3474            "edge label '{}' is reserved for managed trees",
3475            TREE_CHILD_EDGE_LABEL
3476        )));
3477    }
3478    Ok(())
3479}
3480
3481fn pairwise_columns_values(pairs: &[(String, Value)]) -> (Vec<String>, Vec<Value>) {
3482    let mut columns = Vec::with_capacity(pairs.len());
3483    let mut values = Vec::with_capacity(pairs.len());
3484
3485    for (column, value) in pairs {
3486        columns.push(column.clone());
3487        values.push(value.clone());
3488    }
3489
3490    (columns, values)
3491}
3492
3493/// Find a required column value and return it as-is.
3494fn find_column_value(columns: &[String], values: &[Value], name: &str) -> RedDBResult<Value> {
3495    for (i, col) in columns.iter().enumerate() {
3496        if col.eq_ignore_ascii_case(name) {
3497            return Ok(values[i].clone());
3498        }
3499    }
3500    Err(RedDBError::Query(format!(
3501        "required column '{name}' not found in INSERT"
3502    )))
3503}
3504
3505/// Find a required column value and coerce to String.
3506fn find_column_value_string(
3507    columns: &[String],
3508    values: &[Value],
3509    name: &str,
3510) -> RedDBResult<String> {
3511    let val = find_column_value(columns, values, name)?;
3512    match val {
3513        Value::Text(s) => Ok(s.to_string()),
3514        Value::Integer(n) => Ok(n.to_string()),
3515        Value::Float(n) => Ok(n.to_string()),
3516        other => Err(RedDBError::Query(format!(
3517            "column '{name}' expected text, got {other:?}"
3518        ))),
3519    }
3520}
3521
3522fn find_column_value_f64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<f64> {
3523    let val = find_column_value(columns, values, name)?;
3524    match val {
3525        Value::Float(n) => Ok(n),
3526        Value::Integer(n) => Ok(n as f64),
3527        Value::UnsignedInteger(n) => Ok(n as f64),
3528        Value::Text(s) => s
3529            .parse::<f64>()
3530            .map_err(|_| RedDBError::Query(format!("column '{name}' expected number, got '{s}'"))),
3531        other => Err(RedDBError::Query(format!(
3532            "column '{name}' expected number, got {other:?}"
3533        ))),
3534    }
3535}
3536
3537/// Find an optional column value as String.
3538fn find_column_value_opt_string(
3539    columns: &[String],
3540    values: &[Value],
3541    name: &str,
3542) -> Option<String> {
3543    for (i, col) in columns.iter().enumerate() {
3544        if col.eq_ignore_ascii_case(name) {
3545            return match &values[i] {
3546                Value::Null => None,
3547                Value::Text(s) => Some(s.to_string()),
3548                Value::Integer(n) => Some(n.to_string()),
3549                Value::Float(n) => Some(n.to_string()),
3550                _ => None,
3551            };
3552        }
3553    }
3554    None
3555}
3556
3557/// Resolve an EDGE endpoint (`from`/`to`) to a numeric entity id.
3558///
3559/// Accepts integer literals, decimal strings, and node labels resolved via
3560/// the per-collection graph label index (same source of truth that
3561/// `GRAPH NEIGHBORHOOD` / `GRAPH TRAVERSE` use at query time). Ambiguous
3562/// labels error so callers can fall back to the numeric id form.
3563fn resolve_edge_endpoint(
3564    store: &crate::storage::unified::UnifiedStore,
3565    collection: &str,
3566    columns: &[String],
3567    values: &[Value],
3568    name: &str,
3569) -> RedDBResult<u64> {
3570    let val = find_column_value(columns, values, name)?;
3571    match val {
3572        Value::Integer(n) => Ok(n as u64),
3573        Value::UnsignedInteger(n) => Ok(n),
3574        Value::Text(s) => {
3575            if let Ok(n) = s.parse::<u64>() {
3576                return Ok(n);
3577            }
3578            let matches = store.lookup_graph_nodes_by_label_in(collection, &s);
3579            match matches.len() {
3580                0 => Err(RedDBError::Query(format!(
3581                    "column '{name}': no graph node with label '{s}' in collection '{collection}'"
3582                ))),
3583                1 => Ok(matches[0].raw()),
3584                n => Err(RedDBError::Query(format!(
3585                    "column '{name}': ambiguous label '{s}' matches {n} nodes in collection '{collection}'; use the numeric id"
3586                ))),
3587            }
3588        }
3589        other => Err(RedDBError::Query(format!(
3590            "column '{name}' expected integer or node label, got {other:?}"
3591        ))),
3592    }
3593}
3594
3595fn resolve_edge_endpoint_any(
3596    store: &crate::storage::unified::UnifiedStore,
3597    collection: &str,
3598    columns: &[String],
3599    values: &[Value],
3600    names: &[&str],
3601) -> RedDBResult<u64> {
3602    for name in names {
3603        if columns
3604            .iter()
3605            .any(|column| column.eq_ignore_ascii_case(name))
3606        {
3607            return resolve_edge_endpoint(store, collection, columns, values, name);
3608        }
3609    }
3610
3611    Err(RedDBError::Query(format!(
3612        "required column '{}' not found in INSERT",
3613        names.first().copied().unwrap_or("from_rid")
3614    )))
3615}
3616
3617/// Find a required column value and coerce to u64.
3618fn find_column_value_u64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<u64> {
3619    let val = find_column_value(columns, values, name)?;
3620    match val {
3621        Value::Integer(n) => Ok(n as u64),
3622        Value::UnsignedInteger(n) => Ok(n),
3623        Value::Text(s) => s
3624            .parse::<u64>()
3625            .map_err(|_| RedDBError::Query(format!("column '{name}' expected integer, got '{s}'"))),
3626        other => Err(RedDBError::Query(format!(
3627            "column '{name}' expected integer, got {other:?}"
3628        ))),
3629    }
3630}
3631
3632/// Find an optional column value as f32.
3633fn find_column_value_f32_opt(columns: &[String], values: &[Value], name: &str) -> Option<f32> {
3634    for (i, col) in columns.iter().enumerate() {
3635        if col.eq_ignore_ascii_case(name) {
3636            return match &values[i] {
3637                Value::Float(n) => Some(*n as f32),
3638                Value::Integer(n) => Some(*n as f32),
3639                Value::Null => None,
3640                _ => None,
3641            };
3642        }
3643    }
3644    None
3645}
3646
3647/// Find a required column value and coerce to Vec<f32> (from Value::Vector).
3648fn find_column_value_vec_f32(
3649    columns: &[String],
3650    values: &[Value],
3651    name: &str,
3652) -> RedDBResult<Vec<f32>> {
3653    let val = find_column_value(columns, values, name)?;
3654    match val {
3655        Value::Vector(v) => Ok(v),
3656        Value::Json(bytes) => {
3657            // Try to parse as JSON array of numbers
3658            let s = std::str::from_utf8(&bytes).map_err(|_| {
3659                RedDBError::Query(format!("column '{name}' contains invalid UTF-8"))
3660            })?;
3661            let arr: Vec<f32> = crate::json::from_str(s).map_err(|e| {
3662                RedDBError::Query(format!("column '{name}' invalid vector JSON: {e}"))
3663            })?;
3664            Ok(arr)
3665        }
3666        other => Err(RedDBError::Query(format!(
3667            "column '{name}' expected vector, got {other:?}"
3668        ))),
3669    }
3670}
3671
3672fn find_column_value_vec_f32_any(
3673    columns: &[String],
3674    values: &[Value],
3675    names: &[&str],
3676) -> RedDBResult<Vec<f32>> {
3677    for name in names {
3678        if columns
3679            .iter()
3680            .any(|column| column.eq_ignore_ascii_case(name))
3681        {
3682            return find_column_value_vec_f32(columns, values, name);
3683        }
3684    }
3685    Err(RedDBError::Query(format!(
3686        "required vector column '{}' not found in INSERT",
3687        names.join("' or '")
3688    )))
3689}
3690
3691/// Extract remaining properties (all columns not in the exclusion list).
3692fn extract_remaining_properties(
3693    columns: &[String],
3694    values: &[Value],
3695    exclude: &[&str],
3696) -> Vec<(String, Value)> {
3697    columns
3698        .iter()
3699        .zip(values.iter())
3700        .filter(|(col, _)| !exclude.iter().any(|e| col.eq_ignore_ascii_case(e)))
3701        .map(|(col, val)| (col.clone(), val.clone()))
3702        .collect()
3703}
3704
3705fn validate_timeseries_insert_columns(columns: &[String]) -> RedDBResult<()> {
3706    let mut invalid = Vec::new();
3707    for column in columns {
3708        if !is_timeseries_insert_column(column) && resolve_sql_ttl_metadata_key(column).is_none() {
3709            invalid.push(column.clone());
3710        }
3711    }
3712
3713    if invalid.is_empty() {
3714        Ok(())
3715    } else {
3716        Err(RedDBError::Query(format!(
3717            "timeseries INSERT only accepts metric, value, tags, timestamp, timestamp_ns, or time columns; got {}",
3718            invalid.join(", ")
3719        )))
3720    }
3721}
3722
/// Report whether `column` is one of the column names a timeseries INSERT
/// accepts, compared ASCII case-insensitively.
fn is_timeseries_insert_column(column: &str) -> bool {
    const ACCEPTED: [&str; 8] = [
        "metric",
        "value",
        "tags",
        "timestamp",
        "timestamp_ns",
        "time",
        // Analytics-event extension (#577): an analytics row carries
        // an `event_name` + JSON `payload`. The payload is validated
        // against the AnalyticsSchemaRegistry inside
        // `insert_timeseries_point` before the row lands.
        "event_name",
        "payload",
    ];

    ACCEPTED
        .iter()
        .any(|accepted| column.eq_ignore_ascii_case(accepted))
}
3740
3741fn find_timeseries_timestamp_ns(columns: &[String], values: &[Value]) -> RedDBResult<Option<u64>> {
3742    let mut found = None;
3743
3744    for alias in ["timestamp_ns", "timestamp", "time"] {
3745        for (index, column) in columns.iter().enumerate() {
3746            if !column.eq_ignore_ascii_case(alias) {
3747                continue;
3748            }
3749
3750            if found.is_some() {
3751                return Err(RedDBError::Query(
3752                    "timeseries INSERT accepts only one timestamp column".to_string(),
3753                ));
3754            }
3755
3756            found = Some(coerce_value_to_non_negative_u64(&values[index], alias)?);
3757        }
3758    }
3759
3760    Ok(found)
3761}
3762
3763fn find_timeseries_tags(
3764    columns: &[String],
3765    values: &[Value],
3766) -> RedDBResult<std::collections::HashMap<String, String>> {
3767    for (index, column) in columns.iter().enumerate() {
3768        if column.eq_ignore_ascii_case("tags") {
3769            return parse_timeseries_tags(&values[index]);
3770        }
3771    }
3772    Ok(std::collections::HashMap::new())
3773}
3774
3775fn parse_timeseries_tags(value: &Value) -> RedDBResult<std::collections::HashMap<String, String>> {
3776    match value {
3777        Value::Null => Ok(std::collections::HashMap::new()),
3778        Value::Json(bytes) => parse_timeseries_tags_json(bytes),
3779        Value::Text(text) => parse_timeseries_tags_json(text.as_bytes()),
3780        other => Err(RedDBError::Query(format!(
3781            "timeseries tags must be a JSON object or JSON text, got {other:?}"
3782        ))),
3783    }
3784}
3785
3786fn parse_timeseries_tags_json(
3787    bytes: &[u8],
3788) -> RedDBResult<std::collections::HashMap<String, String>> {
3789    let json: crate::json::Value = crate::json::from_slice(bytes)
3790        .map_err(|err| RedDBError::Query(format!("timeseries tags must be valid JSON: {err}")))?;
3791
3792    let object = match json {
3793        crate::json::Value::Object(object) => object,
3794        other => {
3795            return Err(RedDBError::Query(format!(
3796                "timeseries tags must be a JSON object, got {other:?}"
3797            )))
3798        }
3799    };
3800
3801    let mut tags = std::collections::HashMap::with_capacity(object.len());
3802    for (key, value) in object {
3803        tags.insert(key, json_tag_value_to_string(&value));
3804    }
3805    Ok(tags)
3806}
3807
3808/// Encode a tag value for storage so the original JSON type can be
3809/// recovered on read (issue #543).
3810///
3811/// Time-series tags are stored as `HashMap<String, String>` on the
3812/// physical record (see [`crate::storage::TimeSeriesData`]) so that
3813/// the segment codec, WAL and gRPC mirrors don't need a new value
3814/// variant. To preserve the original JSON type across that
3815/// string-only channel we prepend the
3816/// [`crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX`] marker
3817/// and serialize the value as compact JSON text. The read paths
3818/// (`timeseries_tags_json_value` / `timeseries_tags_value`) detect
3819/// the marker, parse the suffix, and recover a real JSON value.
3820/// Tags written through other channels (Prometheus remote write,
3821/// metrics handlers, legacy on-disk data) lack the marker and are
3822/// returned as `JsonValue::String(raw)` exactly as before.
3823fn json_tag_value_to_string(value: &crate::json::Value) -> String {
3824    let mut buf = String::with_capacity(value.to_string_compact().len() + 1);
3825    buf.push(crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX);
3826    buf.push_str(&value.to_string_compact());
3827    buf
3828}
3829
3830fn coerce_value_to_non_negative_u64(value: &Value, column: &str) -> RedDBResult<u64> {
3831    match value {
3832        Value::UnsignedInteger(value) => Ok(*value),
3833        Value::Integer(value) if *value >= 0 => Ok(*value as u64),
3834        Value::Float(value) if *value >= 0.0 => Ok(*value as u64),
3835        Value::Text(value) => value.parse::<u64>().map_err(|_| {
3836            RedDBError::Query(format!(
3837                "column '{column}' expected a non-negative integer timestamp, got '{value}'"
3838            ))
3839        }),
3840        other => Err(RedDBError::Query(format!(
3841            "column '{column}' expected a non-negative integer timestamp, got {other:?}"
3842        ))),
3843    }
3844}
3845
/// Current wall-clock time as nanoseconds since the Unix epoch.
///
/// A clock that reads before the epoch yields `0`; a nanosecond count that
/// does not fit in `u64` is clamped to `u64::MAX`.
fn current_unix_ns() -> u64 {
    let since_epoch = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => std::time::Duration::default(),
    };
    u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX)
}
3853
3854fn metadata_value_to_json(value: &MetadataValue) -> crate::json::Value {
3855    use crate::json::{Map, Value as JV};
3856    match value {
3857        MetadataValue::Null => JV::Null,
3858        MetadataValue::Bool(value) => JV::Bool(*value),
3859        MetadataValue::Int(value) => JV::Number(*value as f64),
3860        MetadataValue::Float(value) => JV::Number(*value),
3861        MetadataValue::String(value) => JV::String(value.clone()),
3862        MetadataValue::Bytes(value) => JV::Array(
3863            value
3864                .iter()
3865                .map(|value| JV::Number(*value as f64))
3866                .collect(),
3867        ),
3868        MetadataValue::Timestamp(value) => JV::Number(*value as f64),
3869        MetadataValue::Array(values) => {
3870            JV::Array(values.iter().map(metadata_value_to_json).collect())
3871        }
3872        MetadataValue::Object(object) => {
3873            let entries = object
3874                .iter()
3875                .map(|(key, value)| (key.clone(), metadata_value_to_json(value)))
3876                .collect();
3877            JV::Object(entries)
3878        }
3879        MetadataValue::Geo { lat, lon } => {
3880            let mut object = Map::new();
3881            object.insert("lat".to_string(), JV::Number(*lat));
3882            object.insert("lon".to_string(), JV::Number(*lon));
3883            JV::Object(object)
3884        }
3885        MetadataValue::Reference(target) => {
3886            let mut object = Map::new();
3887            object.insert(
3888                "collection".to_string(),
3889                JV::String(target.collection().to_string()),
3890            );
3891            object.insert(
3892                "entity_id".to_string(),
3893                JV::Number(target.entity_id().raw() as f64),
3894            );
3895            JV::Object(object)
3896        }
3897        MetadataValue::References(values) => {
3898            let refs = values
3899                .iter()
3900                .map(|target| {
3901                    let mut object = Map::new();
3902                    object.insert(
3903                        "collection".to_string(),
3904                        JV::String(target.collection().to_string()),
3905                    );
3906                    object.insert(
3907                        "entity_id".to_string(),
3908                        JV::Number(target.entity_id().raw() as f64),
3909                    );
3910                    JV::Object(object)
3911                })
3912                .collect();
3913            JV::Array(refs)
3914        }
3915    }
3916}
3917
3918fn storage_value_to_metadata_value(value: &Value) -> MetadataValue {
3919    match value {
3920        Value::Null => MetadataValue::Null,
3921        Value::Boolean(value) => MetadataValue::Bool(*value),
3922        Value::Integer(value) => MetadataValue::Int(*value),
3923        Value::UnsignedInteger(value) => metadata_u64_to_value(*value),
3924        Value::Float(value) => MetadataValue::Float(*value),
3925        Value::Text(value) => MetadataValue::String(value.to_string()),
3926        Value::Blob(value) => MetadataValue::Bytes(value.clone()),
3927        Value::Timestamp(value) => {
3928            if *value >= 0 {
3929                metadata_u64_to_value(*value as u64)
3930            } else {
3931                MetadataValue::Int(*value)
3932            }
3933        }
3934        Value::TimestampMs(value) => {
3935            if *value >= 0 {
3936                metadata_u64_to_value(*value as u64)
3937            } else {
3938                MetadataValue::Int(*value)
3939            }
3940        }
3941        Value::Json(value) => MetadataValue::String(String::from_utf8_lossy(value).into_owned()),
3942        Value::Uuid(value) => MetadataValue::String(format!("{value:?}")),
3943        Value::Date(value) => MetadataValue::String(value.to_string()),
3944        Value::Time(value) => MetadataValue::String(value.to_string()),
3945        Value::Decimal(value) => MetadataValue::String(value.to_string()),
3946        Value::Ipv4(value) => MetadataValue::String(format!(
3947            "{}.{}.{}.{}",
3948            (value >> 24) & 0xFF,
3949            (value >> 16) & 0xFF,
3950            (value >> 8) & 0xFF,
3951            value & 0xFF
3952        )),
3953        Value::Port(value) => MetadataValue::Int(i64::from(*value)),
3954        Value::Latitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3955        Value::Longitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3956        Value::GeoPoint(lat, lon) => MetadataValue::Geo {
3957            lat: *lat as f64 / 1_000_000.0,
3958            lon: *lon as f64 / 1_000_000.0,
3959        },
3960        Value::BigInt(value) => MetadataValue::String(value.to_string()),
3961        Value::TableRef(value) => MetadataValue::String(value.clone()),
3962        Value::PageRef(value) => MetadataValue::Int(*value as i64),
3963        Value::Password(value) => MetadataValue::String(value.clone()),
3964        Value::Array(values) => {
3965            MetadataValue::Array(values.iter().map(storage_value_to_metadata_value).collect())
3966        }
3967        _ => MetadataValue::String(value.to_string()),
3968    }
3969}
3970
3971fn sql_literal_to_metadata_value(field: &str, value: &Value) -> RedDBResult<MetadataValue> {
3972    match value {
3973        Value::Null => Ok(MetadataValue::Null),
3974        Value::Integer(value) if *value >= 0 => Ok(metadata_u64_to_value(*value as u64)),
3975        Value::Integer(_) => Err(RedDBError::Query(format!(
3976            "column '{field}' must be non-negative for TTL metadata"
3977        ))),
3978        Value::UnsignedInteger(value) => Ok(metadata_u64_to_value(*value)),
3979        Value::Float(value) if value.is_finite() => {
3980            if value.fract().abs() >= f64::EPSILON {
3981                return Err(RedDBError::Query(format!(
3982                    "column '{field}' must be an integer (TTL metadata must be an integer)"
3983                )));
3984            }
3985            if *value < 0.0 {
3986                return Err(RedDBError::Query(format!(
3987                    "column '{field}' must be non-negative for TTL metadata"
3988                )));
3989            }
3990            if *value > u64::MAX as f64 {
3991                return Err(RedDBError::Query(format!(
3992                    "column '{field}' value is too large"
3993                )));
3994            }
3995            Ok(metadata_u64_to_value(*value as u64))
3996        }
3997        Value::Float(_) => Err(RedDBError::Query(format!(
3998            "column '{field}' must be a finite number"
3999        ))),
4000        Value::Text(value) => {
4001            let value = value.trim();
4002            if let Ok(value) = value.parse::<u64>() {
4003                Ok(metadata_u64_to_value(value))
4004            } else if let Ok(value) = value.parse::<i64>() {
4005                if value < 0 {
4006                    return Err(RedDBError::Query(format!(
4007                        "column '{field}' must be non-negative for TTL metadata"
4008                    )));
4009                }
4010                Ok(metadata_u64_to_value(value as u64))
4011            } else if let Ok(value) = value.parse::<f64>() {
4012                if !value.is_finite() {
4013                    return Err(RedDBError::Query(format!(
4014                        "column '{field}' must be a finite number"
4015                    )));
4016                }
4017                if value.fract().abs() >= f64::EPSILON {
4018                    return Err(RedDBError::Query(format!(
4019                        "column '{field}' must be an integer (TTL metadata must be an integer)"
4020                    )));
4021                }
4022                if value < 0.0 {
4023                    return Err(RedDBError::Query(format!(
4024                        "column '{field}' must be non-negative for TTL metadata"
4025                    )));
4026                }
4027                if value > u64::MAX as f64 {
4028                    return Err(RedDBError::Query(format!(
4029                        "column '{field}' value is too large"
4030                    )));
4031                }
4032                Ok(metadata_u64_to_value(value as u64))
4033            } else {
4034                Err(RedDBError::Query(format!(
4035                    "column '{field}' expects a numeric value for TTL metadata"
4036                )))
4037            }
4038        }
4039        _ => Err(RedDBError::Query(format!(
4040            "column '{field}' expects a numeric value for TTL metadata"
4041        ))),
4042    }
4043}
4044
4045fn metadata_u64_to_value(value: u64) -> MetadataValue {
4046    if value <= i64::MAX as u64 {
4047        MetadataValue::Int(value as i64)
4048    } else {
4049        MetadataValue::Timestamp(value)
4050    }
4051}
4052
4053/// Phase 2 PG parity: inspect a column value and return `true` when
4054/// the dotted `tail` path is already present under it. Used by the
4055/// tenant auto-fill so rows that already carry an explicit value
4056/// (bulk import, admin insert on behalf of a tenant) are not
4057/// double-stamped with the session's current_tenant().
4058fn dotted_tail_already_set(value: &Value, tail: &str) -> bool {
4059    let json = match value {
4060        Value::Null => return false,
4061        Value::Json(bytes) | Value::Blob(bytes) => {
4062            match crate::json::from_slice::<crate::json::Value>(bytes) {
4063                Ok(v) => v,
4064                Err(_) => return false,
4065            }
4066        }
4067        Value::Text(s) => {
4068            let trimmed = s.trim_start();
4069            if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
4070                return false;
4071            }
4072            match crate::json::from_str::<crate::json::Value>(s) {
4073                Ok(v) => v,
4074                Err(_) => return false,
4075            }
4076        }
4077        _ => return false,
4078    };
4079    let mut cursor = &json;
4080    for seg in tail.split('.') {
4081        match cursor {
4082            crate::json::Value::Object(map) => match map.iter().find(|(k, _)| *k == seg) {
4083                Some((_, v)) => cursor = v,
4084                None => return false,
4085            },
4086            _ => return false,
4087        }
4088    }
4089    !matches!(cursor, crate::json::Value::Null)
4090}
4091
4092/// Phase 2 PG parity: take a column value (possibly Null / Text /
4093/// Json) and return a `Value::Json` with the dotted `tail` path set
4094/// to `tenant_id`. Preserves every pre-existing key.
4095///
4096/// Accepts:
4097/// * `Value::Null`  → fresh `{tail: tenant_id}` object
4098/// * `Value::Json(bytes)` → parse, navigate / create path, re-serialize
4099/// * `Value::text(s)` if `s` is valid JSON → same as Json
4100/// * anything else → error (user supplied a scalar where we need
4101///   a JSON container)
4102fn merge_dotted_tenant(current: Value, tail: &str, tenant_id: &str) -> RedDBResult<Value> {
4103    let mut root = match current {
4104        Value::Null => crate::json::Value::Object(Default::default()),
4105        Value::Json(bytes) | Value::Blob(bytes) => {
4106            crate::json::from_slice(&bytes).map_err(|err| {
4107                RedDBError::Query(format!(
4108                    "tenant auto-fill: root column is not valid JSON ({err})"
4109                ))
4110            })?
4111        }
4112        Value::Text(s) => {
4113            if s.trim().is_empty() {
4114                crate::json::Value::Object(Default::default())
4115            } else {
4116                crate::json::from_str::<crate::json::Value>(&s).map_err(|err| {
4117                    RedDBError::Query(format!(
4118                        "tenant auto-fill: text root is not valid JSON ({err})"
4119                    ))
4120                })?
4121            }
4122        }
4123        other => {
4124            return Err(RedDBError::Query(format!(
4125                "tenant auto-fill: root column must be JSON / NULL, got {other:?}"
4126            )));
4127        }
4128    };
4129
4130    // Navigate path segments, creating intermediate objects on demand.
4131    let segments: Vec<&str> = tail.split('.').collect();
4132    let mut cursor: &mut crate::json::Value = &mut root;
4133    for (i, seg) in segments.iter().enumerate() {
4134        let is_last = i + 1 == segments.len();
4135        let map = match cursor {
4136            crate::json::Value::Object(m) => m,
4137            _ => {
4138                return Err(RedDBError::Query(format!(
4139                    "tenant auto-fill: segment '{seg}' is not inside an object"
4140                )));
4141            }
4142        };
4143        if is_last {
4144            map.insert(
4145                seg.to_string(),
4146                crate::json::Value::String(tenant_id.to_string()),
4147            );
4148            break;
4149        }
4150        cursor = map
4151            .entry(seg.to_string())
4152            .or_insert_with(|| crate::json::Value::Object(Default::default()));
4153    }
4154
4155    let bytes = crate::json::to_vec(&root).map_err(|err| {
4156        RedDBError::Query(format!(
4157            "tenant auto-fill: failed to re-serialize JSON ({err})"
4158        ))
4159    })?;
4160    Ok(Value::Json(bytes))
4161}
4162
4163#[cfg(test)]
4164mod tests {
4165    use crate::storage::schema::Value;
4166    use crate::storage::wal::{WalReader, WalRecord};
4167    use crate::{RedDBOptions, RedDBRuntime};
4168    use std::path::Path;
4169
4170    fn store_commit_batches(wal_path: &Path) -> Vec<Vec<Vec<u8>>> {
4171        WalReader::open(wal_path)
4172            .expect("wal opens")
4173            .iter()
4174            .map(|record| record.expect("wal record decodes").1)
4175            .filter_map(|record| match record {
4176                WalRecord::TxCommitBatch { actions, .. } => Some(actions),
4177                _ => None,
4178            })
4179            .collect()
4180    }
4181
4182    fn action_contains_text(action: &[u8], needle: &str) -> bool {
4183        action
4184            .windows(needle.len())
4185            .any(|window| window == needle.as_bytes())
4186    }
4187
4188    fn assert_statement_writes_collections_in_one_new_wal_batch(
4189        rt: &RedDBRuntime,
4190        wal_path: &Path,
4191        statement: &str,
4192        source: &str,
4193        event_queue: &str,
4194    ) {
4195        let before_batches = store_commit_batches(wal_path).len();
4196
4197        rt.execute_query(statement).unwrap();
4198
4199        let batches = store_commit_batches(wal_path);
4200        let statement_batches = &batches[before_batches..];
4201        let source_batch = statement_batches
4202            .iter()
4203            .position(|actions| {
4204                actions.iter().any(|action| {
4205                    action_contains_text(action, source)
4206                        && !action_contains_text(action, event_queue)
4207                })
4208            })
4209            .expect("source collection write batch is present");
4210        let event_batch = statement_batches
4211            .iter()
4212            .position(|actions| {
4213                actions
4214                    .iter()
4215                    .any(|action| action_contains_text(action, event_queue))
4216            })
4217            .expect("event queue write batch is present");
4218
4219        assert_eq!(
4220            source_batch, event_batch,
4221            "WITH EVENTS must persist the source write and queue event in the same WAL batch"
4222        );
4223    }
4224
4225    #[test]
4226    fn with_events_autocommit_persists_mutation_and_event_in_one_wal_batch() {
4227        let dir = tempfile::tempdir().unwrap();
4228        let db_path = dir.path().join("events_dual_write.rdb");
4229        let wal_path = db_path.with_extension("rdb-uwal");
4230        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4231
4232        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4233            .unwrap();
4234        assert_statement_writes_collections_in_one_new_wal_batch(
4235            &rt,
4236            &wal_path,
4237            "INSERT INTO users (id, email) VALUES (1, 'a@example.test')",
4238            "users",
4239            "users_events",
4240        );
4241    }
4242
4243    #[test]
4244    fn with_events_autocommit_update_persists_mutation_and_event_in_one_wal_batch() {
4245        let dir = tempfile::tempdir().unwrap();
4246        let db_path = dir.path().join("events_update_atomic.rdb");
4247        let wal_path = db_path.with_extension("rdb-uwal");
4248        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4249
4250        rt.execute_query(
4251            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (UPDATE) TO user_updates",
4252        )
4253        .unwrap();
4254        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4255            .unwrap();
4256
4257        assert_statement_writes_collections_in_one_new_wal_batch(
4258            &rt,
4259            &wal_path,
4260            "UPDATE users SET email = 'b@example.test' WHERE id = 1",
4261            "users",
4262            "user_updates",
4263        );
4264    }
4265
4266    #[test]
4267    fn with_events_autocommit_delete_persists_mutation_and_event_in_one_wal_batch() {
4268        let dir = tempfile::tempdir().unwrap();
4269        let db_path = dir.path().join("events_delete_atomic.rdb");
4270        let wal_path = db_path.with_extension("rdb-uwal");
4271        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4272
4273        rt.execute_query(
4274            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (DELETE) TO user_deletes",
4275        )
4276        .unwrap();
4277        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4278            .unwrap();
4279
4280        assert_statement_writes_collections_in_one_new_wal_batch(
4281            &rt,
4282            &wal_path,
4283            "DELETE FROM users WHERE id = 1",
4284            "users",
4285            "user_deletes",
4286        );
4287    }
4288
4289    #[test]
4290    fn update_where_id_in_with_hash_index_updates_expected_rows() {
4291        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4292        rt.execute_query("CREATE TABLE users (id INT, score INT)")
4293            .unwrap();
4294        for id in 0..5 {
4295            rt.execute_query(&format!("INSERT INTO users (id, score) VALUES ({id}, 0)"))
4296                .unwrap();
4297        }
4298        rt.execute_query("CREATE INDEX idx_id ON users (id) USING HASH")
4299            .unwrap();
4300
4301        let updated = rt
4302            .execute_query("UPDATE users SET score = 42 WHERE id IN (1,3,4)")
4303            .unwrap();
4304        assert_eq!(updated.affected_rows, 3);
4305
4306        let selected = rt
4307            .execute_query("SELECT id, score FROM users ORDER BY id")
4308            .unwrap();
4309        let scores: Vec<(i64, i64)> = selected
4310            .result
4311            .records
4312            .iter()
4313            .map(|record| {
4314                let id = match record.get("id").unwrap() {
4315                    Value::Integer(value) => *value,
4316                    other => panic!("expected integer id, got {other:?}"),
4317                };
4318                let score = match record.get("score").unwrap() {
4319                    Value::Integer(value) => *value,
4320                    other => panic!("expected integer score, got {other:?}"),
4321                };
4322                (id, score)
4323            })
4324            .collect();
4325        assert_eq!(scores, vec![(0, 0), (1, 42), (2, 0), (3, 42), (4, 42)]);
4326    }
4327
4328    /// Drives UPDATE through the shared `DmlTargetScan` module — the
4329    /// same code path DELETE uses (#51, #52). Exercises the indexed
4330    /// equality fast-path (WHERE id = N with a HASH index), the
4331    /// unindexed range scan (WHERE score > N), and the no-WHERE
4332    /// full-scan branch to confirm the extracted "find target rows"
4333    /// loop preserves affected-row counts and the resulting row state.
4334    #[test]
4335    fn update_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4336        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4337        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4338            .unwrap();
4339        for id in 0..5 {
4340            rt.execute_query(&format!(
4341                "INSERT INTO items (id, score) VALUES ({id}, {})",
4342                id * 10
4343            ))
4344            .unwrap();
4345        }
4346        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4347            .unwrap();
4348
4349        // Indexed equality UPDATE — hits the hash fast-path inside
4350        // DmlTargetScan::find_target_ids. id=2 has score=20, drop it
4351        // below the score>25 cutoff so the next assertion stays clean.
4352        let updated_one = rt
4353            .execute_query("UPDATE items SET score = 5 WHERE id = 2")
4354            .unwrap();
4355        assert_eq!(updated_one.affected_rows, 1);
4356
4357        // Unindexed scan UPDATE — bumps everyone with score > 25,
4358        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4359        // zoned/full-scan branch.
4360        let updated_many = rt
4361            .execute_query("UPDATE items SET score = 7 WHERE score > 25")
4362            .unwrap();
4363        assert_eq!(updated_many.affected_rows, 2);
4364
4365        let snapshot = rt
4366            .execute_query("SELECT id, score FROM items ORDER BY id")
4367            .unwrap();
4368        let pairs: Vec<(i64, i64)> = snapshot
4369            .result
4370            .records
4371            .iter()
4372            .map(|record| {
4373                let id = match record.get("id").unwrap() {
4374                    Value::Integer(value) => *value,
4375                    other => panic!("expected integer id, got {other:?}"),
4376                };
4377                let score = match record.get("score").unwrap() {
4378                    Value::Integer(value) => *value,
4379                    other => panic!("expected integer score, got {other:?}"),
4380                };
4381                (id, score)
4382            })
4383            .collect();
4384        assert_eq!(pairs, vec![(0, 0), (1, 10), (2, 5), (3, 7), (4, 7)]);
4385
4386        // Full-scan UPDATE with no WHERE rewrites every remaining row.
4387        let updated_all = rt.execute_query("UPDATE items SET score = 1").unwrap();
4388        assert_eq!(updated_all.affected_rows, 5);
4389        let after = rt
4390            .execute_query("SELECT score FROM items ORDER BY id")
4391            .unwrap();
4392        let scores: Vec<i64> = after
4393            .result
4394            .records
4395            .iter()
4396            .map(|record| match record.get("score").unwrap() {
4397                Value::Integer(value) => *value,
4398                other => panic!("expected integer score, got {other:?}"),
4399            })
4400            .collect();
4401        assert_eq!(scores, vec![1, 1, 1, 1, 1]);
4402    }
4403
4404    /// Drives DELETE through the new `DmlTargetScan` module. Exercises
4405    /// both the index fast-path (WHERE id = N with a HASH index) and
4406    /// the unindexed scan path (WHERE score > N) to confirm the
4407    /// extracted "find target rows" loop preserves the affected-row
4408    /// count and which rows survive.
4409    #[test]
4410    fn delete_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4411        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4412        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4413            .unwrap();
4414        for id in 0..5 {
4415            rt.execute_query(&format!(
4416                "INSERT INTO items (id, score) VALUES ({id}, {})",
4417                id * 10
4418            ))
4419            .unwrap();
4420        }
4421        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4422            .unwrap();
4423
4424        // Indexed equality DELETE — hits the hash fast-path inside
4425        // DmlTargetScan::find_target_ids.
4426        let deleted_one = rt.execute_query("DELETE FROM items WHERE id = 2").unwrap();
4427        assert_eq!(deleted_one.affected_rows, 1);
4428
4429        // Unindexed scan DELETE — drops everyone with score > 25,
4430        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4431        // zoned/full-scan branch.
4432        let deleted_many = rt
4433            .execute_query("DELETE FROM items WHERE score > 25")
4434            .unwrap();
4435        assert_eq!(deleted_many.affected_rows, 2);
4436
4437        let surviving = rt
4438            .execute_query("SELECT id FROM items ORDER BY id")
4439            .unwrap();
4440        let ids: Vec<i64> = surviving
4441            .result
4442            .records
4443            .iter()
4444            .map(|record| match record.get("id").unwrap() {
4445                Value::Integer(value) => *value,
4446                other => panic!("expected integer id, got {other:?}"),
4447            })
4448            .collect();
4449        assert_eq!(ids, vec![0, 1]);
4450
4451        // Sanity: full-scan DELETE with no WHERE clears the rest.
4452        let deleted_rest = rt.execute_query("DELETE FROM items").unwrap();
4453        assert_eq!(deleted_rest.affected_rows, 2);
4454        let empty = rt.execute_query("SELECT id FROM items").unwrap();
4455        assert!(empty.result.records.is_empty());
4456    }
4457
4458    /// CollectionContract gate (#49 + #50): APPEND ONLY tables accept
4459    /// INSERT but reject UPDATE and DELETE with the documented
4460    /// operator-facing error strings. Drives all three DML verbs so
4461    /// the centralized gate is exercised end-to-end.
4462    #[test]
4463    fn collection_contract_gate_blocks_update_and_delete_on_append_only() {
4464        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4465        rt.execute_query("CREATE TABLE events (id INT, payload TEXT) APPEND ONLY")
4466            .unwrap();
4467
4468        // INSERT must succeed — APPEND ONLY exists precisely to allow
4469        // appends. The gate should be a no-op for INSERT.
4470        let inserted = rt
4471            .execute_query("INSERT INTO events (id, payload) VALUES (1, 'hello')")
4472            .unwrap();
4473        assert_eq!(inserted.affected_rows, 1);
4474
4475        // UPDATE is rejected with the gate's UPDATE-specific message.
4476        let update_err = rt
4477            .execute_query("UPDATE events SET payload = 'mut' WHERE id = 1")
4478            .unwrap_err();
4479        let msg = format!("{update_err}");
4480        assert!(
4481            msg.contains("APPEND ONLY") && msg.contains("UPDATE is rejected"),
4482            "expected UPDATE rejection message, got: {msg}"
4483        );
4484
4485        // DELETE is rejected with the gate's DELETE-specific message.
4486        let delete_err = rt
4487            .execute_query("DELETE FROM events WHERE id = 1")
4488            .unwrap_err();
4489        let msg = format!("{delete_err}");
4490        assert!(
4491            msg.contains("APPEND ONLY") && msg.contains("DELETE is rejected"),
4492            "expected DELETE rejection message, got: {msg}"
4493        );
4494
4495        // Row should still be present — neither rejected mutation
4496        // touched storage.
4497        let surviving = rt.execute_query("SELECT id FROM events").unwrap();
4498        assert_eq!(surviving.result.records.len(), 1);
4499    }
4500
4501    /// CollectionContract gate: tables without an APPEND ONLY contract
4502    /// permit INSERT, UPDATE, and DELETE — the gate's default branch
4503    /// is a true pass-through, not an accidental block.
4504    #[test]
4505    fn collection_contract_gate_allows_all_verbs_on_unrestricted_table() {
4506        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4507        rt.execute_query("CREATE TABLE notes (id INT, body TEXT)")
4508            .unwrap();
4509
4510        rt.execute_query("INSERT INTO notes (id, body) VALUES (1, 'a')")
4511            .unwrap();
4512        let updated = rt
4513            .execute_query("UPDATE notes SET body = 'b' WHERE id = 1")
4514            .unwrap();
4515        assert_eq!(updated.affected_rows, 1);
4516        let deleted = rt.execute_query("DELETE FROM notes WHERE id = 1").unwrap();
4517        assert_eq!(deleted.affected_rows, 1);
4518    }
4519
4520    #[test]
4521    fn insert_into_event_enabled_table_emits_event_to_configured_queue() {
4522        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4523        rt.execute_query(
4524            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (INSERT) TO audit_log",
4525        )
4526        .unwrap();
4527
4528        let inserted = rt
4529            .execute_query("INSERT INTO users (id, email) VALUES (7, 'a@example.com')")
4530            .unwrap();
4531        assert_eq!(inserted.affected_rows, 1);
4532
4533        let events = queue_payloads(&rt, "audit_log");
4534        assert_eq!(events.len(), 1);
4535        let event = events[0].as_object().expect("event payload object");
4536        assert!(event
4537            .get("event_id")
4538            .and_then(crate::json::Value::as_str)
4539            .is_some_and(|value| !value.is_empty()));
4540        assert_eq!(
4541            event.get("op").and_then(crate::json::Value::as_str),
4542            Some("insert")
4543        );
4544        assert_eq!(
4545            event.get("collection").and_then(crate::json::Value::as_str),
4546            Some("users")
4547        );
4548        assert_eq!(
4549            event.get("id").and_then(crate::json::Value::as_u64),
4550            Some(7)
4551        );
4552        assert!(event
4553            .get("ts")
4554            .and_then(crate::json::Value::as_u64)
4555            .is_some());
4556        assert!(event
4557            .get("lsn")
4558            .and_then(crate::json::Value::as_u64)
4559            .is_some());
4560        assert!(matches!(
4561            event.get("tenant"),
4562            Some(crate::json::Value::Null)
4563        ));
4564        assert!(matches!(
4565            event.get("before"),
4566            Some(crate::json::Value::Null)
4567        ));
4568        let after = event
4569            .get("after")
4570            .and_then(crate::json::Value::as_object)
4571            .expect("after object");
4572        assert_eq!(
4573            after.get("id").and_then(crate::json::Value::as_u64),
4574            Some(7)
4575        );
4576        assert_eq!(
4577            after.get("email").and_then(crate::json::Value::as_str),
4578            Some("a@example.com")
4579        );
4580    }
4581
4582    #[test]
4583    fn multi_row_insert_emits_one_insert_event_per_row_in_order() {
4584        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4585        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4586            .unwrap();
4587
4588        rt.execute_query(
4589            "INSERT INTO users (id, email) VALUES (1, 'a@example.com'), (2, 'b@example.com')",
4590        )
4591        .unwrap();
4592
4593        let events = queue_payloads(&rt, "users_events");
4594        assert_eq!(events.len(), 2);
4595        let mut previous_lsn = 0;
4596        for (event, expected_id) in events.iter().zip([1_u64, 2]) {
4597            let object = event.as_object().expect("event payload object");
4598            assert_eq!(
4599                object.get("op").and_then(crate::json::Value::as_str),
4600                Some("insert")
4601            );
4602            assert_eq!(
4603                object.get("id").and_then(crate::json::Value::as_u64),
4604                Some(expected_id)
4605            );
4606            let lsn = object
4607                .get("lsn")
4608                .and_then(crate::json::Value::as_u64)
4609                .expect("event lsn");
4610            assert!(
4611                lsn > previous_lsn,
4612                "event LSNs should increase in row order"
4613            );
4614            previous_lsn = lsn;
4615            let after = object
4616                .get("after")
4617                .and_then(crate::json::Value::as_object)
4618                .expect("after object");
4619            assert_eq!(
4620                after.get("id").and_then(crate::json::Value::as_u64),
4621                Some(expected_id)
4622            );
4623        }
4624    }
4625
4626    fn queue_payloads(rt: &RedDBRuntime, queue: &str) -> Vec<crate::json::Value> {
4627        let result = rt
4628            .execute_query(&format!("QUEUE PEEK {queue} 10"))
4629            .expect("peek queue");
4630        result
4631            .result
4632            .records
4633            .iter()
4634            .map(
4635                |record| match record.get("payload").expect("payload column") {
4636                    Value::Json(bytes) => crate::json::from_slice(bytes).expect("json payload"),
4637                    other => panic!("expected JSON queue payload, got {other:?}"),
4638                },
4639            )
4640            .collect()
4641    }
4642
4643    // ── #112: auto-index user `id` on first insert ─────────────────────
4644
4645    /// First insert into a fresh collection that carries a column named
4646    /// `id` registers an implicit HASH index on `id`. Subsequent inserts
4647    /// populate it transparently, and `WHERE id = N` lookups exercise
4648    /// the hash-index fast path in `DmlTargetScan::find_target_ids`.
4649    ///
4650    /// This is the load-bearing acceptance test for #112 — without the
4651    /// hook, `find_index_for_column` returns `None` and DELETE/UPDATE
4652    /// fall through to a full segment scan (the 4× perf gap documented
4653    /// in `docs/perf/delete-sequential-2026-05-06.md`).
4654    #[test]
4655    fn auto_index_id_fires_on_first_insert() {
4656        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4657        rt.execute_query("CREATE TABLE bench_users (id INT, score INT)")
4658            .unwrap();
4659
4660        // Pre-condition: no index on `id` yet.
4661        assert!(
4662            rt.index_store_ref()
4663                .find_index_for_column("bench_users", "id")
4664                .is_none(),
4665            "freshly created collection should not have an `id` index"
4666        );
4667
4668        // Single-row INSERT — drives `MutationEngine::append_one`.
4669        rt.execute_query("INSERT INTO bench_users (id, score) VALUES (1, 10)")
4670            .unwrap();
4671
4672        // Post-condition: hash index registered on `id`.
4673        let registered = rt
4674            .index_store_ref()
4675            .find_index_for_column("bench_users", "id")
4676            .expect("auto-index hook should have registered idx_id on first insert");
4677        assert_eq!(registered.name, "idx_id");
4678        assert_eq!(registered.collection, "bench_users");
4679        assert_eq!(registered.columns, vec!["id".to_string()]);
4680        assert!(matches!(
4681            registered.method,
4682            super::super::index_store::IndexMethodKind::Hash
4683        ));
4684
4685        // Subsequent inserts populate the index; `WHERE id = N` should
4686        // resolve via the hash fast path and round-trip every row.
4687        for id in 2..=5 {
4688            rt.execute_query(&format!(
4689                "INSERT INTO bench_users (id, score) VALUES ({id}, {})",
4690                id * 10
4691            ))
4692            .unwrap();
4693        }
4694        for id in 1..=5 {
4695            let result = rt
4696                .execute_query(&format!("SELECT score FROM bench_users WHERE id = {id}"))
4697                .unwrap();
4698            assert_eq!(
4699                result.result.records.len(),
4700                1,
4701                "id={id} should match one row"
4702            );
4703        }
4704
4705        // Delete via the hash fast-path — exactly the bench scenario the
4706        // perf doc identified as the 4× regression. With the index
4707        // present, `find_target_ids` short-circuits before
4708        // `for_each_entity_zoned` runs.
4709        let deleted = rt
4710            .execute_query("DELETE FROM bench_users WHERE id = 3")
4711            .unwrap();
4712        assert_eq!(deleted.affected_rows, 1);
4713    }
4714
4715    /// Bulk INSERT (the multi-row VALUES path) drives
4716    /// `MutationEngine::append_batch`. The hook must fire there too —
4717    /// otherwise the batch entry points (gRPC binary bulk, HTTP bulk,
4718    /// wire bulk INSERT) skip auto-indexing entirely.
4719    #[test]
4720    fn auto_index_id_fires_on_first_bulk_insert() {
4721        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4722        rt.execute_query("CREATE TABLE bench_bulk (id INT, score INT)")
4723            .unwrap();
4724
4725        rt.execute_query("INSERT INTO bench_bulk (id, score) VALUES (1, 10), (2, 20), (3, 30)")
4726            .unwrap();
4727
4728        let registered = rt
4729            .index_store_ref()
4730            .find_index_for_column("bench_bulk", "id")
4731            .expect("auto-index hook should fire on first bulk insert");
4732        assert_eq!(registered.name, "idx_id");
4733
4734        // Every row populated via `index_entity_insert_batch`.
4735        for id in 1..=3 {
4736            let result = rt
4737                .execute_query(&format!("SELECT score FROM bench_bulk WHERE id = {id}"))
4738                .unwrap();
4739            assert_eq!(result.result.records.len(), 1);
4740        }
4741    }
4742
4743    /// Hook is a no-op when the row carries no `id` column. Conservative
4744    /// match (case-sensitive `id`) — `Id`, `ID`, and `red_entity_id`
4745    /// don't trigger it.
4746    #[test]
4747    fn auto_index_id_skips_when_no_id_column() {
4748        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4749        rt.execute_query("CREATE TABLE plain (uid INT, label TEXT)")
4750            .unwrap();
4751        rt.execute_query("INSERT INTO plain (uid, label) VALUES (1, 'a')")
4752            .unwrap();
4753
4754        assert!(rt
4755            .index_store_ref()
4756            .find_index_for_column("plain", "id")
4757            .is_none());
4758        assert!(rt
4759            .index_store_ref()
4760            .find_index_for_column("plain", "uid")
4761            .is_none());
4762    }
4763
4764    /// Hook only fires once per collection. If an explicit
4765    /// `CREATE INDEX ... USING BTREE` already covers `id`, the hook
4766    /// detects it via `find_index_for_column` and does NOT clobber it
4767    /// with a HASH index on the next insert.
4768    #[test]
4769    fn auto_index_id_skips_when_index_already_exists() {
4770        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4771        rt.execute_query("CREATE TABLE pre (id INT, score INT)")
4772            .unwrap();
4773        // User-declared BTREE index on `id` before any insert.
4774        rt.execute_query("CREATE INDEX user_idx ON pre (id) USING BTREE")
4775            .unwrap();
4776        rt.execute_query("INSERT INTO pre (id, score) VALUES (1, 10)")
4777            .unwrap();
4778
4779        let registered = rt
4780            .index_store_ref()
4781            .find_index_for_column("pre", "id")
4782            .expect("user index should still be there");
4783        assert_eq!(
4784            registered.name, "user_idx",
4785            "auto-index hook must not overwrite an existing index"
4786        );
4787    }
4788
4789    /// Implicit `idx_id` is reaped when the collection drops. The
4790    /// existing `execute_drop_table` walks `list_indices` and drops every
4791    /// entry — confirm the auto-created index participates.
4792    #[test]
4793    fn auto_index_id_dropped_with_collection() {
4794        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4795        rt.execute_query("CREATE TABLE ephemeral (id INT, score INT)")
4796            .unwrap();
4797        rt.execute_query("INSERT INTO ephemeral (id, score) VALUES (1, 10)")
4798            .unwrap();
4799        assert!(rt
4800            .index_store_ref()
4801            .find_index_for_column("ephemeral", "id")
4802            .is_some());
4803
4804        rt.execute_query("DROP TABLE ephemeral").unwrap();
4805
4806        assert!(
4807            rt.index_store_ref()
4808                .find_index_for_column("ephemeral", "id")
4809                .is_none(),
4810            "implicit `idx_id` must be reaped when its collection drops"
4811        );
4812    }
4813
4814    /// Opt-out via `RedDBOptions::with_auto_index_id(false)` (which
4815    /// forwards to `UnifiedStoreConfig::auto_index_id`). With the knob
4816    /// off, first insert leaves the collection without an `id` index —
4817    /// DELETE/UPDATE fall back to the scan path.
4818    #[test]
4819    fn auto_index_id_disabled_by_config() {
4820        let opts = RedDBOptions::in_memory().with_auto_index_id(false);
4821        let rt = RedDBRuntime::with_options(opts).unwrap();
4822
4823        rt.execute_query("CREATE TABLE off (id INT, score INT)")
4824            .unwrap();
4825        rt.execute_query("INSERT INTO off (id, score) VALUES (1, 10)")
4826            .unwrap();
4827
4828        assert!(
4829            rt.index_store_ref()
4830                .find_index_for_column("off", "id")
4831                .is_none(),
4832            "with auto_index_id=false, no implicit index should be created"
4833        );
4834    }
4835
4836    // ── #293: UPDATE / DELETE events ─────────────────────────────────────
4837
4838    #[test]
4839    fn update_single_row_emits_update_event() {
4840        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4841        rt.execute_query(
4842            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO audit_log",
4843        )
4844        .unwrap();
4845        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
4846            .unwrap();
4847
4848        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4849            .unwrap();
4850
4851        let events = queue_payloads(&rt, "audit_log");
4852        assert_eq!(events.len(), 1, "expected exactly 1 update event");
4853        let event = events[0].as_object().expect("event payload object");
4854        assert_eq!(
4855            event.get("op").and_then(crate::json::Value::as_str),
4856            Some("update")
4857        );
4858        assert_eq!(
4859            event.get("collection").and_then(crate::json::Value::as_str),
4860            Some("users")
4861        );
4862        assert!(event
4863            .get("event_id")
4864            .and_then(crate::json::Value::as_str)
4865            .is_some_and(|v| !v.is_empty()));
4866        let before = event
4867            .get("before")
4868            .and_then(crate::json::Value::as_object)
4869            .expect("before must be an object");
4870        let after = event
4871            .get("after")
4872            .and_then(crate::json::Value::as_object)
4873            .expect("after must be an object");
4874        assert_eq!(
4875            before.get("name").and_then(crate::json::Value::as_str),
4876            Some("Alice"),
4877            "before.name should be the old value"
4878        );
4879        assert_eq!(
4880            after.get("name").and_then(crate::json::Value::as_str),
4881            Some("Bob"),
4882            "after.name should be the new value"
4883        );
4884    }
4885
4886    #[test]
4887    fn update_event_only_includes_changed_fields() {
4888        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4889        rt.execute_query(
4890            "CREATE TABLE users (id INT, name TEXT, email TEXT) WITH EVENTS (UPDATE) TO evts",
4891        )
4892        .unwrap();
4893        rt.execute_query("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'a@x.com')")
4894            .unwrap();
4895
4896        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4897            .unwrap();
4898
4899        let events = queue_payloads(&rt, "evts");
4900        assert_eq!(events.len(), 1);
4901        let event = events[0].as_object().unwrap();
4902        let before = event
4903            .get("before")
4904            .and_then(crate::json::Value::as_object)
4905            .unwrap();
4906        let after = event
4907            .get("after")
4908            .and_then(crate::json::Value::as_object)
4909            .unwrap();
4910        // Only changed field included.
4911        assert!(
4912            before.contains_key("name"),
4913            "before must include changed field"
4914        );
4915        assert!(
4916            after.contains_key("name"),
4917            "after must include changed field"
4918        );
4919        // Unchanged fields must not appear.
4920        assert!(
4921            !before.contains_key("email"),
4922            "before must not include unchanged email"
4923        );
4924        assert!(
4925            !after.contains_key("email"),
4926            "after must not include unchanged email"
4927        );
4928    }
4929
4930    #[test]
4931    fn multi_row_update_emits_one_event_per_row() {
4932        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4933        rt.execute_query("CREATE TABLE items (id INT, status TEXT) WITH EVENTS (UPDATE) TO evts")
4934            .unwrap();
4935        rt.execute_query(
4936            "INSERT INTO items (id, status) VALUES (1, 'new'), (2, 'new'), (3, 'new')",
4937        )
4938        .unwrap();
4939
4940        rt.execute_query("UPDATE items SET status = 'done'")
4941            .unwrap();
4942
4943        let events = queue_payloads(&rt, "evts");
4944        assert_eq!(events.len(), 3, "expected one update event per row");
4945        for event in &events {
4946            let obj = event.as_object().unwrap();
4947            assert_eq!(
4948                obj.get("op").and_then(crate::json::Value::as_str),
4949                Some("update")
4950            );
4951        }
4952    }
4953
4954    #[test]
4955    fn delete_single_row_emits_delete_event() {
4956        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4957        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (DELETE) TO del_log")
4958            .unwrap();
4959        rt.execute_query("INSERT INTO users (id, name) VALUES (42, 'Alice')")
4960            .unwrap();
4961
4962        rt.execute_query("DELETE FROM users WHERE id = 42").unwrap();
4963
4964        let events = queue_payloads(&rt, "del_log");
4965        assert_eq!(events.len(), 1);
4966        let event = events[0].as_object().expect("event payload object");
4967        assert_eq!(
4968            event.get("op").and_then(crate::json::Value::as_str),
4969            Some("delete")
4970        );
4971        assert_eq!(
4972            event.get("collection").and_then(crate::json::Value::as_str),
4973            Some("users")
4974        );
4975        assert!(event
4976            .get("event_id")
4977            .and_then(crate::json::Value::as_str)
4978            .is_some_and(|v| !v.is_empty()));
4979        let before = event
4980            .get("before")
4981            .and_then(crate::json::Value::as_object)
4982            .expect("before must be an object for delete");
4983        assert_eq!(
4984            before.get("id").and_then(crate::json::Value::as_u64),
4985            Some(42)
4986        );
4987        assert_eq!(
4988            before.get("name").and_then(crate::json::Value::as_str),
4989            Some("Alice")
4990        );
4991        assert!(matches!(event.get("after"), Some(crate::json::Value::Null)));
4992    }
4993
4994    #[test]
4995    fn multi_row_delete_emits_one_event_per_row() {
4996        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4997        rt.execute_query("CREATE TABLE items (id INT, val INT) WITH EVENTS (DELETE) TO del_log")
4998            .unwrap();
4999        rt.execute_query("INSERT INTO items (id, val) VALUES (1, 10), (2, 20), (3, 30)")
5000            .unwrap();
5001
5002        rt.execute_query("DELETE FROM items").unwrap();
5003
5004        let events = queue_payloads(&rt, "del_log");
5005        assert_eq!(events.len(), 3, "expected one delete event per deleted row");
5006        for event in &events {
5007            let obj = event.as_object().unwrap();
5008            assert_eq!(
5009                obj.get("op").and_then(crate::json::Value::as_str),
5010                Some("delete")
5011            );
5012            assert!(matches!(obj.get("after"), Some(crate::json::Value::Null)));
5013        }
5014    }
5015
5016    #[test]
5017    fn ops_filter_update_does_not_emit_on_insert_or_delete() {
5018        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5019        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO evts")
5020            .unwrap();
5021
5022        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5023            .unwrap();
5024        rt.execute_query("DELETE FROM users WHERE id = 1").unwrap();
5025
5026        let events = queue_payloads(&rt, "evts");
5027        assert!(
5028            events.is_empty(),
5029            "UPDATE-only filter must not emit INSERT or DELETE events"
5030        );
5031    }
5032
5033    // ── SUPPRESS EVENTS ────────────────────────────────────────────────────
5034
5035    #[test]
5036    fn suppress_events_on_insert_emits_no_events() {
5037        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5038        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5039            .unwrap();
5040
5041        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5042            .unwrap();
5043
5044        let events = queue_payloads(&rt, "evts");
5045        assert!(
5046            events.is_empty(),
5047            "SUPPRESS EVENTS must prevent INSERT events"
5048        );
5049    }
5050
5051    #[test]
5052    fn suppress_events_on_update_emits_no_events() {
5053        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5054        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5055            .unwrap();
5056        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5057            .unwrap();
5058        // drain the INSERT event
5059        let _ = queue_payloads(&rt, "evts");
5060        // Force pop to drain; simpler: just check new count after UPDATE
5061        rt.execute_query("QUEUE PURGE evts").unwrap();
5062
5063        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1 SUPPRESS EVENTS")
5064            .unwrap();
5065
5066        let events = queue_payloads(&rt, "evts");
5067        assert!(
5068            events.is_empty(),
5069            "SUPPRESS EVENTS must prevent UPDATE events"
5070        );
5071    }
5072
5073    #[test]
5074    fn suppress_events_on_delete_emits_no_events() {
5075        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5076        rt.execute_query(
5077            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (INSERT, DELETE) TO evts",
5078        )
5079        .unwrap();
5080        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5081            .unwrap();
5082
5083        rt.execute_query("DELETE FROM users WHERE id = 1 SUPPRESS EVENTS")
5084            .unwrap();
5085
5086        let events = queue_payloads(&rt, "evts");
5087        assert!(
5088            events.is_empty(),
5089            "SUPPRESS EVENTS must prevent DELETE events"
5090        );
5091    }
5092
5093    #[test]
5094    fn normal_insert_after_suppress_still_emits() {
5095        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5096        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5097            .unwrap();
5098
5099        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5100            .unwrap();
5101        rt.execute_query("INSERT INTO users (id, name) VALUES (2, 'Bob')")
5102            .unwrap();
5103
5104        let events = queue_payloads(&rt, "evts");
5105        assert_eq!(
5106            events.len(),
5107            1,
5108            "only the non-suppressed INSERT should emit"
5109        );
5110        assert_eq!(
5111            events[0].get("id").and_then(crate::json::Value::as_u64),
5112            Some(2)
5113        );
5114    }
5115}