Skip to main content

reddb_server/runtime/
impl_dml.rs

1//! DML execution: INSERT, UPDATE, DELETE via SQL AST
2//!
3//! Implements `execute_insert`, `execute_update`, and `execute_delete` on
4//! `RedDBRuntime`.  Each method translates the parsed AST into entity-level
5//! operations through the existing `RuntimeEntityPort` trait so that all
6//! cross-cutting concerns (WAL, indexing, replication) are automatically
7//! applied.
8
9use crate::application::entity::{
10    metadata_from_json, AppliedEntityMutation, CreateDocumentInput, CreateEdgeInput,
11    CreateEntityOutput, CreateKvInput, CreateNodeInput, CreateRowInput, CreateRowsBatchInput,
12    CreateVectorInput, DeleteEntityInput, PatchEntityOperation, PatchEntityOperationType,
13    RowUpdateColumnRule, RowUpdateContractPlan,
14};
15use crate::application::ports::{
16    build_row_update_contract_plan, entity_row_fields_snapshot,
17    normalize_row_update_assignment_with_plan, normalize_row_update_value_for_rule,
18    RuntimeEntityPort,
19};
20use crate::application::ttl_payload::has_internal_ttl_metadata;
21use crate::presentation::entity_json::storage_value_to_json;
22use crate::storage::query::ast::{BinOp, Expr, FieldRef, ReturningItem, UpdateTarget};
23use crate::storage::query::sql_lowering::{
24    effective_delete_filter, effective_insert_rows, effective_update_filter, fold_expr_to_value,
25};
26use crate::storage::query::unified::{
27    sys_key_collection, sys_key_created_at, sys_key_kind, sys_key_red_entity_id, sys_key_rid,
28    sys_key_tenant, sys_key_updated_at, UnifiedRecord, UnifiedResult,
29};
30use crate::storage::unified::MetadataValue;
31use crate::storage::Metadata;
32use std::collections::HashMap;
33use std::sync::Arc;
34
35use super::*;
36
/// Chunk size used when applying UPDATE mutations.
/// NOTE(review): presumably the maximum number of rows applied per chunk by
/// the UPDATE path — the use site is outside this section; confirm.
const UPDATE_APPLY_CHUNK_SIZE: usize = 2_048;
/// Edge label literal `"TREE_CHILD"`.
/// NOTE(review): presumably labels parent-to-child edges of tree structures —
/// confirm at the use site.
const TREE_CHILD_EDGE_LABEL: &str = "TREE_CHILD";
/// Key prefix literal `"red.tree."`.
/// NOTE(review): presumably namespaces tree-related metadata keys — confirm
/// at the use site.
const TREE_METADATA_PREFIX: &str = "red.tree.";
40
41#[derive(Clone)]
42struct CompiledUpdateAssignment {
43    column: String,
44    expr: Expr,
45    compound_op: Option<BinOp>,
46    metadata_key: Option<&'static str>,
47    row_rule: Option<RowUpdateColumnRule>,
48}
49
50struct CompiledUpdatePlan {
51    static_field_assignments: Vec<(String, Value)>,
52    static_metadata_assignments: Vec<(String, MetadataValue)>,
53    dynamic_assignments: Vec<CompiledUpdateAssignment>,
54    row_contract_plan: Option<RowUpdateContractPlan>,
55    row_modified_columns: Vec<String>,
56    row_touches_unique_columns: bool,
57}
58
59#[derive(Default)]
60struct MaterializedUpdateAssignments {
61    dynamic_field_assignments: Vec<(String, Value)>,
62    dynamic_metadata_assignments: Vec<(String, MetadataValue)>,
63}
64
65impl RedDBRuntime {
66    /// Issue #524 — public read of the in-memory chain tip. Returns `None`
67    /// when the collection is not a chain or has no rows (pre-genesis). On a
68    /// cold cache the first call falls back to a one-time scan so the HTTP
69    /// `GET /collections/:name/chain-tip` handler stays consistent with the
70    /// INSERT path after a restart.
71    pub fn chain_tip_for_collection(
72        &self,
73        collection: &str,
74    ) -> Option<crate::runtime::blockchain_kind::ChainTipFull> {
75        let store = self.inner.db.store();
76        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
77            return None;
78        }
79        let mut cache = self.inner.chain_tip_cache.lock();
80        if let Some(existing) = cache.get(collection) {
81            return Some(existing.clone());
82        }
83        let scanned = crate::runtime::blockchain_kind::chain_tip_full(&store, collection)?;
84        cache.insert(collection.to_string(), scanned.clone());
85        Some(scanned)
86    }
87
88    /// Issue #525 — walks the chain end-to-end, recomputes each block's hash
89    /// against the stored fields, and returns the verification outcome.  On
90    /// `ok == false` the integrity flag is persisted and the in-memory cache
91    /// is updated so subsequent INSERTs surface `ChainIntegrityBroken`.
92    ///
93    /// Returns `None` when the collection is absent or not a `KIND blockchain`.
94    pub fn verify_chain_for_collection(
95        &self,
96        collection: &str,
97    ) -> Option<crate::runtime::blockchain_kind::VerifyChainOutcome> {
98        let store = self.inner.db.store();
99        let outcome = crate::runtime::blockchain_kind::verify_chain_outcome(&store, collection)?;
100        if !outcome.ok {
101            crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, true);
102            self.inner
103                .chain_integrity_broken
104                .lock()
105                .insert(collection.to_string(), true);
106        }
107        Some(outcome)
108    }
109
110    /// Issue #525 — admin clears the `ChainIntegrityBroken` flag so the chain
111    /// accepts INSERTs again.  Returns `false` when the collection is not a
112    /// chain.
113    pub fn clear_chain_integrity_flag(&self, collection: &str) -> bool {
114        let store = self.inner.db.store();
115        if !crate::runtime::blockchain_kind::is_chain(&store, collection) {
116            return false;
117        }
118        crate::runtime::blockchain_kind::persist_integrity_flag(&store, collection, false);
119        self.inner
120            .chain_integrity_broken
121            .lock()
122            .insert(collection.to_string(), false);
123        true
124    }
125
126    /// Issue #525 — INSERT-time check.  Combines in-memory cache (fast path)
127    /// with a one-time scan of `red_config` on cold start so the flag survives
128    /// restart.
129    fn is_chain_integrity_broken(&self, collection: &str) -> bool {
130        {
131            let cache = self.inner.chain_integrity_broken.lock();
132            if let Some(v) = cache.get(collection) {
133                return *v;
134            }
135        }
136        let store = self.inner.db.store();
137        let persisted =
138            crate::runtime::blockchain_kind::is_integrity_broken_persisted(&store, collection)
139                .unwrap_or(false);
140        self.inner
141            .chain_integrity_broken
142            .lock()
143            .insert(collection.to_string(), persisted);
144        persisted
145    }
146
147    /// Issue #765 / S6 — lazily hydrate the integrity-tombstone cache from
148    /// `red_config` on first access. Returns `true` when at least one
149    /// tombstone range is present. Subsequent calls observe the cached state
150    /// flag (`1` empty / `2` present) and skip the store scan.
151    fn ensure_integrity_tombstones_loaded(&self) -> bool {
152        use std::sync::atomic::Ordering;
153        match self
154            .inner
155            .integrity_tombstones_state
156            .load(Ordering::Relaxed)
157        {
158            1 => return false,
159            2 => return true,
160            _ => {}
161        }
162        // Cold: load under the cache lock so a concurrent reader cannot
163        // observe a half-populated vector.
164        let mut guard = self.inner.integrity_tombstones.lock();
165        if self
166            .inner
167            .integrity_tombstones_state
168            .load(Ordering::Relaxed)
169            == 0
170        {
171            let ranges = crate::runtime::integrity_tombstone::load_ranges(&self.inner.db.store());
172            let present = !ranges.is_empty();
173            *guard = ranges;
174            self.inner
175                .integrity_tombstones_state
176                .store(if present { 2 } else { 1 }, Ordering::Relaxed);
177        }
178        self.inner
179            .integrity_tombstones_state
180            .load(Ordering::Relaxed)
181            == 2
182    }
183
184    /// Issue #765 / S6 — durably record an integrity tombstone over the
185    /// inclusive RID range `[lo, hi]` of `table` (the committed rows of an
186    /// input stream whose end-to-end SHA-256 digest did not match). The range
187    /// is persisted to `red_config` (survives restart) and folded into the
188    /// in-memory cache so the same process filters it immediately.
189    pub fn record_integrity_tombstone(&self, table: &str, lo: u64, hi: u64) {
190        use std::sync::atomic::Ordering;
191        self.ensure_integrity_tombstones_loaded();
192        let mut guard = self.inner.integrity_tombstones.lock();
193        guard.push(crate::runtime::integrity_tombstone::TombstoneRange::new(
194            table.to_string(),
195            lo,
196            hi,
197        ));
198        crate::runtime::integrity_tombstone::persist_ranges(&self.inner.db.store(), &guard);
199        self.inner
200            .integrity_tombstones_state
201            .store(2, Ordering::Relaxed);
202    }
203
204    /// Issue #765 / S6 — snapshot of the currently-cached tombstone ranges.
205    /// Intended for tests and forensic surfaces; the read path uses
206    /// [`Self::filter_integrity_tombstoned`] which avoids the clone.
207    pub fn integrity_tombstone_ranges(
208        &self,
209    ) -> Vec<crate::runtime::integrity_tombstone::TombstoneRange> {
210        self.ensure_integrity_tombstones_loaded();
211        self.inner.integrity_tombstones.lock().clone()
212    }
213
214    /// Issue #765 / S6 — drop tombstoned rows from a SELECT result in place.
215    /// Fast no-op (one relaxed atomic load) when no tombstone has ever been
216    /// recorded. Clears `pre_serialized_json` when any row is removed so the
217    /// fast-path JSON cannot leak a filtered row back onto the wire.
218    pub fn filter_integrity_tombstoned(&self, result: &mut UnifiedResult) {
219        if !self.ensure_integrity_tombstones_loaded() {
220            return;
221        }
222        let guard = self.inner.integrity_tombstones.lock();
223        if guard.is_empty() {
224            return;
225        }
226        let before = result.records.len();
227        result.records.retain(|record| {
228            !crate::runtime::integrity_tombstone::record_tombstoned(&guard, record)
229        });
230        if result.records.len() != before {
231            result.pre_serialized_json = None;
232        }
233    }
234
235    /// Phase 2.5.4: inject `CURRENT_TENANT()` into an INSERT when the
236    /// target table is tenant-scoped and the user's column list does
237    /// not already name the tenant column.
238    ///
239    /// Returns:
240    /// * `Ok(None)` — no injection needed (non-tenant table, or user
241    ///   supplied the column explicitly). Caller uses the original
242    ///   query unchanged.
243    /// * `Ok(Some(augmented))` — a cloned query with the tenant column
244    ///   + literal value appended to every row.
245    /// * `Err(..)` — table is tenant-scoped but no tenant is bound to
246    ///   the current session. Fails loudly so callers don't produce
247    ///   rows that RLS would then hide on read.
248    fn maybe_inject_tenant_column(&self, query: &InsertQuery) -> RedDBResult<Option<InsertQuery>> {
249        let Some(tenant_col) = self.tenant_column(&query.table) else {
250            return Ok(None);
251        };
252        // User already named the column (literal match) — trust them.
253        if query
254            .columns
255            .iter()
256            .any(|c| c.eq_ignore_ascii_case(&tenant_col))
257        {
258            return Ok(None);
259        }
260
261        // Phase 2 PG parity: dotted-path tenancy. When `tenant_col` is a
262        // nested key like `headers.tenant` we operate on the root
263        // column (`headers`) and set / add the nested path inside its
264        // JSON value. If the user named the root column we mutate in
265        // place; otherwise we create a fresh JSON column for every row.
266        if let Some(dot_pos) = tenant_col.find('.') {
267            let (root, tail) = tenant_col.split_at(dot_pos);
268            let tail = &tail[1..]; // drop leading '.'
269            return self.inject_dotted_tenant(query, root, tail);
270        }
271
272        let Some(tenant_id) = crate::runtime::impl_core::current_tenant() else {
273            return Err(RedDBError::Query(format!(
274                "INSERT into tenant-scoped table '{}' requires an active tenant — \
275                 run SET TENANT '<id>' first or name column '{}' explicitly",
276                query.table, tenant_col
277            )));
278        };
279
280        let mut augmented = query.clone();
281        augmented.columns.push(tenant_col);
282        let lit = Value::text(tenant_id.clone());
283        for row in augmented.values.iter_mut() {
284            row.push(lit.clone());
285        }
286        for row in augmented.value_exprs.iter_mut() {
287            row.push(crate::storage::query::ast::Expr::Literal {
288                value: lit.clone(),
289                span: crate::storage::query::ast::Span::synthetic(),
290            });
291        }
292        Ok(Some(augmented))
293    }
294
295    /// Dotted-path auto-fill — set `root.tail` to `CURRENT_TENANT()` on
296    /// every row. Mirrors `maybe_inject_tenant_column` but mutates
297    /// nested JSON instead of appending a flat column.
298    ///
299    /// Cases:
300    /// * Root column already in the INSERT list → mutate per-row JSON
301    ///   (parse, set path, re-serialize).
302    /// * Root column absent → create a fresh `{tail: tenant}` JSON
303    ///   object and append the root column to the INSERT.
304    fn inject_dotted_tenant(
305        &self,
306        query: &InsertQuery,
307        root: &str,
308        tail: &str,
309    ) -> RedDBResult<Option<InsertQuery>> {
310        let active_tenant = crate::runtime::impl_core::current_tenant();
311        let mut augmented = query.clone();
312        let root_idx = augmented
313            .columns
314            .iter()
315            .position(|c| c.eq_ignore_ascii_case(root));
316
317        if let Some(idx) = root_idx {
318            // User supplied the root column. Per-row: if the dotted
319            // tail is already present we trust the user (admin / bulk
320            // loader scenario); otherwise fill from the active
321            // tenant. An unbound tenant is only an error when some
322            // row actually needs filling.
323            for row in augmented.values.iter_mut() {
324                let Some(slot) = row.get_mut(idx) else {
325                    continue;
326                };
327                if dotted_tail_already_set(slot, tail) {
328                    continue;
329                }
330                let Some(tenant_id) = &active_tenant else {
331                    return Err(RedDBError::Query(format!(
332                        "INSERT into tenant-scoped table '{}' requires an active tenant — \
333                         run SET TENANT '<id>' first or set '{}.{}' explicitly in each row",
334                        query.table, root, tail
335                    )));
336                };
337                *slot = merge_dotted_tenant(slot.clone(), tail, tenant_id)?;
338            }
339            // Expression row is kept in sync by re-wrapping the
340            // mutated literal; the canonical path will re-evaluate
341            // against the same JSON shape.
342            for (row_idx, row) in augmented.value_exprs.iter_mut().enumerate() {
343                if let Some(slot) = row.get_mut(idx) {
344                    let new_value = augmented
345                        .values
346                        .get(row_idx)
347                        .and_then(|v| v.get(idx))
348                        .cloned()
349                        .unwrap_or(Value::Null);
350                    *slot = crate::storage::query::ast::Expr::Literal {
351                        value: new_value,
352                        span: crate::storage::query::ast::Span::synthetic(),
353                    };
354                }
355            }
356        } else {
357            // No root column in the INSERT list — auto-fill needs a
358            // bound tenant to synthesise one. Error loud so we never
359            // create a tenant-less row that RLS would then hide.
360            let Some(tenant_id) = &active_tenant else {
361                return Err(RedDBError::Query(format!(
362                    "INSERT into tenant-scoped table '{}' requires an active tenant — \
363                     run SET TENANT '<id>' first or name path '{}.{}' explicitly",
364                    query.table, root, tail
365                )));
366            };
367            // Create a fresh JSON column with only the tenant path set.
368            augmented.columns.push(root.to_string());
369            let fresh = merge_dotted_tenant(Value::Null, tail, tenant_id)?;
370            for row in augmented.values.iter_mut() {
371                row.push(fresh.clone());
372            }
373            for row in augmented.value_exprs.iter_mut() {
374                row.push(crate::storage::query::ast::Expr::Literal {
375                    value: fresh.clone(),
376                    span: crate::storage::query::ast::Span::synthetic(),
377                });
378            }
379        }
380
381        Ok(Some(augmented))
382    }
383
384    /// Returns `(affected_count, lsns)`. For the txn (xmax-stamp) path,
385    /// `lsns` is empty because events fire at commit time.
386    fn delete_entities_batch(
387        &self,
388        collection: &str,
389        ids: &[EntityId],
390    ) -> RedDBResult<(u64, Vec<u64>)> {
391        if ids.is_empty() {
392            return Ok((0, vec![]));
393        }
394
395        let store = self.db().store();
396        let Some(manager) = store.get_collection(collection) else {
397            return Ok((0, vec![]));
398        };
399
400        let active_xid = self.current_xid();
401        let conn_id = crate::runtime::impl_core::current_connection_id();
402        let mut autocommit_xid = None;
403        let mut tombstoned_ids = Vec::new();
404        let mut tombstoned_entities = Vec::new();
405        let mut physical_delete_ids = Vec::new();
406        let table_row_resolver =
407            crate::runtime::table_row_mvcc_resolver::TableRowMvccReadResolver::current_statement();
408
409        for &id in ids {
410            let Some(mut entity) = manager.get(id) else {
411                continue;
412            };
413            if matches!(entity.data, EntityData::Row(_)) {
414                let previous_xmax = entity.xmax;
415                if matches!(entity.kind, crate::storage::EntityKind::TableRow { .. }) {
416                    if table_row_resolver.resolve_candidate(&entity).is_none() {
417                        continue;
418                    }
419                } else if entity.xmax != 0 {
420                    continue;
421                }
422
423                let xid = match active_xid {
424                    Some(xid) => xid,
425                    None => match autocommit_xid {
426                        Some(xid) => xid,
427                        None => {
428                            let mgr = self.snapshot_manager();
429                            let xid = mgr.begin();
430                            autocommit_xid = Some(xid);
431                            xid
432                        }
433                    },
434                };
435                entity.set_xmax(xid);
436                if manager.update(entity.clone()).is_ok() {
437                    if active_xid.is_some() {
438                        self.record_pending_tombstone(conn_id, collection, id, xid, previous_xmax);
439                    }
440                    tombstoned_entities.push(entity);
441                    tombstoned_ids.push(id);
442                }
443            } else {
444                physical_delete_ids.push(id);
445            }
446        }
447
448        if let Some(xid) = autocommit_xid {
449            self.snapshot_manager().commit(xid);
450        }
451
452        let mut affected = tombstoned_ids.len() as u64;
453        let mut lsns = Vec::with_capacity(tombstoned_ids.len() + physical_delete_ids.len());
454        if active_xid.is_some() {
455            store
456                .persist_entities_to_pager(collection, &tombstoned_entities)
457                .map_err(|err| RedDBError::Internal(err.to_string()))?;
458        } else {
459            store
460                .persist_entities_to_pager(collection, &tombstoned_entities)
461                .map_err(|err| RedDBError::Internal(err.to_string()))?;
462            for id in &tombstoned_ids {
463                store.context_index().remove_entity(*id);
464                let lsn = self.cdc_emit(
465                    crate::replication::cdc::ChangeOperation::Delete,
466                    collection,
467                    id.raw(),
468                    "entity",
469                );
470                lsns.push(lsn);
471            }
472        }
473
474        let deleted_ids = store
475            .delete_batch(collection, &physical_delete_ids)
476            .map_err(|err| RedDBError::Internal(err.to_string()))?;
477        affected += deleted_ids.len() as u64;
478        for id in &deleted_ids {
479            store.context_index().remove_entity(*id);
480            let lsn = self.cdc_emit(
481                crate::replication::cdc::ChangeOperation::Delete,
482                collection,
483                id.raw(),
484                "entity",
485            );
486            lsns.push(lsn);
487        }
488
489        Ok((affected, lsns))
490    }
491
492    /// Flushes context-index updates and CDC for each applied mutation.
493    /// Returns one LSN per entity in the same order as `applied`.
494    fn flush_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<Vec<u64>> {
495        if applied.is_empty() {
496            return Ok(Vec::new());
497        }
498
499        let store = self.db().store();
500        if applied.iter().any(|item| item.context_index_dirty) {
501            store.context_index().index_entities(
502                &applied[0].collection,
503                applied
504                    .iter()
505                    .filter(|item| item.context_index_dirty)
506                    .map(|item| &item.entity),
507            );
508        }
509
510        for item in applied {
511            self.refresh_update_secondary_indexes(item)?;
512        }
513
514        let mut lsns = Vec::with_capacity(applied.len());
515        for item in applied {
516            let lsn = self.cdc_emit_prebuilt(
517                crate::replication::cdc::ChangeOperation::Update,
518                &item.collection,
519                &item.entity,
520                update_cdc_item_kind(self, &item.collection, &item.entity),
521                item.metadata.as_ref(),
522                false,
523            );
524            lsns.push(lsn);
525        }
526        Ok(lsns)
527    }
528
529    fn persist_update_chunk(&self, applied: &[AppliedEntityMutation]) -> RedDBResult<()> {
530        self.persist_applied_entity_mutations(applied)
531    }
532
533    fn refresh_update_secondary_indexes(&self, applied: &AppliedEntityMutation) -> RedDBResult<()> {
534        if applied.pre_mutation_fields.is_empty() {
535            return Ok(());
536        }
537        let post = entity_row_fields_snapshot(&applied.entity);
538        if post.is_empty() {
539            return Ok(());
540        }
541
542        let indexed_cols = self
543            .index_store_ref()
544            .indexed_columns_set(&applied.collection);
545        if indexed_cols.is_empty() {
546            return Ok(());
547        }
548
549        if let Some(old_version) = applied.replaced_entity.as_ref() {
550            let old_index_fields: Vec<(String, crate::storage::schema::Value)> = applied
551                .pre_mutation_fields
552                .iter()
553                .filter(|(col, _)| indexed_cols.contains(col))
554                .cloned()
555                .collect();
556            let new_index_fields: Vec<(String, crate::storage::schema::Value)> = post
557                .iter()
558                .filter(|(col, _)| indexed_cols.contains(col))
559                .cloned()
560                .collect();
561            if !old_index_fields.is_empty() {
562                self.index_store_ref()
563                    .index_entity_delete(&applied.collection, old_version.id, &old_index_fields)
564                    .map_err(crate::RedDBError::Internal)?;
565            }
566            if !new_index_fields.is_empty() {
567                self.index_store_ref()
568                    .index_entity_insert(&applied.collection, applied.entity.id, &new_index_fields)
569                    .map_err(crate::RedDBError::Internal)?;
570            }
571            return Ok(());
572        }
573
574        let damage =
575            crate::application::entity::row_damage_vector(&applied.pre_mutation_fields, &post);
576        if damage
577            .touched_columns()
578            .into_iter()
579            .any(|col| indexed_cols.contains(col))
580        {
581            self.index_store_ref()
582                .index_entity_update(
583                    &applied.collection,
584                    applied.id,
585                    &applied.pre_mutation_fields,
586                    &post,
587                )
588                .map_err(crate::RedDBError::Internal)?;
589        }
590        Ok(())
591    }
592
593    /// Execute INSERT INTO table [entity_type] (cols) VALUES (vals), ...
594    ///
595    /// Each row in `query.values` is zipped with `query.columns` to produce a
596    /// set of named fields, which is then dispatched based on entity_type.
597    pub fn execute_insert(
598        &self,
599        raw_query: &str,
600        query: &InsertQuery,
601    ) -> RedDBResult<RuntimeQueryResult> {
602        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
603        // CollectionContract gate (#49): single entry point for the
604        // operator's collection-level write rules. Today this is a
605        // no-op for INSERT (APPEND ONLY permits insert); routing
606        // through the gate now means future contract bits — versioned,
607        // vault-only writes — plug in once instead of per verb.
608        crate::runtime::collection_contract::CollectionContractGate::check(
609            self,
610            &query.table,
611            crate::runtime::collection_contract::MutationKind::Insert,
612        )?;
613        // Phase 2.5.4 table-scoped tenancy: if the target table is
614        // tenant-scoped and the user didn't name the tenant column,
615        // auto-inject it with the thread-local `CURRENT_TENANT()`
616        // value. When the column is named explicitly we trust the
617        // caller (useful for admin tooling that writes on behalf of
618        // specific tenants). An unbound tenant on an implicit-fill
619        // path errors up front rather than producing a row the RLS
620        // policy would silently hide.
621        let augmented_owned;
622        let query = match self.maybe_inject_tenant_column(query)? {
623            Some(new_q) => {
624                augmented_owned = new_q;
625                &augmented_owned
626            }
627            None => query,
628        };
629        self.check_insert_column_policy(query)?;
630        if let Some(ref embed_config) = query.auto_embed {
631            let provider = crate::ai::parse_provider(&embed_config.provider)?;
632            // S3 / #711: planner-level provider gate. Runs before the
633            // local-model preflight and the API-key resolver so neither
634            // side-effect fires when policy denies.
635            crate::runtime::ai::provider_gate::enforce(self, &provider)?;
636            if matches!(provider, crate::ai::AiProvider::Local) {
637                crate::runtime::ai::local_embedding::ensure_local_embedding_available()?;
638                // Issue #682 — pre-flight the local model registry before
639                // any row write. Missing model, uninstalled artifacts,
640                // wrong task, and disabled-feature failures surface as
641                // deterministic errors that leave the target collection
642                // untouched, satisfying the "no partial writes on
643                // embedding failure" criterion for the failure modes
644                // owned by the local provider.
645                let model_name = embed_config.model.as_deref().map(str::trim).unwrap_or("");
646                if model_name.is_empty() {
647                    return Err(RedDBError::Query(
648                        "AUTO EMBED with provider=local requires MODEL '<registered-model-name>'; \
649                         the local provider does not have an implicit default model"
650                            .to_string(),
651                    ));
652                }
653                crate::runtime::ai::local_embedding::preflight_local_embedding(
654                    &self.inner.db,
655                    model_name,
656                )?;
657            }
658        }
659
660        let mut inserted_count: u64 = 0;
661        let effective_rows =
662            effective_insert_rows(query).map_err(|msg| RedDBError::Query(msg.to_string()))?;
663
664        // Ensure the collection exists (auto-create on first insert).
665        let store = self.inner.db.store();
666        let _ = store.get_or_create_collection(&query.table);
667        let declared_model = self
668            .db()
669            .collection_contract_arc(&query.table)
670            .map(|contract| contract.declared_model);
671
672        let mut returning_snapshots: Option<Vec<Vec<(String, Value)>>> =
673            if query.returning.is_some() {
674                Some(Vec::with_capacity(effective_rows.len()))
675            } else {
676                None
677            };
678        let mut returning_result: Option<UnifiedResult> = None;
679
680        if matches!(query.entity_type, InsertEntityType::Row)
681            && !matches!(
682                declared_model,
683                Some(crate::catalog::CollectionModel::TimeSeries)
684            )
685        {
686            // Issue #523 + #524: blockchain collections seal each row into the
687            // chain. When the caller omits the reserved columns, the engine
688            // auto-fills (#523). When the caller supplies any reserved column,
689            // the values are validated against the current tip and a mismatch
690            // surfaces a `BlockchainConflict:` error mapped to HTTP 409 (#524).
691            //
692            // The whole batch runs under a per-collection chain lock so two
693            // concurrent submitters can't both bind to the same prev_hash —
694            // the loser observes the advanced tip and gets 409 with the new
695            // tip so it can retry.
696            let chain_mode = crate::runtime::blockchain_kind::is_chain(&store, &query.table);
697            let _chain_lock_arc: Option<Arc<parking_lot::Mutex<()>>> = if chain_mode {
698                Some(self.inner.rmw_locks.lock_for(&query.table, "__chain__"))
699            } else {
700                None
701            };
702            let _chain_guard = _chain_lock_arc.as_ref().map(|m| m.lock());
703
704            // Issue #525 — refuse new blocks if the chain has been marked
705            // `integrity = broken` until an admin clears the flag.
706            if chain_mode && self.is_chain_integrity_broken(&query.table) {
707                return Err(RedDBError::InvalidOperation(format!(
708                    "ChainIntegrityBroken: collection '{}' is locked until \
709                     POST /collections/{}/clear-integrity-flag is called by an admin",
710                    query.table, query.table
711                )));
712            }
713
714            // Pull the tip from the in-memory cache, falling back to a one-time
715            // scan on a cache miss (cold start after restart). The local tip
716            // advances per sealed row; the cache only advances after the batch persists.
717            let mut chain_tip_full: Option<crate::runtime::blockchain_kind::ChainTipFull> =
718                if chain_mode {
719                    let mut cache = self.inner.chain_tip_cache.lock();
720                    if let Some(existing) = cache.get(&query.table) {
721                        Some(existing.clone())
722                    } else if let Some(scanned) =
723                        crate::runtime::blockchain_kind::chain_tip_full(&store, &query.table)
724                    {
725                        cache.insert(query.table.clone(), scanned.clone());
726                        Some(scanned)
727                    } else {
728                        None
729                    }
730                } else {
731                    None
732                };
733
734            let mut rows = Vec::with_capacity(effective_rows.len());
735            for row_values in &effective_rows {
736                if row_values.len() != query.columns.len() {
737                    return Err(RedDBError::Query(format!(
738                        "INSERT column count ({}) does not match value count ({})",
739                        query.columns.len(),
740                        row_values.len()
741                    )));
742                }
743                let (mut fields, mut metadata) =
744                    split_insert_metadata(self, &query.columns, row_values)?;
745                if chain_mode {
746                    use crate::runtime::blockchain_kind::{
747                        chain_conflict_error, COL_BLOCK_HEIGHT, COL_HASH, COL_PREV_HASH,
748                        COL_TIMESTAMP, RESERVED_COLUMNS,
749                    };
750                    let supplied_height = fields
751                        .iter()
752                        .find(|(k, _)| k == COL_BLOCK_HEIGHT)
753                        .map(|(_, v)| v.clone());
754                    let supplied_prev = fields
755                        .iter()
756                        .find(|(k, _)| k == COL_PREV_HASH)
757                        .map(|(_, v)| v.clone());
758                    let supplied_ts = fields
759                        .iter()
760                        .find(|(k, _)| k == COL_TIMESTAMP)
761                        .map(|(_, v)| v.clone());
762                    let supplied_hash = fields.iter().any(|(k, _)| k == COL_HASH);
763                    let user_supplied_any = supplied_height.is_some()
764                        || supplied_prev.is_some()
765                        || supplied_ts.is_some()
766                        || supplied_hash;
767
768                    fields.retain(|(k, _)| !RESERVED_COLUMNS.contains(&k.as_str()));
769                    let payload = crate::runtime::blockchain_kind::canonical_payload(&fields);
770
771                    let (tip_prev_hash, tip_next_height) = match &chain_tip_full {
772                        Some(t) => (t.hash, t.height + 1),
773                        None => (crate::storage::blockchain::GENESIS_PREV_HASH, 0u64),
774                    };
775                    let server_now = crate::runtime::blockchain_kind::now_ms();
776
777                    let (use_prev, use_height, use_ts) = if user_supplied_any {
778                        // Caller is participating in the chain protocol: prev_hash,
779                        // block_height and timestamp are all required; hash must be omitted.
780                        if supplied_hash {
781                            return Err(chain_conflict_error(
782                                tip_next_height.saturating_sub(1),
783                                tip_prev_hash,
784                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
785                                server_now,
786                                "hash column is engine-computed and cannot be supplied",
787                            ));
788                        }
789                        let caller_prev = match &supplied_prev {
790                            Some(Value::Blob(b)) if b.len() == 32 => {
791                                let mut a = [0u8; 32];
792                                a.copy_from_slice(b);
793                                a
794                            }
795                            Some(Value::Text(s)) if s.len() == 64 => {
796                                // Accept hex-encoded prev_hash so JSON / SQL
797                                // callers without literal-blob syntax can
798                                // still participate in the chain protocol.
799                                let mut a = [0u8; 32];
800                                let mut ok = true;
801                                for (i, slot) in a.iter_mut().enumerate() {
802                                    let pair = &s.as_ref()[i * 2..i * 2 + 2];
803                                    match u8::from_str_radix(pair, 16) {
804                                        Ok(byte) => *slot = byte,
805                                        Err(_) => {
806                                            ok = false;
807                                            break;
808                                        }
809                                    }
810                                }
811                                if !ok {
812                                    return Err(chain_conflict_error(
813                                        tip_next_height.saturating_sub(1),
814                                        tip_prev_hash,
815                                        chain_tip_full
816                                            .as_ref()
817                                            .map(|t| t.timestamp_ms)
818                                            .unwrap_or(0),
819                                        server_now,
820                                        "prev_hash is not valid hex",
821                                    ));
822                                }
823                                a
824                            }
825                            _ => {
826                                return Err(chain_conflict_error(
827                                    tip_next_height.saturating_sub(1),
828                                    tip_prev_hash,
829                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
830                                    server_now,
831                                    "prev_hash missing or not a 32-byte Blob",
832                                ));
833                            }
834                        };
835                        if caller_prev != tip_prev_hash {
836                            return Err(chain_conflict_error(
837                                tip_next_height.saturating_sub(1),
838                                tip_prev_hash,
839                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
840                                server_now,
841                                "prev_hash does not match current tip",
842                            ));
843                        }
844                        let caller_height = match &supplied_height {
845                            Some(Value::UnsignedInteger(v)) => *v,
846                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
847                            _ => {
848                                return Err(chain_conflict_error(
849                                    tip_next_height.saturating_sub(1),
850                                    tip_prev_hash,
851                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
852                                    server_now,
853                                    "block_height missing or not an unsigned integer",
854                                ));
855                            }
856                        };
857                        if caller_height != tip_next_height {
858                            return Err(chain_conflict_error(
859                                tip_next_height.saturating_sub(1),
860                                tip_prev_hash,
861                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
862                                server_now,
863                                "block_height does not match tip+1",
864                            ));
865                        }
866                        let caller_ts = match &supplied_ts {
867                            Some(Value::UnsignedInteger(v)) => *v,
868                            Some(Value::Integer(v)) if *v >= 0 => *v as u64,
869                            _ => {
870                                return Err(chain_conflict_error(
871                                    tip_next_height.saturating_sub(1),
872                                    tip_prev_hash,
873                                    chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
874                                    server_now,
875                                    "timestamp missing or not an unsigned integer",
876                                ));
877                            }
878                        };
879                        let drift = (caller_ts as i128) - (server_now as i128);
880                        if drift.abs() > 60_000 {
881                            return Err(chain_conflict_error(
882                                tip_next_height.saturating_sub(1),
883                                tip_prev_hash,
884                                chain_tip_full.as_ref().map(|t| t.timestamp_ms).unwrap_or(0),
885                                server_now,
886                                "timestamp outside ±60s of server_time",
887                            ));
888                        }
889                        (caller_prev, caller_height, caller_ts)
890                    } else {
891                        (tip_prev_hash, tip_next_height, server_now)
892                    };
893
894                    let (reserved, new_hash) =
895                        crate::runtime::blockchain_kind::make_block_reserved_fields(
896                            use_prev, use_height, use_ts, &payload,
897                        );
898                    fields.extend(reserved);
899                    chain_tip_full = Some(crate::runtime::blockchain_kind::ChainTipFull {
900                        height: use_height,
901                        hash: new_hash,
902                        timestamp_ms: use_ts,
903                    });
904                }
905                // Issue #522 — signed-writes verification. On collections
906                // created with `SIGNED_BY (...)` the row must carry valid
907                // `signer_pubkey` + `signature` reserved columns. Runs
908                // after chain_mode so canonical payload covers user-supplied
909                // fields only (blockchain reserved columns are filtered by
910                // `canonical_payload`; the two signed-writes reserved
911                // columns are split out before payload computation, then
912                // re-attached for storage). The blockchain + SIGNED_BY
913                // composition is owned by issue #526; we keep #522 to the
914                // non-chain path and let chain_mode collections punt to that
915                // slice rather than half-wire it here.
916                if crate::runtime::signed_writes_kind::is_signed(&store, &query.table) {
917                    let (pk_col, sig_col, residual) =
918                        crate::runtime::signed_writes_kind::split_signature_fields(fields);
919                    let payload = crate::runtime::blockchain_kind::canonical_payload(&residual);
920                    let reg = crate::runtime::signed_writes_kind::registry(&store, &query.table);
921                    crate::runtime::signed_writes_kind::verify_row(
922                        &reg,
923                        pk_col.as_ref().map(|c| c.bytes.as_slice()),
924                        sig_col.as_ref().map(|c| c.bytes.as_slice()),
925                        &payload,
926                    )
927                    .map_err(crate::runtime::signed_writes_kind::map_error)?;
928                    fields = residual;
929                    // Round-trip the reserved columns with the value
930                    // type the caller supplied (Text/hex on the SQL path,
931                    // Blob on the binary path). Keeps SELECT and WHERE
932                    // predicates symmetric with the INSERT shape.
933                    if let Some(col) = pk_col {
934                        fields.push((
935                            crate::storage::signed_writes::RESERVED_SIGNER_PUBKEY_COL.to_string(),
936                            col.raw_value,
937                        ));
938                    }
939                    if let Some(col) = sig_col {
940                        fields.push((
941                            crate::storage::signed_writes::RESERVED_SIGNATURE_COL.to_string(),
942                            col.raw_value,
943                        ));
944                    }
945                }
946                merge_with_clauses(
947                    &mut metadata,
948                    query.ttl_ms,
949                    query.expires_at_ms,
950                    &query.with_metadata,
951                );
952                if let Some(snaps) = returning_snapshots.as_mut() {
953                    snaps.push(fields.clone());
954                }
955                rows.push(CreateRowInput {
956                    collection: query.table.clone(),
957                    fields,
958                    metadata,
959                    node_links: Vec::new(),
960                    vector_links: Vec::new(),
961                });
962            }
963            let outputs = self.create_rows_batch(CreateRowsBatchInput {
964                collection: query.table.clone(),
965                rows,
966                suppress_events: query.suppress_events,
967            })?;
968            inserted_count = outputs.len() as u64;
969
970            // Chain mode: commit the new tip to the in-memory cache only after
971            // the batch persisted successfully. If the batch failed, the `?` above
972            // already returned: the cache keeps the previous tip and the chain lock releases.
973            if chain_mode {
974                if let Some(new_tip) = chain_tip_full.as_ref() {
975                    self.inner
976                        .chain_tip_cache
977                        .lock()
978                        .insert(query.table.clone(), new_tip.clone());
979                }
980            }
981
982            // Hypertable chunk routing: if this table was declared via
983            // CREATE HYPERTABLE, register each row's time-column value
984            // with the registry so chunk metadata (bounds, row counts,
985            // TTL eligibility) stays current. This is what lets
986            // HYPERTABLE_PRUNE_CHUNKS answer real questions + lets the
987            // retention daemon sweep expired chunks without scanning
988            // every row.
989            if let Some(spec) = self.inner.db.hypertables().get(&query.table) {
990                let time_col = &spec.time_column;
991                // Find the column's index in the INSERT column list.
992                if let Some(idx) = query.columns.iter().position(|c| c == time_col) {
993                    for row in &effective_rows {
994                        if let Some(Value::Integer(n) | Value::BigInt(n)) = row.get(idx) {
995                            if *n >= 0 {
996                                let _ = self.inner.db.hypertables().route(&query.table, *n as u64);
997                            }
998                        } else if let Some(Value::UnsignedInteger(n)) = row.get(idx) {
999                            let _ = self.inner.db.hypertables().route(&query.table, *n);
1000                        }
1001                    }
1002                }
1003            }
1004
1005            if let (Some(items), Some(snaps)) =
1006                (query.returning.as_ref(), returning_snapshots.take())
1007            {
1008                let snaps = row_insert_returning_snapshots(&outputs, snaps);
1009                returning_result = Some(build_returning_result(items, &snaps, Some(&outputs)));
1010            }
1011        } else {
1012            // Issue #419: surface the inserted entity id on every INSERT path.
1013            // For Node/Edge/Vector/Document/Kv we now keep each CreateEntityOutput
1014            // so a RETURNING clause (and the unconditional inserted_ids list,
1015            // below) can expose the engine-assigned id. TimeSeries (the Row
1016            // branch in this else) still rejects RETURNING with a not-supported
1017            // error, and its insert_timeseries_point result is not kept in entity_outputs.
1018            let mut entity_outputs: Vec<crate::application::entity::CreateEntityOutput> =
1019                Vec::with_capacity(effective_rows.len());
1020            let mut returning_field_snaps: Vec<Vec<(String, Value)>> = if query.returning.is_some()
1021            {
1022                Vec::with_capacity(effective_rows.len())
1023            } else {
1024                Vec::new()
1025            };
1026            if matches!(
1027                query.entity_type,
1028                InsertEntityType::Node | InsertEntityType::Edge
1029            ) {
1030                enum PreparedGraphInsert { // one checked NODE/EDGE row, staged for the batch write
                        // A NODE row. `fields` is the column/value list returned by
                        // `split_insert_metadata` (pushed as a RETURNING snapshot when the
                        // statement has a RETURNING clause); `input` is the node payload
                        // later handed to `batch.add_node_with_type`.
1031                    Node {
1032                        fields: Vec<(String, Value)>,
1033                        input: CreateNodeInput,
1034                    },
                        // An EDGE row with the same layout as `Node`. Its `from`/`to`
                        // endpoints are resolved while the row is prepared, and `input`
                        // is later handed to `batch.add_edge`.
1035                    Edge {
1036                        fields: Vec<(String, Value)>,
1037                        input: CreateEdgeInput,
1038                    },
1039                }
1040
1041                let mut prepared = Vec::with_capacity(effective_rows.len());
1042                for row_values in &effective_rows {
1043                    if row_values.len() != query.columns.len() {
1044                        return Err(RedDBError::Query(format!(
1045                            "INSERT column count ({}) does not match value count ({})",
1046                            query.columns.len(),
1047                            row_values.len()
1048                        )));
1049                    }
1050
1051                    match query.entity_type {
1052                        InsertEntityType::Node => {
1053                            let (node_values, mut metadata) =
1054                                split_insert_metadata(self, &query.columns, row_values)?;
1055                            merge_with_clauses(
1056                                &mut metadata,
1057                                query.ttl_ms,
1058                                query.expires_at_ms,
1059                                &query.with_metadata,
1060                            );
1061                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1062                            apply_collection_default_ttl_metadata(
1063                                self,
1064                                &query.table,
1065                                &mut metadata,
1066                            );
1067                            let (columns, values) = pairwise_columns_values(&node_values);
1068                            let label = find_column_value_string(&columns, &values, "label")?;
1069                            let node_type =
1070                                find_column_value_opt_string(&columns, &values, "node_type");
1071                            let properties = extract_remaining_properties(
1072                                &columns,
1073                                &values,
1074                                &["label", "node_type"],
1075                            );
1076                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1077                                properties.iter().map(|(key, _)| key.as_str()),
1078                                &format!("node '{}'", query.table),
1079                            )?;
1080                            prepared.push(PreparedGraphInsert::Node {
1081                                fields: node_values,
1082                                input: CreateNodeInput {
1083                                    collection: query.table.clone(),
1084                                    label,
1085                                    node_type,
1086                                    properties,
1087                                    metadata,
1088                                    embeddings: Vec::new(),
1089                                    table_links: Vec::new(),
1090                                    node_links: Vec::new(),
1091                                },
1092                            });
1093                        }
1094                        InsertEntityType::Edge => {
1095                            let (edge_values, mut metadata) =
1096                                split_insert_metadata(self, &query.columns, row_values)?;
1097                            merge_with_clauses(
1098                                &mut metadata,
1099                                query.ttl_ms,
1100                                query.expires_at_ms,
1101                                &query.with_metadata,
1102                            );
1103                            ensure_non_tree_reserved_metadata_entries(&metadata)?;
1104                            apply_collection_default_ttl_metadata(
1105                                self,
1106                                &query.table,
1107                                &mut metadata,
1108                            );
1109                            let (columns, values) = pairwise_columns_values(&edge_values);
1110                            let label = find_column_value_string(&columns, &values, "label")?;
1111                            ensure_non_tree_structural_edge_label(&label)?;
1112                            let from_id = resolve_edge_endpoint_any(
1113                                self.inner.db.store().as_ref(),
1114                                &query.table,
1115                                &columns,
1116                                &values,
1117                                &["from_rid", "from"],
1118                            )?;
1119                            let to_id = resolve_edge_endpoint_any(
1120                                self.inner.db.store().as_ref(),
1121                                &query.table,
1122                                &columns,
1123                                &values,
1124                                &["to_rid", "to"],
1125                            )?;
1126                            let weight = find_column_value_f32_opt(&columns, &values, "weight");
1127                            let properties = extract_remaining_properties(
1128                                &columns,
1129                                &values,
1130                                &["label", "from_rid", "to_rid", "from", "to", "weight"],
1131                            );
1132                            crate::reserved_fields::ensure_no_reserved_public_item_fields(
1133                                properties.iter().map(|(key, _)| key.as_str()),
1134                                &format!("edge '{}'", query.table),
1135                            )?;
1136                            prepared.push(PreparedGraphInsert::Edge {
1137                                fields: edge_values,
1138                                input: CreateEdgeInput {
1139                                    collection: query.table.clone(),
1140                                    label,
1141                                    from: EntityId::new(from_id),
1142                                    to: EntityId::new(to_id),
1143                                    weight,
1144                                    properties,
1145                                    metadata,
1146                                },
1147                            });
1148                        }
1149                        _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1150                    }
1151                }
1152
1153                ensure_graph_insert_contract(self, &query.table)?;
1154                let mut batch = self.inner.db.batch();
1155                for item in prepared {
1156                    match item {
1157                        PreparedGraphInsert::Node { fields, input } => {
1158                            if query.returning.is_some() {
1159                                returning_field_snaps.push(fields);
1160                            }
1161                            let node_type = input.node_type.unwrap_or_else(|| input.label.clone());
1162                            batch = batch.add_node_with_type(
1163                                input.collection,
1164                                input.label,
1165                                node_type,
1166                                input.properties.into_iter().collect(),
1167                                input.metadata.into_iter().collect(),
1168                            );
1169                        }
1170                        PreparedGraphInsert::Edge { fields, input } => {
1171                            if query.returning.is_some() {
1172                                returning_field_snaps.push(fields);
1173                            }
1174                            batch = batch.add_edge(
1175                                input.collection,
1176                                input.label,
1177                                input.from,
1178                                input.to,
1179                                input.weight.unwrap_or(1.0),
1180                                input.properties.into_iter().collect(),
1181                                input.metadata.into_iter().collect(),
1182                            );
1183                        }
1184                    }
1185                }
1186                let batch_result = batch
1187                    .execute()
1188                    .map_err(|err| RedDBError::Internal(format!("{err:?}")))?;
1189                let (ids, entity_kind) = match query.entity_type {
1190                    InsertEntityType::Node => (batch_result.nodes, "graph_node"),
1191                    InsertEntityType::Edge => (batch_result.edges, "graph_edge"),
1192                    _ => unreachable!("prepared graph insert only handles NODE and EDGE"),
1193                };
1194                for id in &ids {
1195                    self.stamp_xmin_if_in_txn(&query.table, *id);
1196                }
1197                if query.returning.is_some() {
1198                    returning_field_snaps = graph_insert_returning_snapshots(
1199                        self.inner.db.store().as_ref(),
1200                        &query.table,
1201                        &ids,
1202                    );
1203                }
1204                self.cdc_emit_insert_batch_no_cache_invalidate(&query.table, &ids, entity_kind);
1205                let store = self.inner.db.store();
1206                entity_outputs.extend(ids.iter().map(|id| {
1207                    crate::application::entity::CreateEntityOutput {
1208                        id: *id,
1209                        entity: store.get(&query.table, *id),
1210                    }
1211                }));
1212                inserted_count = ids.len() as u64;
1213            } else {
1214                for row_values in &effective_rows {
1215                    if row_values.len() != query.columns.len() {
1216                        return Err(RedDBError::Query(format!(
1217                            "INSERT column count ({}) does not match value count ({})",
1218                            query.columns.len(),
1219                            row_values.len()
1220                        )));
1221                    }
1222
1223                    match query.entity_type {
1224                        InsertEntityType::Row => {
1225                            if query.returning.is_some() {
1226                                return Err(RedDBError::Query(
1227                                "RETURNING is not yet supported for this INSERT path (TimeSeries)"
1228                                    .to_string(),
1229                            ));
1230                            }
1231                            let (fields, mut metadata) =
1232                                split_insert_metadata(self, &query.columns, row_values)?;
1233                            merge_with_clauses(
1234                                &mut metadata,
1235                                query.ttl_ms,
1236                                query.expires_at_ms,
1237                                &query.with_metadata,
1238                            );
1239                            self.insert_timeseries_point(&query.table, fields, metadata)?;
1240                        }
1241                        InsertEntityType::Node | InsertEntityType::Edge => {
1242                            unreachable!("NODE and EDGE are handled by the prepared graph path")
1243                        }
1244                        InsertEntityType::Vector => {
1245                            let (vector_values, mut metadata) =
1246                                split_insert_metadata(self, &query.columns, row_values)?;
1247                            merge_with_clauses(
1248                                &mut metadata,
1249                                query.ttl_ms,
1250                                query.expires_at_ms,
1251                                &query.with_metadata,
1252                            );
1253                            let (columns, values) = pairwise_columns_values(&vector_values);
1254                            let dense = find_column_value_vec_f32_any(
1255                                &columns,
1256                                &values,
1257                                &["dense", "embedding"],
1258                            )?;
1259                            merge_vector_metadata_column(&mut metadata, &columns, &values)?;
1260                            let content =
1261                                find_column_value_opt_string(&columns, &values, "content");
1262                            if query.returning.is_some() {
1263                                returning_field_snaps.push(vector_values.clone());
1264                            }
1265                            let input = CreateVectorInput {
1266                                collection: query.table.clone(),
1267                                dense,
1268                                content,
1269                                metadata,
1270                                link_row: None,
1271                                link_node: None,
1272                            };
1273                            entity_outputs.push(self.create_vector(input)?);
1274                        }
1275                        InsertEntityType::Document => {
1276                            let (document_values, mut metadata) =
1277                                split_insert_metadata(self, &query.columns, row_values)?;
1278                            merge_with_clauses(
1279                                &mut metadata,
1280                                query.ttl_ms,
1281                                query.expires_at_ms,
1282                                &query.with_metadata,
1283                            );
1284                            let (columns, values) = pairwise_columns_values(&document_values);
1285                            let body = find_document_body_json(&columns, &values)?;
1286                            let input = CreateDocumentInput {
1287                                collection: query.table.clone(),
1288                                body,
1289                                metadata,
1290                                node_links: Vec::new(),
1291                                vector_links: Vec::new(),
1292                            };
1293                            let output = self.create_document(input)?;
1294                            if query.returning.is_some() {
1295                                let fields = output
1296                                    .entity
1297                                    .as_ref()
1298                                    .map(entity_row_fields_snapshot)
1299                                    .filter(|fields| !fields.is_empty())
1300                                    .unwrap_or(document_values);
1301                                returning_field_snaps.push(fields);
1302                            }
1303                            entity_outputs.push(output);
1304                        }
1305                        InsertEntityType::Kv => {
1306                            let (kv_values, mut metadata) =
1307                                split_insert_metadata(self, &query.columns, row_values)?;
1308                            merge_with_clauses(
1309                                &mut metadata,
1310                                query.ttl_ms,
1311                                query.expires_at_ms,
1312                                &query.with_metadata,
1313                            );
1314                            let (columns, values) = pairwise_columns_values(&kv_values);
1315                            let key = find_column_value_string(&columns, &values, "key")?;
1316                            let value = find_column_value(&columns, &values, "value")?;
1317                            if query.returning.is_some() {
1318                                returning_field_snaps.push(kv_values.clone());
1319                            }
1320                            let input = CreateKvInput {
1321                                collection: query.table.clone(),
1322                                key,
1323                                value,
1324                                metadata,
1325                            };
1326                            entity_outputs.push(self.create_kv(input)?);
1327                        }
1328                    }
1329
1330                    inserted_count += 1;
1331                }
1332            }
1333
1334            if let Some(items) = query.returning.as_ref() {
1335                if !entity_outputs.is_empty() {
1336                    returning_result = Some(build_returning_result(
1337                        items,
1338                        &returning_field_snaps,
1339                        Some(&entity_outputs),
1340                    ));
1341                }
1342            }
1343        }
1344
1345        // Auto-embed pipeline: batch-embed fields across all inserted rows via AiBatchClient.
1346        if let Some(ref embed_config) = query.auto_embed {
1347            let store = self.inner.db.store();
1348            let provider = crate::ai::parse_provider(&embed_config.provider)?;
1349            let is_local_provider = matches!(provider, crate::ai::AiProvider::Local);
1350            // Local provider runs in-process — no API key path applies.
1351            // The pre-flight above already required `MODEL '<name>'`
1352            // for the local case, so the unwrap_or default below only
1353            // ever fires for OpenAI-compatible providers.
1354            let api_key = if is_local_provider {
1355                String::new()
1356            } else {
1357                crate::ai::resolve_api_key_from_runtime(&provider, None, self)?
1358            };
1359            let model = embed_config.model.clone().unwrap_or_else(|| {
1360                std::env::var("REDDB_OPENAI_EMBEDDING_MODEL")
1361                    .ok()
1362                    .unwrap_or_else(|| crate::ai::DEFAULT_OPENAI_EMBEDDING_MODEL.to_string())
1363            });
1364
1365            // Collect the just-inserted rows (most-recently appended, reversed back to insert order).
1366            let manager = store
1367                .get_collection(&query.table)
1368                .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1369            let entities = manager.query_all(|_| true);
1370            let recent: Vec<_> = entities
1371                .into_iter()
1372                .rev()
1373                .take(effective_rows.len())
1374                .collect();
1375
1376            // Collector phase: (entity_index, combined_text) for rows that have non-empty fields.
1377            let entity_combos: Vec<(usize, String)> = recent
1378                .iter()
1379                .enumerate()
1380                .filter_map(|(i, entity)| {
1381                    if let EntityData::Row(ref row) = entity.data {
1382                        if let Some(ref named) = row.named {
1383                            let texts: Vec<String> = embed_config
1384                                .fields
1385                                .iter()
1386                                .filter_map(|field| match named.get(field) {
1387                                    Some(Value::Text(t)) if !t.is_empty() => Some(t.to_string()),
1388                                    _ => None,
1389                                })
1390                                .collect();
1391                            if !texts.is_empty() {
1392                                return Some((i, texts.join(" ")));
1393                            }
1394                        }
1395                    }
1396                    None
1397                })
1398                .collect();
1399
1400            if !entity_combos.is_empty() {
1401                // Batch phase: single provider round-trip for all rows.
1402                let batch_texts: Vec<String> =
1403                    entity_combos.iter().map(|(_, t)| t.clone()).collect();
1404
1405                // Issue #682 — when the provider is `local`, bypass
1406                // AiBatchClient (which is HTTP-only) and dispatch
1407                // directly through the in-process local embedding
1408                // backend. All texts go in one call, mirroring the
1409                // single-round-trip shape of the remote path. The
1410                // local backend does not perform intra-batch dedup —
1411                // each input position gets its own row in the output
1412                // — which keeps the per-row "create_vector" loop
1413                // below correct without additional fan-out logic.
1414                let embeddings = if is_local_provider {
1415                    let response = crate::runtime::ai::local_embedding::embed_local_with_db(
1416                        &self.inner.db,
1417                        &model,
1418                        batch_texts,
1419                    )?;
1420                    response.embeddings
1421                } else {
1422                    let batch_client =
1423                        crate::runtime::ai::batch_client::AiBatchClient::from_runtime(self);
1424
1425                    match tokio::runtime::Handle::try_current() {
1426                        Ok(handle) => tokio::task::block_in_place(|| {
1427                            handle.block_on(batch_client.embed_batch(
1428                                &provider,
1429                                &model,
1430                                &api_key,
1431                                batch_texts,
1432                            ))
1433                        }),
1434                        Err(_) => {
1435                            return Err(RedDBError::Query(
1436                                "AUTO EMBED requires a Tokio runtime context".to_string(),
1437                            ));
1438                        }
1439                    }
1440                    .map_err(|e| RedDBError::Query(e.to_string()))?
1441                };
1442
1443                // Distribute phase: persist one vector per non-empty embedding.
1444                for ((_, combined), dense) in entity_combos.iter().zip(embeddings) {
1445                    if dense.is_empty() {
1446                        continue;
1447                    }
1448                    self.create_vector(CreateVectorInput {
1449                        collection: query.table.clone(),
1450                        dense,
1451                        content: Some(combined.clone()),
1452                        metadata: Vec::new(),
1453                        link_row: None,
1454                        link_node: None,
1455                    })?;
1456                }
1457            }
1458        }
1459
1460        if inserted_count > 0 {
1461            self.note_table_write(&query.table);
1462        }
1463
1464        let mut result = RuntimeQueryResult::dml_result(
1465            raw_query.to_string(),
1466            inserted_count,
1467            "insert",
1468            "runtime-dml",
1469        );
1470        if let Some(returning) = returning_result {
1471            result.result = returning;
1472        }
1473        Ok(result)
1474    }
1475
1476    fn check_insert_column_policy(&self, query: &InsertQuery) -> RedDBResult<()> {
1477        let Some(auth_store) = self.inner.auth_store.read().clone() else {
1478            return Ok(());
1479        };
1480        if !auth_store.iam_authorization_enabled() {
1481            return Ok(());
1482        }
1483        let Some((username, role)) = crate::runtime::impl_core::current_auth_identity() else {
1484            return Ok(());
1485        };
1486
1487        let tenant = crate::runtime::impl_core::current_tenant();
1488        let principal = crate::auth::UserId::from_parts(tenant.as_deref(), &username);
1489        let request = crate::auth::ColumnAccessRequest {
1490            action: "insert".to_string(),
1491            schema: None,
1492            table: query.table.clone(),
1493            columns: query.columns.clone(),
1494        };
1495        let ctx = crate::auth::policies::EvalContext {
1496            principal_tenant: tenant.clone(),
1497            current_tenant: tenant,
1498            peer_ip: None,
1499            mfa_present: false,
1500            now_ms: crate::auth::now_ms(),
1501            principal_is_admin_role: role == crate::auth::Role::Admin,
1502            principal_is_system_owned: auth_store.principal_is_system_owned(&principal),
1503            principal_is_platform_scoped: principal.tenant.is_none(),
1504        };
1505
1506        let outcome = auth_store.check_column_projection_authz(&principal, &request, &ctx);
1507        let table_allowed = matches!(
1508            outcome.table_decision,
1509            crate::auth::policies::Decision::Allow { .. }
1510                | crate::auth::policies::Decision::AdminBypass
1511        );
1512        if !table_allowed {
1513            return Err(RedDBError::Query(format!(
1514                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1515                outcome.table_resource.kind, outcome.table_resource.name
1516            )));
1517        }
1518        if let Some(denied) = outcome.first_denied_column() {
1519            return Err(RedDBError::Query(format!(
1520                "principal=`{username}` action=`insert` resource=`{}:{}` denied by IAM policy",
1521                denied.resource.kind, denied.resource.name
1522            )));
1523        }
1524
1525        Ok(())
1526    }
1527
1528    pub(crate) fn insert_timeseries_point(
1529        &self,
1530        collection: &str,
1531        fields: Vec<(String, Value)>,
1532        mut metadata: Vec<(String, MetadataValue)>,
1533    ) -> RedDBResult<EntityId> {
1534        apply_collection_default_ttl_metadata(self, collection, &mut metadata);
1535
1536        let (columns, values) = pairwise_columns_values(&fields);
1537        validate_timeseries_insert_columns(&columns)?;
1538
1539        // Issue #577 — AnalyticsSchemaRegistry hook. If the row carries
1540        // an `event_name` whose schema is registered, validate the
1541        // `payload` JSON against it BEFORE any write side-effect. On
1542        // failure we return a typed error and the row is not
1543        // persisted. When no schema is registered for the event name
1544        // (or no `event_name` column is supplied at all) we fall
1545        // through to the normal write path for back-compat with
1546        // existing timeseries rows.
1547        let event_name_opt = find_column_value_opt_string(&columns, &values, "event_name");
1548        let payload_opt = find_column_value_opt_string(&columns, &values, "payload");
1549        if let Some(event_name) = event_name_opt.as_deref() {
1550            let store_for_schema = self.inner.db.store();
1551            if super::analytics_schema_registry::latest(store_for_schema.as_ref(), event_name)
1552                .is_some()
1553            {
1554                let payload_json = payload_opt.as_deref().unwrap_or("{}");
1555                super::analytics_schema_registry::validate(
1556                    store_for_schema.as_ref(),
1557                    event_name,
1558                    payload_json,
1559                )
1560                .map_err(super::analytics_schema_registry::validation_error_to_reddb)?;
1561            }
1562        }
1563
1564        // `metric` is required by the existing timeseries write path;
1565        // when an analytics-style row supplies `event_name` but not
1566        // `metric`, fall back to the event name so the storage path
1567        // still has a non-empty metric tag.
1568        let metric = match find_column_value_opt_string(&columns, &values, "metric") {
1569            Some(m) => m,
1570            None => event_name_opt.clone().ok_or_else(|| {
1571                RedDBError::Query(
1572                    "timeseries INSERT requires either `metric` or `event_name`".to_string(),
1573                )
1574            })?,
1575        };
1576        // `value` is optional for analytics-event rows (which are
1577        // semantically counts of 1); default to 1.0 when missing so
1578        // analytics inserts don't have to fabricate a metric value.
1579        let value = match find_column_value_opt_string(&columns, &values, "value") {
1580            Some(s) => s.parse::<f64>().unwrap_or(1.0),
1581            None => columns
1582                .iter()
1583                .position(|c| c.eq_ignore_ascii_case("value"))
1584                .and_then(|i| match &values[i] {
1585                    Value::Float(f) => Some(*f),
1586                    Value::Integer(n) | Value::BigInt(n) => Some(*n as f64),
1587                    Value::UnsignedInteger(n) => Some(*n as f64),
1588                    _ => None,
1589                })
1590                .unwrap_or(1.0),
1591        };
1592        let timestamp_ns =
1593            find_timeseries_timestamp_ns(&columns, &values)?.unwrap_or_else(current_unix_ns);
1594        let mut tags = find_timeseries_tags(&columns, &values)?;
1595        if let Some(ref name) = event_name_opt {
1596            tags.entry("event_name".to_string())
1597                .or_insert_with(|| name.clone());
1598        }
1599        if let Some(ref payload) = payload_opt {
1600            tags.entry("payload".to_string())
1601                .or_insert_with(|| payload.clone());
1602        }
1603
1604        let mut entity = UnifiedEntity::new(
1605            EntityId::new(0),
1606            EntityKind::TimeSeriesPoint(Box::new(crate::storage::TimeSeriesPointKind {
1607                series: collection.to_string(),
1608                metric: metric.clone(),
1609            })),
1610            EntityData::TimeSeries(crate::storage::TimeSeriesData {
1611                metric,
1612                timestamp_ns,
1613                value,
1614                tags,
1615            }),
1616        );
1617        // MVCC #30: stamp xmin with the active tx xid (inside a tx)
1618        // or an autocommit xid (allocated and committed up-front so
1619        // future snapshots see the row as soon as it lands).
1620        let writer_xid = match self.current_xid() {
1621            Some(xid) => xid,
1622            None => {
1623                let mgr = self.snapshot_manager();
1624                let xid = mgr.begin();
1625                mgr.commit(xid);
1626                xid
1627            }
1628        };
1629        entity.set_xmin(writer_xid);
1630
1631        let store = self.inner.db.store();
1632        let id = store
1633            .insert_auto(collection, entity)
1634            .map_err(|err| RedDBError::Internal(err.to_string()))?;
1635
1636        if !metadata.is_empty() {
1637            let _ = store.set_metadata(
1638                collection,
1639                id,
1640                Metadata::with_fields(metadata.into_iter().collect()),
1641            );
1642        }
1643
1644        self.cdc_emit(
1645            crate::replication::cdc::ChangeOperation::Insert,
1646            collection,
1647            id.raw(),
1648            "timeseries",
1649        );
1650
1651        Ok(id)
1652    }
1653
1654    /// Execute UPDATE table SET col=val, ... WHERE filter
1655    ///
1656    /// Scans the target collection, evaluates the WHERE filter against each
1657    /// record, and patches every matching entity.
1658    pub fn execute_update(
1659        &self,
1660        raw_query: &str,
1661        query: &UpdateQuery,
1662    ) -> RedDBResult<RuntimeQueryResult> {
1663        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
1664        // Issue #523 — blockchain collections are immutable. Reject before
1665        // RLS / RETURNING work so the operator sees a clean 409-mapped
1666        // error instead of a partially-applied mutation surface.
1667        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
1668            return Err(RedDBError::InvalidOperation(format!(
1669                "BlockchainCollectionImmutable: UPDATE not allowed on '{}'",
1670                query.table
1671            )));
1672        }
1673        // CollectionContract gate (#50): runs the APPEND ONLY guard
1674        // (and any future contract bits) before RLS / RETURNING work
1675        // so the operator's immutability declaration is honoured
1676        // uniformly and the error message points at the DDL rather
1677        // than at a downstream symptom.
1678        crate::runtime::collection_contract::CollectionContractGate::check(
1679            self,
1680            &query.table,
1681            crate::runtime::collection_contract::MutationKind::Update,
1682        )?;
1683        ensure_update_target_contract(self, &query.table, query.target)?;
1684        ensure_graph_identity_update_target_allowed(query)?;
1685
1686        // Apply RLS augmentation first so every downstream path — plain
1687        // UPDATE, UPDATE...RETURNING, the inner scan — observes the
1688        // same policy-filtered target set. This prevents RETURNING
1689        // from ever exposing rows the UPDATE policy would have
1690        // denied.
1691        let rls_gated = crate::runtime::impl_core::rls_is_enabled(self, &query.table);
1692        let augmented_query: UpdateQuery;
1693        let effective_query: &UpdateQuery = if rls_gated {
1694            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
1695                self,
1696                &query.table,
1697                crate::storage::query::ast::PolicyAction::Update,
1698            );
1699            let Some(policy) = rls_filter else {
1700                // No admitting policy: zero rows affected, empty
1701                // RETURNING (never leak rows the caller can't touch).
1702                let mut response = RuntimeQueryResult::dml_result(
1703                    raw_query.to_string(),
1704                    0,
1705                    "update",
1706                    "runtime-dml-rls",
1707                );
1708                if let Some(items) = query.returning.clone() {
1709                    response.result = build_returning_result(&items, &[], None);
1710                }
1711                return Ok(response);
1712            };
1713            let mut augmented = query.clone();
1714            augmented.filter = Some(match augmented.filter.take() {
1715                Some(existing) => {
1716                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
1717                }
1718                None => policy,
1719            });
1720            augmented_query = augmented;
1721            &augmented_query
1722        } else {
1723            query
1724        };
1725
1726        // RETURNING wraps the inner executor and uses the touched-id
1727        // list the inner reports so the post-image reflects exactly
1728        // the rows the UPDATE actually mutated (not whatever a
1729        // separate SELECT might have observed).
1730        if let Some(items) = effective_query.returning.clone() {
1731            let mut inner_query = effective_query.clone();
1732            inner_query.returning = None;
1733            let (mut response, touched_ids) =
1734                self.execute_update_inner_tracked(raw_query, &inner_query)?;
1735
1736            let snapshots = if matches!(
1737                effective_query.target,
1738                UpdateTarget::Nodes | UpdateTarget::Edges
1739            ) {
1740                graph_update_returning_snapshots(self, &effective_query.table, &touched_ids)
1741            } else {
1742                super::dml_target_scan::DmlTargetScan::new(self, &effective_query.table, None, None)
1743                    .row_snapshots(&touched_ids)
1744            };
1745
1746            response.result = build_returning_result(&items, &snapshots, None);
1747            response.engine = "runtime-dml-returning";
1748            return Ok(response);
1749        }
1750
1751        self.execute_update_inner(raw_query, effective_query)
1752    }
1753
1754    /// Back-compat shim: the older entry point ignored touched ids.
1755    fn execute_update_inner(
1756        &self,
1757        raw_query: &str,
1758        query: &UpdateQuery,
1759    ) -> RedDBResult<RuntimeQueryResult> {
1760        self.execute_update_inner_tracked(raw_query, query)
1761            .map(|(res, _)| res)
1762    }
1763
1764    fn execute_update_inner_tracked(
1765        &self,
1766        raw_query: &str,
1767        query: &UpdateQuery,
1768    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1769        let store = self.inner.db.store();
1770        let effective_filter = effective_update_filter(query);
1771        let compiled_plan = self.compile_update_plan(query)?;
1772        let needs_rmw_lock = update_needs_rmw_lock(query);
1773        let table_rmw_lock = if needs_rmw_lock {
1774            Some(
1775                self.inner
1776                    .rmw_locks
1777                    .lock_for(&query.table, "__table_rmw_update__"),
1778            )
1779        } else {
1780            None
1781        };
1782        let _table_rmw_guard = table_rmw_lock.as_ref().map(|lock| lock.lock());
1783        let mut touched_ids: Vec<EntityId> = Vec::new();
1784        let limit_cap = query.limit.map(|l| l as usize);
1785        let manager = store
1786            .get_collection(&query.table)
1787            .ok_or_else(|| RedDBError::NotFound(query.table.clone()))?;
1788        let scan_limit = if query.order_by.is_empty() {
1789            limit_cap
1790        } else {
1791            None
1792        };
1793        let mut target_scan = super::dml_target_scan::DmlTargetScan::with_update_target(
1794            self,
1795            &query.table,
1796            effective_filter.as_ref(),
1797            scan_limit,
1798            query.target,
1799        );
1800        if needs_rmw_lock {
1801            target_scan = target_scan.with_live_table_rows();
1802        }
1803        let ids_to_update = target_scan.find_target_ids()?;
1804        let ids_to_update = if query.order_by.is_empty() {
1805            ids_to_update
1806        } else {
1807            ordered_update_target_ids(&manager, &ids_to_update, &query.order_by, limit_cap)
1808        };
1809
1810        if needs_rmw_lock {
1811            return self.execute_update_inner_tracked_locked(
1812                raw_query,
1813                query,
1814                &compiled_plan,
1815                &ids_to_update,
1816                effective_filter.as_ref(),
1817            );
1818        }
1819
1820        let mut affected: u64 = 0;
1821        for chunk in ids_to_update.chunks(UPDATE_APPLY_CHUNK_SIZE) {
1822            let mut applied_chunk = Vec::with_capacity(chunk.len());
1823            for entity in manager.get_many(chunk).into_iter().flatten() {
1824                let assignments =
1825                    self.materialize_update_assignments_for_entity(query, &entity, &compiled_plan)?;
1826                let applied = self.apply_materialized_update_for_entity(
1827                    query.table.clone(),
1828                    entity,
1829                    &compiled_plan,
1830                    assignments,
1831                )?;
1832                touched_ids.push(applied.id);
1833                applied_chunk.push(applied);
1834            }
1835            self.persist_update_chunk(&applied_chunk)?;
1836            affected += applied_chunk.len() as u64;
1837            let lsns = self.flush_update_chunk(&applied_chunk)?;
1838            if !query.suppress_events {
1839                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1840            }
1841        }
1842
1843        if affected > 0 {
1844            self.note_table_write(&query.table);
1845        }
1846
1847        Ok((
1848            RuntimeQueryResult::dml_result(
1849                raw_query.to_string(),
1850                affected,
1851                "update",
1852                "runtime-dml",
1853            ),
1854            touched_ids,
1855        ))
1856    }
1857
1858    fn execute_update_inner_tracked_locked(
1859        &self,
1860        raw_query: &str,
1861        query: &UpdateQuery,
1862        compiled_plan: &CompiledUpdatePlan,
1863        ids_to_update: &[EntityId],
1864        effective_filter: Option<&Filter>,
1865    ) -> RedDBResult<(RuntimeQueryResult, Vec<EntityId>)> {
1866        let store = self.inner.db.store();
1867        let mut touched_ids = Vec::new();
1868        let mut lock_entries = Vec::new();
1869
1870        for id in ids_to_update {
1871            let Some(candidate) = store.get(&query.table, *id) else {
1872                continue;
1873            };
1874            let logical_id = candidate.logical_id();
1875            let lock_key = format!("row:{}", logical_id.raw());
1876            let rmw_lock = self.inner.rmw_locks.lock_for(&query.table, &lock_key);
1877            lock_entries.push((lock_key, logical_id, rmw_lock));
1878        }
1879
1880        lock_entries.sort_by(|left, right| left.0.cmp(&right.0));
1881        lock_entries.dedup_by(|left, right| left.0 == right.0);
1882        let _rmw_guards: Vec<_> = lock_entries.iter().map(|entry| entry.2.lock()).collect();
1883
1884        let mut applied_chunk = Vec::new();
1885        for (_, logical_id, _) in &lock_entries {
1886            let Some(entity) = resolve_update_entity_by_logical_id(self, &query.table, *logical_id)
1887            else {
1888                continue;
1889            };
1890            if let Some(filter) = effective_filter {
1891                if !crate::runtime::query_exec::evaluate_entity_filter_with_db(
1892                    Some(self.inner.db.as_ref()),
1893                    &entity,
1894                    filter,
1895                    &query.table,
1896                    &query.table,
1897                ) {
1898                    continue;
1899                }
1900            }
1901
1902            let assignments =
1903                self.materialize_update_assignments_for_entity(query, &entity, compiled_plan)?;
1904            let applied = self.apply_materialized_update_for_entity(
1905                query.table.clone(),
1906                entity,
1907                compiled_plan,
1908                assignments,
1909            )?;
1910            touched_ids.push(applied.id);
1911            applied_chunk.push(applied);
1912        }
1913
1914        let affected = applied_chunk.len() as u64;
1915        if !applied_chunk.is_empty() {
1916            self.persist_update_chunk(&applied_chunk)?;
1917            let lsns = self.flush_update_chunk(&applied_chunk)?;
1918            if !query.suppress_events {
1919                self.emit_update_events_for_collection(&query.table, &applied_chunk, &lsns)?;
1920            }
1921        }
1922
1923        if affected > 0 {
1924            self.note_table_write(&query.table);
1925        }
1926
1927        Ok((
1928            RuntimeQueryResult::dml_result(
1929                raw_query.to_string(),
1930                affected,
1931                "update",
1932                "runtime-dml",
1933            ),
1934            touched_ids,
1935        ))
1936    }
1937
1938    fn compile_update_plan(&self, query: &UpdateQuery) -> RedDBResult<CompiledUpdatePlan> {
1939        let mut static_field_assignments = Vec::new();
1940        let mut static_metadata_assignments = Vec::new();
1941        let mut dynamic_assignments = Vec::new();
1942        let row_contract_plan = build_row_update_contract_plan(&self.db(), &query.table)?;
1943        let mut row_modified_columns = Vec::new();
1944
1945        for (idx, (column, expr)) in query.assignment_exprs.iter().enumerate() {
1946            let compound_op = query.compound_assignment_ops.get(idx).copied().flatten();
1947            let metadata_key = resolve_sql_ttl_metadata_key(column);
1948            if compound_op.is_some() && metadata_key.is_some() {
1949                return Err(RedDBError::Query(format!(
1950                    "compound assignment is only supported for row fields: {column}"
1951                )));
1952            }
1953            if compound_op.is_none() {
1954                if let Ok(value) = fold_expr_to_value(expr.clone()) {
1955                    if let Some(metadata_key) = metadata_key {
1956                        let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
1957                        let (canonical_key, canonical_value) =
1958                            canonicalize_sql_ttl_metadata(metadata_key, raw_value);
1959                        static_metadata_assignments
1960                            .push((canonical_key.to_string(), canonical_value));
1961                    } else {
1962                        let value = self.resolve_crypto_sentinel(value)?;
1963                        static_field_assignments.push((
1964                            column.clone(),
1965                            normalize_row_update_assignment_with_plan(
1966                                &query.table,
1967                                column,
1968                                value,
1969                                row_contract_plan.as_ref(),
1970                            )?,
1971                        ));
1972                        row_modified_columns.push(column.clone());
1973                    }
1974                    continue;
1975                }
1976            }
1977
1978            dynamic_assignments.push(CompiledUpdateAssignment {
1979                column: column.clone(),
1980                expr: expr.clone(),
1981                compound_op,
1982                metadata_key,
1983                row_rule: if metadata_key.is_none() {
1984                    if let Some(plan) = row_contract_plan.as_ref() {
1985                        if plan.timestamps_enabled
1986                            && (column == "created_at" || column == "updated_at")
1987                        {
1988                            return Err(RedDBError::Query(format!(
1989                                "collection '{}' manages '{}' automatically — do not set it in UPDATE",
1990                                query.table, column
1991                            )));
1992                        }
1993                        if let Some(rule) = plan.declared_rules.get(column) {
1994                            Some(rule.clone())
1995                        } else if plan.strict_schema {
1996                            return Err(RedDBError::Query(format!(
1997                                "collection '{}' is strict and does not allow undeclared fields: {}",
1998                                query.table, column
1999                            )));
2000                        } else {
2001                            None
2002                        }
2003                    } else {
2004                        None
2005                    }
2006                } else {
2007                    None
2008                },
2009            });
2010            if metadata_key.is_none() {
2011                row_modified_columns.push(column.clone());
2012            }
2013        }
2014
2015        let row_modified_columns = dedupe_update_columns(row_modified_columns);
2016        let row_touches_unique_columns = row_contract_plan.as_ref().is_some_and(|plan| {
2017            row_modified_columns.iter().any(|column| {
2018                plan.unique_columns
2019                    .keys()
2020                    .any(|unique| unique.eq_ignore_ascii_case(column))
2021            })
2022        });
2023
2024        if let Some(ttl_ms) = query.ttl_ms {
2025            static_metadata_assignments
2026                .push(("_ttl_ms".to_string(), metadata_u64_to_value(ttl_ms)));
2027        }
2028        if let Some(expires_at_ms) = query.expires_at_ms {
2029            static_metadata_assignments.push((
2030                "_expires_at".to_string(),
2031                metadata_u64_to_value(expires_at_ms),
2032            ));
2033        }
2034        for (key, val) in &query.with_metadata {
2035            static_metadata_assignments.push((key.clone(), storage_value_to_metadata_value(val)));
2036        }
2037
2038        Ok(CompiledUpdatePlan {
2039            static_field_assignments,
2040            static_metadata_assignments,
2041            dynamic_assignments,
2042            row_contract_plan,
2043            row_modified_columns,
2044            row_touches_unique_columns,
2045        })
2046    }
2047
    /// Evaluates the plan's dynamic (per-entity) UPDATE assignments against
    /// one entity and returns the resulting field and metadata values.
    ///
    /// Only `compiled_plan.dynamic_assignments` is processed here; the
    /// static assignments stored on the plan are not touched by this method.
    ///
    /// # Errors
    ///
    /// Returns `RedDBError::Query` when:
    /// - a compound assignment targets an entity whose data is not a row,
    ///   node, or edge;
    /// - no runtime record can be materialized for the entity;
    /// - the assignment expression fails to evaluate.
    ///
    /// Errors from compound evaluation, metadata conversion, crypto-sentinel
    /// resolution, and row-value normalization are propagated unchanged.
    fn materialize_update_assignments_for_entity(
        &self,
        query: &UpdateQuery,
        entity: &UnifiedEntity,
        compiled_plan: &CompiledUpdatePlan,
    ) -> RedDBResult<MaterializedUpdateAssignments> {
        let mut assignments = MaterializedUpdateAssignments::default();
        // Materialized on first use, so an entity with no dynamic
        // assignments never pays for building the runtime record.
        let mut record: Option<UnifiedRecord> = None;

        for assignment in &compiled_plan.dynamic_assignments {
            // Compound operators are rejected for unsupported entity shapes
            // before any record materialization or expression evaluation.
            if assignment.compound_op.is_some()
                && !matches!(
                    entity.data,
                    EntityData::Row(_) | EntityData::Node(_) | EntityData::Edge(_)
                )
            {
                return Err(RedDBError::Query(format!(
                    "compound assignment is only supported for row or graph UPDATE column '{}'",
                    assignment.column
                )));
            }
            if record.is_none() {
                record = runtime_any_record_from_entity_ref(entity);
            }
            // Shadows the outer `Option` with a borrowed record for the rest
            // of this iteration; the outer binding is reused on the next one.
            let Some(record) = record.as_ref() else {
                return Err(RedDBError::Query(format!(
                    "UPDATE could not materialize runtime record for entity {} in '{}'",
                    entity.id.raw(),
                    query.table
                )));
            };
            // The table name is passed for both trailing optional arguments.
            let rhs = super::expr_eval::evaluate_runtime_expr_with_db(
                Some(self.inner.db.as_ref()),
                &assignment.expr,
                record,
                Some(query.table.as_str()),
                Some(query.table.as_str()),
            )
            .ok_or_else(|| {
                RedDBError::Query(format!(
                    "failed to evaluate UPDATE expression for column '{}'",
                    assignment.column
                ))
            })?;
            // With a compound operator the evaluated right-hand side is
            // combined with the record via the helper; otherwise it is the
            // assigned value as-is.
            let value = if let Some(op) = assignment.compound_op {
                evaluate_compound_update_assignment(&assignment.column, record, op, rhs)?
            } else {
                rhs
            };

            if let Some(metadata_key) = assignment.metadata_key {
                // Metadata target: convert, then canonicalize key and value.
                let raw_value = sql_literal_to_metadata_value(metadata_key, &value)?;
                let (canonical_key, canonical_value) =
                    canonicalize_sql_ttl_metadata(metadata_key, raw_value);
                assignments
                    .dynamic_metadata_assignments
                    .push((canonical_key.to_string(), canonical_value));
            } else {
                // Field target: resolve crypto sentinels first, then
                // normalize against the column's rule (if one was compiled).
                assignments.dynamic_field_assignments.push((
                    assignment.column.clone(),
                    normalize_row_update_value_for_rule(
                        &query.table,
                        self.resolve_crypto_sentinel(value)?,
                        assignment.row_rule.as_ref(),
                    )?,
                ));
            }
        }

        Ok(assignments)
    }
2119
2120    fn apply_materialized_update_for_entity(
2121        &self,
2122        collection: String,
2123        entity: UnifiedEntity,
2124        compiled_plan: &CompiledUpdatePlan,
2125        assignments: MaterializedUpdateAssignments,
2126    ) -> RedDBResult<AppliedEntityMutation> {
2127        if matches!(entity.data, EntityData::Row(_)) {
2128            return self.apply_loaded_sql_update_row_core(
2129                collection,
2130                entity,
2131                &compiled_plan.static_field_assignments,
2132                assignments.dynamic_field_assignments,
2133                &compiled_plan.static_metadata_assignments,
2134                assignments.dynamic_metadata_assignments,
2135                compiled_plan.row_contract_plan.as_ref(),
2136                &compiled_plan.row_modified_columns,
2137                compiled_plan.row_touches_unique_columns,
2138            );
2139        }
2140
2141        ensure_graph_identity_update_allowed(&entity, compiled_plan, &assignments)?;
2142
2143        let operations = build_patch_operations_from_materialized_assignments(
2144            &entity,
2145            compiled_plan,
2146            assignments,
2147        );
2148        self.apply_loaded_patch_entity_core(
2149            collection,
2150            entity,
2151            crate::json::Value::Null,
2152            operations,
2153        )
2154    }
2155
    /// Execute DELETE FROM table WHERE filter
    ///
    /// Runs the write gate, the blockchain immutability gate, and the
    /// collection-contract gate, in that order, before any rows are touched.
    /// After the gates:
    ///
    /// - With a `RETURNING` clause, the pre-image is captured by a rewritten
    ///   `SELECT`, the delete is re-dispatched through this method with
    ///   `returning` cleared, and the captured rows are projected through
    ///   the requested items.
    /// - With RLS enabled on the table, the `FOR DELETE` policy filter is
    ///   AND-ed into the user's filter; when no policy filter is available
    ///   the statement reports zero affected rows.
    /// - Otherwise the query goes straight to `execute_delete_inner`.
    ///
    /// # Errors
    ///
    /// Returns `RedDBError::InvalidOperation` for blockchain collections and
    /// `RedDBError::Query` when the `RETURNING` rewrite fails; gate, query,
    /// and delete errors are propagated.
    pub fn execute_delete(
        &self,
        raw_query: &str,
        query: &DeleteQuery,
    ) -> RedDBResult<RuntimeQueryResult> {
        self.check_write(crate::runtime::write_gate::WriteKind::Dml)?;
        // Issue #523 — blockchain collections are immutable; see
        // execute_update for the same gate.
        if crate::runtime::blockchain_kind::is_chain(self.inner.db.store().as_ref(), &query.table) {
            return Err(RedDBError::InvalidOperation(format!(
                "BlockchainCollectionImmutable: DELETE not allowed on '{}'",
                query.table
            )));
        }
        // CollectionContract gate (#50) — see execute_update for
        // rationale. The gate handles APPEND ONLY rejection and is
        // the single point where future contract bits land.
        crate::runtime::collection_contract::CollectionContractGate::check(
            self,
            &query.table,
            crate::runtime::collection_contract::MutationKind::Delete,
        )?;

        // RETURNING on DELETE: capture the pre-image via an internal
        // SELECT that reuses the same WHERE, then run the delete with
        // the RETURNING clause stripped, then project the captured
        // rows through the requested items. The extra SELECT is a
        // pragmatic MVP — a future pass can fuse the scan with the
        // delete to avoid the second pass over the heap.
        if let Some(items) = query.returning.clone() {
            let select_sql = delete_to_select_sql(raw_query).ok_or_else(|| {
                RedDBError::Query(
                    "DELETE ... RETURNING: cannot rewrite query for pre-image scan".to_string(),
                )
            })?;
            let captured = self.execute_query(&select_sql)?;

            // Re-enter this method without RETURNING so the gates and the
            // RLS handling below apply to the actual delete.
            let mut inner_query = query.clone();
            inner_query.returning = None;
            // NOTE(review): the inner delete's result (including its own
            // affected count) is discarded here; `affected` below is the
            // number of rows captured by the SELECT. The two could differ
            // if the SELECT and the DELETE do not match the same rows —
            // confirm this is acceptable.
            let _ = self.execute_delete(raw_query, &inner_query)?;

            // Flatten each captured record into owned (column, value) pairs.
            let snapshots: Vec<Vec<(String, Value)>> = captured
                .result
                .records
                .iter()
                .map(|rec| {
                    rec.iter_fields()
                        .map(|(k, v)| (k.as_ref().to_string(), v.clone()))
                        .collect()
                })
                .collect();
            let affected = snapshots.len() as u64;
            // No per-row create outputs exist for DELETE, hence `None`.
            let result = build_returning_result(&items, &snapshots, None);

            let mut response = RuntimeQueryResult::dml_result(
                raw_query.to_string(),
                affected,
                "delete",
                "runtime-dml-returning",
            );
            response.result = result;
            return Ok(response);
        }
        // Row-Level Security enforcement (Phase 2.5.2 PG parity).
        //
        // When the table has RLS enabled, gate the DELETE by the
        // per-role policy set: mutations only touch rows that *every*
        // matching `FOR DELETE` policy would accept. No policies =>
        // zero rows affected (PG restrictive-default).
        if crate::runtime::impl_core::rls_is_enabled(self, &query.table) {
            let rls_filter = crate::runtime::impl_core::rls_policy_filter(
                self,
                &query.table,
                crate::storage::query::ast::PolicyAction::Delete,
            );
            let Some(policy) = rls_filter else {
                return Ok(RuntimeQueryResult::dml_result(
                    raw_query.to_string(),
                    0,
                    "delete",
                    "runtime-dml-rls",
                ));
            };
            // Fold the policy predicate into the user's WHERE before
            // dispatching — the remainder of this function reads the
            // filter from `query` via `effective_delete_filter`, which
            // respects the updated value.
            let mut augmented = query.clone();
            augmented.filter = Some(match augmented.filter.take() {
                Some(existing) => {
                    crate::storage::query::ast::Filter::And(Box::new(existing), Box::new(policy))
                }
                None => policy,
            });
            return self.execute_delete_inner(raw_query, &augmented);
        }
        self.execute_delete_inner(raw_query, query)
    }
2255
    /// Performs the delete itself once all gates have passed: resolves the
    /// target ids for the effective filter, deletes them in chunks of
    /// `UPDATE_APPLY_CHUNK_SIZE`, and emits delete events when required.
    ///
    /// Returns a DML result tagged `"runtime-dml"` carrying the summed count
    /// reported by `delete_entities_batch`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the target scan, the batch delete, and event
    /// emission.
    fn execute_delete_inner(
        &self,
        raw_query: &str,
        query: &DeleteQuery,
    ) -> RedDBResult<RuntimeQueryResult> {
        let effective_filter = effective_delete_filter(query);

        // Find the rows that match the WHERE clause. The "find target
        // rows" loop lives in DmlTargetScan so UPDATE (#52) can reuse
        // the same scan strategy.
        let scan = super::dml_target_scan::DmlTargetScan::new(
            self,
            &query.table,
            effective_filter.as_ref(),
            None,
        );
        let ids_to_delete = scan.find_target_ids()?;

        // For event-enabled collections, snapshot the pre-delete state
        // before rows are physically removed.
        let needs_delete_events =
            !query.suppress_events && self.collection_has_delete_subscriptions(&query.table);
        let mut pre_images: HashMap<u64, crate::json::Value> = if needs_delete_events {
            scan.row_json_pre_images(&ids_to_delete)
        } else {
            HashMap::new()
        };

        let mut affected: u64 = 0;
        for chunk in ids_to_delete.chunks(UPDATE_APPLY_CHUNK_SIZE) {
            let (count, lsns) = self.delete_entities_batch(&query.table, chunk)?;
            affected += count;
            if needs_delete_events && !lsns.is_empty() {
                // lsns.len() == actually-deleted entities; align with chunk ids.
                // `delete_batch` may skip missing entities, so we correlate by
                // the number returned (they're emitted in chunk order).
                // NOTE(review): this takes the first `lsns.len()` ids of the
                // chunk. If a skipped entity is not at the tail of the chunk,
                // ids and LSNs would pair up incorrectly — confirm against
                // `delete_entities_batch`.
                let deleted_chunk = &chunk[..lsns.len().min(chunk.len())];
                self.emit_delete_events_for_collection(
                    &query.table,
                    deleted_chunk,
                    &lsns,
                    &pre_images,
                )?;
            }
        }
        // The pre-images are no longer needed once every chunk is processed.
        pre_images.clear();

        // Only note a table write when something was actually deleted.
        if affected > 0 {
            self.note_table_write(&query.table);
        }

        Ok(RuntimeQueryResult::dml_result(
            raw_query.to_string(),
            affected,
            "delete",
            "runtime-dml",
        ))
    }
2314}
2315
2316/// Reject UPDATE … NODES/EDGES that assign to graph identity/topology
2317/// columns regardless of whether any row matches the WHERE clause. The
2318/// per-entity guard below covers only the matched-rows case, but ADR 0019
2319/// declares these columns immutable on the surface itself, so a zero-row
2320/// UPDATE should still surface the same error to operators and SDKs.
2321fn ensure_graph_identity_update_target_allowed(query: &UpdateQuery) -> RedDBResult<()> {
2322    if !matches!(query.target, UpdateTarget::Nodes | UpdateTarget::Edges) {
2323        return Ok(());
2324    }
2325    for (column, _) in &query.assignment_exprs {
2326        if is_immutable_graph_identity_field(column) {
2327            return Err(RedDBError::Query(format!(
2328                "immutable graph field '{column}' cannot be updated"
2329            )));
2330        }
2331    }
2332    Ok(())
2333}
2334
2335fn ensure_graph_identity_update_allowed(
2336    entity: &UnifiedEntity,
2337    compiled_plan: &CompiledUpdatePlan,
2338    assignments: &MaterializedUpdateAssignments,
2339) -> RedDBResult<()> {
2340    if !matches!(entity.data, EntityData::Node(_) | EntityData::Edge(_)) {
2341        return Ok(());
2342    }
2343
2344    for (column, _) in compiled_plan
2345        .static_field_assignments
2346        .iter()
2347        .chain(assignments.dynamic_field_assignments.iter())
2348    {
2349        if is_immutable_graph_identity_field(column) {
2350            return Err(RedDBError::Query(format!(
2351                "immutable graph field '{column}' cannot be updated"
2352            )));
2353        }
2354    }
2355
2356    Ok(())
2357}
2358
/// Returns `true` when `column` names one of the graph identity/topology
/// columns that UPDATE may not assign. The comparison ignores ASCII case.
fn is_immutable_graph_identity_field(column: &str) -> bool {
    const IMMUTABLE_GRAPH_FIELDS: &[&str] =
        &["rid", "label", "from_rid", "to_rid", "from", "to"];

    for reserved in IMMUTABLE_GRAPH_FIELDS {
        if reserved.eq_ignore_ascii_case(column) {
            return true;
        }
    }
    false
}
2364
2365fn build_patch_operations_from_materialized_assignments(
2366    entity: &UnifiedEntity,
2367    compiled_plan: &CompiledUpdatePlan,
2368    assignments: MaterializedUpdateAssignments,
2369) -> Vec<PatchEntityOperation> {
2370    let mut operations = Vec::with_capacity(
2371        compiled_plan.static_field_assignments.len()
2372            + compiled_plan.static_metadata_assignments.len()
2373            + assignments.dynamic_field_assignments.len()
2374            + assignments.dynamic_metadata_assignments.len(),
2375    );
2376
2377    for (column, value) in &compiled_plan.static_field_assignments {
2378        operations.push(PatchEntityOperation {
2379            op: PatchEntityOperationType::Set,
2380            path: update_patch_path_for_entity(entity, column),
2381            value: Some(storage_value_to_json(value)),
2382        });
2383    }
2384
2385    for (column, value) in assignments.dynamic_field_assignments {
2386        operations.push(PatchEntityOperation {
2387            op: PatchEntityOperationType::Set,
2388            path: update_patch_path_for_entity(entity, &column),
2389            value: Some(storage_value_to_json(&value)),
2390        });
2391    }
2392
2393    for (key, value) in &compiled_plan.static_metadata_assignments {
2394        operations.push(PatchEntityOperation {
2395            op: PatchEntityOperationType::Set,
2396            path: vec!["metadata".to_string(), key.clone()],
2397            value: Some(metadata_value_to_json(value)),
2398        });
2399    }
2400
2401    for (key, value) in assignments.dynamic_metadata_assignments {
2402        operations.push(PatchEntityOperation {
2403            op: PatchEntityOperationType::Set,
2404            path: vec!["metadata".to_string(), key],
2405            value: Some(metadata_value_to_json(&value)),
2406        });
2407    }
2408
2409    operations
2410}
2411
2412fn update_patch_path_for_entity(entity: &UnifiedEntity, column: &str) -> Vec<String> {
2413    if matches!(
2414        (&entity.kind, &entity.data),
2415        (
2416            crate::storage::EntityKind::GraphNode(_),
2417            EntityData::Node(_)
2418        )
2419    ) && column.eq_ignore_ascii_case("node_type")
2420    {
2421        return vec!["node_type".to_string()];
2422    }
2423    if matches!(
2424        (&entity.kind, &entity.data),
2425        (
2426            crate::storage::EntityKind::GraphEdge(_),
2427            EntityData::Edge(_)
2428        )
2429    ) && column.eq_ignore_ascii_case("weight")
2430    {
2431        return vec!["weight".to_string()];
2432    }
2433    vec!["fields".to_string(), column.to_string()]
2434}
2435
/// Rewrite `DELETE FROM <table> [WHERE …] [RETURNING …]` as
/// `SELECT * FROM <table> [WHERE …]` so the delete executor can
/// capture the pre-image before actually removing the rows. Returns
/// `None` when the input does not start with a whitespace-terminated
/// `DELETE` keyword or contains no top-level `FROM` keyword.
///
/// Case-insensitive on the keywords, which may be separated by any ASCII
/// whitespace (spaces, tabs, newlines). Preserves everything between
/// the table name and the RETURNING clause, so WHERE / ORDER BY /
/// LIMIT survive untouched. The RETURNING tail — if present — is
/// truncated at the first top-level `RETURNING` token. Whitespace directly
/// after `FROM` and at the end of the rewritten statement is trimmed.
fn delete_to_select_sql(sql: &str) -> Option<String> {
    const DELETE_KW: &str = "delete";
    const FROM_KW: &str = "from";
    const RETURNING_KW: &str = "returning";

    let trimmed = sql.trim_start();
    // ASCII lowercasing never changes byte lengths, so every offset found in
    // `lowered` is also a valid offset into `trimmed`.
    let lowered = trimmed.to_ascii_lowercase();

    // `DELETE` must be a whole keyword: followed by at least one whitespace.
    let after_delete_lc = lowered.strip_prefix(DELETE_KW)?;
    let delete_is_keyword = after_delete_lc
        .as_bytes()
        .first()
        .is_some_and(|b| b.is_ascii_whitespace());
    if !delete_is_keyword {
        return None;
    }

    // Locate `FROM` with the same whitespace-bounded, literal-aware scan used
    // for RETURNING, so tabs/newlines around it work and a ` from ` inside a
    // string literal is never mistaken for the keyword.
    let from_idx = DELETE_KW.len() + find_top_level_keyword(after_delete_lc, FROM_KW)?;
    let body_start = from_idx + FROM_KW.len();
    // Both views lose the same leading bytes: lowercasing does not touch
    // whitespace.
    let after_from = trimmed[body_start..].trim_start();
    let after_from_lc = lowered[body_start..].trim_start();

    // Cut off the RETURNING tail (the clause only appears once per statement
    // at top level in our grammar).
    let body = match find_top_level_keyword(after_from_lc, RETURNING_KW) {
        Some(pos) => &after_from[..pos],
        None => after_from,
    };
    Some(format!("SELECT * FROM {}", body.trim_end()))
}

/// Find the byte offset of a whitespace-bounded keyword in a
/// lowercased haystack, skipping matches inside single-quoted
/// string literals. Naive — no backslash-escape handling — but enough
/// for the shapes the DML parser emits. A doubled quote (`''`) inside a
/// literal toggles the state twice and therefore stays inside the literal.
///
/// The keyword must be preceded by ASCII whitespace or the start of the
/// haystack, and followed by ASCII whitespace or the end of the haystack.
/// An empty `needle` never matches.
fn find_top_level_keyword(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let needle = needle.as_bytes();
    if needle.is_empty() {
        return None;
    }

    let mut in_string = false;
    for (i, &c) in bytes.iter().enumerate() {
        if c == b'\'' {
            in_string = !in_string;
            continue;
        }
        if in_string || !bytes[i..].starts_with(needle) {
            continue;
        }
        let bounded_left = i == 0 || bytes[i - 1].is_ascii_whitespace();
        let bounded_right = match bytes.get(i + needle.len()) {
            Some(next) => next.is_ascii_whitespace(),
            None => true,
        };
        if bounded_left && bounded_right {
            return Some(i);
        }
    }
    None
}
2495
/// Build a `UnifiedResult` from the rows affected by a DML statement plus
/// its `RETURNING` clause. Each snapshot is a list of (column, value) pairs
/// for one affected row; `outputs`, when provided, supplies the engine-
/// assigned entity id for the same row (INSERT path). Projection honours
/// the RETURNING items: `*` expands to every snapshot column plus
/// the public row envelope when available.
///
/// Snapshots and outputs are paired by index. For `*`, the snapshot column
/// names are taken from the first snapshot only. Duplicate column names are
/// dropped, keeping the first occurrence. A projected column with no value
/// for a given row is simply left unset on that row's record.
fn build_returning_result(
    items: &[ReturningItem],
    snapshots: &[Vec<(String, Value)>],
    outputs: Option<&[CreateEntityOutput]>,
) -> UnifiedResult {
    // Any `*` item switches the whole projection to "all columns".
    let project_all = items.iter().any(|it| matches!(it, ReturningItem::All));
    // Whether the public envelope columns are used is decided by the first
    // output's entity only.
    let public_item_outputs = outputs.is_some_and(|outs| {
        outs.first()
            .and_then(|out| out.entity.as_ref())
            .is_some_and(|entity| public_returning_item_kind(entity).is_some())
    });

    let mut columns: Vec<String> = if project_all {
        let mut cols: Vec<String> = Vec::new();
        if public_item_outputs {
            cols.extend(
                [
                    "rid",
                    "collection",
                    "kind",
                    "tenant",
                    "created_at",
                    "updated_at",
                ]
                .into_iter()
                .map(str::to_string),
            );
        } else if outputs.is_some() {
            // Outputs without a public kind expose only the legacy id column.
            cols.push("red_entity_id".to_string());
        }
        if let Some(first) = snapshots.first() {
            for (name, _) in first {
                cols.push(name.clone());
            }
        }
        cols
    } else {
        // Explicit column list: keep named columns in the order written.
        items
            .iter()
            .filter_map(|it| match it {
                ReturningItem::Column(c) => Some(c.clone()),
                ReturningItem::All => None,
            })
            .collect()
    };
    // Guarantee unique order-preserving column list.
    {
        let mut seen = std::collections::HashSet::new();
        columns.retain(|c| seen.insert(c.clone()));
    }

    let mut records: Vec<UnifiedRecord> = Vec::with_capacity(snapshots.len());
    for (idx, snap) in snapshots.iter().enumerate() {
        // Candidate values for this row; only projected columns are copied
        // onto the record at the end.
        let mut values: HashMap<Arc<str>, Value> = HashMap::with_capacity(columns.len());
        if let Some(outs) = outputs {
            if let Some(out) = outs.get(idx) {
                if let Some(entity) = out.entity.as_ref() {
                    if let Some(kind) = public_returning_item_kind(entity) {
                        values.insert(
                            Arc::clone(&sys_key_rid()),
                            Value::UnsignedInteger(out.id.raw()),
                        );
                        values.insert(
                            Arc::clone(&sys_key_collection()),
                            Value::text(entity.kind.collection().to_string()),
                        );
                        values.insert(Arc::clone(&sys_key_kind()), Value::text(kind.to_string()));
                        values.insert(
                            Arc::clone(&sys_key_created_at()),
                            Value::UnsignedInteger(entity.created_at),
                        );
                        values.insert(
                            Arc::clone(&sys_key_updated_at()),
                            Value::UnsignedInteger(entity.updated_at),
                        );
                        // Legacy alias: an explicit `RETURNING red_entity_id`
                        // still resolves to the row's rid. Only surfaces when
                        // the projected column list names it — `RETURNING *`
                        // keeps the envelope clean (rid, not red_entity_id).
                        values.insert(
                            Arc::clone(&sys_key_red_entity_id()),
                            Value::UnsignedInteger(out.id.raw()),
                        );
                    } else {
                        // No public kind: the id is exposed as a signed
                        // integer under the legacy key.
                        values.insert(
                            Arc::clone(&sys_key_red_entity_id()),
                            Value::Integer(out.id.raw() as i64),
                        );
                    }
                } else {
                    // No entity attached to the output: same legacy id form.
                    values.insert(
                        Arc::clone(&sys_key_red_entity_id()),
                        Value::Integer(out.id.raw() as i64),
                    );
                }
            }
        }
        // Snapshot values are inserted after the envelope, so a snapshot
        // column with the same name replaces the envelope value.
        for (name, val) in snap {
            values.insert(Arc::from(name.as_str()), val.clone());
        }
        // Fall back to `tenant_id` (or Null) when no `tenant` value exists.
        if !values.contains_key("tenant") {
            let tenant = values.get("tenant_id").cloned().unwrap_or(Value::Null);
            values.insert(Arc::clone(&sys_key_tenant()), tenant);
        }
        let mut rec = UnifiedRecord::default();
        // Only keep projected columns on the record.
        for col in &columns {
            if let Some(v) = values.get(col.as_str()) {
                rec.set_arc(Arc::from(col.as_str()), v.clone());
            }
        }
        records.push(rec);
    }

    UnifiedResult {
        columns,
        records,
        stats: Default::default(),
        pre_serialized_json: None,
    }
}
2623
2624fn public_returning_item_kind(entity: &crate::storage::UnifiedEntity) -> Option<&'static str> {
2625    match (&entity.kind, &entity.data) {
2626        (crate::storage::EntityKind::GraphNode(_), crate::storage::EntityData::Node(_)) => {
2627            Some("node")
2628        }
2629        (crate::storage::EntityKind::GraphEdge(_), crate::storage::EntityData::Edge(_)) => {
2630            Some("edge")
2631        }
2632        (_, crate::storage::EntityData::Row(_)) => Some(public_returning_row_kind(entity)),
2633        _ => None,
2634    }
2635}
2636
2637fn public_returning_row_kind(entity: &crate::storage::UnifiedEntity) -> &'static str {
2638    let Some(row) = entity.data.as_row() else {
2639        return "row";
2640    };
2641
2642    let is_kv = row.named.as_ref().is_some_and(|named| {
2643        (named.len() == 2 && named.contains_key("key") && named.contains_key("value"))
2644            || (named.len() == 1 && (named.contains_key("key") || named.contains_key("value")))
2645    });
2646    if is_kv {
2647        return "kv";
2648    }
2649
2650    let is_document = row
2651        .named
2652        .as_ref()
2653        .is_some_and(|named| named.values().any(runtime_returning_documentish_value))
2654        || row.columns.iter().any(runtime_returning_documentish_value);
2655    if is_document {
2656        "document"
2657    } else {
2658        "row"
2659    }
2660}
2661
2662fn runtime_returning_documentish_value(value: &Value) -> bool {
2663    matches!(value, Value::Json(_) | Value::Blob(_))
2664}
2665
2666fn row_insert_returning_snapshots(
2667    outputs: &[CreateEntityOutput],
2668    fallback: Vec<Vec<(String, Value)>>,
2669) -> Vec<Vec<(String, Value)>> {
2670    outputs
2671        .iter()
2672        .enumerate()
2673        .map(|(idx, out)| {
2674            out.entity
2675                .as_ref()
2676                .map(entity_row_fields_snapshot)
2677                .filter(|snap| !snap.is_empty())
2678                .unwrap_or_else(|| fallback.get(idx).cloned().unwrap_or_default())
2679        })
2680        .collect()
2681}
2682
2683fn graph_insert_returning_snapshots(
2684    store: &crate::storage::unified::UnifiedStore,
2685    collection: &str,
2686    ids: &[EntityId],
2687) -> Vec<Vec<(String, Value)>> {
2688    let Some(manager) = store.get_collection(collection) else {
2689        return Vec::new();
2690    };
2691
2692    ids.iter()
2693        .filter_map(|id| manager.get(*id))
2694        .filter_map(|entity| {
2695            let mut record = runtime_any_record_from_entity_ref(&entity)?;
2696            record.set_arc(sys_key_collection(), Value::text(collection.to_string()));
2697            Some(record)
2698        })
2699        .map(|record| {
2700            record
2701                .iter_fields()
2702                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2703                .collect()
2704        })
2705        .collect()
2706}
2707
2708fn graph_update_returning_snapshots(
2709    runtime: &RedDBRuntime,
2710    collection: &str,
2711    ids: &[EntityId],
2712) -> Vec<Vec<(String, Value)>> {
2713    let store = runtime.db().store();
2714    let Some(manager) = store.get_collection(collection) else {
2715        return Vec::new();
2716    };
2717
2718    manager
2719        .get_many(ids)
2720        .into_iter()
2721        .flatten()
2722        .filter_map(|entity| runtime_any_record_from_entity_ref(&entity))
2723        .map(|record| {
2724            record
2725                .iter_fields()
2726                .map(|(key, value)| (key.as_ref().to_string(), value.clone()))
2727                .collect()
2728        })
2729        .collect()
2730}
2731
2732fn ensure_update_target_contract(
2733    runtime: &RedDBRuntime,
2734    collection: &str,
2735    target: UpdateTarget,
2736) -> RedDBResult<()> {
2737    let Some(contract) = runtime.db().collection_contract(collection) else {
2738        return Ok(());
2739    };
2740    if update_target_contract_is_advisory(&contract)
2741        || update_target_allows_model(contract.declared_model, update_target_model(target))
2742    {
2743        return Ok(());
2744    }
2745    Err(RedDBError::InvalidOperation(format!(
2746        "collection '{}' is declared as '{}' and does not allow '{}' updates",
2747        collection,
2748        update_model_name(contract.declared_model),
2749        update_model_name(update_target_model(target))
2750    )))
2751}
2752
2753fn update_target_contract_is_advisory(contract: &crate::physical::CollectionContract) -> bool {
2754    matches!(
2755        (&contract.origin, &contract.schema_mode),
2756        (
2757            crate::physical::ContractOrigin::Implicit,
2758            crate::catalog::SchemaMode::Dynamic,
2759        )
2760    )
2761}
2762
2763fn update_target_model(target: UpdateTarget) -> crate::catalog::CollectionModel {
2764    match target {
2765        UpdateTarget::Rows => crate::catalog::CollectionModel::Table,
2766        UpdateTarget::Documents => crate::catalog::CollectionModel::Document,
2767        UpdateTarget::Kv => crate::catalog::CollectionModel::Kv,
2768        UpdateTarget::Nodes | UpdateTarget::Edges => crate::catalog::CollectionModel::Graph,
2769    }
2770}
2771
2772fn update_target_allows_model(
2773    declared_model: crate::catalog::CollectionModel,
2774    requested_model: crate::catalog::CollectionModel,
2775) -> bool {
2776    declared_model == requested_model || declared_model == crate::catalog::CollectionModel::Mixed
2777}
2778
2779fn update_model_name(model: crate::catalog::CollectionModel) -> &'static str {
2780    match model {
2781        crate::catalog::CollectionModel::Table => "table",
2782        crate::catalog::CollectionModel::Document => "document",
2783        crate::catalog::CollectionModel::Graph => "graph",
2784        crate::catalog::CollectionModel::Vector => "vector",
2785        crate::catalog::CollectionModel::Hll => "hll",
2786        crate::catalog::CollectionModel::Sketch => "sketch",
2787        crate::catalog::CollectionModel::Filter => "filter",
2788        crate::catalog::CollectionModel::Kv => "kv",
2789        crate::catalog::CollectionModel::Config => "config",
2790        crate::catalog::CollectionModel::Vault => "vault",
2791        crate::catalog::CollectionModel::Mixed => "mixed",
2792        crate::catalog::CollectionModel::TimeSeries => "timeseries",
2793        crate::catalog::CollectionModel::Queue => "queue",
2794        crate::catalog::CollectionModel::Metrics => "metrics",
2795    }
2796}
2797
2798fn ensure_graph_insert_contract(runtime: &RedDBRuntime, collection: &str) -> RedDBResult<()> {
2799    let db = runtime.db();
2800    if let Some(contract) = db.collection_contract(collection) {
2801        let advisory_implicit_dynamic = matches!(
2802            (&contract.origin, &contract.schema_mode),
2803            (
2804                crate::physical::ContractOrigin::Implicit,
2805                crate::catalog::SchemaMode::Dynamic,
2806            )
2807        );
2808        if advisory_implicit_dynamic
2809            || matches!(
2810                contract.declared_model,
2811                crate::catalog::CollectionModel::Graph | crate::catalog::CollectionModel::Mixed
2812            )
2813        {
2814            return Ok(());
2815        }
2816        return Err(RedDBError::InvalidOperation(format!(
2817            "collection '{}' is declared as '{:?}' and does not allow 'Graph' writes",
2818            collection, contract.declared_model
2819        )));
2820    }
2821
2822    let now = std::time::SystemTime::now()
2823        .duration_since(std::time::UNIX_EPOCH)
2824        .unwrap_or_default()
2825        .as_millis();
2826    db.save_collection_contract(crate::physical::CollectionContract {
2827        name: collection.to_string(),
2828        declared_model: crate::catalog::CollectionModel::Graph,
2829        schema_mode: crate::catalog::SchemaMode::Dynamic,
2830        origin: crate::physical::ContractOrigin::Implicit,
2831        version: 1,
2832        created_at_unix_ms: now,
2833        updated_at_unix_ms: now,
2834        default_ttl_ms: db.collection_default_ttl_ms(collection),
2835        vector_dimension: None,
2836        vector_metric: None,
2837        context_index_fields: Vec::new(),
2838        declared_columns: Vec::new(),
2839        table_def: None,
2840        timestamps_enabled: false,
2841        context_index_enabled: false,
2842        metrics_raw_retention_ms: None,
2843        metrics_rollup_policies: Vec::new(),
2844        metrics_tenant_identity: None,
2845        metrics_namespace: None,
2846        append_only: false,
2847        subscriptions: Vec::new(),
2848        analytics_config: Vec::new(),
2849        session_key: None,
2850        session_gap_ms: None,
2851        retention_duration_ms: None,
2852        analytical_storage: None,
2853    })
2854    .map(|_| ())
2855    .map_err(|err| RedDBError::Internal(err.to_string()))
2856}
2857
2858fn update_needs_rmw_lock(query: &UpdateQuery) -> bool {
2859    query
2860        .assignment_exprs
2861        .iter()
2862        .enumerate()
2863        .any(|(idx, (column, expr))| {
2864            query
2865                .compound_assignment_ops
2866                .get(idx)
2867                .is_some_and(|op| op.is_some())
2868                || expr_references_update_column(expr, &query.table, column)
2869        })
2870}
2871
2872fn evaluate_compound_update_assignment(
2873    column: &str,
2874    record: &UnifiedRecord,
2875    op: BinOp,
2876    rhs: Value,
2877) -> RedDBResult<Value> {
2878    let lhs = record.get(column).ok_or_else(|| {
2879        RedDBError::Query(format!(
2880            "compound assignment requires existing numeric field '{column}'"
2881        ))
2882    })?;
2883    if matches!(lhs, Value::Null) {
2884        return Err(RedDBError::Query(format!(
2885            "compound assignment requires non-null numeric field '{column}'"
2886        )));
2887    }
2888    apply_compound_numeric_op(column, op, lhs, &rhs)
2889}
2890
2891fn apply_compound_numeric_op(
2892    column: &str,
2893    op: BinOp,
2894    lhs: &Value,
2895    rhs: &Value,
2896) -> RedDBResult<Value> {
2897    let Some(lhs_number) = CompoundNumber::from_value(lhs) else {
2898        return Err(RedDBError::Query(format!(
2899            "compound assignment requires numeric field '{column}'"
2900        )));
2901    };
2902    let Some(rhs_number) = CompoundNumber::from_value(rhs) else {
2903        return Err(RedDBError::Query(format!(
2904            "compound assignment requires numeric right-hand value for field '{column}'"
2905        )));
2906    };
2907
2908    if lhs_number.is_float() || rhs_number.is_float() || matches!(op, BinOp::Div) {
2909        let a = lhs_number.as_f64();
2910        let b = rhs_number.as_f64();
2911        let out = match op {
2912            BinOp::Add => a + b,
2913            BinOp::Sub => a - b,
2914            BinOp::Mul => a * b,
2915            BinOp::Div => {
2916                if b == 0.0 {
2917                    return Err(RedDBError::Query(format!(
2918                        "division by zero in compound assignment for field '{column}'"
2919                    )));
2920                }
2921                a / b
2922            }
2923            BinOp::Mod => {
2924                if b == 0.0 {
2925                    return Err(RedDBError::Query(format!(
2926                        "modulo by zero in compound assignment for field '{column}'"
2927                    )));
2928                }
2929                a % b
2930            }
2931            _ => {
2932                return Err(RedDBError::Query(format!(
2933                    "unsupported compound assignment operator for field '{column}'"
2934                )));
2935            }
2936        };
2937        if !out.is_finite() {
2938            return Err(RedDBError::Query(format!(
2939                "numeric overflow in compound assignment for field '{column}'"
2940            )));
2941        }
2942        return Ok(Value::Float(out));
2943    }
2944
2945    let a = lhs_number.as_i128();
2946    let b = rhs_number.as_i128();
2947    let out = match op {
2948        BinOp::Add => a.checked_add(b),
2949        BinOp::Sub => a.checked_sub(b),
2950        BinOp::Mul => a.checked_mul(b),
2951        BinOp::Mod => {
2952            if b == 0 {
2953                return Err(RedDBError::Query(format!(
2954                    "modulo by zero in compound assignment for field '{column}'"
2955                )));
2956            }
2957            a.checked_rem(b)
2958        }
2959        BinOp::Div => unreachable!("integer division is handled by the float branch"),
2960        _ => None,
2961    }
2962    .ok_or_else(|| {
2963        RedDBError::Query(format!(
2964            "numeric overflow in compound assignment for field '{column}'"
2965        ))
2966    })?;
2967
2968    if matches!(lhs, Value::UnsignedInteger(_)) {
2969        let value = u64::try_from(out).map_err(|_| {
2970            RedDBError::Query(format!(
2971                "numeric overflow in compound assignment for field '{column}'"
2972            ))
2973        })?;
2974        Ok(Value::UnsignedInteger(value))
2975    } else {
2976        let value = i64::try_from(out).map_err(|_| {
2977            RedDBError::Query(format!(
2978                "numeric overflow in compound assignment for field '{column}'"
2979            ))
2980        })?;
2981        Ok(Value::Integer(value))
2982    }
2983}
2984
2985#[derive(Clone, Copy)]
2986enum CompoundNumber {
2987    Integer(i128),
2988    Float(f64),
2989}
2990
2991impl CompoundNumber {
2992    fn from_value(value: &Value) -> Option<Self> {
2993        match value {
2994            Value::Integer(value) | Value::BigInt(value) => Some(Self::Integer(*value as i128)),
2995            Value::UnsignedInteger(value) => Some(Self::Integer(*value as i128)),
2996            Value::Float(value) => value.is_finite().then_some(Self::Float(*value)),
2997            Value::Decimal(value) => Some(Self::Float(*value as f64 / 10_000.0)),
2998            _ => None,
2999        }
3000    }
3001
3002    fn is_float(self) -> bool {
3003        matches!(self, Self::Float(_))
3004    }
3005
3006    fn as_f64(self) -> f64 {
3007        match self {
3008            Self::Integer(value) => value as f64,
3009            Self::Float(value) => value,
3010        }
3011    }
3012
3013    fn as_i128(self) -> i128 {
3014        match self {
3015            Self::Integer(value) => value,
3016            Self::Float(_) => unreachable!("float compound number used as integer"),
3017        }
3018    }
3019}
3020
3021fn expr_references_update_column(expr: &Expr, table_name: &str, target_column: &str) -> bool {
3022    match expr {
3023        Expr::Literal { .. } | Expr::Parameter { .. } | Expr::Subquery { .. } => false,
3024        Expr::Column { field, .. } => {
3025            field_ref_matches_update_column(field, table_name, target_column)
3026        }
3027        Expr::BinaryOp { lhs, rhs, .. } => {
3028            expr_references_update_column(lhs, table_name, target_column)
3029                || expr_references_update_column(rhs, table_name, target_column)
3030        }
3031        Expr::UnaryOp { operand, .. } | Expr::Cast { inner: operand, .. } => {
3032            expr_references_update_column(operand, table_name, target_column)
3033        }
3034        Expr::FunctionCall { args, .. } => args
3035            .iter()
3036            .any(|arg| expr_references_update_column(arg, table_name, target_column)),
3037        Expr::Case {
3038            branches, else_, ..
3039        } => {
3040            branches.iter().any(|(cond, value)| {
3041                expr_references_update_column(cond, table_name, target_column)
3042                    || expr_references_update_column(value, table_name, target_column)
3043            }) || else_
3044                .as_deref()
3045                .is_some_and(|expr| expr_references_update_column(expr, table_name, target_column))
3046        }
3047        Expr::IsNull { operand, .. } => {
3048            expr_references_update_column(operand, table_name, target_column)
3049        }
3050        Expr::InList { target, values, .. } => {
3051            expr_references_update_column(target, table_name, target_column)
3052                || values
3053                    .iter()
3054                    .any(|value| expr_references_update_column(value, table_name, target_column))
3055        }
3056        Expr::Between {
3057            target, low, high, ..
3058        } => {
3059            expr_references_update_column(target, table_name, target_column)
3060                || expr_references_update_column(low, table_name, target_column)
3061                || expr_references_update_column(high, table_name, target_column)
3062        }
3063        Expr::WindowFunctionCall { args, window, .. } => {
3064            args.iter()
3065                .any(|arg| expr_references_update_column(arg, table_name, target_column))
3066                || window
3067                    .partition_by
3068                    .iter()
3069                    .any(|e| expr_references_update_column(e, table_name, target_column))
3070                || window
3071                    .order_by
3072                    .iter()
3073                    .any(|o| expr_references_update_column(&o.expr, table_name, target_column))
3074        }
3075    }
3076}
3077
3078fn field_ref_matches_update_column(
3079    field: &FieldRef,
3080    table_name: &str,
3081    target_column: &str,
3082) -> bool {
3083    match field {
3084        FieldRef::TableColumn { table, column } => {
3085            column.eq_ignore_ascii_case(target_column)
3086                && (table.is_empty() || table.eq_ignore_ascii_case(table_name))
3087        }
3088        FieldRef::NodeProperty { .. } | FieldRef::EdgeProperty { .. } | FieldRef::NodeId { .. } => {
3089            false
3090        }
3091    }
3092}
3093
3094fn resolve_update_entity_by_logical_id(
3095    runtime: &RedDBRuntime,
3096    table: &str,
3097    logical_id: EntityId,
3098) -> Option<UnifiedEntity> {
3099    let store = runtime.inner.db.store();
3100    if let Some(entity) = store.get_table_row_by_logical_id(table, logical_id) {
3101        return Some(entity);
3102    }
3103    // Fallback for non-table-row entities (graph nodes/edges, etc.) where
3104    // entity_id == logical_id and the MVCC table-row resolver doesn't apply.
3105    store.get(table, logical_id)
3106}
3107
3108fn update_cdc_item_kind(
3109    runtime: &RedDBRuntime,
3110    collection: &str,
3111    entity: &UnifiedEntity,
3112) -> &'static str {
3113    match &entity.data {
3114        EntityData::Node(_) => return "node",
3115        EntityData::Edge(_) => return "edge",
3116        _ => {}
3117    }
3118
3119    match runtime
3120        .db()
3121        .collection_contract(collection)
3122        .map(|contract| contract.declared_model)
3123    {
3124        Some(crate::catalog::CollectionModel::Document) => "document",
3125        Some(crate::catalog::CollectionModel::Kv)
3126        | Some(crate::catalog::CollectionModel::Vault) => "kv",
3127        _ => "row",
3128    }
3129}
3130
3131fn ordered_update_target_ids(
3132    manager: &Arc<crate::storage::SegmentManager>,
3133    entity_ids: &[EntityId],
3134    order_by: &[OrderByClause],
3135    limit: Option<usize>,
3136) -> Vec<EntityId> {
3137    let mut entities: Vec<UnifiedEntity> =
3138        manager.get_many(entity_ids).into_iter().flatten().collect();
3139    entities.sort_by(|left, right| compare_update_order(left, right, order_by));
3140    if let Some(limit) = limit {
3141        entities.truncate(limit);
3142    }
3143    entities.into_iter().map(|entity| entity.id).collect()
3144}
3145
3146fn compare_update_order(
3147    left: &UnifiedEntity,
3148    right: &UnifiedEntity,
3149    order_by: &[OrderByClause],
3150) -> Ordering {
3151    for clause in order_by {
3152        let left_value = update_order_value(left, &clause.field);
3153        let right_value = update_order_value(right, &clause.field);
3154        let ordering = compare_update_order_values(
3155            left_value.as_ref(),
3156            right_value.as_ref(),
3157            clause.nulls_first,
3158        );
3159        if ordering != Ordering::Equal {
3160            return if clause.ascending {
3161                ordering
3162            } else {
3163                ordering.reverse()
3164            };
3165        }
3166    }
3167    left.logical_id().raw().cmp(&right.logical_id().raw())
3168}
3169
3170fn compare_update_order_values(
3171    left: Option<&Value>,
3172    right: Option<&Value>,
3173    nulls_first: bool,
3174) -> Ordering {
3175    match (left, right) {
3176        (None, None) => Ordering::Equal,
3177        (None, Some(_)) => {
3178            if nulls_first {
3179                Ordering::Less
3180            } else {
3181                Ordering::Greater
3182            }
3183        }
3184        (Some(_), None) => {
3185            if nulls_first {
3186                Ordering::Greater
3187            } else {
3188                Ordering::Less
3189            }
3190        }
3191        (Some(left), Some(right)) => {
3192            crate::storage::query::value_compare::total_compare_values(left, right)
3193        }
3194    }
3195}
3196
3197fn update_order_value(entity: &UnifiedEntity, field: &FieldRef) -> Option<Value> {
3198    let FieldRef::TableColumn { table, column } = field else {
3199        return None;
3200    };
3201    if !table.is_empty() {
3202        return None;
3203    }
3204    if column.eq_ignore_ascii_case("rid") {
3205        return Some(Value::UnsignedInteger(entity.logical_id().raw()));
3206    }
3207    match &entity.data {
3208        EntityData::Row(row) => row.get_field(column).cloned(),
3209        EntityData::Node(_) | EntityData::Edge(_) => runtime_any_record_from_entity_ref(entity)
3210            .and_then(|record| record.get(column).cloned()),
3211        _ => None,
3212    }
3213}
3214
/// Removes duplicate column names, comparing ASCII-case-insensitively.
///
/// The first spelling of each name is kept and the relative order of the
/// survivors is preserved.
fn dedupe_update_columns(columns: Vec<String>) -> Vec<String> {
    let capacity = columns.len();
    columns
        .into_iter()
        .fold(Vec::with_capacity(capacity), |mut kept, candidate| {
            let already_seen = kept
                .iter()
                .any(|seen: &String| seen.eq_ignore_ascii_case(&candidate));
            if !already_seen {
                kept.push(candidate);
            }
            kept
        })
}
3231
3232// =============================================================================
3233// Helper functions for extracting typed values from column/value pairs
3234// =============================================================================
3235
/// Column names that SQL treats as TTL metadata rather than row fields.
const SQL_TTL_METADATA_COLUMNS: [&str; 3] = ["_ttl", "_ttl_ms", "_expires_at"];

/// Maps a column name onto its canonical TTL metadata key.
///
/// The comparison is ASCII-case-insensitive; the returned key is always the
/// lower-case spelling from `SQL_TTL_METADATA_COLUMNS`. Columns that are not
/// TTL metadata yield `None`.
fn resolve_sql_ttl_metadata_key(column: &str) -> Option<&'static str> {
    SQL_TTL_METADATA_COLUMNS
        .iter()
        .copied()
        .find(|known| column.eq_ignore_ascii_case(known))
}
3249
3250/// Canonicalize a SQL TTL metadata `(key, value)` pair so the retention
3251/// sweeper sees a single key (`_ttl_ms`) regardless of which legacy form
3252/// the operator wrote. `_ttl` is scaled from seconds to milliseconds;
3253/// `_ttl_ms` and `_expires_at` are passed through.
3254fn canonicalize_sql_ttl_metadata(
3255    key: &'static str,
3256    value: MetadataValue,
3257) -> (&'static str, MetadataValue) {
3258    if key != "_ttl" {
3259        return (key, value);
3260    }
3261    let scaled = match value {
3262        MetadataValue::Int(s) => MetadataValue::Int(s.saturating_mul(1_000)),
3263        MetadataValue::Timestamp(ms_or_s) => {
3264            // Timestamp is already chosen for very large values; treat as
3265            // already-ms to avoid silent overflow.
3266            MetadataValue::Timestamp(ms_or_s)
3267        }
3268        MetadataValue::Float(f) => MetadataValue::Float(f * 1_000.0),
3269        other => other,
3270    };
3271    ("_ttl_ms", scaled)
3272}
3273
/// Sentinel prefix produced by the parser for `PASSWORD('...')` and
/// `SECRET('...')` literals. The runtime strips this marker and
/// applies the actual crypto transform during INSERT execution.
///
/// `resolve_crypto_sentinel` passes through unchanged any password or
/// secret value that does not start with this prefix.
pub(crate) const PLAINTEXT_SENTINEL: &str = "@@plain@@";
3278
3279impl RedDBRuntime {
3280    /// Strip the plaintext sentinel from a `Value::Password` or
3281    /// `Value::Secret` produced by the parser and apply the real
3282    /// crypto transform. `Password` is always hashed with argon2id.
3283    /// `Secret` is encrypted with AES-256-GCM keyed by the vault
3284    /// when `red.config.secret.auto_encrypt = true` (default).
3285    pub(crate) fn resolve_crypto_sentinel(&self, value: Value) -> RedDBResult<Value> {
3286        match value {
3287            Value::Password(marked) => {
3288                if let Some(plain) = marked.strip_prefix(PLAINTEXT_SENTINEL) {
3289                    Ok(Value::Password(crate::auth::store::hash_password(plain)))
3290                } else {
3291                    Ok(Value::Password(marked))
3292                }
3293            }
3294            Value::Secret(bytes) => {
3295                if bytes.starts_with(PLAINTEXT_SENTINEL.as_bytes()) {
3296                    if !self.secret_auto_encrypt() {
3297                        return Err(RedDBError::Query(
3298                            "SECRET() literal rejected: red.config.secret.auto_encrypt \
3299                             is false. Insert pre-encrypted bytes directly instead."
3300                                .to_string(),
3301                        ));
3302                    }
3303                    let key = self.secret_aes_key().ok_or_else(|| {
3304                        RedDBError::Query(
3305                            "SECRET() column encryption requires a bootstrapped \
3306                             vault (red.secret.aes_key is missing). Start the server \
3307                             with --vault to enable."
3308                                .to_string(),
3309                        )
3310                    })?;
3311                    let plain = &bytes[PLAINTEXT_SENTINEL.len()..];
3312                    Ok(Value::Secret(encrypt_secret_payload(&key, plain)))
3313                } else {
3314                    Ok(Value::Secret(bytes))
3315                }
3316            }
3317            other => Ok(other),
3318        }
3319    }
3320}
3321
3322/// Encode an AES-256-GCM ciphertext as `[12-byte nonce][ciphertext||tag]`.
3323/// This is the on-disk representation of `Value::Secret`.
3324fn encrypt_secret_payload(key: &[u8; 32], plaintext: &[u8]) -> Vec<u8> {
3325    let nonce_bytes = crate::auth::store::random_bytes(12);
3326    let mut nonce = [0u8; 12];
3327    nonce.copy_from_slice(&nonce_bytes[..12]);
3328    let ct = crate::crypto::aes_gcm::aes256_gcm_encrypt(key, &nonce, b"reddb.secret", plaintext);
3329    let mut out = Vec::with_capacity(12 + ct.len());
3330    out.extend_from_slice(&nonce);
3331    out.extend_from_slice(&ct);
3332    out
3333}
3334
3335/// Decode a `Value::Secret` payload back to plaintext. Returns
3336/// `None` when the payload is too short or AES-GCM authentication
3337/// fails (tampered or wrong key).
3338pub(crate) fn decrypt_secret_payload(key: &[u8; 32], payload: &[u8]) -> Option<Vec<u8>> {
3339    if payload.len() < 12 {
3340        return None;
3341    }
3342    let mut nonce = [0u8; 12];
3343    nonce.copy_from_slice(&payload[..12]);
3344    crate::crypto::aes_gcm::aes256_gcm_decrypt(key, &nonce, b"reddb.secret", &payload[12..]).ok()
3345}
3346
3347fn split_insert_metadata(
3348    runtime: &RedDBRuntime,
3349    columns: &[String],
3350    values: &[Value],
3351) -> RedDBResult<(Vec<(String, Value)>, Vec<(String, MetadataValue)>)> {
3352    let mut fields = Vec::new();
3353    let mut metadata = Vec::new();
3354
3355    for (column, value) in columns.iter().zip(values.iter()) {
3356        // Still support legacy _ttl columns for backward compat
3357        if let Some(metadata_key) = resolve_sql_ttl_metadata_key(column) {
3358            let raw_value = sql_literal_to_metadata_value(metadata_key, value)?;
3359            let (canonical_key, canonical_value) =
3360                canonicalize_sql_ttl_metadata(metadata_key, raw_value);
3361            metadata.push((canonical_key.to_string(), canonical_value));
3362            continue;
3363        }
3364        fields.push((
3365            column.clone(),
3366            runtime.resolve_crypto_sentinel(value.clone())?,
3367        ));
3368    }
3369
3370    Ok((fields, metadata))
3371}
3372
3373/// Merge structured WITH TTL, WITH EXPIRES AT, and WITH METADATA clauses into metadata entries.
3374fn merge_with_clauses(
3375    metadata: &mut Vec<(String, MetadataValue)>,
3376    ttl_ms: Option<u64>,
3377    expires_at_ms: Option<u64>,
3378    with_metadata: &[(String, Value)],
3379) {
3380    if let Some(ms) = ttl_ms {
3381        metadata.push((
3382            "_ttl_ms".to_string(),
3383            if ms <= i64::MAX as u64 {
3384                MetadataValue::Int(ms as i64)
3385            } else {
3386                MetadataValue::Timestamp(ms)
3387            },
3388        ));
3389    }
3390    if let Some(ms) = expires_at_ms {
3391        metadata.push(("_expires_at".to_string(), MetadataValue::Timestamp(ms)));
3392    }
3393    for (key, value) in with_metadata {
3394        let meta_value = match value {
3395            Value::Text(s) => MetadataValue::String(s.to_string()),
3396            Value::Integer(n) => MetadataValue::Int(*n),
3397            Value::Float(n) => MetadataValue::Float(*n),
3398            Value::Boolean(b) => MetadataValue::Bool(*b),
3399            _ => MetadataValue::String(value.to_string()),
3400        };
3401        metadata.push((key.clone(), meta_value));
3402    }
3403}
3404
3405fn merge_vector_metadata_column(
3406    metadata: &mut Vec<(String, MetadataValue)>,
3407    columns: &[String],
3408    values: &[Value],
3409) -> RedDBResult<()> {
3410    let Some(value) = columns
3411        .iter()
3412        .position(|column| column.eq_ignore_ascii_case("metadata"))
3413        .map(|index| &values[index])
3414    else {
3415        return Ok(());
3416    };
3417    let json = match value {
3418        Value::Null => return Ok(()),
3419        Value::Json(bytes) => crate::json::from_slice(bytes).map_err(|err| {
3420            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3421        })?,
3422        Value::Text(text) => crate::json::from_str(text).map_err(|err| {
3423            RedDBError::Query(format!("column 'metadata' invalid JSON object: {err}"))
3424        })?,
3425        other => {
3426            return Err(RedDBError::Query(format!(
3427                "column 'metadata' expected JSON object, got {other:?}"
3428            )))
3429        }
3430    };
3431    let parsed = metadata_from_json(&json)?;
3432    for (key, value) in parsed.iter() {
3433        metadata.push((key.clone(), value.clone()));
3434    }
3435    Ok(())
3436}
3437
3438fn apply_collection_default_ttl_metadata(
3439    runtime: &RedDBRuntime,
3440    collection: &str,
3441    metadata: &mut Vec<(String, MetadataValue)>,
3442) {
3443    if has_internal_ttl_metadata(metadata) {
3444        return;
3445    }
3446
3447    let Some(default_ttl_ms) = runtime.db().collection_default_ttl_ms(collection) else {
3448        return;
3449    };
3450
3451    metadata.push((
3452        "_ttl_ms".to_string(),
3453        if default_ttl_ms <= i64::MAX as u64 {
3454            MetadataValue::Int(default_ttl_ms as i64)
3455        } else {
3456            MetadataValue::Timestamp(default_ttl_ms)
3457        },
3458    ));
3459}
3460
3461fn ensure_non_tree_reserved_metadata_entries(
3462    metadata: &[(String, MetadataValue)],
3463) -> RedDBResult<()> {
3464    for (key, _) in metadata {
3465        ensure_non_tree_reserved_metadata_key(key)?;
3466    }
3467    Ok(())
3468}
3469
3470fn ensure_non_tree_reserved_metadata_key(key: &str) -> RedDBResult<()> {
3471    if key.starts_with(TREE_METADATA_PREFIX) {
3472        return Err(RedDBError::Query(format!(
3473            "metadata key '{}' is reserved for managed trees",
3474            key
3475        )));
3476    }
3477    Ok(())
3478}
3479
3480fn ensure_non_tree_structural_edge_label(label: &str) -> RedDBResult<()> {
3481    if label.eq_ignore_ascii_case(TREE_CHILD_EDGE_LABEL) {
3482        return Err(RedDBError::Query(format!(
3483            "edge label '{}' is reserved for managed trees",
3484            TREE_CHILD_EDGE_LABEL
3485        )));
3486    }
3487    Ok(())
3488}
3489
3490fn pairwise_columns_values(pairs: &[(String, Value)]) -> (Vec<String>, Vec<Value>) {
3491    let mut columns = Vec::with_capacity(pairs.len());
3492    let mut values = Vec::with_capacity(pairs.len());
3493
3494    for (column, value) in pairs {
3495        columns.push(column.clone());
3496        values.push(value.clone());
3497    }
3498
3499    (columns, values)
3500}
3501
3502/// Find a required column value and return it as-is.
3503fn find_column_value(columns: &[String], values: &[Value], name: &str) -> RedDBResult<Value> {
3504    for (i, col) in columns.iter().enumerate() {
3505        if col.eq_ignore_ascii_case(name) {
3506            return Ok(values[i].clone());
3507        }
3508    }
3509    Err(RedDBError::Query(format!(
3510        "required column '{name}' not found in INSERT"
3511    )))
3512}
3513
3514/// Find a required column value and coerce to String.
3515fn find_column_value_string(
3516    columns: &[String],
3517    values: &[Value],
3518    name: &str,
3519) -> RedDBResult<String> {
3520    let val = find_column_value(columns, values, name)?;
3521    match val {
3522        Value::Text(s) => Ok(s.to_string()),
3523        Value::Integer(n) => Ok(n.to_string()),
3524        Value::Float(n) => Ok(n.to_string()),
3525        other => Err(RedDBError::Query(format!(
3526            "column '{name}' expected text, got {other:?}"
3527        ))),
3528    }
3529}
3530
3531fn find_document_body_json(
3532    columns: &[String],
3533    values: &[Value],
3534) -> RedDBResult<crate::json::Value> {
3535    let val = find_column_value(columns, values, "body")?;
3536    match val {
3537        Value::Json(bytes) | Value::Blob(bytes) => crate::json::from_slice(&bytes)
3538            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3539        Value::Text(text) => crate::json::from_str(text.as_ref())
3540            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3541        Value::Integer(value) => crate::json::from_str(&value.to_string())
3542            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3543        Value::UnsignedInteger(value) => crate::json::from_str(&value.to_string())
3544            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3545        Value::Float(value) => crate::json::from_str(&value.to_string())
3546            .map_err(|err| RedDBError::Query(format!("invalid JSON body: {err}"))),
3547        other => Err(RedDBError::Query(format!(
3548            "column 'body' expected JSON body, got {other:?}"
3549        ))),
3550    }
3551}
3552
3553fn find_column_value_f64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<f64> {
3554    let val = find_column_value(columns, values, name)?;
3555    match val {
3556        Value::Float(n) => Ok(n),
3557        Value::Integer(n) => Ok(n as f64),
3558        Value::UnsignedInteger(n) => Ok(n as f64),
3559        Value::Text(s) => s
3560            .parse::<f64>()
3561            .map_err(|_| RedDBError::Query(format!("column '{name}' expected number, got '{s}'"))),
3562        other => Err(RedDBError::Query(format!(
3563            "column '{name}' expected number, got {other:?}"
3564        ))),
3565    }
3566}
3567
3568/// Find an optional column value as String.
3569fn find_column_value_opt_string(
3570    columns: &[String],
3571    values: &[Value],
3572    name: &str,
3573) -> Option<String> {
3574    for (i, col) in columns.iter().enumerate() {
3575        if col.eq_ignore_ascii_case(name) {
3576            return match &values[i] {
3577                Value::Null => None,
3578                Value::Text(s) => Some(s.to_string()),
3579                Value::Integer(n) => Some(n.to_string()),
3580                Value::Float(n) => Some(n.to_string()),
3581                _ => None,
3582            };
3583        }
3584    }
3585    None
3586}
3587
3588/// Resolve an EDGE endpoint (`from`/`to`) to a numeric entity id.
3589///
3590/// Accepts integer literals, decimal strings, and node labels resolved via
3591/// the per-collection graph label index (same source of truth that
3592/// `GRAPH NEIGHBORHOOD` / `GRAPH TRAVERSE` use at query time). Ambiguous
3593/// labels error so callers can fall back to the numeric id form.
3594fn resolve_edge_endpoint(
3595    store: &crate::storage::unified::UnifiedStore,
3596    collection: &str,
3597    columns: &[String],
3598    values: &[Value],
3599    name: &str,
3600) -> RedDBResult<u64> {
3601    let val = find_column_value(columns, values, name)?;
3602    match val {
3603        Value::Integer(n) => Ok(n as u64),
3604        Value::UnsignedInteger(n) => Ok(n),
3605        Value::Text(s) => {
3606            if let Ok(n) = s.parse::<u64>() {
3607                return Ok(n);
3608            }
3609            let matches = store.lookup_graph_nodes_by_label_in(collection, &s);
3610            match matches.len() {
3611                0 => Err(RedDBError::Query(format!(
3612                    "column '{name}': no graph node with label '{s}' in collection '{collection}'"
3613                ))),
3614                1 => Ok(matches[0].raw()),
3615                n => Err(RedDBError::Query(format!(
3616                    "column '{name}': ambiguous label '{s}' matches {n} nodes in collection '{collection}'; use the numeric id"
3617                ))),
3618            }
3619        }
3620        other => Err(RedDBError::Query(format!(
3621            "column '{name}' expected integer or node label, got {other:?}"
3622        ))),
3623    }
3624}
3625
3626fn resolve_edge_endpoint_any(
3627    store: &crate::storage::unified::UnifiedStore,
3628    collection: &str,
3629    columns: &[String],
3630    values: &[Value],
3631    names: &[&str],
3632) -> RedDBResult<u64> {
3633    for name in names {
3634        if columns
3635            .iter()
3636            .any(|column| column.eq_ignore_ascii_case(name))
3637        {
3638            return resolve_edge_endpoint(store, collection, columns, values, name);
3639        }
3640    }
3641
3642    Err(RedDBError::Query(format!(
3643        "required column '{}' not found in INSERT",
3644        names.first().copied().unwrap_or("from_rid")
3645    )))
3646}
3647
3648/// Find a required column value and coerce to u64.
3649fn find_column_value_u64(columns: &[String], values: &[Value], name: &str) -> RedDBResult<u64> {
3650    let val = find_column_value(columns, values, name)?;
3651    match val {
3652        Value::Integer(n) => Ok(n as u64),
3653        Value::UnsignedInteger(n) => Ok(n),
3654        Value::Text(s) => s
3655            .parse::<u64>()
3656            .map_err(|_| RedDBError::Query(format!("column '{name}' expected integer, got '{s}'"))),
3657        other => Err(RedDBError::Query(format!(
3658            "column '{name}' expected integer, got {other:?}"
3659        ))),
3660    }
3661}
3662
3663/// Find an optional column value as f32.
3664fn find_column_value_f32_opt(columns: &[String], values: &[Value], name: &str) -> Option<f32> {
3665    for (i, col) in columns.iter().enumerate() {
3666        if col.eq_ignore_ascii_case(name) {
3667            return match &values[i] {
3668                Value::Float(n) => Some(*n as f32),
3669                Value::Integer(n) => Some(*n as f32),
3670                Value::Null => None,
3671                _ => None,
3672            };
3673        }
3674    }
3675    None
3676}
3677
3678/// Find a required column value and coerce to Vec<f32> (from Value::Vector).
3679fn find_column_value_vec_f32(
3680    columns: &[String],
3681    values: &[Value],
3682    name: &str,
3683) -> RedDBResult<Vec<f32>> {
3684    let val = find_column_value(columns, values, name)?;
3685    match val {
3686        Value::Vector(v) => Ok(v),
3687        Value::Json(bytes) => {
3688            // Try to parse as JSON array of numbers
3689            let s = std::str::from_utf8(&bytes).map_err(|_| {
3690                RedDBError::Query(format!("column '{name}' contains invalid UTF-8"))
3691            })?;
3692            let arr: Vec<f32> = crate::json::from_str(s).map_err(|e| {
3693                RedDBError::Query(format!("column '{name}' invalid vector JSON: {e}"))
3694            })?;
3695            Ok(arr)
3696        }
3697        other => Err(RedDBError::Query(format!(
3698            "column '{name}' expected vector, got {other:?}"
3699        ))),
3700    }
3701}
3702
3703fn find_column_value_vec_f32_any(
3704    columns: &[String],
3705    values: &[Value],
3706    names: &[&str],
3707) -> RedDBResult<Vec<f32>> {
3708    for name in names {
3709        if columns
3710            .iter()
3711            .any(|column| column.eq_ignore_ascii_case(name))
3712        {
3713            return find_column_value_vec_f32(columns, values, name);
3714        }
3715    }
3716    Err(RedDBError::Query(format!(
3717        "required vector column '{}' not found in INSERT",
3718        names.join("' or '")
3719    )))
3720}
3721
3722/// Extract remaining properties (all columns not in the exclusion list).
3723fn extract_remaining_properties(
3724    columns: &[String],
3725    values: &[Value],
3726    exclude: &[&str],
3727) -> Vec<(String, Value)> {
3728    columns
3729        .iter()
3730        .zip(values.iter())
3731        .filter(|(col, _)| !exclude.iter().any(|e| col.eq_ignore_ascii_case(e)))
3732        .map(|(col, val)| (col.clone(), val.clone()))
3733        .collect()
3734}
3735
3736fn validate_timeseries_insert_columns(columns: &[String]) -> RedDBResult<()> {
3737    let mut invalid = Vec::new();
3738    for column in columns {
3739        if !is_timeseries_insert_column(column) && resolve_sql_ttl_metadata_key(column).is_none() {
3740            invalid.push(column.clone());
3741        }
3742    }
3743
3744    if invalid.is_empty() {
3745        Ok(())
3746    } else {
3747        Err(RedDBError::Query(format!(
3748            "timeseries INSERT only accepts metric, value, tags, timestamp, timestamp_ns, or time columns; got {}",
3749            invalid.join(", ")
3750        )))
3751    }
3752}
3753
/// Returns `true` when `column` (compared ASCII case-insensitively) is one
/// of the column names a timeseries INSERT understands natively.
fn is_timeseries_insert_column(column: &str) -> bool {
    const ACCEPTED: [&str; 8] = [
        "metric",
        "value",
        "tags",
        "timestamp",
        "timestamp_ns",
        "time",
        // Analytics-event extension (#577): an analytics row carries
        // an `event_name` + JSON `payload`. The payload is validated
        // against the AnalyticsSchemaRegistry inside
        // `insert_timeseries_point` before the row lands.
        "event_name",
        "payload",
    ];

    ACCEPTED
        .iter()
        .any(|accepted| column.eq_ignore_ascii_case(accepted))
}
3771
3772fn find_timeseries_timestamp_ns(columns: &[String], values: &[Value]) -> RedDBResult<Option<u64>> {
3773    let mut found = None;
3774
3775    for alias in ["timestamp_ns", "timestamp", "time"] {
3776        for (index, column) in columns.iter().enumerate() {
3777            if !column.eq_ignore_ascii_case(alias) {
3778                continue;
3779            }
3780
3781            if found.is_some() {
3782                return Err(RedDBError::Query(
3783                    "timeseries INSERT accepts only one timestamp column".to_string(),
3784                ));
3785            }
3786
3787            found = Some(coerce_value_to_non_negative_u64(&values[index], alias)?);
3788        }
3789    }
3790
3791    Ok(found)
3792}
3793
3794fn find_timeseries_tags(
3795    columns: &[String],
3796    values: &[Value],
3797) -> RedDBResult<std::collections::HashMap<String, String>> {
3798    for (index, column) in columns.iter().enumerate() {
3799        if column.eq_ignore_ascii_case("tags") {
3800            return parse_timeseries_tags(&values[index]);
3801        }
3802    }
3803    Ok(std::collections::HashMap::new())
3804}
3805
3806fn parse_timeseries_tags(value: &Value) -> RedDBResult<std::collections::HashMap<String, String>> {
3807    match value {
3808        Value::Null => Ok(std::collections::HashMap::new()),
3809        Value::Json(bytes) => parse_timeseries_tags_json(bytes),
3810        Value::Text(text) => parse_timeseries_tags_json(text.as_bytes()),
3811        other => Err(RedDBError::Query(format!(
3812            "timeseries tags must be a JSON object or JSON text, got {other:?}"
3813        ))),
3814    }
3815}
3816
3817fn parse_timeseries_tags_json(
3818    bytes: &[u8],
3819) -> RedDBResult<std::collections::HashMap<String, String>> {
3820    let json: crate::json::Value = crate::json::from_slice(bytes)
3821        .map_err(|err| RedDBError::Query(format!("timeseries tags must be valid JSON: {err}")))?;
3822
3823    let object = match json {
3824        crate::json::Value::Object(object) => object,
3825        other => {
3826            return Err(RedDBError::Query(format!(
3827                "timeseries tags must be a JSON object, got {other:?}"
3828            )))
3829        }
3830    };
3831
3832    let mut tags = std::collections::HashMap::with_capacity(object.len());
3833    for (key, value) in object {
3834        tags.insert(key, json_tag_value_to_string(&value));
3835    }
3836    Ok(tags)
3837}
3838
3839/// Encode a tag value for storage so the original JSON type can be
3840/// recovered on read (issue #543).
3841///
3842/// Time-series tags are stored as `HashMap<String, String>` on the
3843/// physical record (see [`crate::storage::TimeSeriesData`]) so that
3844/// the segment codec, WAL and gRPC mirrors don't need a new value
3845/// variant. To preserve the original JSON type across that
3846/// string-only channel we prepend the
3847/// [`crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX`] marker
3848/// and serialize the value as compact JSON text. The read paths
3849/// (`timeseries_tags_json_value` / `timeseries_tags_value`) detect
3850/// the marker, parse the suffix, and recover a real JSON value.
3851/// Tags written through other channels (Prometheus remote write,
3852/// metrics handlers, legacy on-disk data) lack the marker and are
3853/// returned as `JsonValue::String(raw)` exactly as before.
3854fn json_tag_value_to_string(value: &crate::json::Value) -> String {
3855    let mut buf = String::with_capacity(value.to_string_compact().len() + 1);
3856    buf.push(crate::runtime::query_exec::TIMESERIES_TAG_JSON_PREFIX);
3857    buf.push_str(&value.to_string_compact());
3858    buf
3859}
3860
3861fn coerce_value_to_non_negative_u64(value: &Value, column: &str) -> RedDBResult<u64> {
3862    match value {
3863        Value::UnsignedInteger(value) => Ok(*value),
3864        Value::Integer(value) if *value >= 0 => Ok(*value as u64),
3865        Value::Float(value) if *value >= 0.0 => Ok(*value as u64),
3866        Value::Text(value) => value.parse::<u64>().map_err(|_| {
3867            RedDBError::Query(format!(
3868                "column '{column}' expected a non-negative integer timestamp, got '{value}'"
3869            ))
3870        }),
3871        other => Err(RedDBError::Query(format!(
3872            "column '{column}' expected a non-negative integer timestamp, got {other:?}"
3873        ))),
3874    }
3875}
3876
/// Current wall-clock time as nanoseconds since the Unix epoch.
///
/// Yields `0` if the system clock reads earlier than the epoch and clamps
/// to `u64::MAX` if the nanosecond count does not fit in a `u64`.
fn current_unix_ns() -> u64 {
    let since_epoch = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => std::time::Duration::ZERO,
    };
    u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX)
}
3884
3885fn metadata_value_to_json(value: &MetadataValue) -> crate::json::Value {
3886    use crate::json::{Map, Value as JV};
3887    match value {
3888        MetadataValue::Null => JV::Null,
3889        MetadataValue::Bool(value) => JV::Bool(*value),
3890        MetadataValue::Int(value) => JV::Number(*value as f64),
3891        MetadataValue::Float(value) => JV::Number(*value),
3892        MetadataValue::String(value) => JV::String(value.clone()),
3893        MetadataValue::Bytes(value) => JV::Array(
3894            value
3895                .iter()
3896                .map(|value| JV::Number(*value as f64))
3897                .collect(),
3898        ),
3899        MetadataValue::Timestamp(value) => JV::Number(*value as f64),
3900        MetadataValue::Array(values) => {
3901            JV::Array(values.iter().map(metadata_value_to_json).collect())
3902        }
3903        MetadataValue::Object(object) => {
3904            let entries = object
3905                .iter()
3906                .map(|(key, value)| (key.clone(), metadata_value_to_json(value)))
3907                .collect();
3908            JV::Object(entries)
3909        }
3910        MetadataValue::Geo { lat, lon } => {
3911            let mut object = Map::new();
3912            object.insert("lat".to_string(), JV::Number(*lat));
3913            object.insert("lon".to_string(), JV::Number(*lon));
3914            JV::Object(object)
3915        }
3916        MetadataValue::Reference(target) => {
3917            let mut object = Map::new();
3918            object.insert(
3919                "collection".to_string(),
3920                JV::String(target.collection().to_string()),
3921            );
3922            object.insert(
3923                "entity_id".to_string(),
3924                JV::Number(target.entity_id().raw() as f64),
3925            );
3926            JV::Object(object)
3927        }
3928        MetadataValue::References(values) => {
3929            let refs = values
3930                .iter()
3931                .map(|target| {
3932                    let mut object = Map::new();
3933                    object.insert(
3934                        "collection".to_string(),
3935                        JV::String(target.collection().to_string()),
3936                    );
3937                    object.insert(
3938                        "entity_id".to_string(),
3939                        JV::Number(target.entity_id().raw() as f64),
3940                    );
3941                    JV::Object(object)
3942                })
3943                .collect();
3944            JV::Array(refs)
3945        }
3946    }
3947}
3948
3949fn storage_value_to_metadata_value(value: &Value) -> MetadataValue {
3950    match value {
3951        Value::Null => MetadataValue::Null,
3952        Value::Boolean(value) => MetadataValue::Bool(*value),
3953        Value::Integer(value) => MetadataValue::Int(*value),
3954        Value::UnsignedInteger(value) => metadata_u64_to_value(*value),
3955        Value::Float(value) => MetadataValue::Float(*value),
3956        Value::Text(value) => MetadataValue::String(value.to_string()),
3957        Value::Blob(value) => MetadataValue::Bytes(value.clone()),
3958        Value::Timestamp(value) => {
3959            if *value >= 0 {
3960                metadata_u64_to_value(*value as u64)
3961            } else {
3962                MetadataValue::Int(*value)
3963            }
3964        }
3965        Value::TimestampMs(value) => {
3966            if *value >= 0 {
3967                metadata_u64_to_value(*value as u64)
3968            } else {
3969                MetadataValue::Int(*value)
3970            }
3971        }
3972        Value::Json(value) => MetadataValue::String(String::from_utf8_lossy(value).into_owned()),
3973        Value::Uuid(value) => MetadataValue::String(format!("{value:?}")),
3974        Value::Date(value) => MetadataValue::String(value.to_string()),
3975        Value::Time(value) => MetadataValue::String(value.to_string()),
3976        Value::Decimal(value) => MetadataValue::String(value.to_string()),
3977        Value::Ipv4(value) => MetadataValue::String(format!(
3978            "{}.{}.{}.{}",
3979            (value >> 24) & 0xFF,
3980            (value >> 16) & 0xFF,
3981            (value >> 8) & 0xFF,
3982            value & 0xFF
3983        )),
3984        Value::Port(value) => MetadataValue::Int(i64::from(*value)),
3985        Value::Latitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3986        Value::Longitude(value) => MetadataValue::Float(*value as f64 / 1_000_000.0),
3987        Value::GeoPoint(lat, lon) => MetadataValue::Geo {
3988            lat: *lat as f64 / 1_000_000.0,
3989            lon: *lon as f64 / 1_000_000.0,
3990        },
3991        Value::BigInt(value) => MetadataValue::String(value.to_string()),
3992        Value::TableRef(value) => MetadataValue::String(value.clone()),
3993        Value::PageRef(value) => MetadataValue::Int(*value as i64),
3994        Value::Password(value) => MetadataValue::String(value.clone()),
3995        Value::Array(values) => {
3996            MetadataValue::Array(values.iter().map(storage_value_to_metadata_value).collect())
3997        }
3998        _ => MetadataValue::String(value.to_string()),
3999    }
4000}
4001
4002fn sql_literal_to_metadata_value(field: &str, value: &Value) -> RedDBResult<MetadataValue> {
4003    match value {
4004        Value::Null => Ok(MetadataValue::Null),
4005        Value::Integer(value) if *value >= 0 => Ok(metadata_u64_to_value(*value as u64)),
4006        Value::Integer(_) => Err(RedDBError::Query(format!(
4007            "column '{field}' must be non-negative for TTL metadata"
4008        ))),
4009        Value::UnsignedInteger(value) => Ok(metadata_u64_to_value(*value)),
4010        Value::Float(value) if value.is_finite() => {
4011            if value.fract().abs() >= f64::EPSILON {
4012                return Err(RedDBError::Query(format!(
4013                    "column '{field}' must be an integer (TTL metadata must be an integer)"
4014                )));
4015            }
4016            if *value < 0.0 {
4017                return Err(RedDBError::Query(format!(
4018                    "column '{field}' must be non-negative for TTL metadata"
4019                )));
4020            }
4021            if *value > u64::MAX as f64 {
4022                return Err(RedDBError::Query(format!(
4023                    "column '{field}' value is too large"
4024                )));
4025            }
4026            Ok(metadata_u64_to_value(*value as u64))
4027        }
4028        Value::Float(_) => Err(RedDBError::Query(format!(
4029            "column '{field}' must be a finite number"
4030        ))),
4031        Value::Text(value) => {
4032            let value = value.trim();
4033            if let Ok(value) = value.parse::<u64>() {
4034                Ok(metadata_u64_to_value(value))
4035            } else if let Ok(value) = value.parse::<i64>() {
4036                if value < 0 {
4037                    return Err(RedDBError::Query(format!(
4038                        "column '{field}' must be non-negative for TTL metadata"
4039                    )));
4040                }
4041                Ok(metadata_u64_to_value(value as u64))
4042            } else if let Ok(value) = value.parse::<f64>() {
4043                if !value.is_finite() {
4044                    return Err(RedDBError::Query(format!(
4045                        "column '{field}' must be a finite number"
4046                    )));
4047                }
4048                if value.fract().abs() >= f64::EPSILON {
4049                    return Err(RedDBError::Query(format!(
4050                        "column '{field}' must be an integer (TTL metadata must be an integer)"
4051                    )));
4052                }
4053                if value < 0.0 {
4054                    return Err(RedDBError::Query(format!(
4055                        "column '{field}' must be non-negative for TTL metadata"
4056                    )));
4057                }
4058                if value > u64::MAX as f64 {
4059                    return Err(RedDBError::Query(format!(
4060                        "column '{field}' value is too large"
4061                    )));
4062                }
4063                Ok(metadata_u64_to_value(value as u64))
4064            } else {
4065                Err(RedDBError::Query(format!(
4066                    "column '{field}' expects a numeric value for TTL metadata"
4067                )))
4068            }
4069        }
4070        _ => Err(RedDBError::Query(format!(
4071            "column '{field}' expects a numeric value for TTL metadata"
4072        ))),
4073    }
4074}
4075
4076fn metadata_u64_to_value(value: u64) -> MetadataValue {
4077    if value <= i64::MAX as u64 {
4078        MetadataValue::Int(value as i64)
4079    } else {
4080        MetadataValue::Timestamp(value)
4081    }
4082}
4083
4084/// Phase 2 PG parity: inspect a column value and return `true` when
4085/// the dotted `tail` path is already present under it. Used by the
4086/// tenant auto-fill so rows that already carry an explicit value
4087/// (bulk import, admin insert on behalf of a tenant) are not
4088/// double-stamped with the session's current_tenant().
4089fn dotted_tail_already_set(value: &Value, tail: &str) -> bool {
4090    let json = match value {
4091        Value::Null => return false,
4092        Value::Json(bytes) | Value::Blob(bytes) => {
4093            match crate::json::from_slice::<crate::json::Value>(bytes) {
4094                Ok(v) => v,
4095                Err(_) => return false,
4096            }
4097        }
4098        Value::Text(s) => {
4099            let trimmed = s.trim_start();
4100            if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
4101                return false;
4102            }
4103            match crate::json::from_str::<crate::json::Value>(s) {
4104                Ok(v) => v,
4105                Err(_) => return false,
4106            }
4107        }
4108        _ => return false,
4109    };
4110    let mut cursor = &json;
4111    for seg in tail.split('.') {
4112        match cursor {
4113            crate::json::Value::Object(map) => match map.iter().find(|(k, _)| *k == seg) {
4114                Some((_, v)) => cursor = v,
4115                None => return false,
4116            },
4117            _ => return false,
4118        }
4119    }
4120    !matches!(cursor, crate::json::Value::Null)
4121}
4122
4123/// Phase 2 PG parity: take a column value (possibly Null / Text /
4124/// Json) and return a `Value::Json` with the dotted `tail` path set
4125/// to `tenant_id`. Preserves every pre-existing key.
4126///
4127/// Accepts:
4128/// * `Value::Null`  → fresh `{tail: tenant_id}` object
4129/// * `Value::Json(bytes)` → parse, navigate / create path, re-serialize
4130/// * `Value::text(s)` if `s` is valid JSON → same as Json
4131/// * anything else → error (user supplied a scalar where we need
4132///   a JSON container)
4133fn merge_dotted_tenant(current: Value, tail: &str, tenant_id: &str) -> RedDBResult<Value> {
4134    let mut root = match current {
4135        Value::Null => crate::json::Value::Object(Default::default()),
4136        Value::Json(bytes) | Value::Blob(bytes) => {
4137            crate::json::from_slice(&bytes).map_err(|err| {
4138                RedDBError::Query(format!(
4139                    "tenant auto-fill: root column is not valid JSON ({err})"
4140                ))
4141            })?
4142        }
4143        Value::Text(s) => {
4144            if s.trim().is_empty() {
4145                crate::json::Value::Object(Default::default())
4146            } else {
4147                crate::json::from_str::<crate::json::Value>(&s).map_err(|err| {
4148                    RedDBError::Query(format!(
4149                        "tenant auto-fill: text root is not valid JSON ({err})"
4150                    ))
4151                })?
4152            }
4153        }
4154        other => {
4155            return Err(RedDBError::Query(format!(
4156                "tenant auto-fill: root column must be JSON / NULL, got {other:?}"
4157            )));
4158        }
4159    };
4160
4161    // Navigate path segments, creating intermediate objects on demand.
4162    let segments: Vec<&str> = tail.split('.').collect();
4163    let mut cursor: &mut crate::json::Value = &mut root;
4164    for (i, seg) in segments.iter().enumerate() {
4165        let is_last = i + 1 == segments.len();
4166        let map = match cursor {
4167            crate::json::Value::Object(m) => m,
4168            _ => {
4169                return Err(RedDBError::Query(format!(
4170                    "tenant auto-fill: segment '{seg}' is not inside an object"
4171                )));
4172            }
4173        };
4174        if is_last {
4175            map.insert(
4176                seg.to_string(),
4177                crate::json::Value::String(tenant_id.to_string()),
4178            );
4179            break;
4180        }
4181        cursor = map
4182            .entry(seg.to_string())
4183            .or_insert_with(|| crate::json::Value::Object(Default::default()));
4184    }
4185
4186    let bytes = crate::json::to_vec(&root).map_err(|err| {
4187        RedDBError::Query(format!(
4188            "tenant auto-fill: failed to re-serialize JSON ({err})"
4189        ))
4190    })?;
4191    Ok(Value::Json(bytes))
4192}
4193
4194#[cfg(test)]
4195mod tests {
4196    use crate::storage::schema::Value;
4197    use crate::storage::wal::{WalReader, WalRecord};
4198    use crate::{RedDBOptions, RedDBRuntime};
4199    use std::path::Path;
4200
4201    fn store_commit_batches(wal_path: &Path) -> Vec<Vec<Vec<u8>>> {
4202        WalReader::open(wal_path)
4203            .expect("wal opens")
4204            .iter()
4205            .map(|record| record.expect("wal record decodes").1)
4206            .filter_map(|record| match record {
4207                WalRecord::TxCommitBatch { actions, .. } => Some(actions),
4208                _ => None,
4209            })
4210            .collect()
4211    }
4212
4213    fn action_contains_text(action: &[u8], needle: &str) -> bool {
4214        action
4215            .windows(needle.len())
4216            .any(|window| window == needle.as_bytes())
4217    }
4218
4219    fn assert_statement_writes_collections_in_one_new_wal_batch(
4220        rt: &RedDBRuntime,
4221        wal_path: &Path,
4222        statement: &str,
4223        source: &str,
4224        event_queue: &str,
4225    ) {
4226        let before_batches = store_commit_batches(wal_path).len();
4227
4228        rt.execute_query(statement).unwrap();
4229
4230        let batches = store_commit_batches(wal_path);
4231        let statement_batches = &batches[before_batches..];
4232        let source_batch = statement_batches
4233            .iter()
4234            .position(|actions| {
4235                actions.iter().any(|action| {
4236                    action_contains_text(action, source)
4237                        && !action_contains_text(action, event_queue)
4238                })
4239            })
4240            .expect("source collection write batch is present");
4241        let event_batch = statement_batches
4242            .iter()
4243            .position(|actions| {
4244                actions
4245                    .iter()
4246                    .any(|action| action_contains_text(action, event_queue))
4247            })
4248            .expect("event queue write batch is present");
4249
4250        assert_eq!(
4251            source_batch, event_batch,
4252            "WITH EVENTS must persist the source write and queue event in the same WAL batch"
4253        );
4254    }
4255
4256    #[test]
4257    fn with_events_autocommit_persists_mutation_and_event_in_one_wal_batch() {
4258        let dir = tempfile::tempdir().unwrap();
4259        let db_path = dir.path().join("events_dual_write.rdb");
4260        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
4261        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4262
4263        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4264            .unwrap();
4265        assert_statement_writes_collections_in_one_new_wal_batch(
4266            &rt,
4267            &wal_path,
4268            "INSERT INTO users (id, email) VALUES (1, 'a@example.test')",
4269            "users",
4270            "users_events",
4271        );
4272    }
4273
4274    #[test]
4275    fn with_events_autocommit_update_persists_mutation_and_event_in_one_wal_batch() {
4276        let dir = tempfile::tempdir().unwrap();
4277        let db_path = dir.path().join("events_update_atomic.rdb");
4278        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
4279        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4280
4281        rt.execute_query(
4282            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (UPDATE) TO user_updates",
4283        )
4284        .unwrap();
4285        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4286            .unwrap();
4287
4288        assert_statement_writes_collections_in_one_new_wal_batch(
4289            &rt,
4290            &wal_path,
4291            "UPDATE users SET email = 'b@example.test' WHERE id = 1",
4292            "users",
4293            "user_updates",
4294        );
4295    }
4296
4297    #[test]
4298    fn with_events_autocommit_delete_persists_mutation_and_event_in_one_wal_batch() {
4299        let dir = tempfile::tempdir().unwrap();
4300        let db_path = dir.path().join("events_delete_atomic.rdb");
4301        let wal_path = reddb_file::layout::unified_wal_path(&db_path);
4302        let rt = RedDBRuntime::with_options(RedDBOptions::persistent(&db_path)).unwrap();
4303
4304        rt.execute_query(
4305            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (DELETE) TO user_deletes",
4306        )
4307        .unwrap();
4308        rt.execute_query("INSERT INTO users (id, email) VALUES (1, 'a@example.test')")
4309            .unwrap();
4310
4311        assert_statement_writes_collections_in_one_new_wal_batch(
4312            &rt,
4313            &wal_path,
4314            "DELETE FROM users WHERE id = 1",
4315            "users",
4316            "user_deletes",
4317        );
4318    }
4319
4320    #[test]
4321    fn update_where_id_in_with_hash_index_updates_expected_rows() {
4322        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4323        rt.execute_query("CREATE TABLE users (id INT, score INT)")
4324            .unwrap();
4325        for id in 0..5 {
4326            rt.execute_query(&format!("INSERT INTO users (id, score) VALUES ({id}, 0)"))
4327                .unwrap();
4328        }
4329        rt.execute_query("CREATE INDEX idx_id ON users (id) USING HASH")
4330            .unwrap();
4331
4332        let updated = rt
4333            .execute_query("UPDATE users SET score = 42 WHERE id IN (1,3,4)")
4334            .unwrap();
4335        assert_eq!(updated.affected_rows, 3);
4336
4337        let selected = rt
4338            .execute_query("SELECT id, score FROM users ORDER BY id")
4339            .unwrap();
4340        let scores: Vec<(i64, i64)> = selected
4341            .result
4342            .records
4343            .iter()
4344            .map(|record| {
4345                let id = match record.get("id").unwrap() {
4346                    Value::Integer(value) => *value,
4347                    other => panic!("expected integer id, got {other:?}"),
4348                };
4349                let score = match record.get("score").unwrap() {
4350                    Value::Integer(value) => *value,
4351                    other => panic!("expected integer score, got {other:?}"),
4352                };
4353                (id, score)
4354            })
4355            .collect();
4356        assert_eq!(scores, vec![(0, 0), (1, 42), (2, 0), (3, 42), (4, 42)]);
4357    }
4358
4359    /// Drives UPDATE through the shared `DmlTargetScan` module — the
4360    /// same code path DELETE uses (#51, #52). Exercises the indexed
4361    /// equality fast-path (WHERE id = N with a HASH index), the
4362    /// unindexed range scan (WHERE score > N), and the no-WHERE
4363    /// full-scan branch to confirm the extracted "find target rows"
4364    /// loop preserves affected-row counts and the resulting row state.
4365    #[test]
4366    fn update_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4367        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4368        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4369            .unwrap();
4370        for id in 0..5 {
4371            rt.execute_query(&format!(
4372                "INSERT INTO items (id, score) VALUES ({id}, {})",
4373                id * 10
4374            ))
4375            .unwrap();
4376        }
4377        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4378            .unwrap();
4379
4380        // Indexed equality UPDATE — hits the hash fast-path inside
4381        // DmlTargetScan::find_target_ids. id=2 has score=20, drop it
4382        // below the score>25 cutoff so the next assertion stays clean.
4383        let updated_one = rt
4384            .execute_query("UPDATE items SET score = 5 WHERE id = 2")
4385            .unwrap();
4386        assert_eq!(updated_one.affected_rows, 1);
4387
4388        // Unindexed scan UPDATE — bumps everyone with score > 25,
4389        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4390        // zoned/full-scan branch.
4391        let updated_many = rt
4392            .execute_query("UPDATE items SET score = 7 WHERE score > 25")
4393            .unwrap();
4394        assert_eq!(updated_many.affected_rows, 2);
4395
4396        let snapshot = rt
4397            .execute_query("SELECT id, score FROM items ORDER BY id")
4398            .unwrap();
4399        let pairs: Vec<(i64, i64)> = snapshot
4400            .result
4401            .records
4402            .iter()
4403            .map(|record| {
4404                let id = match record.get("id").unwrap() {
4405                    Value::Integer(value) => *value,
4406                    other => panic!("expected integer id, got {other:?}"),
4407                };
4408                let score = match record.get("score").unwrap() {
4409                    Value::Integer(value) => *value,
4410                    other => panic!("expected integer score, got {other:?}"),
4411                };
4412                (id, score)
4413            })
4414            .collect();
4415        assert_eq!(pairs, vec![(0, 0), (1, 10), (2, 5), (3, 7), (4, 7)]);
4416
4417        // Full-scan UPDATE with no WHERE rewrites every remaining row.
4418        let updated_all = rt.execute_query("UPDATE items SET score = 1").unwrap();
4419        assert_eq!(updated_all.affected_rows, 5);
4420        let after = rt
4421            .execute_query("SELECT score FROM items ORDER BY id")
4422            .unwrap();
4423        let scores: Vec<i64> = after
4424            .result
4425            .records
4426            .iter()
4427            .map(|record| match record.get("score").unwrap() {
4428                Value::Integer(value) => *value,
4429                other => panic!("expected integer score, got {other:?}"),
4430            })
4431            .collect();
4432        assert_eq!(scores, vec![1, 1, 1, 1, 1]);
4433    }
4434
4435    /// Drives DELETE through the new `DmlTargetScan` module. Exercises
4436    /// both the index fast-path (WHERE id = N with a HASH index) and
4437    /// the unindexed scan path (WHERE score > N) to confirm the
4438    /// extracted "find target rows" loop preserves the affected-row
4439    /// count and which rows survive.
4440    #[test]
4441    fn delete_routes_through_dml_target_scan_for_indexed_and_scan_paths() {
4442        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4443        rt.execute_query("CREATE TABLE items (id INT, score INT)")
4444            .unwrap();
4445        for id in 0..5 {
4446            rt.execute_query(&format!(
4447                "INSERT INTO items (id, score) VALUES ({id}, {})",
4448                id * 10
4449            ))
4450            .unwrap();
4451        }
4452        rt.execute_query("CREATE INDEX idx_items_id ON items (id) USING HASH")
4453            .unwrap();
4454
4455        // Indexed equality DELETE — hits the hash fast-path inside
4456        // DmlTargetScan::find_target_ids.
4457        let deleted_one = rt.execute_query("DELETE FROM items WHERE id = 2").unwrap();
4458        assert_eq!(deleted_one.affected_rows, 1);
4459
4460        // Unindexed scan DELETE — drops everyone with score > 25,
4461        // i.e. ids 3 and 4 (scores 30, 40). Goes through the
4462        // zoned/full-scan branch.
4463        let deleted_many = rt
4464            .execute_query("DELETE FROM items WHERE score > 25")
4465            .unwrap();
4466        assert_eq!(deleted_many.affected_rows, 2);
4467
4468        let surviving = rt
4469            .execute_query("SELECT id FROM items ORDER BY id")
4470            .unwrap();
4471        let ids: Vec<i64> = surviving
4472            .result
4473            .records
4474            .iter()
4475            .map(|record| match record.get("id").unwrap() {
4476                Value::Integer(value) => *value,
4477                other => panic!("expected integer id, got {other:?}"),
4478            })
4479            .collect();
4480        assert_eq!(ids, vec![0, 1]);
4481
4482        // Sanity: full-scan DELETE with no WHERE clears the rest.
4483        let deleted_rest = rt.execute_query("DELETE FROM items").unwrap();
4484        assert_eq!(deleted_rest.affected_rows, 2);
4485        let empty = rt.execute_query("SELECT id FROM items").unwrap();
4486        assert!(empty.result.records.is_empty());
4487    }
4488
4489    /// CollectionContract gate (#49 + #50): APPEND ONLY tables accept
4490    /// INSERT but reject UPDATE and DELETE with the documented
4491    /// operator-facing error strings. Drives all three DML verbs so
4492    /// the centralized gate is exercised end-to-end.
4493    #[test]
4494    fn collection_contract_gate_blocks_update_and_delete_on_append_only() {
4495        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4496        rt.execute_query("CREATE TABLE events (id INT, payload TEXT) APPEND ONLY")
4497            .unwrap();
4498
4499        // INSERT must succeed — APPEND ONLY exists precisely to allow
4500        // appends. The gate should be a no-op for INSERT.
4501        let inserted = rt
4502            .execute_query("INSERT INTO events (id, payload) VALUES (1, 'hello')")
4503            .unwrap();
4504        assert_eq!(inserted.affected_rows, 1);
4505
4506        // UPDATE is rejected with the gate's UPDATE-specific message.
4507        let update_err = rt
4508            .execute_query("UPDATE events SET payload = 'mut' WHERE id = 1")
4509            .unwrap_err();
4510        let msg = format!("{update_err}");
4511        assert!(
4512            msg.contains("APPEND ONLY") && msg.contains("UPDATE is rejected"),
4513            "expected UPDATE rejection message, got: {msg}"
4514        );
4515
4516        // DELETE is rejected with the gate's DELETE-specific message.
4517        let delete_err = rt
4518            .execute_query("DELETE FROM events WHERE id = 1")
4519            .unwrap_err();
4520        let msg = format!("{delete_err}");
4521        assert!(
4522            msg.contains("APPEND ONLY") && msg.contains("DELETE is rejected"),
4523            "expected DELETE rejection message, got: {msg}"
4524        );
4525
4526        // Row should still be present — neither rejected mutation
4527        // touched storage.
4528        let surviving = rt.execute_query("SELECT id FROM events").unwrap();
4529        assert_eq!(surviving.result.records.len(), 1);
4530    }
4531
4532    /// CollectionContract gate: tables without an APPEND ONLY contract
4533    /// permit INSERT, UPDATE, and DELETE — the gate's default branch
4534    /// is a true pass-through, not an accidental block.
4535    #[test]
4536    fn collection_contract_gate_allows_all_verbs_on_unrestricted_table() {
4537        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4538        rt.execute_query("CREATE TABLE notes (id INT, body TEXT)")
4539            .unwrap();
4540
4541        rt.execute_query("INSERT INTO notes (id, body) VALUES (1, 'a')")
4542            .unwrap();
4543        let updated = rt
4544            .execute_query("UPDATE notes SET body = 'b' WHERE id = 1")
4545            .unwrap();
4546        assert_eq!(updated.affected_rows, 1);
4547        let deleted = rt.execute_query("DELETE FROM notes WHERE id = 1").unwrap();
4548        assert_eq!(deleted.affected_rows, 1);
4549    }
4550
4551    #[test]
4552    fn insert_into_event_enabled_table_emits_event_to_configured_queue() {
4553        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4554        rt.execute_query(
4555            "CREATE TABLE users (id INT, email TEXT) WITH EVENTS (INSERT) TO audit_log",
4556        )
4557        .unwrap();
4558
4559        let inserted = rt
4560            .execute_query("INSERT INTO users (id, email) VALUES (7, 'a@example.com')")
4561            .unwrap();
4562        assert_eq!(inserted.affected_rows, 1);
4563
4564        let events = queue_payloads(&rt, "audit_log");
4565        assert_eq!(events.len(), 1);
4566        let event = events[0].as_object().expect("event payload object");
4567        assert!(event
4568            .get("event_id")
4569            .and_then(crate::json::Value::as_str)
4570            .is_some_and(|value| !value.is_empty()));
4571        assert_eq!(
4572            event.get("op").and_then(crate::json::Value::as_str),
4573            Some("insert")
4574        );
4575        assert_eq!(
4576            event.get("collection").and_then(crate::json::Value::as_str),
4577            Some("users")
4578        );
4579        assert_eq!(
4580            event.get("id").and_then(crate::json::Value::as_u64),
4581            Some(7)
4582        );
4583        assert!(event
4584            .get("ts")
4585            .and_then(crate::json::Value::as_u64)
4586            .is_some());
4587        assert!(event
4588            .get("lsn")
4589            .and_then(crate::json::Value::as_u64)
4590            .is_some());
4591        assert!(matches!(
4592            event.get("tenant"),
4593            Some(crate::json::Value::Null)
4594        ));
4595        assert!(matches!(
4596            event.get("before"),
4597            Some(crate::json::Value::Null)
4598        ));
4599        let after = event
4600            .get("after")
4601            .and_then(crate::json::Value::as_object)
4602            .expect("after object");
4603        assert_eq!(
4604            after.get("id").and_then(crate::json::Value::as_u64),
4605            Some(7)
4606        );
4607        assert_eq!(
4608            after.get("email").and_then(crate::json::Value::as_str),
4609            Some("a@example.com")
4610        );
4611    }
4612
4613    #[test]
4614    fn multi_row_insert_emits_one_insert_event_per_row_in_order() {
4615        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4616        rt.execute_query("CREATE TABLE users (id INT, email TEXT) WITH EVENTS")
4617            .unwrap();
4618
4619        rt.execute_query(
4620            "INSERT INTO users (id, email) VALUES (1, 'a@example.com'), (2, 'b@example.com')",
4621        )
4622        .unwrap();
4623
4624        let events = queue_payloads(&rt, "users_events");
4625        assert_eq!(events.len(), 2);
4626        let mut previous_lsn = 0;
4627        for (event, expected_id) in events.iter().zip([1_u64, 2]) {
4628            let object = event.as_object().expect("event payload object");
4629            assert_eq!(
4630                object.get("op").and_then(crate::json::Value::as_str),
4631                Some("insert")
4632            );
4633            assert_eq!(
4634                object.get("id").and_then(crate::json::Value::as_u64),
4635                Some(expected_id)
4636            );
4637            let lsn = object
4638                .get("lsn")
4639                .and_then(crate::json::Value::as_u64)
4640                .expect("event lsn");
4641            assert!(
4642                lsn > previous_lsn,
4643                "event LSNs should increase in row order"
4644            );
4645            previous_lsn = lsn;
4646            let after = object
4647                .get("after")
4648                .and_then(crate::json::Value::as_object)
4649                .expect("after object");
4650            assert_eq!(
4651                after.get("id").and_then(crate::json::Value::as_u64),
4652                Some(expected_id)
4653            );
4654        }
4655    }
4656
4657    fn queue_payloads(rt: &RedDBRuntime, queue: &str) -> Vec<crate::json::Value> {
4658        let result = rt
4659            .execute_query(&format!("QUEUE PEEK {queue} 10"))
4660            .expect("peek queue");
4661        result
4662            .result
4663            .records
4664            .iter()
4665            .map(
4666                |record| match record.get("payload").expect("payload column") {
4667                    Value::Json(bytes) => crate::json::from_slice(bytes).expect("json payload"),
4668                    other => panic!("expected JSON queue payload, got {other:?}"),
4669                },
4670            )
4671            .collect()
4672    }
4673
4674    // ── #112: auto-index user `id` on first insert ─────────────────────
4675
4676    /// First insert into a fresh collection that carries a column named
4677    /// `id` registers an implicit HASH index on `id`. Subsequent inserts
4678    /// populate it transparently, and `WHERE id = N` lookups exercise
4679    /// the hash-index fast path in `DmlTargetScan::find_target_ids`.
4680    ///
4681    /// This is the load-bearing acceptance test for #112 — without the
4682    /// hook, `find_index_for_column` returns `None` and DELETE/UPDATE
4683    /// fall through to a full segment scan (the 4× perf gap documented
4684    /// in `docs/perf/delete-sequential-2026-05-06.md`).
4685    #[test]
4686    fn auto_index_id_fires_on_first_insert() {
4687        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4688        rt.execute_query("CREATE TABLE bench_users (id INT, score INT)")
4689            .unwrap();
4690
4691        // Pre-condition: no index on `id` yet.
4692        assert!(
4693            rt.index_store_ref()
4694                .find_index_for_column("bench_users", "id")
4695                .is_none(),
4696            "freshly created collection should not have an `id` index"
4697        );
4698
4699        // Single-row INSERT — drives `MutationEngine::append_one`.
4700        rt.execute_query("INSERT INTO bench_users (id, score) VALUES (1, 10)")
4701            .unwrap();
4702
4703        // Post-condition: hash index registered on `id`.
4704        let registered = rt
4705            .index_store_ref()
4706            .find_index_for_column("bench_users", "id")
4707            .expect("auto-index hook should have registered idx_id on first insert");
4708        assert_eq!(registered.name, "idx_id");
4709        assert_eq!(registered.collection, "bench_users");
4710        assert_eq!(registered.columns, vec!["id".to_string()]);
4711        assert!(matches!(
4712            registered.method,
4713            super::super::index_store::IndexMethodKind::Hash
4714        ));
4715
4716        // Subsequent inserts populate the index; `WHERE id = N` should
4717        // resolve via the hash fast path and round-trip every row.
4718        for id in 2..=5 {
4719            rt.execute_query(&format!(
4720                "INSERT INTO bench_users (id, score) VALUES ({id}, {})",
4721                id * 10
4722            ))
4723            .unwrap();
4724        }
4725        for id in 1..=5 {
4726            let result = rt
4727                .execute_query(&format!("SELECT score FROM bench_users WHERE id = {id}"))
4728                .unwrap();
4729            assert_eq!(
4730                result.result.records.len(),
4731                1,
4732                "id={id} should match one row"
4733            );
4734        }
4735
4736        // Delete via the hash fast-path — exactly the bench scenario the
4737        // perf doc identified as the 4× regression. With the index
4738        // present, `find_target_ids` short-circuits before
4739        // `for_each_entity_zoned` runs.
4740        let deleted = rt
4741            .execute_query("DELETE FROM bench_users WHERE id = 3")
4742            .unwrap();
4743        assert_eq!(deleted.affected_rows, 1);
4744    }
4745
4746    /// Bulk INSERT (the multi-row VALUES path) drives
4747    /// `MutationEngine::append_batch`. The hook must fire there too —
4748    /// otherwise the batch entry points (gRPC binary bulk, HTTP bulk,
4749    /// wire bulk INSERT) skip auto-indexing entirely.
4750    #[test]
4751    fn auto_index_id_fires_on_first_bulk_insert() {
4752        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4753        rt.execute_query("CREATE TABLE bench_bulk (id INT, score INT)")
4754            .unwrap();
4755
4756        rt.execute_query("INSERT INTO bench_bulk (id, score) VALUES (1, 10), (2, 20), (3, 30)")
4757            .unwrap();
4758
4759        let registered = rt
4760            .index_store_ref()
4761            .find_index_for_column("bench_bulk", "id")
4762            .expect("auto-index hook should fire on first bulk insert");
4763        assert_eq!(registered.name, "idx_id");
4764
4765        // Every row populated via `index_entity_insert_batch`.
4766        for id in 1..=3 {
4767            let result = rt
4768                .execute_query(&format!("SELECT score FROM bench_bulk WHERE id = {id}"))
4769                .unwrap();
4770            assert_eq!(result.result.records.len(), 1);
4771        }
4772    }
4773
4774    /// Hook is a no-op when the row carries no `id` column. Conservative
4775    /// match (case-sensitive `id`) — `Id`, `ID`, and `red_entity_id`
4776    /// don't trigger it.
4777    #[test]
4778    fn auto_index_id_skips_when_no_id_column() {
4779        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4780        rt.execute_query("CREATE TABLE plain (uid INT, label TEXT)")
4781            .unwrap();
4782        rt.execute_query("INSERT INTO plain (uid, label) VALUES (1, 'a')")
4783            .unwrap();
4784
4785        assert!(rt
4786            .index_store_ref()
4787            .find_index_for_column("plain", "id")
4788            .is_none());
4789        assert!(rt
4790            .index_store_ref()
4791            .find_index_for_column("plain", "uid")
4792            .is_none());
4793    }
4794
4795    /// Hook only fires once per collection. If an explicit
4796    /// `CREATE INDEX ... USING BTREE` already covers `id`, the hook
4797    /// detects it via `find_index_for_column` and does NOT clobber it
4798    /// with a HASH index on the next insert.
4799    #[test]
4800    fn auto_index_id_skips_when_index_already_exists() {
4801        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4802        rt.execute_query("CREATE TABLE pre (id INT, score INT)")
4803            .unwrap();
4804        // User-declared BTREE index on `id` before any insert.
4805        rt.execute_query("CREATE INDEX user_idx ON pre (id) USING BTREE")
4806            .unwrap();
4807        rt.execute_query("INSERT INTO pre (id, score) VALUES (1, 10)")
4808            .unwrap();
4809
4810        let registered = rt
4811            .index_store_ref()
4812            .find_index_for_column("pre", "id")
4813            .expect("user index should still be there");
4814        assert_eq!(
4815            registered.name, "user_idx",
4816            "auto-index hook must not overwrite an existing index"
4817        );
4818    }
4819
4820    /// Implicit `idx_id` is reaped when the collection drops. The
4821    /// existing `execute_drop_table` walks `list_indices` and drops every
4822    /// entry — confirm the auto-created index participates.
4823    #[test]
4824    fn auto_index_id_dropped_with_collection() {
4825        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4826        rt.execute_query("CREATE TABLE ephemeral (id INT, score INT)")
4827            .unwrap();
4828        rt.execute_query("INSERT INTO ephemeral (id, score) VALUES (1, 10)")
4829            .unwrap();
4830        assert!(rt
4831            .index_store_ref()
4832            .find_index_for_column("ephemeral", "id")
4833            .is_some());
4834
4835        rt.execute_query("DROP TABLE ephemeral").unwrap();
4836
4837        assert!(
4838            rt.index_store_ref()
4839                .find_index_for_column("ephemeral", "id")
4840                .is_none(),
4841            "implicit `idx_id` must be reaped when its collection drops"
4842        );
4843    }
4844
4845    /// Opt-out via `RedDBOptions::with_auto_index_id(false)` (which
4846    /// forwards to `UnifiedStoreConfig::auto_index_id`). With the knob
4847    /// off, first insert leaves the collection without an `id` index —
4848    /// DELETE/UPDATE fall back to the scan path.
4849    #[test]
4850    fn auto_index_id_disabled_by_config() {
4851        let opts = RedDBOptions::in_memory().with_auto_index_id(false);
4852        let rt = RedDBRuntime::with_options(opts).unwrap();
4853
4854        rt.execute_query("CREATE TABLE off (id INT, score INT)")
4855            .unwrap();
4856        rt.execute_query("INSERT INTO off (id, score) VALUES (1, 10)")
4857            .unwrap();
4858
4859        assert!(
4860            rt.index_store_ref()
4861                .find_index_for_column("off", "id")
4862                .is_none(),
4863            "with auto_index_id=false, no implicit index should be created"
4864        );
4865    }
4866
4867    // ── #293: UPDATE / DELETE events ─────────────────────────────────────
4868
4869    #[test]
4870    fn update_single_row_emits_update_event() {
4871        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4872        rt.execute_query(
4873            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO audit_log",
4874        )
4875        .unwrap();
4876        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
4877            .unwrap();
4878
4879        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4880            .unwrap();
4881
4882        let events = queue_payloads(&rt, "audit_log");
4883        assert_eq!(events.len(), 1, "expected exactly 1 update event");
4884        let event = events[0].as_object().expect("event payload object");
4885        assert_eq!(
4886            event.get("op").and_then(crate::json::Value::as_str),
4887            Some("update")
4888        );
4889        assert_eq!(
4890            event.get("collection").and_then(crate::json::Value::as_str),
4891            Some("users")
4892        );
4893        assert!(event
4894            .get("event_id")
4895            .and_then(crate::json::Value::as_str)
4896            .is_some_and(|v| !v.is_empty()));
4897        let before = event
4898            .get("before")
4899            .and_then(crate::json::Value::as_object)
4900            .expect("before must be an object");
4901        let after = event
4902            .get("after")
4903            .and_then(crate::json::Value::as_object)
4904            .expect("after must be an object");
4905        assert_eq!(
4906            before.get("name").and_then(crate::json::Value::as_str),
4907            Some("Alice"),
4908            "before.name should be the old value"
4909        );
4910        assert_eq!(
4911            after.get("name").and_then(crate::json::Value::as_str),
4912            Some("Bob"),
4913            "after.name should be the new value"
4914        );
4915    }
4916
4917    #[test]
4918    fn update_event_only_includes_changed_fields() {
4919        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4920        rt.execute_query(
4921            "CREATE TABLE users (id INT, name TEXT, email TEXT) WITH EVENTS (UPDATE) TO evts",
4922        )
4923        .unwrap();
4924        rt.execute_query("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'a@x.com')")
4925            .unwrap();
4926
4927        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1")
4928            .unwrap();
4929
4930        let events = queue_payloads(&rt, "evts");
4931        assert_eq!(events.len(), 1);
4932        let event = events[0].as_object().unwrap();
4933        let before = event
4934            .get("before")
4935            .and_then(crate::json::Value::as_object)
4936            .unwrap();
4937        let after = event
4938            .get("after")
4939            .and_then(crate::json::Value::as_object)
4940            .unwrap();
4941        // Only changed field included.
4942        assert!(
4943            before.contains_key("name"),
4944            "before must include changed field"
4945        );
4946        assert!(
4947            after.contains_key("name"),
4948            "after must include changed field"
4949        );
4950        // Unchanged fields must not appear.
4951        assert!(
4952            !before.contains_key("email"),
4953            "before must not include unchanged email"
4954        );
4955        assert!(
4956            !after.contains_key("email"),
4957            "after must not include unchanged email"
4958        );
4959    }
4960
4961    #[test]
4962    fn multi_row_update_emits_one_event_per_row() {
4963        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4964        rt.execute_query("CREATE TABLE items (id INT, status TEXT) WITH EVENTS (UPDATE) TO evts")
4965            .unwrap();
4966        rt.execute_query(
4967            "INSERT INTO items (id, status) VALUES (1, 'new'), (2, 'new'), (3, 'new')",
4968        )
4969        .unwrap();
4970
4971        rt.execute_query("UPDATE items SET status = 'done'")
4972            .unwrap();
4973
4974        let events = queue_payloads(&rt, "evts");
4975        assert_eq!(events.len(), 3, "expected one update event per row");
4976        for event in &events {
4977            let obj = event.as_object().unwrap();
4978            assert_eq!(
4979                obj.get("op").and_then(crate::json::Value::as_str),
4980                Some("update")
4981            );
4982        }
4983    }
4984
4985    #[test]
4986    fn delete_single_row_emits_delete_event() {
4987        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
4988        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (DELETE) TO del_log")
4989            .unwrap();
4990        rt.execute_query("INSERT INTO users (id, name) VALUES (42, 'Alice')")
4991            .unwrap();
4992
4993        rt.execute_query("DELETE FROM users WHERE id = 42").unwrap();
4994
4995        let events = queue_payloads(&rt, "del_log");
4996        assert_eq!(events.len(), 1);
4997        let event = events[0].as_object().expect("event payload object");
4998        assert_eq!(
4999            event.get("op").and_then(crate::json::Value::as_str),
5000            Some("delete")
5001        );
5002        assert_eq!(
5003            event.get("collection").and_then(crate::json::Value::as_str),
5004            Some("users")
5005        );
5006        assert!(event
5007            .get("event_id")
5008            .and_then(crate::json::Value::as_str)
5009            .is_some_and(|v| !v.is_empty()));
5010        let before = event
5011            .get("before")
5012            .and_then(crate::json::Value::as_object)
5013            .expect("before must be an object for delete");
5014        assert_eq!(
5015            before.get("id").and_then(crate::json::Value::as_u64),
5016            Some(42)
5017        );
5018        assert_eq!(
5019            before.get("name").and_then(crate::json::Value::as_str),
5020            Some("Alice")
5021        );
5022        assert!(matches!(event.get("after"), Some(crate::json::Value::Null)));
5023    }
5024
5025    #[test]
5026    fn multi_row_delete_emits_one_event_per_row() {
5027        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5028        rt.execute_query("CREATE TABLE items (id INT, val INT) WITH EVENTS (DELETE) TO del_log")
5029            .unwrap();
5030        rt.execute_query("INSERT INTO items (id, val) VALUES (1, 10), (2, 20), (3, 30)")
5031            .unwrap();
5032
5033        rt.execute_query("DELETE FROM items").unwrap();
5034
5035        let events = queue_payloads(&rt, "del_log");
5036        assert_eq!(events.len(), 3, "expected one delete event per deleted row");
5037        for event in &events {
5038            let obj = event.as_object().unwrap();
5039            assert_eq!(
5040                obj.get("op").and_then(crate::json::Value::as_str),
5041                Some("delete")
5042            );
5043            assert!(matches!(obj.get("after"), Some(crate::json::Value::Null)));
5044        }
5045    }
5046
5047    #[test]
5048    fn ops_filter_update_does_not_emit_on_insert_or_delete() {
5049        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5050        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS (UPDATE) TO evts")
5051            .unwrap();
5052
5053        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5054            .unwrap();
5055        rt.execute_query("DELETE FROM users WHERE id = 1").unwrap();
5056
5057        let events = queue_payloads(&rt, "evts");
5058        assert!(
5059            events.is_empty(),
5060            "UPDATE-only filter must not emit INSERT or DELETE events"
5061        );
5062    }
5063
5064    // ── SUPPRESS EVENTS ────────────────────────────────────────────────────
5065
5066    #[test]
5067    fn suppress_events_on_insert_emits_no_events() {
5068        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5069        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5070            .unwrap();
5071
5072        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5073            .unwrap();
5074
5075        let events = queue_payloads(&rt, "evts");
5076        assert!(
5077            events.is_empty(),
5078            "SUPPRESS EVENTS must prevent INSERT events"
5079        );
5080    }
5081
5082    #[test]
5083    fn suppress_events_on_update_emits_no_events() {
5084        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5085        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5086            .unwrap();
5087        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice')")
5088            .unwrap();
5089        // drain the INSERT event
5090        let _ = queue_payloads(&rt, "evts");
5091        // Force pop to drain; simpler: just check new count after UPDATE
5092        rt.execute_query("QUEUE PURGE evts").unwrap();
5093
5094        rt.execute_query("UPDATE users SET name = 'Bob' WHERE id = 1 SUPPRESS EVENTS")
5095            .unwrap();
5096
5097        let events = queue_payloads(&rt, "evts");
5098        assert!(
5099            events.is_empty(),
5100            "SUPPRESS EVENTS must prevent UPDATE events"
5101        );
5102    }
5103
5104    #[test]
5105    fn suppress_events_on_delete_emits_no_events() {
5106        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5107        rt.execute_query(
5108            "CREATE TABLE users (id INT, name TEXT) WITH EVENTS (INSERT, DELETE) TO evts",
5109        )
5110        .unwrap();
5111        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5112            .unwrap();
5113
5114        rt.execute_query("DELETE FROM users WHERE id = 1 SUPPRESS EVENTS")
5115            .unwrap();
5116
5117        let events = queue_payloads(&rt, "evts");
5118        assert!(
5119            events.is_empty(),
5120            "SUPPRESS EVENTS must prevent DELETE events"
5121        );
5122    }
5123
5124    #[test]
5125    fn normal_insert_after_suppress_still_emits() {
5126        let rt = RedDBRuntime::with_options(RedDBOptions::in_memory()).unwrap();
5127        rt.execute_query("CREATE TABLE users (id INT, name TEXT) WITH EVENTS TO evts")
5128            .unwrap();
5129
5130        rt.execute_query("INSERT INTO users (id, name) VALUES (1, 'Alice') SUPPRESS EVENTS")
5131            .unwrap();
5132        rt.execute_query("INSERT INTO users (id, name) VALUES (2, 'Bob')")
5133            .unwrap();
5134
5135        let events = queue_payloads(&rt, "evts");
5136        assert_eq!(
5137            events.len(),
5138            1,
5139            "only the non-suppressed INSERT should emit"
5140        );
5141        assert_eq!(
5142            events[0].get("id").and_then(crate::json::Value::as_u64),
5143            Some(2)
5144        );
5145    }
5146}