// fathomdb_engine/admin.rs

use std::fmt::Write as _;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::mpsc::SyncSender;
use std::time::SystemTime;

use fathomdb_schema::{SchemaError, SchemaManager};
use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use crate::rebuild_actor::{RebuildMode, RebuildRequest, RebuildStateRow};

use crate::{
    EngineError, ProjectionRepairReport, ProjectionService,
    embedder::{QueryEmbedder, QueryEmbedderIdentity},
    ids::new_id,
    operational::{
        OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
        OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
        OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
        OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
        OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
        OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
        OperationalRetentionActionKind, OperationalRetentionPlanItem,
        OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
        OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
        OperationalTraceReport, extract_secondary_index_entries_for_current,
        extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
        parse_operational_validation_contract, validate_operational_payload_against_contract,
    },
    projection::ProjectionTarget,
    sqlite,
};

/// Results of a physical and structural integrity check on the database.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    pub physical_ok: bool,
    pub foreign_keys_ok: bool,
    pub missing_fts_rows: usize,
    pub missing_property_fts_rows: usize,
    pub duplicate_active_logical_ids: usize,
    pub operational_missing_collections: usize,
    pub operational_missing_last_mutations: usize,
    pub warnings: Vec<String>,
}

/// A registered FTS property projection schema for a node kind.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// The node kind this schema applies to.
    pub kind: String,
    /// Flat display list of registered JSON property paths
    /// (e.g. `["$.name", "$.title"]`). For recursive entries this lists
    /// only the root path; mode information is carried by
    /// [`Self::entries`].
    pub property_paths: Vec<String>,
    /// Full per-entry schema shape with mode
    /// ([`FtsPropertyPathMode::Scalar`] | [`FtsPropertyPathMode::Recursive`]).
    /// Read this field for mode-accurate round-trip of the registered
    /// schema.
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Subtree paths excluded from recursive walks. Empty for
    /// scalar-only schemas or recursive schemas with no exclusions.
    pub exclude_paths: Vec<String>,
    /// Separator used when concatenating extracted values.
    pub separator: String,
    /// Schema format version.
    pub format_version: i64,
}

/// Extraction mode for a single registered FTS property path.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// Resolve the path and append the scalar value(s). Matches legacy
    /// pre-Phase-4 behaviour.
    #[default]
    Scalar,
    /// Recursively walk every scalar leaf rooted at the path. Each leaf
    /// contributes one entry to the position map.
    Recursive,
}
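
// Example (sketch): with `rename_all = "snake_case"`, the unit variants
// serialize as plain strings, and `Scalar` is the `Default` used for legacy
// schemas that predate explicit modes.
//
//     let json = serde_json::to_string(&FtsPropertyPathMode::Recursive).unwrap();
//     assert_eq!(json, "\"recursive\"");
//     assert_eq!(FtsPropertyPathMode::default(), FtsPropertyPathMode::Scalar);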

/// A single registered property-FTS path with its extraction mode.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// JSON path to the property (must start with `$.`).
    pub path: String,
    /// Whether to treat this path as a scalar or recursively walk it.
    pub mode: FtsPropertyPathMode,
    /// Optional BM25 weight multiplier for this path (1.0 = default).
    /// Must satisfy `0.0 < weight <= 1000.0` when set.
    pub weight: Option<f32>,
}

// f32 does not implement Eq (due to NaN), but weights in practice are
// always finite values set by callers, so reflexivity holds.
impl Eq for FtsPropertyPathSpec {}

impl FtsPropertyPathSpec {
    #[must_use]
    pub fn scalar(path: impl Into<String>) -> Self {
        Self {
            path: path.into(),
            mode: FtsPropertyPathMode::Scalar,
            weight: None,
        }
    }

    #[must_use]
    pub fn recursive(path: impl Into<String>) -> Self {
        Self {
            path: path.into(),
            mode: FtsPropertyPathMode::Recursive,
            weight: None,
        }
    }

    /// Set the BM25 weight multiplier for this path.
    ///
    /// The weight must satisfy `0.0 < weight <= 1000.0` at registration
    /// time; this builder method does not validate — validation happens in
    /// `register_fts_property_schema_with_entries`.
    #[must_use]
    pub fn with_weight(mut self, weight: f32) -> Self {
        self.weight = Some(weight);
        self
    }
}
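
// Example (sketch): composing schema entries with the builders above. The
// paths are illustrative; weight validation happens later, at registration.
//
//     let title = FtsPropertyPathSpec::scalar("$.title").with_weight(2.0);
//     let body = FtsPropertyPathSpec::recursive("$.body");
//     assert_eq!(title.weight, Some(2.0));
//     assert_eq!(body.mode, FtsPropertyPathMode::Recursive);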

/// Options controlling how a safe database export is performed.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// When true, runs `PRAGMA wal_checkpoint(FULL)` before copying and fails if
    /// any WAL frames could not be applied (busy != 0). Set to false only in
    /// tests that seed a database without WAL mode.
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    fn default() -> Self {
        Self {
            force_checkpoint: true,
        }
    }
}

// Must match PROTOCOL_VERSION in fathomdb-admin-bridge.rs
const EXPORT_PROTOCOL_VERSION: u32 = 1;

/// Manifest describing a completed safe export.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Unix timestamp (seconds since epoch) when the export was created.
    pub exported_at: u64,
    /// SHA-256 hex digest of the exported database file.
    pub sha256: String,
    /// Schema version recorded in `fathom_schema_migrations` at export time.
    pub schema_version: u32,
    /// Bridge protocol version compiled into this binary.
    pub protocol_version: u32,
    /// Number of `SQLite` pages in the exported database file.
    pub page_count: u64,
}
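
// Example (sketch): verifying an exported file against its manifest by
// re-hashing it. The path is illustrative; hex-encoding is spelled out to
// avoid assuming a helper crate.
//
//     let bytes = fs::read("backup/fathom.db")?;
//     let digest = Sha256::digest(&bytes);
//     let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
//     assert_eq!(hex, manifest.sha256);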

/// Report from tracing all rows associated with a given `source_ref`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    pub source_ref: String,
    pub node_rows: usize,
    pub edge_rows: usize,
    pub action_rows: usize,
    pub operational_mutation_rows: usize,
    pub node_logical_ids: Vec<String>,
    pub action_ids: Vec<String>,
    pub operational_mutation_ids: Vec<String>,
}

/// An edge that was skipped during a restore because an endpoint is missing.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    pub edge_logical_id: String,
    pub missing_endpoint: String,
}

/// Report from restoring a retired logical ID back to active state.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    pub logical_id: String,
    pub was_noop: bool,
    pub restored_node_rows: usize,
    pub restored_edge_rows: usize,
    pub restored_chunk_rows: usize,
    pub restored_fts_rows: usize,
    pub restored_property_fts_rows: usize,
    pub restored_vec_rows: usize,
    pub skipped_edges: Vec<SkippedEdge>,
    pub notes: Vec<String>,
}

/// Report from permanently purging all rows for a logical ID.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    pub logical_id: String,
    pub was_noop: bool,
    pub deleted_node_rows: usize,
    pub deleted_edge_rows: usize,
    pub deleted_chunk_rows: usize,
    pub deleted_fts_rows: usize,
    pub deleted_vec_rows: usize,
    pub notes: Vec<String>,
}

/// Options controlling provenance event purging behavior.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    pub dry_run: bool,
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}

/// Report from a provenance event purge operation.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    pub events_deleted: u64,
    pub events_preserved: u64,
    pub oldest_remaining: Option<i64>,
}

/// Service providing administrative operations (integrity checks, exports, restores, purges).
#[derive(Debug)]
pub struct AdminService {
    database_path: PathBuf,
    schema_manager: Arc<SchemaManager>,
    projections: ProjectionService,
    /// Sender side of the rebuild actor's channel.  `None` when the engine
    /// was opened without a rebuild actor (e.g. in tests that use
    /// [`AdminService::new`] directly).
    rebuild_sender: Option<SyncSender<RebuildRequest>>,
}

/// Results of a semantic consistency check on the graph data.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` has no active node.
    pub orphaned_chunks: usize,
    /// Active nodes with a NULL `source_ref` (loss of provenance).
    pub null_source_ref_nodes: usize,
    /// Steps referencing a `run_id` that does not exist in the runs table.
    pub broken_step_fk: usize,
    /// Actions referencing a `step_id` that does not exist in the steps table.
    pub broken_action_fk: usize,
    /// FTS rows whose `chunk_id` does not exist in the chunks table.
    pub stale_fts_rows: usize,
    /// FTS rows whose node has been superseded (every row for the
    /// `logical_id` has `superseded_at` set, so no active row remains).
    pub fts_rows_for_superseded_nodes: usize,
    /// Property FTS rows whose node has been superseded or does not exist.
    pub stale_property_fts_rows: usize,
    /// Property FTS rows whose kind has no registered FTS property schema.
    pub orphaned_property_fts_rows: usize,
    /// Property FTS rows whose `kind` does not match the active node's actual kind.
    pub mismatched_kind_property_fts_rows: usize,
    /// Active logical IDs with more than one per-kind FTS property row.
    pub duplicate_property_fts_rows: usize,
    /// Property FTS rows whose `text_content` no longer matches the canonical extraction.
    pub drifted_property_fts_rows: usize,
    /// Active edges where at least one endpoint has no active node.
    pub dangling_edges: usize,
    /// `logical_ids` where every version has been superseded (no active row).
    pub orphaned_supersession_chains: usize,
    /// Vec rows whose backing chunk no longer exists in the chunks table.
    pub stale_vec_rows: usize,
    /// Compatibility counter for vec rows whose chunk points at missing node history.
    pub vec_rows_for_superseded_nodes: usize,
    /// Latest-state keys whose latest mutation is a `put` but no current row exists.
    pub missing_operational_current_rows: usize,
    /// Current rows that do not match the latest mutation state.
    pub stale_operational_current_rows: usize,
    /// Mutations written after the owning collection was disabled.
    pub disabled_collection_mutations: usize,
    /// Access metadata rows whose `logical_id` no longer has any node history.
    pub orphaned_last_access_metadata_rows: usize,
    pub warnings: Vec<String>,
}

/// Configuration for regenerating vector embeddings.
///
/// 0.4.0 architectural invariant: vector identity is the embedder's
/// responsibility, not the regeneration config's. This struct carries only
/// WHERE the vectors live and HOW to chunk/preprocess them — never WHAT
/// model produced them. The embedder supplied at regen-call time is the
/// single source of truth for `model_identity`, `model_version`,
/// `dimension`, and `normalization_policy`; the resulting vector profile
/// is stamped directly from [`QueryEmbedder::identity`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct VectorRegenerationConfig {
    pub profile: String,
    pub table_name: String,
    pub chunking_policy: String,
    pub preprocessing_policy: String,
}
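
// Example (sketch): a regeneration config deserialized from operator input.
// Note what is absent: no model name, version, or dimension. Those are
// stamped from the embedder passed at regen-call time, and
// `deny_unknown_fields` rejects any attempt to smuggle them in here. Field
// values are illustrative.
//
//     let config: VectorRegenerationConfig = serde_json::from_str(
//         r#"{"profile":"default",
//             "table_name":"vec_nodes_active",
//             "chunking_policy":"sentence",
//             "preprocessing_policy":"lowercase"}"#,
//     )?;
//     assert_eq!(config.table_name, "vec_nodes_active");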

/// Report from a vector embedding regeneration run.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    pub profile: String,
    pub table_name: String,
    pub dimension: usize,
    pub total_chunks: usize,
    pub regenerated_rows: usize,
    pub contract_persisted: bool,
    pub notes: Vec<String>,
}

/// Stored FTS tokenizer profile for a node kind.
///
/// Created and updated by [`AdminService::set_fts_profile`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct FtsProfile {
    /// Node kind this profile applies to (e.g. `"Article"`).
    pub kind: String,
    /// FTS5 tokenizer string (e.g. `"porter unicode61 remove_diacritics 2"`).
    pub tokenizer: String,
    /// Unix timestamp when the profile was last activated, or `None` if never.
    pub active_at: Option<i64>,
    /// Unix timestamp when the profile row was first created.
    pub created_at: i64,
}

/// Stored vector embedding profile (global, kind-agnostic).
///
/// Created and updated by [`AdminService::set_vec_profile`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct VecProfile {
    /// Identifier for the embedding model (e.g. `"openai/text-embedding-3-small"`).
    pub model_identity: String,
    /// Optional version string for the model.
    pub model_version: Option<String>,
    /// Number of dimensions produced by the model.
    pub dimensions: u32,
    /// Unix timestamp when the profile was last activated, or `None` if never.
    pub active_at: Option<i64>,
    /// Unix timestamp when the profile row was first created.
    pub created_at: i64,
}

/// Estimated cost of rebuilding a projection (FTS table or vector embeddings).
///
/// Returned by [`AdminService::preview_projection_impact`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ProjectionImpact {
    /// Number of rows that would be processed during a full rebuild.
    pub rows_to_rebuild: u64,
    /// Rough estimated rebuild time in seconds.
    pub estimated_seconds: u64,
    /// Estimated temporary disk space required during rebuild, in bytes.
    pub temp_db_size_bytes: u64,
    /// The tokenizer currently stored in `projection_profiles`, if any.
    pub current_tokenizer: Option<String>,
    /// Reserved for future use; always `None` currently.
    pub target_tokenizer: Option<String>,
}

/// Well-known tokenizer preset names mapped to their FTS5 tokenizer strings.
pub const TOKENIZER_PRESETS: &[(&str, &str)] = &[
    (
        "recall-optimized-english",
        "porter unicode61 remove_diacritics 2",
    ),
    ("precision-optimized", "unicode61 remove_diacritics 2"),
    ("global-cjk", "icu"),
    ("substring-trigram", "trigram"),
    ("source-code", "unicode61 tokenchars '._-$@'"),
];

/// Resolve a tokenizer preset name to its FTS5 tokenizer string.
///
/// If `input` matches a known preset name the preset value is returned.
/// Otherwise `input` is returned unchanged (treated as a raw tokenizer string).
pub fn resolve_tokenizer_preset(input: &str) -> &str {
    for (name, value) in TOKENIZER_PRESETS {
        if *name == input {
            return value;
        }
    }
    input
}
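
// Example: preset names resolve to their FTS5 tokenizer strings; anything
// unrecognized passes through unchanged as a raw tokenizer string.
//
//     assert_eq!(
//         resolve_tokenizer_preset("recall-optimized-english"),
//         "porter unicode61 remove_diacritics 2",
//     );
//     assert_eq!(resolve_tokenizer_preset("trigram"), "trigram");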

const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
const MAX_PROFILE_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;

/// Thread-safe handle to the shared [`AdminService`].
#[derive(Clone, Debug)]
pub struct AdminHandle {
    inner: Arc<AdminService>,
}

impl AdminHandle {
    /// Wrap an [`AdminService`] in a shared handle.
    #[must_use]
    pub fn new(service: AdminService) -> Self {
        Self {
            inner: Arc::new(service),
        }
    }

    /// Clone the inner `Arc` to the [`AdminService`].
    #[must_use]
    pub fn service(&self) -> Arc<AdminService> {
        Arc::clone(&self.inner)
    }
}
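
// Example (sketch): each consumer clones the handle and resolves its own
// `Arc<AdminService>`; the service itself is never cloned. Path and
// `schema_manager` construction are elided here.
//
//     let handle = AdminHandle::new(AdminService::new(path, schema_manager));
//     let worker = handle.clone();
//     std::thread::spawn(move || {
//         let admin = worker.service();
//         let _ = admin.check_integrity();
//     });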

impl AdminService {
    /// Create a new admin service for the database at the given path.
    #[must_use]
    pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
        let database_path = path.as_ref().to_path_buf();
        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
        Self {
            database_path,
            schema_manager,
            projections,
            rebuild_sender: None,
        }
    }

    /// Create a new admin service wired to the background rebuild actor.
    #[must_use]
    pub fn new_with_rebuild(
        path: impl AsRef<Path>,
        schema_manager: Arc<SchemaManager>,
        rebuild_sender: SyncSender<RebuildRequest>,
    ) -> Self {
        let database_path = path.as_ref().to_path_buf();
        let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
        Self {
            database_path,
            schema_manager,
            projections,
            rebuild_sender: Some(rebuild_sender),
        }
    }

    fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
        #[cfg(feature = "sqlite-vec")]
        let conn = sqlite::open_connection_with_vec(&self.database_path)?;
        #[cfg(not(feature = "sqlite-vec"))]
        let conn = sqlite::open_connection(&self.database_path)?;
        self.schema_manager.bootstrap(&conn)?;
        Ok(conn)
    }

    /// Persist or update the FTS tokenizer profile for a node kind.
    ///
    /// `tokenizer_str` may be a preset name (see [`TOKENIZER_PRESETS`]) or a
    /// raw FTS5 tokenizer string.  The resolved string is validated before
    /// being written to `projection_profiles`.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the tokenizer string contains disallowed
    /// characters, or if the database write fails.
    pub fn set_fts_profile(
        &self,
        kind: &str,
        tokenizer_str: &str,
    ) -> Result<FtsProfile, EngineError> {
        let resolved = resolve_tokenizer_preset(tokenizer_str);
        // Allowed chars: alphanumeric, space, apostrophe, dot, underscore, hyphen, dollar, at
        if !resolved
            .chars()
            .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
        {
            return Err(EngineError::Bridge(format!(
                "invalid tokenizer string: {resolved:?}"
            )));
        }
        let conn = self.connect()?;
        conn.execute(
            r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
              VALUES (?1, 'fts', json_object('tokenizer', ?2), unixepoch(), unixepoch())
              ON CONFLICT(kind, facet) DO UPDATE SET
                  config_json = json_object('tokenizer', ?2),
                  active_at   = unixepoch()",
            rusqlite::params![kind, resolved],
        )?;
        let row = conn.query_row(
            "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
             FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
            rusqlite::params![kind],
            |row| {
                Ok(FtsProfile {
                    kind: row.get(0)?,
                    tokenizer: row.get(1)?,
                    active_at: row.get(2)?,
                    created_at: row.get(3)?,
                })
            },
        )?;
        Ok(row)
    }

    /// Retrieve the FTS tokenizer profile for a node kind.
    ///
    /// Returns `None` if no profile has been set for `kind`.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn get_fts_profile(&self, kind: &str) -> Result<Option<FtsProfile>, EngineError> {
        let conn = self.connect()?;
        let result = conn
            .query_row(
                "SELECT kind, json_extract(config_json, '$.tokenizer'), active_at, created_at \
                 FROM projection_profiles WHERE kind = ?1 AND facet = 'fts'",
                rusqlite::params![kind],
                |row| {
                    Ok(FtsProfile {
                        kind: row.get(0)?,
                        tokenizer: row.get(1)?,
                        active_at: row.get(2)?,
                        created_at: row.get(3)?,
                    })
                },
            )
            .optional()?;
        Ok(result)
    }
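
    // Example (sketch): set-then-get round-trip. "Article" is an illustrative
    // kind; the stored tokenizer is the resolved FTS5 string, not the preset
    // name that was passed in.
    //
    //     admin.set_fts_profile("Article", "recall-optimized-english")?;
    //     let profile = admin.get_fts_profile("Article")?.expect("just written");
    //     assert_eq!(profile.tokenizer, "porter unicode61 remove_diacritics 2");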

    /// Retrieve the global vector embedding profile.
    ///
    /// Returns `None` if no vector profile has been persisted yet.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn get_vec_profile(&self) -> Result<Option<VecProfile>, EngineError> {
        let conn = self.connect()?;
        let result = conn
            .query_row(
                "SELECT \
                   json_extract(config_json, '$.model_identity'), \
                   json_extract(config_json, '$.model_version'), \
                   CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
                   active_at, \
                   created_at \
                 FROM projection_profiles WHERE kind = '*' AND facet = 'vec'",
                [],
                |row| {
                    Ok(VecProfile {
                        model_identity: row.get(0)?,
                        model_version: row.get(1)?,
                        dimensions: {
                            let d: i64 = row.get(2)?;
                            u32::try_from(d).unwrap_or(0)
                        },
                        active_at: row.get(3)?,
                        created_at: row.get(4)?,
                    })
                },
            )
            .optional()?;
        Ok(result)
    }

    /// Write or update the global vector profile from a JSON identity string.
    ///
    /// This is a private helper called after a successful vector regeneration;
    /// that path logs failures as warnings, while [`AdminService::set_vec_profile`]
    /// propagates them to the caller.
    #[allow(dead_code)]
    fn set_vec_profile_inner(
        conn: &rusqlite::Connection,
        identity_json: &str,
    ) -> Result<VecProfile, rusqlite::Error> {
        conn.execute(
            r"INSERT INTO projection_profiles (kind, facet, config_json, active_at, created_at)
              VALUES ('*', 'vec', ?1, unixepoch(), unixepoch())
              ON CONFLICT(kind, facet) DO UPDATE SET
                  config_json = ?1,
                  active_at   = unixepoch()",
            rusqlite::params![identity_json],
        )?;
        conn.query_row(
            "SELECT \
               json_extract(config_json, '$.model_identity'), \
               json_extract(config_json, '$.model_version'), \
               CAST(json_extract(config_json, '$.dimensions') AS INTEGER), \
               active_at, \
               created_at \
             FROM projection_profiles WHERE kind = '*' AND facet = 'vec'",
            [],
            |row| {
                Ok(VecProfile {
                    model_identity: row.get(0)?,
                    model_version: row.get(1)?,
                    dimensions: {
                        let d: i64 = row.get(2)?;
                        u32::try_from(d).unwrap_or(0)
                    },
                    active_at: row.get(3)?,
                    created_at: row.get(4)?,
                })
            },
        )
    }

    /// Persist or update the global vector profile from a JSON config string.
    ///
    /// `config_json` must be valid JSON with at least a `model_identity`
    /// field and `dimensions`.  The JSON is stored verbatim in the
    /// `projection_profiles` table under `kind='*'`, `facet='vec'`.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database write fails.
    pub fn set_vec_profile(&self, config_json: &str) -> Result<VecProfile, EngineError> {
        let conn = self.connect()?;
        Self::set_vec_profile_inner(&conn, config_json).map_err(EngineError::Sqlite)
    }
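
    // Example (sketch): the config JSON is stored verbatim under kind='*',
    // facet='vec'; the keys below are the ones get_vec_profile reads back via
    // json_extract. Values are illustrative.
    //
    //     admin.set_vec_profile(
    //         r#"{"model_identity":"openai/text-embedding-3-small",
    //             "model_version":"1","dimensions":1536}"#,
    //     )?;
    //     let profile = admin.get_vec_profile()?.expect("just written");
    //     assert_eq!(profile.dimensions, 1536);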

    /// Estimate the cost of rebuilding a projection.
    ///
    /// For facet `"fts"`: counts active nodes of `kind`.
    /// For facet `"vec"`: counts all chunks.
    ///
    /// # Errors
    /// Returns [`EngineError`] for unknown facets or database errors.
    pub fn preview_projection_impact(
        &self,
        kind: &str,
        facet: &str,
    ) -> Result<ProjectionImpact, EngineError> {
        let conn = self.connect()?;
        match facet {
            "fts" => {
                let rows: u64 = conn
                    .query_row(
                        "SELECT count(*) FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
                        rusqlite::params![kind],
                        |row| row.get::<_, i64>(0),
                    )
                    .map(i64::cast_unsigned)?;
                let current_tokenizer = self.get_fts_profile(kind)?.map(|p| p.tokenizer);
                Ok(ProjectionImpact {
                    rows_to_rebuild: rows,
                    estimated_seconds: rows / 5000,
                    temp_db_size_bytes: rows * 200,
                    current_tokenizer,
                    target_tokenizer: None,
                })
            }
            "vec" => {
                let rows: u64 = conn
                    .query_row("SELECT count(*) FROM chunks", [], |row| {
                        row.get::<_, i64>(0)
                    })
                    .map(i64::cast_unsigned)?;
                Ok(ProjectionImpact {
                    rows_to_rebuild: rows,
                    estimated_seconds: rows / 100,
                    temp_db_size_bytes: rows * 1536,
                    current_tokenizer: None,
                    target_tokenizer: None,
                })
            }
            other => Err(EngineError::Bridge(format!(
                "unknown projection facet: {other:?}"
            ))),
        }
    }
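
    // Example (sketch): preview before committing to an expensive rebuild.
    // The estimates use fixed per-row constants, so treat them as
    // order-of-magnitude guidance rather than predictions.
    //
    //     let impact = admin.preview_projection_impact("Article", "fts")?;
    //     println!(
    //         "{} rows, ~{}s, ~{} temp bytes",
    //         impact.rows_to_rebuild, impact.estimated_seconds, impact.temp_db_size_bytes,
    //     );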

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
    pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
        let conn = self.connect()?;

        let physical_result: String =
            conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
        let foreign_key_count: i64 =
            conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
                row.get(0)
            })?;
        let missing_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM chunks c
            JOIN nodes n
              ON n.logical_id = c.node_logical_id
             AND n.superseded_at IS NULL
            WHERE NOT EXISTS (
                SELECT 1
                FROM fts_nodes f
                WHERE f.chunk_id = c.id
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let duplicate_active: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM (
                SELECT logical_id
                FROM nodes
                WHERE superseded_at IS NULL
                GROUP BY logical_id
                HAVING count(*) > 1
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let operational_missing_collections: i64 = conn.query_row(
            r"
            SELECT (
                SELECT count(*)
                FROM operational_mutations m
                LEFT JOIN operational_collections c ON c.name = m.collection_name
                WHERE c.name IS NULL
            ) + (
                SELECT count(*)
                FROM operational_current oc
                LEFT JOIN operational_collections c ON c.name = oc.collection_name
                WHERE c.name IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;
        let operational_missing_last_mutations: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_current oc
            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
            WHERE m.id IS NULL
            ",
            [],
            |row| row.get(0),
        )?;

        // Count missing property FTS rows using the same extraction logic as
        // write/rebuild. A pure-SQL check would overcount: a node whose
        // declared paths legitimately normalize to no values is correct in
        // having no row and must not be flagged.
        let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;

        let mut warnings = Vec::new();
        if missing_fts_rows > 0 {
            warnings.push("missing FTS projections detected".to_owned());
        }
        if missing_property_fts_rows > 0 {
            warnings.push("missing property FTS projections detected".to_owned());
        }
        if duplicate_active > 0 {
            warnings.push("duplicate active logical_ids detected".to_owned());
        }
        if operational_missing_collections > 0 {
            warnings.push("operational rows reference missing collections".to_owned());
        }
        if operational_missing_last_mutations > 0 {
            warnings.push("operational current rows reference missing last mutations".to_owned());
        }

        // FIX(review): was `as usize` — unsound on 32-bit targets, wraps negatives silently.
        // Options: (A) try_from().unwrap_or(0) — masks corruption, (B) try_from().expect() —
        // panics on corruption, (C) propagate error. Chose (B) here: a negative count(*)
        // signals data corruption, and the integrity report would be meaningless anyway.
        Ok(IntegrityReport {
            physical_ok: physical_result == "ok",
            foreign_keys_ok: foreign_key_count == 0,
            missing_fts_rows: i64_to_usize(missing_fts_rows),
            missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
            duplicate_active_logical_ids: i64_to_usize(duplicate_active),
            operational_missing_collections: i64_to_usize(operational_missing_collections),
            operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
            warnings,
        })
    }
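
    // Example (sketch): a healthy database produces an all-clear report; any
    // nonzero counter comes with a matching human-readable warning.
    //
    //     let report = admin.check_integrity()?;
    //     assert!(report.physical_ok && report.foreign_keys_ok);
    //     assert!(report.warnings.is_empty());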

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
    #[allow(clippy::too_many_lines)]
    pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
        let conn = self.connect()?;

        let orphaned_chunks: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM chunks c
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = c.node_logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let null_source_ref_nodes: i64 = conn.query_row(
            "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )?;

        let broken_step_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM steps s
            WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let broken_action_fk: i64 = conn.query_row(
            r"
            SELECT count(*) FROM actions a
            WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let stale_fts_rows: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
            ",
            [],
            |row| row.get(0),
        )?;

        let fts_rows_for_superseded_nodes: i64 = conn.query_row(
            r"
            SELECT count(*) FROM fts_nodes f
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let (
            stale_property_fts_rows,
            orphaned_property_fts_rows,
            mismatched_kind_property_fts_rows,
            duplicate_property_fts_rows,
        ) = count_per_kind_property_fts_issues(&conn)?;

        let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;

        let dangling_edges: i64 = conn.query_row(
            r"
            SELECT count(*) FROM edges e
            WHERE e.superseded_at IS NULL AND (
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
                OR
                NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let orphaned_supersession_chains: i64 = conn.query_row(
            r"
            SELECT count(*) FROM (
                SELECT logical_id FROM nodes
                GROUP BY logical_id
                HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
            )
            ",
            [],
            |row| row.get(0),
        )?;

        // Vec stale row detection — degrades to 0 when the vec profile is absent.
        #[cfg(feature = "sqlite-vec")]
        let stale_vec_rows: i64 = match conn.query_row(
            r"
            SELECT count(*) FROM vec_nodes_active v
            WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
            ",
            [],
            |row| row.get(0),
        ) {
            Ok(n) => n,
            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
            {
                0
            }
            Err(e) => return Err(EngineError::Sqlite(e)),
        };
        #[cfg(not(feature = "sqlite-vec"))]
        let stale_vec_rows: i64 = 0;

        #[cfg(feature = "sqlite-vec")]
        let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
            r"
            SELECT count(*) FROM vec_nodes_active v
            JOIN chunks c ON c.id = v.chunk_id
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n
                WHERE n.logical_id = c.node_logical_id
            )
            ",
            [],
            |row| row.get(0),
        ) {
            Ok(n) => n,
            Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
                if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
            {
                0
            }
            Err(e) => return Err(EngineError::Sqlite(e)),
        };
        #[cfg(not(feature = "sqlite-vec"))]
        let vec_rows_for_superseded_nodes: i64 = 0;
        let missing_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c
              ON c.name = m.collection_name
             AND c.kind = 'latest_state'
            WHERE m.op_kind = 'put'
              AND NOT EXISTS (
                    SELECT 1
                    FROM operational_mutations newer
                    WHERE newer.collection_name = m.collection_name
                      AND newer.record_key = m.record_key
                      AND newer.mutation_order > m.mutation_order
                )
              AND NOT EXISTS (
                    SELECT 1
                    FROM operational_current oc
                    WHERE oc.collection_name = m.collection_name
                      AND oc.record_key = m.record_key
                )
            ",
            [],
            |row| row.get(0),
        )?;
        let stale_operational_current_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_current oc
            JOIN operational_collections c
              ON c.name = oc.collection_name
             AND c.kind = 'latest_state'
            LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
            WHERE m.id IS NULL
               OR m.collection_name != oc.collection_name
               OR m.record_key != oc.record_key
               OR m.op_kind != 'put'
               OR m.payload_json != oc.payload_json
               OR EXISTS (
                    SELECT 1
                    FROM operational_mutations newer
                    WHERE newer.collection_name = oc.collection_name
                      AND newer.record_key = oc.record_key
                      AND newer.mutation_order > m.mutation_order
                )
            ",
            [],
            |row| row.get(0),
        )?;
        let disabled_collection_mutations: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM operational_mutations m
            JOIN operational_collections c ON c.name = m.collection_name
            WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
            ",
            [],
            |row| row.get(0),
        )?;
        let orphaned_last_access_metadata_rows: i64 = conn.query_row(
            r"
            SELECT count(*)
            FROM node_access_metadata am
            WHERE NOT EXISTS (
                SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
            )
            ",
            [],
            |row| row.get(0),
        )?;

        let mut warnings = Vec::new();
        if orphaned_chunks > 0 {
            warnings.push(format!(
                "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
            ));
        }
        if null_source_ref_nodes > 0 {
            warnings.push(format!(
                "{null_source_ref_nodes} active node(s) with null source_ref"
            ));
        }
        if broken_step_fk > 0 {
            warnings.push(format!(
                "{broken_step_fk} step(s) referencing non-existent run"
            ));
        }
        if broken_action_fk > 0 {
            warnings.push(format!(
                "{broken_action_fk} action(s) referencing non-existent step"
            ));
        }
        if stale_fts_rows > 0 {
            warnings.push(format!(
                "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
            ));
        }
        if fts_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
            ));
        }
        if stale_property_fts_rows > 0 {
            warnings.push(format!(
                "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
            ));
        }
        if orphaned_property_fts_rows > 0 {
            warnings.push(format!(
                "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
            ));
        }
        if mismatched_kind_property_fts_rows > 0 {
            warnings.push(format!(
                "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
            ));
        }
        if duplicate_property_fts_rows > 0 {
            warnings.push(format!(
                "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
            ));
        }
        if drifted_property_fts_rows > 0 {
            warnings.push(format!(
                "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
            ));
        }
        if dangling_edges > 0 {
            warnings.push(format!(
                "{dangling_edges} active edge(s) with missing endpoint node"
            ));
        }
        if orphaned_supersession_chains > 0 {
            warnings.push(format!(
                "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
            ));
        }
        if stale_vec_rows > 0 {
            warnings.push(format!(
                "{stale_vec_rows} stale vec row(s) referencing missing chunk"
            ));
        }
        if vec_rows_for_superseded_nodes > 0 {
            warnings.push(format!(
                "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
            ));
        }
        if missing_operational_current_rows > 0 {
            warnings.push(format!(
                "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
            ));
        }
        if stale_operational_current_rows > 0 {
            warnings.push(format!(
                "{stale_operational_current_rows} stale operational_current row(s)"
            ));
        }
        if disabled_collection_mutations > 0 {
            warnings.push(format!(
                "{disabled_collection_mutations} mutation(s) were written after collection disable"
            ));
        }
        if orphaned_last_access_metadata_rows > 0 {
            warnings.push(format!(
                "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
            ));
        }

        Ok(SemanticReport {
            orphaned_chunks: i64_to_usize(orphaned_chunks),
            null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
            broken_step_fk: i64_to_usize(broken_step_fk),
            broken_action_fk: i64_to_usize(broken_action_fk),
            stale_fts_rows: i64_to_usize(stale_fts_rows),
            fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
            stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
            orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
            mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
            duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
            drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
            dangling_edges: i64_to_usize(dangling_edges),
            orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
            stale_vec_rows: i64_to_usize(stale_vec_rows),
            vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
            missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
            stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
            disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
            orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
            warnings,
        })
    }
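
    // Example (sketch): semantic counters are advisory rather than fatal;
    // surface the warnings and decide on repair separately.
    //
    //     let report = admin.check_semantics()?;
    //     for warning in &report.warnings {
    //         eprintln!("semantic check: {warning}");
    //     }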

    /// # Errors
    /// Returns [`EngineError`] if the collection metadata is invalid or the insert fails.
    pub fn register_operational_collection(
        &self,
        request: &OperationalRegisterRequest,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        if request.name.trim().is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection name must not be empty".to_owned(),
            ));
        }
        if request.schema_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection schema_json must not be empty".to_owned(),
            ));
        }
        if request.retention_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection retention_json must not be empty".to_owned(),
            ));
        }
        if request.filter_fields_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection filter_fields_json must not be empty".to_owned(),
            ));
        }
        parse_operational_validation_contract(&request.validation_json)
            .map_err(EngineError::InvalidWrite)?;
        parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
            .map_err(EngineError::InvalidWrite)?;
        if request.format_version <= 0 {
            return Err(EngineError::InvalidWrite(
                "operational collection format_version must be positive".to_owned(),
            ));
        }
        parse_operational_filter_fields(&request.filter_fields_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        tx.execute(
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
            rusqlite::params![
                request.name.as_str(),
                request.kind.as_str(),
                request.schema_json.as_str(),
                request.retention_json.as_str(),
                request.filter_fields_json.as_str(),
                request.validation_json.as_str(),
                request.secondary_indexes_json.as_str(),
                request.format_version,
            ],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_registered",
            request.name.as_str(),
            Some(serde_json::json!({
                "kind": request.kind.as_str(),
                "format_version": request.format_version,
            })),
        )?;
        tx.commit()?;

        self.describe_operational_collection(&request.name)?
            .ok_or_else(|| {
                EngineError::Bridge("registered collection missing after commit".to_owned())
            })
    }
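
    // Example (sketch): registering a latest-state collection. The
    // `OperationalCollectionKind` variant name and the JSON contract shapes
    // below are assumptions for illustration, not the engine's documented
    // formats.
    //
    //     let record = admin.register_operational_collection(&OperationalRegisterRequest {
    //         name: "agent_settings".to_owned(),
    //         kind: OperationalCollectionKind::LatestState, // assumed variant name
    //         schema_json: r#"{"type":"object"}"#.to_owned(),
    //         retention_json: r#"{"keep":"all"}"#.to_owned(),
    //         filter_fields_json: "[]".to_owned(),
    //         validation_json: String::new(),
    //         secondary_indexes_json: "[]".to_owned(),
    //         format_version: 1,
    //     })?;
    //     assert_eq!(record.name, "agent_settings");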

    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn describe_operational_collection(
        &self,
        name: &str,
    ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
        let conn = self.connect()?;
        load_operational_collection_record(&conn, name)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing, the filter contract is invalid,
    /// or existing mutation backfill fails.
    pub fn update_operational_collection_filters(
        &self,
        name: &str,
        filter_fields_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        if filter_fields_json.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational collection filter_fields_json must not be empty".to_owned(),
            ));
        }
        let declared_fields = parse_operational_filter_fields(filter_fields_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        tx.execute(
            "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
            rusqlite::params![name, filter_fields_json],
        )?;
        tx.execute(
            "DELETE FROM operational_filter_values WHERE collection_name = ?1",
            [name],
        )?;

        let mut mutation_stmt = tx.prepare(
            "SELECT id, payload_json FROM operational_mutations \
             WHERE collection_name = ?1 ORDER BY mutation_order",
        )?;
        let mutations = mutation_stmt
            .query_map([name], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(mutation_stmt);

        let mut insert_filter_value = tx.prepare_cached(
            "INSERT INTO operational_filter_values \
             (mutation_id, collection_name, field_name, string_value, integer_value) \
             VALUES (?1, ?2, ?3, ?4, ?5)",
        )?;
        let mut inserted_values = 0usize;
        for (mutation_id, payload_json) in &mutations {
            for filter_value in
                extract_operational_filter_values(&declared_fields, payload_json.as_str())
            {
                insert_filter_value.execute(rusqlite::params![
                    mutation_id,
                    name,
                    filter_value.field_name,
                    filter_value.string_value,
                    filter_value.integer_value,
                ])?;
                inserted_values += 1;
            }
        }
        drop(insert_filter_value);

        persist_simple_provenance_event(
            &tx,
            "operational_collection_filter_fields_updated",
            name,
            Some(serde_json::json!({
                "field_count": declared_fields.len(),
                "mutations_backfilled": mutations.len(),
                "inserted_filter_values": inserted_values,
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after filter update".to_owned())
        })?;
        tx.commit()?;
        Ok(updated)
    }
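
    // Example (sketch): swapping the filter contract rebuilds every derived
    // operational_filter_values row from mutation history inside one
    // transaction. The contract JSON shape here is an assumption for
    // illustration.
    //
    //     admin.update_operational_collection_filters(
    //         "agent_settings",
    //         r#"[{"name":"status","type":"string"}]"#,
    //     )?;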

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing or the validation contract is invalid.
    pub fn update_operational_collection_validation(
        &self,
        name: &str,
        validation_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        parse_operational_validation_contract(validation_json)
            .map_err(EngineError::InvalidWrite)?;

        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        tx.execute(
            "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
            rusqlite::params![name, validation_json],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_validation_updated",
            name,
            Some(serde_json::json!({
                "has_validation": !validation_json.is_empty(),
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after validation update".to_owned())
        })?;
        tx.commit()?;
        Ok(updated)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing, the contract is invalid,
    /// or derived index rebuild fails.
    pub fn update_operational_collection_secondary_indexes(
        &self,
        name: &str,
        secondary_indexes_json: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
            .map_err(EngineError::InvalidWrite)?;
        tx.execute(
            "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
            rusqlite::params![name, secondary_indexes_json],
        )?;
        let (mutation_entries_rebuilt, current_entries_rebuilt) =
            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_secondary_indexes_updated",
            name,
            Some(serde_json::json!({
                "index_count": indexes.len(),
                "mutation_entries_rebuilt": mutation_entries_rebuilt,
                "current_entries_rebuilt": current_entries_rebuilt,
            })),
        )?;
        let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge(
                "operational collection missing after secondary index update".to_owned(),
            )
        })?;
        tx.commit()?;
        Ok(updated)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is missing or rebuild fails.
    pub fn rebuild_operational_secondary_indexes(
        &self,
        name: &str,
    ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let indexes =
            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
                .map_err(EngineError::InvalidWrite)?;
        let (mutation_entries_rebuilt, current_entries_rebuilt) =
            rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
        persist_simple_provenance_event(
            &tx,
            "operational_secondary_indexes_rebuilt",
            name,
            Some(serde_json::json!({
                "index_count": indexes.len(),
                "mutation_entries_rebuilt": mutation_entries_rebuilt,
                "current_entries_rebuilt": current_entries_rebuilt,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalSecondaryIndexRebuildReport {
            collection_name: name.to_owned(),
            mutation_entries_rebuilt,
            current_entries_rebuilt,
        })
    }
1390
1391    /// # Errors
1392    /// Returns [`EngineError`] if the collection is missing or its validation contract is invalid.
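    ///
    /// # Example
    /// A minimal sketch; `engine` is an assumed handle to this admin API and
    /// the collection name is illustrative:
    /// ```ignore
    /// let report = engine.validate_operational_collection_history("sensor_readings")?;
    /// if report.invalid_row_count > 0 {
    ///     for issue in &report.issues {
    ///         // Each issue carries the offending mutation id and a message.
    ///         eprintln!("{}: {}", issue.mutation_id, issue.message);
    ///     }
    /// }
    /// ```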
    pub fn validate_operational_collection_history(
        &self,
        name: &str,
    ) -> Result<OperationalHistoryValidationReport, EngineError> {
        let conn = self.connect()?;
        let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let Some(contract) = parse_operational_validation_contract(&record.validation_json)
            .map_err(EngineError::InvalidWrite)?
        else {
            return Err(EngineError::InvalidWrite(format!(
                "operational collection '{name}' has no validation_json configured"
            )));
        };

        let mut stmt = conn.prepare(
            "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
             WHERE collection_name = ?1 ORDER BY mutation_order",
        )?;
        let rows = stmt
            .query_map([name], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                    row.get::<_, String>(3)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(stmt);

        let mut checked_rows = 0usize;
        let mut issues = Vec::new();
        for (mutation_id, record_key, op_kind, payload_json) in rows {
            if op_kind == "delete" {
                continue;
            }
            checked_rows += 1;
            if let Err(message) =
                validate_operational_payload_against_contract(&contract, payload_json.as_str())
            {
                issues.push(OperationalHistoryValidationIssue {
                    mutation_id,
                    record_key,
                    op_kind,
                    message,
                });
            }
        }

        Ok(OperationalHistoryValidationReport {
            collection_name: name.to_owned(),
            checked_rows,
            invalid_row_count: issues.len(),
            issues,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is not registered or the database write fails.
    pub fn disable_operational_collection(
        &self,
        name: &str,
    ) -> Result<OperationalCollectionRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        let changed = if record.disabled_at.is_none() {
            tx.execute(
                "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
                [name],
            )?;
            true
        } else {
            false
        };
        let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::Bridge("operational collection missing after disable".to_owned())
        })?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_disabled",
            name,
            Some(serde_json::json!({
                "disabled_at": record.disabled_at,
                "changed": changed,
            })),
        )?;
        tx.commit()?;
        Ok(record)
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is not registered, is not append-only,
    /// or the database write fails.
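    ///
    /// # Example
    /// A minimal sketch of the preview-then-apply pattern; `engine` and the
    /// collection name are illustrative:
    /// ```ignore
    /// // dry_run = true reports candidates without deleting anything.
    /// let preview = engine.compact_operational_collection("audit_log", true)?;
    /// if preview.deleted_mutations > 0 {
    ///     // dry_run = false performs the deletion inside one transaction.
    ///     let applied = engine.compact_operational_collection("audit_log", false)?;
    ///     assert!(!applied.dry_run);
    /// }
    /// ```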
    pub fn compact_operational_collection(
        &self,
        name: &str,
        dry_run: bool,
    ) -> Result<OperationalCompactionReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        validate_append_only_operational_collection(&collection, "compact")?;
        let (mutation_ids, before_timestamp) =
            operational_compaction_candidates(&tx, &collection.retention_json, name)?;
        if dry_run {
            drop(tx);
            return Ok(OperationalCompactionReport {
                collection_name: name.to_owned(),
                deleted_mutations: mutation_ids.len(),
                dry_run: true,
                before_timestamp,
            });
        }
        let mut delete_stmt =
            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
        for mutation_id in &mutation_ids {
            delete_stmt.execute([mutation_id.as_str()])?;
        }
        drop(delete_stmt);
        persist_simple_provenance_event(
            &tx,
            "operational_collection_compacted",
            name,
            Some(serde_json::json!({
                "deleted_mutations": mutation_ids.len(),
                "before_timestamp": before_timestamp,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalCompactionReport {
            collection_name: name.to_owned(),
            deleted_mutations: mutation_ids.len(),
            dry_run: false,
            before_timestamp,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is not registered, is not append-only,
    /// or the database write fails.
    pub fn purge_operational_collection(
        &self,
        name: &str,
        before_timestamp: i64,
    ) -> Result<OperationalPurgeReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
            EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
        })?;
        validate_append_only_operational_collection(&collection, "purge")?;
        let deleted_mutations = tx.execute(
            "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
            rusqlite::params![name, before_timestamp],
        )?;
        persist_simple_provenance_event(
            &tx,
            "operational_collection_purged",
            name,
            Some(serde_json::json!({
                "deleted_mutations": deleted_mutations,
                "before_timestamp": before_timestamp,
            })),
        )?;
        tx.commit()?;
        Ok(OperationalPurgeReport {
            collection_name: name.to_owned(),
            deleted_mutations,
            before_timestamp,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if collection selection or policy parsing fails.
    pub fn plan_operational_retention(
        &self,
        now_timestamp: i64,
        collection_names: Option<&[String]>,
        max_collections: Option<usize>,
    ) -> Result<OperationalRetentionPlanReport, EngineError> {
        let conn = self.connect()?;
        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
        let mut items = Vec::with_capacity(records.len());
        for record in records {
            items.push(plan_operational_retention_item(
                &conn,
                &record,
                now_timestamp,
            )?);
        }
        Ok(OperationalRetentionPlanReport {
            planned_at: now_timestamp,
            collections_examined: items.len(),
            items,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if collection selection, policy parsing, or execution fails.
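    ///
    /// # Example
    /// A minimal sketch; `engine` and the timestamp are illustrative. Planning
    /// first and then executing mirrors the `dry_run` flag on this method:
    /// ```ignore
    /// let now = 1_700_000_000; // unix seconds, supplied by the caller
    /// let plan = engine.plan_operational_retention(now, None, None)?;
    /// println!("examined {} collections", plan.collections_examined);
    /// // Execute for real across all collections (dry_run = false).
    /// let run = engine.run_operational_retention(now, None, None, false)?;
    /// println!("acted on {} collections", run.collections_acted_on);
    /// ```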
    pub fn run_operational_retention(
        &self,
        now_timestamp: i64,
        collection_names: Option<&[String]>,
        max_collections: Option<usize>,
        dry_run: bool,
    ) -> Result<OperationalRetentionRunReport, EngineError> {
        let mut conn = self.connect()?;
        let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
        let mut items = Vec::with_capacity(records.len());
        let mut collections_acted_on = 0usize;

        for record in records {
            let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
            let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
            if item.deleted_mutations > 0 {
                collections_acted_on += 1;
            }
            if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
                drop(tx);
            } else {
                tx.commit()?;
            }
            items.push(item);
        }

        Ok(OperationalRetentionRunReport {
            executed_at: now_timestamp,
            collections_examined: items.len(),
            collections_acted_on,
            dry_run,
            items,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection is not registered or the database query fails.
    pub fn trace_operational_collection(
        &self,
        collection_name: &str,
        record_key: Option<&str>,
    ) -> Result<OperationalTraceReport, EngineError> {
        let conn = self.connect()?;
        ensure_operational_collection_registered(&conn, collection_name)?;
        let mutations = if let Some(record_key) = record_key {
            let mut stmt = conn.prepare(
                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
                 FROM operational_mutations \
                 WHERE collection_name = ?1 AND record_key = ?2 \
                 ORDER BY mutation_order",
            )?;
            stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
                .collect::<Result<Vec<_>, _>>()?
        } else {
            let mut stmt = conn.prepare(
                "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
                 FROM operational_mutations \
                 WHERE collection_name = ?1 \
                 ORDER BY mutation_order",
            )?;
            stmt.query_map([collection_name], map_operational_mutation_row)?
                .collect::<Result<Vec<_>, _>>()?
        };
        let current_rows = if let Some(record_key) = record_key {
            let mut stmt = conn.prepare(
                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
                 FROM operational_current \
                 WHERE collection_name = ?1 AND record_key = ?2 \
                 ORDER BY updated_at, record_key",
            )?;
            stmt.query_map([collection_name, record_key], map_operational_current_row)?
                .collect::<Result<Vec<_>, _>>()?
        } else {
            let mut stmt = conn.prepare(
                "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
                 FROM operational_current \
                 WHERE collection_name = ?1 \
                 ORDER BY updated_at, record_key",
            )?;
            stmt.query_map([collection_name], map_operational_current_row)?
                .collect::<Result<Vec<_>, _>>()?
        };

        Ok(OperationalTraceReport {
            collection_name: collection_name.to_owned(),
            record_key: record_key.map(str::to_owned),
            mutation_count: mutations.len(),
            current_count: current_rows.len(),
            mutations,
            current_rows,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the collection contract is invalid or the filtered read fails.
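    ///
    /// # Example
    /// A minimal sketch; `engine` is an assumed handle, and the filter vector
    /// is left abstract because clause construction depends on the declared
    /// [`OperationalFilterClause`] fields:
    /// ```ignore
    /// let request = OperationalReadRequest {
    ///     collection_name: "devices".to_owned(),
    ///     filters: vec![/* at least one OperationalFilterClause */],
    ///     limit: Some(100),
    ///     // ..any remaining request fields
    /// };
    /// let _report = engine.read_operational_collection(&request)?;
    /// ```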
    pub fn read_operational_collection(
        &self,
        request: &OperationalReadRequest,
    ) -> Result<OperationalReadReport, EngineError> {
        if request.collection_name.trim().is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational read collection_name must not be empty".to_owned(),
            ));
        }
        if request.filters.is_empty() {
            return Err(EngineError::InvalidWrite(
                "operational read requires at least one filter clause".to_owned(),
            ));
        }

        let conn = self.connect()?;
        let record = load_operational_collection_record(&conn, &request.collection_name)?
            .ok_or_else(|| {
                EngineError::InvalidWrite(format!(
                    "operational collection '{}' is not registered",
                    request.collection_name
                ))
            })?;
        validate_append_only_operational_collection(&record, "read")?;
        let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
            .map_err(EngineError::InvalidWrite)?;
        let secondary_indexes =
            parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
                .map_err(EngineError::InvalidWrite)?;
        let applied_limit = operational_read_limit(request.limit)?;
        let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
        if let Some(report) = execute_operational_secondary_index_read(
            &conn,
            &request.collection_name,
            &filters,
            &secondary_indexes,
            applied_limit,
        )? {
            return Ok(report);
        }
        execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
    }

    /// # Errors
    /// Returns [`EngineError`] if the database query fails or collection validation fails.
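    ///
    /// # Example
    /// A minimal sketch; `engine` and the collection name are illustrative:
    /// ```ignore
    /// // Rebuild a single latest_state collection from its mutation history...
    /// let one = engine.rebuild_operational_current(Some("devices"))?;
    /// // ...or every latest_state collection at once.
    /// let all = engine.rebuild_operational_current(None)?;
    /// println!("{} current rows rebuilt", all.current_rows_rebuilt);
    /// ```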
    pub fn rebuild_operational_current(
        &self,
        collection_name: Option<&str>,
    ) -> Result<OperationalRepairReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let collections = if let Some(name) = collection_name {
            let maybe_kind: Option<String> = tx
                .query_row(
                    "SELECT kind FROM operational_collections WHERE name = ?1",
                    [name],
                    |row| row.get(0),
                )
                .optional()?;
            let Some(kind) = maybe_kind else {
                return Err(EngineError::InvalidWrite(format!(
                    "operational collection '{name}' is not registered"
                )));
            };
            if kind != OperationalCollectionKind::LatestState.as_str() {
                return Err(EngineError::InvalidWrite(format!(
                    "operational collection '{name}' is not latest_state"
                )));
            }
            vec![name.to_owned()]
        } else {
            let mut stmt = tx.prepare(
                "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
            )?;
            stmt.query_map([], |row| row.get::<_, String>(0))?
                .collect::<Result<Vec<_>, _>>()?
        };

        let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
        for collection in &collections {
            let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
                EngineError::Bridge(format!(
                    "operational collection '{collection}' missing during current rebuild"
                ))
            })?;
            let indexes = parse_operational_secondary_indexes_json(
                &record.secondary_indexes_json,
                record.kind,
            )
            .map_err(EngineError::InvalidWrite)?;
            if !indexes.is_empty() {
                rebuild_operational_secondary_index_entries(
                    &tx,
                    &record.name,
                    record.kind,
                    &indexes,
                )?;
            }
        }

        persist_simple_provenance_event(
            &tx,
            "operational_current_rebuilt",
            collection_name.unwrap_or("*"),
            Some(serde_json::json!({
                "collections_rebuilt": collections.len(),
                "current_rows_rebuilt": rebuilt_rows,
            })),
        )?;
        tx.commit()?;

        Ok(OperationalRepairReport {
            collections_rebuilt: collections.len(),
            current_rows_rebuilt: rebuilt_rows,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
    pub fn rebuild_projections(
        &self,
        target: ProjectionTarget,
    ) -> Result<ProjectionRepairReport, EngineError> {
        self.projections.rebuild_projections(target)
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or the projection rebuild fails.
    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
        self.projections.rebuild_missing_projections()
    }

    /// Register (or update) an FTS property projection schema for the given node kind.
    ///
    /// After registration, any node of this kind will have the declared JSON property
    /// paths extracted, concatenated, and indexed in the per-kind `fts_props_<kind>` FTS5 table.
    ///
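    /// # Example
    /// A minimal sketch; `engine`, the kind, and the paths are illustrative:
    /// ```ignore
    /// let record = engine.register_fts_property_schema(
    ///     "document",
    ///     &["$.title".to_owned(), "$.body".to_owned()],
    ///     None, // default separator (" ")
    /// )?;
    /// assert_eq!(record.property_paths.len(), 2);
    /// ```
    ///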
    /// # Errors
    /// Returns [`EngineError`] if `property_paths` is empty, contains duplicates,
    /// or if the database write fails.
    pub fn register_fts_property_schema(
        &self,
        kind: &str,
        property_paths: &[String],
        separator: Option<&str>,
    ) -> Result<FtsPropertySchemaRecord, EngineError> {
        let specs: Vec<FtsPropertyPathSpec> = property_paths
            .iter()
            .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
            .collect();
        self.register_fts_property_schema_with_entries(
            kind,
            &specs,
            separator,
            &[],
            RebuildMode::Eager,
        )
    }

    /// Register (or update) an FTS property projection schema with
    /// per-path modes and optional exclude paths.
    ///
    /// Under `RebuildMode::Eager` (the legacy mode), the full rebuild runs
    /// inside the registration transaction, matching the behavior before Pack 7.
    ///
    /// Under `RebuildMode::Async` (the 0.4.1 default), the schema row is
    /// persisted in a short IMMEDIATE transaction, a rebuild-state row is
    /// upserted, and the actual rebuild is handed off to the background
    /// `RebuildActor`. The register call returns in under 100ms even for large
    /// kinds.
    ///
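    /// # Example
    /// A minimal sketch of an async registration; `engine` and the values are
    /// illustrative:
    /// ```ignore
    /// let entries = vec![
    ///     FtsPropertyPathSpec::scalar("$.title".to_owned()),
    ///     FtsPropertyPathSpec::scalar("$.body".to_owned()),
    /// ];
    /// let record = engine.register_fts_property_schema_with_entries(
    ///     "document",
    ///     &entries,
    ///     None,              // default separator
    ///     &[],               // no exclude paths
    ///     RebuildMode::Async,
    /// )?;
    /// // The rebuild runs in the background; poll it via
    /// // get_property_fts_rebuild_state.
    /// ```
    ///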
    /// # Errors
    /// Returns [`EngineError`] if the paths are invalid, the JSON
    /// serialization fails, or the (schema-persist / rebuild) transaction fails.
    pub fn register_fts_property_schema_with_entries(
        &self,
        kind: &str,
        entries: &[FtsPropertyPathSpec],
        separator: Option<&str>,
        exclude_paths: &[String],
        mode: RebuildMode,
    ) -> Result<FtsPropertySchemaRecord, EngineError> {
        let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
        validate_fts_property_paths(&paths)?;
        for p in exclude_paths {
            if !p.starts_with("$.") {
                return Err(EngineError::InvalidWrite(format!(
                    "exclude_paths entries must start with '$.' but got: {p}"
                )));
            }
        }
        for e in entries {
            if let Some(w) = e.weight
                && !(w > 0.0 && w <= 1000.0)
            {
                return Err(EngineError::InvalidWrite(format!(
                    "weight out of range: {w} (must satisfy 0.0 < weight <= 1000.0)"
                )));
            }
        }
        let separator = separator.unwrap_or(" ");
        let paths_json = serialize_property_paths_json(entries, exclude_paths)?;

        match mode {
            RebuildMode::Eager => self.register_fts_property_schema_eager(
                kind,
                entries,
                separator,
                exclude_paths,
                &paths,
                &paths_json,
            ),
            RebuildMode::Async => self.register_fts_property_schema_async(
                kind,
                entries,
                separator,
                &paths,
                &paths_json,
            ),
        }
    }

    /// Eager path: existing transactional behavior unchanged.
    fn register_fts_property_schema_eager(
        &self,
        kind: &str,
        entries: &[FtsPropertyPathSpec],
        separator: &str,
        exclude_paths: &[String],
        paths: &[String],
        paths_json: &str,
    ) -> Result<FtsPropertySchemaRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        // Determine whether the registration introduces a recursive path
        // that was not present in the previously-registered schema for
        // this kind. If so, we must eagerly rebuild property FTS rows and
        // position map for every active node of this kind within the same
        // transaction.
        let previous_row: Option<(String, String)> = tx
            .query_row(
                "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
                [kind],
                |row| {
                    let json: String = row.get(0)?;
                    let sep: String = row.get(1)?;
                    Ok((json, sep))
                },
            )
            .optional()?;
        let had_previous_schema = previous_row.is_some();
        let previous_recursive_paths: Vec<String> = previous_row
            .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
            .map_or(Vec::new(), |schema| {
                schema
                    .paths
                    .into_iter()
                    .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
                    .map(|p| p.path)
                    .collect()
            });
        let new_recursive_paths: Vec<&str> = entries
            .iter()
            .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
            .map(|e| e.path.as_str())
            .collect();
        let introduces_new_recursive = new_recursive_paths
            .iter()
            .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));

        tx.execute(
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES (?1, ?2, ?3) \
             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
            rusqlite::params![kind, paths_json, separator],
        )?;

        // Eager transactional rebuild: always fire on any registration or update.
        // First-time registrations must populate the per-kind FTS table from any
        // existing nodes; updates must clear and re-populate so stale rows don't
        // linger. This covers recursive-path additions AND scalar-only
        // re-registrations where only the path or separator changed. (P4-P2-1)
        let _ = (introduces_new_recursive, had_previous_schema);
        let needs_rebuild = true;
        if needs_rebuild {
            let any_weight = entries.iter().any(|e| e.weight.is_some());
            let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
                .map_err(|e| EngineError::Bridge(e.to_string()))?;
            if any_weight {
                // Per-spec column mode: drop and recreate the table with one column
                // per spec. Data population into per-spec columns is future work;
                // the table is left empty after recreation.
                create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
                tx.execute(
                    "DELETE FROM fts_node_property_positions WHERE kind = ?1",
                    [kind],
                )?;
                // Skip insert_property_fts_rows_for_kind — it uses text_content
                // which is not present in the per-spec column layout.
            } else {
                // Legacy text_content mode: drop and recreate the table to ensure
                // the correct single-column layout (handles weighted-to-unweighted
                // downgrade where a stale per-spec table might otherwise remain).
                create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
                tx.execute(
                    "DELETE FROM fts_node_property_positions WHERE kind = ?1",
                    [kind],
                )?;
                // Scope the rebuild to `kind` only. The multi-kind
                // `insert_property_fts_rows` iterates over every registered
                // schema and would re-insert rows for siblings that were not
                // deleted above, duplicating their FTS entries.
                crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
            }
        }

        persist_simple_provenance_event(
            &tx,
            "fts_property_schema_registered",
            kind,
            Some(serde_json::json!({
                "property_paths": paths,
                "separator": separator,
                "exclude_paths": exclude_paths,
                "eager_rebuild": needs_rebuild,
            })),
        )?;
        tx.commit()?;

        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
        })
    }

    /// Async path: schema persisted in a short tx; rebuild handed to actor.
    fn register_fts_property_schema_async(
        &self,
        kind: &str,
        entries: &[FtsPropertyPathSpec],
        separator: &str,
        paths: &[String],
        paths_json: &str,
    ) -> Result<FtsPropertySchemaRecord, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        // Detect first-registration vs re-registration.
        let had_previous_schema: bool = tx
            .query_row(
                "SELECT count(*) FROM fts_property_schemas WHERE kind = ?1",
                rusqlite::params![kind],
                |r| r.get::<_, i64>(0),
            )
            .unwrap_or(0)
            > 0;

        // Upsert schema row (fast — just a metadata write).
        tx.execute(
            "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
             VALUES (?1, ?2, ?3) \
             ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
            rusqlite::params![kind, paths_json, separator],
        )?;

        // Always drop and recreate the per-kind FTS table to ensure the schema
        // matches the registered spec layout. This handles weighted-to-unweighted
        // downgrade where a stale per-spec table would otherwise remain.
        let any_weight = entries.iter().any(|e| e.weight.is_some());
        let tok = fathomdb_schema::resolve_fts_tokenizer(&tx, kind)
            .map_err(|e| EngineError::Bridge(e.to_string()))?;
        if any_weight {
            create_or_replace_fts_kind_table(&tx, kind, entries, &tok)?;
        } else {
            // Legacy text_content layout — pass empty specs so
            // create_or_replace_fts_kind_table uses the single text_content column.
            create_or_replace_fts_kind_table(&tx, kind, &[], &tok)?;
        }

        // Retrieve the rowid of the schema row as schema_id.
        let schema_id: i64 = tx.query_row(
            "SELECT rowid FROM fts_property_schemas WHERE kind = ?1",
            rusqlite::params![kind],
            |r| r.get(0),
        )?;

        let now_ms = crate::rebuild_actor::now_unix_ms_pub();
        let is_first = i64::from(!had_previous_schema);

        // Upsert rebuild state row.
        tx.execute(
            "INSERT INTO fts_property_rebuild_state \
             (kind, schema_id, state, rows_done, started_at, is_first_registration) \
             VALUES (?1, ?2, 'PENDING', 0, ?3, ?4) \
             ON CONFLICT(kind) DO UPDATE SET \
                 schema_id = excluded.schema_id, \
                 state = 'PENDING', \
                 rows_total = NULL, \
                 rows_done = 0, \
                 started_at = excluded.started_at, \
                 last_progress_at = NULL, \
                 error_message = NULL, \
                 is_first_registration = excluded.is_first_registration",
            rusqlite::params![kind, schema_id, now_ms, is_first],
        )?;

        persist_simple_provenance_event(
            &tx,
            "fts_property_schema_registered",
            kind,
            Some(serde_json::json!({
                "property_paths": paths,
                "separator": separator,
                "mode": "async",
            })),
        )?;
        tx.commit()?;

        // Enqueue the rebuild request if the actor is available.
        // try_send is non-blocking: if the channel is full (capacity 64), the
        // request is dropped. The state row stays PENDING and the caller can
        // observe this via get_property_fts_rebuild_state. No automatic retry
        // in 0.4.1 — caller must re-invoke register to re-enqueue.
        if let Some(sender) = &self.rebuild_sender
            && sender
                .try_send(RebuildRequest {
                    kind: kind.to_owned(),
                    schema_id,
                })
                .is_err()
        {
            trace_warn!(
                kind = %kind,
                "rebuild channel full; rebuild request dropped — state remains PENDING"
            );
        }

        self.describe_fts_property_schema(kind)?.ok_or_else(|| {
            EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
        })
    }

    /// Return the rebuild state row for a kind, if one exists.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
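    ///
    /// # Example
    /// A minimal polling sketch after an async registration; `engine` is an
    /// assumed handle, and the loop waits only while the row still reports
    /// the `PENDING` state written at registration:
    /// ```ignore
    /// while let Some(row) = engine.get_property_fts_rebuild_state("document")? {
    ///     if row.state != "PENDING" {
    ///         println!("rebuild left PENDING, now: {:?}", row.state);
    ///         break;
    ///     }
    ///     std::thread::sleep(std::time::Duration::from_millis(50));
    /// }
    /// ```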
    pub fn get_property_fts_rebuild_state(
        &self,
        kind: &str,
    ) -> Result<Option<RebuildStateRow>, EngineError> {
        let conn = self.connect()?;
        let row = conn
            .query_row(
                "SELECT kind, schema_id, state, rows_total, rows_done, \
                 started_at, is_first_registration, error_message \
                 FROM fts_property_rebuild_state WHERE kind = ?1",
                rusqlite::params![kind],
                |r| {
                    Ok(RebuildStateRow {
                        kind: r.get(0)?,
                        schema_id: r.get(1)?,
                        state: r.get(2)?,
                        rows_total: r.get(3)?,
                        rows_done: r.get(4)?,
                        started_at: r.get(5)?,
                        is_first_registration: r.get::<_, i64>(6)? != 0,
                        error_message: r.get(7)?,
                    })
                },
            )
            .optional()?;
        Ok(row)
    }

    /// Return the count of rows in `fts_property_rebuild_staging` for a kind.
    /// Used by tests to verify the staging table was populated.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn count_staging_rows(&self, kind: &str) -> Result<i64, EngineError> {
        let conn = self.connect()?;
        let count: i64 = conn.query_row(
            "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1",
            rusqlite::params![kind],
            |r| r.get(0),
        )?;
        Ok(count)
    }

    /// Return whether a specific node is present in `fts_property_rebuild_staging`.
    /// Used by tests to verify the double-write path.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn staging_row_exists(
        &self,
        kind: &str,
        node_logical_id: &str,
    ) -> Result<bool, EngineError> {
        let conn = self.connect()?;
        let count: i64 = conn.query_row(
            "SELECT count(*) FROM fts_property_rebuild_staging WHERE kind = ?1 AND node_logical_id = ?2",
            rusqlite::params![kind, node_logical_id],
            |r| r.get(0),
        )?;
        Ok(count > 0)
    }

    /// Return the FTS property schema for a single node kind, if registered.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn describe_fts_property_schema(
        &self,
        kind: &str,
    ) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
        let conn = self.connect()?;
        load_fts_property_schema_record(&conn, kind)
    }

    /// Return all registered FTS property schemas.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database query fails.
    pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
        let conn = self.connect()?;
        let mut stmt = conn.prepare(
            "SELECT kind, property_paths_json, separator, format_version \
             FROM fts_property_schemas ORDER BY kind",
        )?;
        let records = stmt
            .query_map([], |row| {
                let kind: String = row.get(0)?;
                let paths_json: String = row.get(1)?;
                let separator: String = row.get(2)?;
                let format_version: i64 = row.get(3)?;
                Ok(build_fts_property_schema_record(
                    kind,
                    &paths_json,
                    separator,
                    format_version,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        Ok(records)
    }

    /// Remove the FTS property schema for a node kind.
    ///
    /// Rows in the per-kind FTS table for this kind are cleared, but the
    /// shared `fts_nodes` projection and the property position map are left
    /// untouched; call `rebuild_projections(Fts)` to clean up any stale rows.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the kind is not registered or the delete fails.
    pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
        if deleted == 0 {
            return Err(EngineError::InvalidWrite(format!(
                "FTS property schema for kind '{kind}' is not registered"
            )));
        }
        // Delete all FTS rows from the per-kind table (if it exists).
        let table = fathomdb_schema::fts_kind_table_name(kind);
        let table_exists: bool = tx
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
                 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
                rusqlite::params![table],
                |r| r.get::<_, i64>(0),
            )
            .unwrap_or(0)
            > 0;
        if table_exists {
            tx.execute_batch(&format!("DELETE FROM {table}"))?;
        }
        persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
        tx.commit()?;
        Ok(())
    }

    /// Recreate enabled vector profiles from persisted `vector_profiles` metadata.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, reading metadata fails,
    /// or sqlite-vec support is unavailable while enabled profiles are present.
    pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
        let conn = self.connect()?;
        let profiles: Vec<(String, String, i64)> = {
            let mut stmt = conn.prepare(
                "SELECT profile, table_name, dimension \
                 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
            )?;
            stmt.query_map([], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, i64>(2)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?
        };

        for (profile, table_name, dimension) in &profiles {
            let dimension = usize::try_from(*dimension).map_err(|_| {
                EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
            })?;
            self.schema_manager
                .ensure_vector_profile(&conn, profile, table_name, dimension)?;
        }

        Ok(ProjectionRepairReport {
            targets: vec![ProjectionTarget::Vec],
            rebuilt_rows: profiles.len(),
            notes: vec![],
        })
    }

    /// Rebuild vector embeddings using an application-supplied regeneration
    /// contract and embedder.
    ///
    /// The config is persisted in `vector_embedding_contracts` so the metadata
    /// required for recovery survives future repair runs.
    ///
    /// Vector identity is stamped from [`QueryEmbedder::identity`]: the
    /// caller supplies the embedder and cannot override its identity, which
    /// makes drift between the read-path and write-path identities
    /// structurally impossible.
    ///
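    /// # Example
    /// A minimal sketch; `MyEmbedder` is a hypothetical [`QueryEmbedder`]
    /// implementation and the config construction is elided:
    /// ```ignore
    /// let embedder = MyEmbedder::load("model.bin")?;
    /// let config = /* VectorRegenerationConfig for the target profile */;
    /// let report = engine.regenerate_vector_embeddings(&embedder, &config)?;
    /// println!("{} rows regenerated", report.regenerated_rows);
    /// ```
    ///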
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the config is
    /// invalid, the embedder fails, or the regenerated embeddings are
    /// malformed.
    #[allow(clippy::too_many_lines)]
    pub fn regenerate_vector_embeddings(
        &self,
        embedder: &dyn QueryEmbedder,
        config: &VectorRegenerationConfig,
    ) -> Result<VectorRegenerationReport, EngineError> {
        let conn = self.connect()?;
        let identity = embedder.identity();
        let config = validate_vector_regeneration_config(&conn, config, &identity)
            .map_err(|failure| failure.to_engine_error())?;
        let chunks = collect_regeneration_chunks(&conn)?;
        let payload = build_regeneration_input(&config, &identity, chunks.clone());
        let snapshot_hash = compute_snapshot_hash(&payload)?;
        let audit_metadata = VectorRegenerationAuditMetadata {
            profile: config.profile.clone(),
            model_identity: identity.model_identity.clone(),
            model_version: identity.model_version.clone(),
            chunk_count: chunks.len(),
            snapshot_hash: snapshot_hash.clone(),
            failure_class: None,
        };
        persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_requested",
            &config.profile,
            &audit_metadata,
        )?;
        let notes = vec!["vector embeddings regenerated via configured embedder".to_owned()];

        let mut embedding_map: std::collections::HashMap<String, Vec<u8>> =
            std::collections::HashMap::with_capacity(chunks.len());
        for chunk in &chunks {
            let vector = match embedder.embed_query(&chunk.text_content) {
                Ok(vector) => vector,
                Err(error) => {
                    let failure = VectorRegenerationFailure::new(
                        VectorRegenerationFailureClass::EmbedderFailure,
                        format!("embedder failed for chunk '{}': {error}", chunk.chunk_id),
                    );
                    self.persist_vector_regeneration_failure_best_effort(
                        &config.profile,
                        &audit_metadata,
                        &failure,
                    );
                    return Err(failure.to_engine_error());
                }
            };
            if vector.len() != identity.dimension {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder produced {} values for chunk '{}', expected {}",
                        vector.len(),
                        chunk.chunk_id,
                        identity.dimension
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            if vector.iter().any(|value| !value.is_finite()) {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder returned non-finite values for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            let bytes: Vec<u8> = vector
                .iter()
                .flat_map(|value| value.to_le_bytes())
                .collect();
            embedding_map.insert(chunk.chunk_id.clone(), bytes);
        }

        let mut conn = conn;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        match self.schema_manager.ensure_vector_profile(
            &tx,
            &config.profile,
            &config.table_name,
            identity.dimension,
        ) {
            Ok(()) => {}
            Err(SchemaError::MissingCapability(message)) => {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::UnsupportedVecCapability,
                    message,
                );
                drop(tx);
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            Err(error) => return Err(EngineError::Schema(error)),
        }
        let apply_chunks = collect_regeneration_chunks(&tx)?;
        let apply_payload = build_regeneration_input(&config, &identity, apply_chunks.clone());
        let apply_hash = compute_snapshot_hash(&apply_payload)?;
        if apply_hash != snapshot_hash {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::SnapshotDrift,
                "chunk snapshot changed during generation; retry".to_owned(),
            );
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        persist_vector_contract(&tx, &config, &identity, &snapshot_hash)?;
        tx.execute("DELETE FROM vec_nodes_active", [])?;
        let mut stmt = tx
            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
        let mut regenerated_rows = 0usize;
        for chunk in &apply_chunks {
            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
                drop(stmt);
                drop(tx);
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::InvalidEmbedderOutput,
                    format!(
                        "embedder did not produce a vector for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            };
            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
            regenerated_rows += 1;
        }
        drop(stmt);
        persist_vector_regeneration_event(
            &tx,
            "vector_regeneration_apply",
            &config.profile,
            &audit_metadata,
        )?;
        tx.commit()?;

        Ok(VectorRegenerationReport {
            profile: config.profile.clone(),
            table_name: config.table_name.clone(),
            dimension: identity.dimension,
            total_chunks: chunks.len(),
            regenerated_rows,
            contract_persisted: true,
            notes,
        })
    }

    fn persist_vector_regeneration_failure_best_effort(
        &self,
        profile: &str,
        metadata: &VectorRegenerationAuditMetadata,
        failure: &VectorRegenerationFailure,
    ) {
        let Ok(conn) = self.connect() else {
            return;
        };
        let failure_metadata = VectorRegenerationAuditMetadata {
            profile: metadata.profile.clone(),
            model_identity: metadata.model_identity.clone(),
            model_version: metadata.model_version.clone(),
            chunk_count: metadata.chunk_count,
            snapshot_hash: metadata.snapshot_hash.clone(),
            failure_class: Some(failure.failure_class_label().to_owned()),
        };
        let _ = persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_failed",
            profile,
            &failure_metadata,
        );
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails or any SQL query fails.
    pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
        let conn = self.connect()?;

        let node_logical_ids = collect_strings(
            &conn,
            "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
            source_ref,
        )?;
        let action_ids = collect_strings(
            &conn,
            "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
            source_ref,
        )?;
        let operational_mutation_ids = collect_strings(
            &conn,
            "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
            source_ref,
        )?;

        Ok(TraceReport {
            source_ref: source_ref.to_owned(),
            node_rows: count_source_ref(&conn, "nodes", source_ref)?,
            edge_rows: count_source_ref(&conn, "edges", source_ref)?,
            action_rows: count_source_ref(&conn, "actions", source_ref)?,
            operational_mutation_rows: count_source_ref(
                &conn,
                "operational_mutations",
                source_ref,
            )?,
            node_logical_ids,
            action_ids,
            operational_mutation_ids,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
    /// started, or lifecycle restoration prerequisites are missing.
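    ///
    /// # Example
    /// A minimal sketch; `engine` and the id are illustrative:
    /// ```ignore
    /// let report = engine.restore_logical_id("node-123")?;
    /// if report.was_noop {
    ///     println!("already active: {:?}", report.notes);
    /// } else {
    ///     println!("restored {} edge rows", report.restored_edge_rows);
    /// }
    /// ```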
    #[allow(clippy::too_many_lines)]
    pub fn restore_logical_id(
        &self,
        logical_id: &str,
    ) -> Result<LogicalRestoreReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        let active_count: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )?;
        if active_count > 0 {
            return Ok(LogicalRestoreReport {
                logical_id: logical_id.to_owned(),
                was_noop: true,
                restored_node_rows: 0,
                restored_edge_rows: 0,
                restored_chunk_rows: 0,
                restored_fts_rows: 0,
                restored_property_fts_rows: 0,
                restored_vec_rows: 0,
                skipped_edges: Vec::new(),
                notes: vec!["logical_id already active".to_owned()],
            });
        }

        let restored_node: Option<(String, String)> = tx
            .query_row(
                "SELECT row_id, kind FROM nodes \
                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .optional()?;
        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
        })?;

        tx.execute(
            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
            [restored_node_row_id.as_str()],
        )?;

        let retire_scope: Option<(i64, Option<String>, i64)> = tx
            .query_row(
                "SELECT rowid, source_ref, created_at FROM provenance_events \
                 WHERE event_type = 'node_retire' AND subject = ?1 \
                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .optional()?;
        let (restored_edge_rows, skipped_edges) = if let Some((
            retire_event_rowid,
            retire_source_ref,
            retire_created_at,
        )) = retire_scope
        {
            restore_validated_edges(
                &tx,
                logical_id,
                retire_source_ref.as_deref(),
                retire_created_at,
                retire_event_rowid,
            )?
        } else {
            (0, Vec::new())
        };

        let restored_chunk_rows: usize = tx
            .query_row(
                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
                [logical_id],
                |row| row.get::<_, i64>(0),
            )
            .map(i64_to_usize)?;
        tx.execute(
            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_fts_rows = tx.execute(
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             SELECT id, node_logical_id, ?2, text_content \
             FROM chunks WHERE node_logical_id = ?1",
            rusqlite::params![logical_id, restored_kind],
        )?;
        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;

        // Rebuild property FTS for the restored node.
        // Delete from the per-kind FTS table for this node (if the table exists).
        let table = fathomdb_schema::fts_kind_table_name(&restored_kind);
        let fts_table_exists: bool = tx
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1 \
                 AND sql LIKE 'CREATE VIRTUAL TABLE%'",
                rusqlite::params![table],
                |r| r.get::<_, i64>(0),
            )
            .unwrap_or(0)
            > 0;
        if fts_table_exists {
            tx.execute(
                &format!("DELETE FROM {table} WHERE node_logical_id = ?1"),
                [logical_id],
            )?;
        }
        let restored_property_fts_rows =
            rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;

        persist_simple_provenance_event(
            &tx,
            "restore_logical_id",
            logical_id,
            Some(serde_json::json!({
                "restored_node_rows": 1,
                "restored_edge_rows": restored_edge_rows,
                "restored_chunk_rows": restored_chunk_rows,
                "restored_fts_rows": restored_fts_rows,
                "restored_property_fts_rows": restored_property_fts_rows,
                "restored_vec_rows": restored_vec_rows,
            })),
        )?;
        tx.commit()?;

        Ok(LogicalRestoreReport {
            logical_id: logical_id.to_owned(),
            was_noop: false,
            restored_node_rows: 1,
            restored_edge_rows,
            restored_chunk_rows,
            restored_fts_rows,
            restored_property_fts_rows,
            restored_vec_rows,
            skipped_edges,
            notes: Vec::new(),
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
    /// started, or the purge mutation fails.
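    ///
    /// # Example
    ///
    /// A sketch with a hypothetical logical id; `engine` stands in for the
    /// receiver, whose concrete type is outside this excerpt.
    ///
    /// ```ignore
    /// let report = engine.purge_logical_id("node-123")?;
    /// println!(
    ///     "deleted {} node row(s), {} chunk row(s)",
    ///     report.deleted_node_rows, report.deleted_chunk_rows,
    /// );
    /// ```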
    pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        let active_count: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )?;
        if active_count > 0 {
            return Ok(LogicalPurgeReport {
                logical_id: logical_id.to_owned(),
                was_noop: true,
                deleted_node_rows: 0,
                deleted_edge_rows: 0,
                deleted_chunk_rows: 0,
                deleted_fts_rows: 0,
                deleted_vec_rows: 0,
                notes: vec!["logical_id is active; purge skipped".to_owned()],
            });
        }

        let node_rows: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1",
            [logical_id],
            |row| row.get(0),
        )?;
        if node_rows == 0 {
            return Err(EngineError::InvalidWrite(format!(
                "logical_id '{logical_id}' does not exist"
            )));
        }

        let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
        let deleted_fts_rows = tx.execute(
            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let deleted_edge_rows = tx.execute(
            "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
            [logical_id],
        )?;
        let deleted_chunk_rows = tx.execute(
            "DELETE FROM chunks WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let deleted_node_rows =
            tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
        tx.execute(
            "DELETE FROM node_access_metadata WHERE logical_id = ?1",
            [logical_id],
        )?;

        persist_simple_provenance_event(
            &tx,
            "purge_logical_id",
            logical_id,
            Some(serde_json::json!({
                "deleted_node_rows": deleted_node_rows,
                "deleted_edge_rows": deleted_edge_rows,
                "deleted_chunk_rows": deleted_chunk_rows,
                "deleted_fts_rows": deleted_fts_rows,
                "deleted_vec_rows": deleted_vec_rows,
            })),
        )?;
        tx.commit()?;

        Ok(LogicalPurgeReport {
            logical_id: logical_id.to_owned(),
            was_noop: false,
            deleted_node_rows,
            deleted_edge_rows,
            deleted_chunk_rows,
            deleted_fts_rows,
            deleted_vec_rows,
            notes: Vec::new(),
        })
    }

    /// Purge provenance events older than `before_timestamp`.
    ///
    /// By default, the `excise`, `excise_source`, and `purge_logical_id` event
    /// types are preserved so that data-deletion audit trails survive. Pass an
    /// explicit `preserve_event_types` list to override this default.
    ///
    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the transaction
    /// cannot be started, or any SQL statement fails.
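    ///
    /// # Example
    ///
    /// A dry-run sketch. `engine` stands in for the receiver, and the struct
    /// literal assumes `ProvenancePurgeOptions` has exactly the two fields
    /// used below; the cutoff value is hypothetical.
    ///
    /// ```ignore
    /// let cutoff_unix_seconds: i64 = 1_700_000_000; // hypothetical cutoff
    /// let options = ProvenancePurgeOptions {
    ///     preserve_event_types: Vec::new(), // empty = keep the built-in defaults
    ///     dry_run: true,                    // count, but delete nothing
    /// };
    /// let report = engine.purge_provenance_events(cutoff_unix_seconds, &options)?;
    /// println!(
    ///     "{} event(s) would be deleted, {} preserved",
    ///     report.events_deleted, report.events_preserved,
    /// );
    /// ```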
    pub fn purge_provenance_events(
        &self,
        before_timestamp: i64,
        options: &ProvenancePurgeOptions,
    ) -> Result<ProvenancePurgeReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
            vec!["excise", "excise_source", "purge_logical_id"]
        } else {
            options
                .preserve_event_types
                .iter()
                .map(String::as_str)
                .collect()
        };

        // Build the NOT IN clause dynamically based on preserved types.
        let placeholders: String = (0..preserved_types.len())
            .map(|i| format!("?{}", i + 2))
            .collect::<Vec<_>>()
            .join(", ");
        let count_query = format!(
            "SELECT count(*) FROM provenance_events \
             WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
        );
        let delete_query = format!(
            "DELETE FROM provenance_events WHERE rowid IN (\
             SELECT rowid FROM provenance_events \
             WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
             LIMIT 10000)"
        );

        let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
            stmt.raw_bind_parameter(1, before_timestamp)?;
            for (i, event_type) in preserved_types.iter().enumerate() {
                stmt.raw_bind_parameter(i + 2, *event_type)?;
            }
            Ok(())
        };

        let events_deleted = if options.dry_run {
            let mut stmt = tx.prepare(&count_query)?;
            bind_params(&mut stmt)?;
            stmt.raw_query()
                .next()?
                .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
        } else {
            let mut total_deleted: u64 = 0;
            loop {
                let mut stmt = tx.prepare(&delete_query)?;
                bind_params(&mut stmt)?;
                let deleted = stmt.raw_execute()?;
                if deleted == 0 {
                    break;
                }
                total_deleted += deleted as u64;
            }
            total_deleted
        };

        let total_after: u64 =
            tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
                row.get(0)
            })?;

        let oldest_remaining: Option<i64> = tx
            .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
                row.get(0)
            })
            .optional()?
            .flatten();

        if !options.dry_run {
            tx.commit()?;
        }

        // In dry_run mode nothing was deleted, so total_after includes the
        // would-be-deleted rows; subtract to get the preserved count.
        let events_preserved = if options.dry_run {
            total_after - events_deleted
        } else {
            total_after
        };

        Ok(ProvenancePurgeReport {
            events_deleted,
            events_preserved,
            oldest_remaining,
        })
    }

    /// # Errors
    /// Returns [`EngineError`] if the database connection fails, the transaction cannot be
    /// started, or any SQL statement fails.
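    ///
    /// # Example
    ///
    /// A sketch with a hypothetical source ref; `engine` stands in for the
    /// receiver, whose concrete type is outside this excerpt.
    ///
    /// ```ignore
    /// // Supersede everything ingested from one source and get back a trace
    /// // of what remains for that source afterwards.
    /// let trace = engine.excise_source("import://batch-42")?;
    /// ```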
    #[allow(clippy::too_many_lines)]
    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
        let mut conn = self.connect()?;

        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        let affected_operational_collections = collect_strings_tx(
            &tx,
            "SELECT DISTINCT m.collection_name \
             FROM operational_mutations m \
             JOIN operational_collections c ON c.name = m.collection_name \
             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
             ORDER BY m.collection_name",
            source_ref,
        )?;

        // Collect (row_id, logical_id) for active rows that will be excised.
        let pairs: Vec<(String, String)> = {
            let mut stmt = tx.prepare(
                "SELECT row_id, logical_id FROM nodes \
                 WHERE source_ref = ?1 AND superseded_at IS NULL",
            )?;
            stmt.query_map([source_ref], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?
            .collect::<Result<Vec<_>, _>>()?
        };
        let affected_logical_ids: Vec<String> = pairs
            .iter()
            .map(|(_, logical_id)| logical_id.clone())
            .collect();

        // Supersede bad rows in all tables.
        tx.execute(
            "UPDATE nodes SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE edges SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE actions SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        clear_operational_current_rows(&tx, &affected_operational_collections)?;
        tx.execute(
            "DELETE FROM operational_mutations WHERE source_ref = ?1",
            [source_ref],
        )?;
        for logical_id in &affected_logical_ids {
            delete_vec_rows_for_logical_id(&tx, logical_id)?;
            tx.execute(
                "DELETE FROM chunks WHERE node_logical_id = ?1",
                [logical_id.as_str()],
            )?;
        }

        // Restore the most recent prior version for each affected logical_id.
        for (excised_row_id, logical_id) in &pairs {
            let prior: Option<String> = tx
                .query_row(
                    "SELECT row_id FROM nodes \
                     WHERE logical_id = ?1 AND row_id != ?2 \
                     ORDER BY created_at DESC LIMIT 1",
                    [logical_id.as_str(), excised_row_id.as_str()],
                    |row| row.get(0),
                )
                .optional()?;
            if let Some(prior_id) = prior {
                tx.execute(
                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
                    [prior_id.as_str()],
                )?;
            }
        }

        for logical_id in &affected_logical_ids {
            let has_active_node = tx
                .query_row(
                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
                    [logical_id.as_str()],
                    |row| row.get::<_, i64>(0),
                )
                .optional()?
                .is_some();
            if !has_active_node {
                tx.execute(
                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
                    [logical_id.as_str()],
                )?;
            }
        }

        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;

        // Rebuild FTS atomically within the same transaction so readers never
        // observe a post-excise node state with a stale FTS index.
        tx.execute("DELETE FROM fts_nodes", [])?;
        tx.execute(
            r"
            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
            SELECT c.id, n.logical_id, n.kind, c.text_content
            FROM chunks c
            JOIN nodes n
              ON n.logical_id = c.node_logical_id
             AND n.superseded_at IS NULL
            ",
            [],
        )?;

        // Rebuild property FTS in the same transaction.
        rebuild_property_fts_in_tx(&tx)?;

        // Record the audit event inside the same transaction so the excision and its
        // audit record are committed atomically, with no window where the excision is
        // durable but unaudited.
        tx.execute(
            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
             VALUES (?1, 'excise_source', ?2, ?2)",
            rusqlite::params![new_id(), source_ref],
        )?;

        tx.commit()?;

        self.trace_source(source_ref)
    }

    /// # Errors
    /// Returns [`EngineError`] if the WAL checkpoint fails, the `SQLite` backup fails,
    /// the SHA-256 digest cannot be computed, or the manifest file cannot be written.
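    ///
    /// # Example
    ///
    /// A sketch, assuming `SafeExportOptions` exposes the `force_checkpoint`
    /// field used below (the only field visible in this excerpt); `engine`
    /// and the destination path are illustrative.
    ///
    /// ```ignore
    /// let manifest = engine.safe_export(
    ///     "/backups/fathom.db",
    ///     SafeExportOptions { force_checkpoint: true },
    /// )?;
    /// // The manifest is also written next to the export as
    /// // `fathom.db.export-manifest.json`.
    /// println!("sha256 = {}, pages = {}", manifest.sha256, manifest.page_count);
    /// ```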
    pub fn safe_export(
        &self,
        destination_path: impl AsRef<Path>,
        options: SafeExportOptions,
    ) -> Result<SafeExportManifest, EngineError> {
        let destination_path = destination_path.as_ref();

        // 1. Optionally checkpoint WAL before exporting. This keeps the on-disk file tidy for
        // callers that want a fully checkpointed export, but export correctness does not depend
        // on it because the backup API copies from the live SQLite connection state.
        let conn = self.connect()?;

        if options.force_checkpoint {
            trace_info!("safe_export: wal checkpoint started");
            let (busy, log, checkpointed): (i64, i64, i64) =
                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
                })?;
            if busy != 0 {
                trace_warn!(
                    busy,
                    log_frames = log,
                    checkpointed_frames = checkpointed,
                    "safe_export: wal checkpoint blocked by active readers"
                );
                return Err(EngineError::Bridge(format!(
                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
                     log frames={log}, checkpointed={checkpointed}; \
                     retry export when no readers are active"
                )));
            }
            trace_info!(
                log_frames = log,
                checkpointed_frames = checkpointed,
                "safe_export: wal checkpoint completed"
            );
        }

        let schema_version: u32 = conn
            .query_row(
                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
                [],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // 2. Export the database through SQLite's online backup API so committed data in the WAL
        // is included even when `force_checkpoint` is false.
        if let Some(parent) = destination_path.parent() {
            fs::create_dir_all(parent)?;
        }
        conn.backup(DatabaseName::Main, destination_path, None)?;

        drop(conn);

        // 2b. Query page_count from the EXPORTED file so the manifest reflects what was
        // actually backed up, not the source (which may have changed between the PRAGMA
        // and the backup call).
        let page_count: u64 = {
            let export_conn = rusqlite::Connection::open_with_flags(
                destination_path,
                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
            )?;
            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
        };

        // 3. Compute SHA-256 of the exported file.
        // FIX(review): was fs::read loading entire DB into memory; use streaming hash.
        let sha256 = {
            let mut file = fs::File::open(destination_path)?;
            let mut hasher = Sha256::new();
            io::copy(&mut file, &mut hasher)?;
            format!("{:x}", hasher.finalize())
        };

        // 4. Record when the export was created.
        let exported_at = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
            .as_secs();

        let manifest = SafeExportManifest {
            exported_at,
            sha256,
            schema_version,
            protocol_version: EXPORT_PROTOCOL_VERSION,
            page_count,
        };

        // 5. Write manifest alongside the exported file, using Path API for the name.
        let manifest_path = {
            let mut p = destination_path.to_path_buf();
            let stem = p
                .file_name()
                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
                .ok_or_else(|| {
                    EngineError::Bridge("destination path has no filename".to_owned())
                })?;
            p.set_file_name(stem);
            p
        };
        let manifest_json =
            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;

        // Atomic manifest write: write to a temp file then rename so readers never
        // observe a partially-written manifest.
        let manifest_tmp = manifest_path.with_extension("json.tmp");
        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
        {
            let _ = fs::remove_file(&manifest_tmp);
            return Err(e.into());
        }

        Ok(manifest)
    }
}

#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    generator_command_json: String,
    applied_at: i64,
    snapshot_hash: String,
    contract_format_version: i64,
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    chunk_id: String,
    node_logical_id: String,
    kind: String,
    text_content: String,
    byte_start: Option<i64>,
    byte_end: Option<i64>,
    source_ref: Option<String>,
    created_at: i64,
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    chunks: Vec<VectorRegenerationInputChunk>,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    InvalidContract,
    EmbedderFailure,
    InvalidEmbedderOutput,
    SnapshotDrift,
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    fn label(self) -> &'static str {
        match self {
            Self::InvalidContract => "invalid contract",
            Self::EmbedderFailure => "embedder failure",
            Self::InvalidEmbedderOutput => "invalid embedder output",
            Self::SnapshotDrift => "snapshot drift",
            Self::UnsupportedVecCapability => "unsupported vec capability",
        }
    }

    fn retryable(self) -> bool {
        matches!(self, Self::SnapshotDrift)
    }
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VectorRegenerationFailure {
    class: VectorRegenerationFailureClass,
    detail: String,
}

impl VectorRegenerationFailure {
    pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
        Self {
            class,
            detail: detail.into(),
        }
    }

    fn to_engine_error(&self) -> EngineError {
        let retry_suffix = if self.class.retryable() {
            " [retryable]"
        } else {
            ""
        };
        EngineError::Bridge(format!(
            "vector regeneration {}: {}{}",
            self.class.label(),
            self.detail,
            retry_suffix
        ))
    }

    fn failure_class_label(&self) -> &'static str {
        self.class.label()
    }
}

#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    profile: String,
    model_identity: String,
    model_version: String,
    chunk_count: usize,
    snapshot_hash: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}

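/// Retention policy for operational mutation history, deserialized from a
/// JSON object tagged by `mode`.
///
/// Shapes implied by the serde attributes below (values hypothetical):
/// `{"mode": "keep_all"}`,
/// `{"mode": "purge_before_seconds", "max_age_seconds": 86400}`, and
/// `{"mode": "keep_last", "max_rows": 100}`.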
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    KeepAll,
    PurgeBeforeSeconds { max_age_seconds: i64 },
    KeepLast { max_rows: usize },
}

/// # Errors
/// Returns [`EngineError`] if the file cannot be read or the config is invalid.
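///
/// # Example
///
/// The file extension selects the parser: `.toml` is parsed as TOML, `.json`
/// (or no extension) as JSON. A sketch of a TOML config covering the fields
/// this module reads (values are hypothetical, except that validation
/// requires `table_name` to be `vec_nodes_active`):
///
/// ```toml
/// profile = "default"
/// table_name = "vec_nodes_active"
/// chunking_policy = "paragraph-v1"
/// preprocessing_policy = "plain-v1"
/// ```
///
/// ```ignore
/// let config = load_vector_regeneration_config("vector-regen.toml")?;
/// ```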
pub fn load_vector_regeneration_config(
    path: impl AsRef<Path>,
) -> Result<VectorRegenerationConfig, EngineError> {
    let path = path.as_ref();
    let raw = fs::read_to_string(path)?;
    match path.extension().and_then(|ext| ext.to_str()) {
        Some("toml") => {
            toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
        }
        Some("json") | None => {
            serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
        }
        Some(other) => Err(EngineError::Bridge(format!(
            "unsupported vector regeneration config extension: {other}"
        ))),
    }
}

fn validate_vector_regeneration_config(
    conn: &rusqlite::Connection,
    config: &VectorRegenerationConfig,
    identity: &QueryEmbedderIdentity,
) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
    let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
    let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
    if table_name != "vec_nodes_active" {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!("table_name must be vec_nodes_active, got '{table_name}'"),
        ));
    }
    if identity.dimension == 0 {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            "embedder reports dimension 0".to_owned(),
        ));
    }
    let chunking_policy =
        validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
    let preprocessing_policy = validate_bounded_text(
        "preprocessing_policy",
        &config.preprocessing_policy,
        MAX_POLICY_LEN,
    )?;

    if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
        && existing_dimension != identity.dimension
    {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!(
                "embedder dimension {} does not match existing vector profile dimension {}",
                identity.dimension, existing_dimension
            ),
        ));
    }

    validate_existing_contract_version(conn, &profile)?;

    let normalized = VectorRegenerationConfig {
        profile,
        table_name,
        chunking_policy,
        preprocessing_policy,
    };
    let serialized = serde_json::to_vec(&normalized).map_err(|error| {
        VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            error.to_string(),
        )
    })?;
    if serialized.len() > MAX_CONTRACT_JSON_BYTES {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
        ));
    }

    Ok(normalized)
}

#[allow(clippy::cast_possible_wrap)]
fn persist_vector_contract(
    conn: &rusqlite::Connection,
    config: &VectorRegenerationConfig,
    identity: &QueryEmbedderIdentity,
    snapshot_hash: &str,
) -> Result<(), EngineError> {
    conn.execute(
        r"
        INSERT OR REPLACE INTO vector_embedding_contracts (
            profile,
            table_name,
            model_identity,
            model_version,
            dimension,
            normalization_policy,
            chunking_policy,
            preprocessing_policy,
            generator_command_json,
            applied_at,
            snapshot_hash,
            contract_format_version,
            updated_at
        ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
        ",
        rusqlite::params![
            config.profile.as_str(),
            config.table_name.as_str(),
            identity.model_identity.as_str(),
            identity.model_version.as_str(),
            identity.dimension as i64,
            identity.normalization_policy.as_str(),
            config.chunking_policy.as_str(),
            config.preprocessing_policy.as_str(),
            "[]",
            snapshot_hash,
            CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
        ],
    )?;
    Ok(())
}

fn persist_vector_regeneration_event(
    conn: &rusqlite::Connection,
    event_type: &str,
    subject: &str,
    metadata: &VectorRegenerationAuditMetadata,
) -> Result<(), EngineError> {
    let metadata_json = serialize_audit_metadata(metadata)?;
    conn.execute(
        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![new_id(), event_type, subject, metadata_json],
    )?;
    Ok(())
}

fn persist_simple_provenance_event(
    conn: &rusqlite::Connection,
    event_type: &str,
    subject: &str,
    metadata: Option<serde_json::Value>,
) -> Result<(), EngineError> {
    let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
    conn.execute(
        "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
        rusqlite::params![new_id(), event_type, subject, metadata_json],
    )?;
    Ok(())
}

/// Count per-kind FTS integrity issues across all registered per-kind tables.
/// Returns (stale, orphaned, mismatched-kind, duplicate) counts.
///
/// - Stale: rows in a per-kind table whose node is superseded or missing.
/// - Orphaned: rows in a per-kind table for a kind with no registered schema.
/// - Mismatched kind: impossible with per-kind tables (always 0).
/// - Duplicate: same `node_logical_id` appears more than once in any per-kind table.
fn count_per_kind_property_fts_issues(
    conn: &rusqlite::Connection,
) -> Result<(i64, i64, i64, i64), EngineError> {
    // Collect all per-kind virtual tables from sqlite_master.
    // Filter by sql LIKE 'CREATE VIRTUAL TABLE%' to exclude FTS5 shadow tables
    // (e.g. fts_props_goal_data, fts_props_goal_idx) which share the same prefix.
    let per_kind_tables: Vec<String> = {
        let mut stmt = conn.prepare(
            "SELECT name FROM sqlite_master \
             WHERE type='table' AND name LIKE 'fts_props_%' \
             AND sql LIKE 'CREATE VIRTUAL TABLE%'",
        )?;
        stmt.query_map([], |r| r.get::<_, String>(0))?
            .collect::<Result<Vec<_>, _>>()?
    };

    let registered_kinds: std::collections::HashSet<String> = {
        let mut stmt = conn.prepare("SELECT kind FROM fts_property_schemas")?;
        stmt.query_map([], |r| r.get::<_, String>(0))?
            .collect::<Result<std::collections::HashSet<_>, _>>()?
    };

    let mut stale = 0i64;
    let mut orphaned = 0i64;
    let mut duplicate = 0i64;

    for table in &per_kind_tables {
        // Stale: rows whose node_logical_id has no active node.
        let kind_stale: i64 = conn.query_row(
            &format!(
                "SELECT count(*) FROM {table} fp \
                 WHERE NOT EXISTS (\
                     SELECT 1 FROM nodes n \
                     WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL\
                 )"
            ),
            [],
            |r| r.get(0),
        )?;
        stale += kind_stale;

        // Duplicate: same node_logical_id more than once.
        let kind_dup: i64 = conn.query_row(
            &format!(
                "SELECT count(*) FROM (\
                     SELECT node_logical_id FROM {table} \
                     GROUP BY node_logical_id HAVING count(*) > 1\
                 )"
            ),
            [],
            |r| r.get(0),
        )?;
        duplicate += kind_dup;

        // Orphaned: this per-kind table has no corresponding schema.
        // Determine which kind this table corresponds to by checking all registered kinds.
        let table_has_schema = registered_kinds
            .iter()
            .any(|k| fathomdb_schema::fts_kind_table_name(k) == *table);
        if !table_has_schema {
            let table_rows: i64 =
                conn.query_row(&format!("SELECT count(*) FROM {table}"), [], |r| r.get(0))?;
            orphaned += table_rows;
        }
    }

    // Mismatched kind is always 0 with per-kind tables.
    Ok((stale, orphaned, 0, duplicate))
}

/// Count active nodes that should have a property FTS row (extraction yields a value)
/// but don't. Uses the same extraction logic as write/rebuild to avoid false positives
/// for nodes whose declared paths legitimately normalize to no values.
fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
    let schemas = crate::writer::load_fts_property_schemas(conn)?;
    if schemas.is_empty() {
        return Ok(0);
    }

    let mut missing = 0i64;
    for (kind, schema) in &schemas {
        let table = fathomdb_schema::fts_kind_table_name(kind);
        // If the per-kind table doesn't exist yet, all nodes with extractable values are missing.
        let table_exists: bool = conn
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
                [table.as_str()],
                |r| r.get::<_, i64>(0),
            )
            .unwrap_or(0)
            > 0;

        if table_exists {
            let mut stmt = conn.prepare(&format!(
                "SELECT n.logical_id, n.properties FROM nodes n \
                 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
                   AND NOT EXISTS (SELECT 1 FROM {table} fp WHERE fp.node_logical_id = n.logical_id)"
            ))?;
            let rows = stmt.query_map([kind.as_str()], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?;
            for row in rows {
                let (_logical_id, properties_str) = row?;
                let props: serde_json::Value =
                    serde_json::from_str(&properties_str).unwrap_or_default();
                if crate::writer::extract_property_fts(&props, schema)
                    .0
                    .is_some()
                {
                    missing += 1;
                }
            }
        } else {
            // Per-kind table doesn't exist yet — count all nodes with extractable values.
            let mut stmt = conn.prepare(
                "SELECT n.logical_id, n.properties FROM nodes n \
                 WHERE n.kind = ?1 AND n.superseded_at IS NULL",
            )?;
            let rows = stmt.query_map([kind.as_str()], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?;
            for row in rows {
                let (_logical_id, properties_str) = row?;
                let props: serde_json::Value =
                    serde_json::from_str(&properties_str).unwrap_or_default();
                if crate::writer::extract_property_fts(&props, schema)
                    .0
                    .is_some()
                {
                    missing += 1;
                }
            }
        }
    }
    Ok(missing)
}

/// Count property FTS rows whose `text_content` has drifted from the current canonical
/// value computed by `crate::writer::extract_property_fts(...)`. This catches:
/// - rows whose text no longer matches the current node properties and schema
/// - rows that should have been removed (extraction now yields no value)
fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
    let schemas = crate::writer::load_fts_property_schemas(conn)?;
    if schemas.is_empty() {
        return Ok(0);
    }

    let mut drifted = 0i64;
    for (kind, schema) in &schemas {
        let table = fathomdb_schema::fts_kind_table_name(kind);
        // If the per-kind table doesn't exist, no rows to check.
        let table_exists: bool = conn
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name = ?1",
                [table.as_str()],
                |r| r.get::<_, i64>(0),
            )
            .unwrap_or(0)
            > 0;
        if !table_exists {
            continue;
        }
        let mut stmt = conn.prepare(&format!(
            "SELECT fp.node_logical_id, fp.text_content, n.properties \
             FROM {table} fp \
             JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
             WHERE n.kind = ?1"
        ))?;
        let rows = stmt.query_map([kind.as_str()], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
            ))
        })?;
        for row in rows {
            let (_logical_id, stored_text, properties_str) = row?;
            let props: serde_json::Value =
                serde_json::from_str(&properties_str).unwrap_or_default();
            let (expected, _positions, _stats) =
                crate::writer::extract_property_fts(&props, schema);
            match expected {
                Some(text) if text == stored_text => {}
                _ => drifted += 1,
            }
        }
    }
    Ok(drifted)
}

/// Rebuild property FTS rows from canonical state within an existing transaction.
fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
    // Delete from ALL per-kind FTS virtual tables (including orphaned ones without schemas).
    // Filter by sql LIKE 'CREATE VIRTUAL TABLE%' to exclude FTS5 shadow tables.
    let all_per_kind_tables: Vec<String> = {
        let mut stmt = conn.prepare(
            "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'fts_props_%' \
             AND sql LIKE 'CREATE VIRTUAL TABLE%'",
        )?;
        stmt.query_map([], |r| r.get::<_, String>(0))?
            .collect::<Result<Vec<_>, _>>()?
    };
    for table in &all_per_kind_tables {
        conn.execute_batch(&format!("DELETE FROM {table}"))?;
    }
    conn.execute("DELETE FROM fts_node_property_positions", [])?;
    let inserted = crate::projection::insert_property_fts_rows(
        conn,
        "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
    )?;
    Ok(inserted)
}

/// Rebuild property FTS for a single node. Returns 1 if a row was inserted, 0 otherwise.
/// The caller must delete any existing per-kind FTS row for this node first.
fn rebuild_single_node_property_fts(
    conn: &rusqlite::Connection,
    logical_id: &str,
    kind: &str,
) -> Result<usize, EngineError> {
    let schema: Option<(String, String)> = conn
        .query_row(
            "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
            [kind],
            |row| {
                let paths_json: String = row.get(0)?;
                let separator: String = row.get(1)?;
                Ok((paths_json, separator))
            },
        )
        .optional()?;
    let Some((paths_json, separator)) = schema else {
        return Ok(0);
    };
    let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
    let properties_str: Option<String> = conn
        .query_row(
            "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )
        .optional()?;
    let Some(properties_str) = properties_str else {
        return Ok(0);
    };
    let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
    let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
    let Some(text) = text else {
        return Ok(0);
    };
    conn.execute(
        "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
        rusqlite::params![logical_id],
    )?;
    let table = fathomdb_schema::fts_kind_table_name(kind);
    let tok = fathomdb_schema::DEFAULT_FTS_TOKENIZER;
    conn.execute_batch(&format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
         USING fts5(node_logical_id UNINDEXED, text_content, tokenize = '{tok}')"
    ))?;
    conn.execute(
        &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES (?1, ?2)"),
        rusqlite::params![logical_id, text],
    )?;
    for pos in &positions {
        conn.execute(
            "INSERT INTO fts_node_property_positions \
             (node_logical_id, kind, start_offset, end_offset, leaf_path) \
             VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![
                logical_id,
                kind,
                i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
                i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
                pos.leaf_path,
            ],
        )?;
    }
    Ok(1)
}

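/// Serialize FTS property path specs (plus optional exclude paths) into the
/// `property_paths_json` column format.
///
/// Illustrative shapes (paths hypothetical): a scalar-only schema with no
/// exclusions and no weights keeps the legacy bare-array form,
/// `["$.name", "$.title"]`; anything else uses the object envelope, e.g.
/// `{"paths": [{"path": "$.body", "mode": "recursive", "weight": 2.0}],
/// "exclude_paths": ["$.body.meta"]}`.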
fn serialize_property_paths_json(
    entries: &[FtsPropertyPathSpec],
    exclude_paths: &[String],
) -> Result<String, EngineError> {
    // Scalar-only schemas with no exclude_paths and no weights are
    // serialised in the legacy shape (bare array of strings) for full
    // backwards compatibility with earlier schema versions.
    let all_scalar = entries
        .iter()
        .all(|e| e.mode == FtsPropertyPathMode::Scalar);
    let any_weight = entries.iter().any(|e| e.weight.is_some());
    if all_scalar && exclude_paths.is_empty() && !any_weight {
        let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
        return serde_json::to_string(&paths).map_err(|e| {
            EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
        });
    }

    let mut obj = serde_json::Map::new();
    let paths_json: Vec<serde_json::Value> = entries
        .iter()
        .map(|e| {
            let mode_str = match e.mode {
                FtsPropertyPathMode::Scalar => "scalar",
                FtsPropertyPathMode::Recursive => "recursive",
            };
            let mut entry = serde_json::json!({ "path": e.path, "mode": mode_str });
            if let Some(w) = e.weight {
                entry["weight"] = serde_json::json!(w);
            }
            entry
        })
        .collect();
    obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
    if !exclude_paths.is_empty() {
        obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
    }
    serde_json::to_string(&serde_json::Value::Object(obj))
        .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
}

/// Drop and recreate the per-kind FTS5 virtual table with one column per spec.
///
/// The tokenizer string is validated before interpolation into DDL to
/// prevent SQL injection.  If `specs` is empty a single `text_content`
/// column is used (matching the migration-21 baseline shape).
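///
/// For a hypothetical kind `goal` with one scalar spec, the emitted DDL has
/// this shape (the real column name comes from
/// `fathomdb_schema::fts_column_name`, whose mapping is outside this excerpt,
/// and the tokenizer is caller-supplied):
///
/// ```sql
/// DROP TABLE IF EXISTS fts_props_goal;
/// CREATE VIRTUAL TABLE fts_props_goal
///   USING fts5(node_logical_id UNINDEXED, name, tokenize='unicode61');
/// ```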
fn create_or_replace_fts_kind_table(
    conn: &rusqlite::Connection,
    kind: &str,
    specs: &[FtsPropertyPathSpec],
    tokenizer: &str,
) -> Result<(), EngineError> {
    let table = fathomdb_schema::fts_kind_table_name(kind);

    // Validate tokenizer string: alphanumeric plus the set used by all known presets.
    // Must match the allowlist in `set_fts_profile` so that profiles written by one
    // function are accepted by the other.  The source-code preset
    // (`"unicode61 tokenchars '._-$@'"`) requires `.`, `-`, `$`, `@`.
    if !tokenizer
        .chars()
        .all(|c| c.is_alphanumeric() || "'._-$@ ".contains(c))
    {
        return Err(EngineError::Bridge(format!(
            "invalid tokenizer string: {tokenizer:?}"
        )));
    }

    let cols: Vec<String> = if specs.is_empty() {
        vec![
            "node_logical_id UNINDEXED".to_owned(),
            "text_content".to_owned(),
        ]
    } else {
        std::iter::once("node_logical_id UNINDEXED".to_owned())
            .chain(specs.iter().map(|s| {
                let is_recursive = matches!(s.mode, FtsPropertyPathMode::Recursive);
                fathomdb_schema::fts_column_name(&s.path, is_recursive)
            }))
            .collect()
    };

    // Escape inner apostrophes so the SQL single-quoted tokenize= clause is valid.
    // "unicode61 tokenchars '._-$@'" → "unicode61 tokenchars ''._-$@''"
    let tokenizer_sql = tokenizer.replace('\'', "''");
    conn.execute_batch(&format!(
        "DROP TABLE IF EXISTS {table}; \
         CREATE VIRTUAL TABLE {table} USING fts5({cols}, tokenize='{tokenizer_sql}');",
        cols = cols.join(", "),
    ))?;

    Ok(())
}

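/// Validate registered FTS property paths: the list must be non-empty, every
/// path must start with `$.`, every dot-separated segment must be non-empty
/// and contain only alphanumeric characters or `_`, and no path may repeat.
/// For example (hypothetical paths), `$.name` and `$.meta.title_1` are
/// accepted; `name`, `$.a..b`, and `$.tag-list` are rejected.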
fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
    if paths.is_empty() {
        return Err(EngineError::InvalidWrite(
            "FTS property paths must not be empty".to_owned(),
        ));
    }
    let mut seen = std::collections::HashSet::new();
    for path in paths {
        if !path.starts_with("$.") {
            return Err(EngineError::InvalidWrite(format!(
                "FTS property path must start with '$.' but got: {path}"
            )));
        }
        let after_prefix = &path[2..]; // safe: already validated "$." prefix
        let segments: Vec<&str> = after_prefix.split('.').collect();
        if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
            return Err(EngineError::InvalidWrite(format!(
                "FTS property path has empty segment(s): {path}"
            )));
        }
        for seg in &segments {
            if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
                return Err(EngineError::InvalidWrite(format!(
                    "FTS property path segment contains invalid characters: {path}"
                )));
            }
        }
        if !seen.insert(path) {
            return Err(EngineError::InvalidWrite(format!(
                "duplicate FTS property path: {path}"
            )));
        }
    }
    Ok(())
}

fn load_fts_property_schema_record(
    conn: &rusqlite::Connection,
    kind: &str,
) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
    let row = conn
        .query_row(
            "SELECT kind, property_paths_json, separator, format_version \
             FROM fts_property_schemas WHERE kind = ?1",
            [kind],
            |row| {
                let kind: String = row.get(0)?;
                let paths_json: String = row.get(1)?;
                let separator: String = row.get(2)?;
                let format_version: i64 = row.get(3)?;
                Ok(build_fts_property_schema_record(
                    kind,
                    &paths_json,
                    separator,
                    format_version,
                ))
            },
        )
        .optional()?;
    Ok(row)
}

/// Build an [`FtsPropertySchemaRecord`] from a raw
/// `fts_property_schemas` row. Delegates JSON parsing to
/// [`crate::writer::parse_property_schema_json`] — the same parser the
/// recursive walker uses at rebuild time — so both the legacy bare-array
/// shape and the Phase 4 object-shaped envelope round-trip correctly.
fn build_fts_property_schema_record(
    kind: String,
    paths_json: &str,
    separator: String,
    format_version: i64,
) -> FtsPropertySchemaRecord {
    let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
    let entries: Vec<FtsPropertyPathSpec> = schema
        .paths
        .into_iter()
        .map(|entry| FtsPropertyPathSpec {
            path: entry.path,
            mode: match entry.mode {
                crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
                crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
            },
            weight: entry.weight,
        })
        .collect();
    let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
    FtsPropertySchemaRecord {
        kind,
        property_paths,
        entries,
        exclude_paths: schema.exclude_paths,
        separator,
        format_version,
    }
}

fn build_regeneration_input(
    config: &VectorRegenerationConfig,
    identity: &QueryEmbedderIdentity,
    chunks: Vec<VectorRegenerationInputChunk>,
) -> VectorRegenerationInput {
    VectorRegenerationInput {
        profile: config.profile.clone(),
        table_name: config.table_name.clone(),
        model_identity: identity.model_identity.clone(),
        model_version: identity.model_version.clone(),
        dimension: identity.dimension,
        normalization_policy: identity.normalization_policy.clone(),
        chunking_policy: config.chunking_policy.clone(),
        preprocessing_policy: config.preprocessing_policy.clone(),
        chunks,
    }
}

fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
    let bytes =
        serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
    let mut hasher = Sha256::new();
    hasher.update(bytes);
    Ok(format!("{:x}", hasher.finalize()))
}

fn collect_regeneration_chunks(
    conn: &rusqlite::Connection,
) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
    let mut stmt = conn.prepare(
        r"
        SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
        FROM chunks c
        JOIN nodes n
          ON n.logical_id = c.node_logical_id
         AND n.superseded_at IS NULL
        ORDER BY c.created_at, c.id
        ",
    )?;
    let chunks = stmt
        .query_map([], |row| {
            Ok(VectorRegenerationInputChunk {
                chunk_id: row.get(0)?,
                node_logical_id: row.get(1)?,
                kind: row.get(2)?,
                text_content: row.get(3)?,
                byte_start: row.get(4)?,
                byte_end: row.get(5)?,
                source_ref: row.get(6)?,
                created_at: row.get(7)?,
            })
        })?
        .collect::<Result<Vec<_>, _>>()?;
    Ok(chunks)
}

fn validate_bounded_text(
    field: &str,
    value: &str,
    max_len: usize,
) -> Result<String, VectorRegenerationFailure> {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!("{field} must not be empty"),
        ));
    }
    if trimmed.len() > max_len {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!("{field} exceeds max length {max_len}"),
3965        ));
3966    }
3967    Ok(trimmed.to_owned())
3968}
3969
3970fn current_vector_profile_dimension(
3971    conn: &rusqlite::Connection,
3972    profile: &str,
3973) -> Result<Option<usize>, VectorRegenerationFailure> {
3974    let dimension: Option<i64> = conn
3975        .query_row(
3976            "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3977            [profile],
3978            |row| row.get(0),
3979        )
3980        .optional()
3981        .map_err(|error| {
3982            VectorRegenerationFailure::new(
3983                VectorRegenerationFailureClass::InvalidContract,
3984                error.to_string(),
3985            )
3986        })?;
3987    dimension
3988        .map(|value| {
3989            usize::try_from(value).map_err(|_| {
3990                VectorRegenerationFailure::new(
3991                    VectorRegenerationFailureClass::InvalidContract,
3992                    format!("stored vector profile dimension is invalid: {value}"),
3993                )
3994            })
3995        })
3996        .transpose()
3997}
3998
fn validate_existing_contract_version(
    conn: &rusqlite::Connection,
    profile: &str,
) -> Result<(), VectorRegenerationFailure> {
    let version: Option<i64> = conn
        .query_row(
            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
            [profile],
            |row| row.get(0),
        )
        .optional()
        .map_err(|error| {
            VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::InvalidContract,
                error.to_string(),
            )
        })?;
    if let Some(version) = version
        && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
    {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!(
                "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
            ),
        ));
    }
    Ok(())
}

fn serialize_audit_metadata(
    metadata: &VectorRegenerationAuditMetadata,
) -> Result<String, EngineError> {
    let json =
        serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
    if json.len() > MAX_AUDIT_METADATA_BYTES {
        return Err(VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::InvalidContract,
            format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
        )
        .to_engine_error());
    }
    Ok(json)
}

fn count_source_ref(
    conn: &rusqlite::Connection,
    table: &str,
    source_ref: &str,
) -> Result<usize, EngineError> {
    let sql = match table {
        "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
        "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
        "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
        "operational_mutations" => {
            "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
        }
        other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
    };
    let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
    // FIX(review): was `count as usize` — unsound cast.
    // Chose option (C) here: propagate error since this is a user-facing helper.
    usize::try_from(count)
        .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
}

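/// Rebuilds `operational_current` for the given collections by replaying
/// every mutation in `mutation_order`: the last `put` for a record key wins,
/// and a trailing `delete` leaves no current row for that key.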
fn rebuild_operational_current_rows(
    tx: &rusqlite::Transaction<'_>,
    collections: &[String],
) -> Result<usize, EngineError> {
    let mut rebuilt_rows = 0usize;
    clear_operational_current_rows(tx, collections)?;
    let mut ins_current = tx.prepare_cached(
        "INSERT INTO operational_current \
         (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
         VALUES (?1, ?2, ?3, ?4, ?5)",
    )?;

    for collection in collections {
        let mut stmt = tx.prepare(
            "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
             FROM operational_mutations \
             WHERE collection_name = ?1 \
             ORDER BY record_key, mutation_order",
        )?;
        let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
            std::collections::HashMap::new();
        let rows = stmt.query_map([collection], map_operational_mutation_row)?;
        for row in rows {
            let mutation = row?;
            match mutation.op_kind.as_str() {
                "put" => {
                    latest_by_key.insert(
                        mutation.record_key,
                        Some((mutation.payload_json, mutation.created_at, mutation.id)),
                    );
                }
                "delete" => {
                    latest_by_key.insert(mutation.record_key, None);
                }
                _ => {}
            }
        }

        for (record_key, state) in latest_by_key {
            if let Some((payload_json, updated_at, last_mutation_id)) = state {
                ins_current.execute(rusqlite::params![
                    collection,
                    record_key,
                    payload_json,
                    updated_at,
                    last_mutation_id,
                ])?;
                rebuilt_rows += 1;
            }
        }
    }

    drop(ins_current);
    Ok(rebuilt_rows)
}

fn clear_operational_current_rows(
    tx: &rusqlite::Transaction<'_>,
    collections: &[String],
) -> Result<(), EngineError> {
    let mut delete_current =
        tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
    let mut delete_secondary_current = tx.prepare_cached(
        "DELETE FROM operational_secondary_index_entries \
         WHERE collection_name = ?1 AND subject_kind = 'current'",
    )?;
    for collection in collections {
        delete_secondary_current.execute([collection])?;
        delete_current.execute([collection])?;
    }
    drop(delete_secondary_current);
    drop(delete_current);
    Ok(())
}

fn clear_operational_secondary_index_entries(
    tx: &rusqlite::Transaction<'_>,
    collection_name: &str,
) -> Result<(), EngineError> {
    tx.execute(
        "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
        [collection_name],
    )?;
    Ok(())
}

fn insert_operational_secondary_index_entry(
    tx: &rusqlite::Transaction<'_>,
    collection_name: &str,
    subject_kind: &str,
    mutation_id: &str,
    record_key: &str,
    entry: &crate::operational::OperationalSecondaryIndexEntry,
) -> Result<(), EngineError> {
    tx.execute(
        "INSERT INTO operational_secondary_index_entries \
         (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
          slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
        rusqlite::params![
            collection_name,
            entry.index_name,
            subject_kind,
            mutation_id,
            record_key,
            entry.sort_timestamp,
            entry.slot1_text,
            entry.slot1_integer,
            entry.slot2_text,
            entry.slot2_integer,
            entry.slot3_text,
            entry.slot3_integer,
        ],
    )?;
    Ok(())
}

fn rebuild_operational_secondary_index_entries(
    tx: &rusqlite::Transaction<'_>,
    collection_name: &str,
    collection_kind: OperationalCollectionKind,
    indexes: &[OperationalSecondaryIndexDefinition],
) -> Result<(usize, usize), EngineError> {
    clear_operational_secondary_index_entries(tx, collection_name)?;

    let mut mutation_entries_rebuilt = 0usize;
    if collection_kind == OperationalCollectionKind::AppendOnlyLog {
        let mut stmt = tx.prepare(
            "SELECT id, record_key, payload_json FROM operational_mutations \
             WHERE collection_name = ?1 ORDER BY mutation_order",
        )?;
        let rows = stmt
            .query_map([collection_name], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, String>(2)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(stmt);
        for (mutation_id, record_key, payload_json) in rows {
            for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
                insert_operational_secondary_index_entry(
                    tx,
                    collection_name,
                    "mutation",
                    &mutation_id,
                    &record_key,
                    &entry,
                )?;
                mutation_entries_rebuilt += 1;
            }
        }
    }

    let mut current_entries_rebuilt = 0usize;
    if collection_kind == OperationalCollectionKind::LatestState {
        let mut stmt = tx.prepare(
            "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
             WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
        )?;
        let rows = stmt
            .query_map([collection_name], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, String>(1)?,
                    row.get::<_, i64>(2)?,
                    row.get::<_, String>(3)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;
        drop(stmt);
        for (record_key, payload_json, updated_at, last_mutation_id) in rows {
            for entry in
                extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
            {
                insert_operational_secondary_index_entry(
                    tx,
                    collection_name,
                    "current",
                    &last_mutation_id,
                    &record_key,
                    &entry,
                )?;
                current_entries_rebuilt += 1;
            }
        }
    }

    Ok((mutation_entries_rebuilt, current_entries_rebuilt))
}

fn collect_strings_tx(
    tx: &rusqlite::Transaction<'_>,
    sql: &str,
    value: &str,
) -> Result<Vec<String>, EngineError> {
    let mut stmt = tx.prepare(sql)?;
    let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
    rows.collect::<Result<Vec<_>, _>>()
        .map_err(EngineError::from)
}

/// Convert a non-negative i64 count to usize, panicking on negative values
/// which would indicate data corruption.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    usize::try_from(val).expect("count(*) must be non-negative")
}

/// Runs a parameterized query and collects the first column as strings.
///
/// NOTE(review): sql parameter must be a hardcoded query string, never user input.
/// Options: (A) doc comment, (B) whitelist refactor like `count_source_ref`, (C) leave as-is.
/// Chose (A): function is private, only called with hardcoded SQL from `trace_source`.
/// Whitelist refactor not practical — queries have different SELECT/ORDER BY per table.
fn collect_strings(
    conn: &rusqlite::Connection,
    sql: &str,
    param: &str,
) -> Result<Vec<String>, EngineError> {
    let mut stmt = conn.prepare(sql)?;
    let values = stmt
        .query_map([param], |row| row.get::<_, String>(0))?
        .collect::<Result<Vec<_>, _>>()?;
    Ok(values)
}

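/// Collects logical ids of superseded edges touching `logical_id` whose
/// `edge_retire` provenance event is at or after the node's retire event
/// (same `source_ref`, with `rowid` breaking ties at equal timestamps) and
/// which have no remaining active version.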
fn collect_edge_logical_ids_for_restore(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
    retire_source_ref: Option<&str>,
    retire_created_at: i64,
    retire_event_rowid: i64,
) -> Result<Vec<String>, EngineError> {
    let mut stmt = tx.prepare(
        "SELECT DISTINCT e.logical_id \
         FROM edges e \
         JOIN provenance_events p \
           ON p.subject = e.logical_id \
          AND p.event_type = 'edge_retire' \
          AND ( \
                p.created_at > ?3 \
                OR (p.created_at = ?3 AND p.rowid >= ?4) \
          ) \
          AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
         WHERE e.superseded_at IS NOT NULL \
           AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
           AND NOT EXISTS ( \
                SELECT 1 FROM edges active \
                WHERE active.logical_id = e.logical_id \
                  AND active.superseded_at IS NULL \
           ) \
         ORDER BY e.logical_id",
    )?;
    let edge_ids = stmt
        .query_map(
            rusqlite::params![
                logical_id,
                retire_source_ref,
                retire_created_at,
                retire_event_rowid
            ],
            |row| row.get::<_, String>(0),
        )?
        .collect::<Result<Vec<_>, _>>()?;
    Ok(edge_ids)
}

/// Restores edges for a node being restored, skipping any whose counterpart
/// endpoint is not active (e.g. still retired or purged).
fn restore_validated_edges(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
    retire_source_ref: Option<&str>,
    retire_created_at: i64,
    retire_event_rowid: i64,
) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
    let edge_logical_ids = collect_edge_logical_ids_for_restore(
        tx,
        logical_id,
        retire_source_ref,
        retire_created_at,
        retire_event_rowid,
    )?;
    let mut restored = 0usize;
    let mut skipped = Vec::new();
    for edge_logical_id in &edge_logical_ids {
        let edge_detail: Option<(String, String, String)> = tx
            .query_row(
                "SELECT row_id, source_logical_id, target_logical_id FROM edges \
                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
                [edge_logical_id.as_str()],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .optional()?;
        let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
            continue;
        };
        let other_endpoint = if source_lid == logical_id {
            &target_lid
        } else {
            &source_lid
        };
        let endpoint_active: bool = tx
            .query_row(
                "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
                [other_endpoint.as_str()],
                |_| Ok(true),
            )
            .optional()?
            .unwrap_or(false);
        if !endpoint_active {
            skipped.push(SkippedEdge {
                edge_logical_id: edge_logical_id.clone(),
                missing_endpoint: other_endpoint.clone(),
            });
            continue;
        }
        restored += tx.execute(
            "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
            [edge_row_id.as_str()],
        )?;
    }
    Ok((restored, skipped))
}

#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    match tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    ) {
        Ok(count) => Ok(i64_to_usize(count)),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}

#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn count_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}

#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    match tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    ) {
        Ok(count) => Ok(count),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
        {
            Ok(0)
        }
        Err(error) => Err(EngineError::Sqlite(error)),
    }
}

#[cfg(not(feature = "sqlite-vec"))]
#[allow(clippy::unnecessary_wraps)]
fn delete_vec_rows_for_logical_id(
    _tx: &rusqlite::Transaction<'_>,
    _logical_id: &str,
) -> Result<usize, EngineError> {
    Ok(0)
}

fn ensure_operational_collection_registered(
    conn: &rusqlite::Connection,
    collection_name: &str,
) -> Result<(), EngineError> {
    if load_operational_collection_record(conn, collection_name)?.is_none() {
        return Err(EngineError::InvalidWrite(format!(
            "operational collection '{collection_name}' is not registered"
        )));
    }
    Ok(())
}

fn load_operational_collection_record(
    conn: &rusqlite::Connection,
    name: &str,
) -> Result<Option<OperationalCollectionRecord>, EngineError> {
    conn.query_row(
        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
         FROM operational_collections WHERE name = ?1",
        [name],
        map_operational_collection_row,
    )
    .optional()
    .map_err(EngineError::Sqlite)
}

fn validate_append_only_operational_collection(
    record: &OperationalCollectionRecord,
    operation: &str,
) -> Result<(), EngineError> {
    if record.kind != OperationalCollectionKind::AppendOnlyLog {
        return Err(EngineError::InvalidWrite(format!(
            "operational collection '{}' must be append_only_log to {operation}",
            record.name
        )));
    }
    Ok(())
}

#[derive(Clone, Debug, PartialEq, Eq)]
struct CompiledOperationalReadFilter {
    field: String,
    condition: OperationalReadCondition,
}

#[derive(Clone, Debug)]
struct MatchedAppendOnlySecondaryIndexRead<'a> {
    index_name: &'a str,
    value_filter: &'a CompiledOperationalReadFilter,
    time_range: Option<&'a CompiledOperationalReadFilter>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    ExactString(String),
    ExactInteger(i64),
    Prefix(String),
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}

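/// Resolves the effective read limit: `None` falls back to
/// `DEFAULT_OPERATIONAL_READ_LIMIT`, an explicit zero is rejected, and the
/// result is clamped to `MAX_OPERATIONAL_READ_LIMIT`. For example,
/// `operational_read_limit(Some(0))` is an error, while
/// `operational_read_limit(None)` yields the default.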
fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
    let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
    if applied_limit == 0 {
        return Err(EngineError::InvalidWrite(
            "operational read limit must be greater than zero".to_owned(),
        ));
    }
    Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
}

fn parse_operational_filter_fields(
    filter_fields_json: &str,
) -> Result<Vec<OperationalFilterField>, String> {
    let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
        .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
    let mut seen = std::collections::HashSet::new();
    for field in &fields {
        if field.name.trim().is_empty() {
            return Err("filter_fields_json field names must not be empty".to_owned());
        }
        if !seen.insert(field.name.as_str()) {
            return Err(format!(
                "filter_fields_json contains duplicate field '{}'",
                field.name
            ));
        }
        if field.modes.is_empty() {
            return Err(format!(
                "filter_fields_json field '{}' must declare at least one mode",
                field.name
            ));
        }
        if field.modes.contains(&OperationalFilterMode::Prefix)
            && field.field_type != OperationalFilterFieldType::String
        {
            return Err(format!(
                "filter field '{}' only supports prefix for string types",
                field.name
            ));
        }
    }
    Ok(fields)
}

fn compile_operational_read_filters(
    filters: &[OperationalFilterClause],
    declared_fields: &[OperationalFilterField],
) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
    let field_map = declared_fields
        .iter()
        .map(|field| (field.name.as_str(), field))
        .collect::<std::collections::HashMap<_, _>>();
    filters
        .iter()
        .map(|filter| match filter {
            OperationalFilterClause::Exact { field, value } => {
                let declared = field_map.get(field.as_str()).ok_or_else(|| {
                    EngineError::InvalidWrite(format!(
                        "operational read filter uses undeclared field '{field}'"
                    ))
                })?;
                if !declared.modes.contains(&OperationalFilterMode::Exact) {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read field '{field}' does not allow exact filters"
                    )));
                }
                let condition = match (declared.field_type, value) {
                    (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
                        OperationalReadCondition::ExactString(value.clone())
                    }
                    (
                        OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
                        OperationalFilterValue::Integer(value),
                    ) => OperationalReadCondition::ExactInteger(*value),
                    _ => {
                        return Err(EngineError::InvalidWrite(format!(
                            "operational read field '{field}' received a value with the wrong type"
                        )));
                    }
                };
                Ok(CompiledOperationalReadFilter {
                    field: field.clone(),
                    condition,
                })
            }
            OperationalFilterClause::Prefix { field, value } => {
                let declared = field_map.get(field.as_str()).ok_or_else(|| {
                    EngineError::InvalidWrite(format!(
                        "operational read filter uses undeclared field '{field}'"
                    ))
                })?;
                if !declared.modes.contains(&OperationalFilterMode::Prefix) {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read field '{field}' does not allow prefix filters"
                    )));
                }
                if declared.field_type != OperationalFilterFieldType::String {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read field '{field}' only supports prefix filters for strings"
                    )));
                }
                Ok(CompiledOperationalReadFilter {
                    field: field.clone(),
                    condition: OperationalReadCondition::Prefix(value.clone()),
                })
            }
            OperationalFilterClause::Range {
                field,
                lower,
                upper,
            } => {
                let declared = field_map.get(field.as_str()).ok_or_else(|| {
                    EngineError::InvalidWrite(format!(
                        "operational read filter uses undeclared field '{field}'"
                    ))
                })?;
                if !declared.modes.contains(&OperationalFilterMode::Range) {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read field '{field}' does not allow range filters"
                    )));
                }
                if !matches!(
                    declared.field_type,
                    OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
                ) {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read field '{field}' only supports range filters for integer/timestamp fields"
                    )));
                }
                if lower.is_none() && upper.is_none() {
                    return Err(EngineError::InvalidWrite(format!(
                        "operational read range filter for '{field}' must specify a lower or upper bound"
                    )));
                }
                Ok(CompiledOperationalReadFilter {
                    field: field.clone(),
                    condition: OperationalReadCondition::Range {
                        lower: *lower,
                        upper: *upper,
                    },
                })
            }
        })
        .collect()
}

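/// Matches a compiled filter set against the declared append-only
/// field+time indexes. A set qualifies only when it contains exactly one
/// value filter on the indexed field (exact/prefix for string values,
/// exact integer for integer or timestamp values), optionally one range
/// filter on the index's time field, and nothing else.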
fn match_append_only_secondary_index_read<'a>(
    filters: &'a [CompiledOperationalReadFilter],
    indexes: &'a [OperationalSecondaryIndexDefinition],
) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
    indexes.iter().find_map(|index| {
        let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
            name,
            field,
            value_type,
            time_field,
        } = index
        else {
            return None;
        };
        if !(1..=2).contains(&filters.len()) {
            return None;
        }

        let mut value_filter = None;
        let mut time_range = None;
        for filter in filters {
            if filter.field == *field {
                let supported = matches!(
                    (&filter.condition, value_type),
                    (
                        OperationalReadCondition::ExactString(_)
                            | OperationalReadCondition::Prefix(_),
                        crate::operational::OperationalSecondaryIndexValueType::String
                    ) | (
                        OperationalReadCondition::ExactInteger(_),
                        crate::operational::OperationalSecondaryIndexValueType::Integer
                            | crate::operational::OperationalSecondaryIndexValueType::Timestamp
                    )
                );
                if !supported || value_filter.is_some() {
                    return None;
                }
                value_filter = Some(filter);
                continue;
            }
            if filter.field == *time_field {
                if !matches!(filter.condition, OperationalReadCondition::Range { .. })
                    || time_range.is_some()
                {
                    return None;
                }
                time_range = Some(filter);
                continue;
            }
            return None;
        }

        value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
            index_name: name.as_str(),
            value_filter,
            time_range,
        })
    })
}

fn execute_operational_secondary_index_read(
    conn: &rusqlite::Connection,
    collection_name: &str,
    filters: &[CompiledOperationalReadFilter],
    indexes: &[OperationalSecondaryIndexDefinition],
    applied_limit: usize,
) -> Result<Option<OperationalReadReport>, EngineError> {
    use rusqlite::types::Value;

    let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
        return Ok(None);
    };

    let mut sql = String::from(
        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
         FROM operational_secondary_index_entries s \
         JOIN operational_mutations m ON m.id = s.mutation_id \
         WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
    );
    let mut params = vec![
        Value::from(collection_name.to_owned()),
        Value::from(matched.index_name.to_owned()),
    ];

    match &matched.value_filter.condition {
        OperationalReadCondition::ExactString(value) => {
            let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
            params.push(Value::from(value.clone()));
        }
        OperationalReadCondition::Prefix(value) => {
            let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
            params.push(Value::from(glob_prefix_pattern(value)));
        }
        OperationalReadCondition::ExactInteger(value) => {
            let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
            params.push(Value::from(*value));
        }
        OperationalReadCondition::Range { .. } => return Ok(None),
    }

    if let Some(time_range) = matched.time_range
        && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
    {
        if let Some(lower) = lower {
            let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
            params.push(Value::from(*lower));
        }
        if let Some(upper) = upper {
            let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
            params.push(Value::from(*upper));
        }
    }

    let _ = write!(
        sql,
        "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
        params.len() + 1
    );
    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
    )?));

    let mut stmt = conn.prepare(&sql)?;
    let mut rows = stmt
        .query_map(
            rusqlite::params_from_iter(params),
            map_operational_mutation_row,
        )?
        .collect::<Result<Vec<_>, _>>()?;
    let was_limited = rows.len() > applied_limit;
    if was_limited {
        rows.truncate(applied_limit);
    }

    Ok(Some(OperationalReadReport {
        collection_name: collection_name.to_owned(),
        row_count: rows.len(),
        applied_limit,
        was_limited,
        rows,
    }))
}

fn execute_operational_filtered_read(
    conn: &rusqlite::Connection,
    collection_name: &str,
    filters: &[CompiledOperationalReadFilter],
    applied_limit: usize,
) -> Result<OperationalReadReport, EngineError> {
    use rusqlite::types::Value;

    let mut sql = String::from(
        "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
         FROM operational_mutations m ",
    );
    let mut params = vec![Value::from(collection_name.to_owned())];
    for (index, filter) in filters.iter().enumerate() {
        let _ = write!(
            sql,
            "JOIN operational_filter_values f{index} \
             ON f{index}.mutation_id = m.id \
            AND f{index}.collection_name = m.collection_name "
        );
        match &filter.condition {
            OperationalReadCondition::ExactString(value) => {
                let _ = write!(
                    sql,
                    "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
                    params.len() + 1,
                    params.len() + 2
                );
                params.push(Value::from(filter.field.clone()));
                params.push(Value::from(value.clone()));
            }
            OperationalReadCondition::ExactInteger(value) => {
                let _ = write!(
                    sql,
                    "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
                    params.len() + 1,
                    params.len() + 2
                );
                params.push(Value::from(filter.field.clone()));
                params.push(Value::from(*value));
            }
            OperationalReadCondition::Prefix(value) => {
                let _ = write!(
                    sql,
                    "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
                    params.len() + 1,
                    params.len() + 2
                );
                params.push(Value::from(filter.field.clone()));
                params.push(Value::from(glob_prefix_pattern(value)));
            }
            OperationalReadCondition::Range { lower, upper } => {
                let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
                params.push(Value::from(filter.field.clone()));
                if let Some(lower) = lower {
                    let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
                    params.push(Value::from(*lower));
                }
                if let Some(upper) = upper {
                    let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
                    params.push(Value::from(*upper));
                }
            }
        }
    }
    let _ = write!(
        sql,
        "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
        params.len() + 1
    );
    params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
        |_| EngineError::Bridge("operational read limit overflow".to_owned()),
    )?));

    let mut stmt = conn.prepare(&sql)?;
    let mut rows = stmt
        .query_map(
            rusqlite::params_from_iter(params),
            map_operational_mutation_row,
        )?
        .collect::<Result<Vec<_>, _>>()?;
    let was_limited = rows.len() > applied_limit;
    if was_limited {
        rows.truncate(applied_limit);
    }
    Ok(OperationalReadReport {
        collection_name: collection_name.to_owned(),
        row_count: rows.len(),
        applied_limit,
        was_limited,
        rows,
    })
}

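/// Escapes the GLOB metacharacters `*`, `?`, and `[` in `value` and appends
/// a trailing `*`, producing a pattern that matches any string starting
/// with `value`. For example, `glob_prefix_pattern("a*b")` yields
/// `"a[*]b*"`.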
fn glob_prefix_pattern(value: &str) -> String {
    let mut pattern = String::with_capacity(value.len() + 1);
    for ch in value.chars() {
        match ch {
            '*' => pattern.push_str("[*]"),
            '?' => pattern.push_str("[?]"),
            '[' => pattern.push_str("[[]"),
            _ => pattern.push(ch),
        }
    }
    pattern.push('*');
    pattern
}

#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    field_name: String,
    string_value: Option<String>,
    integer_value: Option<i64>,
}

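/// Extracts declared filter-field values from a payload's top-level JSON
/// object. Payloads that are not valid JSON objects yield no values, and
/// fields whose JSON type does not match the declared type are skipped
/// rather than treated as errors.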
fn extract_operational_filter_values(
    filter_fields: &[OperationalFilterField],
    payload_json: &str,
) -> Vec<ExtractedOperationalFilterValue> {
    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
        return Vec::new();
    };
    let Some(object) = parsed.as_object() else {
        return Vec::new();
    };

    filter_fields
        .iter()
        .filter_map(|field| {
            let value = object.get(&field.name)?;
            match field.field_type {
                OperationalFilterFieldType::String => {
                    value
                        .as_str()
                        .map(|string_value| ExtractedOperationalFilterValue {
                            field_name: field.name.clone(),
                            string_value: Some(string_value.to_owned()),
                            integer_value: None,
                        })
                }
                OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
                    value
                        .as_i64()
                        .map(|integer_value| ExtractedOperationalFilterValue {
                            field_name: field.name.clone(),
                            string_value: None,
                            integer_value: Some(integer_value),
                        })
                }
            }
        })
        .collect()
}

fn operational_compaction_candidates(
    conn: &rusqlite::Connection,
    retention_json: &str,
    collection_name: &str,
) -> Result<(Vec<String>, Option<i64>), EngineError> {
    operational_compaction_candidates_at(
        conn,
        retention_json,
        collection_name,
        current_unix_timestamp()?,
    )
}

fn operational_compaction_candidates_at(
    conn: &rusqlite::Connection,
    retention_json: &str,
    collection_name: &str,
    now_timestamp: i64,
) -> Result<(Vec<String>, Option<i64>), EngineError> {
    let policy = parse_operational_retention_policy(retention_json)?;
    match policy {
        OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
            let before_timestamp = now_timestamp - max_age_seconds;
            let mut stmt = conn.prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = ?1 AND created_at < ?2 \
                 ORDER BY mutation_order",
            )?;
            let mutation_ids = stmt
                .query_map(
                    rusqlite::params![collection_name, before_timestamp],
                    |row| row.get::<_, String>(0),
                )?
                .collect::<Result<Vec<_>, _>>()?;
            Ok((mutation_ids, Some(before_timestamp)))
        }
        OperationalRetentionPolicy::KeepLast { max_rows } => {
            let mut stmt = conn.prepare(
                "SELECT id FROM operational_mutations \
                 WHERE collection_name = ?1 \
                 ORDER BY mutation_order DESC",
            )?;
            let ordered_ids = stmt
                .query_map([collection_name], |row| row.get::<_, String>(0))?
                .collect::<Result<Vec<_>, _>>()?;
            Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
        }
    }
}

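/// Parses and validates a retention policy: keep-all needs no bounds,
/// purge-before-seconds requires a positive `max_age_seconds`, and
/// keep-last requires a non-zero `max_rows`. The exact JSON shape is
/// governed by the serde attributes on `OperationalRetentionPolicy`
/// (defined elsewhere), so the variant names here are only indicative.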
fn parse_operational_retention_policy(
    retention_json: &str,
) -> Result<OperationalRetentionPolicy, EngineError> {
    let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
        .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
    match policy {
        OperationalRetentionPolicy::KeepAll => Ok(policy),
        OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
            if max_age_seconds <= 0 {
                return Err(EngineError::InvalidWrite(
                    "retention_json max_age_seconds must be greater than zero".to_owned(),
                ));
            }
            Ok(policy)
        }
        OperationalRetentionPolicy::KeepLast { max_rows } => {
            if max_rows == 0 {
                return Err(EngineError::InvalidWrite(
                    "retention_json max_rows must be greater than zero".to_owned(),
                ));
            }
            Ok(policy)
        }
    }
}

fn load_operational_retention_records(
    conn: &rusqlite::Connection,
    collection_names: Option<&[String]>,
    max_collections: Option<usize>,
) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
    let limit = max_collections.unwrap_or(usize::MAX);
    if limit == 0 {
        return Err(EngineError::InvalidWrite(
            "max_collections must be greater than zero".to_owned(),
        ));
    }

    let mut records = Vec::new();
    if let Some(collection_names) = collection_names {
        for name in collection_names.iter().take(limit) {
            let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
                EngineError::InvalidWrite(format!(
                    "operational collection '{name}' is not registered"
                ))
            })?;
            records.push(record);
        }
        return Ok(records);
    }

    let mut stmt = conn.prepare(
        "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
         FROM operational_collections ORDER BY name",
    )?;
    let rows = stmt
        .query_map([], map_operational_collection_row)?
        .take(limit)
        .collect::<Result<Vec<_>, _>>()?;
    Ok(rows)
}

fn last_operational_retention_run_at(
    conn: &rusqlite::Connection,
    collection_name: &str,
) -> Result<Option<i64>, EngineError> {
    conn.query_row(
        "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
        [collection_name],
        |row| row.get(0),
    )
    .optional()
    .map_err(EngineError::Sqlite)
    .map(Option::flatten)
}

fn count_operational_mutations_for_collection(
    conn: &rusqlite::Connection,
    collection_name: &str,
) -> Result<usize, EngineError> {
    let count: i64 = conn.query_row(
        "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
        [collection_name],
        |row| row.get(0),
    )?;
    usize::try_from(count).map_err(|_| {
        EngineError::Bridge(format!("count overflow for collection {collection_name}"))
    })
}

fn retention_action_kind_and_limit(
    policy: &OperationalRetentionPolicy,
) -> (OperationalRetentionActionKind, Option<usize>) {
    match policy {
        OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
        OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
            (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
        }
        OperationalRetentionPolicy::KeepLast { max_rows } => {
            (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
        }
    }
}

fn plan_operational_retention_item(
    conn: &rusqlite::Connection,
    record: &OperationalCollectionRecord,
    now_timestamp: i64,
) -> Result<OperationalRetentionPlanItem, EngineError> {
    let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
    if record.kind != OperationalCollectionKind::AppendOnlyLog {
        return Ok(OperationalRetentionPlanItem {
            collection_name: record.name.clone(),
            action_kind: OperationalRetentionActionKind::Noop,
            candidate_deletions: 0,
            before_timestamp: None,
            max_rows: None,
            last_run_at,
        });
    }
    let policy = parse_operational_retention_policy(&record.retention_json)?;
    let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
    let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
        conn,
        &record.retention_json,
        &record.name,
        now_timestamp,
    )?;
    Ok(OperationalRetentionPlanItem {
        collection_name: record.name.clone(),
        action_kind,
        candidate_deletions: candidate_ids.len(),
        before_timestamp,
        max_rows,
        last_run_at,
    })
}

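/// Executes (or simulates) one retention pass. With `dry_run` set, the
/// candidate deletions are reported and `rows_remaining` is estimated via
/// `saturating_sub` without touching any rows; otherwise candidates are
/// deleted, a provenance event is recorded, and a row is appended to
/// `operational_retention_runs` for non-noop actions.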
5149fn run_operational_retention_item(
5150    tx: &rusqlite::Transaction<'_>,
5151    record: &OperationalCollectionRecord,
5152    now_timestamp: i64,
5153    dry_run: bool,
5154) -> Result<OperationalRetentionRunItem, EngineError> {
5155    let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
5156    let mut deleted_mutations = 0usize;
5157    if record.kind == OperationalCollectionKind::AppendOnlyLog
5158        && plan.action_kind != OperationalRetentionActionKind::Noop
5159        && plan.candidate_deletions > 0
5160        && !dry_run
5161    {
5162        let (candidate_ids, _) = operational_compaction_candidates_at(
5163            tx,
5164            &record.retention_json,
5165            &record.name,
5166            now_timestamp,
5167        )?;
5168        let mut delete_stmt =
5169            tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
5170        for mutation_id in &candidate_ids {
5171            delete_stmt.execute([mutation_id.as_str()])?;
5172            deleted_mutations += 1;
5173        }
5174        drop(delete_stmt);
5175
5176        persist_simple_provenance_event(
5177            tx,
5178            "operational_retention_run",
5179            &record.name,
5180            Some(serde_json::json!({
5181                "action_kind": plan.action_kind,
5182                "deleted_mutations": deleted_mutations,
5183                "before_timestamp": plan.before_timestamp,
5184                "max_rows": plan.max_rows,
5185                "executed_at": now_timestamp,
5186            })),
5187        )?;
5188    }
5189
5190    let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
5191    let effective_deleted_mutations = if dry_run {
5192        plan.candidate_deletions
5193    } else {
5194        deleted_mutations
5195    };
5196    let rows_remaining = if dry_run {
5197        live_rows_remaining.saturating_sub(effective_deleted_mutations)
5198    } else {
5199        live_rows_remaining
5200    };
5201    if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
5202        tx.execute(
5203            "INSERT INTO operational_retention_runs \
5204             (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
5205             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
5206            rusqlite::params![
5207                new_id(),
5208                record.name,
5209                now_timestamp,
5210                serde_json::to_string(&plan.action_kind)
5211                    .unwrap_or_else(|_| "\"noop\"".to_owned())
5212                    .trim_matches('"')
5213                    .to_owned(),
5214                i32::from(dry_run),
5215                deleted_mutations,
5216                rows_remaining,
5217                serde_json::json!({
5218                    "before_timestamp": plan.before_timestamp,
5219                    "max_rows": plan.max_rows,
5220                })
5221                .to_string(),
5222            ],
5223        )?;
5224    }
5225
5226    Ok(OperationalRetentionRunItem {
5227        collection_name: plan.collection_name,
5228        action_kind: plan.action_kind,
5229        deleted_mutations: effective_deleted_mutations,
5230        before_timestamp: plan.before_timestamp,
5231        max_rows: plan.max_rows,
5232        rows_remaining,
5233    })
5234}
5235
5236fn current_unix_timestamp() -> Result<i64, EngineError> {
5237    let now = SystemTime::now()
5238        .duration_since(SystemTime::UNIX_EPOCH)
5239        .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
5240    i64::try_from(now.as_secs())
5241        .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
5242}
5243
5244fn map_operational_collection_row(
5245    row: &rusqlite::Row<'_>,
5246) -> Result<OperationalCollectionRecord, rusqlite::Error> {
5247    let kind_text: String = row.get(1)?;
5248    let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
5249        rusqlite::Error::FromSqlConversionFailure(
5250            1,
5251            rusqlite::types::Type::Text,
5252            Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
5253        )
5254    })?;
5255    Ok(OperationalCollectionRecord {
5256        name: row.get(0)?,
5257        kind,
5258        schema_json: row.get(2)?,
5259        retention_json: row.get(3)?,
5260        filter_fields_json: row.get(4)?,
5261        validation_json: row.get(5)?,
5262        secondary_indexes_json: row.get(6)?,
5263        format_version: row.get(7)?,
5264        created_at: row.get(8)?,
5265        disabled_at: row.get(9)?,
5266    })
5267}
5268
5269fn map_operational_mutation_row(
5270    row: &rusqlite::Row<'_>,
5271) -> Result<OperationalMutationRow, rusqlite::Error> {
5272    Ok(OperationalMutationRow {
5273        id: row.get(0)?,
5274        collection_name: row.get(1)?,
5275        record_key: row.get(2)?,
5276        op_kind: row.get(3)?,
5277        payload_json: row.get(4)?,
5278        source_ref: row.get(5)?,
5279        created_at: row.get(6)?,
5280    })
5281}
5282
5283fn map_operational_current_row(
5284    row: &rusqlite::Row<'_>,
5285) -> Result<OperationalCurrentRow, rusqlite::Error> {
5286    Ok(OperationalCurrentRow {
5287        collection_name: row.get(0)?,
5288        record_key: row.get(1)?,
5289        payload_json: row.get(2)?,
5290        updated_at: row.get(3)?,
5291        last_mutation_id: row.get(4)?,
5292    })
5293}
5294
5295#[cfg(test)]
5296#[allow(clippy::expect_used)]
5297mod tests {
5298    use std::fs;
5299    use std::sync::Arc;
5300
5301    use fathomdb_schema::SchemaManager;
5302    use tempfile::NamedTempFile;
5303
5304    use super::{
5305        AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
5306        VectorRegenerationConfig,
5307    };
5308    use crate::embedder::{EmbedderError, QueryEmbedder, QueryEmbedderIdentity};
5309    use crate::projection::ProjectionTarget;
5310    use crate::sqlite;
5311    use crate::{EngineError, OperationalCollectionKind, OperationalRegisterRequest};
5312
5313    #[cfg(feature = "sqlite-vec")]
5314    use crate::{ExecutionCoordinator, TelemetryCounters};
5315
5316    #[cfg(feature = "sqlite-vec")]
5317    use fathomdb_query::QueryBuilder;
5318
5319    #[cfg(feature = "sqlite-vec")]
5320    use super::load_vector_regeneration_config;
5321
5322    /// In-process embedder used by the regeneration test suite. The
5323    /// vector is parameterized so individual tests can distinguish which
5324    /// embedder produced which profile row.
5325    #[derive(Debug)]
5326    #[allow(dead_code)]
5327    struct TestEmbedder {
5328        identity: QueryEmbedderIdentity,
5329        vector: Vec<f32>,
5330    }
5331
5332    #[allow(dead_code)]
5333    impl TestEmbedder {
5334        fn new(model: &str, dimension: usize) -> Self {
5335            Self {
5336                identity: QueryEmbedderIdentity {
5337                    model_identity: model.to_owned(),
5338                    model_version: "1.0.0".to_owned(),
5339                    dimension,
5340                    normalization_policy: "l2".to_owned(),
5341                },
5342                vector: vec![1.0; dimension],
5343            }
5344        }
5345    }
5346
5347    impl QueryEmbedder for TestEmbedder {
5348        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5349            Ok(self.vector.clone())
5350        }
5351        fn identity(&self) -> QueryEmbedderIdentity {
5352            self.identity.clone()
5353        }
5354    }
5355
5356    /// Embedder that always fails — used to exercise the post-request
5357    /// failure audit path without the complexity of subprocess machinery.
5358    #[derive(Debug)]
5359    #[allow(dead_code)]
5360    struct FailingEmbedder {
5361        identity: QueryEmbedderIdentity,
5362    }
5363
5364    impl QueryEmbedder for FailingEmbedder {
5365        fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
5366            Err(EmbedderError::Failed("test failure".to_owned()))
5367        }
5368        fn identity(&self) -> QueryEmbedderIdentity {
5369            self.identity.clone()
5370        }
5371    }
5372
5373    #[allow(dead_code)]
5374    #[cfg(unix)]
5375    fn set_file_mode(path: &std::path::Path, mode: u32) {
5376        use std::os::unix::fs::PermissionsExt;
5377
5378        let mut permissions = fs::metadata(path).expect("script metadata").permissions();
5379        permissions.set_mode(mode);
5380        fs::set_permissions(path, permissions).expect("chmod");
5381    }
5382
5383    #[allow(dead_code)]
5384    #[cfg(not(unix))]
5385    fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
5386
5387    fn setup() -> (NamedTempFile, AdminService) {
5388        let db = NamedTempFile::new().expect("temp file");
5389        let schema = Arc::new(SchemaManager::new());
5390        {
5391            let conn = sqlite::open_connection(db.path()).expect("connection");
5392            schema.bootstrap(&conn).expect("bootstrap");
5393        }
5394        let service = AdminService::new(db.path(), Arc::clone(&schema));
5395        (db, service)
5396    }
5397
5398    #[test]
5399    fn check_integrity_includes_active_uniqueness_count() {
5400        let (_db, service) = setup();
5401        let report = service.check_integrity().expect("integrity check");
5402        assert_eq!(report.duplicate_active_logical_ids, 0);
5403        assert_eq!(report.operational_missing_collections, 0);
5404        assert_eq!(report.operational_missing_last_mutations, 0);
5405    }
5406
5407    #[test]
5408    fn trace_source_returns_node_logical_ids() {
5409        let (db, service) = setup();
5410        {
5411            let conn = sqlite::open_connection(db.path()).expect("conn");
5412            conn.execute(
5413                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5414                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
5415                [],
5416            )
5417            .expect("insert node");
5418        }
5419        let report = service.trace_source("source-1").expect("trace");
5420        assert_eq!(report.node_rows, 1);
5421        assert_eq!(report.node_logical_ids, vec!["lg1"]);
5422    }
5423
5424    #[test]
5425    fn trace_source_includes_operational_mutations() {
5426        let (db, service) = setup();
5427        {
5428            let conn = sqlite::open_connection(db.path()).expect("conn");
5429            conn.execute(
5430                "INSERT INTO operational_collections \
5431                 (name, kind, schema_json, retention_json, format_version, created_at) \
5432                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5433                [],
5434            )
5435            .expect("insert collection");
5436            conn.execute(
5437                "INSERT INTO operational_mutations \
5438                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5439                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
5440                [],
5441            )
5442            .expect("insert mutation");
5443        }
5444
5445        let report = service.trace_source("source-1").expect("trace");
5446        assert_eq!(report.operational_mutation_rows, 1);
5447        assert_eq!(report.operational_mutation_ids, vec!["m1"]);
5448    }
5449
    #[test]
    fn excise_source_restores_prior_active_node() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
                [],
            )
            .expect("insert v1 superseded");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
                [],
            )
            .expect("insert v2 active");
        }
        service.excise_source("source-2").expect("excise");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let active_row_id: String = conn
                .query_row(
                    "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
                    [],
                    |row| row.get(0),
                )
                .expect("active row exists after excise");
            assert_eq!(active_row_id, "r1");
        }
    }

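    // For operational collections, excising a source deletes its mutations
    // and then (as the final assertions show) rebuilds operational_current
    // from the surviving mutation history.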
    #[test]
    fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections \
                 (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("insert collection");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
                [],
            )
            .expect("insert prior mutation");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
                [],
            )
            .expect("insert excised mutation");
            conn.execute(
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
                [],
            )
            .expect("insert current row");
        }

        let traced = service
            .trace_source("source-2")
            .expect("trace before excise");
        assert_eq!(traced.operational_mutation_rows, 1);
        assert_eq!(traced.operational_mutation_ids, vec!["m2"]);

        let excised = service.excise_source("source-2").expect("excise");
        assert_eq!(excised.operational_mutation_rows, 0);
        assert!(excised.operational_mutation_ids.is_empty());

        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let remaining: i64 = conn
                .query_row(
                    "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
                    [],
                    |row| row.get(0),
                )
                .expect("remaining count");
            assert_eq!(remaining, 0);

            let current: (String, String) = conn
                .query_row(
                    "SELECT payload_json, last_mutation_id FROM operational_current \
                     WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                    [],
                    |row| Ok((row.get(0)?, row.get(1)?)),
                )
                .expect("rebuilt current row");
            assert_eq!(current.0, "{\"status\":\"old\"}");
            assert_eq!(current.1, "m1");
        }
    }

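    // restore_logical_id re-activates the last pre-retire revision of a node
    // together with its chunks, attached edges, and FTS rows; the node_retire
    // and edge_retire provenance events seeded below record what was
    // forgotten and when.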
    #[test]
    fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert edge retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
            conn.execute("DELETE FROM fts_nodes", [])
                .expect("clear fts");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.restored_node_rows, 1);
        assert_eq!(report.restored_edge_rows, 1);
        assert_eq!(report.restored_chunk_rows, 1);
        assert_eq!(report.restored_fts_rows, 1);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_node_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active node count");
        assert_eq!(active_node_count, 1);
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
        let fts_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("fts count");
        assert_eq!(fts_count, 1);
    }

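    // An edge retired slightly after the node retire event (created_at 201 vs
    // 200 below) shares the same source_ref ('forget-1'), so it is presumably
    // part of the same forget operation and should still be restored.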
    #[test]
    fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
                [],
            )
            .expect("insert target node");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
                [],
            )
            .expect("insert edge");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert node retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
                [],
            )
            .expect("insert edge retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
            conn.execute(
                "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
                [],
            )
            .expect("retire edge");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_edge_rows, 1);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_edge_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
                [],
                |row| row.get(0),
            )
            .expect("active edge count");
        assert_eq!(active_edge_count, 1);
    }

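    // When two retired revisions tie on both created_at and superseded_at,
    // the restore is expected to pick the later physical row (here
    // 'node-row-newer') rather than the first one inserted.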
    #[test]
    fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert older retired node");
            conn.execute(
                "INSERT INTO nodes \
                 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
                [],
            )
            .expect("insert newer retired node");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert older retire event");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert newer retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");

        assert!(!report.was_noop);
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let active_row: (String, String) = conn
            .query_row(
                "SELECT row_id, properties FROM nodes \
                 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .expect("restored active row");
        assert_eq!(active_row.0, "node-row-newer");
        assert_eq!(active_row.1, "{\"title\":\"newer\"}");
    }

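    // purge_logical_id is the hard-delete counterpart to restore: it removes
    // retired nodes, edges, chunks, and FTS rows, and records a
    // 'purge_logical_id' provenance event as a tombstone.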
    #[test]
    fn purge_logical_id_removes_retired_content_and_records_tombstone() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO edges \
                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired edge");
            conn.execute(
                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
                 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
                [],
            )
            .expect("insert fts");
        }

        let report = service.purge_logical_id("doc-1").expect("purge");
        assert_eq!(report.logical_id, "doc-1");
        assert!(!report.was_noop);
        assert_eq!(report.deleted_node_rows, 1);
        assert_eq!(report.deleted_edge_rows, 1);
        assert_eq!(report.deleted_chunk_rows, 1);
        assert_eq!(report.deleted_fts_rows, 1);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_nodes: i64 = conn
            .query_row(
                "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining nodes");
        assert_eq!(remaining_nodes, 0);
        let remaining_edges: i64 = conn
            .query_row(
                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining edges");
        assert_eq!(remaining_edges, 0);
        let remaining_chunks: i64 = conn
            .query_row(
                "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
                [],
                |row| row.get(0),
            )
            .expect("remaining chunks");
        assert_eq!(remaining_chunks, 0);
        let purge_events: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
                [],
                |row| row.get(0),
            )
            .expect("purge events");
        assert_eq!(purge_events, 1);
    }

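    // check_semantics treats chunks attached to a retired-but-preserved node
    // as healthy; only chunks with no node history at all (like 'ghost-doc'
    // below) count as orphaned.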
    #[test]
    fn check_semantics_accepts_preserved_retired_chunks() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 0);
    }

    #[test]
    fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
                [],
            )
            .expect("insert orphaned chunk");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 1);
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
                [],
            )
            .expect("insert orphaned chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
        }

        let report = service.check_semantics().expect("semantics");
        assert_eq!(report.orphaned_chunks, 1);
        assert_eq!(report.vec_rows_for_superseded_nodes, 1);
    }

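    // Under the sqlite-vec feature, restore also re-exposes preserved vector
    // rows: the test below runs a vector query end-to-end to confirm the
    // restored chunk is reachable again without re-ingesting or re-embedding.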
    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_logical_id_reestablishes_vector_search_without_reingest() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert retire event");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_vec_rows, 1);

        let coordinator = ExecutionCoordinator::open(
            db.path(),
            Arc::new(SchemaManager::new()),
            Some(4),
            1,
            Arc::new(TelemetryCounters::default()),
            None,
        )
        .expect("coordinator");
        let compiled = QueryBuilder::nodes("Document")
            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
            .compile()
            .expect("compile");
        let rows = coordinator
            .execute_compiled_read(&compiled)
            .expect("vector read");
        assert!(
            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
            "restore should make the preserved vec row visible again without re-ingest"
        );
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn purge_logical_id_deletes_vec_rows_for_retired_content() {
        let (db, service) = setup();
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
                [],
            )
            .expect("insert retired node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("insert vec row");
        }

        let report = service.purge_logical_id("doc-1").expect("purge");
        assert_eq!(report.deleted_vec_rows, 1);

        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let vec_count: i64 = conn
            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
                row.get(0)
            })
            .expect("vec count");
        assert_eq!(vec_count, 0);
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
        let (db, service) = setup();

        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            service
                .schema_manager
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
                [],
            )
            .expect("insert chunk");
        }

        let embedder = TestEmbedder::new("test-model", 4);
        service
            .regenerate_vector_embeddings(
                &embedder,
                &VectorRegenerationConfig {
                    profile: "default".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect("regenerate");

        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("insert retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("retire node");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_vec_rows, 1);

        let coordinator = ExecutionCoordinator::open(
            db.path(),
            Arc::new(SchemaManager::new()),
            Some(4),
            1,
            Arc::new(TelemetryCounters::default()),
            None,
        )
        .expect("coordinator");
        let compiled = QueryBuilder::nodes("Document")
            .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
            .compile()
            .expect("compile");
        let rows = coordinator
            .execute_compiled_read(&compiled)
            .expect("vector read");
        assert!(
            rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
            "restored logical_id should become visible through regenerated vectors"
        );
    }

    #[test]
    fn check_semantics_clean_db_returns_zeros() {
        let (_db, service) = setup();
        let report = service.check_semantics().expect("semantics check");
        assert_eq!(report.orphaned_chunks, 0);
        assert_eq!(report.null_source_ref_nodes, 0);
        assert_eq!(report.broken_step_fk, 0);
        assert_eq!(report.broken_action_fk, 0);
        assert_eq!(report.stale_fts_rows, 0);
        assert_eq!(report.fts_rows_for_superseded_nodes, 0);
        assert_eq!(report.dangling_edges, 0);
        assert_eq!(report.orphaned_supersession_chains, 0);
        assert_eq!(report.stale_vec_rows, 0);
        assert_eq!(report.vec_rows_for_superseded_nodes, 0);
        assert_eq!(report.missing_operational_current_rows, 0);
        assert_eq!(report.stale_operational_current_rows, 0);
        assert_eq!(report.disabled_collection_mutations, 0);
        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
        assert_eq!(report.duplicate_property_fts_rows, 0);
        assert_eq!(report.drifted_property_fts_rows, 0);
        assert!(report.warnings.is_empty());
    }

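    // Registering an operational collection round-trips through describe and
    // emits an 'operational_collection_registered' provenance event. The
    // validation contract updated in the second test below uses the JSON
    // shape exercised throughout this module, e.g.
    //   {"format_version":1,"mode":"enforce","additional_properties":false,
    //    "fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}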
    #[test]
    fn register_operational_collection_persists_and_emits_provenance() {
        let (db, service) = setup();
        let record = service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");

        assert_eq!(record.name, "connector_health");
        assert_eq!(record.kind, OperationalCollectionKind::LatestState);
        assert_eq!(record.schema_json, "{}");
        assert_eq!(record.retention_json, "{}");
        assert_eq!(record.filter_fields_json, "[]");
        assert!(record.created_at > 0);
        assert_eq!(record.disabled_at, None);

        let described = service
            .describe_operational_collection("connector_health")
            .expect("describe collection")
            .expect("collection exists");
        assert_eq!(described, record);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 1);
    }

    #[test]
    fn register_and_update_operational_collection_validation_round_trip() {
        let (db, service) = setup();
        let record = service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        assert_eq!(record.validation_json, "");

        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
        let updated = service
            .update_operational_collection_validation("connector_health", validation_json)
            .expect("update validation");
        assert_eq!(updated.validation_json, validation_json);

        let described = service
            .describe_operational_collection("connector_health")
            .expect("describe collection")
            .expect("collection exists");
        assert_eq!(described.validation_json, validation_json);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_validation_updated' \
                   AND subject = 'connector_health'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 1);
    }

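    // Secondary indexes can be registered after data already exists: updating
    // the definition backfills entries for prior mutations, and
    // rebuild_operational_secondary_indexes re-derives them from the mutation
    // log after a manual deletion.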
    #[test]
    fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
        let (db, service) = setup();
        let record = service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        assert_eq!(record.secondary_indexes_json, "[]");

        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "secondary-index-seed".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-1".to_owned(),
                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
                            source_ref: Some("src-1".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-2".to_owned(),
                            payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
                            source_ref: Some("src-2".to_owned()),
                        },
                    ],
                })
                .expect("seed writes");
        }

        let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
        let updated = service
            .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
            .expect("update secondary indexes");
        assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let entry_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_secondary_index_entries \
                 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
                [],
                |row| row.get(0),
            )
            .expect("secondary index count");
        assert_eq!(entry_count, 2);
        conn.execute(
            "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear index entries");
        drop(conn);

        let rebuild = service
            .rebuild_operational_secondary_indexes("audit_log")
            .expect("rebuild secondary indexes");
        assert_eq!(rebuild.collection_name, "audit_log");
        assert_eq!(rebuild.mutation_entries_rebuilt, 2);
        assert_eq!(rebuild.current_entries_rebuilt, 0);
    }

    #[test]
    fn register_operational_collection_rejects_invalid_validation_contract() {
        let (_db, service) = setup();

        let error = service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
                    .to_owned(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect_err("invalid validation contract should reject");

        assert!(matches!(error, EngineError::InvalidWrite(_)));
        assert!(error.to_string().contains("minimum/maximum"));
    }

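    // History validation is read-only: it reports rows that violate the
    // contract (apparently even when the contract's mode is "disabled", as
    // below) while leaving the mutation log intact, and the final assertion
    // shows it emits no provenance event.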
    #[test]
    fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
                    .to_owned(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "history-validation".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-1".to_owned(),
                            payload_json: r#"{"status":"ok"}"#.to_owned(),
                            source_ref: Some("src-1".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-2".to_owned(),
                            payload_json: r#"{"status":"bogus"}"#.to_owned(),
                            source_ref: Some("src-2".to_owned()),
                        },
                    ],
                })
                .expect("write");
        }

        let report = service
            .validate_operational_collection_history("audit_log")
            .expect("validate history");
        assert_eq!(report.collection_name, "audit_log");
        assert_eq!(report.checked_rows, 2);
        assert_eq!(report.invalid_row_count, 1);
        assert_eq!(report.issues.len(), 1);
        assert_eq!(report.issues[0].record_key, "evt-2");
        assert!(report.issues[0].message.contains("must be one of"));

        let trace = service
            .trace_operational_collection("audit_log", None)
            .expect("trace");
        assert_eq!(trace.mutation_count, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_history_validated' \
                   AND subject = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 0);
    }

    #[test]
    fn trace_operational_collection_returns_mutations_and_current_rows() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![crate::OperationalWrite::Put {
                        collection: "connector_health".to_owned(),
                        record_key: "gmail".to_owned(),
                        payload_json: r#"{"status":"ok"}"#.to_owned(),
                        source_ref: Some("src-1".to_owned()),
                    }],
                })
                .expect("write");
        }

        let report = service
            .trace_operational_collection("connector_health", Some("gmail"))
            .expect("trace");
        assert_eq!(report.collection_name, "connector_health");
        assert_eq!(report.record_key.as_deref(), Some("gmail"));
        assert_eq!(report.mutation_count, 1);
        assert_eq!(report.current_count, 1);
        assert_eq!(report.mutations[0].op_kind, "put");
        assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
    }

    #[test]
    fn trace_operational_collection_rejects_unknown_collection() {
        let (_db, service) = setup();

        let error = service
            .trace_operational_collection("missing_collection", None)
            .expect_err("unknown collection should fail");

        assert!(matches!(error, EngineError::InvalidWrite(_)));
        assert!(error.to_string().contains("is not registered"));
    }

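    // rebuild_operational_current re-derives latest_state current rows (and,
    // in the second test below, their secondary index entries) from the
    // mutation log, repairing rows that check_semantics reports missing.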
    #[test]
    fn rebuild_operational_current_repairs_missing_latest_state_rows() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![crate::OperationalWrite::Put {
                        collection: "connector_health".to_owned(),
                        record_key: "gmail".to_owned(),
                        payload_json: r#"{"status":"ok"}"#.to_owned(),
                        source_ref: Some("src-1".to_owned()),
                    }],
                })
                .expect("write");
        }
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
            )
            .expect("delete current row");
        }

        let before = service.check_semantics().expect("semantics before rebuild");
        assert_eq!(before.missing_operational_current_rows, 1);

        let repair = service
            .rebuild_operational_current(Some("connector_health"))
            .expect("rebuild current");
        assert_eq!(repair.collections_rebuilt, 1);
        assert_eq!(repair.current_rows_rebuilt, 1);

        let after = service.check_semantics().expect("semantics after rebuild");
        assert_eq!(after.missing_operational_current_rows, 0);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let payload: String = conn
            .query_row(
                "SELECT payload_json FROM operational_current \
                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
                |row| row.get(0),
            )
            .expect("restored payload");
        assert_eq!(payload, r#"{"status":"ok"}"#);
    }

    #[test]
    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![crate::OperationalWrite::Put {
                        collection: "connector_health".to_owned(),
                        record_key: "gmail".to_owned(),
                        payload_json: r#"{"status":"ok"}"#.to_owned(),
                        source_ref: Some("src-1".to_owned()),
                    }],
                })
                .expect("write");
        }
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            let entry_count: i64 = conn
                .query_row(
                    "SELECT count(*) FROM operational_secondary_index_entries \
                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
                    [],
                    |row| row.get(0),
                )
                .expect("secondary index count before repair");
            assert_eq!(entry_count, 1);
            conn.execute(
                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
            )
            .expect("delete current row");
        }

        service
            .rebuild_operational_current(Some("connector_health"))
            .expect("rebuild current");

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let entry_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_secondary_index_entries \
                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
                [],
                |row| row.get(0),
            )
            .expect("secondary index count after repair");
        assert_eq!(entry_count, 1);
    }

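    // The rebuild follows mutation_order, not mutation id or created_at: the
    // ids below ('m3', 'm2', 'm1') are deliberately seeded out of lexical
    // order with identical timestamps, so only mutation_order yields the
    // correct final payload.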
    #[test]
    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("seed collection");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
                [],
            )
            .expect("seed first put");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
                [],
            )
            .expect("seed delete");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
                [],
            )
            .expect("seed final put");
            conn.execute(
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
                [],
            )
            .expect("seed current");
        }

        let before = service.check_semantics().expect("semantics before rebuild");
        assert_eq!(before.missing_operational_current_rows, 0);
        assert_eq!(before.stale_operational_current_rows, 0);

        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
            )
            .expect("delete current row");
        }

        let missing = service.check_semantics().expect("semantics after delete");
        assert_eq!(missing.missing_operational_current_rows, 1);
        assert_eq!(missing.stale_operational_current_rows, 0);

        service
            .rebuild_operational_current(Some("connector_health"))
            .expect("rebuild current");

        let after = service.check_semantics().expect("semantics after rebuild");
        assert_eq!(after.missing_operational_current_rows, 0);
        assert_eq!(after.stale_operational_current_rows, 0);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let payload: String = conn
            .query_row(
                "SELECT payload_json FROM operational_current \
                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
                |row| row.get(0),
            )
            .expect("restored payload");
        assert_eq!(payload, r#"{"status":"new"}"#);
    }

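    // Disabling a collection stamps disabled_at, makes subsequent writes fail
    // with InvalidWrite, and emits an 'operational_collection_disabled'
    // provenance event.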
6678    #[test]
6679    fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6680        let (db, service) = setup();
6681        service
6682            .register_operational_collection(&OperationalRegisterRequest {
6683                name: "audit_log".to_owned(),
6684                kind: OperationalCollectionKind::AppendOnlyLog,
6685                schema_json: "{}".to_owned(),
6686                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6687                filter_fields_json: "[]".to_owned(),
6688                validation_json: String::new(),
6689                secondary_indexes_json: "[]".to_owned(),
6690                format_version: 1,
6691            })
6692            .expect("register collection");
6693
6694        let record = service
6695            .disable_operational_collection("audit_log")
6696            .expect("disable collection");
6697        assert_eq!(record.name, "audit_log");
6698        assert!(record.disabled_at.is_some());
6699
6700        let disabled_at = record.disabled_at.expect("disabled_at");
6701        let described = service
6702            .describe_operational_collection("audit_log")
6703            .expect("describe collection")
6704            .expect("collection exists");
6705        assert_eq!(described.disabled_at, Some(disabled_at));
6706
6707        let writer = crate::WriterActor::start(
6708            db.path(),
6709            Arc::new(SchemaManager::new()),
6710            crate::ProvenanceMode::Warn,
6711            Arc::new(crate::TelemetryCounters::default()),
6712        )
6713        .expect("writer");
6714        let error = writer
6715            .submit(crate::WriteRequest {
6716                label: "disabled-operational".to_owned(),
6717                nodes: vec![],
6718                node_retires: vec![],
6719                edges: vec![],
6720                edge_retires: vec![],
6721                chunks: vec![],
6722                runs: vec![],
6723                steps: vec![],
6724                actions: vec![],
6725                optional_backfills: vec![],
6726                vec_inserts: vec![],
6727                operational_writes: vec![crate::OperationalWrite::Append {
6728                    collection: "audit_log".to_owned(),
6729                    record_key: "evt-1".to_owned(),
6730                    payload_json: r#"{"type":"sync"}"#.to_owned(),
6731                    source_ref: Some("src-1".to_owned()),
6732                }],
6733            })
6734            .expect_err("disabled collection should reject writes");
6735        assert!(matches!(error, EngineError::InvalidWrite(_)));
6736        assert!(error.to_string().contains("is disabled"));
6737
6738        let conn = sqlite::open_connection(db.path()).expect("conn");
6739        let provenance_count: i64 = conn
6740            .query_row(
6741                "SELECT count(*) FROM provenance_events \
6742                 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6743                [],
6744                |row| row.get(0),
6745            )
6746            .expect("provenance count");
6747        assert_eq!(provenance_count, 1);
6748    }
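
    // Write-fence sketch: after `disable_operational_collection`, the writer
    // actor rejects appends with `EngineError::InvalidWrite`, so disabling
    // doubles as a hard write fence. Illustrative helper, not exercised by
    // the suite.
    #[allow(dead_code)]
    fn fence_collection_writes(service: &AdminService) -> Result<(), EngineError> {
        let record = service.disable_operational_collection("audit_log")?;
        // The engine stamps `disabled_at` as part of the same call.
        debug_assert!(record.disabled_at.is_some());
        Ok(())
    }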

    #[test]
    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
                [],
            )
            .expect("seed collection");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
                [],
            )
            .expect("seed event 1");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
                [],
            )
            .expect("seed event 2");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
                [],
            )
            .expect("seed event 3");
        }

        let report = service
            .purge_operational_collection("audit_log", 250)
            .expect("purge collection");
        assert_eq!(report.collection_name, "audit_log");
        assert_eq!(report.deleted_mutations, 2);
        assert_eq!(report.before_timestamp, 250);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-3".to_owned()]);
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 1);
    }
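
    // Purge sketch mirroring the cutoff semantics asserted above: mutations
    // created before the cutoff (evt-1 at 100 and evt-2 at 200 here) are
    // deleted, and the report echoes the cutoff back. Illustrative helper
    // only.
    #[allow(dead_code)]
    fn purge_before(service: &AdminService) -> Result<(), EngineError> {
        let report = service.purge_operational_collection("audit_log", 250)?;
        debug_assert_eq!(report.before_timestamp, 250);
        Ok(())
    }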

    #[test]
    fn compact_operational_collection_dry_run_reports_without_mutation() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
                conn.execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
            }
        }

        let report = service
            .compact_operational_collection("audit_log", true)
            .expect("compact collection");
        assert_eq!(report.collection_name, "audit_log");
        assert_eq!(report.deleted_mutations, 1);
        assert!(report.dry_run);
        assert_eq!(report.before_timestamp, None);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("remaining count");
        assert_eq!(remaining_count, 3);
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 0);
    }

    #[test]
    fn compact_operational_collection_keep_last_deletes_oldest_rows() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
                conn.execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
            }
        }

        let report = service
            .compact_operational_collection("audit_log", false)
            .expect("compact collection");
        assert_eq!(report.deleted_mutations, 1);
        assert!(!report.dry_run);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 1);
    }
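
    // Preview-then-apply sketch: the boolean selects dry-run mode, so the
    // natural caller pattern is to preview the deletion count first and only
    // then compact for real. Illustrative helper only.
    #[allow(dead_code)]
    fn compact_with_preview(service: &AdminService) -> Result<(), EngineError> {
        let preview = service.compact_operational_collection("audit_log", true)?;
        if preview.deleted_mutations > 0 {
            let applied = service.compact_operational_collection("audit_log", false)?;
            debug_assert!(!applied.dry_run);
        }
        Ok(())
    }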

    #[test]
    fn plan_and_run_operational_retention_keep_last() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
                conn.execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
            }
        }

        let plan = service
            .plan_operational_retention(1_000, None, Some(10))
            .expect("plan retention");
        assert_eq!(plan.collections_examined, 1);
        assert_eq!(plan.items[0].collection_name, "audit_log");
        assert_eq!(
            plan.items[0].action_kind,
            crate::operational::OperationalRetentionActionKind::KeepLast
        );
        assert_eq!(plan.items[0].candidate_deletions, 1);
        assert_eq!(plan.items[0].max_rows, Some(2));
        assert_eq!(plan.items[0].last_run_at, None);

        let dry_run = service
            .run_operational_retention(1_000, None, Some(10), true)
            .expect("dry-run retention");
        assert!(dry_run.dry_run);
        assert_eq!(dry_run.collections_acted_on, 1);
        assert_eq!(dry_run.items[0].deleted_mutations, 1);
        assert_eq!(dry_run.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("remaining count after dry run");
        assert_eq!(remaining_count, 3);
        let retention_run_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("retention run count");
        assert_eq!(retention_run_count, 0);
        drop(conn);

        let executed = service
            .run_operational_retention(1_000, None, Some(10), false)
            .expect("execute retention");
        assert_eq!(executed.collections_acted_on, 1);
        assert_eq!(executed.items[0].deleted_mutations, 1);
        assert_eq!(executed.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
        let last_run_at: i64 = conn
            .query_row(
                "SELECT executed_at FROM operational_retention_runs \
                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
                [],
                |row| row.get(0),
            )
            .expect("last run at");
        assert_eq!(last_run_at, 1_000);
    }
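
    // Retention-cycle sketch following the plan, dry-run, then execute order
    // exercised above. The first argument is the "now" timestamp later
    // recorded as `executed_at`; the `None`/`Some(10)` arguments are reused
    // verbatim from the test and not interpreted further here.
    #[allow(dead_code)]
    fn retention_cycle(service: &AdminService) -> Result<(), EngineError> {
        let plan = service.plan_operational_retention(1_000, None, Some(10))?;
        if plan.collections_examined > 0 {
            let preview = service.run_operational_retention(1_000, None, Some(10), true)?;
            debug_assert!(preview.dry_run);
            service.run_operational_retention(1_000, None, Some(10), false)?;
        }
        Ok(())
    }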

    #[test]
    fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
        let (db, service) = setup();
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
            [],
        )
        .expect("seed collection");
        for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                rusqlite::params![
                    format!("evt-{index}"),
                    format!("{{\"seq\":{index}}}"),
                    created_at,
                    index,
                ],
            )
            .expect("seed event");
        }
        drop(conn);

        let dry_run = service
            .run_operational_retention(1_000, None, Some(10), true)
            .expect("dry-run retention");
        assert!(dry_run.dry_run);
        assert_eq!(dry_run.collections_acted_on, 0);
        assert_eq!(dry_run.items[0].deleted_mutations, 0);
        assert_eq!(dry_run.items[0].rows_remaining, 2);
    }

    #[test]
    fn compact_operational_collection_rejects_latest_state() {
        let (_db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");

        let error = service
            .compact_operational_collection("connector_health", false)
            .expect_err("latest_state compaction should be rejected");
        assert!(matches!(error, EngineError::InvalidWrite(_)));
        assert!(error.to_string().contains("append_only_log"));
    }

    #[test]
    fn register_operational_collection_persists_filter_fields_json() {
        let (_db, service) = setup();

        let record = service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");

        assert_eq!(
            record.filter_fields_json,
            r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
        );
    }
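
    // Registration sketch: the filter contract is persisted verbatim as a
    // JSON string, so real callers usually build it with `serde_json` rather
    // than a hand-written literal. Field names and modes here mirror the
    // contracts used throughout these tests.
    #[allow(dead_code)]
    fn register_audit_log(service: &AdminService) -> Result<(), EngineError> {
        service.register_operational_collection(&OperationalRegisterRequest {
            name: "audit_log".to_owned(),
            kind: OperationalCollectionKind::AppendOnlyLog,
            schema_json: "{}".to_owned(),
            retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
            filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#.to_owned(),
            validation_json: String::new(),
            secondary_indexes_json: "[]".to_owned(),
            format_version: 1,
        })?;
        Ok(())
    }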

    #[test]
    fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-1".to_owned(),
                            payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
                            source_ref: Some("src-1".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-2".to_owned(),
                            payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
                            source_ref: Some("src-2".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-3".to_owned(),
                            payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
                            source_ref: Some("src-3".to_owned()),
                        },
                    ],
                })
                .expect("write");
        }

        let report = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![
                    crate::operational::OperationalFilterClause::Prefix {
                        field: "actor".to_owned(),
                        value: "alice".to_owned(),
                    },
                    crate::operational::OperationalFilterClause::Range {
                        field: "ts".to_owned(),
                        lower: Some(150),
                        upper: Some(250),
                    },
                ],
                limit: Some(10),
            })
            .expect("filtered read");

        assert_eq!(report.collection_name, "audit_log");
        assert_eq!(report.row_count, 1);
        assert!(!report.was_limited);
        assert_eq!(report.rows.len(), 1);
        assert_eq!(report.rows[0].record_key, "evt-2");
        assert_eq!(
            report.rows[0].payload_json,
            r#"{"actor":"alice-admin","seq":2,"ts":200}"#
        );
    }
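
    // Read-path sketch: filter clauses combine conjunctively (above, only
    // the row matching both the `actor` prefix and the `ts` range comes
    // back), and every referenced field must be declared in the collection's
    // filter contract. Illustrative helper only.
    #[allow(dead_code)]
    fn read_recent_alice_events(
        service: &AdminService,
    ) -> Result<crate::operational::OperationalReadReport, EngineError> {
        service.read_operational_collection(&crate::operational::OperationalReadRequest {
            collection_name: "audit_log".to_owned(),
            filters: vec![
                crate::operational::OperationalFilterClause::Prefix {
                    field: "actor".to_owned(),
                    value: "alice".to_owned(),
                },
                crate::operational::OperationalFilterClause::Range {
                    field: "ts".to_owned(),
                    lower: Some(150),
                    upper: Some(250),
                },
            ],
            limit: Some(10),
        })
    }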

    #[test]
    fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-1".to_owned(),
                            payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
                            source_ref: Some("src-1".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-2".to_owned(),
                            payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
                            source_ref: Some("src-2".to_owned()),
                        },
                    ],
                })
                .expect("write");
        }
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute(
            "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
            [],
        )
        .expect("clear filter values");
        drop(conn);

        let report = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![
                    crate::operational::OperationalFilterClause::Prefix {
                        field: "actor".to_owned(),
                        value: "alice".to_owned(),
                    },
                    crate::operational::OperationalFilterClause::Range {
                        field: "ts".to_owned(),
                        lower: Some(150),
                        upper: Some(250),
                    },
                ],
                limit: Some(10),
            })
            .expect("secondary-index read");

        assert_eq!(report.row_count, 1);
        assert_eq!(report.rows[0].record_key, "evt-2");
    }

    #[test]
    fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
        let (_db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
                    .to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");

        let latest_state_error = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "connector_health".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Exact {
                    field: "status".to_owned(),
                    value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
                }],
                limit: Some(10),
            })
            .expect_err("latest_state filtered reads should be rejected");
        assert!(latest_state_error.to_string().contains("append_only_log"));

        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
                    .to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register append-only collection");

        let undeclared_error = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Exact {
                    field: "missing".to_owned(),
                    value: crate::operational::OperationalFilterValue::String("x".to_owned()),
                }],
                limit: Some(10),
            })
            .expect_err("undeclared field should be rejected");
        assert!(undeclared_error.to_string().contains("undeclared"));
    }

    #[test]
    fn read_operational_collection_applies_limit_and_reports_truncation() {
        let (db, service) = setup();
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "audit_log".to_owned(),
                kind: OperationalCollectionKind::AppendOnlyLog,
                schema_json: "{}".to_owned(),
                retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
                filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
                    .to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: "[]".to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-1".to_owned(),
                            payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
                            source_ref: Some("src-1".to_owned()),
                        },
                        crate::OperationalWrite::Append {
                            collection: "audit_log".to_owned(),
                            record_key: "evt-2".to_owned(),
                            payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
                            source_ref: Some("src-2".to_owned()),
                        },
                    ],
                })
                .expect("write");
        }

        let report = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Prefix {
                    field: "actor".to_owned(),
                    value: "alice".to_owned(),
                }],
                limit: Some(1),
            })
            .expect("limited read");

        assert_eq!(report.row_count, 1);
        assert_eq!(report.applied_limit, 1);
        assert!(report.was_limited);
        assert_eq!(report.rows[0].record_key, "evt-2");
    }
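
    // Truncation-handling sketch: when `was_limited` is set the caller saw
    // only part of the match set (above, the later event wins the single
    // slot), so production code should raise the limit or narrow the filters
    // and retry. Illustrative helper only.
    #[allow(dead_code)]
    fn read_with_truncation_check(service: &AdminService) -> Result<(), EngineError> {
        let report = service.read_operational_collection(
            &crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Prefix {
                    field: "actor".to_owned(),
                    value: "alice".to_owned(),
                }],
                limit: Some(1),
            },
        )?;
        if report.was_limited {
            // Results were truncated at `applied_limit`; retry with a larger
            // limit or tighter filters.
        }
        Ok(())
    }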

    #[test]
    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
        let db = NamedTempFile::new().expect("temp db");
        let conn = sqlite::open_connection(db.path()).expect("conn");
        conn.execute_batch(
            r#"
            CREATE TABLE operational_collections (
                name TEXT PRIMARY KEY,
                kind TEXT NOT NULL,
                schema_json TEXT NOT NULL,
                retention_json TEXT NOT NULL,
                format_version INTEGER NOT NULL DEFAULT 1,
                created_at INTEGER NOT NULL DEFAULT 100,
                disabled_at INTEGER
            );
            CREATE TABLE operational_mutations (
                id TEXT PRIMARY KEY,
                collection_name TEXT NOT NULL,
                record_key TEXT NOT NULL,
                op_kind TEXT NOT NULL,
                payload_json TEXT NOT NULL,
                source_ref TEXT,
                created_at INTEGER NOT NULL DEFAULT 100,
                mutation_order INTEGER NOT NULL DEFAULT 1
            );
            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
            INSERT INTO operational_mutations
                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
            VALUES
                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
            "#,
        )
        .expect("seed pre-v10 schema");
        drop(conn);

        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
        let pre_update = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Exact {
                    field: "actor".to_owned(),
                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
                }],
                limit: Some(10),
            })
            .expect_err("read should reject undeclared fields before migration update");
        assert!(pre_update.to_string().contains("undeclared"));

        let updated = service
            .update_operational_collection_filters(
                "audit_log",
                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
            )
            .expect("update filter contract");
        assert!(updated.filter_fields_json.contains("\"actor\""));

        let report = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Range {
                    field: "ts".to_owned(),
                    lower: Some(0),
                    upper: Some(0),
                }],
                limit: Some(10),
            })
            .expect("read after explicit filter update");
        assert_eq!(report.row_count, 1);
        assert_eq!(report.rows[0].record_key, "evt-1");
    }
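
    // Upgrade sketch: collections created before filter contracts existed
    // reject every filtered read until an explicit contract is installed;
    // nothing is inferred from stored payloads. Installing one is a single
    // call, shown here with an illustrative contract.
    #[allow(dead_code)]
    fn install_filter_contract(service: &AdminService) -> Result<(), EngineError> {
        service.update_operational_collection_filters(
            "audit_log",
            r#"[{"name":"actor","type":"string","modes":["exact"]}]"#,
        )?;
        Ok(())
    }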

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn check_semantics_detects_stale_vec_rows() {
        use crate::sqlite::open_connection_with_vec;

        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());
        {
            let conn = open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            schema
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
                .expect("vec profile");
            // Insert a vec row whose chunk does not exist.
            let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
                .iter()
                .flat_map(|f| f.to_le_bytes())
                .collect();
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
                rusqlite::params![bytes],
            )
            .expect("insert stale vec row");
        }
        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let report = service.check_semantics().expect("semantics check");
        assert_eq!(report.stale_vec_rows, 1);
        assert!(
            report.warnings.iter().any(|w| w.contains("stale vec")),
            "warning must mention stale vec"
        );
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn restore_vector_profiles_recreates_vec_table_from_metadata() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
                 VALUES ('default', 'vec_nodes_active', 3, 1)",
                [],
            )
            .expect("insert vector profile");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let report = service
            .restore_vector_profiles()
            .expect("restore vector profiles");
        assert_eq!(
            report.targets,
            vec![crate::projection::ProjectionTarget::Vec]
        );
        assert_eq!(report.rebuilt_rows, 1);

        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let count: i64 = conn
            .query_row(
                "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
                [],
                |row| row.get(0),
            )
            .expect("vec schema count");
        assert_eq!(count, 1, "vec table should exist after restore");
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn load_vector_regeneration_config_supports_json_and_toml() {
        let dir = tempfile::tempdir().expect("temp dir");
        let json_path = dir.path().join("regen.json");
        let toml_path = dir.path().join("regen.toml");

        let config = VectorRegenerationConfig {
            profile: "default".to_owned(),
            table_name: "vec_nodes_active".to_owned(),
            chunking_policy: "per_chunk".to_owned(),
            preprocessing_policy: "trim".to_owned(),
        };

        fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
        fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");

        let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
        let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");

        assert_eq!(parsed_json, config);
        assert_eq!(parsed_toml, config);
    }
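
    // Serialization sketch: both on-disk encodings round-trip the same
    // struct, so a deployment can emit whichever format it prefers. A
    // minimal JSON emitter (illustrative; `serde_json` is already used by
    // the round-trip test above):
    #[allow(dead_code)]
    fn emit_regen_config_json(config: &VectorRegenerationConfig) -> String {
        serde_json::to_string(config).expect("config serializes")
    }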

    /// The 0.4.0 rewrite removed the identity fields from the config.
    /// Any client that still serializes the pre-0.4 fields must be
    /// rejected at the serde boundary with a clear error, never
    /// silently accepted.
    #[test]
    fn regenerate_vector_embeddings_config_rejects_old_identity_fields() {
        let legacy_json = r#"{
            "profile": "default",
            "table_name": "vec_nodes_active",
            "model_identity": "old-model",
            "model_version": "1.0",
            "dimension": 4,
            "normalization_policy": "l2",
            "chunking_policy": "per_chunk",
            "preprocessing_policy": "trim",
            "generator_command": ["/bin/echo"]
        }"#;
        let result: Result<VectorRegenerationConfig, _> = serde_json::from_str(legacy_json);
        assert!(
            result.is_err(),
            "legacy identity fields must be rejected at deserialization"
        );
    }
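
    // The rejection above is the behavior `#[serde(deny_unknown_fields)]`
    // produces; whether the real struct uses that attribute or a manual
    // `Deserialize` impl is not visible from this test. The sketch below is
    // one plausible wiring with a hypothetical struct, not the actual
    // `VectorRegenerationConfig` definition.
    #[allow(dead_code)]
    #[derive(serde::Deserialize)]
    #[serde(deny_unknown_fields)]
    struct StrictRegenConfigSketch {
        profile: String,
        table_name: String,
        chunking_policy: String,
        preprocessing_policy: String,
    }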

    #[cfg(all(not(feature = "sqlite-vec"), unix))]
    #[test]
    fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());

        {
            let conn = sqlite::open_connection(db.path()).expect("connection");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let embedder = TestEmbedder::new("test-model", 4);
        let error = service
            .regenerate_vector_embeddings(
                &embedder,
                &VectorRegenerationConfig {
                    profile: "default".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect_err("sqlite-vec capability should be required");

        assert!(error.to_string().contains("unsupported vec capability"));

        let conn = sqlite::open_connection(db.path()).expect("connection");
        let request_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("request count");
        assert_eq!(request_count, 1);
        let failed_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("failed count");
        assert_eq!(failed_count, 1);
        let metadata_json: String = conn
            .query_row(
                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("failed metadata");
        assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    #[allow(clippy::too_many_lines)]
    fn regenerate_vector_embeddings_rebuilds_embeddings_via_embedder() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());

        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk 1");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
                [],
            )
            .expect("insert chunk 2");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let embedder = TestEmbedder::new("test-model", 4);
        let report = service
            .regenerate_vector_embeddings(
                &embedder,
                &VectorRegenerationConfig {
                    profile: "default".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect("regenerate vectors");

        assert_eq!(report.profile, "default");
        assert_eq!(report.table_name, "vec_nodes_active");
        assert_eq!(report.dimension, 4);
        assert_eq!(report.total_chunks, 2);
        assert_eq!(report.regenerated_rows, 2);
        assert!(report.contract_persisted);

        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let vec_count: i64 = conn
            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
                row.get(0)
            })
            .expect("vec count");
        assert_eq!(vec_count, 2);

        // The persisted vector contract must reflect the embedder's own
        // identity, not any caller-supplied string: since 0.4.0 the config
        // carries no identity fields, so there is nothing for a caller to
        // pass.
        let (model_identity, model_version, dimension, normalization_policy): (
            String,
            String,
            i64,
            String,
        ) = conn
            .query_row(
                "SELECT model_identity, model_version, dimension, normalization_policy \
                 FROM vector_embedding_contracts WHERE profile = 'default'",
                [],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .expect("contract row");
        assert_eq!(model_identity, "test-model");
        assert_eq!(model_version, "1.0.0");
        assert_eq!(dimension, 4);
        assert_eq!(normalization_policy, "l2");

        let contract_format_version: i64 = conn
            .query_row(
                "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("contract_format_version");
        assert_eq!(contract_format_version, 1);
        let request_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("request audit count");
        assert_eq!(request_count, 1);
        let apply_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("apply audit count");
        assert_eq!(apply_count, 1);
        let apply_metadata: String = conn
            .query_row(
                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("apply metadata");
        assert!(apply_metadata.contains("\"profile\":\"default\""));
        assert!(apply_metadata.contains("\"snapshot_hash\":"));
        assert!(apply_metadata.contains("\"model_identity\":\"test-model\""));
    }
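
    // End-to-end sketch of the direct-embedder path exercised above: the
    // embedder supplies identity (model, version, dimension, normalization
    // policy) and the config supplies only profile, table, chunking, and
    // preprocessing choices, so the persisted contract cannot disagree with
    // the embedder. Illustrative helper only.
    #[allow(dead_code)]
    fn regenerate_default_profile(
        service: &AdminService,
        embedder: &TestEmbedder,
    ) -> Result<(), EngineError> {
        let report = service.regenerate_vector_embeddings(
            embedder,
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
            },
        )?;
        debug_assert!(report.contract_persisted);
        Ok(())
    }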

    #[cfg(feature = "sqlite-vec")]
    #[test]
    #[allow(clippy::too_many_lines)]
    fn regenerate_vector_embeddings_embedder_failure_leaves_contract_and_vec_rows_unchanged() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());

        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk");
            schema
                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
                .expect("ensure vec profile");
            conn.execute(
                r"
                INSERT INTO vector_embedding_contracts (
                    profile,
                    table_name,
                    model_identity,
                    model_version,
                    dimension,
                    normalization_policy,
                    chunking_policy,
                    preprocessing_policy,
                    generator_command_json,
                    applied_at,
                    snapshot_hash
                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
                ",
                rusqlite::params![
                    "default",
                    "vec_nodes_active",
                    "old-model",
                    "0.9.0",
                    4,
                    "l2",
                    "per_chunk",
                    "trim",
                    "[]",
                    111,
                    "old-snapshot"
                ],
            )
            .expect("seed contract");
            conn.execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
                [],
            )
            .expect("seed vec row");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let failing = FailingEmbedder {
            identity: QueryEmbedderIdentity {
                model_identity: "new-model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
            },
        };
        let error = service
            .regenerate_vector_embeddings(
                &failing,
                &VectorRegenerationConfig {
                    profile: "default".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect_err("embedder should fail");

        assert!(error.to_string().contains("embedder failure"));

        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let model_identity: String = conn
            .query_row(
                "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("model identity");
        assert_eq!(model_identity, "old-model");
        let snapshot_hash: String = conn
            .query_row(
                "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("snapshot hash");
        assert_eq!(snapshot_hash, "old-snapshot");
        let vec_count: i64 = conn
            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
                row.get(0)
            })
            .expect("vec count");
        assert_eq!(vec_count, 1);
        let failure_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("failure count");
        assert_eq!(failure_count, 1);
        let failure_metadata: String = conn
            .query_row(
                "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
                [],
                |row| row.get(0),
            )
            .expect("failure metadata");
        assert!(failure_metadata.contains("\"failure_class\":\"embedder failure\""));
    }

    // Subprocess generator tests (snapshot-drift-via-concurrent-writer,
    // timeout, stdout/stderr overflow, oversized input, excessive chunk
    // count, malformed JSON, world-writable executable, disallowed
    // executable root, environment preservation) were removed in 0.4.0
    // along with the subprocess generator pattern itself. The failure
    // modes they exercised belong to the deleted
    // `run_vector_generator_bounded` pipeline and have no equivalent in
    // the direct-embedder path. See
    // `.claude/memory/project_vector_identity_invariant.md`.

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));
        let embedder = TestEmbedder::new("test-model", 4);
        let error = service
            .regenerate_vector_embeddings(
                &embedder,
                &VectorRegenerationConfig {
                    profile: "   ".to_owned(),
                    table_name: "vec_nodes_active".to_owned(),
                    chunking_policy: "per_chunk".to_owned(),
                    preprocessing_policy: "trim".to_owned(),
                },
            )
            .expect_err("whitespace profile should be rejected");

        assert!(error.to_string().contains("invalid contract"));
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        let contract_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM vector_embedding_contracts",
                [],
                |row| row.get(0),
            )
            .expect("contract count");
        assert_eq!(contract_count, 0);
        let provenance_count: i64 = conn
            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
                row.get(0)
            })
            .expect("provenance count");
        assert_eq!(provenance_count, 0);
    }

    #[cfg(feature = "sqlite-vec")]
    #[test]
    fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
        let db = NamedTempFile::new().expect("temp file");
        let schema = Arc::new(SchemaManager::new());
        {
            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
            schema.bootstrap(&conn).expect("bootstrap");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                [],
            )
            .expect("insert node");
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                [],
            )
            .expect("insert chunk");
            conn.execute(
                r"
                INSERT INTO vector_embedding_contracts (
                    profile,
                    table_name,
                    model_identity,
                    model_version,
                    dimension,
                    normalization_policy,
                    chunking_policy,
                    preprocessing_policy,
                    generator_command_json,
                    applied_at,
                    snapshot_hash,
8006                    contract_format_version,
8007                    updated_at
8008                ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
8009                ",
8010                rusqlite::params![
8011                    "default",
8012                    "vec_nodes_active",
8013                    "old-model",
8014                    "0.9.0",
8015                    4,
8016                    "l2",
8017                    "per_chunk",
8018                    "trim",
8019                    "[]",
8020                    111,
8021                    "old-snapshot",
8022                    99,
8023                    111,
8024                ],
8025            )
8026            .expect("seed future contract");
8027        }
8028
8029        let service = AdminService::new(db.path(), Arc::clone(&schema));
8030        let embedder = TestEmbedder::new("test-model", 4);
8031        let error = service
8032            .regenerate_vector_embeddings(
8033                &embedder,
8034                &VectorRegenerationConfig {
8035                    profile: "default".to_owned(),
8036                    table_name: "vec_nodes_active".to_owned(),
8037                    chunking_policy: "per_chunk".to_owned(),
8038                    preprocessing_policy: "trim".to_owned(),
8039                },
8040            )
8041            .expect_err("future contract version should be rejected");
8042
8043        assert!(error.to_string().contains("unsupported"));
8044        assert!(error.to_string().contains("format version"));
8045    }
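
    // A minimal sketch of the guard the rejection above implies. Names and
    // the supported version are illustrative, not the engine's; the real
    // check lives inside regenerate_vector_embeddings:
    //
    //     const SUPPORTED_CONTRACT_FORMAT_VERSION: i64 = 1;
    //     if existing.contract_format_version > SUPPORTED_CONTRACT_FORMAT_VERSION {
    //         return Err(/* "unsupported ... contract format version" */);
    //     }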
8046
8047    #[test]
8048    fn check_semantics_detects_orphaned_chunk() {
8049        let (db, service) = setup();
8050        {
8051            // Open without FK enforcement to insert chunk with no active node.
8052            let conn = sqlite::open_connection(db.path()).expect("conn");
8053            conn.execute(
8054                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8055                 VALUES ('c1', 'ghost-node', 'text', 100)",
8056                [],
8057            )
8058            .expect("insert orphaned chunk");
8059        }
8060        let report = service.check_semantics().expect("semantics check");
8061        assert_eq!(report.orphaned_chunks, 1);
8062    }
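
    // check_semantics presumably counts these with an anti-join of roughly
    // this shape (illustrative SQL, not necessarily the engine's query):
    //
    //     SELECT count(*) FROM chunks c
    //     LEFT JOIN nodes n
    //       ON n.logical_id = c.node_logical_id AND n.superseded_at IS NULL
    //     WHERE n.logical_id IS NULL;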
8063
8064    #[test]
8065    fn check_semantics_detects_null_source_ref() {
8066        let (db, service) = setup();
8067        {
8068            let conn = sqlite::open_connection(db.path()).expect("conn");
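            // The insert deliberately omits source_ref, so the row lands with
            // source_ref NULL and check_semantics must flag it.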
8069            conn.execute(
8070                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
8071                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
8072                [],
8073            )
8074            .expect("insert node with null source_ref");
8075        }
8076        let report = service.check_semantics().expect("semantics check");
8077        assert_eq!(report.null_source_ref_nodes, 1);
8078    }
8079
8080    #[test]
8081    fn check_semantics_detects_broken_step_fk() {
8082        let (db, service) = setup();
8083        {
8084            // Explicitly disable FK enforcement for this connection so we can insert
8085            // an orphaned step (ghost run_id) to simulate a partial-write failure.
8086            let conn = sqlite::open_connection(db.path()).expect("conn");
8087            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8088                .expect("disable FK");
8089            conn.execute(
8090                "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
8091                 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
8092                [],
8093            )
8094            .expect("insert step with ghost run_id");
8095        }
8096        let report = service.check_semantics().expect("semantics check");
8097        assert_eq!(report.broken_step_fk, 1);
8098    }
8099
8100    #[test]
8101    fn check_semantics_detects_broken_action_fk() {
8102        let (db, service) = setup();
8103        {
8104            let conn = sqlite::open_connection(db.path()).expect("conn");
8105            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8106                .expect("disable FK");
8107            conn.execute(
8108                "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
8109                 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
8110                [],
8111            )
8112            .expect("insert action with ghost step_id");
8113        }
8114        let report = service.check_semantics().expect("semantics check");
8115        assert_eq!(report.broken_action_fk, 1);
8116    }
8117
8118    #[test]
8119    fn check_semantics_detects_stale_fts_rows() {
8120        let (db, service) = setup();
8121        {
8122            let conn = sqlite::open_connection(db.path()).expect("conn");
8123            // FTS virtual tables have no FK constraints; insert a row referencing
8124            // a chunk_id that does not exist in the chunks table.
8125            conn.execute(
8126                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8127                 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
8128                [],
8129            )
8130            .expect("insert stale FTS row");
8131        }
8132        let report = service.check_semantics().expect("semantics check");
8133        assert_eq!(report.stale_fts_rows, 1);
8134    }
8135
8136    #[test]
8137    fn check_semantics_detects_fts_rows_for_superseded_nodes() {
8138        let (db, service) = setup();
8139        {
8140            let conn = sqlite::open_connection(db.path()).expect("conn");
8141            // Insert a node that has been fully superseded (superseded_at IS NOT NULL).
8142            conn.execute(
8143                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8144                 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
8145                [],
8146            )
8147            .expect("insert superseded node");
8148            // Insert an FTS row for the superseded node's logical_id.
8149            conn.execute(
8150                "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8151                 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
8152                [],
8153            )
8154            .expect("insert FTS row for superseded node");
8155        }
8156        let report = service.check_semantics().expect("semantics check");
8157        assert_eq!(report.fts_rows_for_superseded_nodes, 1);
8158    }
8159
8160    #[test]
8161    fn check_semantics_detects_dangling_edges() {
8162        let (db, service) = setup();
8163        {
8164            let conn = sqlite::open_connection(db.path()).expect("conn");
8165            conn.execute_batch("PRAGMA foreign_keys = OFF;")
8166                .expect("disable FK");
8167            // One active node as source; target does not exist — edge is dangling.
8168            conn.execute(
8169                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8170                 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
8171                [],
8172            )
8173            .expect("insert source node");
8174            conn.execute(
8175                "INSERT INTO edges \
8176                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8177                 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
8178                [],
8179            )
8180            .expect("insert dangling edge");
8181        }
8182        let report = service.check_semantics().expect("semantics check");
8183        assert_eq!(report.dangling_edges, 1);
8184    }
8185
8186    #[test]
8187    fn check_semantics_detects_orphaned_supersession_chains() {
8188        let (db, service) = setup();
8189        {
8190            let conn = sqlite::open_connection(db.path()).expect("conn");
8191            // Every version of this logical_id is superseded — no active row remains.
8192            conn.execute(
8193                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8194                 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
8195                [],
8196            )
8197            .expect("insert fully superseded node");
8198        }
8199        let report = service.check_semantics().expect("semantics check");
8200        assert_eq!(report.orphaned_supersession_chains, 1);
8201    }
8202
8203    #[test]
8204    fn check_semantics_detects_mismatched_kind_property_fts_rows() {
8205        // With per-kind tables, mismatched_kind is always 0 — rows in fts_props_<kind>
8206        // must belong to that kind by construction. However, orphaned rows (per-kind table
8207        // with no registered schema) serve as the equivalent signal and are tested via
8208        // check_semantics_detects_fts_rows_for_superseded_nodes. This test verifies
8209        // mismatched_kind is 0 even when per-kind table rows exist for a node.
8210        let (db, service) = setup();
8211        {
8212            let conn = sqlite::open_connection(db.path()).expect("conn");
8213            conn.execute(
8214                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8215                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8216                [],
8217            )
8218            .expect("register schema");
8219            conn.execute(
8220                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8221                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8222                [],
8223            )
8224            .expect("insert node");
8225            // Create the per-kind table and insert a correctly-kind row.
8226            let table = fathomdb_schema::fts_kind_table_name("Goal");
8227            conn.execute_batch(&format!(
8228                "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8229                 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8230            ))
8231            .expect("create per-kind table");
8232            conn.execute(
8233                &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"),
8234                [],
8235            )
8236            .expect("insert per-kind FTS row");
8237        }
8238        let report = service.check_semantics().expect("semantics check");
8239        // Per-kind tables make mismatched_kind impossible — always 0.
8240        assert_eq!(report.mismatched_kind_property_fts_rows, 0);
8241    }
8242
8243    #[test]
8244    fn check_semantics_detects_duplicate_property_fts_rows() {
8245        let (db, service) = setup();
8246        {
8247            let conn = sqlite::open_connection(db.path()).expect("conn");
8248            conn.execute(
8249                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8250                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8251                [],
8252            )
8253            .expect("insert node");
8254            // Create the per-kind table and insert two rows for the same logical ID.
8255            let table = fathomdb_schema::fts_kind_table_name("Goal");
8256            conn.execute_batch(&format!(
8257                "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8258                 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8259            ))
8260            .expect("create per-kind table");
8261            conn.execute(
8262                &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2')"),
8263                [],
8264            )
8265            .expect("insert first property FTS row");
8266            conn.execute(
8267                &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Ship v2 duplicate')"),
8268                [],
8269            )
8270            .expect("insert duplicate property FTS row");
8271        }
8272        let report = service.check_semantics().expect("semantics check");
8273        assert_eq!(report.duplicate_property_fts_rows, 1);
8274    }
8275
8276    #[test]
8277    fn check_semantics_detects_drifted_property_fts_text() {
8278        let (db, service) = setup();
8279        {
8280            let conn = sqlite::open_connection(db.path()).expect("conn");
8281            conn.execute(
8282                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8283                 VALUES ('Goal', '[\"$.name\"]', ' ')",
8284                [],
8285            )
8286            .expect("register schema");
8287            conn.execute(
8288                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8289                 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
8290                [],
8291            )
8292            .expect("insert node");
8293            // Create per-kind table and insert a row with outdated text content.
8294            let table = fathomdb_schema::fts_kind_table_name("Goal");
8295            conn.execute_batch(&format!(
8296                "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8297                 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8298            ))
8299            .expect("create per-kind table");
8300            conn.execute(
8301                &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'Old stale name')"),
8302                [],
8303            )
8304            .expect("insert stale property FTS row");
8305        }
8306        let report = service.check_semantics().expect("semantics check");
8307        assert_eq!(report.drifted_property_fts_rows, 1);
8308    }
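
    // The drifted count implies check_semantics re-extracts the registered
    // paths from the live properties ("Current name") and compares the result
    // against the stored FTS text ("Old stale name"); how the comparison is
    // performed is an implementation detail these tests do not pin down.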
8309
8310    #[test]
8311    fn check_semantics_detects_property_fts_row_that_should_not_exist() {
8312        let (db, service) = setup();
8313        {
8314            let conn = sqlite::open_connection(db.path()).expect("conn");
8315            conn.execute(
8316                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8317                 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
8318                [],
8319            )
8320            .expect("register schema");
8321            // Node does NOT have $.searchable — extraction yields no value.
8322            conn.execute(
8323                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8324                 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
8325                [],
8326            )
8327            .expect("insert node");
8328            // Create per-kind table and insert a phantom row that should not exist.
8329            let table = fathomdb_schema::fts_kind_table_name("Goal");
8330            conn.execute_batch(&format!(
8331                "CREATE VIRTUAL TABLE IF NOT EXISTS {table} \
8332                 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
8333            ))
8334            .expect("create per-kind table");
8335            conn.execute(
8336                &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('goal-1', 'phantom text')"),
8337                [],
8338            )
8339            .expect("insert phantom property FTS row");
8340        }
8341        let report = service.check_semantics().expect("semantics check");
8342        assert_eq!(
8343            report.drifted_property_fts_rows, 1,
8344            "row that should not exist must be counted as drifted"
8345        );
8346    }
8347
8348    #[test]
8349    fn safe_export_writes_manifest_with_sha256() {
8350        let (_db, service) = setup();
8351        let export_dir = tempfile::TempDir::new().expect("temp dir");
8352        let export_path = export_dir.path().join("backup.db");
8353
8354        let manifest = service
8355            .safe_export(
8356                &export_path,
8357                SafeExportOptions {
8358                    force_checkpoint: false,
8359                },
8360            )
8361            .expect("export");
8362
8363        assert!(export_path.exists(), "exported db should exist");
8364        let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8365        assert!(
8366            manifest_path.exists(),
8367            "manifest file should exist at {}",
8368            manifest_path.display()
8369        );
8370        assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8371        assert!(
8372            manifest.exported_at > 0,
8373            "exported_at should be a unix timestamp"
8374        );
8375        assert_eq!(
8376            manifest.schema_version,
8377            SchemaManager::new().current_version().0,
8378            "schema_version should match the live schema version"
8379        );
8380        assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8381        assert!(manifest.page_count > 0, "page_count should be positive");
8382    }
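
    // Hedged companion check for the digest: this assumes manifest.sha256 is
    // the lowercase hex SHA-256 of the exported file's bytes, which the
    // 64-hex-char assertion above implies but does not prove.
    #[test]
    fn export_manifest_sha256_matches_recomputed_file_digest() {
        use std::fmt::Write as _;

        use sha2::{Digest, Sha256};

        let (_db, service) = setup();
        let export_dir = tempfile::TempDir::new().expect("temp dir");
        let export_path = export_dir.path().join("digest.db");
        let manifest = service
            .safe_export(
                &export_path,
                SafeExportOptions {
                    force_checkpoint: false,
                },
            )
            .expect("export");

        let bytes = fs::read(&export_path).expect("read exported db");
        let mut recomputed = String::new();
        for byte in Sha256::digest(&bytes) {
            write!(recomputed, "{byte:02x}").expect("format hex byte");
        }
        assert_eq!(
            manifest.sha256, recomputed,
            "manifest sha256 should equal the digest of the exported bytes"
        );
    }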
8383
8384    #[test]
8385    fn safe_export_preserves_operational_validation_contracts() {
8386        let (_db, service) = setup();
8387        let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
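        // The contract above uses mode "enforce" with
        // additional_properties:false and a required "status" enum, so a
        // payload like {"status":"ok"} should validate while
        // {"status":"unknown"} or any extra field should be rejected.
        // Enforcement itself is covered by the operational tests; this test
        // only checks that the contract survives export byte-for-byte.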
8388        service
8389            .register_operational_collection(&OperationalRegisterRequest {
8390                name: "connector_health".to_owned(),
8391                kind: OperationalCollectionKind::LatestState,
8392                schema_json: "{}".to_owned(),
8393                retention_json: "{}".to_owned(),
8394                filter_fields_json: "[]".to_owned(),
8395                validation_json: validation_json.to_owned(),
8396                secondary_indexes_json: "[]".to_owned(),
8397                format_version: 1,
8398            })
8399            .expect("register collection");
8400
8401        let export_dir = tempfile::TempDir::new().expect("temp dir");
8402        let export_path = export_dir.path().join("backup.db");
8403        service
8404            .safe_export(
8405                &export_path,
8406                SafeExportOptions {
8407                    force_checkpoint: false,
8408                },
8409            )
8410            .expect("export");
8411
8412        let exported = sqlite::open_connection(&export_path).expect("exported conn");
8413        let exported_validation_json: String = exported
8414            .query_row(
8415                "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8416                [],
8417                |row| row.get(0),
8418            )
8419            .expect("validation_json");
8420        assert_eq!(exported_validation_json, validation_json);
8421    }
8422
8423    #[test]
8424    fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8425        let (_db, service) = setup();
8426        let export_dir = tempfile::TempDir::new().expect("temp dir");
8427        let export_path = export_dir.path().join("no-wal.db");
8428
8429        // force_checkpoint: false must not error even on a non-WAL database
8430        let manifest = service
8431            .safe_export(
8432                &export_path,
8433                SafeExportOptions {
8434                    force_checkpoint: false,
8435                },
8436            )
8437            .expect("export with no checkpoint");
8438
8439        assert!(
8440            manifest.page_count > 0,
8441            "page_count must be populated regardless of checkpoint mode"
8442        );
8443        assert_eq!(
8444            manifest.schema_version,
8445            SchemaManager::new().current_version().0
8446        );
8447        assert_eq!(manifest.protocol_version, 1);
8448    }
8449
8450    #[test]
8451    fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8452        let (db, service) = setup();
8453        let conn = sqlite::open_connection(db.path()).expect("conn");
8454        let journal_mode: String = conn
8455            .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8456            .expect("enable wal");
8457        assert_eq!(journal_mode.to_lowercase(), "wal");
8458        let auto_checkpoint_pages: i64 = conn
8459            .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8460            .expect("disable auto checkpoint");
8461        assert_eq!(auto_checkpoint_pages, 0);
8462        conn.execute(
8463            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8464             VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8465            [],
8466        )
8467        .expect("insert wal-backed node");
8468
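        // Whatever copy mechanism safe_export uses, it must read through the
        // WAL here: the row above is committed, but with autocheckpointing
        // disabled it is resident only in the -wal file, not the main
        // database file.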
8469        let export_dir = tempfile::TempDir::new().expect("temp dir");
8470        let export_path = export_dir.path().join("wal-backed.db");
8471        service
8472            .safe_export(
8473                &export_path,
8474                SafeExportOptions {
8475                    force_checkpoint: false,
8476                },
8477            )
8478            .expect("export wal-backed db");
8479
8480        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8481        let exported_count: i64 = exported
8482            .query_row(
8483                "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8484                [],
8485                |row| row.get(0),
8486            )
8487            .expect("count exported nodes");
8488        assert_eq!(
8489            exported_count, 1,
8490            "safe_export must include committed rows that are still resident in the WAL"
8491        );
8492    }
8493
8494    #[test]
8495    fn excise_source_removes_searchable_content_after_excision() {
8496        let (db, service) = setup();
8497        {
8498            let conn = sqlite::open_connection(db.path()).expect("conn");
8499            conn.execute(
8500                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8501                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8502                [],
8503            )
8504            .expect("insert v1");
8505            conn.execute(
8506                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8507                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8508                [],
8509            )
8510            .expect("insert v2");
8511            conn.execute(
8512                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8513                 VALUES ('ck1', 'lg1', 'hello world', 100)",
8514                [],
8515            )
8516            .expect("insert chunk");
8517        }
8518        service.excise_source("source-2").expect("excise");
8519        {
8520            let conn = sqlite::open_connection(db.path()).expect("conn");
8521            let fts_count: i64 = conn
8522                .query_row(
8523                    "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8524                    [],
8525                    |row| row.get(0),
8526                )
8527                .expect("fts count");
8528            assert_eq!(
8529                fts_count, 0,
8530                "excised content should not remain searchable after excise"
8531            );
8532        }
8533    }
8534
8535    #[cfg(feature = "sqlite-vec")]
8536    #[test]
8537    fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8538        let (db, service) = setup();
8539        {
8540            let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8541            service
8542                .schema_manager
8543                .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8544                .expect("ensure vec profile");
8545            conn.execute(
8546                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8547                 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8548                [],
8549            )
8550            .expect("insert v1");
8551            conn.execute(
8552                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8553                 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8554                [],
8555            )
8556            .expect("insert v2");
8557            conn.execute(
8558                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8559                 VALUES ('ck1', 'lg1', 'new content', 200)",
8560                [],
8561            )
8562            .expect("insert chunk");
8563            conn.execute(
8564                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8565                [],
8566            )
8567            .expect("insert vec row");
8568        }
8569
8570        service.excise_source("source-2").expect("excise");
8571
8572        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8573        let active_row: String = conn
8574            .query_row(
8575                "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8576                [],
8577                |row| row.get(0),
8578            )
8579            .expect("restored active row");
8580        assert_eq!(active_row, "r1");
8581        let chunk_count: i64 = conn
8582            .query_row(
8583                "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8584                [],
8585                |row| row.get(0),
8586            )
8587            .expect("chunk count");
8588        assert_eq!(
8589            chunk_count, 0,
8590            "excised source content must not survive as chunks"
8591        );
8592        let vec_count: i64 = conn
8593            .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8594                row.get(0)
8595            })
8596            .expect("vec count");
8597        assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8598        let fts_count: i64 = conn
8599            .query_row(
8600                "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8601                [],
8602                |row| row.get(0),
8603            )
8604            .expect("fts count");
8605        assert_eq!(
8606            fts_count, 0,
8607            "excised source content must not remain searchable"
8608        );
8609    }
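
    // Taken together, the two excise tests pin down the invariant: excising a
    // source retires the rows that source introduced, reinstates the prior
    // version where one exists (r1 becomes active again), and scrubs every
    // derived artifact (chunks, vec rows, FTS rows) tied to the excised
    // version.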
8610
8611    #[test]
8612    fn export_page_count_matches_exported_file() {
8613        let (_db, service) = setup();
8614        let export_dir = tempfile::TempDir::new().expect("temp dir");
8615        let export_path = export_dir.path().join("page-count.db");
8616
8617        let manifest = service
8618            .safe_export(
8619                &export_path,
8620                SafeExportOptions {
8621                    force_checkpoint: false,
8622                },
8623            )
8624            .expect("export");
8625
8626        let exported = sqlite::open_connection(&export_path).expect("open exported db");
8627        let actual_page_count: u64 = exported
8628            .query_row("PRAGMA page_count", [], |row| row.get(0))
8629            .expect("page_count from exported file");
8630
8631        assert_eq!(
8632            manifest.page_count, actual_page_count,
8633            "manifest page_count must match the exported file's PRAGMA page_count"
8634        );
8635    }
8636
8637    #[test]
8638    fn no_temp_file_after_successful_export() {
8639        let (_db, service) = setup();
8640        let export_dir = tempfile::TempDir::new().expect("temp dir");
8641        let export_path = export_dir.path().join("no-tmp.db");
8642
8643        service
8644            .safe_export(
8645                &export_path,
8646                SafeExportOptions {
8647                    force_checkpoint: false,
8648                },
8649            )
8650            .expect("export");
8651
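        // safe_export appears to stage its copy in a sibling ".tmp" file and
        // rename it into place; a successful export must leave no stage file
        // behind.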
8652        let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8653            .expect("read export dir")
8654            .filter_map(Result::ok)
8655            .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8656            .collect();
8657
8658        assert!(
8659            tmp_files.is_empty(),
8660            "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8661        );
8662    }
8663
8664    #[test]
8665    fn export_manifest_is_valid_json() {
8666        let (_db, service) = setup();
8667        let export_dir = tempfile::TempDir::new().expect("temp dir");
8668        let export_path = export_dir.path().join("valid-json.db");
8669
8670        service
8671            .safe_export(
8672                &export_path,
8673                SafeExportOptions {
8674                    force_checkpoint: false,
8675                },
8676            )
8677            .expect("export");
8678
8679        let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8680        let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8681        let parsed: serde_json::Value =
8682            serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8683
8684        assert!(
8685            parsed.get("exported_at").is_some(),
8686            "manifest must contain exported_at"
8687        );
8688        assert!(
8689            parsed.get("sha256").is_some(),
8690            "manifest must contain sha256"
8691        );
8692        assert!(
8693            parsed.get("schema_version").is_some(),
8694            "manifest must contain schema_version"
8695        );
8696        assert!(
8697            parsed.get("protocol_version").is_some(),
8698            "manifest must contain protocol_version"
8699        );
8700        assert!(
8701            parsed.get("page_count").is_some(),
8702            "manifest must contain page_count"
8703        );
8704    }
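
    // Hedged companion sketch: the manifest fields asserted above also admit
    // a typed decode. ManifestShape is a test-local stand-in, not the
    // engine's manifest type; field types are inferred from the surrounding
    // assertions, and schema_version is left untyped because its wire type
    // is not pinned down by these tests.
    #[test]
    fn export_manifest_decodes_into_typed_shape() {
        #[derive(serde::Deserialize)]
        struct ManifestShape {
            exported_at: i64,
            sha256: String,
            schema_version: serde_json::Value,
            protocol_version: i64,
            page_count: u64,
        }

        let (_db, service) = setup();
        let export_dir = tempfile::TempDir::new().expect("temp dir");
        let export_path = export_dir.path().join("typed.db");
        service
            .safe_export(
                &export_path,
                SafeExportOptions {
                    force_checkpoint: false,
                },
            )
            .expect("export");

        let manifest_path = export_dir.path().join("typed.db.export-manifest.json");
        let contents = fs::read_to_string(&manifest_path).expect("read manifest");
        let shape: ManifestShape =
            serde_json::from_str(&contents).expect("manifest must decode into typed shape");
        assert!(shape.exported_at > 0);
        assert_eq!(shape.sha256.len(), 64);
        assert_eq!(shape.protocol_version, 1);
        assert!(shape.page_count > 0);
        assert!(!shape.schema_version.is_null());
    }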
8705
8706    #[test]
8707    fn provenance_purge_dry_run_reports_counts() {
8708        let (db, service) = setup();
8709        {
8710            let conn = sqlite::open_connection(db.path()).expect("conn");
8711            conn.execute(
8712                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8713                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8714                [],
8715            )
8716            .expect("insert p1");
8717            conn.execute(
8718                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8719                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8720                [],
8721            )
8722            .expect("insert p2");
8723            conn.execute(
8724                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8725                 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8726                [],
8727            )
8728            .expect("insert p3");
8729        }
8730
8731        let options = super::ProvenancePurgeOptions {
8732            dry_run: true,
8733            preserve_event_types: Vec::new(),
8734        };
8735        let report = service
8736            .purge_provenance_events(250, &options)
8737            .expect("dry run purge");
8738
8739        assert_eq!(report.events_deleted, 2);
8740        assert_eq!(report.events_preserved, 1);
8741        assert!(report.oldest_remaining.is_some());
8742
8743        let conn = sqlite::open_connection(db.path()).expect("conn");
8744        let total: i64 = conn
8745            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8746                row.get(0)
8747            })
8748            .expect("count");
8749        assert_eq!(total, 3, "dry_run must not delete any events");
8750    }
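
    // Observed cutoff semantics across the purge tests: events strictly older
    // than the cutoff are deleted (100 and 200 fall before 250; with cutoff
    // 150 only the event at 100 goes). Whether an event created exactly at
    // the cutoff would be deleted is not pinned down by these tests.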
8751
8752    #[test]
8753    fn provenance_purge_deletes_old_events() {
8754        let (db, service) = setup();
8755        {
8756            let conn = sqlite::open_connection(db.path()).expect("conn");
8757            conn.execute(
8758                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8759                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8760                [],
8761            )
8762            .expect("insert p1");
8763            conn.execute(
8764                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8765                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8766                [],
8767            )
8768            .expect("insert p2");
8769        }
8770
8771        let options = super::ProvenancePurgeOptions {
8772            dry_run: false,
8773            preserve_event_types: Vec::new(),
8774        };
8775        let report = service
8776            .purge_provenance_events(150, &options)
8777            .expect("purge");
8778
8779        assert_eq!(report.events_deleted, 1);
8780        assert_eq!(report.events_preserved, 1);
8781        assert_eq!(report.oldest_remaining, Some(200));
8782
8783        let conn = sqlite::open_connection(db.path()).expect("conn");
8784        let remaining: i64 = conn
8785            .query_row("SELECT count(*) FROM provenance_events", [], |row| {
8786                row.get(0)
8787            })
8788            .expect("count");
8789        assert_eq!(remaining, 1);
8790    }
8791
8792    #[test]
8793    fn provenance_purge_preserves_specified_types() {
8794        let (db, service) = setup();
8795        {
8796            let conn = sqlite::open_connection(db.path()).expect("conn");
8797            conn.execute(
8798                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8799                 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
8800                [],
8801            )
8802            .expect("insert p1");
8803            conn.execute(
8804                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8805                 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
8806                [],
8807            )
8808            .expect("insert p2");
8809            conn.execute(
8810                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8811                 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
8812                [],
8813            )
8814            .expect("insert p3");
8815        }
8816
8817        let options = super::ProvenancePurgeOptions {
8818            dry_run: false,
8819            preserve_event_types: vec!["excise".to_owned()],
8820        };
8821        let report = service
8822            .purge_provenance_events(500, &options)
8823            .expect("purge");
8824
8825        assert_eq!(report.events_deleted, 2);
8826        assert_eq!(report.events_preserved, 1);
8827
8828        let conn = sqlite::open_connection(db.path()).expect("conn");
8829        let remaining_type: String = conn
8830            .query_row("SELECT event_type FROM provenance_events", [], |row| {
8831                row.get(0)
8832            })
8833            .expect("remaining event type");
8834        assert_eq!(remaining_type, "excise");
8835    }
8836
8837    #[test]
8838    fn provenance_purge_noop_with_zero_timestamp() {
8839        let (db, service) = setup();
8840        {
8841            let conn = sqlite::open_connection(db.path()).expect("conn");
8842            conn.execute(
8843                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8844                 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8845                [],
8846            )
8847            .expect("insert p1");
8848        }
8849
8850        let options = super::ProvenancePurgeOptions {
8851            dry_run: false,
8852            preserve_event_types: Vec::new(),
8853        };
8854        let report = service.purge_provenance_events(0, &options).expect("purge");
8855
8856        assert_eq!(report.events_deleted, 0);
8857        assert_eq!(report.events_preserved, 1);
8858        assert_eq!(report.oldest_remaining, Some(100));
8859    }
8860
8861    #[test]
8862    fn restore_skips_edge_when_counterpart_purged() {
8863        let (db, service) = setup();
8864        {
8865            let conn = sqlite::open_connection(db.path()).expect("conn");
8866            // Create node A (doc-1) and node B (doc-2)
8867            conn.execute(
8868                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8869                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8870                [],
8871            )
8872            .expect("insert node A");
8873            conn.execute(
8874                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8875                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8876                [],
8877            )
8878            .expect("insert node B");
8879            // Create edge between A and B
8880            conn.execute(
8881                "INSERT INTO edges \
8882                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8883                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8884                [],
8885            )
8886            .expect("insert edge");
8887            // Retire both A and B, and the edge
8888            conn.execute(
8889                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8890                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8891                [],
8892            )
8893            .expect("insert retire event A");
8894            conn.execute(
8895                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8896                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8897                [],
8898            )
8899            .expect("insert edge retire event");
8900            conn.execute(
8901                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8902                [],
8903            )
8904            .expect("retire node A");
8905            conn.execute(
8906                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
8907                [],
8908            )
8909            .expect("retire node B");
8910            conn.execute(
8911                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8912                [],
8913            )
8914            .expect("retire edge");
8915            // Simulate purge of B: delete node rows but leave the edge intact
8916            // to reproduce the dangling-edge scenario the validation guards against.
8917            conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
8918                .expect("purge node B rows");
8919        }
8920
8921        // Restore A — the edge should be skipped because B has no active node
8922        let report = service.restore_logical_id("doc-1").expect("restore A");
8923        assert!(!report.was_noop);
8924        assert_eq!(report.restored_node_rows, 1);
8925        assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
8926        assert_eq!(report.skipped_edges.len(), 1);
8927        assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
8928        assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
8929
8930        // Verify the edge is still retired in the database
8931        let conn = sqlite::open_connection(db.path()).expect("conn");
8932        let active_edge_count: i64 = conn
8933            .query_row(
8934                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
8935                [],
8936                |row| row.get(0),
8937            )
8938            .expect("active edge count");
8939        assert_eq!(active_edge_count, 0, "edge must remain retired");
8940    }
8941
8942    #[test]
8943    fn restore_restores_edges_to_active_nodes() {
8944        let (db, service) = setup();
8945        {
8946            let conn = sqlite::open_connection(db.path()).expect("conn");
8947            // Create node A and node B (B stays active)
8948            conn.execute(
8949                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8950                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
8951                [],
8952            )
8953            .expect("insert node A");
8954            conn.execute(
8955                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8956                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
8957                [],
8958            )
8959            .expect("insert node B");
8960            // Create edge between A and B
8961            conn.execute(
8962                "INSERT INTO edges \
8963                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8964                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
8965                [],
8966            )
8967            .expect("insert edge");
8968            // Retire only A
8969            conn.execute(
8970                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8971                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
8972                [],
8973            )
8974            .expect("insert retire event A");
8975            conn.execute(
8976                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
8977                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
8978                [],
8979            )
8980            .expect("insert edge retire event");
8981            conn.execute(
8982                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
8983                [],
8984            )
8985            .expect("retire node A");
8986            conn.execute(
8987                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
8988                [],
8989            )
8990            .expect("retire edge");
8991        }
8992
8993        // Restore A — B is active, so the edge should be restored normally
8994        let report = service.restore_logical_id("doc-1").expect("restore A");
8995        assert!(!report.was_noop);
8996        assert_eq!(report.restored_node_rows, 1);
8997        assert!(report.restored_edge_rows > 0, "edge should be restored");
8998        assert!(
8999            report.skipped_edges.is_empty(),
9000            "no edges should be skipped"
9001        );
9002
9003        let conn = sqlite::open_connection(db.path()).expect("conn");
9004        let active_edge_count: i64 = conn
9005            .query_row(
9006                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9007                [],
9008                |row| row.get(0),
9009            )
9010            .expect("active edge count");
9011        assert_eq!(active_edge_count, 1, "edge must be active");
9012    }
9013
9014    #[test]
9015    fn restore_restores_edges_when_both_restored() {
9016        let (db, service) = setup();
9017        {
9018            let conn = sqlite::open_connection(db.path()).expect("conn");
9019            // Create node A and node B
9020            conn.execute(
9021                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9022                 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9023                [],
9024            )
9025            .expect("insert node A");
9026            conn.execute(
9027                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9028                 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9029                [],
9030            )
9031            .expect("insert node B");
9032            // Create edge between A and B
9033            conn.execute(
9034                "INSERT INTO edges \
9035                 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9036                 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9037                [],
9038            )
9039            .expect("insert edge");
9040            // Retire both A and B
9041            conn.execute(
9042                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9043                 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9044                [],
9045            )
9046            .expect("insert retire event A");
9047            conn.execute(
9048                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9049                 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9050                [],
9051            )
9052            .expect("insert retire event B");
9053            conn.execute(
9054                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9055                 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9056                [],
9057            )
9058            .expect("insert edge retire event");
9059            conn.execute(
9060                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9061                [],
9062            )
9063            .expect("retire node A");
9064            conn.execute(
9065                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9066                [],
9067            )
9068            .expect("retire node B");
9069            conn.execute(
9070                "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9071                [],
9072            )
9073            .expect("retire edge");
9074        }
9075
9076        // Restore B first — edge is skipped because A is still retired
9077        let report_b = service.restore_logical_id("doc-2").expect("restore B");
9078        assert!(!report_b.was_noop);
9079
9080        // Restore A — B is now active, so the edge should be restored
9081        let report_a = service.restore_logical_id("doc-1").expect("restore A");
9082        assert!(!report_a.was_noop);
9083        assert_eq!(report_a.restored_node_rows, 1);
9084        assert!(
9085            report_a.restored_edge_rows > 0,
9086            "edge should be restored when both endpoints active"
9087        );
9088        assert!(
9089            report_a.skipped_edges.is_empty(),
9090            "no edges should be skipped"
9091        );
9092
9093        let conn = sqlite::open_connection(db.path()).expect("conn");
9094        let active_edge_count: i64 = conn
9095            .query_row(
9096                "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9097                [],
9098                |row| row.get(0),
9099            )
9100            .expect("active edge count");
9101        assert_eq!(
9102            active_edge_count, 1,
9103            "edge must be active after both endpoints restored"
9104        );
9105    }
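
    // Taken together, the three restore tests pin down the edge rule: an edge
    // is reactivated only when both endpoints have an active node row at
    // restore time; otherwise it stays retired and is surfaced via
    // skipped_edges, so a later restore of the missing endpoint can pick the
    // edge up (as the both-restored case shows).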
9106
9107    // ── FTS property schema end-to-end tests ──────────────────────────
9108
9109    #[test]
9110    fn fts_property_schema_crud_round_trip() {
9111        let (_db, service) = setup();
9112
9113        // Register
9114        let record = service
9115            .register_fts_property_schema(
9116                "Meeting",
9117                &["$.title".to_owned(), "$.summary".to_owned()],
9118                None,
9119            )
9120            .expect("register");
9121        assert_eq!(record.kind, "Meeting");
9122        assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
9123        assert_eq!(record.separator, " ");
9124        assert_eq!(record.format_version, 1);
9125
9126        // Describe
9127        let described = service
9128            .describe_fts_property_schema("Meeting")
9129            .expect("describe")
9130            .expect("should exist");
9131        assert_eq!(described, record);
9132
9133        // Describe missing kind
9134        let missing = service
9135            .describe_fts_property_schema("NoSuchKind")
9136            .expect("describe missing");
9137        assert!(missing.is_none());
9138
9139        // List
9140        let list = service.list_fts_property_schemas().expect("list");
9141        assert_eq!(list.len(), 1);
9142        assert_eq!(list[0].kind, "Meeting");
9143
9144        // Update (registering again for the same kind upserts the schema)
9145        let updated = service
9146            .register_fts_property_schema(
9147                "Meeting",
9148                &["$.title".to_owned(), "$.notes".to_owned()],
9149                Some("\n"),
9150            )
9151            .expect("update");
9152        assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
9153        assert_eq!(updated.separator, "\n");
9154
9155        // Remove
9156        service
9157            .remove_fts_property_schema("Meeting")
9158            .expect("remove");
9159        let after_remove = service
9160            .describe_fts_property_schema("Meeting")
9161            .expect("describe after remove");
9162        assert!(after_remove.is_none());
9163
9164        // Removing a schema that does not exist is an error
9165        let err = service.remove_fts_property_schema("Meeting");
9166        assert!(err.is_err());
9167    }
9168
9169    #[test]
9170    fn describe_fts_property_schema_round_trips_recursive_entries() {
9171        let (_db, service) = setup();
9172
9173        let entries = vec![
9174            FtsPropertyPathSpec::scalar("$.title"),
9175            FtsPropertyPathSpec::recursive("$.payload"),
9176        ];
9177        let exclude = vec!["$.payload.private".to_owned()];
9178        let registered = service
9179            .register_fts_property_schema_with_entries(
9180                "KnowledgeItem",
9181                &entries,
9182                Some(" "),
9183                &exclude,
9184                crate::rebuild_actor::RebuildMode::Eager,
9185            )
9186            .expect("register recursive");
9187
9188        // The register entry point now echoes back the fully-populated
9189        // record via the same load helper used by describe/list.
9190        assert_eq!(registered.entries, entries);
9191        assert_eq!(registered.exclude_paths, exclude);
9192        assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
9193
9194        let described = service
9195            .describe_fts_property_schema("KnowledgeItem")
9196            .expect("describe")
9197            .expect("should exist");
9198        assert_eq!(described.kind, "KnowledgeItem");
9199        assert_eq!(described.entries, entries);
9200        assert_eq!(described.exclude_paths, exclude);
9201        assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
9202        assert_eq!(described.separator, " ");
9203        assert_eq!(described.format_version, 1);
9204    }
9205
    #[test]
    fn list_fts_property_schemas_round_trips_recursive_entries() {
        let (_db, service) = setup();

        let entries = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::recursive("$.payload"),
        ];
        let exclude = vec!["$.payload.secret".to_owned()];
        service
            .register_fts_property_schema_with_entries(
                "KnowledgeItem",
                &entries,
                Some(" "),
                &exclude,
                crate::rebuild_actor::RebuildMode::Eager,
            )
            .expect("register recursive");

        let listed = service.list_fts_property_schemas().expect("list");
        assert_eq!(listed.len(), 1);
        let record = &listed[0];
        assert_eq!(record.kind, "KnowledgeItem");
        assert_eq!(record.entries, entries);
        assert_eq!(record.exclude_paths, exclude);
        assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
    }

    #[test]
    fn describe_fts_property_schema_round_trips_scalar_only_entries() {
        let (_db, service) = setup();

        service
            .register_fts_property_schema(
                "Meeting",
                &["$.title".to_owned(), "$.summary".to_owned()],
                None,
            )
            .expect("register scalar");

        let described = service
            .describe_fts_property_schema("Meeting")
            .expect("describe")
            .expect("should exist");
        assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
        assert_eq!(described.entries.len(), 2);
        for entry in &described.entries {
            assert_eq!(
                entry.mode,
                FtsPropertyPathMode::Scalar,
                "scalar-only schema should deserialize every entry as Scalar"
            );
        }
        assert!(described.exclude_paths.is_empty());
    }

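    // A small consumer-side sketch (not part of the service API): callers that
    // need mode-accurate behavior should branch on `entries` rather than the
    // flat `property_paths` list. Illustrative only; it uses the same spec
    // constructors and fields as the tests above.
    #[test]
    fn sketch_partition_entries_by_mode() {
        let entries = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::recursive("$.payload"),
        ];
        // Partition into scalar vs recursive specs, as a reader of a described
        // record might before deciding which payload subtrees to walk.
        let (scalar, recursive): (Vec<_>, Vec<_>) = entries
            .iter()
            .partition(|e| e.mode == FtsPropertyPathMode::Scalar);
        assert_eq!(scalar.len(), 1);
        assert_eq!(recursive.len(), 1);
    }
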
    #[test]
    fn restore_reestablishes_property_fts_visibility() {
        let (db, service) = setup();
        let doc_table = fathomdb_schema::fts_kind_table_name("Document");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Register a property schema for Document kind.
            conn.execute(
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
                [],
            )
            .expect("register schema");
            // Create the per-kind FTS table.
            conn.execute_batch(&format!(
                "CREATE VIRTUAL TABLE IF NOT EXISTS {doc_table} USING fts5(\
                    node_logical_id UNINDEXED, text_content, \
                    tokenize = 'porter unicode61 remove_diacritics 2'\
                )"
            ))
            .expect("create per-kind table");
            // Insert an active node with extractable properties.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            // Insert a chunk so restore has something to work with for FTS.
            conn.execute(
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
                [],
            )
            .expect("insert chunk");
            // Insert a property FTS row into the per-kind table (as the write path would).
            conn.execute(
                &format!(
                    "INSERT INTO {doc_table} (node_logical_id, text_content) \
                     VALUES ('doc-1', 'Budget Q3 forecast')"
                ),
                [],
            )
            .expect("insert property fts");
            // Simulate retire: supersede the node, clear FTS.
            conn.execute(
                "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
                 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
                [],
            )
            .expect("retire event");
            conn.execute(
                "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
                [],
            )
            .expect("supersede");
            conn.execute("DELETE FROM fts_nodes", [])
                .expect("clear chunk fts");
            conn.execute(&format!("DELETE FROM {doc_table}"), [])
                .expect("clear property fts");
        }

        let report = service.restore_logical_id("doc-1").expect("restore");
        assert_eq!(report.restored_property_fts_rows, 1);

        // Verify the property FTS row was recreated in the per-kind table.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let prop_fts_count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {doc_table} WHERE node_logical_id = 'doc-1'"),
                [],
                |row| row.get(0),
            )
            .expect("prop fts count");
        assert_eq!(prop_fts_count, 1, "property FTS must be restored");

        let text: String = conn
            .query_row(
                &format!("SELECT text_content FROM {doc_table} WHERE node_logical_id = 'doc-1'"),
                [],
                |row| row.get(0),
            )
            .expect("prop fts text");
        assert_eq!(text, "Budget Q3 forecast");
    }

    #[test]
    fn safe_export_preserves_fts_property_schemas() {
        let (_db, service) = setup();
        service
            .register_fts_property_schema(
                "Goal",
                &["$.name".to_owned(), "$.rationale".to_owned()],
                None,
            )
            .expect("register schema");

        let export_dir = tempfile::TempDir::new().expect("temp dir");
        let export_path = export_dir.path().join("backup.db");
        service
            .safe_export(
                &export_path,
                SafeExportOptions {
                    force_checkpoint: false,
                },
            )
            .expect("export");

        // Open the exported DB and verify the schema survived.
        let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
        let kind: String = exported_conn
            .query_row(
                "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
                [],
                |row| row.get(0),
            )
            .expect("schema must exist in export");
        assert_eq!(kind, "Goal");
        let paths_json: String = exported_conn
            .query_row(
                "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
                [],
                |row| row.get(0),
            )
            .expect("paths must exist");
        let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
        assert_eq!(paths, vec!["$.name", "$.rationale"]);
    }

    #[test]
    #[allow(clippy::too_many_lines)]
    fn export_recovery_rebuilds_property_fts_from_canonical_state() {
        let (db, service) = setup();
        let goal_table = fathomdb_schema::fts_kind_table_name("Goal");
        // Register a schema and insert two nodes with extractable properties.
        service
            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
            .expect("register");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node 1");
            conn.execute(
                &format!(
                    "INSERT INTO {goal_table} (node_logical_id, text_content) \
                     VALUES ('goal-1', 'Ship v2')"
                ),
                [],
            )
            .expect("insert property FTS row 1");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
                [],
            )
            .expect("insert node 2");
            conn.execute(
                &format!(
                    "INSERT INTO {goal_table} (node_logical_id, text_content) \
                     VALUES ('goal-2', 'Launch redesign')"
                ),
                [],
            )
            .expect("insert property FTS row 2");
        }

        // Export.
        let export_dir = tempfile::TempDir::new().expect("temp dir");
        let export_path = export_dir.path().join("backup.db");
        service
            .safe_export(
                &export_path,
                SafeExportOptions {
                    force_checkpoint: false,
                },
            )
            .expect("export");

        // Corrupt the derived rows: replace correct text with wrong text for
        // goal-1, and delete the row for goal-2 entirely. This exercises both
        // corrupted-but-present rows and missing rows in the same recovery.
        {
            let conn = rusqlite::Connection::open(&export_path).expect("open export");
            // Bootstrap the exported DB to get per-kind tables.
            SchemaManager::new()
                .bootstrap(&conn)
                .expect("bootstrap export");
            conn.execute(
                &format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
            )
            .expect("delete old row");
            conn.execute(
                &format!(
                    "INSERT INTO {goal_table} (node_logical_id, text_content) \
                     VALUES ('goal-1', 'completely wrong stale text')"
                ),
                [],
            )
            .expect("insert corrupted row");
            conn.execute(
                &format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-2'"),
                [],
            )
            .expect("delete goal-2 row");
        }

        // Open the exported DB and rebuild projections from canonical state.
        let schema = Arc::new(SchemaManager::new());
        let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
        exported_service
            .rebuild_projections(ProjectionTarget::Fts)
            .expect("rebuild");

        // Verify the per-kind table has the correct rows after recovery.
        let conn = rusqlite::Connection::open(&export_path).expect("open export for verify");
        let goal1_text: String = conn
            .query_row(
                &format!("SELECT text_content FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
                |r| r.get(0),
            )
            .expect("goal-1 text after rebuild");
        assert_eq!(
            goal1_text, "Ship v2",
            "goal-1 text must be corrected by rebuild"
        );

        let goal2_count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-2'"),
                [],
                |r| r.get(0),
            )
            .expect("goal-2 count");
        assert_eq!(goal2_count, 1, "goal-2 row must be restored by rebuild");

        let stale_count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {goal_table} WHERE text_content = 'completely wrong stale text'"),
                [],
                |r| r.get(0),
            )
            .expect("stale count");
        assert_eq!(stale_count, 0, "corrupted text must be gone after rebuild");

        // Verify integrity and semantics are clean after recovery.
        let integrity = exported_service.check_integrity().expect("integrity");
        assert_eq!(integrity.missing_property_fts_rows, 0);
        let semantics = exported_service.check_semantics().expect("semantics");
        assert_eq!(semantics.drifted_property_fts_rows, 0);
        assert_eq!(semantics.orphaned_property_fts_rows, 0);
        assert_eq!(semantics.duplicate_property_fts_rows, 0);
    }

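    // A hedged operational sketch distilled from the recovery test above:
    // gate a full FTS rebuild on the integrity report. Method and field names
    // are the ones already exercised in this module; the flow is illustrative,
    // not a prescribed recovery procedure.
    #[test]
    fn sketch_integrity_gated_rebuild() {
        let (_db, service) = setup();
        let integrity = service.check_integrity().expect("integrity");
        // On a fresh database nothing is missing, so the rebuild arm is
        // skipped; after corruption it would run, as the test above shows.
        if integrity.missing_property_fts_rows > 0 {
            service
                .rebuild_projections(ProjectionTarget::Fts)
                .expect("rebuild");
        }
    }
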
    #[test]
    fn check_integrity_no_false_positives_for_empty_extraction() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Register a schema that looks for $.searchable.
            conn.execute(
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
                [],
            )
            .expect("register schema");
            // Insert a node whose properties do NOT contain $.searchable;
            // it correctly has no property FTS row.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
        }

        let report = service.check_integrity().expect("integrity");
        assert_eq!(
            report.missing_property_fts_rows, 0,
            "node with no extractable values must not be counted as missing"
        );
    }

    #[test]
    fn check_integrity_detects_genuinely_missing_property_fts_rows() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Ticket', '[\"$.title\"]', ' ')",
                [],
            )
            .expect("register schema");
            // Insert a node WITH an extractable $.title but no property FTS row.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
        }

        let report = service.check_integrity().expect("integrity");
        assert_eq!(
            report.missing_property_fts_rows, 1,
            "node with extractable values but no property FTS row must be detected"
        );
    }

    #[test]
    fn rebuild_projections_fts_restores_missing_property_fts_rows() {
        let (db, service) = setup();
        let goal_table = fathomdb_schema::fts_kind_table_name("Goal");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Goal', '[\"$.name\"]', ' ')",
                [],
            )
            .expect("register schema");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            // Deliberately do NOT insert a property FTS row.
        }

        let report = service
            .rebuild_projections(ProjectionTarget::Fts)
            .expect("rebuild");
        assert!(
            report.rebuilt_rows >= 1,
            "rebuild must insert at least one property FTS row"
        );

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let text: String = conn
            .query_row(
                &format!("SELECT text_content FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
                |row| row.get(0),
            )
            .expect("property FTS row must exist after rebuild");
        assert_eq!(text, "Ship v2");
    }

    #[test]
    fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
        let (db, service) = setup();
        let goal_table = fathomdb_schema::fts_kind_table_name("Goal");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
                 VALUES ('Goal', '[\"$.name\"]', ' ')",
                [],
            )
            .expect("register schema");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            // Create the per-kind table, then insert and delete a row to simulate corruption.
            conn.execute_batch(&format!(
                "CREATE VIRTUAL TABLE IF NOT EXISTS {goal_table} USING fts5(\
                    node_logical_id UNINDEXED, text_content, \
                    tokenize = 'porter unicode61 remove_diacritics 2'\
                )"
            ))
            .expect("create per-kind table");
            conn.execute(
                &format!(
                    "INSERT INTO {goal_table} (node_logical_id, text_content) \
                     VALUES ('goal-1', 'Ship v2')"
                ),
                [],
            )
            .expect("insert property fts");
            conn.execute(
                &format!("DELETE FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
            )
            .expect("delete property fts");
        }

        let report = service
            .rebuild_missing_projections()
            .expect("rebuild missing");
        assert!(
            report.rebuilt_rows >= 1,
            "missing rebuild must insert the gap-fill row"
        );

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
                |row| row.get(0),
            )
            .expect("count");
        assert_eq!(
            count, 1,
            "gap-fill must restore exactly one property FTS row"
        );
    }

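    // A hedged counterpart to the gap-fill test: on a fresh database with no
    // schemas or nodes there is no gap to fill, so the missing-projection
    // rebuild should report zero rebuilt rows. The zero expectation for the
    // empty case is our assumption, written as a sketch rather than a
    // guaranteed contract.
    #[test]
    fn sketch_rebuild_missing_is_noop_on_fresh_database() {
        let (_db, service) = setup();
        let report = service
            .rebuild_missing_projections()
            .expect("rebuild missing");
        assert_eq!(report.rebuilt_rows, 0, "nothing should be missing yet");
    }
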
    #[test]
    fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
        // This test verifies that a full FTS rebuild clears per-kind tables whose
        // schema has been removed (orphaned state). We create the orphaned state
        // directly via SQL (bypassing the service API, which now eagerly deletes rows
        // on schema removal) to simulate a table that was left populated from a
        // previous registration cycle.
        let (db, service) = setup();
        let goal_table = fathomdb_schema::fts_kind_table_name("Goal");
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
            // Create the per-kind table WITHOUT registering a schema, simulating
            // orphaned rows that remain after schema removal (or a pre-existing
            // table from a previous cycle).
            conn.execute_batch(&format!(
                "CREATE VIRTUAL TABLE IF NOT EXISTS {goal_table} \
                 USING fts5(node_logical_id UNINDEXED, text_content, tokenize = 'porter unicode61 remove_diacritics 2')"
            ))
            .expect("create per-kind table");
            conn.execute(
                &format!(
                    "INSERT INTO {goal_table} (node_logical_id, text_content) \
                     VALUES ('goal-1', 'Ship v2')"
                ),
                [],
            )
            .expect("insert property fts");
        }

        // No schema is registered, so the per-kind table holds orphaned rows.
        let semantics = service.check_semantics().expect("semantics");
        assert_eq!(
            semantics.orphaned_property_fts_rows, 1,
            "orphaned property FTS rows must be detected with no registered schema"
        );

        // A full rebuild should clean them (no schema means nothing to rebuild).
        service
            .rebuild_projections(ProjectionTarget::Fts)
            .expect("rebuild");

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {goal_table} WHERE node_logical_id = 'goal-1'"),
                [],
                |row| row.get(0),
            )
            .expect("count");
        assert_eq!(
            count, 0,
            "rebuild must delete rows from per-kind tables with no registered schema"
        );
    }

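    // Reference sketch: the three property-FTS semantics counters asserted
    // throughout this module, checked together on a fresh database where each
    // should start at zero. The field names come from the assertions above;
    // the all-zero expectation on an empty store is our assumption.
    #[test]
    fn sketch_semantics_counters_start_at_zero() {
        let (_db, service) = setup();
        let semantics = service.check_semantics().expect("semantics");
        assert_eq!(semantics.drifted_property_fts_rows, 0);
        assert_eq!(semantics.orphaned_property_fts_rows, 0);
        assert_eq!(semantics.duplicate_property_fts_rows, 0);
    }
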
    mod validate_fts_property_paths_tests {
        use super::super::validate_fts_property_paths;

        #[test]
        fn valid_simple_path() {
            assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
        }

        #[test]
        fn valid_nested_path() {
            assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
        }

        #[test]
        fn valid_underscore_segment() {
            assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
        }

        #[test]
        fn rejects_bare_prefix() {
            let result = validate_fts_property_paths(&["$.".to_owned()]);
            assert!(result.is_err(), "path '$.' must be rejected");
        }

        #[test]
        fn rejects_double_dot() {
            let result = validate_fts_property_paths(&["$..x".to_owned()]);
            assert!(result.is_err(), "path '$..x' must be rejected");
        }

        #[test]
        fn rejects_trailing_dot() {
            let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
            assert!(result.is_err(), "path '$.foo.' must be rejected");
        }

        #[test]
        fn rejects_space_in_segment() {
            let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
            assert!(result.is_err(), "path '$.foo bar' must be rejected");
        }

        #[test]
        fn rejects_bracket_syntax() {
            let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
            assert!(result.is_err(), "path '$.foo[0]' must be rejected");
        }

        #[test]
        fn rejects_duplicates() {
            let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
            assert!(result.is_err(), "duplicate paths must be rejected");
        }

        #[test]
        fn rejects_empty_list() {
            let result = validate_fts_property_paths(&[]);
            assert!(result.is_err(), "empty path list must be rejected");
        }
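
        // A compact, table-driven recap of the cases above; same validator,
        // same expectations, shown purely as a usage sketch.
        #[test]
        fn sketch_table_driven_validation() {
            let accepted = ["$.name", "$.address.city", "$.a_b"];
            let rejected = ["$.", "$..x", "$.foo.", "$.foo bar", "$.foo[0]"];
            for path in accepted {
                assert!(
                    validate_fts_property_paths(&[path.to_owned()]).is_ok(),
                    "{path} should be accepted"
                );
            }
            for path in rejected {
                assert!(
                    validate_fts_property_paths(&[path.to_owned()]).is_err(),
                    "{path} should be rejected"
                );
            }
        }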
    }

    // --- A-6: per-kind FTS table tests ---

    #[test]
    fn register_fts_schema_writes_to_per_kind_table() {
        // After A-6: register_fts_property_schema writes rows to fts_props_<kind>,
        // NOT to fts_node_properties.
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Insert a node before registering the schema.
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
        }

        // Registering the schema triggers an eager rebuild that writes to the
        // per-kind table.
        service
            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
            .expect("register schema");

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let table = fathomdb_schema::fts_kind_table_name("Goal");
        // The per-kind table must have the row.
        let per_kind_count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {table} WHERE node_logical_id = 'goal-1'"),
                [],
                |row| row.get(0),
            )
            .expect("per-kind count");
        assert_eq!(
            per_kind_count, 1,
            "per-kind table must have the row after registration"
        );
    }

    #[test]
    fn remove_fts_schema_deletes_from_per_kind_table() {
        // After A-6: remove_fts_property_schema deletes rows from fts_props_<kind>.
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
                [],
            )
            .expect("insert node");
        }

        service
            .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
            .expect("register schema");
        service
            .remove_fts_property_schema("Goal")
            .expect("remove schema");

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let table = fathomdb_schema::fts_kind_table_name("Goal");
        let per_kind_count: i64 = conn
            .query_row(
                &format!("SELECT count(*) FROM {table} WHERE node_logical_id = 'goal-1'"),
                [],
                |row| row.get(0),
            )
            .expect("per-kind count");
        assert_eq!(
            per_kind_count, 0,
            "per-kind table must be empty after schema removal"
        );
    }

    // --- B-1: weight field tests ---

    #[test]
    fn fts_path_spec_with_weight_builder() {
        let spec = FtsPropertyPathSpec::scalar("$.title").with_weight(5.0);
        assert_eq!(spec.weight, Some(5.0));
        assert_eq!(spec.path, "$.title");
        assert_eq!(spec.mode, FtsPropertyPathMode::Scalar);
    }

    #[test]
    fn fts_path_spec_serialize_with_weight() {
        use super::serialize_property_paths_json;
        let entries = vec![
            FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
            FtsPropertyPathSpec::scalar("$.body"),
        ];
        let json = serialize_property_paths_json(&entries, &[]).expect("serialize");
        // Must use rich object format because a weight is present
        let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
        let paths = v
            .get("paths")
            .expect("paths key")
            .as_array()
            .expect("array");
        assert_eq!(paths.len(), 2);
        // First entry has weight
        assert_eq!(
            paths[0].get("path").and_then(serde_json::Value::as_str),
            Some("$.title")
        );
        assert_eq!(
            paths[0].get("weight").and_then(serde_json::Value::as_f64),
            Some(2.0)
        );
        // Second entry has no weight field
        assert!(
            paths[1].get("weight").is_none(),
            "unweighted spec must omit weight field"
        );
    }

    #[test]
    fn fts_path_spec_serialize_no_weights() {
        use super::serialize_property_paths_json;
        let entries = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::scalar("$.payload"),
        ];
        let json = serialize_property_paths_json(&entries, &[]).expect("serialize");
        // Must use bare string array (backward compat)
        let v: serde_json::Value = serde_json::from_str(&json).expect("parse");
        assert!(
            v.is_array(),
            "all-scalar no-weight schema must serialize as bare string array"
        );
        let arr = v.as_array().expect("array");
        assert_eq!(arr.len(), 2);
        assert_eq!(arr[0].as_str(), Some("$.title"));
        assert_eq!(arr[1].as_str(), Some("$.payload"));
    }

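    // The two on-disk JSON shapes exercised by the serialization tests above,
    // written out literally for reference. The literals mirror what
    // serialize_property_paths_json produced in those tests; this sketch only
    // re-parses them to show the structural difference between the shapes.
    #[test]
    fn sketch_property_paths_json_shapes() {
        // Legacy shape: a bare string array when no entry carries a weight.
        let bare: serde_json::Value =
            serde_json::from_str(r#"["$.title", "$.payload"]"#).expect("bare shape");
        assert!(bare.is_array());

        // Rich shape: an object with a "paths" array once any weight is set.
        let rich: serde_json::Value = serde_json::from_str(
            r#"{"paths":[{"path":"$.title","weight":2.0},{"path":"$.body"}]}"#,
        )
        .expect("rich shape");
        assert!(rich.get("paths").and_then(serde_json::Value::as_array).is_some());
    }
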
    #[test]
    fn fts_weight_validation_out_of_range() {
        let (_db, service) = setup();
        // weight = 0.0 must be rejected
        let entries_zero = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(0.0)];
        let result = service.register_fts_property_schema_with_entries(
            "Article",
            &entries_zero,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        );
        assert!(result.is_err(), "weight 0.0 must be rejected");
        let err_msg = result.expect_err("weight 0.0 must be rejected").to_string();
        assert!(
            err_msg.contains("weight"),
            "error must mention weight: {err_msg}"
        );

        // weight = 1001.0 must be rejected
        let entries_big = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(1001.0)];
        let result = service.register_fts_property_schema_with_entries(
            "Article",
            &entries_big,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        );
        assert!(result.is_err(), "weight 1001.0 must be rejected");
    }

    #[test]
    fn fts_weight_validation_valid() {
        let (_db, service) = setup();
        let entries = vec![FtsPropertyPathSpec::scalar("$.title").with_weight(10.0)];
        let result = service.register_fts_property_schema_with_entries(
            "Article",
            &entries,
            None,
            &[],
            crate::rebuild_actor::RebuildMode::Eager,
        );
        assert!(
            result.is_ok(),
            "weight 10.0 must be accepted: {:?}",
            result.err()
        );
    }

    // --- B-2: create_or_replace_fts_kind_table tests ---

    #[test]
    fn create_or_replace_creates_multi_column_table() {
        use super::create_or_replace_fts_kind_table;
        let (db, _service) = setup();
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let specs = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::recursive("$.payload"),
        ];
        create_or_replace_fts_kind_table(
            &conn,
            "Article",
            &specs,
            fathomdb_schema::DEFAULT_FTS_TOKENIZER,
        )
        .expect("create table");

        // Verify the table exists and starts empty.
        let table = fathomdb_schema::fts_kind_table_name("Article");
        let count: i64 = conn
            .query_row(&format!("SELECT count(*) FROM {table}"), [], |r| r.get(0))
            .expect("count");
        assert_eq!(count, 0, "new table must be empty");

        // Verify the per-spec columns exist by inserting a row with named columns.
        let title_col = fathomdb_schema::fts_column_name("$.title", false);
        let payload_col = fathomdb_schema::fts_column_name("$.payload", true);
        conn.execute(
            &format!(
                "INSERT INTO {table} (node_logical_id, {title_col}, {payload_col}) VALUES ('id1', 'hello', 'world')"
            ),
            [],
        )
        .expect("insert with per-spec columns must succeed");
    }

    #[test]
    fn create_or_replace_drops_and_recreates() {
        use super::create_or_replace_fts_kind_table;
        let (db, _service) = setup();
        let conn = sqlite::open_connection(db.path()).expect("conn");

        // First call: 1 spec
        let specs_v1 = vec![FtsPropertyPathSpec::scalar("$.title")];
        create_or_replace_fts_kind_table(
            &conn,
            "Post",
            &specs_v1,
            fathomdb_schema::DEFAULT_FTS_TOKENIZER,
        )
        .expect("create v1");

        // Second call: 2 specs (different layout)
        let specs_v2 = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::scalar("$.summary"),
        ];
        create_or_replace_fts_kind_table(
            &conn,
            "Post",
            &specs_v2,
            fathomdb_schema::DEFAULT_FTS_TOKENIZER,
        )
        .expect("create v2");

        // Verify new layout: summary column must exist
        let table = fathomdb_schema::fts_kind_table_name("Post");
        let summary_col = fathomdb_schema::fts_column_name("$.summary", false);
        conn.execute(
            &format!("INSERT INTO {table} (node_logical_id, {summary_col}) VALUES ('id1', 'text')"),
            [],
        )
        .expect("second layout must allow summary column");
    }

    #[test]
    fn create_or_replace_invalid_tokenizer() {
        use super::create_or_replace_fts_kind_table;
        let (db, _service) = setup();
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let specs = vec![FtsPropertyPathSpec::scalar("$.title")];
        let result = create_or_replace_fts_kind_table(&conn, "Post", &specs, "'; DROP TABLE --");
        assert!(result.is_err(), "invalid tokenizer must be rejected");
        let err_msg = result
            .expect_err("invalid tokenizer must be rejected")
            .to_string();
        assert!(
            err_msg.contains("tokenizer"),
            "error must mention tokenizer: {err_msg}"
        );
    }

    #[test]
    fn register_with_weights_creates_per_column_table() {
        let (db, service) = setup();
        let entries = vec![
            FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
            FtsPropertyPathSpec::scalar("$.body"),
        ];
        service
            .register_fts_property_schema_with_entries(
                "Article",
                &entries,
                None,
                &[],
                crate::rebuild_actor::RebuildMode::Eager,
            )
            .expect("register");

        // Per-kind table must have per-spec columns, not just text_content
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let table = fathomdb_schema::fts_kind_table_name("Article");
        let title_col = fathomdb_schema::fts_column_name("$.title", false);
        let body_col = fathomdb_schema::fts_column_name("$.body", false);
        // If the columns exist, insert must succeed
        conn.execute(
            &format!(
                "INSERT INTO {table} (node_logical_id, {title_col}, {body_col}) VALUES ('art-1', 'hello', 'world')"
            ),
            [],
        )
        .expect("per-spec columns must exist after registration with weights");
    }

    #[test]
    fn weighted_to_unweighted_downgrade_recreates_table() {
        let (db, service) = setup();

        // First register with weights (creates per-spec column layout).
        let weighted_entries = vec![
            FtsPropertyPathSpec::scalar("$.title").with_weight(2.0),
            FtsPropertyPathSpec::scalar("$.body"),
        ];
        service
            .register_fts_property_schema_with_entries(
                "Article",
                &weighted_entries,
                None,
                &[],
                crate::rebuild_actor::RebuildMode::Eager,
            )
            .expect("register weighted");

        // Re-register the same kind WITHOUT weights.
        let unweighted_entries = vec![
            FtsPropertyPathSpec::scalar("$.title"),
            FtsPropertyPathSpec::scalar("$.body"),
        ];
        service
            .register_fts_property_schema_with_entries(
                "Article",
                &unweighted_entries,
                None,
                &[],
                crate::rebuild_actor::RebuildMode::Eager,
            )
            .expect("re-register unweighted");

        // After downgrade, the table must have the text_content column
        // (legacy single-column layout), not the per-spec columns.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let table = fathomdb_schema::fts_kind_table_name("Article");
        let result = conn.execute(
            &format!("INSERT INTO {table} (node_logical_id, text_content) VALUES ('art-1', 'hello world')"),
            [],
        );
        assert!(
            result.is_ok(),
            "text_content column must exist after weighted-to-unweighted downgrade"
        );
    }

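    // Column-naming sketch: fts_column_name derives a per-spec column name
    // from the JSON path and the recursive flag. The exact naming scheme is an
    // implementation detail, so this sketch only asserts that distinct inputs
    // yield distinct, non-empty names rather than pinning concrete literals.
    #[test]
    fn sketch_fts_column_names_are_distinct() {
        let title_col = fathomdb_schema::fts_column_name("$.title", false);
        let payload_col = fathomdb_schema::fts_column_name("$.payload", true);
        assert!(!title_col.is_empty());
        assert!(!payload_col.is_empty());
        assert_ne!(title_col, payload_col, "different paths get different columns");
    }
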
    // --- Pack A+G: profile CRUD + tokenizer presets ---

    #[test]
    fn set_get_fts_profile_roundtrip() {
        let (_db, service) = setup();
        let profile = service
            .set_fts_profile("book", "unicode61")
            .expect("set_fts_profile");
        assert_eq!(profile.kind, "book");
        assert_eq!(profile.tokenizer, "unicode61");

        let got = service
            .get_fts_profile("book")
            .expect("get_fts_profile")
            .expect("should be Some");
        assert_eq!(got.kind, "book");
        assert_eq!(got.tokenizer, "unicode61");
    }

    #[test]
    fn fts_profile_upsert() {
        let (_db, service) = setup();
        service
            .set_fts_profile("article", "unicode61")
            .expect("first set");
        service
            .set_fts_profile("article", "porter unicode61 remove_diacritics 2")
            .expect("second set");
        let got = service
            .get_fts_profile("article")
            .expect("get")
            .expect("Some");
        assert_eq!(got.tokenizer, "porter unicode61 remove_diacritics 2");
    }

    #[test]
    fn invalid_tokenizer_rejected() {
        let (_db, service) = setup();
        let result = service.set_fts_profile("book", "'; DROP TABLE nodes --");
        assert!(result.is_err(), "invalid tokenizer must be rejected");
        let msg = result.expect_err("must be Err").to_string();
        assert!(
            msg.contains("tokenizer") || msg.contains("invalid"),
            "error must mention tokenizer or invalid: {msg}"
        );
    }

    #[test]
    fn preset_recall_optimized_english() {
        assert_eq!(
            super::resolve_tokenizer_preset("recall-optimized-english"),
            "porter unicode61 remove_diacritics 2"
        );
    }

    #[test]
    fn preset_precision_optimized() {
        assert_eq!(
            super::resolve_tokenizer_preset("precision-optimized"),
            "unicode61 remove_diacritics 2"
        );
    }

    #[test]
    fn preset_global_cjk() {
        assert_eq!(super::resolve_tokenizer_preset("global-cjk"), "icu");
    }

    #[test]
    fn preset_substring_trigram() {
        assert_eq!(
            super::resolve_tokenizer_preset("substring-trigram"),
            "trigram"
        );
    }

    #[test]
    fn preset_source_code() {
        assert_eq!(
            super::resolve_tokenizer_preset("source-code"),
            "unicode61 tokenchars '._-$@'"
        );
    }

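    // The five presets above collected into one table-driven sketch for quick
    // reference. Same resolver, same expected expansions as the individual
    // tests; nothing new is asserted.
    #[test]
    fn sketch_tokenizer_preset_table() {
        let presets = [
            ("recall-optimized-english", "porter unicode61 remove_diacritics 2"),
            ("precision-optimized", "unicode61 remove_diacritics 2"),
            ("global-cjk", "icu"),
            ("substring-trigram", "trigram"),
            ("source-code", "unicode61 tokenchars '._-$@'"),
        ];
        for (preset, expansion) in presets {
            assert_eq!(super::resolve_tokenizer_preset(preset), expansion, "{preset}");
        }
    }
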
    #[test]
    fn preview_fts_row_count() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            for i in 0..5u32 {
                conn.execute(
                    "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                     VALUES (?1, ?2, 'book', '{}', 100, 'src')",
                    rusqlite::params![format!("r{i}"), format!("lg{i}")],
                )
                .expect("insert node");
            }
            // Insert one superseded node that must NOT count
            conn.execute(
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref, superseded_at) \
                 VALUES ('r99', 'lg99', 'book', '{}', 100, 'src', 200)",
                [],
            )
            .expect("insert superseded");
        }
        let impact = service
            .preview_projection_impact("book", "fts")
            .expect("preview");
        assert_eq!(impact.rows_to_rebuild, 5);
    }

    #[test]
    fn preview_populates_current_tokenizer() {
        let (_db, service) = setup();
        service
            .set_fts_profile("doc", "trigram")
            .expect("set profile");
        let impact = service
            .preview_projection_impact("doc", "fts")
            .expect("preview");
        assert_eq!(impact.current_tokenizer, Some("trigram".to_owned()));
        assert_eq!(impact.target_tokenizer, None);
    }

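    // Hedged usage sketch combining the two preview tests above: set a
    // profile, then preview. The zero row count for a kind with no nodes is
    // our assumption about the empty case; the field and method names are the
    // ones already exercised in this module.
    #[test]
    fn sketch_preview_reflects_profile_before_any_nodes() {
        let (_db, service) = setup();
        service
            .set_fts_profile("doc", "unicode61")
            .expect("set profile");
        let impact = service
            .preview_projection_impact("doc", "fts")
            .expect("preview");
        assert_eq!(impact.rows_to_rebuild, 0, "no active 'doc' nodes inserted");
        assert_eq!(impact.current_tokenizer, Some("unicode61".to_owned()));
    }
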
    // --- Review fix: tokenizer allowlist alignment ---

    #[test]
    fn create_or_replace_source_code_tokenizer_is_accepted() {
        // The source-code preset expands to "unicode61 tokenchars '._-$@'", which
        // contains `.`, `-`, `$`, `@`. The allowlist in create_or_replace_fts_kind_table
        // must accept these characters (matching set_fts_profile's allowlist).
        use super::create_or_replace_fts_kind_table;
        let (db, _service) = setup();
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let specs = vec![FtsPropertyPathSpec::scalar("$.symbol")];
        let source_code_tokenizer = "unicode61 tokenchars '._-$@'";
        let result =
            create_or_replace_fts_kind_table(&conn, "Symbol", &specs, source_code_tokenizer);
        assert!(
            result.is_ok(),
            "source-code tokenizer string must be accepted by create_or_replace_fts_kind_table: {:?}",
            result.err()
        );
    }

    #[test]
    fn source_code_profile_round_trip_through_register_fts_schema() {
        // Verify that set_fts_profile("source-code") followed by
        // register_fts_property_schema succeeds end-to-end. This previously
        // failed because set_fts_profile accepted "unicode61 tokenchars '._-$@'"
        // while create_or_replace_fts_kind_table rejected it (it only allowed " '_").
        let db = tempfile::NamedTempFile::new().expect("temp file");
        let schema = Arc::new(fathomdb_schema::SchemaManager::new());

        // Bootstrap the schema (creates the projection_profiles table via migration 20).
        {
            let _coord = crate::ExecutionCoordinator::open(
                db.path(),
                Arc::clone(&schema),
                None,
                1,
                Arc::new(crate::TelemetryCounters::default()),
                None,
            )
            .expect("coordinator opens for bootstrap");
        }

        let service = AdminService::new(db.path(), Arc::clone(&schema));

        // Set the source-code profile (the preset resolver stores
        // "unicode61 tokenchars '._-$@'").
        service
            .set_fts_profile("Symbol", "source-code")
            .expect("set_fts_profile with source-code preset must succeed");

        // Register an FTS schema for this kind; this calls
        // create_or_replace_fts_kind_table with the tokenizer from the profile row.
        let result = service.register_fts_property_schema("Symbol", &["$.name".to_owned()], None);
        assert!(
            result.is_ok(),
            "register_fts_property_schema must succeed when source-code profile is active: {:?}",
            result.err()
        );
    }
}