Skip to main content

ckg_storage/store/
mod.rs

1//! Storage facade over CozoDB. One DB per repo, plus a shared registry DB.
2//!
3//! DDL is run idempotently on first open: Cozo errors if a relation already
4//! exists, and we detect that string and swallow it so re-opens are no-ops.
5//!
//! Sub-modules own the implementation of each concern:
//! - `lifecycle`  — open / schema-version check / rebuild / path-swap
//! - `meta`       — boolean Meta sentinels (needs_reindex, index_in_progress)
//! - `insert`     — put_symbols / put_edges
//! - `resolve`    — resolve_cross_file_calls / detect_test_edges
//! - `registry`   — `RegistryStorage`, the handle for the shared registry DB
11
12use std::collections::BTreeMap;
13use std::path::{Path, PathBuf};
14
15use ckg_core::{Error, Result};
16use cozo::{DataValue, DbInstance, ScriptMutability};
17
18use self::meta::{read_meta_bool, stamp_meta_bool, stamp_needs_reindex};
19
20mod insert;
21mod lifecycle;
22mod meta;
23mod registry;
24mod resolve;
25
26pub use registry::RegistryStorage;
27
28// ---------------------------------------------------------------------------
29// Shared helpers (used by multiple sub-modules via `super::map_err`)
30// ---------------------------------------------------------------------------
31
32pub(super) fn map_err(e: impl std::fmt::Display) -> Error {
33    Error::Storage(e.to_string())
34}
35
36// ---------------------------------------------------------------------------
37// Core structs
38// ---------------------------------------------------------------------------
39
/// Per-repo Cozo DB handle.
///
/// All fields are `pub(super)`; the `repo_id()`, `db_path()` and `db()`
/// accessors on `Storage` hand out shared references to them.
pub struct Storage {
    /// Identifier of the repo this DB belongs to.
    pub(super) repo_id: ckg_core::RepoId,
    /// Path associated with this DB.
    /// NOTE(review): presumably the on-disk location of the Cozo store —
    /// confirm against the open logic in `lifecycle`.
    pub(super) db_path: PathBuf,
    /// The Cozo instance that every script run through this handle executes
    /// against.
    pub(super) db: DbInstance,
}
46
47impl Storage {
48    // --- accessors ----------------------------------------------------------
49
50    pub fn repo_id(&self) -> &ckg_core::RepoId {
51        &self.repo_id
52    }
53
54    pub fn db_path(&self) -> &Path {
55        &self.db_path
56    }
57
58    /// CR-storage-M-7: returns the currently-recorded `Meta.root_path`,
59    /// which may differ from the caller's input shape if open_at auto-
60    /// migrated a symlink-equivalent path (canonicalize-equal). Callers
61    /// that also stamp the path elsewhere (e.g. registry's `Repo.root_path`
62    /// via `RegistryStorage::put_repo`) should use this accessor so the
63    /// two authoritative shapes stay consistent.
64    ///
65    /// CR-storage-H3: returns `Result<Option<String>>` so callers can
66    /// distinguish "no row recorded" (Ok(None) — fresh DB) from "Meta
67    /// read failed" (Err — disk error / corruption). Pre-fix the
68    /// `.ok()?` collapse made both cases indistinguishable, mirroring
69    /// the same bug pattern that `RootPathProbe::ReadFailed` was
70    /// introduced to fix in `lifecycle.rs`.
71    pub fn recorded_root_path(&self) -> Result<Option<String>> {
72        let rows = self
73            .db
74            .run_script(
75                "?[v] := *Meta{key: \"root_path\", value: v}",
76                BTreeMap::new(),
77                ScriptMutability::Immutable,
78            )
79            .map_err(map_err)?;
80        Ok(rows.rows.first().and_then(|r| r.first()).and_then(|v| match v {
81            DataValue::Str(s) => Some(s.to_string()),
82            _ => None,
83        }))
84    }
85
86    pub fn db(&self) -> &DbInstance {
87        &self.db
88    }
89
90    // --- script runners -----------------------------------------------------
91
92    /// Run a Cozo script with **mutable** access (`:put`, `:rm`, `:create`
93    /// etc. are allowed). Safety relies on Cozo's `ScriptMutability::Mutable`
94    /// runtime gate — there is no string-level prefilter.
95    ///
96    /// **STORAGE-H2 / danger:** The name is intentionally verbose. Callers
97    /// must hold an explicit intent to mutate; prefer `Self::run_immutable`
98    /// or `Self::run_with_immutable` for all read paths and any
99    /// caller-supplied Datalog (e.g. MCP `query` tool). This keeps the
100    /// footprint of mutable execution small and auditable.
101    pub fn run_mutable_unchecked(&self, script: &str) -> Result<cozo::NamedRows> {
102        self.db
103            .run_script(script, BTreeMap::new(), ScriptMutability::Mutable)
104            .map_err(map_err)
105    }
106
107    /// Read-only run — passes `ScriptMutability::Immutable` to Cozo which
108    /// rejects scripts containing `:put`, `:rm`, `:create`, `:replace`,
109    /// `:ensure_not`, etc. **at execution time** (not string-prefiltered).
110    /// Use this for any caller-supplied Datalog (MCP `query` tool) so a
111    /// malicious client can't drop or mutate relations.
112    pub fn run_immutable(&self, script: &str) -> Result<cozo::NamedRows> {
113        self.db
114            .run_script(script, BTreeMap::new(), ScriptMutability::Immutable)
115            .map_err(map_err)
116    }
117
118    /// Mutable run with parameters. Same safety model as `Self::run` — relies
119    /// on `ScriptMutability::Mutable` runtime gate, no string prefilter.
120    pub fn run_with(
121        &self,
122        script: &str,
123        params: BTreeMap<String, DataValue>,
124    ) -> Result<cozo::NamedRows> {
125        self.db
126            .run_script(script, params, ScriptMutability::Mutable)
127            .map_err(map_err)
128    }
129
130    /// Read-only variant of `run_with` — caller-supplied params, but the
131    /// script is rejected if it contains `:put` / `:rm` / `:create` /
132    /// `:replace`. Use for any caller-controlled Datalog so a malicious /
133    /// typo'd script can't mutate.
134    pub fn run_with_immutable(
135        &self,
136        script: &str,
137        params: BTreeMap<String, DataValue>,
138    ) -> Result<cozo::NamedRows> {
139        self.db
140            .run_script(script, params, ScriptMutability::Immutable)
141            .map_err(map_err)
142    }
143
144    // --- meta sentinels -----------------------------------------------------
145
146    /// True if this repo was schema-rebuilt and hasn't been re-indexed since.
147    /// Set when `Storage::open_*` triggers `rebuild_at_path`, cleared via
148    /// `mark_indexed()` after a successful `ckg index` run.
149    ///
150    /// Returns `false` on any read failure — the sentinel is best-effort UX
151    /// guidance, not a correctness gate.
152    pub fn needs_reindex(&self) -> bool {
153        read_meta_bool(&self.db, "needs_reindex")
154    }
155
156    /// CR-I-2: Atomicity sentinel. Stamp `index_in_progress=true` BEFORE the
157    /// first `put_symbols` / `put_edges` of a fresh index run. On next
158    /// `Storage::open`, if this flag is still set, `needs_reindex` is promoted.
159    /// Cleared by `mark_indexed`.
160    pub fn mark_index_in_progress(&self) -> Result<()> {
161        stamp_meta_bool(&self.db, "index_in_progress", true)
162    }
163
164    /// CR-I-2: True if a previous index run started but didn't reach
165    /// `mark_indexed`. Mostly internal — `Storage::open_at` checks this on
166    /// every open.
167    pub fn is_index_in_progress(&self) -> bool {
168        read_meta_bool(&self.db, "index_in_progress")
169    }
170
171    /// Clear the `needs_reindex` AND `index_in_progress` sentinels after a
172    /// successful `ckg index` run. Safe to call when the sentinels are already
173    /// absent.
174    pub fn mark_indexed(&self) -> Result<()> {
175        stamp_needs_reindex(&self.db, false)?;
176        stamp_meta_bool(&self.db, "index_in_progress", false)?;
177        Ok(())
178    }
179
180}
181
182// Tests live in a sibling `tests.rs` so the production surface of
183// `store/mod.rs` stays auditable on its own. The test module reaches
184// back via `super::*` for the same items it had inline.
185#[cfg(test)]
186mod tests;