Skip to main content

orbok_cache/
service.rs

1//! Cache service over localcache (Appendix A §8–§12).
2//!
3//! Rules enforced here:
4//! - cache payloads live in `orbok-cache.sqlite3`, never in the catalog
5//!   (Appendix A §3);
6//! - the catalog stays authoritative — this service stores derived
7//!   payloads only, keyed by canonical source path;
8//! - reads and writes take a [`ValidatedPath`] so nothing outside the
9//!   PathGuard boundary can be cached (RFC-003 §8 carried through);
10//! - cleanup runs only from a validated [`CleanupPlan`] (RFC-001 §14);
11//! - engines self-register in the catalog `cache_engines` table
12//!   (RFC-002 §7.16) so the storage dashboard can enumerate them.
13
14use crate::namespace::OrbokCacheNamespace;
15use localcache::{CacheEngine, ChangeDetectionMode, LocalFileCacheError};
16use orbok_core::{CleanupAction, CleanupPlan, OrbokError, OrbokResult};
17use orbok_db::{CACHE_FILE_NAME, Catalog};
18use orbok_fs::ValidatedPath;
19use serde::Serialize;
20use serde::de::DeserializeOwned;
21use std::path::{Path, PathBuf};
22use std::time::Duration;
23
/// Tuning for one engine.
///
/// The `Default` value has both fields set to `None`: no TTL and no
/// entry bound. Options compare by value so callers and tests can check
/// a configuration with `==`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EngineOptions {
    /// Time-to-live; `None` keeps entries until invalidated.
    pub ttl: Option<Duration>,
    /// LRU bound; `None` is unbounded (storage budget enforced via
    /// cleanup instead).
    pub max_entries: Option<usize>,
}
33
/// Result of a cache-side cleanup run.
///
/// The `Default` value reports zero removals. Outcomes compare by value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheCleanupOutcome {
    /// Total number of cache entries removed during the run.
    pub removed_entries: u64,
}
39
/// Per-namespace usage for storage accounting (Appendix A §11).
///
/// Usage records compare by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamespaceUsage {
    /// Namespace name as reported by the engine's statistics.
    pub namespace: String,
    /// Number of entries currently stored in the namespace.
    pub entries: u64,
    /// Total payload size in bytes as reported by the engine's
    /// statistics.
    pub payload_bytes: u64,
}
47
/// The orbok cache service. One per data directory.
///
/// Holds only the path of the payload database; engines are opened on
/// demand by the service's methods.
#[derive(Debug)]
pub struct CacheService {
    /// Location of the payload database file.
    db_path: PathBuf,
}
52
53impl CacheService {
54    /// Create the service for a data directory; the payload database is
55    /// `<data_dir>/orbok-cache.sqlite3` (Appendix A §3).
56    pub fn new(data_dir: &Path) -> Self {
57        Self {
58            db_path: data_dir.join(CACHE_FILE_NAME),
59        }
60    }
61
62    /// Payload database path (storage dashboard).
63    pub fn db_path(&self) -> &Path {
64        &self.db_path
65    }
66
67    /// Open a typed engine for `namespace`, registering it in the
68    /// catalog `cache_engines` table. Change detection is
69    /// metadata-then-full-hash (Appendix A §8: metadata fast path with
70    /// hash confirmation, mirroring the scanner's policy).
71    pub fn engine<T: Serialize + DeserializeOwned>(
72        &self,
73        catalog: &Catalog,
74        namespace: &OrbokCacheNamespace,
75        options: EngineOptions,
76    ) -> OrbokResult<CacheEngine<T>> {
77        let mut builder = CacheEngine::<T>::builder()
78            .database(&self.db_path)
79            .namespace(namespace.as_namespace())
80            .payload_version(namespace.payload_version())
81            .change_detection(ChangeDetectionMode::MetadataThenFullHash)
82            .compress();
83        builder = match options.ttl {
84            Some(ttl) => builder.ttl(ttl),
85            None => builder.no_ttl(),
86        };
87        if let Some(n) = options.max_entries {
88            builder = builder.max_entries(n);
89        }
90        let engine = builder.build().map_err(cache_err)?;
91        self.register_engine::<T>(catalog, namespace, &options)?;
92        Ok(engine)
93    }
94
95    /// Freshness-checked read: returns the payload only when localcache
96    /// confirms the source file is unchanged (Appendix A §8). The
97    /// catalog/scanner remains the authority for file state.
98    pub fn get_fresh<T: Serialize + DeserializeOwned>(
99        engine: &CacheEngine<T>,
100        path: &ValidatedPath,
101    ) -> OrbokResult<Option<T>> {
102        Ok(engine
103            .get_if_fresh(&path.canonical)
104            .map_err(cache_err)?
105            .map(|entry| entry.payload))
106    }
107
108    /// Store a derived payload for a boundary-validated source path.
109    pub fn put<T: Serialize + DeserializeOwned>(
110        engine: &CacheEngine<T>,
111        path: &ValidatedPath,
112        payload: &T,
113    ) -> OrbokResult<()> {
114        engine.set(&path.canonical, payload).map_err(cache_err)
115    }
116
117    /// Invalidate one entry (e.g. file deleted from catalog).
118    pub fn remove<T: Serialize + DeserializeOwned>(
119        engine: &CacheEngine<T>,
120        path: &ValidatedPath,
121    ) -> OrbokResult<bool> {
122        engine.remove(&path.canonical).map_err(cache_err)
123    }
124
125    /// Safe cleanup driven by a validated plan (RFC-001 §9, Appendix A
126    /// §12). Maps each action to its payload namespaces and runs
127    /// expiry + missing-file + stale-version maintenance there.
128    pub fn run_safe_cleanup(
129        &self,
130        catalog: &Catalog,
131        plan: &CleanupPlan,
132    ) -> OrbokResult<CacheCleanupOutcome> {
133        plan.assert_safe_for_ordinary_cleanup()?;
134        let namespaces: Vec<OrbokCacheNamespace> = match plan.action {
135            CleanupAction::ClearTemporaryExtraction => vec![OrbokCacheNamespace::ExtractSegments],
136            CleanupAction::ClearSnippetCache => vec![OrbokCacheNamespace::PreviewCache],
137            CleanupAction::RemoveReplacedStaleIndexes => vec![OrbokCacheNamespace::ChunkBundle],
138            // Search cache lives in the catalog, not in localcache.
139            CleanupAction::ClearExpiredSearchCache => vec![],
140            _ => return Err(OrbokError::CleanupWouldTouchPersistentData),
141        };
142        let mut outcome = CacheCleanupOutcome::default();
143        for namespace in namespaces {
144            let engine = self.maintenance_engine(catalog, &namespace)?;
145            outcome.removed_entries += engine.cleanup_expired().map_err(cache_err)? as u64;
146            outcome.removed_entries += engine.cleanup_missing_files().map_err(cache_err)? as u64;
147            outcome.removed_entries += engine.purge_stale_versions().map_err(cache_err)? as u64;
148            tracing::debug!(
149                namespace = namespace.as_namespace(),
150                removed = outcome.removed_entries,
151                "cache cleanup pass"
152            );
153        }
154        Ok(outcome)
155    }
156
157    /// Reclaim file space after large deletions (storage dashboard's
158    /// explicit "shrink" action; Appendix A §12).
159    pub fn shrink(&self, catalog: &Catalog) -> OrbokResult<()> {
160        let engine = self.maintenance_engine(catalog, &OrbokCacheNamespace::PreviewCache)?;
161        engine.shrink_database().map_err(cache_err)
162    }
163
164    /// Usage per namespace for storage accounting (Appendix A §11).
165    pub fn usage(
166        &self,
167        catalog: &Catalog,
168        namespaces: &[OrbokCacheNamespace],
169    ) -> OrbokResult<Vec<NamespaceUsage>> {
170        let mut out = Vec::new();
171        for namespace in namespaces {
172            let engine = self.maintenance_engine(catalog, namespace)?;
173            let stats = engine.cache_stats().map_err(cache_err)?;
174            out.push(NamespaceUsage {
175                namespace: stats.namespace,
176                entries: stats.total_entries as u64,
177                payload_bytes: stats.total_payload_bytes,
178            });
179        }
180        Ok(out)
181    }
182
183    /// Untyped (JSON-payload) engine for maintenance operations that do
184    /// not deserialize payloads.
185    fn maintenance_engine(
186        &self,
187        catalog: &Catalog,
188        namespace: &OrbokCacheNamespace,
189    ) -> OrbokResult<CacheEngine<serde_json::Value>> {
190        self.engine::<serde_json::Value>(catalog, namespace, EngineOptions::default())
191    }
192
193    /// Upsert the engine registration row (RFC-002 §7.16).
194    fn register_engine<T>(
195        &self,
196        catalog: &Catalog,
197        namespace: &OrbokCacheNamespace,
198        options: &EngineOptions,
199    ) -> OrbokResult<()> {
200        let data_class = match namespace.data_class() {
201            orbok_core::DataClass::RebuildableIndex => "rebuildable_index",
202            _ => "ephemeral_cache",
203        };
204        let id = format!(
205            "ce_{}",
206            namespace.as_namespace().replace([':', '/'], "_")
207        );
208        let now = orbok_core::now_iso8601();
209        let conn = catalog.lock();
210        conn.execute(
211            "INSERT INTO cache_engines (cache_engine_id, engine_kind, database_path, namespace, \
212             data_class, payload_type, payload_version, ttl_seconds, max_entries, status, \
213             created_at, updated_at) VALUES (?1,'localcache',?2,?3,?4,?5,?6,?7,?8,'active',?9,?9) \
214             ON CONFLICT(engine_kind, database_path, namespace) DO UPDATE SET \
215             payload_type = ?5, payload_version = ?6, ttl_seconds = ?7, max_entries = ?8, \
216             status = 'active', updated_at = ?9",
217            rusqlite::params![
218                id,
219                self.db_path.to_string_lossy(),
220                namespace.as_namespace(),
221                data_class,
222                std::any::type_name::<T>(),
223                namespace.payload_version(),
224                options.ttl.map(|d| d.as_secs() as i64),
225                options.max_entries.map(|n| n as i64),
226                now,
227            ],
228        )
229        .map_err(|e| OrbokError::Database(e.to_string()))?;
230        Ok(())
231    }
232}
233
234fn cache_err(e: LocalFileCacheError) -> OrbokError {
235    OrbokError::Cache(e.to_string())
236}