Skip to main content

orbok_core/
data_class.rs

//! RFC-001 data lifecycle classification.
//!
//! Every piece of application-managed data belongs to exactly one
//! [`DataClass`]. Cleanup operations must be expressed as a
//! [`CleanupPlan`] before execution (RFC-001 §14: "No cleanup operation
//! should run without first producing a `CleanupPlan`"). Ordinary (safe)
//! cleanup must never touch [`DataClass::PersistentCatalog`].
8
9use serde::{Deserialize, Serialize};
10
11/// The five lifecycle classes of RFC-001 §14.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13#[serde(rename_all = "snake_case")]
14pub enum DataClass {
15    /// User configuration and known catalog state. Never deleted by
16    /// ordinary cleanup (sources, policies, file catalog, settings,
17    /// model registry, migrations).
18    PersistentCatalog,
19    /// Derived from source files and local models; deletable with
20    /// confirmation, rebuildable (keyword index, embeddings, chunks).
21    RebuildableIndex,
22    /// Speed/convenience only; deletable automatically by TTL/LRU
23    /// (search cache, snippets, rerank scores, extraction buffers).
24    EphemeralCache,
25    /// Local model files: removable only with strong confirmation.
26    LocalDependency,
27    /// Logs and events, deletable under log policy.
28    OperationalLog,
29}
30
31/// Storage accounting categories (RFC-001 §10, RFC-002 §7.12).
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum StorageCategory {
35    PersistentCatalog,
36    KeywordIndex,
37    VectorIndex,
38    SnippetCache,
39    SearchCache,
40    TemporaryExtraction,
41    ModelFiles,
42    Logs,
43}
44
45impl StorageCategory {
46    /// All categories, for iteration in accounting and the Storage view.
47    pub const ALL: [StorageCategory; 8] = [
48        StorageCategory::PersistentCatalog,
49        StorageCategory::KeywordIndex,
50        StorageCategory::VectorIndex,
51        StorageCategory::SnippetCache,
52        StorageCategory::SearchCache,
53        StorageCategory::TemporaryExtraction,
54        StorageCategory::ModelFiles,
55        StorageCategory::Logs,
56    ];
57
58    /// Catalog/key string (matches RFC-001 §10 names).
59    pub fn as_str(&self) -> &'static str {
60        match self {
61            StorageCategory::PersistentCatalog => "persistent_catalog",
62            StorageCategory::KeywordIndex => "keyword_index",
63            StorageCategory::VectorIndex => "vector_index",
64            StorageCategory::SnippetCache => "snippet_cache",
65            StorageCategory::SearchCache => "search_cache",
66            StorageCategory::TemporaryExtraction => "temporary_extraction",
67            StorageCategory::ModelFiles => "model_files",
68            StorageCategory::Logs => "logs",
69        }
70    }
71
72    /// The lifecycle class this storage category belongs to.
73    pub fn data_class(&self) -> DataClass {
74        match self {
75            StorageCategory::PersistentCatalog => DataClass::PersistentCatalog,
76            StorageCategory::KeywordIndex | StorageCategory::VectorIndex => {
77                DataClass::RebuildableIndex
78            }
79            StorageCategory::SnippetCache
80            | StorageCategory::SearchCache
81            | StorageCategory::TemporaryExtraction => DataClass::EphemeralCache,
82            StorageCategory::ModelFiles => DataClass::LocalDependency,
83            StorageCategory::Logs => DataClass::OperationalLog,
84        }
85    }
86}
87
88/// Cleanup actions exposed by the Storage view (RFC-001 §9, RFC-011).
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
90#[serde(rename_all = "snake_case")]
91pub enum CleanupAction {
92    /// Safe: expired search cache entries.
93    ClearExpiredSearchCache,
94    /// Safe: expired/temporary snippets.
95    ClearSnippetCache,
96    /// Safe: temporary extraction buffers and cache-engine payloads.
97    ClearTemporaryExtraction,
98    /// Safe: stale indexes that have already been replaced.
99    RemoveReplacedStaleIndexes,
100    /// Space recovery: delete keyword index (rebuild required).
101    DeleteKeywordIndex,
102    /// Space recovery: delete vector index / embeddings (rebuild required).
103    DeleteVectorIndex,
104    /// Space recovery: delete temporary-source indexes.
105    RemoveTemporarySourceIndexes,
106    /// Destructive: reset the whole catalog (strong confirmation).
107    ResetCatalog,
108}
109
110impl CleanupAction {
111    /// Lifecycle classes this action is allowed to touch.
112    pub fn affected_classes(&self) -> &'static [DataClass] {
113        match self {
114            CleanupAction::ClearExpiredSearchCache
115            | CleanupAction::ClearSnippetCache
116            | CleanupAction::ClearTemporaryExtraction => &[DataClass::EphemeralCache],
117            CleanupAction::RemoveReplacedStaleIndexes
118            | CleanupAction::DeleteKeywordIndex
119            | CleanupAction::DeleteVectorIndex
120            | CleanupAction::RemoveTemporarySourceIndexes => &[DataClass::RebuildableIndex],
121            CleanupAction::ResetCatalog => &[
122                DataClass::PersistentCatalog,
123                DataClass::RebuildableIndex,
124                DataClass::EphemeralCache,
125                DataClass::OperationalLog,
126            ],
127        }
128    }
129
130    /// Whether running this action makes reindexing necessary.
131    pub fn requires_rebuild(&self) -> bool {
132        !matches!(
133            self,
134            CleanupAction::ClearExpiredSearchCache
135                | CleanupAction::ClearSnippetCache
136                | CleanupAction::ClearTemporaryExtraction
137        )
138    }
139
140    /// Whether the UI must show an explicit confirmation dialog.
141    pub fn requires_confirmation(&self) -> bool {
142        self.requires_rebuild()
143    }
144
145    /// Whether this action may touch persistent catalog data.
146    pub fn touches_persistent_catalog(&self) -> bool {
147        self.affected_classes()
148            .contains(&DataClass::PersistentCatalog)
149    }
150}
151
152/// A cleanup plan: produced first, executed second (RFC-001 §14).
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct CleanupPlan {
155    pub action: CleanupAction,
156    pub affected_classes: Vec<DataClass>,
157    pub estimated_recovered_bytes: u64,
158    pub requires_rebuild: bool,
159    pub requires_confirmation: bool,
160}
161
162impl CleanupPlan {
163    /// Build the plan for an action with an estimated byte impact.
164    pub fn for_action(action: CleanupAction, estimated_recovered_bytes: u64) -> Self {
165        Self {
166            action,
167            affected_classes: action.affected_classes().to_vec(),
168            estimated_recovered_bytes,
169            requires_rebuild: action.requires_rebuild(),
170            requires_confirmation: action.requires_confirmation(),
171        }
172    }
173
174    /// Safe cleanup must never include the persistent catalog. Executors
175    /// call this before running anything that has not been explicitly
176    /// confirmed as a destructive reset.
177    pub fn assert_safe_for_ordinary_cleanup(&self) -> Result<(), crate::error::OrbokError> {
178        if self
179            .affected_classes
180            .contains(&DataClass::PersistentCatalog)
181        {
182            return Err(crate::error::OrbokError::CleanupWouldTouchPersistentData);
183        }
184        Ok(())
185    }
186}