Skip to main content

orbok_db/repo/
models.rs

1//! Model registry repository (RFC-012 §6).
2//!
3//! The registry is persistent catalog data — it remembers which local AI
4//! models the user has registered, their on-disk paths, and their current
5//! availability. No model file is downloaded silently; every registration
6//! or installation action requires explicit user confirmation (RFC-012
7//! §13 "no silent download").
8
9use crate::catalog::{Catalog, db_err};
10use orbok_core::{ModelId, OrbokResult, now_iso8601};
11use rusqlite::params;
12
/// Which stage of the pipeline a registered model is used for.
///
/// Each variant has a fixed lowercase text form (see [`ModelRole::as_str`])
/// which is the value written to the `role` column of the catalog.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelRole {
    /// Text form: `"embedding"`.
    Embedding,
    /// Text form: `"reranker"`.
    Reranker,
}

impl ModelRole {
    /// Canonical text form of this role.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Embedding => "embedding",
            Self::Reranker => "reranker",
        }
    }

    /// Inverse of [`ModelRole::as_str`].
    ///
    /// The comparison is exact (case-sensitive, no trimming); any string
    /// that is not a canonical text form yields `None`.
    pub fn parse(s: &str) -> Option<Self> {
        const KNOWN: [ModelRole; 2] = [ModelRole::Embedding, ModelRole::Reranker];
        KNOWN.iter().copied().find(|role| role.as_str() == s)
    }
}
35
/// Availability state of a model in the local registry.
///
/// Each variant has a fixed lowercase text form (see
/// [`ModelStatus::as_str`]) which is the value written to the `status`
/// column of the catalog.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelStatus {
    /// Text form: `"available"`.
    Available,
    /// Text form: `"missing"`.
    Missing,
    /// Text form: `"invalid"`.
    Invalid,
    /// Text form: `"installing"`.
    Installing,
    /// Text form: `"disabled"`.
    Disabled,
}

impl ModelStatus {
    /// Canonical text form of this status.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Available => "available",
            Self::Missing => "missing",
            Self::Invalid => "invalid",
            Self::Installing => "installing",
            Self::Disabled => "disabled",
        }
    }

    /// Inverse of [`ModelStatus::as_str`].
    ///
    /// The comparison is exact (case-sensitive, no trimming); any string
    /// that is not a canonical text form yields `None`.
    pub fn parse(s: &str) -> Option<Self> {
        const KNOWN: [ModelStatus; 5] = [
            ModelStatus::Available,
            ModelStatus::Missing,
            ModelStatus::Invalid,
            ModelStatus::Installing,
            ModelStatus::Disabled,
        ];
        KNOWN.iter().copied().find(|status| status.as_str() == s)
    }
}
67
68/// A registered model record.
69#[derive(Debug, Clone)]
70pub struct ModelRecord {
71    pub model_id: ModelId,
72    pub role: ModelRole,
73    pub model_name: String,
74    pub model_version: String,
75    pub local_path: Option<String>,
76    pub license_summary: Option<String>,
77    pub size_bytes: Option<u64>,
78    pub backend: Option<String>,
79    pub dimension: Option<u32>,
80    pub status: ModelStatus,
81    pub last_validated_at: Option<String>,
82}
83
84/// Parameters for registering a new model.
85#[derive(Debug, Clone)]
86pub struct NewModel {
87    pub role: ModelRole,
88    pub model_name: String,
89    pub model_version: String,
90    pub local_path: Option<String>,
91    pub license_summary: Option<String>,
92    pub size_bytes: Option<u64>,
93    pub backend: Option<String>,
94    pub dimension: Option<u32>,
95    pub status: ModelStatus,
96}
97
98pub struct ModelRepository<'a> {
99    catalog: &'a Catalog,
100}
101
102impl<'a> ModelRepository<'a> {
103    pub fn new(catalog: &'a Catalog) -> Self {
104        Self { catalog }
105    }
106
107    /// Register a new model. Returns the generated ModelId.
108    pub fn insert(&self, new: NewModel) -> OrbokResult<ModelRecord> {
109        let id = ModelId::generate();
110        let now = now_iso8601();
111        let conn = self.catalog.lock();
112        conn.execute(
113            "INSERT INTO models \
114             (model_id, role, model_name, model_version, local_path, license_summary, \
115              size_bytes, backend, dimension, status, created_at, updated_at) \
116             VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?11)",
117            params![
118                id.as_str(),
119                new.role.as_str(),
120                new.model_name,
121                new.model_version,
122                new.local_path,
123                new.license_summary,
124                new.size_bytes.map(|v| v as i64),
125                new.backend,
126                new.dimension.map(|v| v as i64),
127                new.status.as_str(),
128                now,
129            ],
130        )
131        .map_err(db_err)?;
132        drop(conn);
133        self.get(&id)?.ok_or(orbok_core::OrbokError::SourceNotFound)
134    }
135
136    /// Fetch one model by ID.
137    pub fn get(&self, id: &ModelId) -> OrbokResult<Option<ModelRecord>> {
138        let conn = self.catalog.lock();
139        let result = conn.query_row(
140            "SELECT model_id, role, model_name, model_version, local_path, license_summary, \
141             size_bytes, backend, dimension, status, last_validated_at \
142             FROM models WHERE model_id = ?1",
143            params![id.as_str()],
144            row_to_record,
145        );
146        match result {
147            Ok(r) => Ok(Some(r)),
148            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
149            Err(e) => Err(db_err(e)),
150        }
151    }
152
153    /// All models of a given role.
154    pub fn list_by_role(&self, role: ModelRole) -> OrbokResult<Vec<ModelRecord>> {
155        let conn = self.catalog.lock();
156        let mut stmt = conn
157            .prepare(
158                "SELECT model_id, role, model_name, model_version, local_path, license_summary, \
159                 size_bytes, backend, dimension, status, last_validated_at \
160                 FROM models WHERE role = ?1 ORDER BY model_name, model_version",
161            )
162            .map_err(db_err)?;
163        let rows = stmt
164            .query_map(params![role.as_str()], row_to_record)
165            .map_err(db_err)?;
166        let mut out = Vec::new();
167        for row in rows {
168            out.push(row.map_err(db_err)?);
169        }
170        Ok(out)
171    }
172
173    /// All models (all roles).
174    pub fn list_all(&self) -> OrbokResult<Vec<ModelRecord>> {
175        let conn = self.catalog.lock();
176        let mut stmt = conn
177            .prepare(
178                "SELECT model_id, role, model_name, model_version, local_path, license_summary, \
179                 size_bytes, backend, dimension, status, last_validated_at \
180                 FROM models ORDER BY role, model_name",
181            )
182            .map_err(db_err)?;
183        let rows = stmt
184            .query_map([], row_to_record)
185            .map_err(db_err)?;
186        let mut out = Vec::new();
187        for row in rows {
188            out.push(row.map_err(db_err)?);
189        }
190        Ok(out)
191    }
192
193    /// Update model status (available / missing / invalid / disabled).
194    pub fn set_status(&self, id: &ModelId, status: ModelStatus) -> OrbokResult<()> {
195        let conn = self.catalog.lock();
196        conn.execute(
197            "UPDATE models SET status = ?2, updated_at = ?3 WHERE model_id = ?1",
198            params![id.as_str(), status.as_str(), now_iso8601()],
199        )
200        .map_err(db_err)?;
201        Ok(())
202    }
203
204    /// Validate a model: check the file exists and matches expected dimension.
205    /// Updates `status` and `last_validated_at` in the catalog.
206    pub fn validate(&self, id: &ModelId, expected_dim: Option<u32>) -> OrbokResult<ModelStatus> {
207        let record = match self.get(id)? {
208            Some(r) => r,
209            None => return Ok(ModelStatus::Missing),
210        };
211        let status = if let Some(path) = &record.local_path {
212            if std::path::Path::new(path).exists() {
213                // Dimension check if expected.
214                if let (Some(expected), Some(actual)) = (expected_dim, record.dimension) {
215                    if expected != actual {
216                        ModelStatus::Invalid
217                    } else {
218                        ModelStatus::Available
219                    }
220                } else {
221                    ModelStatus::Available
222                }
223            } else {
224                ModelStatus::Missing
225            }
226        } else {
227            ModelStatus::Missing
228        };
229        let now = now_iso8601();
230        {
231            let conn = self.catalog.lock();
232            conn.execute(
233                "UPDATE models SET status = ?2, last_validated_at = ?3, updated_at = ?3 \
234                 WHERE model_id = ?1",
235                params![id.as_str(), status.as_str(), now],
236            )
237            .map_err(db_err)?;
238        }
239        Ok(status)
240    }
241
242    /// Locate and register an existing model file on disk (RFC-012 §8
243    /// "locate existing model"). This is explicit — no silent downloads.
244    pub fn locate(
245        &self,
246        path: &str,
247        role: ModelRole,
248        name: &str,
249        version: &str,
250        dimension: Option<u32>,
251    ) -> OrbokResult<ModelRecord> {
252        let size_bytes = std::fs::metadata(path).map(|m| m.len()).ok();
253        let record = self.insert(NewModel {
254            role,
255            model_name: name.to_string(),
256            model_version: version.to_string(),
257            local_path: Some(path.to_string()),
258            license_summary: None,
259            size_bytes,
260            backend: None,
261            dimension,
262            status: if size_bytes.is_some() {
263                ModelStatus::Available
264            } else {
265                ModelStatus::Missing
266            },
267        })?;
268        Ok(record)
269    }
270
271    /// When an embedding model changes, mark all embeddings from the old
272    /// model as stale (RFC-012 §14).
273    pub fn mark_embedding_dependents_stale(&self, model_id: &ModelId) -> OrbokResult<u64> {
274        let conn = self.catalog.lock();
275        let n = conn
276            .execute(
277                "UPDATE embeddings SET status = 'stale', updated_at = ?2 WHERE model_id = ?1",
278                params![model_id.as_str(), now_iso8601()],
279            )
280            .map_err(db_err)?;
281        Ok(n as u64)
282    }
283}
284
285fn row_to_record(row: &rusqlite::Row<'_>) -> rusqlite::Result<ModelRecord> {
286    Ok(ModelRecord {
287        model_id: ModelId::from_string(row.get::<_, String>(0)?),
288        role: {
289            let s: String = row.get(1)?;
290            ModelRole::parse(&s).unwrap_or(ModelRole::Embedding)
291        },
292        model_name: row.get(2)?,
293        model_version: row.get(3)?,
294        local_path: row.get(4)?,
295        license_summary: row.get(5)?,
296        size_bytes: row.get::<_, Option<i64>>(6)?.map(|v| v as u64),
297        backend: row.get(7)?,
298        dimension: row.get::<_, Option<i64>>(8)?.map(|v| v as u32),
299        status: {
300            let s: String = row.get(9)?;
301            ModelStatus::parse(&s).unwrap_or(ModelStatus::Missing)
302        },
303        last_validated_at: row.get(10)?,
304    })
305}
306
307/// Verify a model file's SHA-256 against an expected hash
308/// (RFC-029 §5 integrity checking).
309///
310/// `expected_hash` is a lowercase hex SHA-256 string (64 chars).
311/// Returns `Ok(true)` on match, `Ok(false)` on mismatch, `Err` on I/O
312/// error. The file path is not logged (NFR-014).
313pub fn verify_model_sha256(path: &str, expected_hash: &str) -> OrbokResult<bool> {
314    use sha2::{Digest, Sha256};
315    use std::io::Read;
316    let mut file = std::fs::File::open(path)
317        .map_err(|e| orbok_core::OrbokError::Io(e))?;
318    let mut hasher = Sha256::new();
319    let mut buf = [0u8; 64 * 1024];
320    loop {
321        let n = file.read(&mut buf).map_err(orbok_core::OrbokError::Io)?;
322        if n == 0 { break; }
323        hasher.update(&buf[..n]);
324    }
325    let actual = format!("{:x}", hasher.finalize());
326    Ok(actual == expected_hash)
327}