1use std::path::Path;
13
/// Paths, relative to the model directory, of the files that
/// `verify_embedding_model` requires to be present.
pub const REQUIRED_MODEL_FILES: &[&str] = &[
    "onnx/model.onnx",
    "tokenizer.json",
];
16
17#[derive(Debug, Clone, PartialEq)]
19pub enum VerifyOutcome {
20 Ready,
23
24 NotConfigured,
27
28 FilesInvalid {
31 model_dir: String,
33 issues: Vec<FileIssue>,
35 },
36}
37
38#[derive(Debug, Clone, PartialEq)]
40pub struct FileIssue {
41 pub relative_path: String,
43 pub reason: FileIssueKind,
45}
46
/// Reason a required model file was rejected.
#[derive(Clone, Debug, PartialEq)]
pub enum FileIssueKind {
    /// The file was not found at the expected path.
    NotFound,
    /// The file exists but is zero bytes long.
    Empty,
    /// Access to the file was refused with a permission error.
    PermissionDenied,
}

impl FileIssueKind {
    /// Returns a short, fixed, human-readable description of this reason.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::PermissionDenied => "permission denied",
            Self::Empty => "empty file (0 bytes)",
            Self::NotFound => "not found",
        }
    }
}
64
65pub fn verify_embedding_model(model_dir: Option<&str>) -> VerifyOutcome {
72 let dir_str = match model_dir {
73 Some(d) if !d.trim().is_empty() => d,
74 _ => return VerifyOutcome::NotConfigured,
75 };
76 let dir = Path::new(dir_str);
77 let mut issues = Vec::new();
78 for rel in REQUIRED_MODEL_FILES {
79 let full = dir.join(rel);
80 match std::fs::metadata(&full) {
81 Ok(meta) if meta.len() == 0 => {
82 issues.push(FileIssue {
83 relative_path: rel.to_string(),
84 reason: FileIssueKind::Empty,
85 });
86 }
87 Ok(_) => {} Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
89 issues.push(FileIssue {
90 relative_path: rel.to_string(),
91 reason: FileIssueKind::PermissionDenied,
92 });
93 }
94 Err(_) => {
95 issues.push(FileIssue {
96 relative_path: rel.to_string(),
97 reason: FileIssueKind::NotFound,
98 });
99 }
100 }
101 }
102 if issues.is_empty() {
103 VerifyOutcome::Ready
104 } else {
105 VerifyOutcome::FilesInvalid {
106 model_dir: dir_str.to_string(),
107 issues,
108 }
109 }
110}
111
112pub fn verify_outcome_summary(outcome: &VerifyOutcome) -> String {
115 match outcome {
116 VerifyOutcome::Ready => "embedding model OK".into(),
117 VerifyOutcome::NotConfigured => "embedding model not configured".into(),
118 VerifyOutcome::FilesInvalid { issues, .. } => {
119 let problems: Vec<_> = issues.iter().map(|i| i.reason.as_str()).collect();
120 format!("embedding model invalid: {}", problems.join(", "))
121 }
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the shallow check against `dir`, passing the path as a string.
    fn verify_at(dir: &std::path::Path) -> VerifyOutcome {
        verify_embedding_model(Some(&dir.to_string_lossy()))
    }

    /// Creates `<root>/onnx/model.onnx` holding `contents`.
    fn write_model(root: &std::path::Path, contents: &[u8]) {
        let onnx_dir = root.join("onnx");
        std::fs::create_dir_all(&onnx_dir).unwrap();
        std::fs::write(onnx_dir.join("model.onnx"), contents).unwrap();
    }

    /// Creates `<root>/tokenizer.json` holding a minimal JSON object.
    fn write_tokenizer(root: &std::path::Path) {
        std::fs::write(root.join("tokenizer.json"), b"{}").unwrap();
    }

    #[test]
    fn verify_none_is_not_configured() {
        assert_eq!(verify_embedding_model(None), VerifyOutcome::NotConfigured);
    }

    #[test]
    fn verify_empty_string_is_not_configured() {
        for blank in ["", " "] {
            assert_eq!(
                verify_embedding_model(Some(blank)),
                VerifyOutcome::NotConfigured
            );
        }
    }

    #[test]
    fn verify_nonexistent_dir_reports_both_files_missing() {
        match verify_embedding_model(Some("/nonexistent/orbok-models")) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 2);
                assert!(issues.iter().all(|i| i.reason == FileIssueKind::NotFound));
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_dir_with_valid_files_returns_ready() {
        let dir = tempfile::tempdir().unwrap();
        write_model(dir.path(), &[0u8; 1024]);
        write_tokenizer(dir.path());
        assert_eq!(verify_at(dir.path()), VerifyOutcome::Ready);
    }

    #[test]
    fn verify_empty_model_file_reports_invalid() {
        let dir = tempfile::tempdir().unwrap();
        // A zero-byte model next to a valid tokenizer: only the model is flagged.
        write_model(dir.path(), b"");
        write_tokenizer(dir.path());
        match verify_at(dir.path()) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                assert_eq!(issues[0].relative_path, "onnx/model.onnx");
                assert_eq!(issues[0].reason, FileIssueKind::Empty);
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_missing_tokenizer_reports_invalid() {
        let dir = tempfile::tempdir().unwrap();
        // Only the model is written; the tokenizer is deliberately absent.
        write_model(dir.path(), &[1u8; 512]);
        match verify_at(dir.path()) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                assert_eq!(issues[0].relative_path, "tokenizer.json");
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn summary_strings_are_log_safe() {
        let outcome = VerifyOutcome::FilesInvalid {
            model_dir: "/secret/path".into(),
            issues: vec![FileIssue {
                relative_path: "onnx/model.onnx".into(),
                reason: FileIssueKind::NotFound,
            }],
        };
        let summary = verify_outcome_summary(&outcome);
        assert!(
            !summary.contains("/secret/path"),
            "summary must not include the model dir path"
        );
    }
}
212
213#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
221pub struct ModelManifest {
222 pub sha256: std::collections::HashMap<String, String>,
224}
225
226impl ModelManifest {
227 pub fn load(model_dir: &Path) -> Option<Self> {
230 let path = model_dir.join("orbok-manifest.json");
231 let bytes = std::fs::read(&path).ok()?;
232 serde_json::from_slice(&bytes).ok()
233 }
234
235 pub fn save(&self, model_dir: &Path) -> std::io::Result<()> {
237 let path = model_dir.join("orbok-manifest.json");
238 let json = serde_json::to_vec_pretty(self)
239 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
240 std::fs::write(&path, &json)
241 }
242
243 pub fn sha256_of_file(path: &Path) -> std::io::Result<String> {
245 use sha2::{Digest, Sha256};
246 let bytes = std::fs::read(path)?;
247 let digest: String = Sha256::digest(&bytes)
248 .iter()
249 .map(|b| format!("{b:02x}"))
250 .collect();
251 Ok(digest)
252 }
253}
254
/// Result of the checksum-based check performed by
/// `verify_embedding_model_deep`.
#[derive(Clone, Debug, PartialEq)]
pub enum DeepVerifyOutcome {
    /// Every file listed in the manifest hashed to its expected digest.
    Valid,
    /// No manifest could be loaded from the model directory.
    NoManifest,
    /// Manifest paths whose files were hashed but did not match the expected digest.
    ChecksumMismatch(Vec<String>),
    /// Manifest paths whose files could not be hashed because reading them failed.
    FileMissing(Vec<String>),
}
267
268pub fn verify_embedding_model_deep(model_dir: &Path) -> DeepVerifyOutcome {
272 let manifest = match ModelManifest::load(model_dir) {
273 Some(m) => m,
274 None => return DeepVerifyOutcome::NoManifest,
275 };
276
277 let mut missing = Vec::new();
278 let mut mismatched = Vec::new();
279
280 for (rel, expected) in &manifest.sha256 {
281 let full = model_dir.join(rel);
282 match ModelManifest::sha256_of_file(&full) {
283 Ok(actual) if actual == *expected => {} Ok(_) => mismatched.push(rel.clone()),
285 Err(_) => missing.push(rel.clone()),
286 }
287 }
288
289 if !missing.is_empty() {
290 DeepVerifyOutcome::FileMissing(missing)
291 } else if !mismatched.is_empty() {
292 DeepVerifyOutcome::ChecksumMismatch(mismatched)
293 } else {
294 DeepVerifyOutcome::Valid
295 }
296}
297
#[cfg(test)]
mod deep_verify_tests {
    use super::*;
    use std::collections::HashMap;

    /// Writes `model.bin` with `contents` into `root`, then saves a manifest
    /// recording the digest of exactly those contents.
    fn write_model_with_manifest(root: &std::path::Path, contents: &[u8]) {
        let model_path = root.join("model.bin");
        std::fs::write(&model_path, contents).unwrap();
        let digest = ModelManifest::sha256_of_file(&model_path).unwrap();
        let sha256 = HashMap::from([(String::from("model.bin"), digest)]);
        ModelManifest { sha256 }.save(root).unwrap();
    }

    #[test]
    fn manifest_round_trips() {
        let dir = tempfile::tempdir().unwrap();
        let sha256 = HashMap::from([(String::from("onnx/model.onnx"), String::from("abc123"))]);
        ModelManifest { sha256 }.save(dir.path()).unwrap();

        let loaded = ModelManifest::load(dir.path()).unwrap();
        assert_eq!(loaded.sha256["onnx/model.onnx"], "abc123");
    }

    #[test]
    fn no_manifest_returns_no_manifest() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(outcome, DeepVerifyOutcome::NoManifest);
    }

    #[test]
    fn valid_checksums_return_valid() {
        let dir = tempfile::tempdir().unwrap();
        write_model_with_manifest(dir.path(), b"model weights");
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(outcome, DeepVerifyOutcome::Valid);
    }

    #[test]
    fn corrupted_file_returns_mismatch() {
        let dir = tempfile::tempdir().unwrap();
        write_model_with_manifest(dir.path(), b"original");
        // Overwrite the file after the manifest has recorded the old digest.
        std::fs::write(dir.path().join("model.bin"), b"corrupted!").unwrap();
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(
            outcome,
            DeepVerifyOutcome::ChecksumMismatch(vec!["model.bin".into()])
        );
    }
}