1use std::path::Path;
13
/// Paths, relative to the model directory, that [`verify_embedding_model`]
/// expects to find before the embedding model is reported as ready.
pub const REQUIRED_MODEL_FILES: &[&str] = &["onnx/model.onnx", "tokenizer.json"];
16
17#[derive(Debug, Clone, PartialEq)]
19pub enum VerifyOutcome {
20 Ready,
23
24 NotConfigured,
27
28 FilesInvalid {
31 model_dir: String,
33 issues: Vec<FileIssue>,
35 },
36}
37
38#[derive(Debug, Clone, PartialEq)]
40pub struct FileIssue {
41 pub relative_path: String,
43 pub reason: FileIssueKind,
45}
46
/// Category of problem detected for a required model file.
///
/// The enum carries no data, so it is `Copy` and can be used as a key in
/// hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileIssueKind {
    /// The file could not be found.
    NotFound,
    /// The file exists but has a length of zero bytes.
    Empty,
    /// The file's metadata could not be read due to missing permissions.
    PermissionDenied,
}

impl FileIssueKind {
    /// Returns a short, static, human-readable label for this kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            FileIssueKind::NotFound => "not found",
            FileIssueKind::Empty => "empty file (0 bytes)",
            FileIssueKind::PermissionDenied => "permission denied",
        }
    }
}
64
65pub fn verify_embedding_model(model_dir: Option<&str>) -> VerifyOutcome {
72 let dir_str = match model_dir {
73 Some(d) if !d.trim().is_empty() => d,
74 _ => return VerifyOutcome::NotConfigured,
75 };
76 let dir = Path::new(dir_str);
77 let mut issues = Vec::new();
78 for rel in REQUIRED_MODEL_FILES {
79 let full = dir.join(rel);
80 match std::fs::metadata(&full) {
81 Ok(meta) if meta.len() == 0 => {
82 issues.push(FileIssue {
83 relative_path: rel.to_string(),
84 reason: FileIssueKind::Empty,
85 });
86 }
87 Ok(_) => {} Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
89 issues.push(FileIssue {
90 relative_path: rel.to_string(),
91 reason: FileIssueKind::PermissionDenied,
92 });
93 }
94 Err(_) => {
95 issues.push(FileIssue {
96 relative_path: rel.to_string(),
97 reason: FileIssueKind::NotFound,
98 });
99 }
100 }
101 }
102 if issues.is_empty() {
103 VerifyOutcome::Ready
104 } else {
105 VerifyOutcome::FilesInvalid {
106 model_dir: dir_str.to_string(),
107 issues,
108 }
109 }
110}
111
112pub fn verify_outcome_summary(outcome: &VerifyOutcome) -> String {
115 match outcome {
116 VerifyOutcome::Ready => "embedding model OK".into(),
117 VerifyOutcome::NotConfigured => "embedding model not configured".into(),
118 VerifyOutcome::FilesInvalid { issues, .. } => {
119 let problems: Vec<_> = issues.iter().map(|i| i.reason.as_str()).collect();
120 format!("embedding model invalid: {}", problems.join(", "))
121 }
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a temporary model directory holding `onnx/model.onnx` with the
    /// given bytes and, when requested, a `tokenizer.json` containing `{}`.
    fn model_dir_with(model_bytes: &[u8], with_tokenizer: bool) -> tempfile::TempDir {
        let root = tempfile::tempdir().unwrap();
        let onnx = root.path().join("onnx");
        std::fs::create_dir_all(&onnx).unwrap();
        std::fs::write(onnx.join("model.onnx"), model_bytes).unwrap();
        if with_tokenizer {
            std::fs::write(root.path().join("tokenizer.json"), b"{}").unwrap();
        }
        root
    }

    /// Runs the shallow verification against a temporary directory.
    fn verify_dir(root: &tempfile::TempDir) -> VerifyOutcome {
        verify_embedding_model(Some(&root.path().to_string_lossy()))
    }

    #[test]
    fn verify_none_is_not_configured() {
        let outcome = verify_embedding_model(None);
        assert_eq!(outcome, VerifyOutcome::NotConfigured);
    }

    #[test]
    fn verify_empty_string_is_not_configured() {
        for blank in ["", " "] {
            assert_eq!(
                verify_embedding_model(Some(blank)),
                VerifyOutcome::NotConfigured
            );
        }
    }

    #[test]
    fn verify_nonexistent_dir_reports_both_files_missing() {
        match verify_embedding_model(Some("/nonexistent/orbok-models")) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 2);
                assert!(issues.iter().all(|i| i.reason == FileIssueKind::NotFound));
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_dir_with_valid_files_returns_ready() {
        let root = model_dir_with(&[0u8; 1024], true);
        assert_eq!(verify_dir(&root), VerifyOutcome::Ready);
    }

    #[test]
    fn verify_empty_model_file_reports_invalid() {
        let root = model_dir_with(b"", true);
        match verify_dir(&root) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                let only = &issues[0];
                assert_eq!(only.relative_path, "onnx/model.onnx");
                assert_eq!(only.reason, FileIssueKind::Empty);
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_missing_tokenizer_reports_invalid() {
        let root = model_dir_with(&[1u8; 512], false);
        match verify_dir(&root) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                assert_eq!(issues[0].relative_path, "tokenizer.json");
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn summary_strings_are_log_safe() {
        let outcome = VerifyOutcome::FilesInvalid {
            model_dir: "/secret/path".into(),
            issues: vec![FileIssue {
                relative_path: "onnx/model.onnx".into(),
                reason: FileIssueKind::NotFound,
            }],
        };
        let summary = verify_outcome_summary(&outcome);
        assert!(
            !summary.contains("/secret/path"),
            "summary must not include the model dir path"
        );
    }
}
220
221#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
229pub struct ModelManifest {
230 pub sha256: std::collections::HashMap<String, String>,
232}
233
234impl ModelManifest {
235 pub fn load(model_dir: &Path) -> Option<Self> {
238 let path = model_dir.join("orbok-manifest.json");
239 let bytes = std::fs::read(&path).ok()?;
240 serde_json::from_slice(&bytes).ok()
241 }
242
243 pub fn save(&self, model_dir: &Path) -> std::io::Result<()> {
245 let path = model_dir.join("orbok-manifest.json");
246 let json = serde_json::to_vec_pretty(self)
247 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
248 std::fs::write(&path, &json)
249 }
250
251 pub fn sha256_of_file(path: &Path) -> std::io::Result<String> {
253 use sha2::{Digest, Sha256};
254 let bytes = std::fs::read(path)?;
255 let digest: String = Sha256::digest(&bytes)
256 .iter()
257 .map(|b| format!("{b:02x}"))
258 .collect();
259 Ok(digest)
260 }
261}
262
/// Result of the checksum-based check done by
/// [`verify_embedding_model_deep`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DeepVerifyOutcome {
    /// Every file listed in the manifest matched its recorded digest.
    Valid,
    /// No manifest could be loaded from the model directory.
    NoManifest,
    /// Relative paths of files whose digest differs from the manifest.
    ChecksumMismatch(Vec<String>),
    /// Relative paths of files that could not be hashed. Reported in
    /// preference to `ChecksumMismatch` when both kinds of problem occur.
    FileMissing(Vec<String>),
}
275
276pub fn verify_embedding_model_deep(model_dir: &Path) -> DeepVerifyOutcome {
280 let manifest = match ModelManifest::load(model_dir) {
281 Some(m) => m,
282 None => return DeepVerifyOutcome::NoManifest,
283 };
284
285 let mut missing = Vec::new();
286 let mut mismatched = Vec::new();
287
288 for (rel, expected) in &manifest.sha256 {
289 let full = model_dir.join(rel);
290 match ModelManifest::sha256_of_file(&full) {
291 Ok(actual) if actual == *expected => {} Ok(_) => mismatched.push(rel.clone()),
293 Err(_) => missing.push(rel.clone()),
294 }
295 }
296
297 if !missing.is_empty() {
298 DeepVerifyOutcome::FileMissing(missing)
299 } else if !mismatched.is_empty() {
300 DeepVerifyOutcome::ChecksumMismatch(mismatched)
301 } else {
302 DeepVerifyOutcome::Valid
303 }
304}
305
#[cfg(test)]
mod deep_verify_tests {
    use super::*;

    /// Builds a manifest containing exactly one `rel -> digest` entry.
    fn single_entry_manifest(rel: &str, digest: String) -> ModelManifest {
        let mut sha256 = std::collections::HashMap::new();
        sha256.insert(rel.to_string(), digest);
        ModelManifest { sha256 }
    }

    /// Writes `contents` to `model.bin` in `root` and saves a manifest that
    /// records the digest of that file.
    fn write_model_and_manifest(root: &Path, contents: &[u8]) {
        let model = root.join("model.bin");
        std::fs::write(&model, contents).unwrap();
        let digest = ModelManifest::sha256_of_file(&model).unwrap();
        single_entry_manifest("model.bin", digest)
            .save(root)
            .unwrap();
    }

    #[test]
    fn manifest_round_trips() {
        let dir = tempfile::tempdir().unwrap();
        single_entry_manifest("onnx/model.onnx", "abc123".into())
            .save(dir.path())
            .unwrap();
        let reloaded = ModelManifest::load(dir.path()).unwrap();
        assert_eq!(reloaded.sha256["onnx/model.onnx"], "abc123");
    }

    #[test]
    fn no_manifest_returns_no_manifest() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(outcome, DeepVerifyOutcome::NoManifest);
    }

    #[test]
    fn valid_checksums_return_valid() {
        let dir = tempfile::tempdir().unwrap();
        write_model_and_manifest(dir.path(), b"model weights");
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(outcome, DeepVerifyOutcome::Valid);
    }

    #[test]
    fn corrupted_file_returns_mismatch() {
        let dir = tempfile::tempdir().unwrap();
        write_model_and_manifest(dir.path(), b"original");
        // Overwrite the file after its digest was recorded.
        std::fs::write(dir.path().join("model.bin"), b"corrupted!").unwrap();
        let outcome = verify_embedding_model_deep(dir.path());
        assert_eq!(
            outcome,
            DeepVerifyOutcome::ChecksumMismatch(vec!["model.bin".into()])
        );
    }
}