// orbok_workers/model_verifier.rs
use std::path::Path;

/// Paths, relative to the configured model directory, that
/// [`verify_embedding_model`] checks.
pub const REQUIRED_MODEL_FILES: &[&str] = &["onnx/model.onnx", "tokenizer.json"];

17#[derive(Debug, Clone, PartialEq)]
19pub enum VerifyOutcome {
20 Ready,
23
24 NotConfigured,
27
28 FilesInvalid {
31 model_dir: String,
33 issues: Vec<FileIssue>,
35 },
36}
37
38#[derive(Debug, Clone, PartialEq)]
40pub struct FileIssue {
41 pub relative_path: String,
43 pub reason: FileIssueKind,
45}
46
/// Reason a required model file was rejected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileIssueKind {
    /// No usable file at the expected path. Also the fallback for any
    /// metadata error that is not a permission error.
    NotFound,
    /// The file exists but is 0 bytes long.
    Empty,
    /// Reading the file's metadata failed with a permission error.
    PermissionDenied,
}

impl FileIssueKind {
    /// Returns a short, fixed, human-readable description of the issue.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::NotFound => "not found",
            Self::Empty => "empty file (0 bytes)",
            Self::PermissionDenied => "permission denied",
        }
    }
}

65pub fn verify_embedding_model(model_dir: Option<&str>) -> VerifyOutcome {
72 let dir_str = match model_dir {
73 Some(d) if !d.trim().is_empty() => d,
74 _ => return VerifyOutcome::NotConfigured,
75 };
76 let dir = Path::new(dir_str);
77 let mut issues = Vec::new();
78 for rel in REQUIRED_MODEL_FILES {
79 let full = dir.join(rel);
80 match std::fs::metadata(&full) {
81 Ok(meta) if meta.len() == 0 => {
82 issues.push(FileIssue {
83 relative_path: rel.to_string(),
84 reason: FileIssueKind::Empty,
85 });
86 }
87 Ok(_) => {} Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
89 issues.push(FileIssue {
90 relative_path: rel.to_string(),
91 reason: FileIssueKind::PermissionDenied,
92 });
93 }
94 Err(_) => {
95 issues.push(FileIssue {
96 relative_path: rel.to_string(),
97 reason: FileIssueKind::NotFound,
98 });
99 }
100 }
101 }
102 if issues.is_empty() {
103 VerifyOutcome::Ready
104 } else {
105 VerifyOutcome::FilesInvalid {
106 model_dir: dir_str.to_string(),
107 issues,
108 }
109 }
110}
111
112pub fn verify_outcome_summary(outcome: &VerifyOutcome) -> String {
115 match outcome {
116 VerifyOutcome::Ready => "embedding model OK".into(),
117 VerifyOutcome::NotConfigured => "embedding model not configured".into(),
118 VerifyOutcome::FilesInvalid { issues, .. } => {
119 let problems: Vec<_> = issues.iter().map(|i| i.reason.as_str()).collect();
120 format!("embedding model invalid: {}", problems.join(", "))
121 }
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn verify_none_is_not_configured() {
        assert_eq!(verify_embedding_model(None), VerifyOutcome::NotConfigured);
    }

    #[test]
    fn verify_empty_string_is_not_configured() {
        assert_eq!(verify_embedding_model(Some("")), VerifyOutcome::NotConfigured);
        assert_eq!(verify_embedding_model(Some(" ")), VerifyOutcome::NotConfigured);
    }

    #[test]
    fn verify_nonexistent_dir_reports_both_files_missing() {
        let outcome = verify_embedding_model(Some("/nonexistent/orbok-models"));
        match outcome {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 2);
                assert!(issues.iter().all(|i| i.reason == FileIssueKind::NotFound));
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_dir_with_valid_files_returns_ready() {
        let dir = tempfile::tempdir().unwrap();
        let onnx_dir = dir.path().join("onnx");
        std::fs::create_dir_all(&onnx_dir).unwrap();
        std::fs::write(onnx_dir.join("model.onnx"), vec![0u8; 1024]).unwrap();
        std::fs::write(dir.path().join("tokenizer.json"), b"{}").unwrap();
        assert_eq!(
            verify_embedding_model(Some(&dir.path().to_string_lossy())),
            VerifyOutcome::Ready
        );
    }

    #[test]
    fn verify_empty_model_file_reports_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let onnx_dir = dir.path().join("onnx");
        std::fs::create_dir_all(&onnx_dir).unwrap();
        // Zero-byte model file next to a valid tokenizer.
        std::fs::write(onnx_dir.join("model.onnx"), b"").unwrap();
        std::fs::write(dir.path().join("tokenizer.json"), b"{}").unwrap();
        match verify_embedding_model(Some(&dir.path().to_string_lossy())) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                assert_eq!(issues[0].relative_path, "onnx/model.onnx");
                assert_eq!(issues[0].reason, FileIssueKind::Empty);
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_missing_tokenizer_reports_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let onnx_dir = dir.path().join("onnx");
        std::fs::create_dir_all(&onnx_dir).unwrap();
        // Only the model file is written; tokenizer.json is left absent.
        std::fs::write(onnx_dir.join("model.onnx"), vec![1u8; 512]).unwrap();
        match verify_embedding_model(Some(&dir.path().to_string_lossy())) {
            VerifyOutcome::FilesInvalid { issues, .. } => {
                assert_eq!(issues.len(), 1);
                assert_eq!(issues[0].relative_path, "tokenizer.json");
                assert_eq!(issues[0].reason, FileIssueKind::NotFound);
            }
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn summary_strings_are_log_safe() {
        let summary = verify_outcome_summary(&VerifyOutcome::FilesInvalid {
            model_dir: "/secret/path".into(),
            issues: vec![FileIssue {
                relative_path: "onnx/model.onnx".into(),
                reason: FileIssueKind::NotFound,
            }],
        });
        assert!(
            !summary.contains("/secret/path"),
            "summary must not include the model dir path"
        );
        assert_eq!(summary, "embedding model invalid: not found");
    }
}