use std::path::Path;
/// Paths, relative to the configured model directory, of every file that
/// `verify_embedding_model` checks before reporting the model as ready.
pub const REQUIRED_MODEL_FILES: &[&str] = &["onnx/model.onnx", "tokenizer.json"];
/// Result of checking the embedding model directory with
/// `verify_embedding_model`.
#[derive(Debug, Clone, PartialEq)]
pub enum VerifyOutcome {
/// Every required file was checked and no issue was recorded.
Ready,
/// No model directory was supplied, or the supplied string was empty or
/// whitespace-only.
NotConfigured,
/// At least one required file failed its check.
FilesInvalid {
/// The model directory string exactly as it was passed in.
model_dir: String,
/// One entry per required file that failed, in the order the files were
/// checked.
issues: Vec<FileIssue>,
},
}
/// A single required file that failed verification, and why.
#[derive(Debug, Clone, PartialEq)]
pub struct FileIssue {
/// Path of the offending file relative to the model directory (not the full
/// path on disk).
pub relative_path: String,
/// The kind of problem detected for this file.
pub reason: FileIssueKind,
}
/// The kind of problem found with a required model file.
///
/// This is a plain fieldless enum, so it derives `Copy`, `Eq` and `Hash` in
/// addition to the basics: values can be passed around freely, compared with
/// full equality semantics, and used as set/map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileIssueKind {
    /// The file could not be found.
    NotFound,
    /// The file exists but contains zero bytes.
    Empty,
    /// The file could not be inspected because access was denied.
    PermissionDenied,
}

impl FileIssueKind {
    /// Returns a short, static, human-readable description of the issue.
    ///
    /// The text never contains any path information.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::NotFound => "not found",
            Self::Empty => "empty file (0 bytes)",
            Self::PermissionDenied => "permission denied",
        }
    }
}
pub fn verify_embedding_model(model_dir: Option<&str>) -> VerifyOutcome {
let dir_str = match model_dir {
Some(d) if !d.trim().is_empty() => d,
_ => return VerifyOutcome::NotConfigured,
};
let dir = Path::new(dir_str);
let mut issues = Vec::new();
for rel in REQUIRED_MODEL_FILES {
let full = dir.join(rel);
match std::fs::metadata(&full) {
Ok(meta) if meta.len() == 0 => {
issues.push(FileIssue {
relative_path: rel.to_string(),
reason: FileIssueKind::Empty,
});
}
Ok(_) => {} Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
issues.push(FileIssue {
relative_path: rel.to_string(),
reason: FileIssueKind::PermissionDenied,
});
}
Err(_) => {
issues.push(FileIssue {
relative_path: rel.to_string(),
reason: FileIssueKind::NotFound,
});
}
}
}
if issues.is_empty() {
VerifyOutcome::Ready
} else {
VerifyOutcome::FilesInvalid {
model_dir: dir_str.to_string(),
issues,
}
}
}
/// Renders a one-line description of `outcome`.
///
/// For [`VerifyOutcome::FilesInvalid`] the line lists the reason text of each
/// issue, comma-separated, in order. Neither the model directory nor the
/// relative file paths are included in the output.
pub fn verify_outcome_summary(outcome: &VerifyOutcome) -> String {
    // Handle the fixed-text outcomes first; only the invalid case needs work.
    let issues = match outcome {
        VerifyOutcome::Ready => return String::from("embedding model OK"),
        VerifyOutcome::NotConfigured => {
            return String::from("embedding model not configured");
        }
        VerifyOutcome::FilesInvalid { issues, .. } => issues,
    };

    let mut reasons = String::new();
    for (idx, issue) in issues.iter().enumerate() {
        if idx > 0 {
            reasons.push_str(", ");
        }
        reasons.push_str(issue.reason.as_str());
    }
    format!("embedding model invalid: {}", reasons)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a temporary model directory holding `onnx/model.onnx` with the
    /// given contents and nothing else.
    fn dir_with_model(model_bytes: &[u8]) -> tempfile::TempDir {
        let root = tempfile::tempdir().unwrap();
        let onnx = root.path().join("onnx");
        std::fs::create_dir_all(&onnx).unwrap();
        std::fs::write(onnx.join("model.onnx"), model_bytes).unwrap();
        root
    }

    /// Writes a minimal `tokenizer.json` into `root`.
    fn add_tokenizer(root: &tempfile::TempDir) {
        std::fs::write(root.path().join("tokenizer.json"), b"{}").unwrap();
    }

    /// Runs the verifier against the temporary directory.
    fn verify_at(root: &tempfile::TempDir) -> VerifyOutcome {
        verify_embedding_model(Some(&root.path().to_string_lossy()))
    }

    /// Unwraps the issue list, failing the test for any other outcome.
    fn issues_of(outcome: VerifyOutcome) -> Vec<FileIssue> {
        match outcome {
            VerifyOutcome::FilesInvalid { issues, .. } => issues,
            other => panic!("expected FilesInvalid, got {other:?}"),
        }
    }

    #[test]
    fn verify_none_is_not_configured() {
        assert_eq!(verify_embedding_model(None), VerifyOutcome::NotConfigured);
    }

    #[test]
    fn verify_empty_string_is_not_configured() {
        for blank in ["", " "] {
            assert_eq!(
                verify_embedding_model(Some(blank)),
                VerifyOutcome::NotConfigured
            );
        }
    }

    #[test]
    fn verify_nonexistent_dir_reports_both_files_missing() {
        let issues = issues_of(verify_embedding_model(Some("/nonexistent/orbok-models")));
        assert_eq!(issues.len(), 2);
        assert!(issues.iter().all(|i| i.reason == FileIssueKind::NotFound));
    }

    #[test]
    fn verify_dir_with_valid_files_returns_ready() {
        let root = dir_with_model(&[0u8; 1024]);
        add_tokenizer(&root);
        assert_eq!(verify_at(&root), VerifyOutcome::Ready);
    }

    #[test]
    fn verify_empty_model_file_reports_invalid() {
        let root = dir_with_model(b"");
        add_tokenizer(&root);
        let issues = issues_of(verify_at(&root));
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].relative_path, "onnx/model.onnx");
        assert_eq!(issues[0].reason, FileIssueKind::Empty);
    }

    #[test]
    fn verify_missing_tokenizer_reports_invalid() {
        // The tokenizer is deliberately not written.
        let root = dir_with_model(&[1u8; 512]);
        let issues = issues_of(verify_at(&root));
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].relative_path, "tokenizer.json");
    }

    #[test]
    fn summary_strings_are_log_safe() {
        let invalid = VerifyOutcome::FilesInvalid {
            model_dir: "/secret/path".into(),
            issues: vec![FileIssue {
                relative_path: "onnx/model.onnx".into(),
                reason: FileIssueKind::NotFound,
            }],
        };
        let summary = verify_outcome_summary(&invalid);
        assert!(
            !summary.contains("/secret/path"),
            "summary must not include the model dir path"
        );
    }
}