pub mod download;
pub mod error;
pub mod manifest;
pub mod verify;
#[cfg(test)]
mod tests;
pub use error::BootstrapError;
pub use manifest::{placeholder_manifest, Manifest, ManifestFile, ModelPaths};
use std::path::{Path, PathBuf};
/// Ensures the model files are available on disk, driven by the manifest
/// returned from [`placeholder_manifest`].
///
/// `target_dir` is forwarded unchanged; when it is `None` the directory is
/// chosen by the manifest-driven resolution inside `ensure_with_manifest`.
///
/// # Errors
///
/// Propagates whatever [`BootstrapError`] `ensure_with_manifest` reports.
pub fn ensure_model_available(target_dir: Option<&Path>) -> Result<ModelPaths, BootstrapError> {
    ensure_with_manifest(target_dir, &placeholder_manifest())
}
/// Makes sure every file listed in `manifest` is present in the target
/// directory and returns the paths of the three recognised model files.
///
/// Steps, in order:
/// 1. Resolve the directory with `resolve_target_dir` and create it
///    (including missing parents).
/// 2. If `verify::check_existing` already yields a set of paths, return them
///    without downloading anything.
/// 3. Otherwise call `verify::cleanup_partials` (its result is ignored), then
///    for each manifest file: download it from the primary URL followed by
///    the fallback URLs, and check its SHA-256 against the manifest.
/// 4. Collect the paths of `model_q4f16.onnx`, `tokenizer.json` and
///    `config.json`; files with any other name are still downloaded and
///    verified but are not part of the returned value.
///
/// # Errors
///
/// * `BootstrapError::DiskFull` when the directory cannot be created.
/// * Any error returned by `resolve_target_dir`,
///   `download::download_with_chunks` or `verify::verify_sha256_streaming`.
///   On a verification failure the downloaded artifact is removed
///   (best effort) and `verify::cleanup_partials` is run before returning.
/// * `BootstrapError::ManifestParse` when, after all downloads, one of the
///   three recognised file names never appeared in the manifest.
pub(crate) fn ensure_with_manifest(
    target_dir: Option<&Path>,
    manifest: &Manifest,
) -> Result<ModelPaths, BootstrapError> {
    let dir = resolve_target_dir(target_dir, manifest)?;
    if let Err(source) = std::fs::create_dir_all(&dir) {
        return Err(BootstrapError::DiskFull { path: dir, source });
    }

    // Fast path: nothing to download when the existing files already check out.
    if let Some(existing) = verify::check_existing(&dir, manifest) {
        return Ok(existing);
    }

    // Best effort: a failed cleanup must not block a fresh download attempt.
    let _ = verify::cleanup_partials(&dir);

    let mut onnx: Option<PathBuf> = None;
    let mut tokenizer: Option<PathBuf> = None;
    let mut config: Option<PathBuf> = None;

    for file in &manifest.files {
        // Primary URL first, fallbacks afterwards, in manifest order.
        let urls: Vec<String> = std::iter::once(&file.primary_url)
            .chain(file.fallback_urls.iter())
            .cloned()
            .collect();

        let outcome = download::download_with_chunks(
            &urls,
            &dir,
            &file.name,
            file.size_bytes,
            manifest.chunk_count,
        )?;

        match verify::verify_sha256_streaming(&outcome.final_path, &file.sha256) {
            Ok(_) => {
                let _ = verify::cleanup_partials(&dir);
            }
            Err(e) => {
                // Do not leave an artifact with a bad digest behind.
                verify::remove_artifact_best_effort(&outcome.final_path);
                let _ = verify::cleanup_partials(&dir);
                return Err(e);
            }
        }

        // Only the three recognised names are remembered; anything else is skipped.
        let slot = match file.name.as_str() {
            "model_q4f16.onnx" => &mut onnx,
            "tokenizer.json" => &mut tokenizer,
            "config.json" => &mut config,
            _ => continue,
        };
        *slot = Some(outcome.final_path);
    }

    // Shared constructor for the "required entry missing" error.
    let manifest_gap = |message: &'static str| BootstrapError::ManifestParse {
        source: serde_json::Error::io(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            message,
        )),
    };

    // Checked in this fixed order so the first missing entry decides the error.
    let onnx = onnx.ok_or_else(|| manifest_gap("manifest missing model_q4f16.onnx entry"))?;
    let tokenizer = tokenizer.ok_or_else(|| manifest_gap("manifest missing tokenizer.json entry"))?;
    let config = config.ok_or_else(|| manifest_gap("manifest missing config.json entry"))?;

    Ok(ModelPaths {
        onnx,
        tokenizer,
        config,
    })
}
/// Picks the directory the model files should live in.
///
/// A caller-supplied path is returned as-is (copied into a `PathBuf`).
/// Without one, the result is
/// `<dirs::data_local_dir()>/vigil/models/<model_name>-<version>`, with the
/// last component taken from `manifest`.
///
/// # Errors
///
/// Returns `BootstrapError::DiskFull` (with a placeholder path and a
/// `NotFound` I/O error as source) when no path was supplied and
/// `dirs::data_local_dir()` returns `None`.
fn resolve_target_dir(
    caller: Option<&Path>,
    manifest: &Manifest,
) -> Result<PathBuf, BootstrapError> {
    let mut resolved = match (caller, dirs::data_local_dir) {
        // An explicit choice always wins; the platform lookup is not consulted.
        (Some(explicit), _) => return Ok(explicit.to_path_buf()),
        (None, lookup) => match lookup() {
            Some(base) => base,
            None => {
                return Err(BootstrapError::DiskFull {
                    path: PathBuf::from("<no data_local_dir>"),
                    source: std::io::Error::new(
                        std::io::ErrorKind::NotFound,
                        "dirs::data_local_dir() returned None",
                    ),
                })
            }
        },
    };

    resolved.push("vigil");
    resolved.push("models");
    resolved.push(format!("{}-{}", manifest.model_name, manifest.version));
    Ok(resolved)
}