use std::{
env, fs,
io::Read,
path::{Path, PathBuf},
};
/// Repository path inserted as the `{REPO}` segment of every download URL
/// (`{endpoint}/{REPO}/resolve/{REVISION}/{file}`).
const REPO: &str = "Qdrant/all-MiniLM-L6-v2-onnx";
/// Revision segment placed after `resolve/` in every download URL, so each fetch targets
/// this one fixed revision. NOTE(review): the value looks like a full commit hash — confirm.
const REVISION: &str = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
/// File names the build script places in `OUT_DIR`, either copied from a local directory
/// or downloaded. The panic messages in `main` say "five files"; keep them in sync with
/// the length of this list.
const FILES: &[&str] = &[
"model.onnx",
"tokenizer.json",
"config.json",
"special_tokens_map.json",
"tokenizer_config.json",
];
/// Build-script entry point: makes sure every file in `FILES` exists in Cargo's `OUT_DIR`.
///
/// Source selection, per file:
/// * if `ALLSOURCE_PRIME_MODELS_SRC` is set to a non-blank value, the file is read from
///   that local directory;
/// * otherwise it is downloaded from `{HF_ENDPOINT}/{REPO}/resolve/{REVISION}/{file}`,
///   where `HF_ENDPOINT` defaults to `https://huggingface.co` when unset or blank.
///
/// Files that are already present and non-empty in `OUT_DIR` are left untouched.
///
/// # Panics
///
/// Panics (failing the build) if `OUT_DIR` is unset, a local file cannot be read or is
/// empty, a download fails, or a file cannot be written.
fn main() {
    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR set by cargo"));
    println!("cargo:rerun-if-env-changed=ALLSOURCE_PRIME_MODELS_SRC");
    println!("cargo:rerun-if-env-changed=HF_ENDPOINT");
    println!("cargo:rerun-if-changed=build.rs");

    let local_src = env::var("ALLSOURCE_PRIME_MODELS_SRC")
        .ok()
        .filter(|s| !s.trim().is_empty());
    // Normalize the mirror host: a blank value falls back to the default, and trailing
    // slashes are dropped so the URL built below never contains `//` after the host.
    let endpoint = env::var("HF_ENDPOINT")
        .ok()
        .map(|s| s.trim().trim_end_matches('/').to_string())
        .filter(|s| !s.is_empty())
        .unwrap_or_else(|| "https://huggingface.co".to_string());

    for file in FILES {
        let dst = out_dir.join(file);
        // NOTE(review): this check looks only at presence and size, so changing REVISION
        // or the source settings does not refresh files already sitting in OUT_DIR.
        if is_cached(&dst) {
            continue;
        }
        let bytes = match &local_src {
            Some(dir) => {
                let path = Path::new(dir).join(file);
                let bytes = fs::read(&path).unwrap_or_else(|e| {
                    panic!(
                        "ALLSOURCE_PRIME_MODELS_SRC={dir} is set but {} could not be read: {e}. \
                         Provide all five files: {}.",
                        path.display(),
                        FILES.join(", ")
                    )
                });
                // Downloads reject empty bodies; hold vendored files to the same standard
                // instead of silently copying an unusable file.
                assert!(
                    !bytes.is_empty(),
                    "ALLSOURCE_PRIME_MODELS_SRC={dir} is set but {} is empty. \
                     Provide all five files: {}.",
                    path.display(),
                    FILES.join(", ")
                );
                bytes
            }
            None => {
                let url = format!("{endpoint}/{REPO}/resolve/{REVISION}/{file}");
                download(&url).unwrap_or_else(|e| {
                    panic!(
                        "failed to fetch {url}: {e}\n\
                         To build offline, vendor the five model files and set \
                         ALLSOURCE_PRIME_MODELS_SRC=<dir> (files: {}). \
                         HF_ENDPOINT=<mirror> overrides the host.",
                        FILES.join(", ")
                    )
                })
            }
        };
        write_atomically(&dst, &bytes)
            .unwrap_or_else(|e| panic!("could not write {}: {e}", dst.display()));
    }
}

/// Returns `true` when `path` is an existing, non-empty regular file.
fn is_cached(path: &Path) -> bool {
    fs::metadata(path)
        .map(|m| m.is_file() && m.len() > 0)
        .unwrap_or(false)
}

/// Writes `bytes` to a sibling `<dst>.partial` file and then renames it onto `dst`, so an
/// interrupted build cannot leave a truncated file that later passes the cache check.
fn write_atomically(dst: &Path, bytes: &[u8]) -> std::io::Result<()> {
    let mut tmp = dst.as_os_str().to_owned();
    tmp.push(".partial");
    let tmp = PathBuf::from(tmp);
    fs::write(&tmp, bytes)?;
    fs::rename(&tmp, dst)
}
/// Performs a GET request for `url` and returns the complete response body.
///
/// The request is configured with a 300-second timeout.
///
/// # Errors
///
/// Returns an error if the request fails, if reading the body fails, or if the body
/// turns out to be empty.
fn download(url: &str) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    let limit = std::time::Duration::from_secs(300);
    let mut body = Vec::new();
    ureq::get(url)
        .timeout(limit)
        .call()?
        .into_reader()
        .read_to_end(&mut body)?;
    if body.is_empty() {
        Err("empty response body".into())
    } else {
        Ok(body)
    }
}