use std::path::PathBuf;
use std::sync::{Mutex, OnceLock};
use llama_cpp_4::llama_backend::LlamaBackend;
use llama_cpp_4::model::params::LlamaModelParams;
use llama_cpp_4::model::LlamaModel;
use llama_cpp_4::LLamaCppError;
/// Process-wide mutex handed out by [`decode_guard`].
///
/// NOTE(review): the name suggests it serialises decode calls across tests —
/// confirm against the callers.
pub static DECODE_LOCK: Mutex<()> = Mutex::new(());

/// Acquires [`DECODE_LOCK`], blocking until it is available.
///
/// A poisoned lock (a previous holder panicked) is not treated as an error:
/// the guard is recovered from the poison error and returned as usual.
pub fn decode_guard() -> std::sync::MutexGuard<'static, ()> {
    match DECODE_LOCK.lock() {
        Ok(guard) => guard,
        // The protected value is `()`, so there is no state that a panic
        // could have left inconsistent; just take the guard back.
        Err(poisoned) => poisoned.into_inner(),
    }
}
/// Lazily initialised backend handle; populated on first use by [`backend`].
static BACKEND: OnceLock<LlamaBackend> = OnceLock::new();
/// File name of the default test model, looked up inside the
/// `target/test-models` cache directories.
pub const DEFAULT_TEST_MODEL: &str = "stories260K.gguf";
/// Returns the shared [`LlamaBackend`], initialising it on the first call.
///
/// If the backend reports that it was already initialised elsewhere, a fresh
/// handle value is used instead of failing.
///
/// # Panics
///
/// Panics if backend initialisation fails for any other reason.
pub fn backend() -> &'static LlamaBackend {
    BACKEND.get_or_init(|| {
        LlamaBackend::init().unwrap_or_else(|e| match e {
            // Someone else already brought the backend up; a handle is all we need.
            LLamaCppError::BackendAlreadyInitialized => LlamaBackend {},
            _ => panic!("backend init failed: {e}"),
        })
    })
}
/// A model file located on disk for use in tests.
#[derive(Clone, Debug)]
pub struct ModelFixture {
    /// Location of the `.gguf` file.
    pub path: PathBuf,
    /// `true` when the file is the vocabulary-only fixture and must be loaded
    /// with vocab-only parameters.
    pub vocab_only: bool,
}
pub fn find_test_model() -> Option<ModelFixture> {
if let Ok(path) = std::env::var("LLAMA_TEST_MODEL") {
let p = PathBuf::from(path);
if p.is_file() {
return Some(ModelFixture {
path: p,
vocab_only: false,
});
}
eprintln!("LLAMA_TEST_MODEL is set but not a file: {}", p.display());
}
if let Some(path) = default_cached_test_model() {
if path.is_file() {
return Some(ModelFixture {
path,
vocab_only: false,
});
}
}
vocab_only_fixture()
}
fn default_cached_test_model() -> Option<PathBuf> {
let path = PathBuf::from("../target/test-models").join(DEFAULT_TEST_MODEL);
if path.is_file() {
return Some(path);
}
let path = PathBuf::from("target/test-models").join(DEFAULT_TEST_MODEL);
path.is_file().then_some(path)
}
fn vocab_only_fixture() -> Option<ModelFixture> {
let build_dir = PathBuf::from("target/debug/build");
let entries = std::fs::read_dir(&build_dir).ok()?;
for entry in entries.flatten() {
let name = entry.file_name();
if name
.to_str()
.is_some_and(|n| n.starts_with("llama-cpp-sys-4-"))
{
let vocab_path = entry
.path()
.join("out/llama.cpp/models/ggml-vocab-llama-bpe.gguf");
if vocab_path.is_file() {
return Some(ModelFixture {
path: vocab_path,
vocab_only: true,
});
}
}
}
None
}
/// Loads the model located by [`find_test_model`].
///
/// Returns the loaded model together with a flag that is `true` when the
/// vocabulary-only fixture was used (in which case the model is loaded with
/// vocab-only parameters). Returns `None` when no fixture is found or when
/// loading fails; the load error itself is discarded.
pub fn load_model() -> Option<(LlamaModel, bool)> {
    let fixture = find_test_model()?;
    let mut params = LlamaModelParams::default();
    if fixture.vocab_only {
        params = params.with_vocab_only(true);
    }
    // Pin the parameters on the stack, then lend them to the loader by reference.
    let params = std::pin::pin!(params);
    let model = LlamaModel::load_from_file(backend(), &fixture.path, &params).ok()?;
    Some((model, fixture.vocab_only))
}
/// Loads a complete model, refusing the vocabulary-only fixture.
///
/// Returns `None` when [`load_model`] yields nothing, or — after printing a
/// skip notice to stderr — when only the vocabulary-only fixture is available.
pub fn load_full_model() -> Option<LlamaModel> {
    match load_model()? {
        (model, false) => Some(model),
        (_, true) => {
            eprintln!(
                "SKIP: full model required (set LLAMA_TEST_MODEL or run scripts/fetch-test-model.sh)"
            );
            None
        }
    }
}
pub fn test_model_path() -> Option<PathBuf> {
find_test_model().map(|f| f.path)
}
/// Prints a skip notice to stderr, followed by the two ways of providing a
/// test model (environment variable or fetch script).
pub fn skip_no_model() {
    const NOTICE: [&str; 3] = [
        "SKIP: no test model available",
        " export LLAMA_TEST_MODEL=/path/to/model.gguf",
        " ./scripts/fetch-test-model.sh",
    ];
    for line in NOTICE.iter() {
        eprintln!("{}", line);
    }
}