use once_cell::sync::OnceCell;
use qmd::{EmbeddingEngine, Store, pull_model};
use std::sync::Mutex;
/// Process-wide embedding engine, filled in by the first successful [`get_engine`] call.
///
/// The engine sits behind a `Mutex` because callers in this file take the lock to get
/// mutable access for each embedding request.
static ENGINE: OnceCell<Mutex<EmbeddingEngine>> = OnceCell::new();
fn silence_llama_logs() {
use llama_cpp_2::{LogOptions, send_logs_to_tracing};
send_logs_to_tracing(LogOptions::default().with_logs_enabled(false));
}
/// Returns the shared embedding engine, building it on first use.
///
/// Initialisation checks CPU support, silences llama.cpp logging, pulls the default
/// embedding model and constructs the engine from the pulled file.
///
/// # Errors
///
/// Returns a human-readable message when the CPU check fails, the model cannot be
/// pulled, or the engine cannot be constructed. Per `OnceCell::get_or_try_init`, a failed
/// attempt leaves the cell empty, so a later call runs the initialisation again.
pub fn get_engine() -> Result<&'static Mutex<EmbeddingEngine>, String> {
    /// One-shot initialiser handed to the `OnceCell`.
    fn load_engine() -> Result<Mutex<EmbeddingEngine>, String> {
        check_cpu_features()?;
        silence_llama_logs();

        // Held until this function returns, i.e. across model pull and engine creation.
        // NOTE(review): the guard is still alive when the "ready" line below is logged —
        // confirm that line is not swallowed if the subscriber writes to stdout/stderr.
        let _stdio_guard = crate::utils::fd_suppress::suppress_stdio();

        let fetched = match pull_model(qmd::llm::DEFAULT_EMBED_MODEL_URI, false) {
            Ok(fetched) => fetched,
            Err(e) => return Err(format!("Failed to pull embedding model: {e}")),
        };
        let engine = match EmbeddingEngine::new(&fetched.path) {
            Ok(engine) => engine,
            Err(e) => return Err(format!("Failed to init embedding engine: {e}")),
        };

        tracing::info!(
            "Embedding engine ready: {} ({:.1} MB)",
            fetched.model,
            fetched.size_bytes as f64 / 1_048_576.0
        );
        Ok(Mutex::new(engine))
    }

    ENGINE.get_or_try_init(load_engine)
}
/// Verifies that the CPU supports the instruction set the embedding backend needs.
///
/// On `x86_64` this performs a runtime AVX check; on every other architecture it
/// always succeeds.
///
/// # Errors
///
/// Returns an explanatory message when running on `x86_64` without AVX.
fn check_cpu_features() -> Result<(), String> {
    #[cfg(target_arch = "x86_64")]
    {
        let has_avx = std::arch::is_x86_feature_detected!("avx");
        if !has_avx {
            let reason = String::from(
                "CPU lacks AVX — llama.cpp GGUF inference requires AVX (Sandy Bridge 2011+). \
                 Memory search will use FTS-only.",
            );
            return Err(reason);
        }
    }
    Ok(())
}
pub fn engine_if_ready() -> Option<&'static Mutex<EmbeddingEngine>> {
ENGINE.get()
}
/// Largest document body, in bytes, that is handed to the embedding engine.
///
/// `embed_content` rejects larger bodies with an error, while `backfill_embeddings`
/// skips them and stores a placeholder row instead.
/// NOTE(review): the reason for 32,000 is not visible here — presumably tied to the
/// model's context limit; confirm.
const MAX_EMBED_BYTES: usize = 32_000;
/// Embeds `body` with the already-initialised engine and stores the vector in `store`.
///
/// An empty body is a no-op. This function never initialises the engine itself; it only
/// uses one that is already available via [`engine_if_ready`].
///
/// # Errors
///
/// Returns a message when the body exceeds [`MAX_EMBED_BYTES`], the engine is not yet
/// initialised, either lock is poisoned, the embedding call fails or panics, or the
/// store rejects the insert.
pub fn embed_content(store: &Mutex<Store>, body: &str) -> Result<(), String> {
    let body_len = body.len();
    if body_len == 0 {
        return Ok(());
    }
    if body_len > MAX_EMBED_BYTES {
        return Err(format!(
            "Body too large for embedding ({} bytes, max {MAX_EMBED_BYTES})",
            body_len
        ));
    }

    let Some(engine_mutex) = engine_if_ready() else {
        return Err("Embedding engine not initialized".to_string());
    };

    let title = Store::extract_title(body);
    let hash = Store::hash_content(body);

    // The engine lock lives only inside this block, so it is released before the
    // store lock is taken.
    let emb = {
        let mut guard = match engine_mutex.lock() {
            Ok(guard) => guard,
            Err(e) => return Err(format!("Engine lock poisoned: {e}")),
        };
        // A panic raised by the embedding call is caught here and reported as an error.
        let attempt = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            guard.embed_document(body, Some(&title))
        }));
        match attempt {
            Err(_) => return Err("llama.cpp panicked during embedding".to_string()),
            Ok(Err(e)) => return Err(format!("Embedding failed: {e}")),
            Ok(Ok(emb)) => emb,
        }
    };

    // Local wall-clock time, second precision, no UTC offset.
    let now = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S").to_string();

    let store_guard = match store.lock() {
        Ok(store_guard) => store_guard,
        Err(e) => return Err(format!("Store lock poisoned: {e}")),
    };
    // NOTE(review): the two zero arguments are presumably a chunk sequence/position —
    // confirm against `Store::insert_embedding`.
    let inserted = store_guard.insert_embedding(&hash, 0, 0, &emb.embedding, &emb.model, &now);
    inserted.map_err(|e| format!("Failed to store embedding: {e}"))
}
/// Computes and stores embeddings for every document the store reports as missing one.
///
/// Initialises the embedding engine if necessary (via `get_engine`). Bodies larger than
/// `MAX_EMBED_BYTES` are not embedded; an empty vector tagged `"skipped-too-large"` is
/// stored for them instead so they are not picked up again.
///
/// This is best-effort: nothing is returned and every failure is reported through
/// `tracing`. A failure for a single document (embedding error, llama.cpp panic, failed
/// insert) is logged and the loop moves on. A poisoned engine or store lock stops the
/// loop, because no later iteration could succeed; the summary line is still emitted.
pub(super) fn backfill_embeddings(store: &Mutex<Store>) {
    let engine_mutex = match get_engine() {
        Ok(e) => e,
        Err(e) => {
            tracing::warn!("Embedding engine unavailable, skipping backfill: {e}");
            return;
        }
    };

    let needing = match store.lock() {
        Ok(s) => match s.get_hashes_needing_embedding() {
            Ok(rows) => rows,
            Err(e) => {
                tracing::warn!(
                    "Failed to list documents needing embeddings, skipping backfill: {e}"
                );
                return;
            }
        },
        Err(e) => {
            tracing::warn!("Store lock poisoned, skipping backfill: {e}");
            return;
        }
    };
    if needing.is_empty() {
        return;
    }

    let count = needing.len();
    tracing::info!("Backfilling embeddings for {count} documents");

    // One timestamp is shared by every row written during this run.
    let now = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S").to_string();
    let mut stored = 0usize;

    for (i, (hash, path, body)) in needing.iter().enumerate() {
        tracing::info!(
            "Embedding {}/{}: path={}, body_len={}, hash={}",
            i + 1,
            count,
            path,
            body.len(),
            hash
        );

        if body.len() > MAX_EMBED_BYTES {
            tracing::warn!(
                "Skipping embedding for '{}' — body too large ({} bytes, max {}). \
                 Inserting zero-vector placeholder so it won't retry.",
                path,
                body.len(),
                MAX_EMBED_BYTES
            );
            match store.lock() {
                Ok(s) => {
                    // The placeholder is an empty vector; if it cannot be written the
                    // document will be reported as needing an embedding again.
                    if let Err(e) =
                        s.insert_embedding(hash, 0, 0, &[], "skipped-too-large", &now)
                    {
                        tracing::warn!("Failed to store placeholder embedding for '{path}': {e}");
                    }
                }
                Err(e) => {
                    tracing::warn!("Store lock poisoned, aborting backfill: {e}");
                    break;
                }
            }
            continue;
        }

        let title = Store::extract_title(body);

        // The engine lock is scoped to this block so it is released before the store
        // lock is taken.
        let emb = {
            let mut engine = match engine_mutex.lock() {
                Ok(guard) => guard,
                Err(e) => {
                    tracing::warn!("Engine lock poisoned, aborting backfill: {e}");
                    break;
                }
            };
            // A panic raised by the embedding call is caught so one bad document does
            // not take the whole backfill down.
            match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                engine.embed_document(body, Some(&title))
            })) {
                Ok(Ok(emb)) => emb,
                Ok(Err(e)) => {
                    tracing::warn!("Embedding failed for '{path}': {e}");
                    continue;
                }
                Err(_) => {
                    tracing::error!("llama.cpp panicked during backfill embed of '{path}'");
                    continue;
                }
            }
        };

        let store_guard = match store.lock() {
            Ok(guard) => guard,
            Err(e) => {
                // Every later insert would fail the same way, so stop instead of
                // computing embeddings that can never be stored.
                tracing::warn!("Store lock poisoned, aborting backfill: {e}");
                break;
            }
        };
        match store_guard.insert_embedding(hash, 0, 0, &emb.embedding, &emb.model, &now) {
            Ok(_) => {
                stored += 1;
            }
            Err(e) => {
                tracing::warn!("Failed to store embedding for '{path}': {e}");
            }
        }
    }

    tracing::info!("Backfilled {stored}/{count} embeddings");
}