use once_cell::sync::OnceCell;
use qmd::{EmbeddingEngine, Store, pull_model};
use std::sync::Mutex;
static ENGINE: OnceCell<Mutex<EmbeddingEngine>> = OnceCell::new();
fn silence_llama_logs() {
use llama_cpp_2::{LogOptions, send_logs_to_tracing};
send_logs_to_tracing(LogOptions::default().with_logs_enabled(false));
}
pub fn get_engine() -> Result<&'static Mutex<EmbeddingEngine>, String> {
if !super::vector_enabled() {
return Err(
"Vector embeddings disabled by config [memory].vector_enabled = false".to_string(),
);
}
if super::embedding_api_configured() {
return Err("Local engine not used: [memory.embedding] API configured".to_string());
}
ENGINE.get_or_try_init(|| {
check_cpu_features()?;
silence_llama_logs();
let _fd_guard = crate::utils::fd_suppress::suppress_stdio();
let pull = pull_model(qmd::llm::DEFAULT_EMBED_MODEL_URI, false)
.map_err(|e| format!("Failed to pull embedding model: {e}"))?;
let engine = EmbeddingEngine::new(&pull.path)
.map_err(|e| format!("Failed to init embedding engine: {e}"))?;
tracing::info!(
"Embedding engine ready: {} ({:.1} MB)",
pull.model,
pull.size_bytes as f64 / 1_048_576.0
);
Ok(Mutex::new(engine))
})
}
fn check_cpu_features() -> Result<(), String> {
#[cfg(target_arch = "x86_64")]
{
if !std::arch::is_x86_feature_detected!("avx") {
return Err(
"CPU lacks AVX — llama.cpp GGUF inference requires AVX (Sandy Bridge 2011+). \
Memory search will use FTS-only."
.to_string(),
);
}
}
Ok(())
}
pub fn engine_if_ready() -> Option<&'static Mutex<EmbeddingEngine>> {
ENGINE.get()
}
const MAX_EMBED_BYTES: usize = 32_000;
pub fn embed_content(store: &Mutex<Store>, body: &str) -> Result<(), String> {
if !super::vector_enabled() {
return Ok(());
}
if body.is_empty() {
return Ok(());
}
if body.len() > MAX_EMBED_BYTES {
return Err(format!(
"Body too large for embedding ({} bytes, max {MAX_EMBED_BYTES})",
body.len()
));
}
let engine_mutex = engine_if_ready().ok_or("Embedding engine not initialized")?;
let title = Store::extract_title(body);
let hash = Store::hash_content(body);
let emb = {
let mut engine = engine_mutex
.lock()
.map_err(|e| format!("Engine lock poisoned: {e}"))?;
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
engine.embed_document(body, Some(&title))
}))
.map_err(|_| "llama.cpp panicked during embedding".to_string())?
.map_err(|e| format!("Embedding failed: {e}"))?
};
let now = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S").to_string();
store
.lock()
.map_err(|e| format!("Store lock poisoned: {e}"))?
.insert_embedding(&hash, 0, 0, &emb.embedding, &emb.model, &now)
.map_err(|e| format!("Failed to store embedding: {e}"))
}
pub(super) fn backfill_embeddings(store: &Mutex<Store>) {
if !super::vector_enabled() {
tracing::info!("Vector embeddings disabled — skipping backfill");
return;
}
let engine_mutex = match get_engine() {
Ok(e) => e,
Err(e) => {
tracing::warn!("Embedding engine unavailable, skipping backfill: {e}");
return;
}
};
let needing = match store.lock() {
Ok(s) => s.get_hashes_needing_embedding().unwrap_or_default(),
Err(_) => return,
};
if needing.is_empty() {
return;
}
let count = needing.len();
tracing::info!("Backfilling embeddings for {count} documents");
let now = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S").to_string();
let mut stored = 0usize;
for (i, (hash, path, body)) in needing.iter().enumerate() {
tracing::info!(
"Embedding {}/{}: path={}, body_len={}, hash={}",
i + 1,
count,
path,
body.len(),
hash
);
if body.len() > MAX_EMBED_BYTES {
tracing::warn!(
"Skipping embedding for '{}' — body too large ({} bytes, max {}). \
Inserting zero-vector placeholder so it won't retry.",
path,
body.len(),
MAX_EMBED_BYTES
);
if let Ok(s) = store.lock() {
let _ = s.insert_embedding(hash, 0, 0, &[], "skipped-too-large", &now);
}
continue;
}
let title = Store::extract_title(body);
let emb = {
let mut engine = match engine_mutex.lock() {
Ok(e) => e,
Err(_) => return,
};
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
engine.embed_document(body, Some(&title))
})) {
Ok(result) => result.ok(),
Err(_) => {
tracing::error!("llama.cpp panicked during backfill embed of '{path}'");
continue;
}
}
};
if let Some(emb) = emb
&& let Ok(s) = store.lock()
&& s.insert_embedding(hash, 0, 0, &emb.embedding, &emb.model, &now)
.is_ok()
{
stored += 1;
}
}
tracing::info!("Backfilled {stored}/{count} embeddings");
}
#[derive(Debug, serde::Deserialize)]
struct EmbeddingApiResponse {
data: Vec<EmbeddingData>,
}
#[derive(Debug, serde::Deserialize)]
struct EmbeddingData {
embedding: Vec<f32>,
}
pub async fn embed_via_api(text: &str) -> Result<Vec<f32>, String> {
let cfg = super::embedding_api_config().ok_or("No [memory.embedding] config")?;
let url = cfg.url.as_ref().ok_or("embedding.url not set")?;
let model = cfg.model.as_ref().ok_or("embedding.model not set")?;
let endpoint = if url.ends_with("/embeddings") {
url.clone()
} else if url.ends_with('/') {
format!("{}embeddings", url)
} else {
format!("{}/embeddings", url)
};
let mut body = serde_json::json!({
"model": model,
"input": text,
});
if let Some(dims) = cfg.dimensions {
body["dimensions"] = serde_json::json!(dims);
}
let client = reqwest::Client::new();
let mut request = client.post(&endpoint).json(&body);
if let Some(ref key) = cfg.api_key {
request = request.header("Authorization", format!("Bearer {key}"));
}
let resp = request
.send()
.await
.map_err(|e| format!("Embedding API request failed: {e}"))?;
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(format!("Embedding API error {status}: {body}"));
}
let api_resp: EmbeddingApiResponse = resp
.json()
.await
.map_err(|e| format!("Failed to decode embedding API response: {e}"))?;
api_resp
.data
.into_iter()
.next()
.map(|d| d.embedding)
.ok_or_else(|| "Embedding API returned no data".to_string())
}
pub async fn embed_content_api(store: &'static Mutex<Store>, body: &str) -> Result<(), String> {
if body.is_empty() {
return Ok(());
}
if body.len() > MAX_EMBED_BYTES {
return Err(format!(
"Body too large for embedding ({} bytes, max {MAX_EMBED_BYTES})",
body.len()
));
}
let embedding = embed_via_api(body).await?;
let _title = Store::extract_title(body);
let hash = Store::hash_content(body);
let model_name = super::embedding_api_config()
.and_then(|c| c.model)
.unwrap_or_else(|| "api-embedding".to_string());
let now = chrono::Local::now().format("%Y-%m-%dT%H:%M:%S").to_string();
store
.lock()
.map_err(|e| format!("Store lock poisoned: {e}"))?
.insert_embedding(&hash, 0, 0, &embedding, &model_name, &now)
.map_err(|e| format!("Failed to store API embedding: {e}"))
}
pub async fn embed_query_api(query: &str) -> Result<Vec<f32>, String> {
embed_via_api(query).await
}