use crate::errors::AppError;
use serde::Deserialize;
use std::process::Stdio;
use std::sync::Arc;
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
/// Per-call embedding timeout, in seconds, used when the environment supplies no usable value.
const DEFAULT_EMBED_TIMEOUT_SECS: u64 = 300;

/// Resolves the per-call embedding timeout.
///
/// Reads `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS`. A value that is missing, is not a `u64`, or lies
/// outside `10..=3600` seconds is ignored and [`DEFAULT_EMBED_TIMEOUT_SECS`] is used instead.
fn embed_timeout() -> std::time::Duration {
    let configured = match std::env::var("SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS") {
        Ok(raw) => raw.parse::<u64>().ok(),
        Err(_) => None,
    };
    let secs = match configured {
        Some(n) if (10..=3_600).contains(&n) => n,
        _ => DEFAULT_EMBED_TIMEOUT_SECS,
    };
    std::time::Duration::from_secs(secs)
}
/// Test-only helper: the base embedding timeout plus 15 seconds for every item after the first.
///
/// The arithmetic saturates instead of truncating the item count or panicking on overflow, so
/// an absurdly large `batch_size` yields a very long timeout rather than a wrapped or aborted one.
#[cfg(test)]
fn embed_timeout_for_batch(batch_size: usize) -> std::time::Duration {
    // Cap before narrowing so the `as u32` cast can never wrap.
    let extra_items = batch_size.saturating_sub(1).min(u32::MAX as usize) as u32;
    let extra = std::time::Duration::from_secs(15).saturating_mul(extra_items);
    embed_timeout().saturating_add(extra)
}
/// Returns `(exit_code, signal)` for a status that carries no exit code.
///
/// The exit-code slot is always `None`. On Unix the second slot is the terminating signal, if
/// any; on other platforms it is always `None`.
fn extract_exit_info(status: &std::process::ExitStatus) -> (Option<i32>, Option<i32>) {
    #[cfg(unix)]
    let signal = std::os::unix::process::ExitStatusExt::signal(status);
    #[cfg(not(unix))]
    let signal = {
        let _ = status;
        None
    };
    (None, signal)
}
/// Builds the JSON schema for a single-embedding response: an object whose only property,
/// `embedding`, is an array of exactly `dim` numbers.
fn build_single_schema(dim: usize) -> String {
    // Schema of the fixed-length number array, assembled first and spliced into the envelope.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    format!(
        r#"{{"type":"object","properties":{{"embedding":{vector}}},"required":["embedding"],"additionalProperties":false}}"#
    )
}
/// Builds the JSON schema for a batch response: an object with an `items` array whose entries
/// each carry an integer `i` and a `v` array of exactly `dim` numbers.
fn build_batch_schema(dim: usize) -> String {
    // Innermost piece: the fixed-length vector.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    // One `{i, v}` entry of the `items` array.
    let item = format!(
        r#"{{"type":"object","properties":{{"i":{{"type":"integer"}},"v":{vector}}},"required":["i","v"],"additionalProperties":false}}"#
    );
    format!(
        r#"{{"type":"object","properties":{{"items":{{"type":"array","items":{item}}}}},"required":["items"],"additionalProperties":false}}"#
    )
}
/// Embedding client that obtains vectors by invoking an external LLM command-line tool.
#[derive(Clone, Debug)]
pub struct LlmEmbedding {
/// Which CLI (claude, codex or opencode) this client drives.
flavour: EmbeddingFlavour,
/// Path of the executable that is spawned for every embedding request.
binary: std::path::PathBuf,
/// Model identifier passed to the CLI.
model: String,
/// Cached schema temp files for the codex flavour; held in an `Arc`, so clones share the cache.
codex_schemas: Arc<parking_lot::Mutex<CodexSchemaFiles>>,
/// When set, replaces the environment-derived per-call timeout.
timeout_override: Option<std::time::Duration>,
}
/// Cache of the schema temp files handed to codex, each remembered with the dimension it was
/// generated for.
#[derive(Debug, Default)]
struct CodexSchemaFiles {
/// `(dim, file)` for the single-embedding schema, if one has been written.
single: Option<(usize, Arc<tempfile::NamedTempFile>)>,
/// `(dim, file)` for the batch schema, if one has been written.
batch: Option<(usize, Arc<tempfile::NamedTempFile>)>,
}
/// The external CLI used to produce embeddings.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
pub enum EmbeddingFlavour {
/// The `claude` CLI.
Claude,
/// The `codex` CLI.
Codex,
/// The `opencode` CLI.
Opencode,
}
/// Builder for [`LlmEmbedding`] that lets callers override the binary, model and timeout.
#[derive(Clone, Debug)]
pub struct LlmEmbeddingBuilder {
/// CLI flavour the resulting client will drive.
flavour: EmbeddingFlavour,
/// Explicit executable path; when `None` the binary is located at build time.
binary_override: Option<std::path::PathBuf>,
/// Explicit model name; when `None` the flavour's default model is resolved at build time.
model_override: Option<String>,
/// Explicit per-call timeout; when `None` the client uses the environment-derived timeout.
timeout_override: Option<std::time::Duration>,
}
impl LlmEmbeddingBuilder {
pub fn claude_default() -> Self {
Self {
flavour: EmbeddingFlavour::Claude,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
pub fn codex_default() -> Self {
Self {
flavour: EmbeddingFlavour::Codex,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
pub fn opencode_default() -> Self {
Self {
flavour: EmbeddingFlavour::Opencode,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
pub fn override_binary(mut self, binary: std::path::PathBuf) -> Self {
self.binary_override = Some(binary);
self
}
pub fn override_model(mut self, model: String) -> Self {
self.model_override = Some(model);
self
}
pub fn override_timeout(mut self, secs: u64) -> Self {
let clamped = secs.clamp(10, 3_600);
self.timeout_override = Some(std::time::Duration::from_secs(clamped));
self
}
pub fn build(self) -> Result<LlmEmbedding, AppError> {
LlmEmbedding::oauth_only_enforce()?;
let binary = match self.binary_override {
Some(path) => resolve_real_binary(&path),
None => {
let (env_var, which_name) = match self.flavour {
EmbeddingFlavour::Codex => ("SQLITE_GRAPHRAG_CODEX_BINARY", "codex"),
EmbeddingFlavour::Claude => ("SQLITE_GRAPHRAG_CLAUDE_BINARY", "claude"),
EmbeddingFlavour::Opencode => ("SQLITE_GRAPHRAG_OPENCODE_BINARY", "opencode"),
};
let path = std::env::var_os(env_var)
.map(std::path::PathBuf::from)
.or_else(|| which::which(which_name).ok())
.ok_or_else(|| {
AppError::Embedding(format!("`{which_name}` not found on PATH"))
})?;
resolve_real_binary(&path)
}
};
let model = match self.model_override {
Some(m) => m,
None => match self.flavour {
EmbeddingFlavour::Codex => codex_embed_model(),
EmbeddingFlavour::Claude => claude_embed_model(),
EmbeddingFlavour::Opencode => opencode_embed_model(),
},
};
Ok(LlmEmbedding {
flavour: self.flavour,
binary,
model,
codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
timeout_override: self.timeout_override,
})
}
}
impl EmbeddingFlavour {
pub fn as_str(self) -> &'static str {
match self {
Self::Claude => "claude",
Self::Codex => "codex",
Self::Opencode => "opencode",
}
}
}
/// Deserialized payload of a single-embedding response.
#[derive(Debug, Deserialize)]
struct EmbeddingResponse {
/// The embedding vector returned by the CLI.
embedding: Vec<f32>,
}
/// Deserialized payload of a batch embedding response.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingResponse {
/// One entry per embedded text.
items: Vec<BatchEmbeddingItem>,
}
/// A single entry of a batch embedding response.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingItem {
/// 1-based position of the text within the batch prompt.
i: usize,
/// The embedding vector for that text.
v: Vec<f32>,
}
/// Resolves `path` to the executable that should actually be spawned.
///
/// The path is canonicalized first; if that fails the input is returned unchanged. A canonical
/// path that is an ELF file is returned as is. Otherwise the file is treated as a possible shell
/// shim: when its `exec` target exists and is an ELF file that target is returned, and in every
/// other case the canonical path is returned.
pub fn resolve_real_binary(path: &std::path::Path) -> std::path::PathBuf {
    let canonical = match std::fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => return path.to_path_buf(),
    };
    if is_elf_binary(&canonical) {
        return canonical;
    }
    match extract_exec_target_from_shim(&canonical) {
        Some(target) if target.exists() && is_elf_binary(&target) => target,
        _ => canonical,
    }
}
/// Reports whether the file at `path` starts with the ELF magic bytes (`0x7f 'E' 'L' 'F'`).
///
/// Only the first four bytes are read, so large executables are not loaded into memory.
/// Returns `false` when the file cannot be opened or is shorter than four bytes.
fn is_elf_binary(path: &std::path::Path) -> bool {
    use std::io::Read;

    let mut file = match std::fs::File::open(path) {
        Ok(file) => file,
        Err(_) => return false,
    };
    let mut magic = [0u8; 4];
    // `read_exact` fails on files shorter than the magic, which maps to "not ELF".
    file.read_exact(&mut magic).is_ok() && magic == [0x7f, b'E', b'L', b'F']
}
/// Extracts the program named by the last `exec …` line of a shebang script.
///
/// Returns `None` when the file cannot be read as UTF-8 text, does not start with `#!`, or
/// contains no line beginning with `exec `. A target wrapped in single or double quotes is
/// returned without the quotes and may contain spaces; an unquoted target ends at the first
/// whitespace. No shell variable expansion is performed.
fn extract_exec_target_from_shim(path: &std::path::Path) -> Option<std::path::PathBuf> {
    let content = std::fs::read_to_string(path).ok()?;
    if !content.starts_with("#!") {
        return None;
    }
    // Scan from the bottom: the last `exec` line is the one that replaces the shell.
    let command = content
        .lines()
        .rev()
        .find_map(|line| line.trim().strip_prefix("exec "))?
        .trim_start();
    let target = match command.chars().next()? {
        quote @ ('"' | '\'') => {
            let inner = &command[1..];
            match inner.find(quote) {
                Some(end) => &inner[..end],
                // Unterminated quote: fall back to plain whitespace splitting.
                None => command.split_whitespace().next()?,
            }
        }
        _ => command.split_whitespace().next()?,
    };
    Some(std::path::PathBuf::from(target))
}
/// Picks the model for the claude flavour.
///
/// Checks `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`, then `SQLITE_GRAPHRAG_LLM_MODEL`; when neither
/// yields a value it logs the fallback and returns `claude-sonnet-4-6`.
fn claude_embed_model() -> String {
    for key in [
        "SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL",
        "SQLITE_GRAPHRAG_LLM_MODEL",
    ] {
        if let Ok(model) = std::env::var(key) {
            return model;
        }
    }
    tracing::info!(
        target: "llm_embedding",
        "no model specified; defaulting to claude-sonnet-4-6"
    );
    "claude-sonnet-4-6".to_string()
}
/// Picks the model for the codex flavour.
///
/// Checks `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL`, then `SQLITE_GRAPHRAG_LLM_MODEL`; when neither
/// yields a value it logs the fallback and returns `gpt-5.5`.
fn codex_embed_model() -> String {
    for key in [
        "SQLITE_GRAPHRAG_CODEX_EMBED_MODEL",
        "SQLITE_GRAPHRAG_LLM_MODEL",
    ] {
        if let Ok(model) = std::env::var(key) {
            return model;
        }
    }
    tracing::info!(
        target: "llm_embedding",
        "no model specified; defaulting to gpt-5.5"
    );
    "gpt-5.5".to_string()
}
/// Picks the model for the opencode flavour.
///
/// Checks `SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL`, then `SQLITE_GRAPHRAG_OPENCODE_MODEL`; when
/// neither yields a value it logs the fallback and returns `opencode/big-pickle`.
fn opencode_embed_model() -> String {
    for key in [
        "SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL",
        "SQLITE_GRAPHRAG_OPENCODE_MODEL",
    ] {
        if let Ok(model) = std::env::var(key) {
            return model;
        }
    }
    tracing::info!(
        target: "llm_embedding",
        "no model specified; defaulting to opencode/big-pickle"
    );
    "opencode/big-pickle".to_string()
}
impl LlmEmbedding {
pub fn detect_available() -> Result<Self, AppError> {
Self::oauth_only_enforce()?;
let codex_path = std::env::var_os("SQLITE_GRAPHRAG_CODEX_BINARY")
.map(std::path::PathBuf::from)
.or_else(|| which::which("codex").ok());
if let Some(path) = codex_path {
return Ok(Self {
flavour: EmbeddingFlavour::Codex,
binary: resolve_real_binary(&path),
model: codex_embed_model(),
codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
timeout_override: None,
});
}
let claude_path = std::env::var_os("SQLITE_GRAPHRAG_CLAUDE_BINARY")
.map(std::path::PathBuf::from)
.or_else(|| which::which("claude").ok());
if let Some(path) = claude_path {
return Ok(Self {
flavour: EmbeddingFlavour::Claude,
binary: resolve_real_binary(&path),
model: claude_embed_model(),
codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
timeout_override: None,
});
}
let opencode_path = std::env::var_os("SQLITE_GRAPHRAG_OPENCODE_BINARY")
.map(std::path::PathBuf::from)
.or_else(|| which::which("opencode").ok());
if let Some(path) = opencode_path {
return Ok(Self {
flavour: EmbeddingFlavour::Opencode,
binary: resolve_real_binary(&path),
model: opencode_embed_model(),
codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
timeout_override: None,
});
}
Err(AppError::Embedding(
"no LLM CLI found on PATH: install `codex` (0.130+), `claude` (Claude Code 2.1+), or `opencode` (1.17+)"
.to_string(),
))
}
fn instance_embed_timeout(&self) -> std::time::Duration {
if let Some(d) = self.timeout_override {
return d;
}
embed_timeout()
}
/// Timeout for a batch call: this client's per-call timeout plus 15 seconds for every item
/// after the first.
///
/// The arithmetic saturates instead of truncating the item count or panicking on overflow.
fn instance_embed_timeout_for_batch(&self, batch_size: usize) -> std::time::Duration {
    // Cap before narrowing so the `as u32` cast can never wrap.
    let extra_items = batch_size.saturating_sub(1).min(u32::MAX as usize) as u32;
    let extra = std::time::Duration::from_secs(15).saturating_mul(extra_items);
    self.instance_embed_timeout().saturating_add(extra)
}
/// Builds a codex-backed client with default settings.
pub fn with_codex() -> Result<Self, AppError> {
    let builder = Self::with_codex_builder();
    builder.build()
}
/// Builds a claude-backed client with default settings.
pub fn with_claude() -> Result<Self, AppError> {
    let builder = Self::with_claude_builder();
    builder.build()
}
pub fn with_codex_builder() -> LlmEmbeddingBuilder {
LlmEmbeddingBuilder {
flavour: EmbeddingFlavour::Codex,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
pub fn with_claude_builder() -> LlmEmbeddingBuilder {
LlmEmbeddingBuilder {
flavour: EmbeddingFlavour::Claude,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
/// Builds an opencode-backed client with default settings.
pub fn with_opencode() -> Result<Self, AppError> {
    let builder = Self::with_opencode_builder();
    builder.build()
}
pub fn with_opencode_builder() -> LlmEmbeddingBuilder {
LlmEmbeddingBuilder {
flavour: EmbeddingFlavour::Opencode,
binary_override: None,
model_override: None,
timeout_override: None,
}
}
/// Rejects environments that carry an API key instead of relying on OAuth login.
///
/// # Errors
///
/// Returns `AppError::Validation` when `ANTHROPIC_API_KEY` is set, or otherwise when
/// `OPENAI_API_KEY` is set.
fn oauth_only_enforce() -> Result<(), AppError> {
    // The Anthropic key is checked first; the OpenAI key is only consulted when it is absent.
    let violation: Option<&'static str> = if std::env::var("ANTHROPIC_API_KEY").is_ok() {
        Some(
            "ANTHROPIC_API_KEY is set; v1.0.76 requires OAuth. \
             unset it and use `claude login` instead.",
        )
    } else if std::env::var("OPENAI_API_KEY").is_ok() {
        Some(
            "OPENAI_API_KEY is set; v1.0.76 requires OAuth. \
             unset it and use `codex login` instead.",
        )
    } else {
        None
    };
    match violation {
        Some(message) => Err(AppError::Validation(message.into())),
        None => Ok(()),
    }
}
/// Embeds `text` as a passage, i.e. with the passage prefix prepended.
pub fn embed_passage(&self, text: &str) -> Result<Vec<f32>, AppError> {
    let prefix = crate::constants::PASSAGE_PREFIX;
    self.invoke_with_prefix(prefix, text)
}
/// Embeds `text` as a query, i.e. with the query prefix prepended.
pub fn embed_query(&self, text: &str) -> Result<Vec<f32>, AppError> {
    let prefix = crate::constants::QUERY_PREFIX;
    self.invoke_with_prefix(prefix, text)
}
/// Returns a `flavour:model` label, e.g. `codex:<model name>`.
pub fn model_label(&self) -> String {
    let flavour = self.flavour.as_str();
    let model = &self.model;
    format!("{flavour}:{model}")
}
/// Returns the CLI flavour this client drives.
pub fn flavour(&self) -> EmbeddingFlavour {
self.flavour
}
pub async fn embed_batch_async(
&self,
prefix: &str,
batch: &[(usize, String)],
) -> Result<Vec<(usize, Vec<f32>)>, AppError> {
let dim = crate::constants::embedding_dim();
if batch.is_empty() {
return Ok(Vec::new());
}
if batch.len() == 1 {
let (idx, text) = (&batch[0].0, &batch[0].1);
let v = self.invoke_single_async(prefix, text, dim).await?;
return Ok(vec![(*idx, v)]);
}
let mut prompt = format!(
"Generate {dim}-dimensional semantic embedding vectors for each numbered text below.\n\
Return a JSON object with an \"items\" array containing EXACTLY {n} items.\n\
Each item has \"i\" (the 1-based index) and \"v\" (the {dim}-float vector, values between -1 and 1).\n\n",
n = batch.len()
);
for (pos, (_, text)) in batch.iter().enumerate() {
prompt.push_str(&format!("{}: {prefix}{text}\n", pos + 1));
}
let _batch_timeout = self.instance_embed_timeout_for_batch(batch.len());
let stdout = match self.flavour {
EmbeddingFlavour::Claude => {
self.invoke_claude(&prompt, &build_batch_schema(dim))
.await?
}
EmbeddingFlavour::Codex => {
let schema = self.codex_schema_file(dim, true)?;
self.invoke_codex(&prompt, schema.path()).await?
}
EmbeddingFlavour::Opencode => {
let opencode_prompt = format!(
"You are a batch embedding function. For each numbered text item below, \
generate an array of exactly {dim} floating-point numbers between -1 and 1 \
representing its semantic meaning. Output ONLY a JSON object with key \"items\" \
containing an array of objects, each with \"i\" (the 1-based index) and \
\"v\" (the {dim}-element float array). No markdown, no explanation.\n\n\
{prompt}"
);
self.invoke_opencode(&opencode_prompt).await?
}
};
let parsed: BatchEmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
AppError::Embedding(format!(
"LLM batch embedding response parse failed: {e}; raw={stdout}"
))
})?;
if parsed.items.len() != batch.len() {
return Err(AppError::Embedding(format!(
"LLM batch returned {} items, expected {} (G42/S2 coverage check)",
parsed.items.len(),
batch.len()
)));
}
let mut out: Vec<Option<Vec<f32>>> = vec![None; batch.len()];
for item in parsed.items {
if item.i == 0 || item.i > batch.len() {
return Err(AppError::Embedding(format!(
"LLM batch item index {} out of range 1..={}",
item.i,
batch.len()
)));
}
if item.v.len() != dim {
return Err(AppError::Embedding(format!(
"LLM batch item {} returned {} dims, expected {dim}; \
refusing to truncate or pad silently (G42/C5)",
item.i,
item.v.len()
)));
}
out[item.i - 1] = Some(item.v);
}
let mut result = Vec::with_capacity(batch.len());
for (pos, slot) in out.into_iter().enumerate() {
let v = slot.ok_or_else(|| {
AppError::Embedding(format!(
"LLM batch response is missing item index {} (G42/S2 coverage check)",
pos + 1
))
})?;
result.push((batch[pos].0, v));
}
Ok(result)
}
/// Synchronous wrapper around the single-text embedding call.
///
/// When a Tokio runtime is already active on this thread the future is driven through
/// `block_in_place` on that runtime's handle; otherwise it runs on the crate's shared runtime.
fn invoke_with_prefix(&self, prefix: &str, text: &str) -> Result<Vec<f32>, AppError> {
    let dim = crate::constants::embedding_dim();
    let request = self.invoke_single_async(prefix, text, dim);
    if let Ok(handle) = tokio::runtime::Handle::try_current() {
        return tokio::task::block_in_place(|| handle.block_on(request));
    }
    crate::embedder::shared_runtime()?.block_on(request)
}
async fn invoke_single_async(
&self,
prefix: &str,
text: &str,
dim: usize,
) -> Result<Vec<f32>, AppError> {
let prompt = format!("{prefix}{text}");
let stdout = match self.flavour {
EmbeddingFlavour::Claude => {
self.invoke_claude(&prompt, &build_single_schema(dim))
.await?
}
EmbeddingFlavour::Codex => {
let schema = self.codex_schema_file(dim, false)?;
self.invoke_codex(&prompt, schema.path()).await?
}
EmbeddingFlavour::Opencode => {
let opencode_prompt = format!(
"You are an embedding function. Given the input text, output a JSON object \
with a single key \"embedding\" containing an array of exactly {dim} \
floating-point numbers between -1 and 1 that represent the semantic meaning \
of the text. Output ONLY the JSON object, nothing else.\n\n\
Input text: \"{prompt}\""
);
self.invoke_opencode(&opencode_prompt).await?
}
};
let parsed: EmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
AppError::Embedding(format!(
"LLM embedding response parse failed: {e}; raw={stdout}"
))
})?;
if parsed.embedding.len() != dim {
return Err(AppError::Embedding(format!(
"LLM returned {} dims, expected {dim}; \
refusing to truncate or pad silently (G42/C5)",
parsed.embedding.len()
)));
}
Ok(parsed.embedding)
}
/// Returns the temp file holding the codex output schema for `dim`, creating it on first use.
///
/// Single and batch schemas are cached in separate slots. A cached file is reused only when it
/// was generated for the same `dim`; otherwise a new file is written and replaces the slot.
///
/// # Errors
///
/// Returns `AppError::Embedding` when the temp file cannot be created or written.
fn codex_schema_file(
    &self,
    dim: usize,
    batch: bool,
) -> Result<Arc<tempfile::NamedTempFile>, AppError> {
    let mut cache = self.codex_schemas.lock();
    let slot = match batch {
        true => &mut cache.batch,
        false => &mut cache.single,
    };
    if let Some((_, cached)) = slot.as_ref().filter(|(cached_dim, _)| *cached_dim == dim) {
        return Ok(Arc::clone(cached));
    }

    let schema_text = match batch {
        true => build_batch_schema(dim),
        false => build_single_schema(dim),
    };
    let created = tempfile::Builder::new()
        .prefix("sqlite-graphrag-embed-schema-")
        .suffix(".json")
        .tempfile()
        .map_err(|e| AppError::Embedding(format!("schema tempfile create failed: {e}")))?;
    std::fs::write(created.path(), schema_text)
        .map_err(|e| AppError::Embedding(format!("schema tempfile write failed: {e}")))?;

    let shared = Arc::new(created);
    *slot = Some((dim, Arc::clone(&shared)));
    Ok(shared)
}
/// Runs the claude CLI once with `prompt` and the inline JSON `schema`, returning its raw stdout.
///
/// The child runs in the spawn-isolation directory with a cleared environment (only `PATH`,
/// `HOME` and `CLAUDE_CONFIG_DIR` are set), an empty MCP config and empty hooks, under this
/// client's per-call timeout.
///
/// # Errors
///
/// Returns an error when the pre-flight check fails, when the process cannot be spawned, when it
/// times out, when stdout reports a rate-limit/quota error, or when it exits unsuccessfully.
async fn invoke_claude(&self, prompt: &str, schema: &str) -> Result<String, AppError> {
let spawn_dir = crate::spawn::spawn_isolation_dir()?;
let mcp_config_path = crate::spawn::preflight::write_empty_mcp_config_tempfile()?;
// The pre-flight check is given an empty argv; the real arguments are added to `cmd` below.
let argv_refs: [std::ffi::OsString; 0] = [];
let preflight_args = crate::spawn::preflight::PreFlightArgs {
binary_path: &self.binary,
argv: &argv_refs,
workspace_root: &spawn_dir,
mcp_config_inline_json: None,
expected_output_bytes: 65_536,
spawner_name: "llm_embedding",
};
crate::spawn::preflight::preflight_check(&preflight_args)?;
let mut cmd = Command::new(&self.binary);
// NOTE(review): the prompt is passed as a command-line argument here, whereas the codex path
// sends it over stdin; very large prompts may hit OS argument-size limits — confirm.
cmd.arg("-p")
.arg(prompt)
.arg("--model")
.arg(&self.model)
.arg("--json-schema")
.arg(schema)
.arg("--output-format")
.arg("json")
.arg("--strict-mcp-config")
.arg("--mcp-config")
.arg(mcp_config_path.as_os_str())
.arg("--settings")
.arg(r#"{"hooks":{}}"#)
.arg("--dangerously-skip-permissions")
// Start from an empty environment and re-add only PATH and HOME.
.env_clear()
.env("PATH", std::env::var("PATH").unwrap_or_default())
.env("HOME", std::env::var("HOME").unwrap_or_default())
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
// Ensures the child is killed if the future is dropped, e.g. on timeout.
.kill_on_drop(true);
cmd.current_dir(&spawn_dir);
// Default the config dir to the isolation dir; the managed empty-config dir wins when available.
cmd.env("CLAUDE_CONFIG_DIR", &spawn_dir);
if let Some(config_dir) = claude_embedding_config_dir() {
cmd.env("CLAUDE_CONFIG_DIR", &config_dir);
}
let binary_str = self.binary.to_string_lossy().into_owned();
// Outer `Err` = timeout elapsed; inner `Err` = spawning or waiting on the child failed.
let output = match tokio::time::timeout(self.instance_embed_timeout(), cmd.output()).await {
Err(_elapsed) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::Timeout {
secs: self.instance_embed_timeout().as_secs(),
binary: binary_str.clone(),
},
));
}
Ok(Err(e)) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
binary: binary_str.clone(),
source: e.to_string(),
},
));
}
Ok(Ok(o)) => o,
};
// Rate-limit detection runs before the exit-status check, so it applies regardless of the
// exit code whenever stdout is a JSON document with `is_error: true`.
let stdout_str = String::from_utf8_lossy(&output.stdout);
if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&stdout_str) {
let is_rate_limited = parsed
.get("is_error")
.and_then(|v| v.as_bool())
.unwrap_or(false)
&& parsed
.get("result")
.and_then(|v| v.as_str())
.map(|s| {
s.contains("rate limit")
|| s.contains("quota")
|| s.contains("anthropic-ratelimit")
})
.unwrap_or(false);
if is_rate_limited {
return Err(AppError::Embedding(format!(
"OAuth usage quota exhausted: claude rate_limit detected in stdout: {}",
parsed
.get("result")
.and_then(|v| v.as_str())
.unwrap_or("")
// Only the first 120 characters of the message are included in the error.
.chars()
.take(120)
.collect::<String>()
)));
}
}
if !output.status.success() {
// `code()` is `None` when there is no exit code; fall back to the signal information.
let (exit_code, signal) = if let Some(code) = output.status.code() {
(Some(code), None)
} else {
extract_exit_info(&output.status)
};
let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stdout,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stderr,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let mut hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
// Substring heuristics for authentication failures; a match appends a re-login hint.
if stderr_tail.contains("401")
|| stderr_tail.contains("Unauthorized")
|| stderr_tail.contains("expired")
|| stderr_tail.contains("login")
|| stdout_tail.contains("401")
|| stdout_tail.contains("Unauthorized")
{
hint.push_str(" | Claude OAuth token may be expired; run `claude login` to renew");
}
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
exit_code,
signal,
stdout_tail,
stderr_tail,
binary: binary_str,
hint,
},
));
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
async fn invoke_codex(
&self,
prompt: &str,
schema_path: &std::path::Path,
) -> Result<String, AppError> {
let binary_str = self.binary.to_string_lossy().into_owned();
let mut cmd = build_codex_embedding_command(&self.binary, &self.model, schema_path)?;
let argv_refs: [std::ffi::OsString; 0] = [];
let preflight_args = crate::spawn::preflight::PreFlightArgs {
binary_path: &self.binary,
argv: &argv_refs,
workspace_root: std::path::Path::new("."),
mcp_config_inline_json: None,
expected_output_bytes: 65_536,
spawner_name: "llm_embedding",
};
crate::spawn::preflight::preflight_check(&preflight_args)?;
let _ = binary_str;
let mut child = match cmd.spawn() {
Ok(c) => c,
Err(e) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
binary: binary_str,
source: e.to_string(),
},
));
}
};
if let Some(mut stdin) = child.stdin.take() {
stdin
.write_all(prompt.as_bytes())
.await
.map_err(|e| AppError::Embedding(format!("codex stdin write failed: {e}")))?;
drop(stdin);
}
let output =
match tokio::time::timeout(self.instance_embed_timeout(), child.wait_with_output())
.await
{
Err(_elapsed) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::Timeout {
secs: self.instance_embed_timeout().as_secs(),
binary: binary_str,
},
));
}
Ok(Err(e)) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
binary: binary_str,
source: format!("codex wait failed: {e}"),
},
));
}
Ok(Ok(o)) => o,
};
if !output.status.success() {
let (exit_code, signal) = if let Some(code) = output.status.code() {
(Some(code), None)
} else {
extract_exit_info(&output.status)
};
let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stdout,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stderr,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
let mut combined_hint = hint;
if stderr_tail.contains("request_user_input") {
combined_hint.push_str(
" | codex requested interactive input in a headless embedding call; \
upgrade codex (>= 0.134) or switch the embedding backend to claude",
);
}
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
exit_code,
signal,
stdout_tail,
stderr_tail,
binary: binary_str,
hint: combined_hint,
},
));
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
/// Runs the opencode CLI once with `prompt` as its final argument and returns its raw stdout.
///
/// The child runs in the spawn-isolation directory with a cleared environment (`PATH`, `HOME`
/// and whatever `propagate_opencode_env` adds), under this client's per-call timeout.
///
/// # Errors
///
/// Returns an error when the isolation directory cannot be obtained, when the process cannot be
/// spawned, when it times out, or when it exits unsuccessfully.
async fn invoke_opencode(&self, prompt: &str) -> Result<String, AppError> {
let binary_str = self.binary.to_string_lossy().into_owned();
let spawn_dir = crate::spawn::spawn_isolation_dir()?;
let mut cmd = Command::new(&self.binary);
cmd.current_dir(&spawn_dir);
cmd.arg("run")
.arg("--format")
.arg("json")
.arg("-m")
.arg(&self.model)
.arg("--dangerously-skip-permissions")
.arg(prompt)
// Start from an empty environment and re-add only PATH and HOME.
.env_clear()
.env("PATH", std::env::var("PATH").unwrap_or_default())
.env("HOME", std::env::var("HOME").unwrap_or_default())
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
// Ensures the child is killed if the future is dropped, e.g. on timeout.
.kill_on_drop(true);
// Called after `env_clear`, so anything it sets on `cmd` survives the clearing.
crate::commands::opencode_runner::propagate_opencode_env(&mut cmd);
// Outer `Err` = timeout elapsed; inner `Err` = spawning or waiting on the child failed.
let output = match tokio::time::timeout(self.instance_embed_timeout(), cmd.output()).await {
Err(_elapsed) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::Timeout {
secs: self.instance_embed_timeout().as_secs(),
binary: binary_str.clone(),
},
));
}
Ok(Err(e)) => {
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
binary: binary_str.clone(),
source: e.to_string(),
},
));
}
Ok(Ok(o)) => o,
};
if !output.status.success() {
// `code()` is `None` when there is no exit code; fall back to the signal information.
let (exit_code, signal) = if let Some(code) = output.status.code() {
(Some(code), None)
} else {
extract_exit_info(&output.status)
};
let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stdout,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
&output.stderr,
crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
);
let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
return Err(crate::llm::exit_code_hints::into_legacy_embedding(
&crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
exit_code,
signal,
stdout_tail,
stderr_tail,
binary: binary_str,
hint,
},
));
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
}
/// Locates (or creates) the configuration directory handed to claude for embedding calls.
///
/// `SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR` is used when it names an existing directory; when
/// it is set but is not a directory a warning is logged. Otherwise a managed directory under
/// `$HOME/.local/state/sqlite-graphrag/claude-empty-config` is created, restricted to mode
/// `0700` on Unix (best effort), and `$HOME/.claude/.credentials.json` is copied into it when
/// present (best effort).
///
/// Returns `None` when `HOME` is unavailable or the managed directory cannot be created.
fn claude_embedding_config_dir() -> Option<std::path::PathBuf> {
    match std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
        Ok(raw) => {
            let path = std::path::PathBuf::from(raw);
            if path.is_dir() {
                return Some(path);
            }
            tracing::warn!(
                target: "embedding",
                path = %path.display(),
                "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but not a directory; \
                 falling back to the managed empty config dir"
            );
        }
        Err(_) => {}
    }

    let home = std::path::PathBuf::from(std::env::var("HOME").ok()?);
    let managed = home
        .join(".local/state/sqlite-graphrag")
        .join("claude-empty-config");
    std::fs::create_dir_all(&managed).ok()?;

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: a failure to tighten permissions does not abort the lookup.
        let _ = std::fs::set_permissions(&managed, std::fs::Permissions::from_mode(0o700));
    }

    let credentials = home.join(".claude/.credentials.json");
    if credentials.exists() {
        // Best effort: a failed copy is ignored and the directory is still returned.
        let _ = std::fs::copy(&credentials, managed.join(".credentials.json"));
    }
    Some(managed)
}
/// Assembles the `codex exec` command used for embedding calls, without spawning it.
///
/// The command runs in the spawn-isolation directory, reads its prompt from stdin (trailing
/// `-` argument), uses `schema_path` as the output schema, and starts from a cleared
/// environment containing `PATH`, `HOME` and, when resolvable, `CODEX_HOME`.
///
/// # Errors
///
/// Returns the error from `spawn_isolation_dir` when the isolation directory is unavailable.
fn build_codex_embedding_command(
    binary: &std::path::Path,
    model: &str,
    schema_path: &std::path::Path,
) -> Result<Command, AppError> {
    let spawn_dir = crate::spawn::spawn_isolation_dir()?;
    let mut cmd = Command::new(binary);
    cmd.current_dir(&spawn_dir);

    cmd.args([
        "exec",
        "-c",
        "sandbox_mode='read-only'",
        "-c",
        "approval_policy='never'",
        "--json",
        "--output-schema",
    ]);
    cmd.arg(schema_path);
    cmd.args([
        "--ephemeral",
        "--skip-git-repo-check",
        "--sandbox",
        "read-only",
        "--ignore-user-config",
        "--ignore-rules",
    ]);
    // Only pass the approval flag to codex builds that understand it.
    if crate::extract::codex_compat::codex_supports_ask_for_approval() {
        cmd.args(["--ask-for-approval", "never"]);
    }
    cmd.args(["--model", model, "-"]);

    cmd.env_clear();
    cmd.env("PATH", std::env::var("PATH").unwrap_or_default());
    cmd.env("HOME", std::env::var("HOME").unwrap_or_default());
    // An explicit CODEX_HOME wins; otherwise fall back to `$HOME/.codex` when it exists.
    let codex_home = match std::env::var("CODEX_HOME") {
        Ok(explicit) => Some(std::path::PathBuf::from(explicit)),
        Err(_) => std::env::var("HOME")
            .ok()
            .map(|home| std::path::Path::new(&home).join(".codex"))
            .filter(|candidate| candidate.exists()),
    };
    if let Some(dir) = codex_home {
        cmd.env("CODEX_HOME", dir);
    }

    cmd.stdin(Stdio::piped());
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    cmd.kill_on_drop(true);
    Ok(cmd)
}
/// Extracts a `T` from CLI output, trying three encodings in order:
///
/// 1. the whole of `stdout` as one JSON document;
/// 2. JSON-lines events of type `text`, whose `part.text` fragments are concatenated and parsed;
/// 3. JSON-lines events of type `item.completed` carrying an `agent_message` item, whose last
///    `text` is parsed.
///
/// Lines that are blank or are not valid JSON are ignored.
///
/// # Errors
///
/// Returns a message when no `agent_message` is present after the first two attempts fail, or
/// when the last `agent_message` text does not deserialize into `T`.
fn parse_llm_json<T: serde::de::DeserializeOwned>(stdout: &str) -> Result<T, String> {
    /// Reads the string-valued `type` field of a JSON value, if any.
    fn type_of(value: &serde_json::Value) -> Option<&str> {
        value.get("type").and_then(serde_json::Value::as_str)
    }

    if let Ok(direct) = serde_json::from_str::<T>(stdout) {
        return Ok(direct);
    }

    // Decode every JSON line once; both event formats below are read from this list.
    let events: Vec<serde_json::Value> = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .filter_map(|line| serde_json::from_str(line).ok())
        .collect();

    let text_parts: Vec<&str> = events
        .iter()
        .filter(|event| type_of(event) == Some("text"))
        .filter_map(|event| event.get("part")?.get("text")?.as_str())
        .collect();
    if !text_parts.is_empty() {
        if let Ok(parsed) = serde_json::from_str::<T>(&text_parts.concat()) {
            return Ok(parsed);
        }
    }

    let last_agent_text = events
        .iter()
        .filter(|event| type_of(event) == Some("item.completed"))
        .filter_map(|event| event.get("item"))
        .filter(|item| type_of(item) == Some("agent_message"))
        .filter_map(|item| item.get("text").and_then(serde_json::Value::as_str))
        .last();
    let text = last_agent_text
        .ok_or_else(|| "no agent_message found in codex JSONL output".to_string())?;
    serde_json::from_str::<T>(text)
        .map_err(|e| format!("codex agent_message text does not match schema: {e}; raw={text}"))
}
#[cfg(test)]
mod tests {
use super::*;
// Builds a client directly from its fields, bypassing the builder (and therefore the OAuth
// check and binary lookup), with a fixed model name and no timeout override.
fn test_client(flavour: EmbeddingFlavour, binary: std::path::PathBuf) -> LlmEmbedding {
LlmEmbedding {
flavour,
binary,
model: "gpt-5.4".to_string(),
codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
timeout_override: None,
}
}
// Pins the default timeout constant so an accidental change is caught.
#[test]
fn embed_timeout_default_is_300() {
assert_eq!(DEFAULT_EMBED_TIMEOUT_SECS, 300);
}
// Each API key, when present, must make the OAuth-only check fail; with both removed it passes.
// NOTE(review): the test removes both variables unconditionally, so any pre-existing values
// are not restored afterwards.
#[test]
#[serial_test::serial(env)]
fn oauth_only_enforce_blocks_api_keys() {
// SAFETY: presumably the `serial(env)` group keeps other environment-mutating tests from
// running concurrently — confirm that every such test carries the attribute.
unsafe {
std::env::set_var("ANTHROPIC_API_KEY", "test");
assert!(LlmEmbedding::oauth_only_enforce().is_err());
std::env::remove_var("ANTHROPIC_API_KEY");
std::env::set_var("OPENAI_API_KEY", "test");
assert!(LlmEmbedding::oauth_only_enforce().is_err());
std::env::remove_var("OPENAI_API_KEY");
}
assert!(LlmEmbedding::oauth_only_enforce().is_ok());
}
// The string names are part of the model label, so every variant's name is pinned here.
#[test]
fn flavour_as_str_is_stable() {
    assert_eq!(EmbeddingFlavour::Claude.as_str(), "claude");
    assert_eq!(EmbeddingFlavour::Codex.as_str(), "codex");
    assert_eq!(EmbeddingFlavour::Opencode.as_str(), "opencode");
}
// The single schema must be valid JSON and pin the array length to the requested dimension.
#[test]
fn single_schema_embeds_active_dim() {
let schema = build_single_schema(64);
assert!(schema.contains(r#""minItems":64"#));
assert!(schema.contains(r#""maxItems":64"#));
let parsed: serde_json::Value =
serde_json::from_str(&schema).expect("single schema must be valid JSON");
assert_eq!(parsed["properties"]["embedding"]["minItems"], 64);
}
// The batch schema must be valid JSON, leave the number of items unconstrained, and pin each
// vector's length to the requested dimension.
#[test]
fn batch_schema_is_valid_json_and_unbounded_items() {
let schema = build_batch_schema(64);
let parsed: serde_json::Value =
serde_json::from_str(&schema).expect("batch schema must be valid JSON");
assert!(parsed["properties"]["items"].get("minItems").is_none());
assert_eq!(
parsed["properties"]["items"]["items"]["properties"]["v"]["minItems"],
64
);
}
// A plain JSON document is parsed directly, without any JSONL event handling.
#[test]
fn parse_llm_json_accepts_claude_json() {
let stdout = r#"{"embedding":[0.0,1.0,2.0]}"#;
let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("claude JSON must parse");
assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
}
// In a JSONL event stream the payload is taken from the `agent_message` item's `text`.
#[test]
fn parse_llm_json_accepts_codex_jsonl() {
let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}
{"type":"item.completed","item":{"type":"agent_message","text":"{\"embedding\":[0.0,1.0,2.0]}"}}
{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1}}"#;
let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("codex JSONL must parse");
assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
}
// A stream with no `agent_message` item must fail with the dedicated error message.
#[test]
fn parse_llm_json_rejects_jsonl_without_agent_message() {
let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}"#;
let err = parse_llm_json::<EmbeddingResponse>(stdout)
.expect_err("missing agent_message must fail");
assert!(err.contains("no agent_message"));
}
// The batch payload shape (`items` of `{i, v}`) deserializes with indices and vectors intact.
#[test]
fn parse_llm_json_accepts_batch_response() {
let stdout = r#"{"items":[{"i":1,"v":[0.0,1.0]},{"i":2,"v":[2.0,3.0]}]}"#;
let parsed: BatchEmbeddingResponse = parse_llm_json(stdout).expect("batch JSON must parse");
assert_eq!(parsed.items.len(), 2);
assert_eq!(parsed.items[0].i, 1);
assert_eq!(parsed.items[1].v, vec![2.0, 3.0]);
}
// The schema cache must hand back the same file for the same dimension, keep single and batch
// schemas in separate files, and actually write the schema text to disk.
#[test]
fn codex_schema_file_is_created_once_and_reused() {
let client = test_client(
EmbeddingFlavour::Codex,
std::path::PathBuf::from("/bin/true"),
);
let first = client
.codex_schema_file(64, false)
.expect("schema file must be created");
let second = client
.codex_schema_file(64, false)
.expect("schema file must be reused");
assert_eq!(first.path(), second.path(), "same dim must reuse the file");
let batch = client
.codex_schema_file(64, true)
.expect("batch schema file must be created");
assert_ne!(
first.path(),
batch.path(),
"single and batch schemas are distinct files"
);
let content = std::fs::read_to_string(first.path()).expect("schema file must be readable");
assert!(content.contains(r#""minItems":64"#));
}
// Inspects the argv of the assembled codex command without spawning it: the prompt must come
// from stdin (`-`), never from argv, and every hardening flag must be present.
#[test]
fn codex_embedding_command_reads_prompt_from_stdin() {
// The schema path is only passed through as an argument; the file does not need to exist.
let schema_path = std::env::temp_dir().join("sqlite-graphrag-embed-schema-test.json");
let cmd = build_codex_embedding_command(
std::path::Path::new("/bin/true"),
"gpt-5.4",
&schema_path,
)
.expect("build_codex_embedding_command must succeed in test");
let argv: Vec<String> = cmd
.as_std()
.get_args()
.filter_map(|arg| arg.to_str().map(|s| s.to_string()))
.collect();
assert!(
argv.iter().any(|arg| arg == "-"),
"codex embedding command must read prompt from stdin: {argv:?}"
);
assert!(
!argv.iter().any(|arg| arg.starts_with("passage: ")),
"prompt text must not be passed as argv: {argv:?}"
);
for required in &[
"exec",
"-c",
"sandbox_mode='read-only'",
"approval_policy='never'",
"--json",
"--output-schema",
"--ephemeral",
"--skip-git-repo-check",
"--sandbox",
"read-only",
"--ignore-user-config",
"--ignore-rules",
"--model",
"gpt-5.4",
] {
assert!(
argv.iter().any(|arg| arg == required),
"missing flag {required} in {argv:?}"
);
}
}
// End-to-end check against a mock `codex` shell script: the script fails unless its stdin is
// exactly the prefixed prompt, and otherwise emits a 64-element zero vector as a codex event.
#[cfg(unix)]
#[test]
#[serial_test::serial(env)]
fn embed_passage_sends_prompt_to_codex_stdin() {
    use std::os::unix::fs::PermissionsExt;
    let temp = tempfile::tempdir().expect("tempdir must exist");
    let binary = temp.path().join("codex-stdin-check");
    let script = r#"#!/usr/bin/env bash
set -euo pipefail
prompt="$(cat)"
if [[ "$prompt" != "passage: codex-cli" ]]; then
echo "unexpected stdin: $prompt" >&2
exit 41
fi
vals="0.0"
for _ in $(seq 2 64); do
vals="$vals,0.0"
done
payload="{\"embedding\":[$vals]}"
escaped="${payload//\"/\\\"}"
echo "{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"$escaped\"}}"
"#;
    std::fs::write(&binary, script).expect("mock codex script must be written");
    let mut perms = std::fs::metadata(&binary)
        .expect("mock codex metadata must exist")
        .permissions();
    perms.set_mode(0o755);
    std::fs::set_permissions(&binary, perms).expect("mock codex must be executable");
    let embedding = test_client(EmbeddingFlavour::Codex, binary);
    // Set the dimension override only around the call and remove it before unwrapping the
    // result, so a failed embedding cannot leak the variable into later tests.
    unsafe {
        std::env::set_var("SQLITE_GRAPHRAG_EMBEDDING_DIM", "64");
    }
    let outcome = embedding.embed_passage("codex-cli");
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_EMBEDDING_DIM");
    }
    let vector = outcome.expect("stdin-backed codex embedding must succeed");
    assert_eq!(vector.len(), 64);
    assert!(vector.iter().all(|value| *value == 0.0));
}
/// A freshly created Claude builder selects the Claude flavour and carries no overrides.
#[test]
fn claude_default_resolves_path() {
    let builder = LlmEmbeddingBuilder::claude_default();
    let LlmEmbeddingBuilder {
        flavour,
        binary_override,
        model_override,
        ..
    } = &builder;

    assert_eq!(*flavour, EmbeddingFlavour::Claude);
    assert!(binary_override.is_none());
    assert!(model_override.is_none());
}
/// `override_binary` must store exactly the path it was given.
#[test]
fn override_binary_uses_provided() {
    let expected = std::path::PathBuf::from("/tmp/fake-claude-binary");
    let builder = LlmEmbeddingBuilder::claude_default().override_binary(expected.clone());

    let stored = builder.binary_override.as_deref();
    assert_eq!(stored, Some(expected.as_path()));
}
/// `override_model` must store exactly the model name it was given.
#[test]
fn override_model_uses_provided() {
    let custom = String::from("gpt-5.4-custom");
    let builder = LlmEmbeddingBuilder::codex_default().override_model(custom);

    let chosen = builder.model_override.as_deref();
    assert_eq!(chosen, Some("gpt-5.4-custom"));
}
/// The batch timeout must grow strictly with batch size, by 15 seconds per item after the first.
#[test]
fn embed_timeout_for_batch_scales_with_size() {
    let [t1, t4, t8] = [1usize, 4, 8].map(embed_timeout_for_batch);

    assert!(
        t1 < t4,
        "batch of 4 must have longer timeout than batch of 1"
    );
    assert!(
        t4 < t8,
        "batch of 8 must have longer timeout than batch of 4"
    );

    // A batch of 8 has seven items beyond the first, each adding 15 seconds.
    let per_extra_item = std::time::Duration::from_secs(15);
    assert_eq!(t8 - t1, per_extra_item * 7);
}
/// A batch of one gets no extra allowance: its timeout equals the base embed timeout.
#[test]
fn embed_timeout_for_batch_single_equals_base() {
    assert_eq!(embed_timeout(), embed_timeout_for_batch(1));
}
/// The OpenCode flavour must render as the lowercase string `opencode`.
#[test]
fn opencode_flavour_as_str() {
    let label = EmbeddingFlavour::Opencode.as_str();
    assert_eq!(label, "opencode");
}
/// With `SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL` set, `opencode_embed_model` returns its value.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_uses_env_override() {
    const EMBED_MODEL_VAR: &str = "SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL";

    // SAFETY: this test belongs to the `env` serial group, so it does not run concurrently
    // with the other tests in that group that mutate the environment.
    unsafe {
        std::env::set_var(EMBED_MODEL_VAR, "opencode/test-model");
    }
    let resolved = opencode_embed_model();
    // SAFETY: same serialisation argument as for the `set_var` call above.
    unsafe {
        std::env::remove_var(EMBED_MODEL_VAR);
    }

    // The variable is cleared before asserting so a failure does not leave it set.
    assert_eq!(resolved, "opencode/test-model");
}
/// Without the embed-specific variable, `opencode_embed_model` falls back to
/// `SQLITE_GRAPHRAG_OPENCODE_MODEL`.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_falls_back_to_opencode_model() {
    const EMBED_MODEL_VAR: &str = "SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL";
    const FALLBACK_VAR: &str = "SQLITE_GRAPHRAG_OPENCODE_MODEL";

    // SAFETY: this test belongs to the `env` serial group, so it does not run concurrently
    // with the other tests in that group that mutate the environment.
    unsafe {
        std::env::remove_var(EMBED_MODEL_VAR);
        std::env::set_var(FALLBACK_VAR, "opencode/fallback");
    }
    let resolved = opencode_embed_model();
    // SAFETY: same serialisation argument as for the environment calls above.
    unsafe {
        std::env::remove_var(FALLBACK_VAR);
    }

    // The variable is cleared before asserting so a failure does not leave it set.
    assert_eq!(resolved, "opencode/fallback");
}
/// With both OpenCode-specific variables unset, `opencode_embed_model` must return
/// `opencode/big-pickle` even when `SQLITE_GRAPHRAG_LLM_MODEL` is set.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_ignores_llm_model() {
    const LLM_MODEL_VAR: &str = "SQLITE_GRAPHRAG_LLM_MODEL";

    // SAFETY: this test belongs to the `env` serial group, so it does not run concurrently
    // with the other tests in that group that mutate the environment.
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL");
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_MODEL");
        std::env::set_var(LLM_MODEL_VAR, "gpt-5.4-mini");
    }
    let resolved = opencode_embed_model();
    // SAFETY: same serialisation argument as for the environment calls above.
    unsafe {
        std::env::remove_var(LLM_MODEL_VAR);
    }

    // The variable is cleared before asserting so a failure does not leave it set.
    assert_eq!(
        resolved, "opencode/big-pickle",
        "must NOT cross-contaminate with LLM_MODEL"
    );
}
/// `parse_llm_json` must pull the embedding out of the `text` event of an OpenCode NDJSON
/// stream that also contains `step_start` and `step_finish` events.
#[test]
fn parse_llm_json_accepts_opencode_ndjson() {
    // Three NDJSON events; only the middle one carries the embedding payload.
    let ndjson = r#"{"type":"step_start","timestamp":1234,"sessionID":"ses_test","part":{"type":"step-start"}}
{"type":"text","timestamp":1235,"sessionID":"ses_test","part":{"type":"text","text":"{\"embedding\":[0.1,0.2,0.3]}"}}
{"type":"step_finish","timestamp":1236,"sessionID":"ses_test","part":{"type":"step-finish","tokens":{"total":100,"input":90,"output":10,"reasoning":0},"cost":0}}"#;

    let response: EmbeddingResponse =
        parse_llm_json(ndjson).expect("opencode NDJSON must parse");

    let expected = vec![0.1, 0.2, 0.3];
    assert_eq!(response.embedding, expected);
}
/// `parse_llm_json` must pull the batch `items` out of the `text` event of an OpenCode NDJSON
/// stream that also contains `step_start` and `step_finish` events.
#[test]
fn parse_llm_json_accepts_opencode_batch_ndjson() {
    // Three NDJSON events; only the middle one carries the batch payload.
    let ndjson = r#"{"type":"step_start","timestamp":1234,"sessionID":"ses_test","part":{"type":"step-start"}}
{"type":"text","timestamp":1235,"sessionID":"ses_test","part":{"type":"text","text":"{\"items\":[{\"i\":1,\"v\":[0.1,0.2]},{\"i\":2,\"v\":[0.3,0.4]}]}"}}
{"type":"step_finish","timestamp":1236,"sessionID":"ses_test","part":{"type":"step-finish","tokens":{"total":100,"input":90,"output":10,"reasoning":0},"cost":0}}"#;

    let response: BatchEmbeddingResponse =
        parse_llm_json(ndjson).expect("opencode batch NDJSON must parse");

    let items = &response.items;
    assert_eq!(items.len(), 2);
    assert_eq!(items[0].i, 1);
    assert_eq!(items[1].v, vec![0.3, 0.4]);
}
/// A freshly created OpenCode builder selects the OpenCode flavour and carries no overrides.
#[test]
fn opencode_builder_default_has_correct_flavour() {
    let builder = LlmEmbeddingBuilder::opencode_default();
    let LlmEmbeddingBuilder {
        flavour,
        binary_override,
        model_override,
        ..
    } = &builder;

    assert_eq!(*flavour, EmbeddingFlavour::Opencode);
    assert!(binary_override.is_none());
    assert!(model_override.is_none());
}
}