Skip to main content

sqlite_graphrag/extract/
llm_embedding.rs

1//! LLM-based embedding backend (v1.0.76 default; reworked in v1.0.79 G42).
2//!
3//! `LlmEmbedding` is the production embedding client. It wraps headless
4//! invocations of `claude code` or `codex` and returns f32 vectors of the
5//! active dimensionality (`crate::constants::embedding_dim()`, default 64).
6//!
7//! v1.0.79 (G42) changes:
8//! - S1: the dimensionality is no longer hardcoded here — the single
9//!   source of truth lives in `crate::constants` and the JSON schemas
10//!   are generated dynamically.
11//! - S2: `embed_batch` embeds N numbered texts per LLM call with the
12//!   `{items:[{i,v}]}` schema, collapsing 39 subprocess spawns into 4-5.
13//! - S4: the codex `--output-schema` file is a `tempfile::NamedTempFile`
14//!   with a randomised name created once per client and shared across
15//!   clones via `Arc` — no per-call write+delete, no PID-path races.
16//! - S5: the claude model honours `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`
17//!   (symmetric to the codex env var). ZERO hardcoded models without
18//!   an env override.
19//! - S6: `CLAUDE_CONFIG_DIR` points at an empty managed directory BY
20//!   DEFAULT, because `--strict-mcp-config`/`--mcp-config '{}'` are
21//!   silently ignored upstream (anthropics/claude-code#10787) and a
22//!   full `~/.claude` costs ~223k cache-creation tokens per call.
23//! - S7: the codex `request_user_input` failure mode maps to an
24//!   actionable error instead of an opaque exit 11.
25//! - BLOCO 4: every subprocess uses `kill_on_drop(true)` plus an
26//!   explicit `tokio::time::timeout`, so cancellation never leaks a
27//!   child and a hung LLM cannot stall the pipeline forever.
28//!
29//! OAuth is the only supported credential path. The constructor rejects
30//! `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` in the environment — see
31//! `v1.0.69 (G31) OAuth-Only Enforcement`.
32
33use crate::errors::AppError;
34use serde::Deserialize;
35use std::process::Stdio;
36use std::sync::Arc;
37use tokio::io::AsyncWriteExt;
38use tokio::process::Command;
39
/// Fallback timeout, in seconds, for a single LLM subprocess call.
/// Documented as consistent with the `--claude-timeout` /
/// `--codex-timeout` defaults used by ingest. Can be overridden through
/// `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS`.
const DEFAULT_EMBED_TIMEOUT_SECS: u64 = 300;

/// Resolves the per-call timeout applied to embedding subprocesses.
///
/// `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS` is honoured only when it parses
/// as a `u64` inside `10..=3_600`; an unset, unparsable or out-of-range
/// value falls back to [`DEFAULT_EMBED_TIMEOUT_SECS`].
fn embed_timeout() -> std::time::Duration {
    let configured = match std::env::var("SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS") {
        Ok(raw) => raw.parse::<u64>().ok(),
        Err(_) => None,
    };
    let secs = match configured {
        Some(n) if (10..=3_600).contains(&n) => n,
        _ => DEFAULT_EMBED_TIMEOUT_SECS,
    };
    std::time::Duration::from_secs(secs)
}
53
/// G42/S1: JSON schema for a single-vector response, generated from the
/// active dimensionality.
///
/// The schema describes an object whose only property, `embedding`, is
/// an array of exactly `dim` numbers.
fn build_single_schema(dim: usize) -> String {
    // The fixed-length number array is the only part that depends on `dim`.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    let head = r#"{"type":"object","properties":{"embedding":"#;
    let tail = r#"},"required":["embedding"],"additionalProperties":false}"#;

    let mut schema = String::with_capacity(head.len() + vector.len() + tail.len());
    schema.push_str(head);
    schema.push_str(&vector);
    schema.push_str(tail);
    schema
}
60
/// G42/S2: JSON schema for the batch response shape `{items:[{i,v}]}`.
///
/// Each item carries an integer `i` and a vector `v` of exactly `dim`
/// numbers. The outer `items` array has no length constraint on purpose,
/// so ONE schema file serves every batch size (index coverage is
/// validated in Rust after parsing).
fn build_batch_schema(dim: usize) -> String {
    // The fixed-length number array is the only part that depends on `dim`.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    let head = r#"{"type":"object","properties":{"items":{"type":"array","items":{"type":"object","properties":{"i":{"type":"integer"},"v":"#;
    let tail = r#"},"required":["i","v"],"additionalProperties":false}}},"required":["items"],"additionalProperties":false}"#;

    let mut schema = String::with_capacity(head.len() + vector.len() + tail.len());
    schema.push_str(head);
    schema.push_str(&vector);
    schema.push_str(tail);
    schema
}
69
/// Embedding client that shells out to a headless LLM CLI.
///
/// Holds the resolved backend flavour, binary path and model name, plus
/// a cache of codex schema tempfiles. The cache sits behind an `Arc`, so
/// clones of the client share the same cache.
#[derive(Clone, Debug)]
pub struct LlmEmbedding {
    /// Which LLM headless binary to spawn. `claude` or `codex`.
    flavour: EmbeddingFlavour,
    /// Cached path to the binary to avoid PATH lookups on every call.
    binary: std::path::PathBuf,
    /// Model name. Resolved from env overrides at construction time.
    model: String,
    /// G42/S4: lazily-created codex `--output-schema` tempfiles, shared
    /// across clones. Keyed by dim so an env change between tests cannot
    /// serve a stale schema.
    codex_schemas: Arc<parking_lot::Mutex<CodexSchemaFiles>>,
}
83
/// Cache of codex `--output-schema` tempfiles, one slot per request
/// shape. Each slot pairs the dimensionality the schema was generated
/// for with the tempfile holding it; a slot is `None` until first use.
#[derive(Debug, Default)]
struct CodexSchemaFiles {
    /// Schema file for single-vector requests, with the dim it encodes.
    single: Option<(usize, Arc<tempfile::NamedTempFile>)>,
    /// Schema file for batch requests, with the dim it encodes.
    batch: Option<(usize, Arc<tempfile::NamedTempFile>)>,
}
89
/// Which headless LLM CLI backs an [`LlmEmbedding`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
pub enum EmbeddingFlavour {
    /// The `claude` CLI.
    Claude,
    /// The `codex` CLI.
    Codex,
}
95
/// ADR-0042 / GAP-002: builder for [`LlmEmbedding`] that lets callers
/// override the binary path and model without having to remember the
/// env-var names per flavour. Replaces the duplicated `with_codex` /
/// `with_claude` bodies that diverged in v1.0.82 (GAP-002: the Claude
/// arm of `embed_via_backend` re-did the PATH probe via
/// `LlmEmbedding::detect_available` and could silently pick `codex`).
#[derive(Clone, Debug)]
pub struct LlmEmbeddingBuilder {
    /// Backend the built client will spawn.
    flavour: EmbeddingFlavour,
    /// Explicit binary path; when `None`, `build` probes PATH instead.
    binary_override: Option<std::path::PathBuf>,
    /// Explicit model name; when `None`, `build` uses the per-flavour
    /// env-var/default resolution.
    model_override: Option<String>,
}
108
109impl LlmEmbeddingBuilder {
110    /// Convenience: produce a Claude-backed builder pre-configured with
111    /// the canonical default binary + model.
112    /// Convenience: produce a Claude-backed builder pre-configured with
113    /// the canonical default binary + model.
114    pub fn claude_default() -> Self {
115        Self {
116            flavour: EmbeddingFlavour::Claude,
117            binary_override: None,
118            model_override: None,
119        }
120    }
121
122    /// Convenience: produce a Codex-backed builder pre-configured with
123    /// the canonical default binary + model.
124    pub fn codex_default() -> Self {
125        Self {
126            flavour: EmbeddingFlavour::Codex,
127            binary_override: None,
128            model_override: None,
129        }
130    }
131    /// Override the binary path (skips the `which::which` PATH probe).
132    pub fn override_binary(mut self, binary: std::path::PathBuf) -> Self {
133        self.binary_override = Some(binary);
134        self
135    }
136
137    /// Override the model name (skips the env-var lookup).
138    pub fn override_model(mut self, model: String) -> Self {
139        self.model_override = Some(model);
140        self
141    }
142
143    /// Build the [`LlmEmbedding`]. Enforces OAuth-only and resolves the
144    /// binary/model via the override or the env-var defaults.
145    pub fn build(self) -> Result<LlmEmbedding, AppError> {
146        LlmEmbedding::oauth_only_enforce()?;
147        let binary = match self.binary_override {
148            Some(path) => resolve_real_binary(&path),
149            None => {
150                let which_name = match self.flavour {
151                    EmbeddingFlavour::Codex => "codex",
152                    EmbeddingFlavour::Claude => "claude",
153                };
154                let path = which::which(which_name).map_err(|_| {
155                    AppError::Embedding(format!("`{which_name}` not found on PATH"))
156                })?;
157                resolve_real_binary(&path)
158            }
159        };
160        let model = match self.model_override {
161            Some(m) => m,
162            None => match self.flavour {
163                EmbeddingFlavour::Codex => codex_embed_model(),
164                EmbeddingFlavour::Claude => claude_embed_model(),
165            },
166        };
167        Ok(LlmEmbedding {
168            flavour: self.flavour,
169            binary,
170            model,
171            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
172        })
173    }
174}
175
176impl EmbeddingFlavour {
177    pub fn as_str(self) -> &'static str {
178        match self {
179            Self::Claude => "claude",
180            Self::Codex => "codex",
181        }
182    }
183}
184
/// Parsed single-vector LLM response: `{"embedding": [...]}`.
#[derive(Debug, Deserialize)]
struct EmbeddingResponse {
    /// The returned vector; its length is checked against the active
    /// dimensionality after parsing.
    embedding: Vec<f32>,
}
189
/// Parsed batch LLM response: `{"items": [{i, v}, ...]}`.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingResponse {
    /// One entry per numbered text in the batch prompt.
    items: Vec<BatchEmbeddingItem>,
}
194
/// One element of a batch response.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingItem {
    /// 1-based position of the text inside the batch prompt.
    i: usize,
    /// Embedding vector for that text.
    v: Vec<f32>,
}
200
201/// Follows symlinks and shell-script shim `exec` targets to find
202/// the real ELF binary. Shim wrappers (like `~/.graphrag-shim/codex`)
203/// can strip hardening flags; bypassing them is a security requirement.
204pub fn resolve_real_binary(path: &std::path::Path) -> std::path::PathBuf {
205    if let Ok(canonical) = std::fs::canonicalize(path) {
206        if is_elf_binary(&canonical) {
207            return canonical;
208        }
209        if let Some(exec_target) = extract_exec_target_from_shim(&canonical) {
210            if exec_target.exists() && is_elf_binary(&exec_target) {
211                return exec_target;
212            }
213        }
214        return canonical;
215    }
216    path.to_path_buf()
217}
218
/// Returns `true` when the file at `path` starts with the ELF magic
/// bytes (`0x7f 'E' 'L' 'F'`).
///
/// Only the first four bytes are read, so probing a large binary does
/// not load the whole file into memory. Any I/O failure (missing file,
/// directory, fewer than four bytes, permission error) yields `false`.
fn is_elf_binary(path: &std::path::Path) -> bool {
    const ELF_MAGIC: [u8; 4] = [0x7f, b'E', b'L', b'F'];

    let Ok(mut file) = std::fs::File::open(path) else {
        return false;
    };
    let mut header = [0u8; 4];
    // `read_exact` fails on short files, which maps to "not ELF".
    std::io::Read::read_exact(&mut file, &mut header).is_ok() && header == ELF_MAGIC
}
224
/// Extracts the program named by the last `exec …` line of a shell
/// shim.
///
/// Returns `None` when the file cannot be read as UTF-8 text, does not
/// start with a `#!` shebang, or has no line beginning with `exec `.
/// The target is the first shell word after `exec`; a word wrapped in
/// single or double quotes is returned without the quotes (and may
/// contain spaces), so `exec "/opt/tools/codex" "$@"` yields
/// `/opt/tools/codex`. No variable expansion is performed, and an
/// unterminated quote yields `None`.
fn extract_exec_target_from_shim(path: &std::path::Path) -> Option<std::path::PathBuf> {
    // First shell word of `command`, honouring one level of quoting.
    fn first_word(command: &str) -> Option<&str> {
        let command = command.trim_start();
        let mut chars = command.chars();
        match chars.next()? {
            quote @ ('"' | '\'') => {
                let rest = chars.as_str();
                rest.find(quote).map(|end| &rest[..end])
            }
            _ => command.split_whitespace().next(),
        }
    }

    let content = std::fs::read_to_string(path).ok()?;
    if !content.starts_with("#!") {
        return None;
    }
    content
        .lines()
        .rev()
        // Scan bottom-up: the last `exec` line is the one that wins.
        .find_map(|line| line.trim().strip_prefix("exec "))
        .and_then(first_word)
        .filter(|word| !word.is_empty())
        .map(|word| std::path::PathBuf::from(word))
}
240
/// G42/S5: claude embedding model with env override, symmetric to the
/// codex `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL` introduced in v1.0.78.
///
/// Returns the trimmed value of `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`
/// when it is set to something non-blank; otherwise the built-in
/// default `claude-sonnet-4-6`. A blank override is treated as unset so
/// an empty `--model` argument is never passed to the CLI.
fn claude_embed_model() -> String {
    match std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL") {
        Ok(value) if !value.trim().is_empty() => value.trim().to_string(),
        _ => "claude-sonnet-4-6".to_string(),
    }
}
247
/// Codex embedding model with env override.
///
/// Returns the trimmed value of `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL`
/// when it is set to something non-blank; otherwise the built-in
/// default `gpt-5.5`. A blank override is treated as unset so an empty
/// model name is never handed to the CLI.
fn codex_embed_model() -> String {
    match std::env::var("SQLITE_GRAPHRAG_CODEX_EMBED_MODEL") {
        Ok(value) if !value.trim().is_empty() => value.trim().to_string(),
        _ => "gpt-5.5".to_string(),
    }
}
251
252impl LlmEmbedding {
253    /// Detects which LLM CLI is available on PATH and returns the
254    /// matching embedding client.
255    ///
256    /// v1.0.76: PREFERS `codex` over `claude` because:
257    /// - Claude Code 2.1+ ships a 180k+ token system context (plugins,
258    ///   skills, agents, MCP) that overflows the 200k context window
259    ///   for even trivial embedding prompts and returns "Prompt is too
260    ///   long". (v1.0.79/S6 mitigates this with an empty
261    ///   `CLAUDE_CONFIG_DIR`, but codex stays the lighter default.)
262    /// - Codex 0.134+ is lightweight (~5k system context) and the
263    ///   `StructuredOutput` tool reliably returns the requested vectors.
264    pub fn detect_available() -> Result<Self, AppError> {
265        Self::oauth_only_enforce()?;
266
267        if let Ok(path) = which::which("codex") {
268            return Ok(Self {
269                flavour: EmbeddingFlavour::Codex,
270                binary: resolve_real_binary(&path),
271                model: codex_embed_model(),
272                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
273            });
274        }
275        if let Ok(path) = which::which("claude") {
276            return Ok(Self {
277                flavour: EmbeddingFlavour::Claude,
278                binary: resolve_real_binary(&path),
279                model: claude_embed_model(),
280                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
281            });
282        }
283        Err(AppError::Embedding(
284            "no LLM CLI found on PATH: install `codex` (0.130+) or `claude` (Claude Code 2.1+)"
285                .to_string(),
286        ))
287    }
288
289    pub fn with_codex() -> Result<Self, AppError> {
290        Self::with_codex_builder().build()
291    }
292
293    pub fn with_claude() -> Result<Self, AppError> {
294        Self::with_claude_builder().build()
295    }
296
297    /// ADR-0042 / GAP-002: builder entry point for a codex-backed
298    /// embedder with default model resolution.
299    pub fn with_codex_builder() -> LlmEmbeddingBuilder {
300        LlmEmbeddingBuilder {
301            flavour: EmbeddingFlavour::Codex,
302            binary_override: None,
303            model_override: None,
304        }
305    }
306
307    /// ADR-0042 / GAP-002: builder entry point for a claude-backed
308    /// embedder with default model resolution.
309    pub fn with_claude_builder() -> LlmEmbeddingBuilder {
310        LlmEmbeddingBuilder {
311            flavour: EmbeddingFlavour::Claude,
312            binary_override: None,
313            model_override: None,
314        }
315    }
316    /// v1.0.69 (G31): refuse to spawn if an API key is set. The CLI
317    /// must use OAuth. The two API-key env vars are NOT in the
318    /// env-clear whitelist, so a parent process that exports them
319    /// will see this error.
320    fn oauth_only_enforce() -> Result<(), AppError> {
321        if std::env::var("ANTHROPIC_API_KEY").is_ok() {
322            return Err(AppError::Validation(
323                "ANTHROPIC_API_KEY is set; v1.0.76 requires OAuth. \
324                 unset it and use `claude login` instead."
325                    .into(),
326            ));
327        }
328        if std::env::var("OPENAI_API_KEY").is_ok() {
329            return Err(AppError::Validation(
330                "OPENAI_API_KEY is set; v1.0.76 requires OAuth. \
331                 unset it and use `codex login` instead."
332                    .into(),
333            ));
334        }
335        Ok(())
336    }
337
338    /// Embeds a single passage (chunk of a memory body). Returns an
339    /// f32 vector of the active dimensionality.
340    pub fn embed_passage(&self, text: &str) -> Result<Vec<f32>, AppError> {
341        self.invoke_with_prefix(crate::constants::PASSAGE_PREFIX, text)
342    }
343
344    /// Embeds a single query. The LLM uses a different prompt prefix
345    /// to disambiguate query from passage.
346    pub fn embed_query(&self, text: &str) -> Result<Vec<f32>, AppError> {
347        self.invoke_with_prefix(crate::constants::QUERY_PREFIX, text)
348    }
349
350    /// G56: returns a stable label for the active embedding model so the
351    /// in-process entity-embedding cache can key by `(model, text)`.
352    /// Embeddings produced by different models are not interchangeable,
353    /// so a cache entry from one model must never satisfy a request
354    /// served by another.
355    pub fn model_label(&self) -> String {
356        format!("{}:{}", self.flavour.as_str(), self.model)
357    }
358
    /// ADR-0042 / BUG-003 fix: returns the resolved
    /// [`EmbeddingFlavour`] of this embedder, so callers can report the
    /// backend that ACTUALLY executed the embedding (not the one
    /// requested in the chain) — for example when claude was
    /// substituted for a missing codex.
    ///
    /// NOTE(review): the previous comment named the specific call sites
    /// and output field through intra-doc links whose targets were
    /// lost; restore them from version control if they are needed.
    pub fn flavour(&self) -> EmbeddingFlavour {
        self.flavour
    }
369
370    /// G42/S2: embeds a batch of `(global_index, text)` pairs in ONE
371    /// LLM call. Returns `(global_index, vector)` pairs. Async — this
372    /// is the unit of work scheduled by the bounded fan-out in
373    /// `crate::embedder`.
374    ///
375    /// Cancel safety: the future owns its subprocess via
376    /// `kill_on_drop(true)`, so dropping it (e.g. losing a
377    /// `tokio::select!` race against a cancellation token) kills the
378    /// child and leaks nothing.
379    pub async fn embed_batch_async(
380        &self,
381        prefix: &str,
382        batch: &[(usize, String)],
383    ) -> Result<Vec<(usize, Vec<f32>)>, AppError> {
384        let dim = crate::constants::embedding_dim();
385        if batch.is_empty() {
386            return Ok(Vec::new());
387        }
388        if batch.len() == 1 {
389            let (idx, text) = (&batch[0].0, &batch[0].1);
390            let v = self.invoke_single_async(prefix, text, dim).await?;
391            return Ok(vec![(*idx, v)]);
392        }
393
394        let mut prompt = format!(
395            "Generate {dim}-dimensional semantic embedding vectors for each numbered text below.\n\
396             Return a JSON object with an \"items\" array containing EXACTLY {n} items.\n\
397             Each item has \"i\" (the 1-based index) and \"v\" (the {dim}-float vector, values between -1 and 1).\n\n",
398            n = batch.len()
399        );
400        for (pos, (_, text)) in batch.iter().enumerate() {
401            prompt.push_str(&format!("{}: {prefix}{text}\n", pos + 1));
402        }
403
404        let stdout = match self.flavour {
405            EmbeddingFlavour::Claude => {
406                self.invoke_claude(&prompt, &build_batch_schema(dim))
407                    .await?
408            }
409            EmbeddingFlavour::Codex => {
410                let schema = self.codex_schema_file(dim, true)?;
411                self.invoke_codex(&prompt, schema.path()).await?
412            }
413        };
414
415        let parsed: BatchEmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
416            AppError::Embedding(format!(
417                "LLM batch embedding response parse failed: {e}; raw={stdout}"
418            ))
419        })?;
420        if parsed.items.len() != batch.len() {
421            return Err(AppError::Embedding(format!(
422                "LLM batch returned {} items, expected {} (G42/S2 coverage check)",
423                parsed.items.len(),
424                batch.len()
425            )));
426        }
427        let mut out: Vec<Option<Vec<f32>>> = vec![None; batch.len()];
428        for item in parsed.items {
429            if item.i == 0 || item.i > batch.len() {
430                return Err(AppError::Embedding(format!(
431                    "LLM batch item index {} out of range 1..={}",
432                    item.i,
433                    batch.len()
434                )));
435            }
436            if item.v.len() != dim {
437                return Err(AppError::Embedding(format!(
438                    "LLM batch item {} returned {} dims, expected {dim}; \
439                     refusing to truncate or pad silently (G42/C5)",
440                    item.i,
441                    item.v.len()
442                )));
443            }
444            out[item.i - 1] = Some(item.v);
445        }
446        let mut result = Vec::with_capacity(batch.len());
447        for (pos, slot) in out.into_iter().enumerate() {
448            let v = slot.ok_or_else(|| {
449                AppError::Embedding(format!(
450                    "LLM batch response is missing item index {} (G42/S2 coverage check)",
451                    pos + 1
452                ))
453            })?;
454            result.push((batch[pos].0, v));
455        }
456        Ok(result)
457    }
458
459    fn invoke_with_prefix(&self, prefix: &str, text: &str) -> Result<Vec<f32>, AppError> {
460        let dim = crate::constants::embedding_dim();
461        let inner = self.invoke_single_async(prefix, text, dim);
462        // v1.0.79 (G42/A2): reuse the process-wide multi-thread runtime
463        // instead of building a current-thread runtime PER CALL. Inside
464        // an existing runtime (tests, async commands) block_in_place
465        // keeps the worker pool healthy.
466        match tokio::runtime::Handle::try_current() {
467            Ok(handle) => tokio::task::block_in_place(|| handle.block_on(inner)),
468            Err(_) => crate::embedder::shared_runtime()?.block_on(inner),
469        }
470    }
471
472    async fn invoke_single_async(
473        &self,
474        prefix: &str,
475        text: &str,
476        dim: usize,
477    ) -> Result<Vec<f32>, AppError> {
478        let prompt = format!("{prefix}{text}");
479        let stdout = match self.flavour {
480            EmbeddingFlavour::Claude => {
481                self.invoke_claude(&prompt, &build_single_schema(dim))
482                    .await?
483            }
484            EmbeddingFlavour::Codex => {
485                let schema = self.codex_schema_file(dim, false)?;
486                self.invoke_codex(&prompt, schema.path()).await?
487            }
488        };
489        let parsed: EmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
490            AppError::Embedding(format!(
491                "LLM embedding response parse failed: {e}; raw={stdout}"
492            ))
493        })?;
494        if parsed.embedding.len() != dim {
495            return Err(AppError::Embedding(format!(
496                "LLM returned {} dims, expected {dim}; \
497                 refusing to truncate or pad silently (G42/C5)",
498                parsed.embedding.len()
499            )));
500        }
501        Ok(parsed.embedding)
502    }
503
504    /// G42/S4: returns the lazily-created, process-shared codex schema
505    /// tempfile for the requested mode. `NamedTempFile` randomises the
506    /// filename (no PID-based collisions) and removes the file on drop
507    /// of the last `Arc` clone.
508    fn codex_schema_file(
509        &self,
510        dim: usize,
511        batch: bool,
512    ) -> Result<Arc<tempfile::NamedTempFile>, AppError> {
513        let mut guard = self.codex_schemas.lock();
514        let slot = if batch {
515            &mut guard.batch
516        } else {
517            &mut guard.single
518        };
519        if let Some((cached_dim, file)) = slot {
520            if *cached_dim == dim {
521                return Ok(Arc::clone(file));
522            }
523        }
524        let content = if batch {
525            build_batch_schema(dim)
526        } else {
527            build_single_schema(dim)
528        };
529        let file = tempfile::Builder::new()
530            .prefix("sqlite-graphrag-embed-schema-")
531            .suffix(".json")
532            .tempfile()
533            .map_err(|e| AppError::Embedding(format!("schema tempfile create failed: {e}")))?;
534        std::fs::write(file.path(), content)
535            .map_err(|e| AppError::Embedding(format!("schema tempfile write failed: {e}")))?;
536        let file = Arc::new(file);
537        *slot = Some((dim, Arc::clone(&file)));
538        Ok(file)
539    }
540
541    async fn invoke_claude(&self, prompt: &str, schema: &str) -> Result<String, AppError> {
542        // v1.0.69 hardening: --strict-mcp-config --mcp-config '{}' --settings
543        // '{"hooks":{}}' --dangerously-skip-permissions.
544        //
545        // v1.0.76 hardening: Claude Code 2.1+ renamed --output-schema to
546        // --json-schema and accepts the schema as an inline JSON string
547        // (NOT a file path). Also pass --output-format json so the
548        // response is a single JSON object on stdout.
549        //
550        // v1.0.79 (G42/S6): CLAUDE_CONFIG_DIR points at an empty managed
551        // directory BY DEFAULT — the MCP-isolation flags above are
552        // silently ignored upstream (anthropics/claude-code#10787) and a
553        // populated ~/.claude costs ~223k cache-creation tokens per call.
554        let mut cmd = Command::new(&self.binary);
555        cmd.arg("-p")
556            .arg(prompt)
557            .arg("--model")
558            .arg(&self.model)
559            .arg("--json-schema")
560            .arg(schema)
561            .arg("--output-format")
562            .arg("json")
563            .arg("--strict-mcp-config")
564            .arg("--mcp-config")
565            .arg(r#"{"mcpServers":{}}"#)
566            .arg("--settings")
567            .arg(r#"{"hooks":{}}"#)
568            .arg("--dangerously-skip-permissions")
569            .env_clear()
570            .env("PATH", std::env::var("PATH").unwrap_or_default())
571            .env("HOME", std::env::var("HOME").unwrap_or_default())
572            .stdin(Stdio::null())
573            .stdout(Stdio::piped())
574            .stderr(Stdio::piped())
575            // BLOCO 4: cancellation (dropped future) must kill the child.
576            .kill_on_drop(true);
577        if let Some(config_dir) = claude_embedding_config_dir() {
578            cmd.env("CLAUDE_CONFIG_DIR", &config_dir);
579        }
580        let binary_str = self.binary.to_string_lossy().into_owned();
581        let output = match tokio::time::timeout(embed_timeout(), cmd.output()).await {
582            Err(_elapsed) => {
583                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
584                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
585                        secs: embed_timeout().as_secs(),
586                        binary: binary_str.clone(),
587                    },
588                ));
589            }
590            Ok(Err(e)) => {
591                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
592                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
593                        binary: binary_str.clone(),
594                        source: e.to_string(),
595                    },
596                ));
597            }
598            Ok(Ok(o)) => o,
599        };
600        // G45-CR5 / ADR-0043 (v1.0.85): parse the JSON envelope from
601        // `claude -p --output-format json` and detect OAuth quota
602        // exhaustion by looking for the `rate_limit_error` or
603        // `usage` overflow markers before checking the subprocess
604        // exit status. This lets the deterministic fallback in
605        // hybrid-search and recall swap to codex immediately.
606        let stdout_str = String::from_utf8_lossy(&output.stdout);
607        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&stdout_str) {
608            let is_rate_limited = parsed
609                .get("is_error")
610                .and_then(|v| v.as_bool())
611                .unwrap_or(false)
612                && parsed
613                    .get("result")
614                    .and_then(|v| v.as_str())
615                    .map(|s| {
616                        s.contains("rate limit")
617                            || s.contains("quota")
618                            || s.contains("anthropic-ratelimit")
619                    })
620                    .unwrap_or(false);
621            if is_rate_limited {
622                return Err(AppError::Embedding(format!(
623                    "OAuth usage quota exhausted: claude rate_limit detected in stdout: {}",
624                    parsed
625                        .get("result")
626                        .and_then(|v| v.as_str())
627                        .unwrap_or("")
628                        .chars()
629                        .take(120)
630                        .collect::<String>()
631                )));
632            }
633        }
634        if !output.status.success() {
635            let (exit_code, signal) = if let Some(code) = output.status.code() {
636                (Some(code), None)
637            } else {
638                use std::os::unix::process::ExitStatusExt;
639                (None, output.status.signal())
640            };
641            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
642                &output.stdout,
643                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
644            );
645            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
646                &output.stderr,
647                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
648            );
649            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
650            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
651                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
652                    exit_code,
653                    signal,
654                    stdout_tail,
655                    stderr_tail,
656                    binary: binary_str,
657                    hint,
658                },
659            ));
660        }
661        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
662    }
663
664    async fn invoke_codex(
665        &self,
666        prompt: &str,
667        schema_path: &std::path::Path,
668    ) -> Result<String, AppError> {
669        let binary_str = self.binary.to_string_lossy().into_owned();
670        let mut child =
671            match build_codex_embedding_command(&self.binary, &self.model, schema_path).spawn() {
672                Ok(c) => c,
673                Err(e) => {
674                    return Err(crate::llm::exit_code_hints::into_legacy_embedding(
675                        &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
676                            binary: binary_str,
677                            source: e.to_string(),
678                        },
679                    ));
680                }
681            };
682        if let Some(mut stdin) = child.stdin.take() {
683            stdin
684                .write_all(prompt.as_bytes())
685                .await
686                .map_err(|e| AppError::Embedding(format!("codex stdin write failed: {e}")))?;
687        }
688        let output = match tokio::time::timeout(embed_timeout(), child.wait_with_output()).await {
689            Err(_elapsed) => {
690                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
691                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
692                        secs: embed_timeout().as_secs(),
693                        binary: binary_str,
694                    },
695                ));
696            }
697            Ok(Err(e)) => {
698                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
699                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
700                        binary: binary_str,
701                        source: format!("codex wait failed: {e}"),
702                    },
703                ));
704            }
705            Ok(Ok(o)) => o,
706        };
707        if !output.status.success() {
708            let (exit_code, signal) = if let Some(code) = output.status.code() {
709                (Some(code), None)
710            } else {
711                use std::os::unix::process::ExitStatusExt;
712                (None, output.status.signal())
713            };
714            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
715                &output.stdout,
716                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
717            );
718            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
719                &output.stderr,
720                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
721            );
722            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
723            // G42/S7: the headless spawn can still hit interactive
724            // prompts on some codex builds; keep the legacy request_user_input
725            // branch as a special-case hint, and stamp the diagnostic
726            // tail on top of the canonical NonZeroExit envelope.
727            let mut combined_hint = hint;
728            if stderr_tail.contains("request_user_input") {
729                combined_hint.push_str(
730                    " | codex requested interactive input in a headless embedding call; \
731                     upgrade codex (>= 0.134) or switch the embedding backend to claude",
732                );
733            }
734            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
735                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
736                    exit_code,
737                    signal,
738                    stdout_tail,
739                    stderr_tail,
740                    binary: binary_str,
741                    hint: combined_hint,
742                },
743            ));
744        }
745        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
746    }
747}
748
749/// G42/S6: resolves the empty `CLAUDE_CONFIG_DIR` used for embedding
750/// subprocesses.
751///
752/// - `SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR` is honoured when set and
753///   pointing at a directory (same contract as G28-A in claude_runner);
754/// - otherwise a managed directory is created at
755///   `~/.local/state/sqlite-graphrag/claude-empty-config` (mode 0700).
756///   If `~/.claude/.credentials.json` exists (Linux OAuth storage) it is
757///   copied in so authentication still works; on macOS credentials live
758///   in the Keychain and the empty dir is sufficient.
759///
760/// Returns `None` only when HOME is unset AND no override is given —
761/// in that case the subprocess falls back to claude's own default.
762fn claude_embedding_config_dir() -> Option<std::path::PathBuf> {
763    if let Ok(dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
764        let path = std::path::PathBuf::from(dir);
765        if path.is_dir() {
766            return Some(path);
767        }
768        tracing::warn!(
769            target: "embedding",
770            path = %path.display(),
771            "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but not a directory; \
772             falling back to the managed empty config dir"
773        );
774    }
775    let home = std::env::var("HOME").ok()?;
776    let dir = std::path::Path::new(&home)
777        .join(".local/state/sqlite-graphrag")
778        .join("claude-empty-config");
779    if std::fs::create_dir_all(&dir).is_err() {
780        return None;
781    }
782    #[cfg(unix)]
783    {
784        use std::os::unix::fs::PermissionsExt;
785        let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
786    }
787    // Linux stores OAuth credentials on disk; copy them so the isolated
788    // config dir still authenticates. Best-effort: macOS uses Keychain.
789    let creds = std::path::Path::new(&home).join(".claude/.credentials.json");
790    if creds.exists() {
791        let target = dir.join(".credentials.json");
792        if !target.exists() {
793            let _ = std::fs::copy(&creds, &target);
794        }
795    }
796    Some(dir)
797}
798
799fn build_codex_embedding_command(
800    binary: &std::path::Path,
801    model: &str,
802    schema_path: &std::path::Path,
803) -> Command {
804    let mut cmd = Command::new(binary);
805    // v1.0.77: `-c` TOML overrides bypass the codex exec --sandbox propagation
806    // bug (openai/codex#18113). CLI flags alone are insufficient — the exec
807    // subcommand may not inherit --sandbox from the parent codex command.
808    cmd.arg("exec")
809        .arg("-c")
810        .arg("sandbox_mode='read-only'")
811        .arg("-c")
812        .arg("approval_policy='never'")
813        .arg("--json")
814        .arg("--output-schema")
815        .arg(schema_path)
816        .arg("--ephemeral")
817        .arg("--skip-git-repo-check")
818        .arg("--sandbox")
819        .arg("read-only")
820        .arg("--ignore-user-config")
821        .arg("--ignore-rules");
822    if crate::extract::codex_compat::codex_supports_ask_for_approval() {
823        cmd.arg("--ask-for-approval").arg("never");
824    }
825    // v1.0.77: isolate codex from user config by pointing CODEX_HOME at a
826    // minimal directory containing only auth.json (OAuth credentials).
827    let codex_home = prepare_isolated_codex_home();
828    cmd.arg("--model")
829        .arg(model)
830        .arg("-")
831        .env_clear()
832        .env("PATH", std::env::var("PATH").unwrap_or_default())
833        .env("HOME", std::env::var("HOME").unwrap_or_default());
834    if let Some(ref ch) = codex_home {
835        cmd.env("CODEX_HOME", ch);
836    }
837    cmd.stdin(Stdio::piped())
838        .stdout(Stdio::piped())
839        .stderr(Stdio::piped())
840        // BLOCO 4: cancellation (dropped future) must kill the child.
841        .kill_on_drop(true);
842    cmd
843}
844
/// Prepares a per-process `CODEX_HOME` that contains only a copy of the
/// user's `auth.json` (OAuth credentials).
///
/// The directory lives at
/// `$HOME/.local/share/sqlite-graphrag/codex-home-<pid>` and, on Unix, is
/// restricted to mode 0700 (best-effort) because it holds credentials.
/// An `auth.json` already present in that directory is reused as-is.
///
/// Returns `None` — meaning "do not override `CODEX_HOME`" — when:
/// - `HOME` cannot be read;
/// - `$HOME/.codex/auth.json` does not exist;
/// - the isolated directory cannot be created; or
/// - the credentials cannot be copied into it.
///
/// The last two cases used to return `Some(dir)` anyway, which pointed
/// codex at a home without credentials and turned a recoverable setup
/// problem into an authentication failure.
fn prepare_isolated_codex_home() -> Option<std::path::PathBuf> {
    let home_var = std::env::var("HOME").ok()?;
    let home = std::path::Path::new(&home_var);

    let real_auth = home.join(".codex/auth.json");
    if !real_auth.exists() {
        return None;
    }

    let isolated = home
        .join(".local/share/sqlite-graphrag")
        .join(format!("codex-home-{}", std::process::id()));
    std::fs::create_dir_all(&isolated).ok()?;

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Owner-only access, mirroring the managed claude config dir.
        let _ = std::fs::set_permissions(&isolated, std::fs::Permissions::from_mode(0o700));
    }

    let target = isolated.join("auth.json");
    if !target.exists() && std::fs::copy(&real_auth, &target).is_err() {
        // Do not leave a truncated credentials file behind: a later call
        // would see it as "already copied" and reuse it.
        let _ = std::fs::remove_file(&target);
        return None;
    }
    Some(isolated)
}
860
861/// Parse an LLM JSON response of type `T`. The two backends emit
862/// different shapes:
863/// - Claude (with `--output-format json`): single JSON object on stdout.
864/// - Codex (with `--json`): JSONL stream with one event per line; the
865///   `agent_message` event's `text` field is the JSON payload.
866///
867/// This helper accepts both shapes and returns the parsed value (or an
868/// error describing the first mismatch).
869fn parse_llm_json<T: serde::de::DeserializeOwned>(stdout: &str) -> Result<T, String> {
870    // Strategy 1: try the whole stdout as JSON (Claude path).
871    if let Ok(parsed) = serde_json::from_str::<T>(stdout) {
872        return Ok(parsed);
873    }
874    // Strategy 2: walk the JSONL line by line and pick the last
875    // `item.completed` of type `agent_message` (Codex path).
876    let mut last_agent_text: Option<String> = None;
877    for line in stdout.lines() {
878        let line = line.trim();
879        if line.is_empty() {
880            continue;
881        }
882        let Ok(event) = serde_json::from_str::<serde_json::Value>(line) else {
883            continue;
884        };
885        if event.get("type").and_then(|t| t.as_str()) != Some("item.completed") {
886            continue;
887        }
888        let item = match event.get("item") {
889            Some(i) => i,
890            None => continue,
891        };
892        if item.get("type").and_then(|t| t.as_str()) != Some("agent_message") {
893            continue;
894        }
895        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
896            last_agent_text = Some(text.to_string());
897        }
898    }
899    let text = last_agent_text
900        .ok_or_else(|| "no agent_message found in codex JSONL output".to_string())?;
901    serde_json::from_str::<T>(&text)
902        .map_err(|e| format!("codex agent_message text does not match schema: {e}; raw={text}"))
903}
904
905#[cfg(test)]
906mod tests {
907    use super::*;
908
909    fn test_client(flavour: EmbeddingFlavour, binary: std::path::PathBuf) -> LlmEmbedding {
910        LlmEmbedding {
911            flavour,
912            binary,
913            model: "gpt-5.4".to_string(),
914            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
915        }
916    }
917
918    #[test]
919    #[serial_test::serial(env)]
920    fn oauth_only_enforce_blocks_api_keys() {
921        // SAFETY: this test only sets and unsets env vars; the
922        // `serial(env)` group prevents cross-test interference.
923        unsafe {
924            std::env::set_var("ANTHROPIC_API_KEY", "test");
925            assert!(LlmEmbedding::oauth_only_enforce().is_err());
926            std::env::remove_var("ANTHROPIC_API_KEY");
927
928            std::env::set_var("OPENAI_API_KEY", "test");
929            assert!(LlmEmbedding::oauth_only_enforce().is_err());
930            std::env::remove_var("OPENAI_API_KEY");
931        }
932        assert!(LlmEmbedding::oauth_only_enforce().is_ok());
933    }
934
935    #[test]
936    fn flavour_as_str_is_stable() {
937        assert_eq!(EmbeddingFlavour::Claude.as_str(), "claude");
938        assert_eq!(EmbeddingFlavour::Codex.as_str(), "codex");
939    }
940
941    #[test]
942    fn single_schema_embeds_active_dim() {
943        let schema = build_single_schema(64);
944        assert!(schema.contains(r#""minItems":64"#));
945        assert!(schema.contains(r#""maxItems":64"#));
946        let parsed: serde_json::Value =
947            serde_json::from_str(&schema).expect("single schema must be valid JSON");
948        assert_eq!(parsed["properties"]["embedding"]["minItems"], 64);
949    }
950
951    #[test]
952    fn batch_schema_is_valid_json_and_unbounded_items() {
953        let schema = build_batch_schema(64);
954        let parsed: serde_json::Value =
955            serde_json::from_str(&schema).expect("batch schema must be valid JSON");
956        // The items array must NOT constrain its length so one schema
957        // file serves every batch size (G42/S4).
958        assert!(parsed["properties"]["items"].get("minItems").is_none());
959        assert_eq!(
960            parsed["properties"]["items"]["items"]["properties"]["v"]["minItems"],
961            64
962        );
963    }
964
965    #[test]
966    fn parse_llm_json_accepts_claude_json() {
967        let stdout = r#"{"embedding":[0.0,1.0,2.0]}"#;
968
969        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("claude JSON must parse");
970
971        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
972    }
973
974    #[test]
975    fn parse_llm_json_accepts_codex_jsonl() {
976        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}
977{"type":"item.completed","item":{"type":"agent_message","text":"{\"embedding\":[0.0,1.0,2.0]}"}}
978{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1}}"#;
979
980        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("codex JSONL must parse");
981
982        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
983    }
984
985    #[test]
986    fn parse_llm_json_rejects_jsonl_without_agent_message() {
987        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}"#;
988
989        let err = parse_llm_json::<EmbeddingResponse>(stdout)
990            .expect_err("missing agent_message must fail");
991
992        assert!(err.contains("no agent_message"));
993    }
994
995    #[test]
996    fn parse_llm_json_accepts_batch_response() {
997        let stdout = r#"{"items":[{"i":1,"v":[0.0,1.0]},{"i":2,"v":[2.0,3.0]}]}"#;
998
999        let parsed: BatchEmbeddingResponse = parse_llm_json(stdout).expect("batch JSON must parse");
1000
1001        assert_eq!(parsed.items.len(), 2);
1002        assert_eq!(parsed.items[0].i, 1);
1003        assert_eq!(parsed.items[1].v, vec![2.0, 3.0]);
1004    }
1005
1006    #[test]
1007    fn codex_schema_file_is_created_once_and_reused() {
1008        let client = test_client(
1009            EmbeddingFlavour::Codex,
1010            std::path::PathBuf::from("/bin/true"),
1011        );
1012        let first = client
1013            .codex_schema_file(64, false)
1014            .expect("schema file must be created");
1015        let second = client
1016            .codex_schema_file(64, false)
1017            .expect("schema file must be reused");
1018        assert_eq!(first.path(), second.path(), "same dim must reuse the file");
1019
1020        let batch = client
1021            .codex_schema_file(64, true)
1022            .expect("batch schema file must be created");
1023        assert_ne!(
1024            first.path(),
1025            batch.path(),
1026            "single and batch schemas are distinct files"
1027        );
1028
1029        let content = std::fs::read_to_string(first.path()).expect("schema file must be readable");
1030        assert!(content.contains(r#""minItems":64"#));
1031    }
1032
1033    #[test]
1034    fn codex_embedding_command_reads_prompt_from_stdin() {
1035        let schema_path = std::env::temp_dir().join("sqlite-graphrag-embed-schema-test.json");
1036        let cmd = build_codex_embedding_command(
1037            std::path::Path::new("/bin/true"),
1038            "gpt-5.4",
1039            &schema_path,
1040        );
1041        let argv: Vec<String> = cmd
1042            .as_std()
1043            .get_args()
1044            .filter_map(|arg| arg.to_str().map(|s| s.to_string()))
1045            .collect();
1046
1047        assert!(
1048            argv.iter().any(|arg| arg == "-"),
1049            "codex embedding command must read prompt from stdin: {argv:?}"
1050        );
1051        assert!(
1052            !argv.iter().any(|arg| arg.starts_with("passage: ")),
1053            "prompt text must not be passed as argv: {argv:?}"
1054        );
1055        for required in &[
1056            "exec",
1057            "-c",
1058            "sandbox_mode='read-only'",
1059            "approval_policy='never'",
1060            "--json",
1061            "--output-schema",
1062            "--ephemeral",
1063            "--skip-git-repo-check",
1064            "--sandbox",
1065            "read-only",
1066            "--ignore-user-config",
1067            "--ignore-rules",
1068            "--model",
1069            "gpt-5.4",
1070        ] {
1071            assert!(
1072                argv.iter().any(|arg| arg == required),
1073                "missing flag {required} in {argv:?}"
1074            );
1075        }
1076    }
1077
1078    #[cfg(unix)]
1079    #[test]
1080    #[serial_test::serial(env)]
1081    fn embed_passage_sends_prompt_to_codex_stdin() {
1082        use std::os::unix::fs::PermissionsExt;
1083
1084        // Pin the dimensionality so the mock script and the validation
1085        // agree regardless of test execution order.
1086        // SAFETY: guarded by serial(env).
1087        unsafe {
1088            std::env::set_var("SQLITE_GRAPHRAG_EMBEDDING_DIM", "64");
1089        }
1090
1091        let temp = tempfile::tempdir().expect("tempdir must exist");
1092        let binary = temp.path().join("codex-stdin-check");
1093        let script = r#"#!/usr/bin/env bash
1094set -euo pipefail
1095
1096prompt="$(cat)"
1097if [[ "$prompt" != "passage: codex-cli" ]]; then
1098  echo "unexpected stdin: $prompt" >&2
1099  exit 41
1100fi
1101
1102vals="0.0"
1103for _ in $(seq 2 64); do
1104  vals="$vals,0.0"
1105done
1106payload="{\"embedding\":[$vals]}"
1107escaped="${payload//\"/\\\"}"
1108echo "{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"$escaped\"}}"
1109"#;
1110        std::fs::write(&binary, script).expect("mock codex script must be written");
1111        let mut perms = std::fs::metadata(&binary)
1112            .expect("mock codex metadata must exist")
1113            .permissions();
1114        perms.set_mode(0o755);
1115        std::fs::set_permissions(&binary, perms).expect("mock codex must be executable");
1116
1117        let embedding = test_client(EmbeddingFlavour::Codex, binary);
1118
1119        let vector = embedding
1120            .embed_passage("codex-cli")
1121            .expect("stdin-backed codex embedding must succeed");
1122
1123        // SAFETY: guarded by serial(env).
1124        unsafe {
1125            std::env::remove_var("SQLITE_GRAPHRAG_EMBEDDING_DIM");
1126        }
1127
1128        assert_eq!(vector.len(), 64);
1129        assert!(vector.iter().all(|value| *value == 0.0));
1130    }
1131
1132    // ---------------------------------------------------------------
1133    // ADR-0042 / GAP-002: LlmEmbeddingBuilder unit tests
1134    // ---------------------------------------------------------------
1135
1136    /// `claude_default` is the `with_claude_builder` alias: returns a
1137    /// builder pre-set to the Claude flavour. Build requires the
1138    /// Claude binary to be on PATH; in CI without `claude`, the build
1139    /// fails with the canonical `claude not found` error, which is
1140    /// itself the proof that the flavour is propagated correctly.
1141    #[test]
1142    fn claude_default_resolves_path() {
1143        let builder = LlmEmbeddingBuilder::claude_default();
1144        assert_eq!(builder.flavour, EmbeddingFlavour::Claude);
1145        assert!(builder.binary_override.is_none());
1146        assert!(builder.model_override.is_none());
1147    }
1148
1149    /// `override_binary` short-circuits the PATH probe. The builder
1150    /// stores the override verbatim so the `build()` call can fall
1151    /// back to `resolve_real_binary` for ELF canonicalisation.
1152    #[test]
1153    fn override_binary_uses_provided() {
1154        let path = std::path::PathBuf::from("/tmp/fake-claude-binary");
1155        let builder = LlmEmbeddingBuilder::claude_default().override_binary(path.clone());
1156        assert_eq!(builder.binary_override.as_ref(), Some(&path));
1157    }
1158
1159    /// `override_model` short-circuits the env-var lookup. The model
1160    /// override travels untouched through `build()` so the LLM
1161    /// subprocess spawn honours it.
1162    #[test]
1163    fn override_model_uses_provided() {
1164        let builder =
1165            LlmEmbeddingBuilder::codex_default().override_model("gpt-5.4-custom".to_string());
1166        assert_eq!(builder.model_override.as_deref(), Some("gpt-5.4-custom"));
1167    }
1168}