Skip to main content

sqlite_graphrag/extract/
llm_embedding.rs

1//! LLM-based embedding backend (v1.0.76 default; reworked in v1.0.79 G42).
2//!
3//! `LlmEmbedding` is the production embedding client. It wraps headless
4//! invocations of `claude code` or `codex` and returns f32 vectors of the
5//! active dimensionality (`crate::constants::embedding_dim()`, default 64).
6//!
7//! v1.0.79 (G42) changes:
8//! - S1: the dimensionality is no longer hardcoded here — the single
9//!   source of truth lives in `crate::constants` and the JSON schemas
10//!   are generated dynamically.
11//! - S2: `embed_batch` embeds N numbered texts per LLM call with the
12//!   `{items:[{i,v}]}` schema, collapsing 39 subprocess spawns into 4-5.
13//! - S4: the codex `--output-schema` file is a `tempfile::NamedTempFile`
14//!   with a randomised name created once per client and shared across
15//!   clones via `Arc` — no per-call write+delete, no PID-path races.
16//! - S5: the claude model honours `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`
17//!   (symmetric to the codex env var). ZERO hardcoded models without
18//!   an env override.
19//! - S6: `CLAUDE_CONFIG_DIR` points at an empty managed directory BY
20//!   DEFAULT, because `--strict-mcp-config`/`--mcp-config '{}'` are
21//!   silently ignored upstream (anthropics/claude-code#10787) and a
22//!   full `~/.claude` costs ~223k cache-creation tokens per call.
23//! - S7: the codex `request_user_input` failure mode maps to an
24//!   actionable error instead of an opaque exit 11.
25//! - BLOCO 4: every subprocess uses `kill_on_drop(true)` plus an
26//!   explicit `tokio::time::timeout`, so cancellation never leaks a
27//!   child and a hung LLM cannot stall the pipeline forever.
28//!
29//! OAuth is the only supported credential path. The constructor rejects
30//! `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` in the environment — see
31//! `v1.0.69 (G31) OAuth-Only Enforcement`.
32
33use crate::errors::AppError;
34use serde::Deserialize;
35use std::process::Stdio;
36use std::sync::Arc;
37use tokio::io::AsyncWriteExt;
38use tokio::process::Command;
39
/// Default per-LLM-call timeout in seconds. Consistent with the
/// `--claude-timeout` / `--codex-timeout` defaults used by ingest.
/// Override via `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS`.
const DEFAULT_EMBED_TIMEOUT_SECS: u64 = 300;

/// Resolves the timeout applied to a single LLM subprocess call.
///
/// Reads `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS` and accepts it only when it
/// parses as an integer within `10..=3600` seconds. Surrounding whitespace
/// (for example a trailing newline from an env file) is ignored. Any
/// unset, unparsable or out-of-range value falls back to
/// `DEFAULT_EMBED_TIMEOUT_SECS`.
fn embed_timeout() -> std::time::Duration {
    let secs = std::env::var("SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS")
        .ok()
        // Trim first: `"120\n".parse::<u64>()` fails and would silently
        // discard an otherwise valid override.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|secs| (10..=3_600).contains(secs))
        .unwrap_or(DEFAULT_EMBED_TIMEOUT_SECS);
    std::time::Duration::from_secs(secs)
}
53
/// G42/S1: renders the single-vector JSON schema for the active dim.
///
/// The result is a compact JSON object schema with one required
/// `embedding` property: an array of numbers whose `minItems` and
/// `maxItems` both equal `dim`, with `additionalProperties` disabled.
fn build_single_schema(dim: usize) -> String {
    // The only dim-dependent fragment: the fixed-length number array.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    [
        r#"{"type":"object","properties":{"embedding":"#,
        vector.as_str(),
        r#"},"required":["embedding"],"additionalProperties":false}"#,
    ]
    .concat()
}
60
/// G42/S2: renders the batch JSON schema `{items:[{i,v}]}`.
///
/// Each element of `items` requires an integer `i` and a `v` array of
/// exactly `dim` numbers. The length of `items` itself is deliberately
/// left unconstrained so ONE schema serves every batch size; index
/// coverage is validated in Rust after parsing.
fn build_batch_schema(dim: usize) -> String {
    // The only dim-dependent fragment: the fixed-length number array.
    let vector = format!(
        r#"{{"type":"array","items":{{"type":"number"}},"minItems":{dim},"maxItems":{dim}}}"#
    );
    [
        r#"{"type":"object","properties":{"items":{"type":"array","items":{"type":"object","properties":{"i":{"type":"integer"},"v":"#,
        vector.as_str(),
        r#"},"required":["i","v"],"additionalProperties":false}}},"required":["items"],"additionalProperties":false}"#,
    ]
    .concat()
}
69
70#[derive(Clone, Debug)]
71pub struct LlmEmbedding {
72    /// Which LLM headless binary to spawn. `claude` or `codex`.
73    flavour: EmbeddingFlavour,
74    /// Cached path to the binary to avoid PATH lookups on every call.
75    binary: std::path::PathBuf,
76    /// Model name. Resolved from env overrides at construction time.
77    model: String,
78    /// G42/S4: lazily-created codex `--output-schema` tempfiles, shared
79    /// across clones. Keyed by dim so an env change between tests cannot
80    /// serve a stale schema.
81    codex_schemas: Arc<parking_lot::Mutex<CodexSchemaFiles>>,
82}
83
84#[derive(Debug, Default)]
85struct CodexSchemaFiles {
86    single: Option<(usize, Arc<tempfile::NamedTempFile>)>,
87    batch: Option<(usize, Arc<tempfile::NamedTempFile>)>,
88}
89
90#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
91pub enum EmbeddingFlavour {
92    Claude,
93    Codex,
94}
95
96/// ADR-0042 / GAP-002: builder for [`LlmEmbedding`] that lets callers
97/// override the binary path and model without having to remember the
98/// env-var names per flavour. Replaces the duplicated `with_codex` /
99/// `with_claude` bodies that diverged in v1.0.82 (GAP-002: the Claude
100/// arm of `embed_via_backend` re-did the PATH probe via
101/// `LlmEmbedding::detect_available` and could silently pick `codex`).
102#[derive(Clone, Debug)]
103pub struct LlmEmbeddingBuilder {
104    flavour: EmbeddingFlavour,
105    binary_override: Option<std::path::PathBuf>,
106    model_override: Option<String>,
107}
108
109impl LlmEmbeddingBuilder {
110    /// Convenience: produce a Claude-backed builder pre-configured with
111    /// the canonical default binary + model.
112    /// Convenience: produce a Claude-backed builder pre-configured with
113    /// the canonical default binary + model.
114    pub fn claude_default() -> Self {
115        Self {
116            flavour: EmbeddingFlavour::Claude,
117            binary_override: None,
118            model_override: None,
119        }
120    }
121
122    /// Convenience: produce a Codex-backed builder pre-configured with
123    /// the canonical default binary + model.
124    pub fn codex_default() -> Self {
125        Self {
126            flavour: EmbeddingFlavour::Codex,
127            binary_override: None,
128            model_override: None,
129        }
130    }
131    /// Override the binary path (skips the `which::which` PATH probe).
132    pub fn override_binary(mut self, binary: std::path::PathBuf) -> Self {
133        self.binary_override = Some(binary);
134        self
135    }
136
137    /// Override the model name (skips the env-var lookup).
138    pub fn override_model(mut self, model: String) -> Self {
139        self.model_override = Some(model);
140        self
141    }
142
143    /// Build the [`LlmEmbedding`]. Enforces OAuth-only and resolves the
144    /// binary/model via the override or the env-var defaults.
145    pub fn build(self) -> Result<LlmEmbedding, AppError> {
146        LlmEmbedding::oauth_only_enforce()?;
147        let binary = match self.binary_override {
148            Some(path) => resolve_real_binary(&path),
149            None => {
150                let which_name = match self.flavour {
151                    EmbeddingFlavour::Codex => "codex",
152                    EmbeddingFlavour::Claude => "claude",
153                };
154                let path = which::which(which_name).map_err(|_| {
155                    AppError::Embedding(format!("`{which_name}` not found on PATH"))
156                })?;
157                resolve_real_binary(&path)
158            }
159        };
160        let model = match self.model_override {
161            Some(m) => m,
162            None => match self.flavour {
163                EmbeddingFlavour::Codex => codex_embed_model(),
164                EmbeddingFlavour::Claude => claude_embed_model(),
165            },
166        };
167        Ok(LlmEmbedding {
168            flavour: self.flavour,
169            binary,
170            model,
171            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
172        })
173    }
174}
175
176impl EmbeddingFlavour {
177    pub fn as_str(self) -> &'static str {
178        match self {
179            Self::Claude => "claude",
180            Self::Codex => "codex",
181        }
182    }
183}
184
185#[derive(Debug, Deserialize)]
186struct EmbeddingResponse {
187    embedding: Vec<f32>,
188}
189
190#[derive(Debug, Deserialize)]
191struct BatchEmbeddingResponse {
192    items: Vec<BatchEmbeddingItem>,
193}
194
195#[derive(Debug, Deserialize)]
196struct BatchEmbeddingItem {
197    i: usize,
198    v: Vec<f32>,
199}
200
201/// Follows symlinks and shell-script shim `exec` targets to find
202/// the real ELF binary. Shim wrappers (like `~/.graphrag-shim/codex`)
203/// can strip hardening flags; bypassing them is a security requirement.
204pub fn resolve_real_binary(path: &std::path::Path) -> std::path::PathBuf {
205    if let Ok(canonical) = std::fs::canonicalize(path) {
206        if is_elf_binary(&canonical) {
207            return canonical;
208        }
209        if let Some(exec_target) = extract_exec_target_from_shim(&canonical) {
210            if exec_target.exists() && is_elf_binary(&exec_target) {
211                return exec_target;
212            }
213        }
214        return canonical;
215    }
216    path.to_path_buf()
217}
218
/// Returns `true` when the file at `path` starts with the ELF magic
/// bytes (`0x7f 'E' 'L' 'F'`).
///
/// Only the first four bytes are read, so probing a large executable
/// does not load the whole file into memory. Any I/O failure (missing
/// file, directory, file shorter than four bytes) yields `false`.
fn is_elf_binary(path: &std::path::Path) -> bool {
    const ELF_MAGIC: [u8; 4] = [0x7f, b'E', b'L', b'F'];
    let mut header = [0u8; 4];
    match std::fs::File::open(path) {
        // `read_exact` fails on a short file, which maps to `false` just
        // like the length check it replaces.
        Ok(mut file) => {
            std::io::Read::read_exact(&mut file, &mut header).is_ok() && header == ELF_MAGIC
        }
        Err(_) => false,
    }
}
224
/// Extracts the program path from the last `exec …` line of a shell
/// shim.
///
/// Returns `None` when the file cannot be read as UTF-8 text, does not
/// start with a shebang (`#!`), contains no `exec` line, or names an
/// empty target.
///
/// The target may be wrapped in single or double quotes, as in
/// `exec "/opt/tool/bin/codex" "$@"`; the quotes are stripped and
/// whitespace inside them is preserved. No shell expansion is performed,
/// so a target such as `$HOME/bin/codex` is returned verbatim.
fn extract_exec_target_from_shim(path: &std::path::Path) -> Option<std::path::PathBuf> {
    let content = std::fs::read_to_string(path).ok()?;
    if !content.starts_with("#!") {
        return None;
    }
    // Scan bottom-up: the last `exec` line is the one that replaces the
    // shell process.
    for line in content.lines().rev() {
        let rest = match line.trim().strip_prefix("exec") {
            // Require whitespace after the keyword so identifiers such as
            // `executable=…` are not mistaken for an exec statement.
            Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
            _ => continue,
        };
        // Prefer a properly quoted target; an unquoted (or unterminated)
        // one falls back to the first whitespace-delimited word.
        let target = ['"', '\'']
            .iter()
            .find_map(|&quote| {
                let inner = rest.strip_prefix(quote)?;
                inner.find(quote).map(|end| &inner[..end])
            })
            .or_else(|| rest.split_whitespace().next())?;
        if target.is_empty() {
            return None;
        }
        return Some(std::path::PathBuf::from(target));
    }
    None
}
240
/// G42/S5: claude embedding model with env override, symmetric to the
/// codex `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL` introduced in v1.0.78.
///
/// Returns the trimmed value of `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`.
/// An unset, non-Unicode, empty or whitespace-only value falls back to
/// `claude-sonnet-4-6`, so an "emptied" variable never turns into
/// `--model ""` on the CLI.
fn claude_embed_model() -> String {
    std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL")
        .ok()
        .map(|raw| raw.trim().to_owned())
        .filter(|model| !model.is_empty())
        .unwrap_or_else(|| "claude-sonnet-4-6".to_string())
}
247
/// Codex embedding model with env override.
///
/// Returns the trimmed value of `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL`.
/// An unset, non-Unicode, empty or whitespace-only value falls back to
/// `gpt-5.5`, so an "emptied" variable never turns into an empty model
/// name on the CLI.
fn codex_embed_model() -> String {
    std::env::var("SQLITE_GRAPHRAG_CODEX_EMBED_MODEL")
        .ok()
        .map(|raw| raw.trim().to_owned())
        .filter(|model| !model.is_empty())
        .unwrap_or_else(|| "gpt-5.5".to_string())
}
251
252impl LlmEmbedding {
253    /// Detects which LLM CLI is available on PATH and returns the
254    /// matching embedding client.
255    ///
256    /// v1.0.76: PREFERS `codex` over `claude` because:
257    /// - Claude Code 2.1+ ships a 180k+ token system context (plugins,
258    ///   skills, agents, MCP) that overflows the 200k context window
259    ///   for even trivial embedding prompts and returns "Prompt is too
260    ///   long". (v1.0.79/S6 mitigates this with an empty
261    ///   `CLAUDE_CONFIG_DIR`, but codex stays the lighter default.)
262    /// - Codex 0.134+ is lightweight (~5k system context) and the
263    ///   `StructuredOutput` tool reliably returns the requested vectors.
264    pub fn detect_available() -> Result<Self, AppError> {
265        Self::oauth_only_enforce()?;
266
267        if let Ok(path) = which::which("codex") {
268            return Ok(Self {
269                flavour: EmbeddingFlavour::Codex,
270                binary: resolve_real_binary(&path),
271                model: codex_embed_model(),
272                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
273            });
274        }
275        if let Ok(path) = which::which("claude") {
276            return Ok(Self {
277                flavour: EmbeddingFlavour::Claude,
278                binary: resolve_real_binary(&path),
279                model: claude_embed_model(),
280                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
281            });
282        }
283        Err(AppError::Embedding(
284            "no LLM CLI found on PATH: install `codex` (0.130+) or `claude` (Claude Code 2.1+)"
285                .to_string(),
286        ))
287    }
288
289    pub fn with_codex() -> Result<Self, AppError> {
290        Self::with_codex_builder().build()
291    }
292
293    pub fn with_claude() -> Result<Self, AppError> {
294        Self::with_claude_builder().build()
295    }
296
297    /// ADR-0042 / GAP-002: builder entry point for a codex-backed
298    /// embedder with default model resolution.
299    pub fn with_codex_builder() -> LlmEmbeddingBuilder {
300        LlmEmbeddingBuilder {
301            flavour: EmbeddingFlavour::Codex,
302            binary_override: None,
303            model_override: None,
304        }
305    }
306
307    /// ADR-0042 / GAP-002: builder entry point for a claude-backed
308    /// embedder with default model resolution.
309    pub fn with_claude_builder() -> LlmEmbeddingBuilder {
310        LlmEmbeddingBuilder {
311            flavour: EmbeddingFlavour::Claude,
312            binary_override: None,
313            model_override: None,
314        }
315    }
316    /// v1.0.69 (G31): refuse to spawn if an API key is set. The CLI
317    /// must use OAuth. The two API-key env vars are NOT in the
318    /// env-clear whitelist, so a parent process that exports them
319    /// will see this error.
320    fn oauth_only_enforce() -> Result<(), AppError> {
321        if std::env::var("ANTHROPIC_API_KEY").is_ok() {
322            return Err(AppError::Validation(
323                "ANTHROPIC_API_KEY is set; v1.0.76 requires OAuth. \
324                 unset it and use `claude login` instead."
325                    .into(),
326            ));
327        }
328        if std::env::var("OPENAI_API_KEY").is_ok() {
329            return Err(AppError::Validation(
330                "OPENAI_API_KEY is set; v1.0.76 requires OAuth. \
331                 unset it and use `codex login` instead."
332                    .into(),
333            ));
334        }
335        Ok(())
336    }
337
338    /// Embeds a single passage (chunk of a memory body). Returns an
339    /// f32 vector of the active dimensionality.
340    pub fn embed_passage(&self, text: &str) -> Result<Vec<f32>, AppError> {
341        self.invoke_with_prefix(crate::constants::PASSAGE_PREFIX, text)
342    }
343
344    /// Embeds a single query. The LLM uses a different prompt prefix
345    /// to disambiguate query from passage.
346    pub fn embed_query(&self, text: &str) -> Result<Vec<f32>, AppError> {
347        self.invoke_with_prefix(crate::constants::QUERY_PREFIX, text)
348    }
349
350    /// G56: returns a stable label for the active embedding model so the
351    /// in-process entity-embedding cache can key by `(model, text)`.
352    /// Embeddings produced by different models are not interchangeable,
353    /// so a cache entry from one model must never satisfy a request
354    /// served by another.
355    pub fn model_label(&self) -> String {
356        format!("{}:{}", self.flavour.as_str(), self.model)
357    }
358
359    /// ADR-0042 / BUG-003 fix: returns the resolved []
360    /// of this embedder. Used by  and
361    ///  to report the backend that
362    /// ACTUALLY executed the embedding (not the one requested in the
363    /// chain). When  substitutes claude
364    /// for a missing codex, the operator sees the truth in
365    /// .
366    pub fn flavour(&self) -> EmbeddingFlavour {
367        self.flavour
368    }
369
370    /// G42/S2: embeds a batch of `(global_index, text)` pairs in ONE
371    /// LLM call. Returns `(global_index, vector)` pairs. Async — this
372    /// is the unit of work scheduled by the bounded fan-out in
373    /// `crate::embedder`.
374    ///
375    /// Cancel safety: the future owns its subprocess via
376    /// `kill_on_drop(true)`, so dropping it (e.g. losing a
377    /// `tokio::select!` race against a cancellation token) kills the
378    /// child and leaks nothing.
379    pub async fn embed_batch_async(
380        &self,
381        prefix: &str,
382        batch: &[(usize, String)],
383    ) -> Result<Vec<(usize, Vec<f32>)>, AppError> {
384        let dim = crate::constants::embedding_dim();
385        if batch.is_empty() {
386            return Ok(Vec::new());
387        }
388        if batch.len() == 1 {
389            let (idx, text) = (&batch[0].0, &batch[0].1);
390            let v = self.invoke_single_async(prefix, text, dim).await?;
391            return Ok(vec![(*idx, v)]);
392        }
393
394        let mut prompt = format!(
395            "Generate {dim}-dimensional semantic embedding vectors for each numbered text below.\n\
396             Return a JSON object with an \"items\" array containing EXACTLY {n} items.\n\
397             Each item has \"i\" (the 1-based index) and \"v\" (the {dim}-float vector, values between -1 and 1).\n\n",
398            n = batch.len()
399        );
400        for (pos, (_, text)) in batch.iter().enumerate() {
401            prompt.push_str(&format!("{}: {prefix}{text}\n", pos + 1));
402        }
403
404        let stdout = match self.flavour {
405            EmbeddingFlavour::Claude => {
406                self.invoke_claude(&prompt, &build_batch_schema(dim))
407                    .await?
408            }
409            EmbeddingFlavour::Codex => {
410                let schema = self.codex_schema_file(dim, true)?;
411                self.invoke_codex(&prompt, schema.path()).await?
412            }
413        };
414
415        let parsed: BatchEmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
416            AppError::Embedding(format!(
417                "LLM batch embedding response parse failed: {e}; raw={stdout}"
418            ))
419        })?;
420        if parsed.items.len() != batch.len() {
421            return Err(AppError::Embedding(format!(
422                "LLM batch returned {} items, expected {} (G42/S2 coverage check)",
423                parsed.items.len(),
424                batch.len()
425            )));
426        }
427        let mut out: Vec<Option<Vec<f32>>> = vec![None; batch.len()];
428        for item in parsed.items {
429            if item.i == 0 || item.i > batch.len() {
430                return Err(AppError::Embedding(format!(
431                    "LLM batch item index {} out of range 1..={}",
432                    item.i,
433                    batch.len()
434                )));
435            }
436            if item.v.len() != dim {
437                return Err(AppError::Embedding(format!(
438                    "LLM batch item {} returned {} dims, expected {dim}; \
439                     refusing to truncate or pad silently (G42/C5)",
440                    item.i,
441                    item.v.len()
442                )));
443            }
444            out[item.i - 1] = Some(item.v);
445        }
446        let mut result = Vec::with_capacity(batch.len());
447        for (pos, slot) in out.into_iter().enumerate() {
448            let v = slot.ok_or_else(|| {
449                AppError::Embedding(format!(
450                    "LLM batch response is missing item index {} (G42/S2 coverage check)",
451                    pos + 1
452                ))
453            })?;
454            result.push((batch[pos].0, v));
455        }
456        Ok(result)
457    }
458
459    fn invoke_with_prefix(&self, prefix: &str, text: &str) -> Result<Vec<f32>, AppError> {
460        let dim = crate::constants::embedding_dim();
461        let inner = self.invoke_single_async(prefix, text, dim);
462        // v1.0.79 (G42/A2): reuse the process-wide multi-thread runtime
463        // instead of building a current-thread runtime PER CALL. Inside
464        // an existing runtime (tests, async commands) block_in_place
465        // keeps the worker pool healthy.
466        match tokio::runtime::Handle::try_current() {
467            Ok(handle) => tokio::task::block_in_place(|| handle.block_on(inner)),
468            Err(_) => crate::embedder::shared_runtime()?.block_on(inner),
469        }
470    }
471
472    async fn invoke_single_async(
473        &self,
474        prefix: &str,
475        text: &str,
476        dim: usize,
477    ) -> Result<Vec<f32>, AppError> {
478        let prompt = format!("{prefix}{text}");
479        let stdout = match self.flavour {
480            EmbeddingFlavour::Claude => {
481                self.invoke_claude(&prompt, &build_single_schema(dim))
482                    .await?
483            }
484            EmbeddingFlavour::Codex => {
485                let schema = self.codex_schema_file(dim, false)?;
486                self.invoke_codex(&prompt, schema.path()).await?
487            }
488        };
489        let parsed: EmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
490            AppError::Embedding(format!(
491                "LLM embedding response parse failed: {e}; raw={stdout}"
492            ))
493        })?;
494        if parsed.embedding.len() != dim {
495            return Err(AppError::Embedding(format!(
496                "LLM returned {} dims, expected {dim}; \
497                 refusing to truncate or pad silently (G42/C5)",
498                parsed.embedding.len()
499            )));
500        }
501        Ok(parsed.embedding)
502    }
503
504    /// G42/S4: returns the lazily-created, process-shared codex schema
505    /// tempfile for the requested mode. `NamedTempFile` randomises the
506    /// filename (no PID-based collisions) and removes the file on drop
507    /// of the last `Arc` clone.
508    fn codex_schema_file(
509        &self,
510        dim: usize,
511        batch: bool,
512    ) -> Result<Arc<tempfile::NamedTempFile>, AppError> {
513        let mut guard = self.codex_schemas.lock();
514        let slot = if batch {
515            &mut guard.batch
516        } else {
517            &mut guard.single
518        };
519        if let Some((cached_dim, file)) = slot {
520            if *cached_dim == dim {
521                return Ok(Arc::clone(file));
522            }
523        }
524        let content = if batch {
525            build_batch_schema(dim)
526        } else {
527            build_single_schema(dim)
528        };
529        let file = tempfile::Builder::new()
530            .prefix("sqlite-graphrag-embed-schema-")
531            .suffix(".json")
532            .tempfile()
533            .map_err(|e| AppError::Embedding(format!("schema tempfile create failed: {e}")))?;
534        std::fs::write(file.path(), content)
535            .map_err(|e| AppError::Embedding(format!("schema tempfile write failed: {e}")))?;
536        let file = Arc::new(file);
537        *slot = Some((dim, Arc::clone(&file)));
538        Ok(file)
539    }
540
541    async fn invoke_claude(&self, prompt: &str, schema: &str) -> Result<String, AppError> {
542        // v1.0.69 hardening: --strict-mcp-config --mcp-config <PATH> --settings
543        // '{"hooks":{}}' --dangerously-skip-permissions.
544        //
545        // v1.0.76 hardening: Claude Code 2.1+ renamed --output-schema to
546        // --json-schema and accepts the schema as an inline JSON string
547        // (NOT a file path). Also pass --output-format json so the
548        // response is a single JSON object on stdout.
549        //
550        // v1.0.79 (G42/S6): CLAUDE_CONFIG_DIR points at an empty managed
551        // directory BY DEFAULT — the MCP-isolation flags above are
552        // silently ignored upstream (anthropics/claude-code#10787) and a
553        // populated ~/.claude costs ~223k cache-creation tokens per call.
554        //
555        // v1.0.88 (BUG-2 fix, ADR-0046): the inline `--mcp-config '{}'`
556        // form was rejected by Claude Code 2.1.177 (ADR-0045 Bug 2).
557        // Substitute a tempfile path produced by
558        // `write_empty_mcp_config_tempfile()` and run the full
559        // preflight gate BEFORE `Command::spawn()`, mirroring what
560        // `invoke_codex` already does for the codex backend.
561        let mcp_config_path = crate::spawn::preflight::write_empty_mcp_config_tempfile()?;
562        let argv_refs: [std::ffi::OsString; 0] = [];
563        let preflight_args = crate::spawn::preflight::PreFlightArgs {
564            binary_path: &self.binary,
565            argv: &argv_refs,
566            workspace_root: std::path::Path::new("."),
567            mcp_config_inline_json: None,
568            expected_output_bytes: 65_536,
569            spawner_name: "llm_embedding",
570        };
571        crate::spawn::preflight::preflight_check(&preflight_args)?;
572        let mut cmd = Command::new(&self.binary);
573        cmd.arg("-p")
574            .arg(prompt)
575            .arg("--model")
576            .arg(&self.model)
577            .arg("--json-schema")
578            .arg(schema)
579            .arg("--output-format")
580            .arg("json")
581            .arg("--strict-mcp-config")
582            .arg("--mcp-config")
583            .arg(mcp_config_path.as_os_str())
584            .arg("--settings")
585            .arg(r#"{"hooks":{}}"#)
586            .arg("--dangerously-skip-permissions")
587            .env_clear()
588            .env("PATH", std::env::var("PATH").unwrap_or_default())
589            .env("HOME", std::env::var("HOME").unwrap_or_default())
590            .stdin(Stdio::null())
591            .stdout(Stdio::piped())
592            .stderr(Stdio::piped())
593            // BLOCO 4: cancellation (dropped future) must kill the child.
594            .kill_on_drop(true);
595        if let Some(config_dir) = claude_embedding_config_dir() {
596            cmd.env("CLAUDE_CONFIG_DIR", &config_dir);
597        }
598        let binary_str = self.binary.to_string_lossy().into_owned();
599        let output = match tokio::time::timeout(embed_timeout(), cmd.output()).await {
600            Err(_elapsed) => {
601                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
602                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
603                        secs: embed_timeout().as_secs(),
604                        binary: binary_str.clone(),
605                    },
606                ));
607            }
608            Ok(Err(e)) => {
609                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
610                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
611                        binary: binary_str.clone(),
612                        source: e.to_string(),
613                    },
614                ));
615            }
616            Ok(Ok(o)) => o,
617        };
618        // G45-CR5 / ADR-0043 (v1.0.85): parse the JSON envelope from
619        // `claude -p --output-format json` and detect OAuth quota
620        // exhaustion by looking for the `rate_limit_error` or
621        // `usage` overflow markers before checking the subprocess
622        // exit status. This lets the deterministic fallback in
623        // hybrid-search and recall swap to codex immediately.
624        let stdout_str = String::from_utf8_lossy(&output.stdout);
625        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&stdout_str) {
626            let is_rate_limited = parsed
627                .get("is_error")
628                .and_then(|v| v.as_bool())
629                .unwrap_or(false)
630                && parsed
631                    .get("result")
632                    .and_then(|v| v.as_str())
633                    .map(|s| {
634                        s.contains("rate limit")
635                            || s.contains("quota")
636                            || s.contains("anthropic-ratelimit")
637                    })
638                    .unwrap_or(false);
639            if is_rate_limited {
640                return Err(AppError::Embedding(format!(
641                    "OAuth usage quota exhausted: claude rate_limit detected in stdout: {}",
642                    parsed
643                        .get("result")
644                        .and_then(|v| v.as_str())
645                        .unwrap_or("")
646                        .chars()
647                        .take(120)
648                        .collect::<String>()
649                )));
650            }
651        }
652        if !output.status.success() {
653            let (exit_code, signal) = if let Some(code) = output.status.code() {
654                (Some(code), None)
655            } else {
656                use std::os::unix::process::ExitStatusExt;
657                (None, output.status.signal())
658            };
659            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
660                &output.stdout,
661                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
662            );
663            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
664                &output.stderr,
665                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
666            );
667            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
668            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
669                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
670                    exit_code,
671                    signal,
672                    stdout_tail,
673                    stderr_tail,
674                    binary: binary_str,
675                    hint,
676                },
677            ));
678        }
679        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
680    }
681
682    async fn invoke_codex(
683        &self,
684        prompt: &str,
685        schema_path: &std::path::Path,
686    ) -> Result<String, AppError> {
687        let binary_str = self.binary.to_string_lossy().into_owned();
688        let mut cmd = build_codex_embedding_command(&self.binary, &self.model, schema_path);
689
690        // GAP-META-005 (v1.0.87, ADR-0045): pre-flight gate before spawn.
691        // `tokio::process::Command` does not expose `get_args()`, so we
692        // skip the argv-size check here and rely on binary + workspace
693        // root + output buffer guards. Embedding prompts are bounded by
694        // the schema validator so argv overflow is not a real risk here.
695        //
696        // v1.0.88 (BUG-7 fix, ADR-0046): propagate the preflight error
697        // directly via `AppError::PreFlightFailed` (via the `From`
698        // impl added in `errors.rs`) so callers and operators see the
699        // structured `PreFlightError` variant and the canonical exit
700        // code 16. The previous implementation wrapped the error in
701        // `LlmBackendError::SpawnFailed`, which mapped to a different
702        // exit code and masked the preflight signal.
703        let argv_refs: [std::ffi::OsString; 0] = [];
704        let preflight_args = crate::spawn::preflight::PreFlightArgs {
705            binary_path: &self.binary,
706            argv: &argv_refs,
707            workspace_root: std::path::Path::new("."),
708            mcp_config_inline_json: None,
709            expected_output_bytes: 65_536,
710            spawner_name: "llm_embedding",
711        };
712        crate::spawn::preflight::preflight_check(&preflight_args)?;
713        let _ = binary_str; // silenced: preflight does not need it
714
715        let mut child = match cmd.spawn() {
716            Ok(c) => c,
717            Err(e) => {
718                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
719                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
720                        binary: binary_str,
721                        source: e.to_string(),
722                    },
723                ));
724            }
725        };
726        if let Some(mut stdin) = child.stdin.take() {
727            stdin
728                .write_all(prompt.as_bytes())
729                .await
730                .map_err(|e| AppError::Embedding(format!("codex stdin write failed: {e}")))?;
731        }
732        let output = match tokio::time::timeout(embed_timeout(), child.wait_with_output()).await {
733            Err(_elapsed) => {
734                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
735                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
736                        secs: embed_timeout().as_secs(),
737                        binary: binary_str,
738                    },
739                ));
740            }
741            Ok(Err(e)) => {
742                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
743                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
744                        binary: binary_str,
745                        source: format!("codex wait failed: {e}"),
746                    },
747                ));
748            }
749            Ok(Ok(o)) => o,
750        };
751        if !output.status.success() {
752            let (exit_code, signal) = if let Some(code) = output.status.code() {
753                (Some(code), None)
754            } else {
755                use std::os::unix::process::ExitStatusExt;
756                (None, output.status.signal())
757            };
758            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
759                &output.stdout,
760                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
761            );
762            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
763                &output.stderr,
764                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
765            );
766            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
767            // G42/S7: the headless spawn can still hit interactive
768            // prompts on some codex builds; keep the legacy request_user_input
769            // branch as a special-case hint, and stamp the diagnostic
770            // tail on top of the canonical NonZeroExit envelope.
771            let mut combined_hint = hint;
772            if stderr_tail.contains("request_user_input") {
773                combined_hint.push_str(
774                    " | codex requested interactive input in a headless embedding call; \
775                     upgrade codex (>= 0.134) or switch the embedding backend to claude",
776                );
777            }
778            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
779                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
780                    exit_code,
781                    signal,
782                    stdout_tail,
783                    stderr_tail,
784                    binary: binary_str,
785                    hint: combined_hint,
786                },
787            ));
788        }
789        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
790    }
791}
792
793/// G42/S6: resolves the empty `CLAUDE_CONFIG_DIR` used for embedding
794/// subprocesses.
795///
796/// - `SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR` is honoured when set and
797///   pointing at a directory (same contract as G28-A in claude_runner);
798/// - otherwise a managed directory is created at
799///   `~/.local/state/sqlite-graphrag/claude-empty-config` (mode 0700).
800///   If `~/.claude/.credentials.json` exists (Linux OAuth storage) it is
801///   copied in so authentication still works; on macOS credentials live
802///   in the Keychain and the empty dir is sufficient.
803///
804/// Returns `None` only when HOME is unset AND no override is given —
805/// in that case the subprocess falls back to claude's own default.
806fn claude_embedding_config_dir() -> Option<std::path::PathBuf> {
807    if let Ok(dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
808        let path = std::path::PathBuf::from(dir);
809        if path.is_dir() {
810            return Some(path);
811        }
812        tracing::warn!(
813            target: "embedding",
814            path = %path.display(),
815            "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but not a directory; \
816             falling back to the managed empty config dir"
817        );
818    }
819    let home = std::env::var("HOME").ok()?;
820    let dir = std::path::Path::new(&home)
821        .join(".local/state/sqlite-graphrag")
822        .join("claude-empty-config");
823    if std::fs::create_dir_all(&dir).is_err() {
824        return None;
825    }
826    #[cfg(unix)]
827    {
828        use std::os::unix::fs::PermissionsExt;
829        let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
830    }
831    // Linux stores OAuth credentials on disk; copy them so the isolated
832    // config dir still authenticates. Best-effort: macOS uses Keychain.
833    let creds = std::path::Path::new(&home).join(".claude/.credentials.json");
834    if creds.exists() {
835        let target = dir.join(".credentials.json");
836        if !target.exists() {
837            let _ = std::fs::copy(&creds, &target);
838        }
839    }
840    Some(dir)
841}
842
843fn build_codex_embedding_command(
844    binary: &std::path::Path,
845    model: &str,
846    schema_path: &std::path::Path,
847) -> Command {
848    let mut cmd = Command::new(binary);
849    // v1.0.77: `-c` TOML overrides bypass the codex exec --sandbox propagation
850    // bug (openai/codex#18113). CLI flags alone are insufficient — the exec
851    // subcommand may not inherit --sandbox from the parent codex command.
852    cmd.arg("exec")
853        .arg("-c")
854        .arg("sandbox_mode='read-only'")
855        .arg("-c")
856        .arg("approval_policy='never'")
857        .arg("--json")
858        .arg("--output-schema")
859        .arg(schema_path)
860        .arg("--ephemeral")
861        .arg("--skip-git-repo-check")
862        .arg("--sandbox")
863        .arg("read-only")
864        .arg("--ignore-user-config")
865        .arg("--ignore-rules");
866    if crate::extract::codex_compat::codex_supports_ask_for_approval() {
867        cmd.arg("--ask-for-approval").arg("never");
868    }
869    // v1.0.77: isolate codex from user config by pointing CODEX_HOME at a
870    // minimal directory containing only auth.json (OAuth credentials).
871    let codex_home = prepare_isolated_codex_home();
872    cmd.arg("--model")
873        .arg(model)
874        .arg("-")
875        .env_clear()
876        .env("PATH", std::env::var("PATH").unwrap_or_default())
877        .env("HOME", std::env::var("HOME").unwrap_or_default());
878    if let Some(ref ch) = codex_home {
879        cmd.env("CODEX_HOME", ch);
880    }
881    cmd.stdin(Stdio::piped())
882        .stdout(Stdio::piped())
883        .stderr(Stdio::piped())
884        // BLOCO 4: cancellation (dropped future) must kill the child.
885        .kill_on_drop(true);
886    cmd
887}
888
/// Prepares a per-process `CODEX_HOME` that contains only a copy of the
/// user's `auth.json`, so codex runs without the rest of the user config.
///
/// The directory is
/// `$HOME/.local/share/sqlite-graphrag/codex-home-<pid>` (mode 0700 on
/// Unix). `auth.json` is copied from `$HOME/.codex/auth.json` when the
/// copy is missing or older than the source, so a directory left behind
/// by an earlier process with the same PID does not pin stale credentials.
///
/// Returns `None` when `HOME` is unset or empty, when the user has no
/// `auth.json`, when the directory cannot be created, or when no copy of
/// `auth.json` could be placed in it.
fn prepare_isolated_codex_home() -> Option<std::path::PathBuf> {
    /// Last-modified time of `path`, or `None` when it cannot be read.
    fn modified(path: &std::path::Path) -> Option<std::time::SystemTime> {
        std::fs::metadata(path).and_then(|m| m.modified()).ok()
    }

    // An empty HOME would make every join below relative to the CWD.
    let home = std::env::var_os("HOME").filter(|h| !h.is_empty())?;
    let home = std::path::PathBuf::from(home);
    let real_auth = home.join(".codex/auth.json");
    if !real_auth.is_file() {
        return None;
    }

    let isolated = home
        .join(".local/share/sqlite-graphrag")
        .join(format!("codex-home-{}", std::process::id()));
    // Do not hand out a CODEX_HOME that does not exist.
    std::fs::create_dir_all(&isolated).ok()?;
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best-effort: the directory holds a copy of OAuth credentials.
        let _ = std::fs::set_permissions(&isolated, std::fs::Permissions::from_mode(0o700));
    }

    let target = isolated.join("auth.json");
    // Copy when the target is missing or the source changed after the copy
    // was made; never overwrite a newer target.
    let stale = match (modified(&real_auth), modified(&target)) {
        (_, None) => true,
        (Some(src), Some(dst)) => src > dst,
        (None, Some(_)) => false,
    };
    if stale && std::fs::copy(&real_auth, &target).is_err() && !target.exists() {
        // No credentials could be placed in the isolated home.
        return None;
    }
    Some(isolated)
}
904
905/// Parse an LLM JSON response of type `T`. The two backends emit
906/// different shapes:
907/// - Claude (with `--output-format json`): single JSON object on stdout.
908/// - Codex (with `--json`): JSONL stream with one event per line; the
909///   `agent_message` event's `text` field is the JSON payload.
910///
911/// This helper accepts both shapes and returns the parsed value (or an
912/// error describing the first mismatch).
913fn parse_llm_json<T: serde::de::DeserializeOwned>(stdout: &str) -> Result<T, String> {
914    // Strategy 1: try the whole stdout as JSON (Claude path).
915    if let Ok(parsed) = serde_json::from_str::<T>(stdout) {
916        return Ok(parsed);
917    }
918    // Strategy 2: walk the JSONL line by line and pick the last
919    // `item.completed` of type `agent_message` (Codex path).
920    let mut last_agent_text: Option<String> = None;
921    for line in stdout.lines() {
922        let line = line.trim();
923        if line.is_empty() {
924            continue;
925        }
926        let Ok(event) = serde_json::from_str::<serde_json::Value>(line) else {
927            continue;
928        };
929        if event.get("type").and_then(|t| t.as_str()) != Some("item.completed") {
930            continue;
931        }
932        let item = match event.get("item") {
933            Some(i) => i,
934            None => continue,
935        };
936        if item.get("type").and_then(|t| t.as_str()) != Some("agent_message") {
937            continue;
938        }
939        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
940            last_agent_text = Some(text.to_string());
941        }
942    }
943    let text = last_agent_text
944        .ok_or_else(|| "no agent_message found in codex JSONL output".to_string())?;
945    serde_json::from_str::<T>(&text)
946        .map_err(|e| format!("codex agent_message text does not match schema: {e}; raw={text}"))
947}
948
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a client directly from its fields, bypassing the builder so
    /// tests can point `binary` at an arbitrary executable.
    fn test_client(flavour: EmbeddingFlavour, binary: std::path::PathBuf) -> LlmEmbedding {
        LlmEmbedding {
            flavour,
            binary,
            model: "gpt-5.4".to_string(),
            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
        }
    }

    #[test]
    #[serial_test::serial(env)]
    fn oauth_only_enforce_blocks_api_keys() {
        // Collect the outcomes first and assert only after both variables
        // are removed again, so a failing assertion cannot leak an API key
        // into the tests that run afterwards.
        //
        // SAFETY: this test only sets and unsets env vars; the
        // `serial(env)` group prevents cross-test interference.
        let (anthropic_blocked, openai_blocked) = unsafe {
            std::env::set_var("ANTHROPIC_API_KEY", "test");
            let anthropic = LlmEmbedding::oauth_only_enforce().is_err();
            std::env::remove_var("ANTHROPIC_API_KEY");

            std::env::set_var("OPENAI_API_KEY", "test");
            let openai = LlmEmbedding::oauth_only_enforce().is_err();
            std::env::remove_var("OPENAI_API_KEY");

            (anthropic, openai)
        };
        assert!(anthropic_blocked, "ANTHROPIC_API_KEY must be rejected");
        assert!(openai_blocked, "OPENAI_API_KEY must be rejected");
        assert!(LlmEmbedding::oauth_only_enforce().is_ok());
    }

    #[test]
    fn flavour_as_str_is_stable() {
        assert_eq!(EmbeddingFlavour::Claude.as_str(), "claude");
        assert_eq!(EmbeddingFlavour::Codex.as_str(), "codex");
    }

    #[test]
    fn single_schema_embeds_active_dim() {
        let schema = build_single_schema(64);
        assert!(schema.contains(r#""minItems":64"#));
        assert!(schema.contains(r#""maxItems":64"#));
        let parsed: serde_json::Value =
            serde_json::from_str(&schema).expect("single schema must be valid JSON");
        assert_eq!(parsed["properties"]["embedding"]["minItems"], 64);
    }

    #[test]
    fn batch_schema_is_valid_json_and_unbounded_items() {
        let schema = build_batch_schema(64);
        let parsed: serde_json::Value =
            serde_json::from_str(&schema).expect("batch schema must be valid JSON");
        // The items array must NOT constrain its length so one schema
        // file serves every batch size (G42/S4).
        assert!(parsed["properties"]["items"].get("minItems").is_none());
        assert_eq!(
            parsed["properties"]["items"]["items"]["properties"]["v"]["minItems"],
            64
        );
    }

    #[test]
    fn parse_llm_json_accepts_claude_json() {
        let stdout = r#"{"embedding":[0.0,1.0,2.0]}"#;

        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("claude JSON must parse");

        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
    }

    #[test]
    fn parse_llm_json_accepts_codex_jsonl() {
        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}
{"type":"item.completed","item":{"type":"agent_message","text":"{\"embedding\":[0.0,1.0,2.0]}"}}
{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1}}"#;

        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("codex JSONL must parse");

        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
    }

    #[test]
    fn parse_llm_json_rejects_jsonl_without_agent_message() {
        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}"#;

        let err = parse_llm_json::<EmbeddingResponse>(stdout)
            .expect_err("missing agent_message must fail");

        assert!(err.contains("no agent_message"));
    }

    #[test]
    fn parse_llm_json_accepts_batch_response() {
        let stdout = r#"{"items":[{"i":1,"v":[0.0,1.0]},{"i":2,"v":[2.0,3.0]}]}"#;

        let parsed: BatchEmbeddingResponse = parse_llm_json(stdout).expect("batch JSON must parse");

        assert_eq!(parsed.items.len(), 2);
        assert_eq!(parsed.items[0].i, 1);
        assert_eq!(parsed.items[1].v, vec![2.0, 3.0]);
    }

    #[test]
    fn codex_schema_file_is_created_once_and_reused() {
        let client = test_client(
            EmbeddingFlavour::Codex,
            std::path::PathBuf::from("/bin/true"),
        );
        let first = client
            .codex_schema_file(64, false)
            .expect("schema file must be created");
        let second = client
            .codex_schema_file(64, false)
            .expect("schema file must be reused");
        assert_eq!(first.path(), second.path(), "same dim must reuse the file");

        let batch = client
            .codex_schema_file(64, true)
            .expect("batch schema file must be created");
        assert_ne!(
            first.path(),
            batch.path(),
            "single and batch schemas are distinct files"
        );

        let content = std::fs::read_to_string(first.path()).expect("schema file must be readable");
        assert!(content.contains(r#""minItems":64"#));
    }

    // The command builder reads PATH and HOME from the process environment,
    // so this test joins the `env` serial group to avoid racing the tests
    // that mutate the environment.
    #[test]
    #[serial_test::serial(env)]
    fn codex_embedding_command_reads_prompt_from_stdin() {
        let schema_path = std::env::temp_dir().join("sqlite-graphrag-embed-schema-test.json");
        let cmd = build_codex_embedding_command(
            std::path::Path::new("/bin/true"),
            "gpt-5.4",
            &schema_path,
        );
        let argv: Vec<String> = cmd
            .as_std()
            .get_args()
            .filter_map(|arg| arg.to_str().map(|s| s.to_string()))
            .collect();

        assert!(
            argv.iter().any(|arg| arg == "-"),
            "codex embedding command must read prompt from stdin: {argv:?}"
        );
        assert!(
            !argv.iter().any(|arg| arg.starts_with("passage: ")),
            "prompt text must not be passed as argv: {argv:?}"
        );
        for required in &[
            "exec",
            "-c",
            "sandbox_mode='read-only'",
            "approval_policy='never'",
            "--json",
            "--output-schema",
            "--ephemeral",
            "--skip-git-repo-check",
            "--sandbox",
            "read-only",
            "--ignore-user-config",
            "--ignore-rules",
            "--model",
            "gpt-5.4",
        ] {
            assert!(
                argv.iter().any(|arg| arg == required),
                "missing flag {required} in {argv:?}"
            );
        }
    }

    #[cfg(unix)]
    #[test]
    #[serial_test::serial(env)]
    fn embed_passage_sends_prompt_to_codex_stdin() {
        use std::os::unix::fs::PermissionsExt;

        // Mock codex: fails with exit 41 unless stdin is exactly the expected
        // prompt, otherwise prints one agent_message event carrying a
        // 64-element zero vector.
        let temp = tempfile::tempdir().expect("tempdir must exist");
        let binary = temp.path().join("codex-stdin-check");
        let script = r#"#!/usr/bin/env bash
set -euo pipefail

prompt="$(cat)"
if [[ "$prompt" != "passage: codex-cli" ]]; then
  echo "unexpected stdin: $prompt" >&2
  exit 41
fi

vals="0.0"
for _ in $(seq 2 64); do
  vals="$vals,0.0"
done
payload="{\"embedding\":[$vals]}"
escaped="${payload//\"/\\\"}"
echo "{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"$escaped\"}}"
"#;
        std::fs::write(&binary, script).expect("mock codex script must be written");
        let mut perms = std::fs::metadata(&binary)
            .expect("mock codex metadata must exist")
            .permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&binary, perms).expect("mock codex must be executable");

        let embedding = test_client(EmbeddingFlavour::Codex, binary);

        // Pin the dimensionality so the mock script and the validation
        // agree regardless of test execution order. The variable is set only
        // around the call and removed BEFORE the result is unwrapped, so a
        // failure cannot leak it into later tests.
        // SAFETY: guarded by serial(env).
        unsafe {
            std::env::set_var("SQLITE_GRAPHRAG_EMBEDDING_DIM", "64");
        }
        let outcome = embedding.embed_passage("codex-cli");
        // SAFETY: guarded by serial(env).
        unsafe {
            std::env::remove_var("SQLITE_GRAPHRAG_EMBEDDING_DIM");
        }

        let vector = outcome.expect("stdin-backed codex embedding must succeed");
        assert_eq!(vector.len(), 64);
        assert!(vector.iter().all(|value| *value == 0.0));
    }

    // ---------------------------------------------------------------
    // ADR-0042 / GAP-002: LlmEmbeddingBuilder unit tests
    // ---------------------------------------------------------------

    /// `claude_default` returns a builder pre-set to the Claude flavour
    /// with no binary or model override. Only the builder fields are
    /// inspected here; `build()` is not called, so the test does not
    /// depend on a `claude` binary being installed.
    #[test]
    fn claude_default_resolves_path() {
        let builder = LlmEmbeddingBuilder::claude_default();
        assert_eq!(builder.flavour, EmbeddingFlavour::Claude);
        assert!(builder.binary_override.is_none());
        assert!(builder.model_override.is_none());
    }

    /// `override_binary` stores the provided path verbatim on the builder.
    #[test]
    fn override_binary_uses_provided() {
        let path = std::path::PathBuf::from("/tmp/fake-claude-binary");
        let builder = LlmEmbeddingBuilder::claude_default().override_binary(path.clone());
        assert_eq!(builder.binary_override.as_ref(), Some(&path));
    }

    /// `override_model` stores the provided model name verbatim on the
    /// builder.
    #[test]
    fn override_model_uses_provided() {
        let builder =
            LlmEmbeddingBuilder::codex_default().override_model("gpt-5.4-custom".to_string());
        assert_eq!(builder.model_override.as_deref(), Some("gpt-5.4-custom"));
    }
}