Skip to main content

sqlite_graphrag/extract/
llm_embedding.rs

1//! LLM-based embedding backend (v1.0.76 default; reworked in v1.0.79 G42).
2//!
3//! `LlmEmbedding` is the production embedding client. It wraps headless
4//! invocations of `claude code` or `codex` and returns f32 vectors of the
5//! active dimensionality (`crate::constants::embedding_dim()`, default 64).
6//!
7//! v1.0.79 (G42) changes:
8//! - S1: the dimensionality is no longer hardcoded here — the single
9//!   source of truth lives in `crate::constants` and the JSON schemas
10//!   are generated dynamically.
11//! - S2: `embed_batch` embeds N numbered texts per LLM call with the
12//!   `{items:[{i,v}]}` schema, collapsing 39 subprocess spawns into 4-5.
13//! - S4: the codex `--output-schema` file is a `tempfile::NamedTempFile`
14//!   with a randomised name created once per client and shared across
15//!   clones via `Arc` — no per-call write+delete, no PID-path races.
16//! - S5: the claude model honours `SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL`
17//!   (symmetric to the codex env var). ZERO hardcoded models without
18//!   an env override.
19//! - S6: `CLAUDE_CONFIG_DIR` points at an empty managed directory BY
20//!   DEFAULT, because `--strict-mcp-config`/`--mcp-config '{}'` are
21//!   silently ignored upstream (anthropics/claude-code#10787) and a
22//!   full `~/.claude` costs ~223k cache-creation tokens per call.
23//! - S7: the codex `request_user_input` failure mode maps to an
24//!   actionable error instead of an opaque exit 11.
25//! - BLOCO 4: every subprocess uses `kill_on_drop(true)` plus an
26//!   explicit `tokio::time::timeout`, so cancellation never leaks a
27//!   child and a hung LLM cannot stall the pipeline forever.
28//!
29//! OAuth is the only supported credential path. The constructor rejects
30//! `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` in the environment — see
31//! `v1.0.69 (G31) OAuth-Only Enforcement`.
32
33use crate::errors::AppError;
34use serde::Deserialize;
35use std::process::Stdio;
36use std::sync::Arc;
37use tokio::io::AsyncWriteExt;
38use tokio::process::Command;
39
/// Fallback timeout, in seconds, for a single LLM call when no valid
/// override is configured. Consistent with the `--claude-timeout` /
/// `--codex-timeout` defaults used by ingest. Override it via
/// `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS`.
const DEFAULT_EMBED_TIMEOUT_SECS: u64 = 60;

/// Resolves the per-call embedding timeout from the environment.
///
/// `SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS` is honoured only when it parses
/// as a `u64` within `10..=3600`; an unset, unparsable or out-of-range
/// value falls back to `DEFAULT_EMBED_TIMEOUT_SECS`.
fn embed_timeout() -> std::time::Duration {
    let configured = match std::env::var("SQLITE_GRAPHRAG_EMBED_TIMEOUT_SECS") {
        Ok(raw) => raw.parse::<u64>().ok(),
        Err(_) => None,
    };
    let secs = match configured {
        Some(n @ 10..=3_600) => n,
        _ => DEFAULT_EMBED_TIMEOUT_SECS,
    };
    std::time::Duration::from_secs(secs)
}
53
/// v1.0.89 (GAP-4): scales the per-call timeout with batch size.
///
/// A batch of zero or one item uses the base timeout returned by
/// `embed_timeout()` (60s default). Each additional item adds 15s to
/// account for the LLM generating more embedding vectors in the same call.
///
/// The extra-item count is converted with a checked conversion and the
/// arithmetic saturates, so an oversized `batch_size` produces a very long
/// timeout rather than a silently wrapped (too short) one or a panic.
#[cfg(test)]
fn embed_timeout_for_batch(batch_size: usize) -> std::time::Duration {
    const PER_EXTRA_ITEM: std::time::Duration = std::time::Duration::from_secs(15);
    // Clamp instead of `as u32`: a lossy cast would wrap on 64-bit targets.
    let extra_items =
        <u32 as std::convert::TryFrom<usize>>::try_from(batch_size.saturating_sub(1))
            .unwrap_or(u32::MAX);
    embed_timeout().saturating_add(PER_EXTRA_ITEM.saturating_mul(extra_items))
}
64
/// Cross-platform helper for an `ExitStatus` whose `.code()` returned
/// `None`: yields `(exit_code, signal)`.
///
/// The exit-code half is always `None`. On Unix the second half is the
/// terminating signal reported by the status, if any; on every other
/// platform it is `None` as well.
fn extract_exit_info(status: &std::process::ExitStatus) -> (Option<i32>, Option<i32>) {
    #[cfg(unix)]
    let signal = std::os::unix::process::ExitStatusExt::signal(status);
    #[cfg(not(unix))]
    let signal = {
        // Keep the parameter "used" on platforms without signals.
        let _ = status;
        None
    };
    (None, signal)
}
81
/// G42/S1: JSON schema for a single-vector response, generated from the
/// active dimensionality.
///
/// The schema describes an object with exactly one required property,
/// `embedding`, holding an array of numbers whose length is pinned to
/// `dim` via `minItems`/`maxItems`.
fn build_single_schema(dim: usize) -> String {
    let len = dim.to_string();
    [
        r#"{"type":"object","properties":{"embedding":{"type":"array","items":{"type":"number"},"minItems":"#,
        len.as_str(),
        r#","maxItems":"#,
        len.as_str(),
        r#"}},"required":["embedding"],"additionalProperties":false}"#,
    ]
    .concat()
}
88
/// G42/S2: JSON schema for the batch response shape `{items:[{i,v}]}`.
///
/// Each item requires an integer `i` and a number array `v` whose length
/// is pinned to `dim`. The outer `items` array length is deliberately
/// left unconstrained so ONE schema serves every batch size (index
/// coverage is validated in Rust after parsing).
fn build_batch_schema(dim: usize) -> String {
    let len = dim.to_string();
    [
        r#"{"type":"object","properties":{"items":{"type":"array","items":{"type":"object","properties":{"i":{"type":"integer"},"v":{"type":"array","items":{"type":"number"},"minItems":"#,
        len.as_str(),
        r#","maxItems":"#,
        len.as_str(),
        r#"}},"required":["i","v"],"additionalProperties":false}}},"required":["items"],"additionalProperties":false}"#,
    ]
    .concat()
}
97
/// Embedding client that obtains vectors by spawning a headless LLM CLI.
///
/// Clones share the codex schema cache: `codex_schemas` is an `Arc`, so
/// every clone sees the same lazily-created tempfiles.
#[derive(Clone, Debug)]
pub struct LlmEmbedding {
    /// Which LLM headless binary to spawn: `claude`, `codex` or `opencode`.
    flavour: EmbeddingFlavour,
    /// Cached path to the binary to avoid PATH lookups on every call.
    binary: std::path::PathBuf,
    /// Model name. Resolved from env overrides at construction time.
    model: String,
    /// G42/S4: lazily-created codex `--output-schema` tempfiles, shared
    /// across clones. Keyed by dim so an env change between tests cannot
    /// serve a stale schema.
    codex_schemas: Arc<parking_lot::Mutex<CodexSchemaFiles>>,
    /// BUG-TIMEOUT-HARDCODE-001: instance-scoped timeout override.
    /// Precedence: this field > env var > DEFAULT_EMBED_TIMEOUT_SECS.
    timeout_override: Option<std::time::Duration>,
}
114
/// Cache of the codex `--output-schema` tempfiles, one slot per response
/// shape. Each slot stores the dimensionality the schema was generated
/// for alongside the shared tempfile handle.
#[derive(Debug, Default)]
struct CodexSchemaFiles {
    /// Schema file for the single-vector response, tagged with its dim.
    single: Option<(usize, Arc<tempfile::NamedTempFile>)>,
    /// Schema file for the batch response, tagged with its dim.
    batch: Option<(usize, Arc<tempfile::NamedTempFile>)>,
}
120
/// Identifies which headless LLM CLI backs an embedder.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
pub enum EmbeddingFlavour {
    /// The `claude` CLI.
    Claude,
    /// The `codex` CLI.
    Codex,
    /// The `opencode` CLI.
    Opencode,
}
127
/// ADR-0042 / GAP-002: builder for [`LlmEmbedding`] that lets callers
/// override the binary path and model without having to remember the
/// env-var names per flavour. Replaces the duplicated `with_codex` /
/// `with_claude` bodies that diverged in v1.0.82 (GAP-002: the Claude
/// arm of `embed_via_backend` re-did the PATH probe via
/// `LlmEmbedding::detect_available` and could silently pick `codex`).
#[derive(Clone, Debug)]
pub struct LlmEmbeddingBuilder {
    /// Backend the built embedder will spawn.
    flavour: EmbeddingFlavour,
    /// Explicit binary path; when `None` the env var / PATH probe is used.
    binary_override: Option<std::path::PathBuf>,
    /// Explicit model name; when `None` the per-flavour env lookup is used.
    model_override: Option<String>,
    /// Explicit per-call timeout, forwarded to the built embedder.
    timeout_override: Option<std::time::Duration>,
}
141
142impl LlmEmbeddingBuilder {
143    /// Convenience: produce a Claude-backed builder pre-configured with
144    /// the canonical default binary + model.
145    /// Convenience: produce a Claude-backed builder pre-configured with
146    /// the canonical default binary + model.
147    pub fn claude_default() -> Self {
148        Self {
149            flavour: EmbeddingFlavour::Claude,
150            binary_override: None,
151            model_override: None,
152            timeout_override: None,
153        }
154    }
155
156    /// Convenience: produce a Codex-backed builder pre-configured with
157    /// the canonical default binary + model.
158    pub fn codex_default() -> Self {
159        Self {
160            flavour: EmbeddingFlavour::Codex,
161            binary_override: None,
162            model_override: None,
163            timeout_override: None,
164        }
165    }
166
167    /// Convenience: produce an OpenCode-backed builder pre-configured with
168    /// the canonical default binary + model.
169    pub fn opencode_default() -> Self {
170        Self {
171            flavour: EmbeddingFlavour::Opencode,
172            binary_override: None,
173            model_override: None,
174            timeout_override: None,
175        }
176    }
177    /// Override the binary path (skips the `which::which` PATH probe).
178    pub fn override_binary(mut self, binary: std::path::PathBuf) -> Self {
179        self.binary_override = Some(binary);
180        self
181    }
182
183    /// Override the model name (skips the env-var lookup).
184    pub fn override_model(mut self, model: String) -> Self {
185        self.model_override = Some(model);
186        self
187    }
188
189    /// Override the per-call embedding timeout (skips env-var lookup).
190    pub fn override_timeout(mut self, secs: u64) -> Self {
191        let clamped = secs.clamp(10, 3_600);
192        self.timeout_override = Some(std::time::Duration::from_secs(clamped));
193        self
194    }
195
196    /// Build the [`LlmEmbedding`]. Enforces OAuth-only and resolves the
197    /// binary/model via the override or the env-var defaults.
198    pub fn build(self) -> Result<LlmEmbedding, AppError> {
199        LlmEmbedding::oauth_only_enforce()?;
200        let binary = match self.binary_override {
201            Some(path) => resolve_real_binary(&path),
202            None => {
203                let (env_var, which_name) = match self.flavour {
204                    EmbeddingFlavour::Codex => ("SQLITE_GRAPHRAG_CODEX_BINARY", "codex"),
205                    EmbeddingFlavour::Claude => ("SQLITE_GRAPHRAG_CLAUDE_BINARY", "claude"),
206                    EmbeddingFlavour::Opencode => ("SQLITE_GRAPHRAG_OPENCODE_BINARY", "opencode"),
207                };
208                let path = std::env::var_os(env_var)
209                    .map(std::path::PathBuf::from)
210                    .or_else(|| which::which(which_name).ok())
211                    .ok_or_else(|| {
212                        AppError::Embedding(format!("`{which_name}` not found on PATH"))
213                    })?;
214                resolve_real_binary(&path)
215            }
216        };
217        let model = match self.model_override {
218            Some(m) => m,
219            None => match self.flavour {
220                EmbeddingFlavour::Codex => codex_embed_model(),
221                EmbeddingFlavour::Claude => claude_embed_model(),
222                EmbeddingFlavour::Opencode => opencode_embed_model(),
223            },
224        };
225        Ok(LlmEmbedding {
226            flavour: self.flavour,
227            binary,
228            model,
229            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
230            timeout_override: self.timeout_override,
231        })
232    }
233}
234
235impl EmbeddingFlavour {
236    pub fn as_str(self) -> &'static str {
237        match self {
238            Self::Claude => "claude",
239            Self::Codex => "codex",
240            Self::Opencode => "opencode",
241        }
242    }
243}
244
/// Deserialized single-vector response: `{"embedding": [...]}`.
#[derive(Debug, Deserialize)]
struct EmbeddingResponse {
    /// The embedding vector as returned by the LLM; its length is checked
    /// against the active dimensionality after parsing.
    embedding: Vec<f32>,
}
249
/// Deserialized batch response: `{"items": [{"i": .., "v": [..]}, ..]}`.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingResponse {
    /// One entry per embedded text.
    items: Vec<BatchEmbeddingItem>,
}
254
/// One entry of a batch embedding response.
#[derive(Debug, Deserialize)]
struct BatchEmbeddingItem {
    /// 1-based index of the text within the batch prompt.
    i: usize,
    /// Embedding vector for that text.
    v: Vec<f32>,
}
260
261/// Follows symlinks and shell-script shim `exec` targets to find
262/// the real ELF binary. Shim wrappers (like `~/.graphrag-shim/codex`)
263/// can strip hardening flags; bypassing them is a security requirement.
264pub fn resolve_real_binary(path: &std::path::Path) -> std::path::PathBuf {
265    if let Ok(canonical) = std::fs::canonicalize(path) {
266        if is_elf_binary(&canonical) {
267            return canonical;
268        }
269        if let Some(exec_target) = extract_exec_target_from_shim(&canonical) {
270            if exec_target.exists() && is_elf_binary(&exec_target) {
271                return exec_target;
272            }
273        }
274        return canonical;
275    }
276    path.to_path_buf()
277}
278
/// Returns `true` when the file at `path` starts with the ELF magic
/// number (`0x7f 'E' 'L' 'F'`).
///
/// Only the four header bytes are read, so probing a large executable
/// does not load the whole file into memory. Any failure — the file
/// cannot be opened, or it is shorter than four bytes — yields `false`.
fn is_elf_binary(path: &std::path::Path) -> bool {
    use std::io::Read;

    const ELF_MAGIC: [u8; 4] = [0x7f, b'E', b'L', b'F'];

    let mut file = match std::fs::File::open(path) {
        Ok(file) => file,
        Err(_) => return false,
    };
    let mut header = [0u8; 4];
    // `read_exact` fails on short files, which maps to "not ELF".
    file.read_exact(&mut header).is_ok() && header == ELF_MAGIC
}
284
/// Extracts the program named by the last `exec ...` line of a shell-script
/// shim.
///
/// Returns `None` when the file cannot be read as UTF-8 text, does not
/// start with a `#!` shebang, or contains no line beginning with `exec `.
///
/// The target may be bare (`exec /usr/bin/codex "$@"`) or wrapped in
/// single or double quotes (`exec "/opt/my tools/codex" "$@"`). Quotes are
/// stripped so the returned path can be checked on disk. No shell
/// expansion is performed, so a target containing variables or command
/// substitutions is returned literally.
fn extract_exec_target_from_shim(path: &std::path::Path) -> Option<std::path::PathBuf> {
    let content = std::fs::read_to_string(path).ok()?;
    if !content.starts_with("#!") {
        return None;
    }
    // Scan bottom-up so the last `exec` line in the script wins.
    let after_exec = content
        .lines()
        .rev()
        .find_map(|line| line.trim().strip_prefix("exec "))?;
    let rest = after_exec.trim_start();
    let target = match rest.chars().next() {
        Some(quote) if quote == '"' || quote == '\'' => {
            // Both quote characters are one byte, so slicing at 1 is safe.
            let inner = &rest[1..];
            match inner.find(quote) {
                Some(end) => &inner[..end],
                // Unterminated quote: fall back to the first word.
                None => inner.split_whitespace().next()?,
            }
        }
        _ => rest.split_whitespace().next()?,
    };
    if target.is_empty() {
        return None;
    }
    Some(std::path::PathBuf::from(target))
}
300
301/// G42/S5: claude embedding model with env override, symmetric to the
302/// codex `SQLITE_GRAPHRAG_CODEX_EMBED_MODEL` introduced in v1.0.78.
303fn claude_embed_model() -> String {
304    // Precedence: SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL > SQLITE_GRAPHRAG_LLM_MODEL > default
305    std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMBED_MODEL")
306        .or_else(|_| std::env::var("SQLITE_GRAPHRAG_LLM_MODEL"))
307        .unwrap_or_else(|_| {
308            tracing::info!(
309                target: "llm_embedding",
310                "no model specified; defaulting to claude-sonnet-4-6"
311            );
312            "claude-sonnet-4-6".to_string()
313        })
314}
315
316fn codex_embed_model() -> String {
317    // Precedence: SQLITE_GRAPHRAG_CODEX_EMBED_MODEL > SQLITE_GRAPHRAG_LLM_MODEL > default
318    std::env::var("SQLITE_GRAPHRAG_CODEX_EMBED_MODEL")
319        .or_else(|_| std::env::var("SQLITE_GRAPHRAG_LLM_MODEL"))
320        .unwrap_or_else(|_| {
321            tracing::info!(
322                target: "llm_embedding",
323                "no model specified; defaulting to gpt-5.5"
324            );
325            "gpt-5.5".to_string()
326        })
327}
328
329fn opencode_embed_model() -> String {
330    // Precedence: SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL > SQLITE_GRAPHRAG_OPENCODE_MODEL > default
331    // NOTE: intentionally does NOT fall back to SQLITE_GRAPHRAG_LLM_MODEL because that
332    // var typically holds a codex/claude model name (e.g. "gpt-5.4-mini") that opencode
333    // does not recognise — cross-contamination caused ProviderModelNotFoundError (v1.0.90 audit).
334    std::env::var("SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL")
335        .or_else(|_| std::env::var("SQLITE_GRAPHRAG_OPENCODE_MODEL"))
336        .unwrap_or_else(|_| {
337            tracing::info!(
338                target: "llm_embedding",
339                "no model specified; defaulting to opencode/big-pickle"
340            );
341            "opencode/big-pickle".to_string()
342        })
343}
344
345impl LlmEmbedding {
346    /// Detects which LLM CLI is available on PATH and returns the
347    /// matching embedding client.
348    ///
349    /// v1.0.76: PREFERS `codex` over `claude` because:
350    /// - Claude Code 2.1+ ships a 180k+ token system context (plugins,
351    ///   skills, agents, MCP) that overflows the 200k context window
352    ///   for even trivial embedding prompts and returns "Prompt is too
353    ///   long". (v1.0.79/S6 mitigates this with an empty
354    ///   `CLAUDE_CONFIG_DIR`, but codex stays the lighter default.)
355    /// - Codex 0.134+ is lightweight (~5k system context) and the
356    ///   `StructuredOutput` tool reliably returns the requested vectors.
357    pub fn detect_available() -> Result<Self, AppError> {
358        Self::oauth_only_enforce()?;
359
360        // v1.0.89 (GAP-1): honour SQLITE_GRAPHRAG_CODEX_BINARY for the
361        // embedding pipeline, symmetric with SQLITE_GRAPHRAG_CLAUDE_BINARY.
362        let codex_path = std::env::var_os("SQLITE_GRAPHRAG_CODEX_BINARY")
363            .map(std::path::PathBuf::from)
364            .or_else(|| which::which("codex").ok());
365        if let Some(path) = codex_path {
366            return Ok(Self {
367                flavour: EmbeddingFlavour::Codex,
368                binary: resolve_real_binary(&path),
369                model: codex_embed_model(),
370                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
371                timeout_override: None,
372            });
373        }
374        // v1.0.89: honour SQLITE_GRAPHRAG_CLAUDE_BINARY for the embedding
375        // pipeline, not just ingest/enrich. This lets operators override the
376        // symlink-resolved path (e.g. a stale multi-instance binary).
377        let claude_path = std::env::var_os("SQLITE_GRAPHRAG_CLAUDE_BINARY")
378            .map(std::path::PathBuf::from)
379            .or_else(|| which::which("claude").ok());
380        if let Some(path) = claude_path {
381            return Ok(Self {
382                flavour: EmbeddingFlavour::Claude,
383                binary: resolve_real_binary(&path),
384                model: claude_embed_model(),
385                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
386                timeout_override: None,
387            });
388        }
389        // v1.0.90 (GAP-OPENCODE-001): probe opencode as 3rd priority.
390        let opencode_path = std::env::var_os("SQLITE_GRAPHRAG_OPENCODE_BINARY")
391            .map(std::path::PathBuf::from)
392            .or_else(|| which::which("opencode").ok());
393        if let Some(path) = opencode_path {
394            return Ok(Self {
395                flavour: EmbeddingFlavour::Opencode,
396                binary: resolve_real_binary(&path),
397                model: opencode_embed_model(),
398                codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
399                timeout_override: None,
400            });
401        }
402        Err(AppError::Embedding(
403            "no LLM CLI found on PATH: install `codex` (0.130+), `claude` (Claude Code 2.1+), or `opencode` (1.17+)"
404                .to_string(),
405        ))
406    }
407
408    /// Instance-scoped timeout. Precedence:
409    /// `timeout_override` field > env var > DEFAULT_EMBED_TIMEOUT_SECS.
410    fn instance_embed_timeout(&self) -> std::time::Duration {
411        if let Some(d) = self.timeout_override {
412            return d;
413        }
414        embed_timeout()
415    }
416
417    /// Instance-scoped batch timeout: base + 15s per extra item.
418    fn instance_embed_timeout_for_batch(&self, batch_size: usize) -> std::time::Duration {
419        let base = self.instance_embed_timeout();
420        let extra = std::time::Duration::from_secs(15) * batch_size.saturating_sub(1) as u32;
421        base + extra
422    }
423
424    pub fn with_codex() -> Result<Self, AppError> {
425        Self::with_codex_builder().build()
426    }
427
428    pub fn with_claude() -> Result<Self, AppError> {
429        Self::with_claude_builder().build()
430    }
431
432    /// ADR-0042 / GAP-002: builder entry point for a codex-backed
433    /// embedder with default model resolution.
434    pub fn with_codex_builder() -> LlmEmbeddingBuilder {
435        LlmEmbeddingBuilder {
436            flavour: EmbeddingFlavour::Codex,
437            binary_override: None,
438            model_override: None,
439            timeout_override: None,
440        }
441    }
442
443    /// ADR-0042 / GAP-002: builder entry point for a claude-backed
444    /// embedder with default model resolution.
445    pub fn with_claude_builder() -> LlmEmbeddingBuilder {
446        LlmEmbeddingBuilder {
447            flavour: EmbeddingFlavour::Claude,
448            binary_override: None,
449            model_override: None,
450            timeout_override: None,
451        }
452    }
453
454    pub fn with_opencode() -> Result<Self, AppError> {
455        Self::with_opencode_builder().build()
456    }
457
458    pub fn with_opencode_builder() -> LlmEmbeddingBuilder {
459        LlmEmbeddingBuilder {
460            flavour: EmbeddingFlavour::Opencode,
461            binary_override: None,
462            model_override: None,
463            timeout_override: None,
464        }
465    }
466    /// v1.0.69 (G31): refuse to spawn if an API key is set. The CLI
467    /// must use OAuth. The two API-key env vars are NOT in the
468    /// env-clear whitelist, so a parent process that exports them
469    /// will see this error.
470    fn oauth_only_enforce() -> Result<(), AppError> {
471        if std::env::var("ANTHROPIC_API_KEY").is_ok() {
472            return Err(AppError::Validation(
473                "ANTHROPIC_API_KEY is set; v1.0.76 requires OAuth. \
474                 unset it and use `claude login` instead."
475                    .into(),
476            ));
477        }
478        if std::env::var("OPENAI_API_KEY").is_ok() {
479            return Err(AppError::Validation(
480                "OPENAI_API_KEY is set; v1.0.76 requires OAuth. \
481                 unset it and use `codex login` instead."
482                    .into(),
483            ));
484        }
485        Ok(())
486    }
487
488    /// Embeds a single passage (chunk of a memory body). Returns an
489    /// f32 vector of the active dimensionality.
490    pub fn embed_passage(&self, text: &str) -> Result<Vec<f32>, AppError> {
491        self.invoke_with_prefix(crate::constants::PASSAGE_PREFIX, text)
492    }
493
494    /// Embeds a single query. The LLM uses a different prompt prefix
495    /// to disambiguate query from passage.
496    pub fn embed_query(&self, text: &str) -> Result<Vec<f32>, AppError> {
497        self.invoke_with_prefix(crate::constants::QUERY_PREFIX, text)
498    }
499
500    /// G56: returns a stable label for the active embedding model so the
501    /// in-process entity-embedding cache can key by `(model, text)`.
502    /// Embeddings produced by different models are not interchangeable,
503    /// so a cache entry from one model must never satisfy a request
504    /// served by another.
505    pub fn model_label(&self) -> String {
506        format!("{}:{}", self.flavour.as_str(), self.model)
507    }
508
    /// ADR-0042 / BUG-003 fix: returns the resolved [`EmbeddingFlavour`]
    /// of this embedder, so callers can report the backend that
    /// ACTUALLY executed the embedding (not the one requested in the
    /// chain) — for example when claude was substituted for a missing
    /// codex, the operator sees the truth.
    ///
    /// NOTE(review): the original comment named the specific callers and
    /// the output field that surface this value, but those identifiers
    /// were lost from the comment text; restore them from the upstream
    /// source if available.
    pub fn flavour(&self) -> EmbeddingFlavour {
        self.flavour
    }
519
    /// G42/S2: embeds a batch of `(global_index, text)` pairs in ONE
    /// LLM call. Returns `(global_index, vector)` pairs in the same order
    /// as `batch`. Async — this is the unit of work scheduled by the
    /// bounded fan-out in `crate::embedder`.
    ///
    /// Special cases: an empty batch returns an empty vector without
    /// calling the LLM, and a single-item batch is delegated to
    /// `invoke_single_async` (single-vector schema).
    ///
    /// # Errors
    ///
    /// Returns `AppError::Embedding` when the response cannot be parsed,
    /// when the item count differs from `batch.len()`, when an item index
    /// is outside `1..=batch.len()`, when a vector's length differs from
    /// the active dimensionality, or when an index is missing from the
    /// response. Errors from the underlying `invoke_*` call are propagated.
    ///
    /// Cancel safety: the future owns its subprocess via
    /// `kill_on_drop(true)`, so dropping it (e.g. losing a
    /// `tokio::select!` race against a cancellation token) kills the
    /// child and leaks nothing.
    pub async fn embed_batch_async(
        &self,
        prefix: &str,
        batch: &[(usize, String)],
    ) -> Result<Vec<(usize, Vec<f32>)>, AppError> {
        let dim = crate::constants::embedding_dim();
        if batch.is_empty() {
            return Ok(Vec::new());
        }
        if batch.len() == 1 {
            // One text: reuse the single-vector path instead of the batch schema.
            let (idx, text) = (&batch[0].0, &batch[0].1);
            let v = self.invoke_single_async(prefix, text, dim).await?;
            return Ok(vec![(*idx, v)]);
        }

        let mut prompt = format!(
            "Generate {dim}-dimensional semantic embedding vectors for each numbered text below.\n\
             Return a JSON object with an \"items\" array containing EXACTLY {n} items.\n\
             Each item has \"i\" (the 1-based index) and \"v\" (the {dim}-float vector, values between -1 and 1).\n\n",
            n = batch.len()
        );
        // Texts are numbered by their 1-based position in this batch; the
        // caller's global index is restored when the result is assembled.
        for (pos, (_, text)) in batch.iter().enumerate() {
            prompt.push_str(&format!("{}: {prefix}{text}\n", pos + 1));
        }

        // BUG-TIMEOUT-HARDCODE-001: batch timeout is now instance-scoped
        // (no more std::env::set_var which was unsafe in multi-thread).
        // NOTE(review): `_batch_timeout` is computed but never passed to any
        // of the `invoke_*` calls below, so within this function the
        // batch-size scaling has no effect — confirm whether the invoke
        // helpers are supposed to receive it.
        let _batch_timeout = self.instance_embed_timeout_for_batch(batch.len());
        let stdout = match self.flavour {
            EmbeddingFlavour::Claude => {
                // Claude takes the schema inline as a JSON string.
                self.invoke_claude(&prompt, &build_batch_schema(dim))
                    .await?
            }
            EmbeddingFlavour::Codex => {
                // Codex takes the schema as a path to the cached tempfile.
                let schema = self.codex_schema_file(dim, true)?;
                self.invoke_codex(&prompt, schema.path()).await?
            }
            EmbeddingFlavour::Opencode => {
                // No schema argument here: the output shape is spelled out
                // in the prompt itself.
                let opencode_prompt = format!(
                    "You are a batch embedding function. For each numbered text item below, \
                     generate an array of exactly {dim} floating-point numbers between -1 and 1 \
                     representing its semantic meaning. Output ONLY a JSON object with key \"items\" \
                     containing an array of objects, each with \"i\" (the 1-based index) and \
                     \"v\" (the {dim}-element float array). No markdown, no explanation.\n\n\
                     {prompt}"
                );
                self.invoke_opencode(&opencode_prompt).await?
            }
        };
        let parsed: BatchEmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
            AppError::Embedding(format!(
                "LLM batch embedding response parse failed: {e}; raw={stdout}"
            ))
        })?;
        // Coverage check 1: the item count must match the batch size.
        if parsed.items.len() != batch.len() {
            return Err(AppError::Embedding(format!(
                "LLM batch returned {} items, expected {} (G42/S2 coverage check)",
                parsed.items.len(),
                batch.len()
            )));
        }
        // Slot `k` holds the vector for the text numbered `k + 1` in the prompt.
        let mut out: Vec<Option<Vec<f32>>> = vec![None; batch.len()];
        for item in parsed.items {
            if item.i == 0 || item.i > batch.len() {
                return Err(AppError::Embedding(format!(
                    "LLM batch item index {} out of range 1..={}",
                    item.i,
                    batch.len()
                )));
            }
            if item.v.len() != dim {
                return Err(AppError::Embedding(format!(
                    "LLM batch item {} returned {} dims, expected {dim}; \
                     refusing to truncate or pad silently (G42/C5)",
                    item.i,
                    item.v.len()
                )));
            }
            out[item.i - 1] = Some(item.v);
        }
        // Coverage check 2: a repeated index overwrites its slot and leaves
        // another slot empty, which is reported as a missing index here.
        let mut result = Vec::with_capacity(batch.len());
        for (pos, slot) in out.into_iter().enumerate() {
            let v = slot.ok_or_else(|| {
                AppError::Embedding(format!(
                    "LLM batch response is missing item index {} (G42/S2 coverage check)",
                    pos + 1
                ))
            })?;
            // Map the batch position back to the caller's global index.
            result.push((batch[pos].0, v));
        }
        Ok(result)
    }
621
622    fn invoke_with_prefix(&self, prefix: &str, text: &str) -> Result<Vec<f32>, AppError> {
623        let dim = crate::constants::embedding_dim();
624        let inner = self.invoke_single_async(prefix, text, dim);
625        // v1.0.79 (G42/A2): reuse the process-wide multi-thread runtime
626        // instead of building a current-thread runtime PER CALL. Inside
627        // an existing runtime (tests, async commands) block_in_place
628        // keeps the worker pool healthy.
629        match tokio::runtime::Handle::try_current() {
630            Ok(handle) => tokio::task::block_in_place(|| handle.block_on(inner)),
631            Err(_) => crate::embedder::shared_runtime()?.block_on(inner),
632        }
633    }
634
635    async fn invoke_single_async(
636        &self,
637        prefix: &str,
638        text: &str,
639        dim: usize,
640    ) -> Result<Vec<f32>, AppError> {
641        let prompt = format!("{prefix}{text}");
642        let stdout = match self.flavour {
643            EmbeddingFlavour::Claude => {
644                self.invoke_claude(&prompt, &build_single_schema(dim))
645                    .await?
646            }
647            EmbeddingFlavour::Codex => {
648                let schema = self.codex_schema_file(dim, false)?;
649                self.invoke_codex(&prompt, schema.path()).await?
650            }
651            EmbeddingFlavour::Opencode => {
652                let opencode_prompt = format!(
653                    "You are an embedding function. Given the input text, output a JSON object \
654                     with a single key \"embedding\" containing an array of exactly {dim} \
655                     floating-point numbers between -1 and 1 that represent the semantic meaning \
656                     of the text. Output ONLY the JSON object, nothing else.\n\n\
657                     Input text: \"{prompt}\""
658                );
659                self.invoke_opencode(&opencode_prompt).await?
660            }
661        };
662        let parsed: EmbeddingResponse = parse_llm_json(&stdout).map_err(|e| {
663            AppError::Embedding(format!(
664                "LLM embedding response parse failed: {e}; raw={stdout}"
665            ))
666        })?;
667        if parsed.embedding.len() != dim {
668            return Err(AppError::Embedding(format!(
669                "LLM returned {} dims, expected {dim}; \
670                 refusing to truncate or pad silently (G42/C5)",
671                parsed.embedding.len()
672            )));
673        }
674        Ok(parsed.embedding)
675    }
676
677    /// G42/S4: returns the lazily-created, process-shared codex schema
678    /// tempfile for the requested mode. `NamedTempFile` randomises the
679    /// filename (no PID-based collisions) and removes the file on drop
680    /// of the last `Arc` clone.
681    fn codex_schema_file(
682        &self,
683        dim: usize,
684        batch: bool,
685    ) -> Result<Arc<tempfile::NamedTempFile>, AppError> {
686        let mut guard = self.codex_schemas.lock();
687        let slot = if batch {
688            &mut guard.batch
689        } else {
690            &mut guard.single
691        };
692        if let Some((cached_dim, file)) = slot {
693            if *cached_dim == dim {
694                return Ok(Arc::clone(file));
695            }
696        }
697        let content = if batch {
698            build_batch_schema(dim)
699        } else {
700            build_single_schema(dim)
701        };
702        let file = tempfile::Builder::new()
703            .prefix("sqlite-graphrag-embed-schema-")
704            .suffix(".json")
705            .tempfile()
706            .map_err(|e| AppError::Embedding(format!("schema tempfile create failed: {e}")))?;
707        std::fs::write(file.path(), content)
708            .map_err(|e| AppError::Embedding(format!("schema tempfile write failed: {e}")))?;
709        let file = Arc::new(file);
710        *slot = Some((dim, Arc::clone(&file)));
711        Ok(file)
712    }
713
714    async fn invoke_claude(&self, prompt: &str, schema: &str) -> Result<String, AppError> {
715        // v1.0.69 hardening: --strict-mcp-config --mcp-config <PATH> --settings
716        // '{"hooks":{}}' --dangerously-skip-permissions.
717        //
718        // v1.0.76 hardening: Claude Code 2.1+ renamed --output-schema to
719        // --json-schema and accepts the schema as an inline JSON string
720        // (NOT a file path). Also pass --output-format json so the
721        // response is a single JSON object on stdout.
722        //
723        // v1.0.79 (G42/S6): CLAUDE_CONFIG_DIR points at an empty managed
724        // directory BY DEFAULT — the MCP-isolation flags above are
725        // silently ignored upstream (anthropics/claude-code#10787) and a
726        // populated ~/.claude costs ~223k cache-creation tokens per call.
727        //
728        // v1.0.88 (BUG-2 fix, ADR-0046): the inline `--mcp-config '{}'`
729        // form was rejected by Claude Code 2.1.177 (ADR-0045 Bug 2).
730        // Substitute a tempfile path produced by
731        // `write_empty_mcp_config_tempfile()` and run the full
732        // preflight gate BEFORE `Command::spawn()`, mirroring what
733        // `invoke_codex` already does for the codex backend.
734        let mcp_config_path = crate::spawn::preflight::write_empty_mcp_config_tempfile()?;
735        let argv_refs: [std::ffi::OsString; 0] = [];
736        let preflight_args = crate::spawn::preflight::PreFlightArgs {
737            binary_path: &self.binary,
738            argv: &argv_refs,
739            workspace_root: std::path::Path::new("."),
740            mcp_config_inline_json: None,
741            expected_output_bytes: 65_536,
742            spawner_name: "llm_embedding",
743        };
744        crate::spawn::preflight::preflight_check(&preflight_args)?;
745        let mut cmd = Command::new(&self.binary);
746        cmd.arg("-p")
747            .arg(prompt)
748            .arg("--model")
749            .arg(&self.model)
750            .arg("--json-schema")
751            .arg(schema)
752            .arg("--output-format")
753            .arg("json")
754            .arg("--strict-mcp-config")
755            .arg("--mcp-config")
756            .arg(mcp_config_path.as_os_str())
757            .arg("--settings")
758            .arg(r#"{"hooks":{}}"#)
759            .arg("--dangerously-skip-permissions")
760            .env_clear()
761            .env("PATH", std::env::var("PATH").unwrap_or_default())
762            .env("HOME", std::env::var("HOME").unwrap_or_default())
763            .stdin(Stdio::null())
764            .stdout(Stdio::piped())
765            .stderr(Stdio::piped())
766            // BLOCO 4: cancellation (dropped future) must kill the child.
767            .kill_on_drop(true);
768        if let Some(config_dir) = claude_embedding_config_dir() {
769            cmd.env("CLAUDE_CONFIG_DIR", &config_dir);
770        }
771        let binary_str = self.binary.to_string_lossy().into_owned();
772        let output = match tokio::time::timeout(self.instance_embed_timeout(), cmd.output()).await {
773            Err(_elapsed) => {
774                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
775                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
776                        secs: self.instance_embed_timeout().as_secs(),
777                        binary: binary_str.clone(),
778                    },
779                ));
780            }
781            Ok(Err(e)) => {
782                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
783                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
784                        binary: binary_str.clone(),
785                        source: e.to_string(),
786                    },
787                ));
788            }
789            Ok(Ok(o)) => o,
790        };
791        // G45-CR5 / ADR-0043 (v1.0.85): parse the JSON envelope from
792        // `claude -p --output-format json` and detect OAuth quota
793        // exhaustion by looking for the `rate_limit_error` or
794        // `usage` overflow markers before checking the subprocess
795        // exit status. This lets the deterministic fallback in
796        // hybrid-search and recall swap to codex immediately.
797        let stdout_str = String::from_utf8_lossy(&output.stdout);
798        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&stdout_str) {
799            let is_rate_limited = parsed
800                .get("is_error")
801                .and_then(|v| v.as_bool())
802                .unwrap_or(false)
803                && parsed
804                    .get("result")
805                    .and_then(|v| v.as_str())
806                    .map(|s| {
807                        s.contains("rate limit")
808                            || s.contains("quota")
809                            || s.contains("anthropic-ratelimit")
810                    })
811                    .unwrap_or(false);
812            if is_rate_limited {
813                return Err(AppError::Embedding(format!(
814                    "OAuth usage quota exhausted: claude rate_limit detected in stdout: {}",
815                    parsed
816                        .get("result")
817                        .and_then(|v| v.as_str())
818                        .unwrap_or("")
819                        .chars()
820                        .take(120)
821                        .collect::<String>()
822                )));
823            }
824        }
825        if !output.status.success() {
826            let (exit_code, signal) = if let Some(code) = output.status.code() {
827                (Some(code), None)
828            } else {
829                extract_exit_info(&output.status)
830            };
831            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
832                &output.stdout,
833                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
834            );
835            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
836                &output.stderr,
837                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
838            );
839            let mut hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
840            // v1.0.89 (GAP-5): detect expired OAuth and suggest actionable fix.
841            if stderr_tail.contains("401")
842                || stderr_tail.contains("Unauthorized")
843                || stderr_tail.contains("expired")
844                || stderr_tail.contains("login")
845                || stdout_tail.contains("401")
846                || stdout_tail.contains("Unauthorized")
847            {
848                hint.push_str(" | Claude OAuth token may be expired; run `claude login` to renew");
849            }
850            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
851                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
852                    exit_code,
853                    signal,
854                    stdout_tail,
855                    stderr_tail,
856                    binary: binary_str,
857                    hint,
858                },
859            ));
860        }
861        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
862    }
863
864    async fn invoke_codex(
865        &self,
866        prompt: &str,
867        schema_path: &std::path::Path,
868    ) -> Result<String, AppError> {
869        let binary_str = self.binary.to_string_lossy().into_owned();
870        let mut cmd = build_codex_embedding_command(&self.binary, &self.model, schema_path);
871
872        // GAP-META-005 (v1.0.87, ADR-0045): pre-flight gate before spawn.
873        // `tokio::process::Command` does not expose `get_args()`, so we
874        // skip the argv-size check here and rely on binary + workspace
875        // root + output buffer guards. Embedding prompts are bounded by
876        // the schema validator so argv overflow is not a real risk here.
877        //
878        // v1.0.88 (BUG-7 fix, ADR-0046): propagate the preflight error
879        // directly via `AppError::PreFlightFailed` (via the `From`
880        // impl added in `errors.rs`) so callers and operators see the
881        // structured `PreFlightError` variant and the canonical exit
882        // code 16. The previous implementation wrapped the error in
883        // `LlmBackendError::SpawnFailed`, which mapped to a different
884        // exit code and masked the preflight signal.
885        let argv_refs: [std::ffi::OsString; 0] = [];
886        let preflight_args = crate::spawn::preflight::PreFlightArgs {
887            binary_path: &self.binary,
888            argv: &argv_refs,
889            workspace_root: std::path::Path::new("."),
890            mcp_config_inline_json: None,
891            expected_output_bytes: 65_536,
892            spawner_name: "llm_embedding",
893        };
894        crate::spawn::preflight::preflight_check(&preflight_args)?;
895        let _ = binary_str; // silenced: preflight does not need it
896
897        let mut child = match cmd.spawn() {
898            Ok(c) => c,
899            Err(e) => {
900                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
901                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
902                        binary: binary_str,
903                        source: e.to_string(),
904                    },
905                ));
906            }
907        };
908        if let Some(mut stdin) = child.stdin.take() {
909            stdin
910                .write_all(prompt.as_bytes())
911                .await
912                .map_err(|e| AppError::Embedding(format!("codex stdin write failed: {e}")))?;
913            drop(stdin);
914        }
915        let output =
916            match tokio::time::timeout(self.instance_embed_timeout(), child.wait_with_output())
917                .await
918            {
919                Err(_elapsed) => {
920                    return Err(crate::llm::exit_code_hints::into_legacy_embedding(
921                        &crate::llm::exit_code_hints::LlmBackendError::Timeout {
922                            secs: self.instance_embed_timeout().as_secs(),
923                            binary: binary_str,
924                        },
925                    ));
926                }
927                Ok(Err(e)) => {
928                    return Err(crate::llm::exit_code_hints::into_legacy_embedding(
929                        &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
930                            binary: binary_str,
931                            source: format!("codex wait failed: {e}"),
932                        },
933                    ));
934                }
935                Ok(Ok(o)) => o,
936            };
937        if !output.status.success() {
938            let (exit_code, signal) = if let Some(code) = output.status.code() {
939                (Some(code), None)
940            } else {
941                extract_exit_info(&output.status)
942            };
943            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
944                &output.stdout,
945                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
946            );
947            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
948                &output.stderr,
949                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
950            );
951            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
952            // G42/S7: the headless spawn can still hit interactive
953            // prompts on some codex builds; keep the legacy request_user_input
954            // branch as a special-case hint, and stamp the diagnostic
955            // tail on top of the canonical NonZeroExit envelope.
956            let mut combined_hint = hint;
957            if stderr_tail.contains("request_user_input") {
958                combined_hint.push_str(
959                    " | codex requested interactive input in a headless embedding call; \
960                     upgrade codex (>= 0.134) or switch the embedding backend to claude",
961                );
962            }
963            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
964                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
965                    exit_code,
966                    signal,
967                    stdout_tail,
968                    stderr_tail,
969                    binary: binary_str,
970                    hint: combined_hint,
971                },
972            ));
973        }
974        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
975    }
976
977    async fn invoke_opencode(&self, prompt: &str) -> Result<String, AppError> {
978        let binary_str = self.binary.to_string_lossy().into_owned();
979        let mut cmd = Command::new(&self.binary);
980        cmd.arg("run")
981            .arg("--format")
982            .arg("json")
983            .arg("-m")
984            .arg(&self.model)
985            .arg("--dangerously-skip-permissions")
986            .arg(prompt)
987            .env_clear()
988            .env("PATH", std::env::var("PATH").unwrap_or_default())
989            .env("HOME", std::env::var("HOME").unwrap_or_default())
990            .stdin(Stdio::null())
991            .stdout(Stdio::piped())
992            .stderr(Stdio::piped())
993            .kill_on_drop(true);
994        crate::commands::opencode_runner::propagate_opencode_env(&mut cmd);
995
996        let output = match tokio::time::timeout(self.instance_embed_timeout(), cmd.output()).await {
997            Err(_elapsed) => {
998                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
999                    &crate::llm::exit_code_hints::LlmBackendError::Timeout {
1000                        secs: self.instance_embed_timeout().as_secs(),
1001                        binary: binary_str.clone(),
1002                    },
1003                ));
1004            }
1005            Ok(Err(e)) => {
1006                return Err(crate::llm::exit_code_hints::into_legacy_embedding(
1007                    &crate::llm::exit_code_hints::LlmBackendError::SpawnFailed {
1008                        binary: binary_str.clone(),
1009                        source: e.to_string(),
1010                    },
1011                ));
1012            }
1013            Ok(Ok(o)) => o,
1014        };
1015        if !output.status.success() {
1016            let (exit_code, signal) = if let Some(code) = output.status.code() {
1017                (Some(code), None)
1018            } else {
1019                extract_exit_info(&output.status)
1020            };
1021            let stdout_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
1022                &output.stdout,
1023                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
1024            );
1025            let stderr_tail = crate::llm::exit_code_hints::LlmBackendError::truncate_tail(
1026                &output.stderr,
1027                crate::llm::exit_code_hints::DIAG_TAIL_BYTES,
1028            );
1029            let hint = crate::llm::exit_code_hints::diagnose_exit_code(exit_code, signal);
1030            return Err(crate::llm::exit_code_hints::into_legacy_embedding(
1031                &crate::llm::exit_code_hints::LlmBackendError::NonZeroExit {
1032                    exit_code,
1033                    signal,
1034                    stdout_tail,
1035                    stderr_tail,
1036                    binary: binary_str,
1037                    hint,
1038                },
1039            ));
1040        }
1041        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1042    }
1043}
1044
1045/// G42/S6: resolves the empty `CLAUDE_CONFIG_DIR` used for embedding
1046/// subprocesses.
1047///
1048/// - `SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR` is honoured when set and
1049///   pointing at a directory (same contract as G28-A in claude_runner);
1050/// - otherwise a managed directory is created at
1051///   `~/.local/state/sqlite-graphrag/claude-empty-config` (mode 0700).
1052///   If `~/.claude/.credentials.json` exists (Linux OAuth storage) it is
1053///   copied in so authentication still works; on macOS credentials live
1054///   in the Keychain and the empty dir is sufficient.
1055///
1056/// Returns `None` only when HOME is unset AND no override is given —
1057/// in that case the subprocess falls back to claude's own default.
1058fn claude_embedding_config_dir() -> Option<std::path::PathBuf> {
1059    if let Ok(dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
1060        let path = std::path::PathBuf::from(dir);
1061        if path.is_dir() {
1062            return Some(path);
1063        }
1064        tracing::warn!(
1065            target: "embedding",
1066            path = %path.display(),
1067            "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but not a directory; \
1068             falling back to the managed empty config dir"
1069        );
1070    }
1071    let home = std::env::var("HOME").ok()?;
1072    let dir = std::path::Path::new(&home)
1073        .join(".local/state/sqlite-graphrag")
1074        .join("claude-empty-config");
1075    if std::fs::create_dir_all(&dir).is_err() {
1076        return None;
1077    }
1078    #[cfg(unix)]
1079    {
1080        use std::os::unix::fs::PermissionsExt;
1081        let _ = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700));
1082    }
1083    // Linux stores OAuth credentials on disk; copy them so the isolated
1084    // config dir still authenticates. Best-effort: macOS uses Keychain.
1085    // v1.0.89: ALWAYS copy (was: skip if target exists). OAuth tokens
1086    // expire and the stale copy causes 401 until manually deleted.
1087    let creds = std::path::Path::new(&home).join(".claude/.credentials.json");
1088    if creds.exists() {
1089        let target = dir.join(".credentials.json");
1090        let _ = std::fs::copy(&creds, &target);
1091    }
1092    Some(dir)
1093}
1094
1095fn build_codex_embedding_command(
1096    binary: &std::path::Path,
1097    model: &str,
1098    schema_path: &std::path::Path,
1099) -> Command {
1100    let mut cmd = Command::new(binary);
1101    // v1.0.77: `-c` TOML overrides bypass the codex exec --sandbox propagation
1102    // bug (openai/codex#18113). CLI flags alone are insufficient — the exec
1103    // subcommand may not inherit --sandbox from the parent codex command.
1104    cmd.arg("exec")
1105        .arg("-c")
1106        .arg("sandbox_mode='read-only'")
1107        .arg("-c")
1108        .arg("approval_policy='never'")
1109        .arg("--json")
1110        .arg("--output-schema")
1111        .arg(schema_path)
1112        .arg("--ephemeral")
1113        .arg("--skip-git-repo-check")
1114        .arg("--sandbox")
1115        .arg("read-only")
1116        .arg("--ignore-user-config")
1117        .arg("--ignore-rules");
1118    if crate::extract::codex_compat::codex_supports_ask_for_approval() {
1119        cmd.arg("--ask-for-approval").arg("never");
1120    }
1121    // v1.0.89: use the real CODEX_HOME (~/.codex) instead of an isolated
1122    // per-PID directory. The isolated dir caused cold-start overhead (codex
1123    // creates ~6 SQLite databases on first run) that regularly exceeded
1124    // the 30s embedding timeout. The --ignore-user-config + --ephemeral
1125    // flags already prevent config pollution; CODEX_HOME only needs auth.
1126    cmd.arg("--model")
1127        .arg(model)
1128        .arg("-")
1129        .env_clear()
1130        .env("PATH", std::env::var("PATH").unwrap_or_default())
1131        .env("HOME", std::env::var("HOME").unwrap_or_default());
1132    if let Ok(codex_home) = std::env::var("CODEX_HOME") {
1133        cmd.env("CODEX_HOME", codex_home);
1134    } else if let Ok(home) = std::env::var("HOME") {
1135        let default_home = std::path::Path::new(&home).join(".codex");
1136        if default_home.exists() {
1137            cmd.env("CODEX_HOME", &default_home);
1138        }
1139    }
1140    cmd.stdin(Stdio::piped())
1141        .stdout(Stdio::piped())
1142        .stderr(Stdio::piped())
1143        // BLOCO 4: cancellation (dropped future) must kill the child.
1144        .kill_on_drop(true);
1145    cmd
1146}
1147
1148// prepare_isolated_codex_home removed in v1.0.89: the per-PID isolated
1149// CODEX_HOME caused cold-start overhead that exceeded the 30s embedding
1150// timeout. The real ~/.codex is now used directly (see build_codex_embedding_command).
1151
1152/// Parse an LLM JSON response of type `T`. The two backends emit
1153/// different shapes:
1154/// - Claude (with `--output-format json`): single JSON object on stdout.
1155/// - Codex (with `--json`): JSONL stream with one event per line; the
1156///   `agent_message` event's `text` field is the JSON payload.
1157///
1158/// This helper accepts both shapes and returns the parsed value (or an
1159/// error describing the first mismatch).
1160fn parse_llm_json<T: serde::de::DeserializeOwned>(stdout: &str) -> Result<T, String> {
1161    // Strategy 1: try the whole stdout as JSON (Claude path).
1162    if let Ok(parsed) = serde_json::from_str::<T>(stdout) {
1163        return Ok(parsed);
1164    }
1165    // Strategy 3: walk NDJSON and collect `.part.text` from `type == "text"`
1166    // events (OpenCode path: `opencode run --format json`).
1167    let mut opencode_texts: Vec<String> = Vec::new();
1168    for line in stdout.lines() {
1169        let line = line.trim();
1170        if line.is_empty() {
1171            continue;
1172        }
1173        let Ok(event) = serde_json::from_str::<serde_json::Value>(line) else {
1174            continue;
1175        };
1176        if event.get("type").and_then(|t| t.as_str()) == Some("text") {
1177            if let Some(text) = event
1178                .get("part")
1179                .and_then(|p| p.get("text"))
1180                .and_then(|t| t.as_str())
1181            {
1182                opencode_texts.push(text.to_string());
1183            }
1184        }
1185    }
1186    if !opencode_texts.is_empty() {
1187        let combined = opencode_texts.concat();
1188        if let Ok(parsed) = serde_json::from_str::<T>(&combined) {
1189            return Ok(parsed);
1190        }
1191    }
1192    // Strategy 2: walk the JSONL line by line and pick the last
1193    // `item.completed` of type `agent_message` (Codex path).
1194    let mut last_agent_text: Option<String> = None;
1195    for line in stdout.lines() {
1196        let line = line.trim();
1197        if line.is_empty() {
1198            continue;
1199        }
1200        let Ok(event) = serde_json::from_str::<serde_json::Value>(line) else {
1201            continue;
1202        };
1203        if event.get("type").and_then(|t| t.as_str()) != Some("item.completed") {
1204            continue;
1205        }
1206        let item = match event.get("item") {
1207            Some(i) => i,
1208            None => continue,
1209        };
1210        if item.get("type").and_then(|t| t.as_str()) != Some("agent_message") {
1211            continue;
1212        }
1213        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
1214            last_agent_text = Some(text.to_string());
1215        }
1216    }
1217    let text = last_agent_text
1218        .ok_or_else(|| "no agent_message found in codex JSONL output".to_string())?;
1219    serde_json::from_str::<T>(&text)
1220        .map_err(|e| format!("codex agent_message text does not match schema: {e}; raw={text}"))
1221}
1222
1223#[cfg(test)]
1224mod tests {
1225    use super::*;
1226
1227    fn test_client(flavour: EmbeddingFlavour, binary: std::path::PathBuf) -> LlmEmbedding {
1228        LlmEmbedding {
1229            flavour,
1230            binary,
1231            model: "gpt-5.4".to_string(),
1232            codex_schemas: Arc::new(parking_lot::Mutex::new(CodexSchemaFiles::default())),
1233            timeout_override: None,
1234        }
1235    }
1236
1237    #[test]
1238    fn embed_timeout_default_is_60() {
1239        assert_eq!(DEFAULT_EMBED_TIMEOUT_SECS, 60);
1240    }
1241
1242    #[test]
1243    #[serial_test::serial(env)]
1244    fn oauth_only_enforce_blocks_api_keys() {
1245        // SAFETY: this test only sets and unsets env vars; the
1246        // `serial(env)` group prevents cross-test interference.
1247        unsafe {
1248            std::env::set_var("ANTHROPIC_API_KEY", "test");
1249            assert!(LlmEmbedding::oauth_only_enforce().is_err());
1250            std::env::remove_var("ANTHROPIC_API_KEY");
1251
1252            std::env::set_var("OPENAI_API_KEY", "test");
1253            assert!(LlmEmbedding::oauth_only_enforce().is_err());
1254            std::env::remove_var("OPENAI_API_KEY");
1255        }
1256        assert!(LlmEmbedding::oauth_only_enforce().is_ok());
1257    }
1258
1259    #[test]
1260    fn flavour_as_str_is_stable() {
1261        assert_eq!(EmbeddingFlavour::Claude.as_str(), "claude");
1262        assert_eq!(EmbeddingFlavour::Codex.as_str(), "codex");
1263    }
1264
1265    #[test]
1266    fn single_schema_embeds_active_dim() {
1267        let schema = build_single_schema(64);
1268        assert!(schema.contains(r#""minItems":64"#));
1269        assert!(schema.contains(r#""maxItems":64"#));
1270        let parsed: serde_json::Value =
1271            serde_json::from_str(&schema).expect("single schema must be valid JSON");
1272        assert_eq!(parsed["properties"]["embedding"]["minItems"], 64);
1273    }
1274
1275    #[test]
1276    fn batch_schema_is_valid_json_and_unbounded_items() {
1277        let schema = build_batch_schema(64);
1278        let parsed: serde_json::Value =
1279            serde_json::from_str(&schema).expect("batch schema must be valid JSON");
1280        // The items array must NOT constrain its length so one schema
1281        // file serves every batch size (G42/S4).
1282        assert!(parsed["properties"]["items"].get("minItems").is_none());
1283        assert_eq!(
1284            parsed["properties"]["items"]["items"]["properties"]["v"]["minItems"],
1285            64
1286        );
1287    }
1288
1289    #[test]
1290    fn parse_llm_json_accepts_claude_json() {
1291        let stdout = r#"{"embedding":[0.0,1.0,2.0]}"#;
1292
1293        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("claude JSON must parse");
1294
1295        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
1296    }
1297
1298    #[test]
1299    fn parse_llm_json_accepts_codex_jsonl() {
1300        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}
1301{"type":"item.completed","item":{"type":"agent_message","text":"{\"embedding\":[0.0,1.0,2.0]}"}}
1302{"type":"turn.completed","usage":{"input_tokens":1,"output_tokens":1}}"#;
1303
1304        let parsed: EmbeddingResponse = parse_llm_json(stdout).expect("codex JSONL must parse");
1305
1306        assert_eq!(parsed.embedding, vec![0.0, 1.0, 2.0]);
1307    }
1308
1309    #[test]
1310    fn parse_llm_json_rejects_jsonl_without_agent_message() {
1311        let stdout = r#"{"type":"thread.started","thread_id":"mock-thread-0"}"#;
1312
1313        let err = parse_llm_json::<EmbeddingResponse>(stdout)
1314            .expect_err("missing agent_message must fail");
1315
1316        assert!(err.contains("no agent_message"));
1317    }
1318
1319    #[test]
1320    fn parse_llm_json_accepts_batch_response() {
1321        let stdout = r#"{"items":[{"i":1,"v":[0.0,1.0]},{"i":2,"v":[2.0,3.0]}]}"#;
1322
1323        let parsed: BatchEmbeddingResponse = parse_llm_json(stdout).expect("batch JSON must parse");
1324
1325        assert_eq!(parsed.items.len(), 2);
1326        assert_eq!(parsed.items[0].i, 1);
1327        assert_eq!(parsed.items[1].v, vec![2.0, 3.0]);
1328    }
1329
1330    #[test]
1331    fn codex_schema_file_is_created_once_and_reused() {
1332        let client = test_client(
1333            EmbeddingFlavour::Codex,
1334            std::path::PathBuf::from("/bin/true"),
1335        );
1336        let first = client
1337            .codex_schema_file(64, false)
1338            .expect("schema file must be created");
1339        let second = client
1340            .codex_schema_file(64, false)
1341            .expect("schema file must be reused");
1342        assert_eq!(first.path(), second.path(), "same dim must reuse the file");
1343
1344        let batch = client
1345            .codex_schema_file(64, true)
1346            .expect("batch schema file must be created");
1347        assert_ne!(
1348            first.path(),
1349            batch.path(),
1350            "single and batch schemas are distinct files"
1351        );
1352
1353        let content = std::fs::read_to_string(first.path()).expect("schema file must be readable");
1354        assert!(content.contains(r#""minItems":64"#));
1355    }
1356
1357    #[test]
1358    fn codex_embedding_command_reads_prompt_from_stdin() {
1359        let schema_path = std::env::temp_dir().join("sqlite-graphrag-embed-schema-test.json");
1360        let cmd = build_codex_embedding_command(
1361            std::path::Path::new("/bin/true"),
1362            "gpt-5.4",
1363            &schema_path,
1364        );
1365        let argv: Vec<String> = cmd
1366            .as_std()
1367            .get_args()
1368            .filter_map(|arg| arg.to_str().map(|s| s.to_string()))
1369            .collect();
1370
1371        assert!(
1372            argv.iter().any(|arg| arg == "-"),
1373            "codex embedding command must read prompt from stdin: {argv:?}"
1374        );
1375        assert!(
1376            !argv.iter().any(|arg| arg.starts_with("passage: ")),
1377            "prompt text must not be passed as argv: {argv:?}"
1378        );
1379        for required in &[
1380            "exec",
1381            "-c",
1382            "sandbox_mode='read-only'",
1383            "approval_policy='never'",
1384            "--json",
1385            "--output-schema",
1386            "--ephemeral",
1387            "--skip-git-repo-check",
1388            "--sandbox",
1389            "read-only",
1390            "--ignore-user-config",
1391            "--ignore-rules",
1392            "--model",
1393            "gpt-5.4",
1394        ] {
1395            assert!(
1396                argv.iter().any(|arg| arg == required),
1397                "missing flag {required} in {argv:?}"
1398            );
1399        }
1400    }
1401
1402    #[cfg(unix)]
1403    #[test]
1404    #[serial_test::serial(env)]
1405    fn embed_passage_sends_prompt_to_codex_stdin() {
1406        use std::os::unix::fs::PermissionsExt;
1407
1408        // Pin the dimensionality so the mock script and the validation
1409        // agree regardless of test execution order.
1410        // SAFETY: guarded by serial(env).
1411        unsafe {
1412            std::env::set_var("SQLITE_GRAPHRAG_EMBEDDING_DIM", "64");
1413        }
1414
1415        let temp = tempfile::tempdir().expect("tempdir must exist");
1416        let binary = temp.path().join("codex-stdin-check");
1417        let script = r#"#!/usr/bin/env bash
1418set -euo pipefail
1419
1420prompt="$(cat)"
1421if [[ "$prompt" != "passage: codex-cli" ]]; then
1422  echo "unexpected stdin: $prompt" >&2
1423  exit 41
1424fi
1425
1426vals="0.0"
1427for _ in $(seq 2 64); do
1428  vals="$vals,0.0"
1429done
1430payload="{\"embedding\":[$vals]}"
1431escaped="${payload//\"/\\\"}"
1432echo "{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"$escaped\"}}"
1433"#;
1434        std::fs::write(&binary, script).expect("mock codex script must be written");
1435        let mut perms = std::fs::metadata(&binary)
1436            .expect("mock codex metadata must exist")
1437            .permissions();
1438        perms.set_mode(0o755);
1439        std::fs::set_permissions(&binary, perms).expect("mock codex must be executable");
1440
1441        let embedding = test_client(EmbeddingFlavour::Codex, binary);
1442
1443        let vector = embedding
1444            .embed_passage("codex-cli")
1445            .expect("stdin-backed codex embedding must succeed");
1446
1447        // SAFETY: guarded by serial(env).
1448        unsafe {
1449            std::env::remove_var("SQLITE_GRAPHRAG_EMBEDDING_DIM");
1450        }
1451
1452        assert_eq!(vector.len(), 64);
1453        assert!(vector.iter().all(|value| *value == 0.0));
1454    }
1455
1456    // ---------------------------------------------------------------
1457    // ADR-0042 / GAP-002: LlmEmbeddingBuilder unit tests
1458    // ---------------------------------------------------------------
1459
/// `LlmEmbeddingBuilder::claude_default()` must hand back a builder set
/// to the Claude flavour that carries neither a binary override nor a
/// model override.
///
/// Only the builder's fields are inspected; `build()` is never called,
/// so this test does not need a `claude` binary to be installed.
#[test]
fn claude_default_resolves_path() {
    let claude = LlmEmbeddingBuilder::claude_default();

    assert_eq!(
        claude.flavour,
        EmbeddingFlavour::Claude,
        "claude_default must select the Claude flavour"
    );
    assert!(
        claude.binary_override.is_none(),
        "a default builder must not carry a binary override"
    );
    assert!(
        claude.model_override.is_none(),
        "a default builder must not carry a model override"
    );
}
1472
/// `override_binary` must store the supplied path verbatim in the
/// builder's `binary_override` field.
///
/// The path is only compared, never opened, so it does not have to
/// exist on disk.
#[test]
fn override_binary_uses_provided() {
    let expected = std::path::PathBuf::from("/tmp/fake-claude-binary");

    let configured = LlmEmbeddingBuilder::claude_default().override_binary(expected.clone());

    assert_eq!(configured.binary_override.as_ref(), Some(&expected));
}
1482
/// `override_model` must record the supplied model name unchanged in
/// the builder's `model_override` field.
#[test]
fn override_model_uses_provided() {
    let requested = "gpt-5.4-custom".to_string();

    let configured = LlmEmbeddingBuilder::codex_default().override_model(requested);

    assert_eq!(configured.model_override.as_deref(), Some("gpt-5.4-custom"));
}
1492
1493    // ---------------------------------------------------------------
1494    // v1.0.89 GAP tests
1495    // ---------------------------------------------------------------
1496
/// The batch timeout must grow strictly with the batch size, and the
/// gap between a batch of 1 and a batch of 8 must be exactly 7 x 15
/// seconds.
#[test]
fn embed_timeout_for_batch_scales_with_size() {
    // Evaluated in ascending batch-size order: 1, then 4, then 8.
    let [one, four, eight] = [1, 4, 8].map(embed_timeout_for_batch);

    assert!(
        one < four,
        "batch of 4 must have longer timeout than batch of 1"
    );
    assert!(
        four < eight,
        "batch of 8 must have longer timeout than batch of 4"
    );

    // Seven extra items separate the smallest and the largest batch.
    let expected_gap = std::time::Duration::from_secs(15) * 7;
    assert_eq!(eight - one, expected_gap);
}
1512
/// A batch of exactly one text must get the same timeout that
/// `embed_timeout()` reports.
#[test]
fn embed_timeout_for_batch_single_equals_base() {
    // Arguments are evaluated left to right: base first, then the batch.
    assert_eq!(embed_timeout(), embed_timeout_for_batch(1));
}
1519
/// The Opencode flavour must render as the lowercase string
/// `"opencode"`.
#[test]
fn opencode_flavour_as_str() {
    let label = EmbeddingFlavour::Opencode.as_str();
    assert_eq!(label, "opencode");
}
1524
/// When `SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL` is set,
/// `opencode_embed_model()` must return its value.
///
/// The variable is removed again before the assertion runs, so a
/// failing assertion cannot leave it set for later tests.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_uses_env_override() {
    // SAFETY: guarded by serial(env) — tests in the `env` group never
    // mutate the process environment concurrently.
    unsafe {
        std::env::set_var(
            "SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL",
            "opencode/test-model",
        );
    }

    // The lookup itself is kept outside the `unsafe` blocks, which only
    // cover the environment mutations.
    let model = opencode_embed_model();

    // SAFETY: guarded by serial(env).
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL");
    }

    assert_eq!(model, "opencode/test-model");
}
1538
/// With `SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL` unset,
/// `opencode_embed_model()` must fall back to
/// `SQLITE_GRAPHRAG_OPENCODE_MODEL`.
///
/// The fallback variable is removed again before the assertion runs,
/// so a failing assertion cannot leave it set for later tests.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_falls_back_to_opencode_model() {
    // SAFETY: guarded by serial(env) — tests in the `env` group never
    // mutate the process environment concurrently.
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL");
        std::env::set_var("SQLITE_GRAPHRAG_OPENCODE_MODEL", "opencode/fallback");
    }

    // The lookup itself is kept outside the `unsafe` blocks, which only
    // cover the environment mutations.
    let model = opencode_embed_model();

    // SAFETY: guarded by serial(env).
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_MODEL");
    }

    assert_eq!(model, "opencode/fallback");
}
1550
/// `SQLITE_GRAPHRAG_LLM_MODEL` must not influence the opencode
/// embedding model: with both opencode-specific variables unset,
/// `opencode_embed_model()` must return `opencode/big-pickle` even
/// though the generic LLM variable is set.
///
/// The generic variable is removed again before the assertion runs,
/// so a failing assertion cannot leave it set for later tests.
#[test]
#[serial_test::serial(env)]
fn opencode_embed_model_ignores_llm_model() {
    // SAFETY: guarded by serial(env) — tests in the `env` group never
    // mutate the process environment concurrently.
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_EMBED_MODEL");
        std::env::remove_var("SQLITE_GRAPHRAG_OPENCODE_MODEL");
        std::env::set_var("SQLITE_GRAPHRAG_LLM_MODEL", "gpt-5.4-mini");
    }

    // The lookup itself is kept outside the `unsafe` blocks, which only
    // cover the environment mutations.
    let model = opencode_embed_model();

    // SAFETY: guarded by serial(env).
    unsafe {
        std::env::remove_var("SQLITE_GRAPHRAG_LLM_MODEL");
    }

    assert_eq!(
        model, "opencode/big-pickle",
        "must NOT cross-contaminate with LLM_MODEL"
    );
}
1566
/// `parse_llm_json` must extract a single-embedding payload from
/// opencode-style NDJSON output, where the JSON answer is carried as an
/// escaped string inside the `text` event between `step_start` and
/// `step_finish` events.
#[test]
fn parse_llm_json_accepts_opencode_ndjson() {
    let ndjson = r#"{"type":"step_start","timestamp":1234,"sessionID":"ses_test","part":{"type":"step-start"}}
{"type":"text","timestamp":1235,"sessionID":"ses_test","part":{"type":"text","text":"{\"embedding\":[0.1,0.2,0.3]}"}}
{"type":"step_finish","timestamp":1236,"sessionID":"ses_test","part":{"type":"step-finish","tokens":{"total":100,"input":90,"output":10,"reasoning":0},"cost":0}}"#;

    let response: EmbeddingResponse = parse_llm_json(ndjson).expect("opencode NDJSON must parse");

    assert_eq!(response.embedding, [0.1, 0.2, 0.3]);
}
1576
/// `parse_llm_json` must extract a batch payload (`items` of `{i, v}`
/// entries) from opencode-style NDJSON output, where the JSON answer is
/// carried as an escaped string inside the `text` event.
#[test]
fn parse_llm_json_accepts_opencode_batch_ndjson() {
    let ndjson = r#"{"type":"step_start","timestamp":1234,"sessionID":"ses_test","part":{"type":"step-start"}}
{"type":"text","timestamp":1235,"sessionID":"ses_test","part":{"type":"text","text":"{\"items\":[{\"i\":1,\"v\":[0.1,0.2]},{\"i\":2,\"v\":[0.3,0.4]}]}"}}
{"type":"step_finish","timestamp":1236,"sessionID":"ses_test","part":{"type":"step-finish","tokens":{"total":100,"input":90,"output":10,"reasoning":0},"cost":0}}"#;

    let batch: BatchEmbeddingResponse =
        parse_llm_json(ndjson).expect("opencode batch NDJSON must parse");

    // Both entries must survive, in the order they were emitted.
    let items = &batch.items;
    assert_eq!(items.len(), 2);
    assert_eq!(items[0].i, 1);
    assert_eq!(items[1].v, [0.3, 0.4]);
}
1589
/// `LlmEmbeddingBuilder::opencode_default()` must hand back a builder
/// set to the Opencode flavour that carries neither a binary override
/// nor a model override.
#[test]
fn opencode_builder_default_has_correct_flavour() {
    let opencode = LlmEmbeddingBuilder::opencode_default();

    assert_eq!(
        opencode.flavour,
        EmbeddingFlavour::Opencode,
        "opencode_default must select the Opencode flavour"
    );
    assert!(
        opencode.binary_override.is_none(),
        "a default builder must not carry a binary override"
    );
    assert!(
        opencode.model_override.is_none(),
        "a default builder must not carry a model override"
    );
}
1597}