Skip to main content

sqlite_graphrag/commands/
dry_run_backend.rs

1//! v1.0.84 (ADR-0042 / GAP-002): resolve and emit the LLM backend that
2//! WOULD be invoked for embedding without actually spawning the
3//! subprocess. Used by `--dry-run-backend` for CI audit and pre-flight
4//! sanity-check of `--llm-backend` before long ingestion sessions.
5//!
6//! The output is a compact JSON envelope on stdout. stderr carries the
7//! human-friendly summary so operators can run `sqlite-graphrag --dry-run-backend`
8//! without piping through `jaq`.
9//!
10//! ## Schema (`dry-run-backend.schema.json`)
11//!
12//! ```json
13//! {
14//!   "action": "dry_run_backend",
15//!   "backend": "codex|claude|opencode|openrouter|none",
16//!   "binary": "/usr/local/bin/codex",
17//!   "model": "gpt-5.5",
18//!   "flavour": "codex|claude|opencode|openrouter|none",
19//!   "chain": "claude",
20//!   "strict_env_clear": false
21//! }
22//! ```
23//!
24//! ## Implementation notes
25//!
26//! - We deliberately do NOT depend on the private fields of
27//!   `LlmEmbedding`. The struct's `binary` and `flavour` fields are
28//!   private to `crate::extract::llm_embedding`, so we re-probe the
29//!   PATH here (cheap, idempotent) instead of forcing the core to add
30//!   `pub(crate)` getters just for this audit path.
31//! - `model` comes from `LlmEmbedding::model_label()` which already
32//!   exposes a stable public string of the form `<flavour>:<model>`.
33//!   We strip the `<flavour>:` prefix to keep the schema flat.
34//! - When `--llm-backend none` is selected the envelope still emits
35//!   the same shape with empty `binary` and `model`, so downstream
36//!   pipelines can parse a single schema unconditionally.
37
38use crate::cli::{Cli, LlmBackendChoice};
39use crate::errors::AppError;
40use crate::extract::llm_embedding::LlmEmbedding;
41use crate::output::emit_json_compact;
42use crate::spawn::env_whitelist::is_strict_env_clear;
43use serde::Serialize;
44
45/// Compact JSON envelope emitted by `--dry-run-backend`.
46///
47/// Field order matches the documented schema. `chain` reflects
48/// `--llm-fallback` so operators can audit the fallback order without
49/// spawning `embedder::embed_with_fallback`.
50#[derive(Serialize)]
51pub struct DryRunBackendOutput {
52    pub action: &'static str,
53    pub backend: &'static str,
54    pub binary: String,
55    pub model: String,
56    pub flavour: &'static str,
57    pub chain: String,
58    pub strict_env_clear: bool,
59}
60
61/// Resolve the LLM backend that would be used for embedding and emit
62/// the JSON envelope. Returns `Err(AppError::Embedding)` when the
63/// requested backend CLI is missing from PATH.
64pub fn emit_dry_run_backend(cli: &Cli) -> Result<(), AppError> {
65    let payload = match cli.llm_backend {
66        LlmBackendChoice::None => DryRunBackendOutput {
67            action: "dry_run_backend",
68            backend: "none",
69            binary: String::new(),
70            model: String::new(),
71            flavour: "none",
72            chain: cli.llm_fallback.clone(),
73            strict_env_clear: is_strict_env_clear(),
74        },
75        LlmBackendChoice::Auto => {
76            // ADR-0038: codex is preferred; claude is the fallback when codex
77            // is absent. Mirrors `LlmEmbedding::detect_available()` exactly
78            // so the audit output never disagrees with the real spawn path.
79            let resolved = LlmEmbedding::detect_available()?;
80            backend_payload(&resolved, "codex-first-then-claude", cli, true)
81        }
82        LlmBackendChoice::Codex => {
83            let resolved = LlmEmbedding::detect_available()?;
84            let flavour = resolved.model_label();
85            // Guard: the user explicitly asked for codex. If detect_available
86            // returned a claude-backed client (no codex on PATH), we MUST
87            // surface that as an error rather than silently substitute.
88            // v1.0.84 (ADR-0042): claude must NOT silently replace codex
89            // when the user opts in via `--llm-backend codex`.
90            if flavour.starts_with("claude:") {
91                return Err(AppError::Embedding(
92                    "`--llm-backend codex` requested but `codex` was not found on PATH \
93                     (a `claude` binary was detected; refusing silent fallback per ADR-0042). \
94                     Install `codex` (>= 0.130) or pass `--llm-backend claude` explicitly."
95                        .to_string(),
96                ));
97            }
98            backend_payload(&resolved, "codex-explicit", cli, false)
99        }
100        LlmBackendChoice::Claude => {
101            let resolved = LlmEmbedding::detect_available()?;
102            let flavour = resolved.model_label();
103            // Symmetric guard for `--llm-backend claude`.
104            if flavour.starts_with("codex:") {
105                return Err(AppError::Embedding(
106                    "`--llm-backend claude` requested but `claude` was not found on PATH \
107                     (a `codex` binary was detected; refusing silent fallback per ADR-0042). \
108                     Install `claude` (Claude Code >= 2.1) or pass `--llm-backend codex` explicitly."
109                        .to_string(),
110                ));
111            }
112            backend_payload(&resolved, "claude-explicit", cli, false)
113        }
114        LlmBackendChoice::Opencode => {
115            let resolved = LlmEmbedding::detect_available()?;
116            let flavour = resolved.model_label();
117            if !flavour.starts_with("opencode:") {
118                let hint = if flavour.starts_with("codex:") || flavour.starts_with("claude:") {
119                    format!(
120                        "`--llm-backend opencode` requested but auto-detect resolved `{flavour}` \
121                         (opencode has lower priority than codex/claude in detect_available). \
122                         Pass `--llm-backend auto` or set SQLITE_GRAPHRAG_OPENCODE_BINARY explicitly."
123                    )
124                } else {
125                    "`--llm-backend opencode` requested but `opencode` was not found on PATH. \
126                     Install `opencode` (>= 1.17) or pass `--llm-backend auto` to auto-detect."
127                        .to_string()
128                };
129                return Err(AppError::Embedding(hint));
130            }
131            backend_payload(&resolved, "opencode-explicit", cli, false)
132        }
133        LlmBackendChoice::OpenRouter => DryRunBackendOutput {
134            action: "dry_run_backend",
135            backend: "openrouter",
136            binary: String::new(),
137            model: String::new(),
138            flavour: "openrouter",
139            chain: cli.llm_fallback.clone(),
140            strict_env_clear: is_strict_env_clear(),
141        },
142    };
143
144    emit_json_compact(&payload)?;
145    Ok(())
146}
147
148/// Build the envelope from a successfully-resolved `LlmEmbedding`.
149///
150/// `chain_label` documents which CLI knob produced this payload
151/// (e.g. `codex-explicit` vs `codex-first-then-claude`) so the audit
152/// output is self-describing.
153fn backend_payload(
154    resolved: &LlmEmbedding,
155    chain_label: &str,
156    cli: &Cli,
157    is_auto: bool,
158) -> DryRunBackendOutput {
159    // `model_label()` returns `<flavour>:<model>` — split on the FIRST
160    // colon so model names with colons (rare but possible) survive.
161    // `flavour` must be a `&'static str` (the struct field type), so we
162    // leak the slice into a `Box<str>` to obtain a `'static` reference.
163    let label = resolved.model_label();
164    let (flavour, model) = match label.split_once(':') {
165        Some((f, m)) => (f, m.to_string()),
166        None => ("unknown", label.to_string()),
167    };
168    let flavour: &'static str = Box::leak(flavour.to_string().into_boxed_str());
169
170    // Re-probe PATH to surface the binary path the audit envelope
171    // promises. We prefer `which::which` over the private `LlmEmbedding`
172    // field so this file compiles independently of the `extract`
173    // module's internal layout. The result is canonicalized when
174    // possible so symlinks and shim wrappers don't leak location.
175    let binary = which::which(if is_auto {
176        // For Auto, prefer whichever the real spawn would pick first.
177        if which::which("codex").is_ok() {
178            "codex"
179        } else {
180            "claude"
181        }
182    } else {
183        flavour
184    })
185    .ok()
186    .and_then(|p| std::fs::canonicalize(&p).ok().or(Some(p)))
187    .map(|p| p.display().to_string())
188    .unwrap_or_default();
189
190    // Backend string is the `LlmBackendChoice` name for clarity in CI
191    // logs (operators filter on `backend == "codex"` etc.).
192    let backend = match cli.llm_backend {
193        LlmBackendChoice::Auto => {
194            if flavour == "codex" {
195                "codex"
196            } else if flavour == "opencode" {
197                "opencode"
198            } else {
199                "claude"
200            }
201        }
202        LlmBackendChoice::Codex => "codex",
203        LlmBackendChoice::Claude => "claude",
204        LlmBackendChoice::Opencode => "opencode",
205        LlmBackendChoice::OpenRouter => "openrouter",
206        LlmBackendChoice::None => "none",
207    };
208
209    DryRunBackendOutput {
210        action: "dry_run_backend",
211        backend,
212        binary,
213        model,
214        flavour,
215        chain: if chain_label == "codex-first-then-claude" {
216            cli.llm_fallback.clone()
217        } else {
218            chain_label.to_string()
219        },
220        strict_env_clear: is_strict_env_clear(),
221    }
222}