Skip to main content

sqlite_graphrag/commands/
dry_run_backend.rs

1//! v1.0.84 (ADR-0042 / GAP-002): resolve and emit the LLM backend that
2//! WOULD be invoked for embedding without actually spawning the
3//! subprocess. Used by `--dry-run-backend` for CI audit and pre-flight
4//! sanity-check of `--llm-backend` before long ingestion sessions.
5//!
6//! The output is a compact JSON envelope on stdout. stderr carries the
7//! human-friendly summary so operators can run `sqlite-graphrag --dry-run-backend`
8//! without piping through `jaq`.
9//!
10//! ## Schema (`dry-run-backend.schema.json`)
11//!
12//! ```json
13//! {
14//!   "action": "dry_run_backend",
15//!   "backend": "codex|claude|none",
16//!   "binary": "/usr/local/bin/codex",
17//!   "model": "gpt-5.5",
18//!   "flavour": "codex|claude|none",
19//!   "chain": "claude",
20//!   "strict_env_clear": false
21//! }
22//! ```
23//!
24//! ## Implementation notes
25//!
26//! - We deliberately do NOT depend on the private fields of
27//!   `LlmEmbedding`. The struct's `binary` and `flavour` fields are
28//!   private to `crate::extract::llm_embedding`, so we re-probe the
29//!   PATH here (cheap, idempotent) instead of forcing the core to add
30//!   `pub(crate)` getters just for this audit path.
31//! - `model` comes from `LlmEmbedding::model_label()` which already
32//!   exposes a stable public string of the form `<flavour>:<model>`.
33//!   We strip the `<flavour>:` prefix to keep the schema flat.
34//! - When `--llm-backend none` is selected the envelope still emits
35//!   the same shape with empty `binary` and `model`, so downstream
36//!   pipelines can parse a single schema unconditionally.
37
38use crate::cli::{Cli, LlmBackendChoice};
39use crate::errors::AppError;
40use crate::extract::llm_embedding::LlmEmbedding;
41use crate::output::emit_json_compact;
42use crate::spawn::env_whitelist::is_strict_env_clear;
43use serde::Serialize;
44
45/// Compact JSON envelope emitted by `--dry-run-backend`.
46///
47/// Field order matches the documented schema. `chain` reflects
48/// `--llm-fallback` so operators can audit the fallback order without
49/// spawning `embedder::embed_with_fallback`.
50#[derive(Serialize)]
51pub struct DryRunBackendOutput {
52    pub action: &'static str,
53    pub backend: &'static str,
54    pub binary: String,
55    pub model: String,
56    pub flavour: &'static str,
57    pub chain: String,
58    pub strict_env_clear: bool,
59}
60
61/// Resolve the LLM backend that would be used for embedding and emit
62/// the JSON envelope. Returns `Err(AppError::Embedding)` when the
63/// requested backend CLI is missing from PATH.
64pub fn emit_dry_run_backend(cli: &Cli) -> Result<(), AppError> {
65    let payload = match cli.llm_backend {
66        LlmBackendChoice::None => DryRunBackendOutput {
67            action: "dry_run_backend",
68            backend: "none",
69            binary: String::new(),
70            model: String::new(),
71            flavour: "none",
72            chain: cli.llm_fallback.clone(),
73            strict_env_clear: is_strict_env_clear(),
74        },
75        LlmBackendChoice::Auto => {
76            // ADR-0038: codex is preferred; claude is the fallback when codex
77            // is absent. Mirrors `LlmEmbedding::detect_available()` exactly
78            // so the audit output never disagrees with the real spawn path.
79            let resolved = LlmEmbedding::detect_available()?;
80            backend_payload(&resolved, "codex-first-then-claude", cli, true)
81        }
82        LlmBackendChoice::Codex => {
83            let resolved = LlmEmbedding::detect_available()?;
84            let flavour = resolved.model_label();
85            // Guard: the user explicitly asked for codex. If detect_available
86            // returned a claude-backed client (no codex on PATH), we MUST
87            // surface that as an error rather than silently substitute.
88            // v1.0.84 (ADR-0042): claude must NOT silently replace codex
89            // when the user opts in via `--llm-backend codex`.
90            if flavour.starts_with("claude:") {
91                return Err(AppError::Embedding(
92                    "`--llm-backend codex` requested but `codex` was not found on PATH \
93                     (a `claude` binary was detected; refusing silent fallback per ADR-0042). \
94                     Install `codex` (>= 0.130) or pass `--llm-backend claude` explicitly."
95                        .to_string(),
96                ));
97            }
98            backend_payload(&resolved, "codex-explicit", cli, false)
99        }
100        LlmBackendChoice::Claude => {
101            let resolved = LlmEmbedding::detect_available()?;
102            let flavour = resolved.model_label();
103            // Symmetric guard for `--llm-backend claude`.
104            if flavour.starts_with("codex:") {
105                return Err(AppError::Embedding(
106                    "`--llm-backend claude` requested but `claude` was not found on PATH \
107                     (a `codex` binary was detected; refusing silent fallback per ADR-0042). \
108                     Install `claude` (Claude Code >= 2.1) or pass `--llm-backend codex` explicitly."
109                        .to_string(),
110                ));
111            }
112            backend_payload(&resolved, "claude-explicit", cli, false)
113        }
114    };
115
116    emit_json_compact(&payload)?;
117    Ok(())
118}
119
120/// Build the envelope from a successfully-resolved `LlmEmbedding`.
121///
122/// `chain_label` documents which CLI knob produced this payload
123/// (e.g. `codex-explicit` vs `codex-first-then-claude`) so the audit
124/// output is self-describing.
125fn backend_payload(
126    resolved: &LlmEmbedding,
127    chain_label: &str,
128    cli: &Cli,
129    is_auto: bool,
130) -> DryRunBackendOutput {
131    // `model_label()` returns `<flavour>:<model>` — split on the FIRST
132    // colon so model names with colons (rare but possible) survive.
133    // `flavour` must be a `&'static str` (the struct field type), so we
134    // leak the slice into a `Box<str>` to obtain a `'static` reference.
135    let label = resolved.model_label();
136    let (flavour, model) = match label.split_once(':') {
137        Some((f, m)) => (f, m.to_string()),
138        None => ("unknown", label.to_string()),
139    };
140    let flavour: &'static str = Box::leak(flavour.to_string().into_boxed_str());
141
142    // Re-probe PATH to surface the binary path the audit envelope
143    // promises. We prefer `which::which` over the private `LlmEmbedding`
144    // field so this file compiles independently of the `extract`
145    // module's internal layout. The result is canonicalized when
146    // possible so symlinks and shim wrappers don't leak location.
147    let binary = which::which(if is_auto {
148        // For Auto, prefer whichever the real spawn would pick first.
149        if which::which("codex").is_ok() {
150            "codex"
151        } else {
152            "claude"
153        }
154    } else {
155        flavour
156    })
157    .ok()
158    .and_then(|p| std::fs::canonicalize(&p).ok().or(Some(p)))
159    .map(|p| p.display().to_string())
160    .unwrap_or_default();
161
162    // Backend string is the `LlmBackendChoice` name for clarity in CI
163    // logs (operators filter on `backend == "codex"` etc.).
164    let backend = match cli.llm_backend {
165        LlmBackendChoice::Auto => {
166            if flavour == "codex" {
167                "codex"
168            } else {
169                "claude"
170            }
171        }
172        LlmBackendChoice::Codex => "codex",
173        LlmBackendChoice::Claude => "claude",
174        LlmBackendChoice::None => "none",
175    };
176
177    DryRunBackendOutput {
178        action: "dry_run_backend",
179        backend,
180        binary,
181        model,
182        flavour,
183        chain: if chain_label == "codex-first-then-claude" {
184            cli.llm_fallback.clone()
185        } else {
186            chain_label.to_string()
187        },
188        strict_env_clear: is_strict_env_clear(),
189    }
190}