Skip to main content

sqlite_graphrag/spawn/
env_whitelist.rs

1//! Env whitelist for LLM subprocess spawners (v1.0.83, ADR-0041).
2//!
3//! Unifies the duplicated `env_clear()` + re-injection logic that previously
4//! lived in `src/commands/{claude_runner,codex_spawn,ingest_claude}.rs`.
5//!
6//! ## OAuth-only mandate preserved
7//!
8//! `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` are INTENTIONALLY ABSENT —
9//! rejected by upstream guards in `claude_runner.rs`, `codex_spawn.rs`,
10//! `ingest_claude.rs` and `extract/llm_embedding.rs` per ADR-0011, ADR-0025
11//! and ADR-0041. The guards reject these vars regardless of whether they
12//! reach the subprocess; the env whitelist is the SECOND line of defence.
13//!
14//! ## Custom provider support (v1.0.83)
15//!
16//! `ANTHROPIC_AUTH_TOKEN` and `ANTHROPIC_BASE_URL` are preserved so that
17//! Claude Code can authenticate against a custom Anthropic-compatible
18//! endpoint (MiniMax/api.minimax.io, OpenRouter, corporate gateways). The
19//! `--bare` flag remains PROHIBITED — these vars only flow to the
20//! subprocess when the user opts into a custom provider via env vars.
21//!
22//! ## Strict mode (compliance)
23//!
24//! When `SQLITE_GRAPHRAG_STRICT_ENV_CLEAR=1` (or `--strict-env-clear` flag)
25//! is active, only `PATH` is preserved. This covers environments that
26//! forbid credential forwarding via env vars entirely.
27
28use std::process::Command;
29
/// Allowlist of environment variable names that are copied from the parent
/// process onto spawned Claude/Codex subprocesses in default (non-strict) mode.
///
/// `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` are deliberately not listed here.
///
/// The position of an entry carries no meaning: the child environment is
/// cleared first and each listed variable is then set individually, so the
/// result does not depend on iteration order.
pub const PRESERVED_ENV_VARS: &[&str] = &[
    // Process basics (POSIX) and XDG base directories
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "TERM",
    "LANG",
    "XDG_CONFIG_HOME",
    "XDG_DATA_HOME",
    "XDG_RUNTIME_DIR",
    "XDG_CACHE_HOME",
    // Scratch / temporary directory locations
    "TMPDIR",
    "TMP",
    "TEMP",
    // Fallback library search path for the macOS dynamic linker
    "DYLD_FALLBACK_LIBRARY_PATH",
    // Claude Code configuration directory
    "CLAUDE_CONFIG_DIR",
    // v1.0.83 (ADR-0041): custom provider settings for Claude Code
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "CLAUDE_CODE_ENTRYPOINT",
    // v1.0.83 (ADR-0041): custom provider settings for Codex CLI
    "CODEX_ACCESS_TOKEN",
    "OPENAI_BASE_URL",
    // v1.0.83 (ADR-0041): telemetry opt-out and observability endpoint override
    "DISABLE_TELEMETRY",
    "OTEL_EXPORTER_OTLP_ENDPOINT",
];
65
/// Additional allowlist entries that exist only in Windows builds.
///
/// In default (non-strict) mode these names are forwarded to the subprocess
/// in addition to [`PRESERVED_ENV_VARS`].
#[cfg(windows)]
pub const PRESERVED_ENV_VARS_WINDOWS: &[&str] = &[
    // Per-user application data and profile locations
    "LOCALAPPDATA",
    "APPDATA",
    "USERPROFILE",
    // System root, command interpreter and executable extension list
    "SystemRoot",
    "COMSPEC",
    "PATHEXT",
    // Home directory expressed as path + drive
    "HOMEPATH",
    "HOMEDRIVE",
];
78
79/// Apply the v1.0.83 env whitelist to a `Command`.
80///
81/// In strict mode, only `PATH` is preserved (compliance environments).
82/// In default mode, the full `PRESERVED_ENV_VARS` set is applied.
83pub fn apply_env_whitelist(cmd: &mut Command, strict: bool) {
84    cmd.env_clear();
85    if strict {
86        if let Ok(path) = std::env::var("PATH") {
87            cmd.env("PATH", path);
88        }
89        return;
90    }
91    for var in PRESERVED_ENV_VARS {
92        if let Ok(val) = std::env::var(var) {
93            cmd.env(var, val);
94        }
95    }
96    #[cfg(windows)]
97    for var in PRESERVED_ENV_VARS_WINDOWS {
98        if let Ok(val) = std::env::var(var) {
99            cmd.env(var, val);
100        }
101    }
102}
103
/// Detect whether strict env-clear mode is requested.
///
/// Reads `SQLITE_GRAPHRAG_STRICT_ENV_CLEAR` from the process environment and
/// returns `true` when its value, after trimming surrounding whitespace, is
/// `1`, or is `true` / `yes` in any ASCII letter case (`TRUE`, `Yes`,
/// `tRuE`, ...).
///
/// Returns `false` when the variable is unset, empty, not valid Unicode, or
/// holds any other value (`0`, `false`, `no`, `on`, ...).
pub fn is_strict_env_clear() -> bool {
    std::env::var("SQLITE_GRAPHRAG_STRICT_ENV_CLEAR")
        .map(|raw| {
            // Trim so that a stray space or newline in a compliance flag
            // fails closed (strict) rather than open.
            let flag = raw.trim();
            flag == "1"
                || flag.eq_ignore_ascii_case("true")
                || flag.eq_ignore_ascii_case("yes")
        })
        .unwrap_or(false)
}
116
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;

    /// Sets environment variables for the duration of one test and puts the
    /// previous state back when dropped (including when the test panics).
    ///
    /// Restoring — rather than unconditionally removing — keeps a test from
    /// leaking changes such as an overwritten `PATH` into later tests that
    /// run in the same process.
    struct EnvGuard {
        /// `(name, value before the test touched it)`; `None` means "was unset".
        saved: Vec<(&'static str, Option<OsString>)>,
    }

    impl EnvGuard {
        /// Set every `(name, value)` pair, remembering what each name held before.
        fn set(vars: &[(&'static str, &str)]) -> Self {
            let saved = vars
                .iter()
                .map(|&(name, value)| {
                    let previous = std::env::var_os(name);
                    // SAFETY: every test using this guard is marked
                    // `serial_test::serial(env)`, so no parallel mutation.
                    unsafe {
                        std::env::set_var(name, value);
                    }
                    (name, previous)
                })
                .collect();
            Self { saved }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            for (name, previous) in self.saved.drain(..) {
                // SAFETY: the guard is dropped inside the same
                // `serial_test::serial(env)` test that created it.
                unsafe {
                    match previous {
                        Some(value) => std::env::set_var(name, value),
                        None => std::env::remove_var(name),
                    }
                }
            }
        }
    }

    /// Helper that records the env vars set on a Command without spawning it.
    ///
    /// Entries whose name or value is not valid Unicode, and entries that
    /// represent an explicit removal, are skipped.
    fn captured_env(cmd: &Command) -> Vec<(String, String)> {
        cmd.get_envs()
            .filter_map(|(k, v)| {
                let k = k.to_str()?.to_string();
                let v = v?.to_str()?.to_string();
                Some((k, v))
            })
            .collect()
    }

    /// True when `envs` contains a variable called `name` (any value).
    fn has_var(envs: &[(String, String)], name: &str) -> bool {
        envs.iter().any(|(k, _)| k == name)
    }

    #[test]
    #[serial_test::serial(env)]
    fn whitelist_includes_custom_provider_vars() {
        let expected = [
            ("ANTHROPIC_AUTH_TOKEN", "sk-cp-test"),
            ("ANTHROPIC_BASE_URL", "https://api.minimax.io/anthropic"),
            ("OPENAI_BASE_URL", "https://api.openrouter.ai/v1"),
        ];
        let _env = EnvGuard::set(&expected);

        // The command is never spawned; the program path is irrelevant.
        let mut cmd = std::process::Command::new("/usr/bin/false");
        apply_env_whitelist(&mut cmd, false);
        let envs = captured_env(&cmd);

        for (name, value) in expected {
            assert!(
                envs.iter().any(|(k, v)| k == name && v == value),
                "{} not preserved",
                name
            );
        }
    }

    #[test]
    #[serial_test::serial(env)]
    fn whitelist_excludes_api_key_vars() {
        let _env = EnvGuard::set(&[
            ("ANTHROPIC_API_KEY", "sk-ant-violation"),
            ("OPENAI_API_KEY", "sk-violation"),
        ]);

        let mut cmd = std::process::Command::new("/usr/bin/false");
        apply_env_whitelist(&mut cmd, false);
        let envs = captured_env(&cmd);

        assert!(
            !has_var(&envs, "ANTHROPIC_API_KEY"),
            "ANTHROPIC_API_KEY must NEVER reach subprocess"
        );
        assert!(
            !has_var(&envs, "OPENAI_API_KEY"),
            "OPENAI_API_KEY must NEVER reach subprocess"
        );
    }

    #[test]
    #[serial_test::serial(env)]
    fn strict_mode_drops_credentials() {
        // PATH is overwritten here; the guard restores the real value afterwards.
        let _env = EnvGuard::set(&[
            ("ANTHROPIC_AUTH_TOKEN", "sk-cp-strict-test"),
            ("PATH", "/usr/bin:/bin"),
        ]);

        let mut cmd = std::process::Command::new("/usr/bin/false");
        apply_env_whitelist(&mut cmd, true);
        let envs = captured_env(&cmd);

        assert!(
            !has_var(&envs, "ANTHROPIC_AUTH_TOKEN"),
            "strict mode must drop credentials"
        );
        assert!(
            envs.iter()
                .any(|(k, v)| k == "PATH" && v == "/usr/bin:/bin"),
            "strict mode preserves PATH only"
        );
    }

    #[test]
    #[serial_test::serial(env)]
    fn strict_flag_recognises_documented_values() {
        let cases = [
            ("1", true),
            ("true", true),
            ("TRUE", true),
            ("yes", true),
            ("0", false),
            ("false", false),
        ];
        for (raw, expected) in cases {
            let _env = EnvGuard::set(&[("SQLITE_GRAPHRAG_STRICT_ENV_CLEAR", raw)]);
            assert_eq!(
                is_strict_env_clear(),
                expected,
                "unexpected result for value {:?}",
                raw
            );
        }
    }
}