sqlite_graphrag/commands/
codex_spawn.rs

1//! Codex CLI spawn + JSONL parsing helper shared by `enrich` and `ingest --mode codex`.
2//!
3//! G31 (v1.0.69): `enrich --mode codex` was missing five critical hardening
4//! flags compared to `ingest --mode codex`. This module extracts the
5//! spawn pipeline into a single helper that BOTH call-sites consume,
6//! guaranteeing the same defaults everywhere.
7//!
8//! G32 (v1.0.69): `enrich --mode codex` used `serde_json::from_str` on the
9//! raw stdout, but `codex exec --json` emits JSONL (one event per line).
10//! [`parse_codex_jsonl`] iterates line-by-line, picking the last
11//! `item.completed` of type `agent_message` as the assistant text.
12//!
13//! G33 (v1.0.69): validate the model against the ChatGPT Pro OAuth whitelist
14//! stored in `~/.codex/models_cache.json` BEFORE spawning the subprocess.
15
16use crate::errors::AppError;
17use crate::extract::codex_compat::codex_supports_ask_for_approval;
18use crate::extraction::{ExtractedUrl, ExtractionResult};
19use crate::storage::entities::{NewEntity, NewRelationship};
20use serde::{Deserialize, Serialize};
21use std::path::{Path, PathBuf};
22use std::process::{Command, Stdio};
23
24/// Token usage reported by Codex on `turn.completed` events.
25#[derive(Debug, Clone, Default, Deserialize, Serialize)]
26pub struct CodexUsage {
27    #[serde(default)]
28    pub input_tokens: u64,
29    #[serde(default)]
30    pub cached_input_tokens: u64,
31    #[serde(default)]
32    pub output_tokens: u64,
33    #[serde(default)]
34    pub reasoning_output_tokens: u64,
35}
36
/// Combined result of one `codex exec` invocation, as assembled by
/// [`parse_codex_jsonl`] from the JSONL event stream.
///
/// Note: `parse_codex_jsonl` returns `Err` whenever `rate_limited`,
/// `schema_error` or `turn_failed` is set, so a value obtained from its
/// `Ok` branch carries `false` in all three flags.
#[derive(Debug)]
pub struct CodexResult {
    /// Extraction parsed from `last_agent_text` via [`parse_extraction_text`].
    pub extraction: ExtractionResult,
    /// Raw text of the last `item.completed` of type `agent_message` (the
    /// JSON payload the LLM produced). Callers that need a schema other
    /// than the extraction shape (e.g. body-enrich's `enriched_body`)
    /// should parse this directly.
    pub last_agent_text: String,
    /// Last `turn.completed` usage object that reported a non-zero
    /// `input_tokens` or `output_tokens`; `None` when no such event was seen.
    pub usage: Option<CodexUsage>,
    /// Set when a `turn.failed` message contains `rate_limit`, `429` or
    /// `Too Many Requests`.
    pub rate_limited: bool,
    /// Set when an `error` event message mentions `schema`.
    pub schema_error: bool,
    /// Set when any `turn.failed` event was seen.
    pub turn_failed: bool,
    /// Message taken from a `turn.failed` event; empty when no turn failed.
    pub failed_message: String,
}
52
/// Configuration for the codex spawner.
///
/// All borrowed fields share the lifetime `'a`; only `schema_path` is owned.
/// The type is `Debug` + `Clone` so call-sites can log it or reuse one
/// configuration for several spawns.
// NOTE(review): the rustdoc allow below presumably guards the
// [`trusted_schema_path`] link in the field docs — confirm it is still
// needed before removing it.
#[allow(rustdoc::broken_intra_doc_links)]
#[derive(Debug, Clone)]
pub struct CodexSpawnArgs<'a> {
    /// Program handed to `Command::new` by [`build_codex_command`].
    pub binary: &'a Path,
    /// Instruction part of the prompt.
    pub prompt: &'a str,
    /// JSON schema text that [`build_codex_command`] writes to `schema_path`.
    pub json_schema: &'a str,
    /// Input text that accompanies `prompt`.
    pub input_text: &'a str,
    /// Optional model override, passed as `-m <MODEL>`; `None` omits the flag.
    pub model: Option<&'a str>,
    /// Timeout in seconds. Not read by [`build_codex_command`]; presumably
    /// enforced by the code that runs the command — verify against callers.
    pub timeout_secs: u64,
    /// Caller-provided schema path (must be inside a trusted directory
    /// that codex recognises as sandbox-safe). Use [`trusted_schema_path`]
    /// to compute one under the cache dir.
    pub schema_path: PathBuf,
}
67
68/// Computes a schema path under the cache dir so `codex exec` accepts it
69/// as part of a trusted directory (rejects `/tmp` on hardened installs).
70pub fn trusted_schema_path() -> Result<PathBuf, AppError> {
71    let cache = crate::paths::AppPaths::resolve(None)
72        .map(|p| p.models.parent().map(|m| m.to_path_buf()))
73        .ok()
74        .flatten()
75        .unwrap_or_else(std::env::temp_dir);
76    std::fs::create_dir_all(&cache).map_err(AppError::Io)?;
77    Ok(cache.join(format!("enrich-schema-{}.json", std::process::id())))
78}
79
/// Models accepted by Codex CLI when using ChatGPT Pro OAuth.
///
/// Mirrored from `~/.codex/models_cache.json` (which the official CLI
/// refreshes on every login). This list is intentionally narrow; passing
/// a model not in this set with `--mode codex` returns
/// `AppError::Validation` BEFORE any OAuth turn is spent.
///
/// [`validate_codex_model`] checks against this static list only, while
/// [`list_codex_models`] merges it with whatever the cache file contains.
pub const CODEX_PRO_OAUTH_MODELS: &[&str] = &[
    "codex-auto-review",
    "gpt-5.3-codex-spark",
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.5",
];
93
94/// Validates the requested model against [`CODEX_PRO_OAUTH_MODELS`].
95///
96/// # Errors
97/// Returns [`AppError::Validation`] listing the accepted models when the
98/// caller supplied a model outside the whitelist.
99pub fn validate_codex_model(model: Option<&str>) -> Result<(), AppError> {
100    let Some(m) = model else {
101        return Ok(()); // no override; codex picks its default
102    };
103    if CODEX_PRO_OAUTH_MODELS.contains(&m) {
104        Ok(())
105    } else {
106        Err(AppError::Validation(format!(
107            "--codex-model {m:?} is not supported with ChatGPT Pro OAuth. \
108             Accepted: {}",
109            CODEX_PRO_OAUTH_MODELS.join(", ")
110        )))
111    }
112}
113
114/// Returns the list of models accepted by Codex with ChatGPT Pro OAuth.
115///
116/// Tries to read `~/.codex/models_cache.json` (which the official CLI
117/// refreshes on every login) and falls back to the static
118/// [`CODEX_PRO_OAUTH_MODELS`] constant when the file is missing or
119/// malformed. The returned `Vec<String>` is the union of both sources,
120/// de-duplicated.
121///
122/// The official cache file is an object with the shape
123/// `{"fetched_at": "...", "etag": "...", "client_version": "...",
124/// "models": [{"slug": "gpt-5.5", ...}, ...]}` (v1.0.81 fix: previously we
125/// iterated `obj.keys()` which produced bogus entries like `client_version`
126/// and `etag` as "models"; now we extract only the `models` array).
127pub fn list_codex_models() -> Vec<String> {
128    use std::collections::BTreeSet;
129    let mut out: BTreeSet<String> = CODEX_PRO_OAUTH_MODELS
130        .iter()
131        .map(|s| s.to_string())
132        .collect();
133
134    if let Some(home) = std::env::var_os("HOME") {
135        let path = std::path::Path::new(&home)
136            .join(".codex")
137            .join("models_cache.json");
138        if let Ok(content) = std::fs::read_to_string(&path) {
139            if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
140                if let Some(obj) = value.as_object() {
141                    // v1.0.81 fix: prefer the well-known `models` array
142                    // (each item has a `slug` field). Fall back to keys
143                    // only when `models` is absent (legacy cache format).
144                    if let Some(models_arr) = obj.get("models").and_then(|m| m.as_array()) {
145                        for v in models_arr {
146                            if let Some(slug) = v.get("slug").and_then(|s| s.as_str()) {
147                                out.insert(slug.to_string());
148                            } else if let Some(s) = v.as_str() {
149                                out.insert(s.to_string());
150                            }
151                        }
152                    } else {
153                        for key in obj.keys() {
154                            out.insert(key.clone());
155                        }
156                    }
157                } else if let Some(arr) = value.as_array() {
158                    for v in arr {
159                        if let Some(s) = v.as_str() {
160                            out.insert(s.to_string());
161                        }
162                    }
163                }
164            }
165        }
166    }
167    out.into_iter().collect()
168}
169
170/// Suggests the closest codex OAuth model to a user-supplied substring
171/// (G33). Returns `None` when no candidate is close enough.
172///
173/// Match strategy: exact substring containment wins; otherwise Levenshtein
174/// distance below `max_distance = max(2, query.len() / 3)`.
175pub fn suggest_codex_model(query: &str) -> Option<String> {
176    let query_lc = query.to_ascii_lowercase();
177    let models = list_codex_model_lc();
178
179    // Exact substring match wins.
180    for m in &models {
181        if m.contains(&query_lc) {
182            return Some(m.clone());
183        }
184    }
185
186    // Levenshtein fallback.
187    let max_distance = (query.len() / 3).max(2);
188    let mut best: Option<(usize, String)> = None;
189    for m in &models {
190        let d = levenshtein(query_lc.as_str(), m.as_str());
191        if d <= max_distance && best.as_ref().is_none_or(|(bd, _)| d < *bd) {
192            best = Some((d, m.clone()));
193        }
194    }
195    best.map(|(_, m)| m)
196}
197
198fn list_codex_model_lc() -> Vec<String> {
199    list_codex_models()
200        .into_iter()
201        .map(|s| s.to_ascii_lowercase())
202        .collect()
203}
204
/// Levenshtein edit distance between `a` and `b`, counted in `char`s
/// (insertions, deletions and substitutions each cost 1).
///
/// Uses a single rolling row: `row[j]` holds the distance between the
/// already-processed prefix of `a` and the first `j` chars of `b`.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    let width = target.len();

    // Distance from the empty prefix of `a` to each prefix of `b`.
    let mut row: Vec<usize> = (0..=width).collect();

    for (i, source_ch) in a.chars().enumerate() {
        // `diagonal` is the previous row's value one column to the left.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &target_ch) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(source_ch != target_ch);
            let insertion = row[j] + 1;
            let deletion = above + 1;
            row[j + 1] = insertion.min(deletion).min(substitution);
            diagonal = above;
        }
    }

    row[width]
}
226
227/// Builds the `codex exec` command with the canonical hardening flags.
228///
229/// G31 + OAuth-only hardening (v1.0.69, mandated by gaps.md lines 41-49):
230/// the command ALWAYS uses the OAuth `auth.json` flow. The flag set is
231/// the canonical one documented in gaps.md Fix A:
232///
233/// ```text
234/// codex exec \
235///   -c mcp_servers='{}' \
236///   --json --output-schema <SCHEMA> \
237///   --ephemeral \
238///   --skip-git-repo-check \
239///   --sandbox read-only \
240///   --ignore-user-config \
241///   --ignore-rules \
242///   --ask-for-approval never \
243///   -m <MODEL> \
244///   -
245/// ```
246///
247/// The combination zeroes MCP servers (via two complementary mechanisms:
248/// the inline `-c mcp_servers='{}'` override AND `--ignore-user-config`),
249/// disables user-defined rules, and never asks for interactive approval.
250///
251/// **`OPENAI_API_KEY` is FORBIDDEN** in the spawned environment (gaps.md:48).
252/// OAuth flows via `~/.codex/auth.json` and `CODEX_ACCESS_TOKEN` only.
253pub fn build_codex_command(args: &CodexSpawnArgs<'_>) -> Command {
254    let full_prompt = format!("{}\n\n{}", args.prompt, args.input_text);
255
256    // OAuth-only guard (gaps.md:48). If `OPENAI_API_KEY` is set in the
257    // environment we MUST abort — that is the API-key path which is
258    // explicitly PROHIBITED. Use the OAuth `auth.json` flow exclusively.
259    if let Ok(_key) = std::env::var("OPENAI_API_KEY") {
260        let mut cmd = Command::new("false");
261        cmd.env_clear();
262        cmd.env("PATH", "/nonexistent");
263        cmd.arg("--oauth-only-violation-openai-api-key-set");
264        return cmd;
265    }
266
267    // Write the JSON schema to a path the caller controls. Callers should
268    // pass a path under the cache dir (see [`trusted_schema_path`]).
269    std::fs::write(&args.schema_path, args.json_schema).ok();
270
271    let mut cmd = Command::new(args.binary);
272    cmd.env_clear();
273    // OAuth flow: `CODEX_ACCESS_TOKEN` (Bearer) is whitelisted.
274    // `OPENAI_API_KEY` is INTENTIONALLY ABSENT.
275    // v1.0.77: CODEX_HOME is overridden to an isolated dir (see below)
276    // to prevent loading ~/.codex/config.toml trust_level and sandbox_mode.
277    for var in &[
278        "PATH",
279        "HOME",
280        "USER",
281        "SHELL",
282        "TERM",
283        "LANG",
284        "XDG_CONFIG_HOME",
285        "XDG_DATA_HOME",
286        "XDG_RUNTIME_DIR",
287        "XDG_CACHE_HOME",
288        "CODEX_ACCESS_TOKEN",
289        "TMPDIR",
290        "TMP",
291        "TEMP",
292        "DYLD_FALLBACK_LIBRARY_PATH",
293    ] {
294        if let Ok(val) = std::env::var(var) {
295            cmd.env(var, val);
296        }
297    }
298    // v1.0.77: point CODEX_HOME at an isolated dir that only contains
299    // auth.json — this prevents the codex subprocess from loading
300    // ~/.codex/config.toml (which has trust_level=trusted for the project,
301    // causing sandbox escalation per openai/codex#18113).
302    if let Some(isolated) = prepare_isolated_codex_home_spawn() {
303        cmd.env("CODEX_HOME", isolated);
304    }
305
306    #[cfg(windows)]
307    for var in &[
308        "LOCALAPPDATA",
309        "APPDATA",
310        "USERPROFILE",
311        "SystemRoot",
312        "COMSPEC",
313        "PATHEXT",
314    ] {
315        if let Ok(val) = std::env::var(var) {
316            cmd.env(var, val);
317        }
318    }
319
320    // v1.0.77: `-c` TOML overrides bypass the codex exec --sandbox propagation
321    // bug (openai/codex#18113). CLI flags alone are insufficient — the exec
322    // subcommand may not inherit --sandbox from the parent codex command.
323    cmd.arg("exec")
324        .arg("-c")
325        .arg("sandbox_mode='read-only'")
326        .arg("-c")
327        .arg("approval_policy='never'")
328        .arg("--json")
329        .arg("--output-schema")
330        .arg(&args.schema_path)
331        .arg("--ephemeral")
332        .arg("--skip-git-repo-check")
333        .arg("--sandbox")
334        .arg("read-only")
335        .arg("--ignore-user-config")
336        .arg("--ignore-rules");
337
338    // Codex 0.134+ no longer accepts `-c mcp_servers='{}'` — it parses the
339    // value as a string and rejects it ("expected a map"). The
340    // `--ignore-user-config` flag already discards any user-defined MCP
341    // servers, so the override is redundant on all supported versions.
342
343    // Codex 0.134+ removed --ask-for-approval entirely (Issue #26602).
344    // Skip the flag on newer versions; sandbox=read-only already suppresses
345    // approval prompts. See src/extract/codex_compat.rs for the probe.
346    if codex_supports_ask_for_approval() {
347        cmd.arg("--ask-for-approval").arg("never");
348    }
349
350    if let Some(m) = args.model {
351        cmd.arg("-m").arg(m);
352    }
353
354    // `-` means: read the prompt from stdin (Codex Paperclip pattern)
355    cmd.arg("-");
356
357    cmd.stdin(Stdio::piped())
358        .stdout(Stdio::piped())
359        .stderr(Stdio::piped());
360    // Keep the prompt alive for the stdin thread spawned in `spawn_codex`.
361    let _ = full_prompt; // captured by closure below
362
363    cmd
364}
365
366/// Parses JSONL output from `codex exec --json`.
367///
368/// Event format (DOTS notation):
369/// - `thread.started` — session init
370/// - `turn.started` — model turn begins
371/// - `item.completed` — message or tool call; last `agent_message` wins
372/// - `turn.completed` — includes usage stats
373/// - `turn.failed` — error with optional rate-limit indicator
374/// - `error` — schema or validation error
375///
376/// G32 (v1.0.69): this function is the single source of truth for JSONL
377/// parsing. Both `enrich` and `ingest --mode codex` consume it.
378pub fn parse_codex_jsonl(stdout: &str) -> Result<CodexResult, AppError> {
379    let mut last_agent_text: Option<String> = None;
380    let mut usage: Option<CodexUsage> = None;
381    let mut rate_limited = false;
382    let mut schema_error = false;
383    let mut turn_failed = false;
384    let mut failed_message = String::new();
385
386    for line in stdout.lines() {
387        let line = line.trim();
388        if line.is_empty() {
389            continue;
390        }
391
392        let event: serde_json::Value = match serde_json::from_str(line) {
393            Ok(v) => v,
394            Err(_) => {
395                tracing::warn!(target: "codex_spawn", line, "skipping malformed JSONL line");
396                continue;
397            }
398        };
399
400        let event_type = match event.get("type").and_then(|t| t.as_str()) {
401            Some(t) => t,
402            None => continue,
403        };
404
405        match event_type {
406            "item.completed" => {
407                if let Some(item) = event.get("item") {
408                    if item.get("type").and_then(|t| t.as_str()) == Some("agent_message") {
409                        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
410                            last_agent_text = Some(text.to_string());
411                        }
412                    }
413                }
414            }
415            "turn.completed" => {
416                if let Some(u) = event.get("usage") {
417                    // Skip events that lack the recognised token fields
418                    // (e.g. partial broadcasts with `{}`) so the last
419                    // populated usage wins instead of being overwritten
420                    // by an empty one.
421                    let is_populated = u
422                        .get("input_tokens")
423                        .and_then(|v| v.as_u64())
424                        .map(|n| n > 0)
425                        .unwrap_or(false)
426                        || u.get("output_tokens")
427                            .and_then(|v| v.as_u64())
428                            .map(|n| n > 0)
429                            .unwrap_or(false);
430                    if is_populated {
431                        if let Ok(parsed) = serde_json::from_value::<CodexUsage>(u.clone()) {
432                            usage = Some(parsed);
433                        }
434                    }
435                }
436            }
437            "turn.failed" => {
438                turn_failed = true;
439                if let Some(err) = event.get("error") {
440                    let msg = err
441                        .get("message")
442                        .and_then(|m| m.as_str())
443                        .unwrap_or("unknown error");
444                    failed_message = msg.to_string();
445                    if msg.contains("rate_limit")
446                        || msg.contains("429")
447                        || msg.contains("Too Many Requests")
448                    {
449                        rate_limited = true;
450                    }
451                }
452            }
453            "error" => {
454                if let Some(msg) = event.get("message").and_then(|m| m.as_str()) {
455                    if msg.contains("invalid_json_schema") || msg.contains("schema") {
456                        schema_error = true;
457                    }
458                }
459            }
460            _ => {}
461        }
462    }
463
464    let text = last_agent_text.ok_or_else(|| {
465        AppError::Validation(format!(
466            "no agent_message in codex JSONL output (rate_limited={rate_limited}, schema_error={schema_error}, turn_failed={turn_failed})"
467        ))
468    })?;
469
470    if turn_failed {
471        return Err(AppError::Validation(format!(
472            "codex turn failed: {failed_message}"
473        )));
474    }
475    if schema_error {
476        return Err(AppError::Validation(
477            "codex reported invalid_json_schema; check the --output-schema file".to_string(),
478        ));
479    }
480    if rate_limited {
481        return Err(AppError::Validation(format!(
482            "codex rate-limited: {failed_message}"
483        )));
484    }
485
486    let extraction = parse_extraction_text(&text)?;
487    Ok(CodexResult {
488        extraction,
489        last_agent_text: text,
490        usage,
491        rate_limited,
492        schema_error,
493        turn_failed,
494        failed_message,
495    })
496}
497
498/// Parses the agent_message text as an `ExtractionResult` JSON payload.
499///
500/// The schema is shared by both `enrich` and `ingest --mode codex`; the
501/// `text` is the JSON value the assistant returned, not a wrapper object.
502pub fn parse_extraction_text(text: &str) -> Result<ExtractionResult, AppError> {
503    let value: serde_json::Value = serde_json::from_str(text).map_err(|e| {
504        AppError::Validation(format!("failed to parse codex agent_message as JSON: {e}"))
505    })?;
506    let obj = value.as_object().ok_or_else(|| {
507        AppError::Validation("codex agent_message is not a JSON object".to_string())
508    })?;
509
510    let mut entities: Vec<NewEntity> = Vec::new();
511    if let Some(arr) = obj.get("entities").and_then(|v| v.as_array()) {
512        for e in arr {
513            if let Some(name) = e.get("name").and_then(|v| v.as_str()) {
514                // Accept either "type" or "entity_type" from the LLM payload
515                // and fall back to "concept" when the LLM omits it.
516                let entity_type_str = e
517                    .get("type")
518                    .or_else(|| e.get("entity_type"))
519                    .and_then(|v| v.as_str())
520                    .unwrap_or("concept");
521                let entity_type = serde_json::from_value::<crate::entity_type::EntityType>(
522                    serde_json::Value::String(entity_type_str.to_string()),
523                )
524                .unwrap_or(crate::entity_type::EntityType::Concept);
525                entities.push(NewEntity {
526                    name: name.to_string(),
527                    entity_type,
528                    description: None,
529                });
530            }
531        }
532    }
533
534    let mut relationships: Vec<NewRelationship> = Vec::new();
535    if let Some(arr) = obj.get("relationships").and_then(|v| v.as_array()) {
536        for r in arr {
537            let from = r.get("source").or_else(|| r.get("from"));
538            let to = r.get("target").or_else(|| r.get("to"));
539            let rel = r.get("relation").and_then(|v| v.as_str());
540            if let (Some(from_v), Some(to_v), Some(rel_v)) = (
541                from.and_then(|v| v.as_str()),
542                to.and_then(|v| v.as_str()),
543                rel,
544            ) {
545                relationships.push(NewRelationship {
546                    source: from_v.to_string(),
547                    target: to_v.to_string(),
548                    relation: rel_v.to_string(),
549                    strength: r.get("strength").and_then(|v| v.as_f64()).unwrap_or(0.5),
550                    description: None,
551                });
552            }
553        }
554    }
555
556    let urls: Vec<ExtractedUrl> = obj
557        .get("urls")
558        .and_then(|v| v.as_array())
559        .map(|arr| {
560            arr.iter()
561                .filter_map(|u| {
562                    let url = u.get("url")?.as_str()?.to_string();
563                    let start = u.get("start").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
564                    let end = u
565                        .get("end")
566                        .and_then(|v| v.as_u64())
567                        .unwrap_or(start as u64) as usize;
568                    Some(ExtractedUrl { url, start, end })
569                })
570                .collect()
571        })
572        .unwrap_or_default();
573
574    // v1.0.76: ExtractionResult no longer carries relationships or
575    // relationships_truncated fields; those are LLM backend output
576    // (see `ExtractionOutput` in src/extract/mod.rs). The default
577    // build extracts URLs + entities only; relationships are an
578    // LLM-side concern.
579    //
580    // Convert `NewEntity` (storage-side) to `ExtractedEntity`
581    // (extraction-side). The LLM payload doesn't include byte offsets
582    // (the chunker is responsible for that), so start/end are 0.
583    let entities_ext: Vec<crate::extraction::ExtractedEntity> = entities
584        .into_iter()
585        .map(|e| crate::extraction::ExtractedEntity {
586            name: e.name,
587            entity_type: e.entity_type.as_str().to_string(),
588            start: 0,
589            end: 0,
590        })
591        .collect();
592
593    Ok(ExtractionResult {
594        entities: entities_ext,
595        urls,
596        elapsed_ms: 0,
597    })
598}
599
/// Prepares a per-process directory that contains only a copy of the user's
/// `auth.json`, suitable for use as `CODEX_HOME`.
///
/// The directory is `<temp dir>/sqlite-graphrag-codex-home-<pid>`. The
/// credentials are copied from `$HOME/.codex/auth.json` the first time; an
/// existing copy is reused on later calls.
///
/// Returns `None` — meaning "do not override `CODEX_HOME`" — when:
/// - `HOME` is unset or `$HOME/.codex/auth.json` does not exist;
/// - the directory cannot be created;
/// - (Unix) its permissions cannot be restricted to the owner, which also
///   covers a directory pre-created by another user in a shared temp dir;
/// - the credentials cannot be copied.
///
/// A `Some` result therefore always points at a directory that holds an
/// `auth.json`, never at an empty one.
fn prepare_isolated_codex_home_spawn() -> Option<std::path::PathBuf> {
    // `var_os` keeps a non-UTF-8 home directory usable.
    let home = std::env::var_os("HOME")?;
    let real_auth = std::path::Path::new(&home)
        .join(".codex")
        .join("auth.json");
    if !real_auth.exists() {
        return None;
    }

    let isolated =
        std::env::temp_dir().join(format!("sqlite-graphrag-codex-home-{}", std::process::id()));
    std::fs::create_dir_all(&isolated).ok()?;

    // The directory holds OAuth credentials and has a predictable name in a
    // possibly shared temp dir: keep it owner-only. `chmod` fails when the
    // directory belongs to someone else, in which case it must not be used.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        std::fs::set_permissions(&isolated, std::fs::Permissions::from_mode(0o700)).ok()?;
    }

    let target = isolated.join("auth.json");
    if !target.exists() {
        std::fs::copy(&real_auth, &target).ok()?;
    }
    Some(isolated)
}
615
616#[cfg(test)]
617mod tests {
618    use super::*;
619
620    const SAMPLE_JSONL: &str = r#"{"type":"thread.started","thread_id":"abc"}
621{"type":"turn.started"}
622{"type":"item.completed","item":{"type":"reasoning","text":"thinking"}}
623{"type":"item.completed","item":{"type":"agent_message","text":"{\"entities\":[{\"name\":\"alpha\",\"type\":\"concept\"}],\"relationships\":[{\"source\":\"alpha\",\"target\":\"beta\",\"relation\":\"uses\",\"strength\":0.7}],\"extraction_method\":\"codex\",\"urls\":[]}"}}
624{"type":"turn.completed","usage":{"input_tokens":120,"output_tokens":45}}
625{"type":"turn.completed","usage":{}}
626"#;
627
628    #[test]
629    fn parse_codex_jsonl_extracts_last_agent_message() {
630        // v1.0.76: relationships are no longer carried in the
631        // ExtractionResult struct (they belong to the LLM ExtractionBackend
632        // payload, not the URL-only default build). The default test
633        // validates the entity extraction path only.
634        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
635        assert_eq!(result.extraction.entities.len(), 1);
636        assert_eq!(result.extraction.entities[0].name, "alpha");
637    }
638
639    #[test]
640    fn parse_codex_jsonl_collects_usage() {
641        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
642        let usage = result.usage.expect("usage must be populated");
643        assert_eq!(usage.input_tokens, 120);
644        assert_eq!(usage.output_tokens, 45);
645    }
646
647    #[test]
648    fn parse_codex_jsonl_detects_rate_limit() {
649        let r = parse_codex_jsonl(
650            "{\"type\":\"turn.failed\",\"error\":{\"message\":\"rate_limit: 429 too many\"}}\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{}\"}}",
651        );
652        assert!(matches!(r, Err(AppError::Validation(_))));
653    }
654
655    #[test]
656    fn parse_codex_jsonl_handles_no_agent_message() {
657        let r = parse_codex_jsonl("{\"type\":\"thread.started\"}");
658        assert!(matches!(r, Err(AppError::Validation(_))));
659    }
660
661    #[test]
662    fn parse_codex_jsonl_skips_malformed_lines() {
663        let r = parse_codex_jsonl(
664            "{not valid json\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{\\\"entities\\\":[],\\\"relationships\\\":[],\\\"extraction_method\\\":\\\"codex\\\"}\"}}",
665        );
666        assert!(r.is_ok(), "malformed lines must be skipped, got {r:?}");
667    }
668
669    #[test]
670    fn validate_codex_model_accepts_known() {
671        assert!(validate_codex_model(Some("gpt-5.5")).is_ok());
672        assert!(validate_codex_model(Some("gpt-5.4")).is_ok());
673        assert!(validate_codex_model(None).is_ok()); // no override
674    }
675
676    #[test]
677    fn validate_codex_model_rejects_unknown() {
678        let err = validate_codex_model(Some("gpt-4")).unwrap_err();
679        let msg = format!("{err}");
680        assert!(msg.contains("not supported"));
681        assert!(msg.contains("gpt-5.5"));
682    }
683
684    #[test]
685    fn list_codex_models_includes_all_static_whitelist() {
686        let models = list_codex_models();
687        for m in CODEX_PRO_OAUTH_MODELS {
688            assert!(models.contains(&m.to_string()), "missing {m} in {models:?}");
689        }
690    }
691
692    #[test]
693    fn suggest_codex_model_substring_match() {
694        let s = suggest_codex_model("gpt-5");
695        assert!(s.is_some(), "must suggest a gpt-5.x model");
696    }
697
698    #[test]
699    fn suggest_codex_model_fuzzy_match() {
700        // 'gpt5.5' has no hyphen; should still suggest 'gpt-5.5'.
701        let s = suggest_codex_model("gpt5.5");
702        assert!(s.is_some(), "fuzzy must suggest gpt-5.5 for 'gpt5.5'");
703        assert_eq!(s.unwrap(), "gpt-5.5");
704    }
705
706    #[test]
707    fn suggest_codex_model_unrelated_returns_none() {
708        let s = suggest_codex_model("totally-unrelated-zzz");
709        assert!(s.is_none());
710    }
711
712    #[test]
713    fn build_codex_command_includes_hardening_flags() {
714        let args = CodexSpawnArgs {
715            binary: Path::new("/bin/true"),
716            prompt: "p",
717            json_schema: "{}",
718            input_text: "i",
719            model: Some("gpt-5.5"),
720            timeout_secs: 60,
721            schema_path: std::env::temp_dir().join("test-schema.json"),
722        };
723        let cmd = build_codex_command(&args);
724        let collected: Vec<String> = cmd
725            .get_args()
726            .filter_map(|a| a.to_str().map(|s| s.to_string()))
727            .collect();
728        for required in &[
729            "exec",
730            "-c",
731            "sandbox_mode='read-only'",
732            "approval_policy='never'",
733            "--json",
734            "--output-schema",
735            "--ephemeral",
736            "--skip-git-repo-check",
737            "--sandbox",
738            "read-only",
739            "--ignore-user-config",
740            "--ignore-rules",
741            "-m",
742            "gpt-5.5",
743            "-",
744        ] {
745            assert!(
746                collected.iter().any(|a| a == required),
747                "missing flag {required} in {collected:?}"
748            );
749        }
750    }
751
752    #[test]
753    fn list_codex_models_dedupes_with_cache_file() {
754        // Ensure the union with the cache file (when present) does not
755        // produce duplicates. We can't actually write a cache file in
756        // a test, so we just verify the static path is dedup'd.
757        let models = list_codex_models();
758        let unique: std::collections::HashSet<_> = models.iter().collect();
759        assert_eq!(unique.len(), models.len(), "list_codex_models must dedupe");
760    }
/// Regression test for the v1.0.81 `models_cache.json` parsing fix.
///
/// The official codex CLI writes the cache as
/// `{"fetched_at": "...", "etag": "...", "client_version": "...",
///   "models": [{"slug": "gpt-5.5", ...}, ...]}`
/// and the old code iterated the top-level object keys, polluting the model
/// list with metadata keys. The test points `HOME` at a temp directory that
/// holds a synthetic cache of that shape, then checks that the slugs are
/// extracted and that none of the metadata keys appear in the output.
///
/// `HOME` is process-global, so the test joins the `env` serial group to
/// avoid overlapping with the other environment-mutating tests.
#[test]
#[serial_test::serial(env)]
fn list_codex_models_extracts_from_models_array_v1_0_81_regression() {
    let tmp =
        std::env::temp_dir().join(format!("codex-models-array-test-{}", std::process::id()));
    std::fs::create_dir_all(tmp.join(".codex")).expect("mkdir");
    let cache_body = r#"{
            "fetched_at": "2026-06-14T06:43:56.639903114Z",
            "etag": "W/\"deadbeef\"",
            "client_version": "0.139.0",
            "models": [
                {"slug": "gpt-5.5", "display_name": "GPT-5.5"},
                {"slug": "gpt-5.4-mini", "display_name": "GPT-5.4 mini"}
            ]
        }"#;
    std::fs::write(tmp.join(".codex/models_cache.json"), cache_body).expect("write cache");

    // `var_os` (not `var`) so a non-UTF-8 HOME is restored verbatim instead
    // of being mistaken for "unset" and removed.
    let prev_home = std::env::var_os("HOME");
    // SAFETY: unit test; `serial(env)` keeps the tests of the `env` group
    // from running while HOME is redirected.
    unsafe {
        std::env::set_var("HOME", &tmp);
    }
    let models = list_codex_models();
    // SAFETY: as above. HOME is put back before any assertion can panic.
    unsafe {
        match prev_home {
            Some(home) => std::env::set_var("HOME", home),
            None => std::env::remove_var("HOME"),
        }
    }
    // Best-effort cleanup; a leftover temp dir must not fail the test.
    let _ = std::fs::remove_dir_all(&tmp);

    for forbidden in &["client_version", "etag", "fetched_at", "models"] {
        assert!(
            !models.contains(&forbidden.to_string()),
            "metadata key {forbidden:?} leaked into model list: {models:?}"
        );
    }
    assert!(
        models.contains(&"gpt-5.5".to_string()),
        "gpt-5.5 missing from extracted list: {models:?}"
    );
    assert!(
        models.contains(&"gpt-5.4-mini".to_string()),
        "gpt-5.4-mini missing from extracted list: {models:?}"
    );
}
813
/// Legacy cache shape: the top-level keys are the model ids themselves and
/// there is no `models` array. v1.0.81 must still merge those keys into the
/// result.
///
/// The test points `HOME` at a temp directory holding such a cache. `HOME`
/// is process-global, so the test joins the `env` serial group to avoid
/// overlapping with the other environment-mutating tests.
#[test]
#[serial_test::serial(env)]
fn list_codex_models_falls_back_to_keys_when_models_field_absent() {
    let tmp =
        std::env::temp_dir().join(format!("codex-models-legacy-test-{}", std::process::id()));
    std::fs::create_dir_all(tmp.join(".codex")).expect("mkdir");
    let cache_body = r#"{"legacy-model-x": 1, "legacy-model-y": 2}"#;
    std::fs::write(tmp.join(".codex/models_cache.json"), cache_body).expect("write cache");

    // `var_os` (not `var`) so a non-UTF-8 HOME is restored verbatim instead
    // of being mistaken for "unset" and removed.
    let prev_home = std::env::var_os("HOME");
    // SAFETY: unit test; `serial(env)` keeps the tests of the `env` group
    // from running while HOME is redirected.
    unsafe {
        std::env::set_var("HOME", &tmp);
    }
    let models = list_codex_models();
    // SAFETY: as above. HOME is put back before any assertion can panic.
    unsafe {
        match prev_home {
            Some(home) => std::env::set_var("HOME", home),
            None => std::env::remove_var("HOME"),
        }
    }
    // Best-effort cleanup; a leftover temp dir must not fail the test.
    let _ = std::fs::remove_dir_all(&tmp);

    assert!(
        models.contains(&"legacy-model-x".to_string()),
        "legacy-model-x missing: {models:?}"
    );
    assert!(
        models.contains(&"legacy-model-y".to_string()),
        "legacy-model-y missing: {models:?}"
    );
}
846
/// OAuth-only conformance test (gaps.md:41-49, v1.0.69 mandate).
///
/// With `OPENAI_API_KEY` unset, builds a command via `build_codex_command`
/// and inspects its argv:
///
/// - `--ignore-user-config`, `--sandbox`, `--ephemeral`,
///   `--skip-git-repo-check` and `--ignore-rules` must be present;
/// - the `-c` overrides `sandbox_mode='read-only'` and
///   `approval_policy='never'` must be present (v1.0.77, codex#18113);
/// - `--ask-for-approval` must be absent whenever the compat helper reports
///   that the installed codex does not support it. When it is supported the
///   test accepts either outcome.
///
/// `-c mcp_servers='{}'` is deliberately not checked: it was REMOVED in
/// v1.0.76 because codex 0.134+ parses the value as a string and rejects it
/// ("expected a map"); `--ignore-user-config` already covers the MCP
/// isolation requirement.
#[test]
#[serial_test::serial(env)]
fn build_command_oauth_only_mandatory_flags() {
    // SAFETY: unit test
    unsafe {
        std::env::remove_var("OPENAI_API_KEY");
    }
    let schema_file = std::env::temp_dir().join("codex-test-schema.json");
    let _ = std::fs::remove_file(&schema_file);
    let spawn_args = CodexSpawnArgs {
        binary: std::path::Path::new("/usr/bin/false"),
        prompt: "p",
        json_schema: "{}",
        input_text: "i",
        model: Some("gpt-5.4-mini"),
        timeout_secs: 60,
        schema_path: schema_file,
    };
    let command = build_codex_command(&spawn_args);
    // Arguments that are not valid UTF-8 are dropped from the inspected view.
    let argv: Vec<&str> = command
        .get_args()
        .filter_map(|arg| arg.to_str())
        .collect();

    // Mandatory flags from gaps.md lines 233-238.
    assert!(
        argv.contains(&"--ignore-user-config"),
        "must have --ignore-user-config (gaps.md:266)"
    );

    // --ask-for-approval is conditional on codex < 0.134: only its absence
    // on newer versions is enforced here.
    let approval_flag_supported = crate::extract::codex_compat::codex_supports_ask_for_approval();
    assert!(
        approval_flag_supported || !argv.contains(&"--ask-for-approval"),
        "codex 0.134+ must NOT include --ask-for-approval"
    );

    // G31 hardening flags, checked in a fixed order.
    for (flag, why) in [
        ("--sandbox", "must have --sandbox read-only (G31)"),
        ("--ephemeral", "must have --ephemeral (G31)"),
        (
            "--skip-git-repo-check",
            "must have --skip-git-repo-check (G31)",
        ),
        ("--ignore-rules", "must have --ignore-rules (G31)"),
    ] {
        assert!(argv.contains(&flag), "{why}");
    }

    // v1.0.77: -c TOML overrides bypass codex exec --sandbox bug (#18113)
    let has_sandbox_override = ["-c", "sandbox_mode='read-only'"]
        .iter()
        .all(|needle| argv.contains(needle));
    assert!(
        has_sandbox_override,
        "must have -c sandbox_mode='read-only' (v1.0.77, codex#18113)"
    );
    assert!(
        argv.contains(&"approval_policy='never'"),
        "must have -c approval_policy='never' (v1.0.77)"
    );
}
913
/// OAuth-only guard: when `OPENAI_API_KEY` is in the environment,
/// `build_codex_command` MUST abort the spawn (return a `false`
/// command), NOT pass the key through to the child.
///
/// The test sets a fake key, builds the command, and then checks that the
/// program is `false` and that the argv carries the
/// `--oauth-only-violation-openai-api-key-set` marker. The fake key is
/// removed again before any assertion runs, so a failing assertion cannot
/// leave it set for later tests in the same process.
#[test]
#[serial_test::serial(env)]
fn build_command_aborts_when_openai_api_key_set() {
    // SAFETY: unit test; `serial(env)` keeps the tests of the `env` group
    // from running while the variable is set.
    unsafe {
        std::env::set_var("OPENAI_API_KEY", "sk-violation-test");
    }
    let schema = std::env::temp_dir().join("codex-test-schema-abort.json");
    let _ = std::fs::remove_file(&schema);
    let args = CodexSpawnArgs {
        binary: std::path::Path::new("/usr/bin/codex"),
        prompt: "p",
        json_schema: "{}",
        input_text: "i",
        model: Some("gpt-5.4-mini"),
        timeout_secs: 60,
        schema_path: schema,
    };
    let cmd = build_codex_command(&args);
    // Clean up first: the assertions below may panic, and the fake key must
    // not outlive this test.
    // SAFETY: as above.
    unsafe {
        std::env::remove_var("OPENAI_API_KEY");
    }

    let program = cmd.get_program().to_string_lossy().to_string();
    let argv: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
    assert_eq!(
        program, "false",
        "when OPENAI_API_KEY is set, build_codex_command must abort"
    );
    assert!(
        argv.contains(&"--oauth-only-violation-openai-api-key-set"),
        "aborted command must carry violation marker"
    );
}
950}
sqlite_graphrag/commands/codex_spawn.rs

sqlite_graphrag/commands/
codex_spawn.rs