Skip to main content

sqlite_graphrag/commands/
codex_spawn.rs

1//! Codex CLI spawn + JSONL parsing helper shared by `enrich` and `ingest --mode codex`.
2//!
3//! G31 (v1.0.69): `enrich --mode codex` was missing five critical hardening
4//! flags compared to `ingest --mode codex`. This module extracts the
5//! spawn pipeline into a single helper that BOTH call-sites consume,
6//! guaranteeing the same defaults everywhere.
7//!
8//! G32 (v1.0.69): `enrich --mode codex` used `serde_json::from_str` on the
9//! raw stdout, but `codex exec --json` emits JSONL (one event per line).
10//! [`parse_codex_jsonl`] iterates line-by-line, picking the last
11//! `item.completed` of type `agent_message` as the assistant text.
12//!
13//! G33 (v1.0.69): validate the model against the ChatGPT Pro OAuth whitelist
14//! stored in `~/.codex/models_cache.json` BEFORE spawning the subprocess.
15
16use crate::errors::AppError;
17use crate::extract::codex_compat::codex_supports_ask_for_approval;
18use crate::extraction::{ExtractedUrl, ExtractionResult};
19use crate::spawn::env_whitelist::apply_env_whitelist;
20use crate::storage::entities::{NewEntity, NewRelationship};
21use serde::{Deserialize, Serialize};
22use std::path::{Path, PathBuf};
23use std::process::{Command, Stdio};
24
25/// Token usage reported by Codex on `turn.completed` events.
26#[derive(Debug, Clone, Default, Deserialize, Serialize)]
27pub struct CodexUsage {
28    #[serde(default)]
29    pub input_tokens: u64,
30    #[serde(default)]
31    pub cached_input_tokens: u64,
32    #[serde(default)]
33    pub output_tokens: u64,
34    #[serde(default)]
35    pub reasoning_output_tokens: u64,
36}
37
/// Combined result of one `codex exec` invocation.
///
/// Produced by [`parse_codex_jsonl`] from the JSONL stream the subprocess
/// wrote to stdout.
#[derive(Debug)]
pub struct CodexResult {
    /// The last agent message parsed through [`parse_extraction_text`].
    pub extraction: ExtractionResult,
    /// Raw text of the last `item.completed` of type `agent_message` (the
    /// JSON payload the LLM produced). Callers that need a schema other
    /// than the extraction shape (e.g. body-enrich's `enriched_body`)
    /// should parse this directly.
    pub last_agent_text: String,
    /// Token counters from a `turn.completed` event that carried non-zero
    /// `input_tokens` or `output_tokens`; `None` when no such event was seen.
    pub usage: Option<CodexUsage>,
    /// Set when a `turn.failed` error message mentions `rate_limit`, `429`
    /// or `Too Many Requests`.
    pub rate_limited: bool,
    /// Set when an `error` event message mentions `schema`.
    pub schema_error: bool,
    /// Set when a `turn.failed` event was seen.
    pub turn_failed: bool,
    /// Message taken from the `turn.failed` event; empty when no turn failed.
    pub failed_message: String,
}
53
/// Configuration for the codex spawner.
///
/// Consumed by [`build_codex_command`]. Every field is borrowed from the
/// caller except `schema_path`, which is owned.
#[allow(rustdoc::broken_intra_doc_links)]
pub struct CodexSpawnArgs<'a> {
    /// Executable to launch; handed to `Command::new` and to the pre-flight
    /// check as `binary_path`.
    pub binary: &'a Path,
    /// Instruction text for the model. It is never placed on the command
    /// line: the argv ends with `-`, so codex reads the prompt from stdin.
    /// NOTE(review): presumably the code that spawns the command writes it
    /// to the child's stdin — confirm against the caller.
    pub prompt: &'a str,
    /// JSON schema text written verbatim to `schema_path` and passed to
    /// codex via `--output-schema`.
    pub json_schema: &'a str,
    /// Document text that accompanies `prompt`; like `prompt`, it is not
    /// part of the argv.
    pub input_text: &'a str,
    /// Optional model override; when `Some`, emitted as `-m <MODEL>`.
    pub model: Option<&'a str>,
    /// Timeout in seconds.
    /// NOTE(review): not read by `build_codex_command`; presumably enforced
    /// by the code that runs the command — confirm.
    pub timeout_secs: u64,
    /// Caller-provided schema path (must be inside a trusted directory
    /// that codex recognises as sandbox-safe). Use [`trusted_schema_path`]
    /// to compute one under the cache dir.
    pub schema_path: PathBuf,
}
68
69/// Computes a schema path under the cache dir so `codex exec` accepts it
70/// as part of a trusted directory (rejects `/tmp` on hardened installs).
71pub fn trusted_schema_path() -> Result<PathBuf, AppError> {
72    let cache = crate::paths::AppPaths::resolve(None)
73        .map(|p| p.models.parent().map(|m| m.to_path_buf()))
74        .ok()
75        .flatten()
76        .unwrap_or_else(std::env::temp_dir);
77    std::fs::create_dir_all(&cache).map_err(AppError::Io)?;
78    Ok(cache.join(format!("enrich-schema-{}.json", std::process::id())))
79}
80
/// Models accepted by Codex CLI when using ChatGPT Pro OAuth.
///
/// Mirrored from `~/.codex/models_cache.json` (which the official CLI
/// refreshes on every login). This list is intentionally narrow; passing
/// a model not in this set with `--mode codex` returns
/// `AppError::Validation` BEFORE any OAuth turn is spent.
///
/// [`validate_codex_model`] checks against this list only, and joins it in
/// this order into its error message. [`list_codex_models`] merges it with
/// whatever the on-disk cache file provides.
pub const CODEX_PRO_OAUTH_MODELS: &[&str] = &[
    "codex-auto-review",
    "gpt-5.3-codex-spark",
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.5",
];
94
95/// Validates the requested model against [`CODEX_PRO_OAUTH_MODELS`].
96///
97/// # Errors
98/// Returns [`AppError::Validation`] listing the accepted models when the
99/// caller supplied a model outside the whitelist.
100pub fn validate_codex_model(model: Option<&str>) -> Result<(), AppError> {
101    let Some(m) = model else {
102        return Ok(()); // no override; codex picks its default
103    };
104    if CODEX_PRO_OAUTH_MODELS.contains(&m) {
105        Ok(())
106    } else {
107        Err(AppError::Validation(format!(
108            "--codex-model {m:?} is not supported with ChatGPT Pro OAuth. \
109             Accepted: {}",
110            CODEX_PRO_OAUTH_MODELS.join(", ")
111        )))
112    }
113}
114
115/// Returns the list of models accepted by Codex with ChatGPT Pro OAuth.
116///
117/// Tries to read `~/.codex/models_cache.json` (which the official CLI
118/// refreshes on every login) and falls back to the static
119/// [`CODEX_PRO_OAUTH_MODELS`] constant when the file is missing or
120/// malformed. The returned `Vec<String>` is the union of both sources,
121/// de-duplicated.
122///
123/// The official cache file is an object with the shape
124/// `{"fetched_at": "...", "etag": "...", "client_version": "...",
125/// "models": [{"slug": "gpt-5.5", ...}, ...]}` (v1.0.81 fix: previously we
126/// iterated `obj.keys()` which produced bogus entries like `client_version`
127/// and `etag` as "models"; now we extract only the `models` array).
128pub fn list_codex_models() -> Vec<String> {
129    use std::collections::BTreeSet;
130    let mut out: BTreeSet<String> = CODEX_PRO_OAUTH_MODELS
131        .iter()
132        .map(|s| s.to_string())
133        .collect();
134
135    if let Some(home) = std::env::var_os("HOME") {
136        let path = std::path::Path::new(&home)
137            .join(".codex")
138            .join("models_cache.json");
139        if let Ok(content) = std::fs::read_to_string(&path) {
140            if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
141                if let Some(obj) = value.as_object() {
142                    // v1.0.81 fix: prefer the well-known `models` array
143                    // (each item has a `slug` field). Fall back to keys
144                    // only when `models` is absent (legacy cache format).
145                    if let Some(models_arr) = obj.get("models").and_then(|m| m.as_array()) {
146                        for v in models_arr {
147                            if let Some(slug) = v.get("slug").and_then(|s| s.as_str()) {
148                                out.insert(slug.to_string());
149                            } else if let Some(s) = v.as_str() {
150                                out.insert(s.to_string());
151                            }
152                        }
153                    } else {
154                        for key in obj.keys() {
155                            out.insert(key.clone());
156                        }
157                    }
158                } else if let Some(arr) = value.as_array() {
159                    for v in arr {
160                        if let Some(s) = v.as_str() {
161                            out.insert(s.to_string());
162                        }
163                    }
164                }
165            }
166        }
167    }
168    out.into_iter().collect()
169}
170
171/// Suggests the closest codex OAuth model to a user-supplied substring
172/// (G33). Returns `None` when no candidate is close enough.
173///
174/// Match strategy: exact substring containment wins; otherwise Levenshtein
175/// distance below `max_distance = max(2, query.len() / 3)`.
176pub fn suggest_codex_model(query: &str) -> Option<String> {
177    let query_lc = query.to_ascii_lowercase();
178    let models = list_codex_model_lc();
179
180    // Exact substring match wins.
181    for m in &models {
182        if m.contains(&query_lc) {
183            return Some(m.clone());
184        }
185    }
186
187    // Levenshtein fallback.
188    let max_distance = (query.len() / 3).max(2);
189    let mut best: Option<(usize, String)> = None;
190    for m in &models {
191        let d = levenshtein(query_lc.as_str(), m.as_str());
192        if d <= max_distance && best.as_ref().is_none_or(|(bd, _)| d < *bd) {
193            best = Some((d, m.clone()));
194        }
195    }
196    best.map(|(_, m)| m)
197}
198
199fn list_codex_model_lc() -> Vec<String> {
200    list_codex_models()
201        .into_iter()
202        .map(|s| s.to_ascii_lowercase())
203        .collect()
204}
205
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is counted in `char`s (Unicode scalar values), with unit
/// cost for insertion, deletion and substitution. Uses a single rolling
/// row of the dynamic-programming table.
fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Against an empty string the distance is the other string's length.
    match (source.len(), target.len()) {
        (0, n) => return n,
        (m, 0) => return m,
        _ => {}
    }

    // row[j] holds the distance between the processed prefix of `source`
    // and the first `j` characters of `target`.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (i, sc) in source.iter().enumerate() {
        // `diagonal` is the previous row's value at column `j`.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, tc) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(sc != tc);
            row[j + 1] = substitution.min(above + 1).min(row[j] + 1);
            diagonal = above;
        }
    }
    row[target.len()]
}
227
228/// Builds the `codex exec` command with the canonical hardening flags.
229///
230/// G31 + OAuth-only hardening (v1.0.69, mandated by gaps.md lines 41-49):
231/// the command ALWAYS uses the OAuth `auth.json` flow. The flag set is
232/// the canonical one documented in gaps.md Fix A:
233///
234/// ```text
235/// codex exec \
236///   -c mcp_servers='{}' \
237///   --json --output-schema <SCHEMA> \
238///   --ephemeral \
239///   --skip-git-repo-check \
240///   --sandbox read-only \
241///   --ignore-user-config \
242///   --ignore-rules \
243///   --ask-for-approval never \
244///   -m <MODEL> \
245///   -
246/// ```
247///
248/// The combination zeroes MCP servers (via two complementary mechanisms:
249/// the inline `-c mcp_servers='{}'` override AND `--ignore-user-config`),
250/// disables user-defined rules, and never asks for interactive approval.
251///
252/// **`OPENAI_API_KEY` is FORBIDDEN** in the spawned environment (gaps.md:48).
253/// OAuth flows via `~/.codex/auth.json` and `CODEX_ACCESS_TOKEN` only.
254pub fn build_codex_command(args: &CodexSpawnArgs<'_>) -> Result<Command, crate::errors::AppError> {
255    let full_prompt = format!("{}\n\n{}", args.prompt, args.input_text);
256
257    // OAuth-only guard (gaps.md:48, ADR-0011). If `OPENAI_API_KEY` is set
258    // in the environment we MUST abort — that is the API-key path which is
259    // explicitly PROHIBITED. Use the OAuth `auth.json` flow exclusively.
260    if let Ok(_key) = std::env::var("OPENAI_API_KEY") {
261        let mut cmd = Command::new("false");
262        cmd.env_clear();
263        cmd.env("PATH", "/nonexistent");
264        cmd.arg("--oauth-only-violation-openai-api-key-set");
265        cmd.arg("--oauth-only-resolution-use-codex-auth-json-or-openai-base-url");
266        return Ok(cmd);
267    }
268
269    // Write the JSON schema to a path the caller controls. Callers should
270    // pass a path under the cache dir (see [`trusted_schema_path`]).
271    std::fs::write(&args.schema_path, args.json_schema).ok();
272
273    let mut cmd = Command::new(args.binary);
274    // v1.0.83 (ADR-0041): env whitelist delegated to the shared helper.
275    // `OPENAI_API_KEY` is INTENTIONALLY ABSENT (defence-in-depth).
276    // `CODEX_ACCESS_TOKEN` and `OPENAI_BASE_URL` ARE whitelisted for
277    // custom providers via the canonical list in src/spawn/env_whitelist.rs.
278    apply_env_whitelist(&mut cmd, crate::spawn::env_whitelist::is_strict_env_clear());
279    crate::spawn::apply_cwd_isolation(&mut cmd)?;
280
281    // v1.0.77: point CODEX_HOME at an isolated dir that only contains
282    // auth.json — this prevents the codex subprocess from loading
283    // ~/.codex/config.toml (which has trust_level=trusted for the project,
284    // causing sandbox escalation per openai/codex#18113).
285    if let Some(isolated) = prepare_isolated_codex_home_spawn() {
286        cmd.env("CODEX_HOME", isolated);
287    }
288
289    // v1.0.77: `-c` TOML overrides bypass the codex exec --sandbox propagation
290    // bug (openai/codex#18113). CLI flags alone are insufficient — the exec
291    // subcommand may not inherit --sandbox from the parent codex command.
292    cmd.arg("exec")
293        .arg("-c")
294        .arg("sandbox_mode='read-only'")
295        .arg("-c")
296        .arg("approval_policy='never'")
297        .arg("--json")
298        .arg("--output-schema")
299        .arg(&args.schema_path)
300        .arg("--ephemeral")
301        .arg("--skip-git-repo-check")
302        .arg("--sandbox")
303        .arg("read-only")
304        .arg("--ignore-user-config")
305        .arg("--ignore-rules");
306
307    // Codex 0.134+ no longer accepts `-c mcp_servers='{}'` — it parses the
308    // value as a string and rejects it ("expected a map"). The
309    // `--ignore-user-config` flag already discards any user-defined MCP
310    // servers, so the override is redundant on all supported versions.
311
312    // Codex 0.134+ removed --ask-for-approval entirely (Issue #26602).
313    // Skip the flag on newer versions; sandbox=read-only already suppresses
314    // approval prompts. See src/extract/codex_compat.rs for the probe.
315    if codex_supports_ask_for_approval() {
316        cmd.arg("--ask-for-approval").arg("never");
317    }
318
319    if let Some(m) = args.model {
320        cmd.arg("-m").arg(m);
321    }
322
323    // `-` means: read the prompt from stdin (Codex Paperclip pattern)
324    cmd.arg("-");
325
326    cmd.stdin(Stdio::piped())
327        .stdout(Stdio::piped())
328        .stderr(Stdio::piped());
329    // Keep the prompt alive for the stdin thread spawned in `spawn_codex`.
330    let _ = full_prompt; // captured by closure below
331
332    // GAP-META-005 (v1.0.87, ADR-0045): pre-flight validation gate runs
333    // AFTER argv is fully built. Validates binary existence, argv size,
334    // walk-up of `.mcp.json`, and `CLAUDE_CONFIG_DIR` cleanliness.
335    // Pre-flight failure aborts the spawn with exit 16 — see ADR-0045.
336    let argv_refs: Vec<std::ffi::OsString> = cmd.get_args().map(|s| s.to_os_string()).collect();
337    let preflight_args = crate::spawn::preflight::PreFlightArgs {
338        binary_path: args.binary,
339        argv: &argv_refs,
340        workspace_root: std::path::Path::new("."),
341        mcp_config_inline_json: None, // Codex does not use --mcp-config flag
342        expected_output_bytes: 65_536,
343        spawner_name: "codex_spawn",
344    };
345    if let Err(e) = crate::spawn::preflight::preflight_check(&preflight_args) {
346        // v1.0.88 (BUG-6 fix, ADR-0046): propagate the structured
347        // `PreFlightError` via the `From` impl in `errors.rs` so callers
348        // receive `AppError::PreFlightFailed` (exit 16) instead of a
349        // bare `std::process::exit(16)` that discards the variant name,
350        // tracing context, and PT-BR i18n.
351        return Err(crate::errors::AppError::from(e));
352    }
353
354    Ok(cmd)
355}
356
357/// Parses JSONL output from `codex exec --json`.
358///
359/// Event format (DOTS notation):
360/// - `thread.started` — session init
361/// - `turn.started` — model turn begins
362/// - `item.completed` — message or tool call; last `agent_message` wins
363/// - `turn.completed` — includes usage stats
364/// - `turn.failed` — error with optional rate-limit indicator
365/// - `error` — schema or validation error
366///
367/// G32 (v1.0.69): this function is the single source of truth for JSONL
368/// parsing. Both `enrich` and `ingest --mode codex` consume it.
369pub fn parse_codex_jsonl(stdout: &str) -> Result<CodexResult, AppError> {
370    let mut last_agent_text: Option<String> = None;
371    let mut usage: Option<CodexUsage> = None;
372    let mut rate_limited = false;
373    let mut schema_error = false;
374    let mut turn_failed = false;
375    let mut failed_message = String::new();
376
377    for line in stdout.lines() {
378        let line = line.trim();
379        if line.is_empty() {
380            continue;
381        }
382
383        let event: serde_json::Value = match serde_json::from_str(line) {
384            Ok(v) => v,
385            Err(_) => {
386                tracing::warn!(target: "codex_spawn", line, "skipping malformed JSONL line");
387                continue;
388            }
389        };
390
391        let event_type = match event.get("type").and_then(|t| t.as_str()) {
392            Some(t) => t,
393            None => continue,
394        };
395
396        match event_type {
397            "item.completed" => {
398                if let Some(item) = event.get("item") {
399                    if item.get("type").and_then(|t| t.as_str()) == Some("agent_message") {
400                        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
401                            last_agent_text = Some(text.to_string());
402                        }
403                    }
404                }
405            }
406            "turn.completed" => {
407                if let Some(u) = event.get("usage") {
408                    // Skip events that lack the recognised token fields
409                    // (e.g. partial broadcasts with `{}`) so the last
410                    // populated usage wins instead of being overwritten
411                    // by an empty one.
412                    let is_populated = u
413                        .get("input_tokens")
414                        .and_then(|v| v.as_u64())
415                        .map(|n| n > 0)
416                        .unwrap_or(false)
417                        || u.get("output_tokens")
418                            .and_then(|v| v.as_u64())
419                            .map(|n| n > 0)
420                            .unwrap_or(false);
421                    if is_populated {
422                        if let Ok(parsed) = serde_json::from_value::<CodexUsage>(u.clone()) {
423                            usage = Some(parsed);
424                        }
425                    }
426                }
427            }
428            "turn.failed" => {
429                turn_failed = true;
430                if let Some(err) = event.get("error") {
431                    let msg = err
432                        .get("message")
433                        .and_then(|m| m.as_str())
434                        .unwrap_or("unknown error");
435                    failed_message = msg.to_string();
436                    if msg.contains("rate_limit")
437                        || msg.contains("429")
438                        || msg.contains("Too Many Requests")
439                    {
440                        rate_limited = true;
441                    }
442                }
443            }
444            "error" => {
445                if let Some(msg) = event.get("message").and_then(|m| m.as_str()) {
446                    if msg.contains("invalid_json_schema") || msg.contains("schema") {
447                        schema_error = true;
448                    }
449                }
450            }
451            _ => {}
452        }
453    }
454
455    let text = last_agent_text.ok_or_else(|| {
456        AppError::Validation(format!(
457            "no agent_message in codex JSONL output (rate_limited={rate_limited}, schema_error={schema_error}, turn_failed={turn_failed})"
458        ))
459    })?;
460
461    if turn_failed {
462        return Err(AppError::Validation(format!(
463            "codex turn failed: {failed_message}"
464        )));
465    }
466    if schema_error {
467        return Err(AppError::Validation(
468            "codex reported invalid_json_schema; check the --output-schema file".to_string(),
469        ));
470    }
471    if rate_limited {
472        return Err(AppError::Validation(format!(
473            "codex rate-limited: {failed_message}"
474        )));
475    }
476
477    let extraction = parse_extraction_text(&text)?;
478    Ok(CodexResult {
479        extraction,
480        last_agent_text: text,
481        usage,
482        rate_limited,
483        schema_error,
484        turn_failed,
485        failed_message,
486    })
487}
488
489/// Parses the agent_message text as an `ExtractionResult` JSON payload.
490///
491/// The schema is shared by both `enrich` and `ingest --mode codex`; the
492/// `text` is the JSON value the assistant returned, not a wrapper object.
493pub fn parse_extraction_text(text: &str) -> Result<ExtractionResult, AppError> {
494    let value: serde_json::Value = serde_json::from_str(text).map_err(|e| {
495        AppError::Validation(format!("failed to parse codex agent_message as JSON: {e}"))
496    })?;
497    let obj = value.as_object().ok_or_else(|| {
498        AppError::Validation("codex agent_message is not a JSON object".to_string())
499    })?;
500
501    let mut entities: Vec<NewEntity> = Vec::new();
502    if let Some(arr) = obj.get("entities").and_then(|v| v.as_array()) {
503        for e in arr {
504            if let Some(name) = e.get("name").and_then(|v| v.as_str()) {
505                // Accept either "type" or "entity_type" from the LLM payload
506                // and fall back to "concept" when the LLM omits it.
507                let entity_type_str = e
508                    .get("type")
509                    .or_else(|| e.get("entity_type"))
510                    .and_then(|v| v.as_str())
511                    .unwrap_or("concept");
512                // GAP-SG-47: fold non-canonical labels onto the nearest
513                // canonical kind (preserves aliases/case instead of collapsing
514                // every miss straight to concept).
515                let entity_type = crate::entity_type::EntityType::map_to_canonical(entity_type_str);
516                entities.push(NewEntity {
517                    name: name.to_string(),
518                    entity_type,
519                    description: None,
520                });
521            }
522        }
523    }
524
525    let mut relationships: Vec<NewRelationship> = Vec::new();
526    if let Some(arr) = obj.get("relationships").and_then(|v| v.as_array()) {
527        for r in arr {
528            let from = r.get("source").or_else(|| r.get("from"));
529            let to = r.get("target").or_else(|| r.get("to"));
530            let rel = r.get("relation").and_then(|v| v.as_str());
531            if let (Some(from_v), Some(to_v), Some(rel_v)) = (
532                from.and_then(|v| v.as_str()),
533                to.and_then(|v| v.as_str()),
534                rel,
535            ) {
536                relationships.push(NewRelationship {
537                    source: from_v.to_string(),
538                    target: to_v.to_string(),
539                    // GAP-SG-48: rewrite non-canonical relations to canonical.
540                    relation: crate::parsers::map_to_canonical_relation(rel_v),
541                    strength: r.get("strength").and_then(|v| v.as_f64()).unwrap_or(0.5),
542                    description: None,
543                });
544            }
545        }
546    }
547
548    let urls: Vec<ExtractedUrl> = obj
549        .get("urls")
550        .and_then(|v| v.as_array())
551        .map(|arr| {
552            arr.iter()
553                .filter_map(|u| {
554                    let url = u.get("url")?.as_str()?.to_string();
555                    let start = u.get("start").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
556                    let end = u
557                        .get("end")
558                        .and_then(|v| v.as_u64())
559                        .unwrap_or(start as u64) as usize;
560                    Some(ExtractedUrl { url, start, end })
561                })
562                .collect()
563        })
564        .unwrap_or_default();
565
566    // v1.0.76: ExtractionResult no longer carries relationships or
567    // relationships_truncated fields; those are LLM backend output
568    // (see `ExtractionOutput` in src/extract/mod.rs). The default
569    // build extracts URLs + entities only; relationships are an
570    // LLM-side concern.
571    //
572    // Convert `NewEntity` (storage-side) to `ExtractedEntity`
573    // (extraction-side). The LLM payload doesn't include byte offsets
574    // (the chunker is responsible for that), so start/end are 0.
575    let entities_ext: Vec<crate::extraction::ExtractedEntity> = entities
576        .into_iter()
577        .map(|e| crate::extraction::ExtractedEntity {
578            name: e.name,
579            entity_type: e.entity_type.as_str().to_string(),
580            start: 0,
581            end: 0,
582        })
583        .collect();
584
585    Ok(ExtractionResult {
586        entities: entities_ext,
587        urls,
588        elapsed_ms: 0,
589    })
590}
591
/// Prepares a per-process directory to be used as `CODEX_HOME`, containing
/// only a copy of the user's `auth.json`.
///
/// Returns `None` when `HOME` is unset or `$HOME/.codex/auth.json` does not
/// exist. Otherwise returns the directory
/// `<temp_dir>/sqlite-graphrag-codex-home-<pid>`.
///
/// Directory creation and the copy are best-effort: their errors are
/// ignored and the path is still returned. An `auth.json` already present
/// in the directory is left untouched.
///
/// Because the directory lives in the shared temp dir and holds OAuth
/// credentials, on Unix it is restricted to the owner (`0700`) and the
/// copied file to `0600`. These permission changes are best-effort as well.
fn prepare_isolated_codex_home_spawn() -> Option<std::path::PathBuf> {
    /// Best-effort `chmod`; a failure leaves the existing mode in place.
    #[cfg(unix)]
    fn restrict_to_owner(path: &std::path::Path, mode: u32) {
        use std::os::unix::fs::PermissionsExt;
        let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode));
    }
    /// No owner-only mode bits to apply on this platform.
    #[cfg(not(unix))]
    fn restrict_to_owner(_path: &std::path::Path, _mode: u32) {}

    // `var_os` so a HOME that is not valid Unicode is still honoured.
    let home = std::env::var_os("HOME")?;
    let real_auth = std::path::Path::new(&home).join(".codex/auth.json");
    if !real_auth.exists() {
        return None;
    }

    let isolated =
        std::env::temp_dir().join(format!("sqlite-graphrag-codex-home-{}", std::process::id()));
    let _ = std::fs::create_dir_all(&isolated);
    // Tighten the directory before credentials are placed in it.
    restrict_to_owner(&isolated, 0o700);

    let target = isolated.join("auth.json");
    if !target.exists() {
        let _ = std::fs::copy(&real_auth, &target);
    }
    restrict_to_owner(&target, 0o600);

    Some(isolated)
}
607
608#[cfg(test)]
609mod tests {
610    use super::*;
611
612    const SAMPLE_JSONL: &str = r#"{"type":"thread.started","thread_id":"abc"}
613{"type":"turn.started"}
614{"type":"item.completed","item":{"type":"reasoning","text":"thinking"}}
615{"type":"item.completed","item":{"type":"agent_message","text":"{\"entities\":[{\"name\":\"alpha\",\"type\":\"concept\"}],\"relationships\":[{\"source\":\"alpha\",\"target\":\"beta\",\"relation\":\"uses\",\"strength\":0.7}],\"extraction_method\":\"codex\",\"urls\":[]}"}}
616{"type":"turn.completed","usage":{"input_tokens":120,"output_tokens":45}}
617{"type":"turn.completed","usage":{}}
618"#;
619
620    #[test]
621    fn parse_codex_jsonl_extracts_last_agent_message() {
622        // v1.0.76: relationships are no longer carried in the
623        // ExtractionResult struct (they belong to the LLM ExtractionBackend
624        // payload, not the URL-only default build). The default test
625        // validates the entity extraction path only.
626        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
627        assert_eq!(result.extraction.entities.len(), 1);
628        assert_eq!(result.extraction.entities[0].name, "alpha");
629    }
630
631    #[test]
632    fn parse_codex_jsonl_collects_usage() {
633        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
634        let usage = result.usage.expect("usage must be populated");
635        assert_eq!(usage.input_tokens, 120);
636        assert_eq!(usage.output_tokens, 45);
637    }
638
639    #[test]
640    fn parse_codex_jsonl_detects_rate_limit() {
641        let r = parse_codex_jsonl(
642            "{\"type\":\"turn.failed\",\"error\":{\"message\":\"rate_limit: 429 too many\"}}\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{}\"}}",
643        );
644        assert!(matches!(r, Err(AppError::Validation(_))));
645    }
646
647    #[test]
648    fn parse_codex_jsonl_handles_no_agent_message() {
649        let r = parse_codex_jsonl("{\"type\":\"thread.started\"}");
650        assert!(matches!(r, Err(AppError::Validation(_))));
651    }
652
653    #[test]
654    fn parse_codex_jsonl_skips_malformed_lines() {
655        let r = parse_codex_jsonl(
656            "{not valid json\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{\\\"entities\\\":[],\\\"relationships\\\":[],\\\"extraction_method\\\":\\\"codex\\\"}\"}}",
657        );
658        assert!(r.is_ok(), "malformed lines must be skipped, got {r:?}");
659    }
660
661    #[test]
662    fn validate_codex_model_accepts_known() {
663        assert!(validate_codex_model(Some("gpt-5.5")).is_ok());
664        assert!(validate_codex_model(Some("gpt-5.4")).is_ok());
665        assert!(validate_codex_model(None).is_ok()); // no override
666    }
667
668    #[test]
669    fn validate_codex_model_rejects_unknown() {
670        let err = validate_codex_model(Some("gpt-4")).unwrap_err();
671        let msg = format!("{err}");
672        assert!(msg.contains("not supported"));
673        assert!(msg.contains("gpt-5.5"));
674    }
675
676    #[test]
677    fn list_codex_models_includes_all_static_whitelist() {
678        let models = list_codex_models();
679        for m in CODEX_PRO_OAUTH_MODELS {
680            assert!(models.contains(&m.to_string()), "missing {m} in {models:?}");
681        }
682    }
683
684    #[test]
685    fn suggest_codex_model_substring_match() {
686        let s = suggest_codex_model("gpt-5");
687        assert!(s.is_some(), "must suggest a gpt-5.x model");
688    }
689
690    #[test]
691    fn suggest_codex_model_fuzzy_match() {
692        // 'gpt5.5' has no hyphen; should still suggest 'gpt-5.5'.
693        let s = suggest_codex_model("gpt5.5");
694        assert!(s.is_some(), "fuzzy must suggest gpt-5.5 for 'gpt5.5'");
695        assert_eq!(s.unwrap(), "gpt-5.5");
696    }
697
698    #[test]
699    fn suggest_codex_model_unrelated_returns_none() {
700        let s = suggest_codex_model("totally-unrelated-zzz");
701        assert!(s.is_none());
702    }
703
704    #[test]
705    fn build_codex_command_includes_hardening_flags() {
706        // RC-14 (v1.0.98): `/bin/true` is rejected by the preflight existence
707        // check under the macOS runner sandbox. Use the running test binary,
708        // which always exists and is executable on every platform.
709        let self_exe = std::env::current_exe().expect("current exe path");
710        let args = CodexSpawnArgs {
711            binary: &self_exe,
712            prompt: "p",
713            json_schema: "{}",
714            input_text: "i",
715            model: Some("gpt-5.5"),
716            timeout_secs: 60,
717            schema_path: std::env::temp_dir().join("test-schema.json"),
718        };
719        let cmd = build_codex_command(&args).expect("preflight gate accepts valid args");
720        let collected: Vec<String> = cmd
721            .get_args()
722            .filter_map(|a| a.to_str().map(|s| s.to_string()))
723            .collect();
724        for required in &[
725            "exec",
726            "-c",
727            "sandbox_mode='read-only'",
728            "approval_policy='never'",
729            "--json",
730            "--output-schema",
731            "--ephemeral",
732            "--skip-git-repo-check",
733            "--sandbox",
734            "read-only",
735            "--ignore-user-config",
736            "--ignore-rules",
737            "-m",
738            "gpt-5.5",
739            "-",
740        ] {
741            assert!(
742                collected.iter().any(|a| a == required),
743                "missing flag {required} in {collected:?}"
744            );
745        }
746    }
747
748    #[test]
749    fn list_codex_models_dedupes_with_cache_file() {
750        // Ensure the union with the cache file (when present) does not
751        // produce duplicates. We can't actually write a cache file in
752        // a test, so we just verify the static path is dedup'd.
753        let models = list_codex_models();
754        let unique: std::collections::HashSet<_> = models.iter().collect();
755        assert_eq!(unique.len(), models.len(), "list_codex_models must dedupe");
756    }
757    #[test]
758    fn list_codex_models_extracts_from_models_array_v1_0_81_regression() {
759        // v1.0.81 fix: the official codex CLI writes
760        //   {"fetched_at": "...", "etag": "...", "client_version": "...",
761        //    "models": [{"slug": "gpt-5.5", ...}, ...]}
762        // and the old code iterated obj.keys(), polluting the model
763        // list with metadata keys. Here we simulate a cache file by
764        // setting HOME to a tempdir containing a synthetic cache and
765        // verifying the metadata keys are NOT present in the output.
766        let tmp =
767            std::env::temp_dir().join(format!("codex-models-array-test-{}", std::process::id()));
768        std::fs::create_dir_all(tmp.join(".codex")).expect("mkdir");
769        let cache_body = r#"{
770            "fetched_at": "2026-06-14T06:43:56.639903114Z",
771            "etag": "W/\"deadbeef\"",
772            "client_version": "0.139.0",
773            "models": [
774                {"slug": "gpt-5.5", "display_name": "GPT-5.5"},
775                {"slug": "gpt-5.4-mini", "display_name": "GPT-5.4 mini"}
776            ]
777        }"#;
778        std::fs::write(tmp.join(".codex/models_cache.json"), cache_body).expect("write cache");
779        // SAFETY: unit test
780        let prev_home = std::env::var("HOME");
781        unsafe {
782            std::env::set_var("HOME", &tmp);
783        }
784        let models = list_codex_models();
785        unsafe {
786            if let Ok(h) = prev_home {
787                std::env::set_var("HOME", h);
788            } else {
789                std::env::remove_var("HOME");
790            }
791        }
792        let _ = std::fs::remove_dir_all(&tmp);
793
794        for forbidden in &["client_version", "etag", "fetched_at", "models"] {
795            assert!(
796                !models.contains(&forbidden.to_string()),
797                "metadata key {forbidden:?} leaked into model list: {models:?}"
798            );
799        }
800        assert!(
801            models.contains(&"gpt-5.5".to_string()),
802            "gpt-5.5 missing from extracted list: {models:?}"
803        );
804        assert!(
805            models.contains(&"gpt-5.4-mini".to_string()),
806            "gpt-5.4-mini missing from extracted list: {models:?}"
807        );
808    }
809
810    #[test]
811    fn list_codex_models_falls_back_to_keys_when_models_field_absent() {
812        // Legacy cache shape: keys are model ids directly (no models
813        // array). v1.0.81 must still merge those keys into the result.
814        let tmp =
815            std::env::temp_dir().join(format!("codex-models-legacy-test-{}", std::process::id()));
816        std::fs::create_dir_all(tmp.join(".codex")).expect("mkdir");
817        let cache_body = r#"{"legacy-model-x": 1, "legacy-model-y": 2}"#;
818        std::fs::write(tmp.join(".codex/models_cache.json"), cache_body).expect("write cache");
819        let prev_home = std::env::var("HOME");
820        unsafe {
821            std::env::set_var("HOME", &tmp);
822        }
823        let models = list_codex_models();
824        unsafe {
825            if let Ok(h) = prev_home {
826                std::env::set_var("HOME", h);
827            } else {
828                std::env::remove_var("HOME");
829            }
830        }
831        let _ = std::fs::remove_dir_all(&tmp);
832
833        assert!(
834            models.contains(&"legacy-model-x".to_string()),
835            "legacy-model-x missing: {models:?}"
836        );
837        assert!(
838            models.contains(&"legacy-model-y".to_string()),
839            "legacy-model-y missing: {models:?}"
840        );
841    }
842
843    /// OAuth-only conformance test (gaps.md:41-49, v1.0.69 mandate).
844    /// Verifies that `build_codex_command` always emits `-c mcp_servers='{}'`,
845    /// `--ignore-user-config`, `--ask-for-approval never` and does NOT
846    /// whitelist `OPENAI_API_KEY` in the env_clear whitelist.
847    #[test]
848    #[serial_test::serial(env)]
849    fn build_command_oauth_only_mandatory_flags() {
850        // SAFETY: unit test
851        unsafe {
852            std::env::remove_var("OPENAI_API_KEY");
853        }
854        let schema = std::env::temp_dir().join("codex-test-schema.json");
855        let _ = std::fs::remove_file(&schema);
856        let args = CodexSpawnArgs {
857            binary: std::path::Path::new("/usr/bin/false"),
858            prompt: "p",
859            json_schema: "{}",
860            input_text: "i",
861            model: Some("gpt-5.4-mini"),
862            timeout_secs: 60,
863            schema_path: schema.clone(),
864        };
865        let cmd = build_codex_command(&args).expect("preflight gate accepts valid args");
866        let argv: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
867        // Mandatory flags from gaps.md lines 233-238.
868        // -c mcp_servers='{}' was REMOVED in v1.0.76 — codex 0.134+ parses
869        // the value as a string and rejects it ("expected a map"). The
870        // --ignore-user-config flag already covers the MCP isolation
871        // requirement.
872        assert!(
873            argv.contains(&"--ignore-user-config"),
874            "must have --ignore-user-config (gaps.md:266)"
875        );
876        // --ask-for-approval is conditional on codex < 0.134. When the
877        // installed codex is 0.134+ the flag is omitted by the compat
878        // helper. Both outcomes are valid.
879        let ask_for_approval_present = argv.contains(&"--ask-for-approval");
880        if !crate::extract::codex_compat::codex_supports_ask_for_approval() {
881            assert!(
882                !ask_for_approval_present,
883                "codex 0.134+ must NOT include --ask-for-approval"
884            );
885        }
886        assert!(
887            argv.contains(&"--sandbox"),
888            "must have --sandbox read-only (G31)"
889        );
890        assert!(argv.contains(&"--ephemeral"), "must have --ephemeral (G31)");
891        assert!(
892            argv.contains(&"--skip-git-repo-check"),
893            "must have --skip-git-repo-check (G31)"
894        );
895        assert!(
896            argv.contains(&"--ignore-rules"),
897            "must have --ignore-rules (G31)"
898        );
899        // v1.0.77: -c TOML overrides bypass codex exec --sandbox bug (#18113)
900        assert!(
901            argv.contains(&"-c") && argv.contains(&"sandbox_mode='read-only'"),
902            "must have -c sandbox_mode='read-only' (v1.0.77, codex#18113)"
903        );
904        assert!(
905            argv.contains(&"approval_policy='never'"),
906            "must have -c approval_policy='never' (v1.0.77)"
907        );
908    }
909
910    /// OAuth-only guard: when `OPENAI_API_KEY` is in the environment,
911    /// `build_codex_command` MUST abort the spawn (return a `false`
912    /// command), NOT pass the key through to the child.
913    #[test]
914    #[serial_test::serial(env)]
915    fn build_command_aborts_when_openai_api_key_set() {
916        // SAFETY: unit test
917        unsafe {
918            std::env::set_var("OPENAI_API_KEY", "sk-violation-test");
919        }
920        let schema = std::env::temp_dir().join("codex-test-schema-abort.json");
921        let _ = std::fs::remove_file(&schema);
922        let args = CodexSpawnArgs {
923            binary: std::path::Path::new("/usr/bin/codex"),
924            prompt: "p",
925            json_schema: "{}",
926            input_text: "i",
927            model: Some("gpt-5.4-mini"),
928            timeout_secs: 60,
929            schema_path: schema.clone(),
930        };
931        let cmd = build_codex_command(&args).expect("preflight gate accepts valid args");
932        let program = cmd.get_program().to_string_lossy().to_string();
933        let argv: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
934        assert_eq!(
935            program, "false",
936            "when OPENAI_API_KEY is set, build_codex_command must abort"
937        );
938        assert!(
939            argv.contains(&"--oauth-only-violation-openai-api-key-set"),
940            "aborted command must carry violation marker"
941        );
942        unsafe {
943            std::env::remove_var("OPENAI_API_KEY");
944        }
945    }
946}