Skip to main content

sqlite_graphrag/commands/
codex_spawn.rs

1//! Codex CLI spawn + JSONL parsing helper shared by `enrich` and `ingest --mode codex`.
2//!
3//! G31 (v1.0.69): `enrich --mode codex` was missing five critical hardening
4//! flags compared to `ingest --mode codex`. This module extracts the
5//! spawn pipeline into a single helper that BOTH call-sites consume,
6//! guaranteeing the same defaults everywhere.
7//!
8//! G32 (v1.0.69): `enrich --mode codex` used `serde_json::from_str` on the
9//! raw stdout, but `codex exec --json` emits JSONL (one event per line).
10//! [`parse_codex_jsonl`] iterates line-by-line, picking the last
11//! `item.completed` of type `agent_message` as the assistant text.
12//!
//! G33 (v1.0.69): validate the model against the ChatGPT Pro OAuth whitelist
//! ([`CODEX_PRO_OAUTH_MODELS`], mirrored from `~/.codex/models_cache.json`)
//! BEFORE spawning the subprocess.
15
16use crate::errors::AppError;
17use crate::extraction::{ExtractedUrl, ExtractionResult};
18use crate::storage::entities::{NewEntity, NewRelationship};
19use serde::{Deserialize, Serialize};
20use std::path::{Path, PathBuf};
21use std::process::{Command, Stdio};
22
/// Token usage reported by Codex on `turn.completed` events.
///
/// Every counter carries `#[serde(default)]`, so a key that is missing from
/// the JSON payload deserializes to `0` instead of failing the parse.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CodexUsage {
    /// Value of the `input_tokens` key.
    #[serde(default)]
    pub input_tokens: u64,
    /// Value of the `cached_input_tokens` key.
    #[serde(default)]
    pub cached_input_tokens: u64,
    /// Value of the `output_tokens` key.
    #[serde(default)]
    pub output_tokens: u64,
    /// Value of the `reasoning_output_tokens` key.
    #[serde(default)]
    pub reasoning_output_tokens: u64,
}
35
/// Combined result of one `codex exec` invocation.
///
/// Produced by `parse_codex_jsonl`, which returns `Err` whenever one of the
/// failure flags below is set, so values it hands back carry them as `false`.
#[derive(Debug)]
pub struct CodexResult {
    /// Entities, relationships and URLs decoded from the last `agent_message`.
    pub extraction: ExtractionResult,
    /// Usage from a `turn.completed` event, when one with token counts was seen.
    pub usage: Option<CodexUsage>,
    /// A `turn.failed` message looked like a rate-limit response.
    pub rate_limited: bool,
    /// An `error` event mentioned the schema.
    pub schema_error: bool,
    /// A `turn.failed` event was seen.
    pub turn_failed: bool,
    /// Message taken from the `turn.failed` event; empty when none was seen.
    pub failed_message: String,
}
46
/// Configuration for the codex spawner.
///
/// Borrowed inputs for one `codex exec` invocation, consumed by
/// `build_codex_command`.
// NOTE(review): the `allow` below presumably silences rustdoc about an
// intra-doc link that does not resolve — confirm it is still required.
#[allow(rustdoc::broken_intra_doc_links)]
pub struct CodexSpawnArgs<'a> {
    /// Program handed to `Command::new` (the codex executable).
    pub binary: &'a Path,
    /// Leading part of the text for codex. NOTE(review): presumably sent on
    /// stdin as `prompt`, a blank line, then `input_text` — confirm with the
    /// code that spawns the command.
    pub prompt: &'a str,
    /// JSON schema text; `build_codex_command` writes it to `schema_path`.
    pub json_schema: &'a str,
    /// Trailing part of the text for codex (see `prompt`).
    pub input_text: &'a str,
    /// Optional model override, forwarded as `-m <MODEL>` when present.
    pub model: Option<&'a str>,
    /// Timeout in seconds. NOTE(review): `build_codex_command` does not read
    /// it; presumably enforced by the caller that spawns the process — confirm.
    pub timeout_secs: u64,
    /// Caller-provided schema path (must be inside a trusted directory
    /// that codex recognises as sandbox-safe). Use [`trusted_schema_path`]
    /// to compute one under the cache dir.
    pub schema_path: PathBuf,
}
61
62/// Computes a schema path under the cache dir so `codex exec` accepts it
63/// as part of a trusted directory (rejects `/tmp` on hardened installs).
64pub fn trusted_schema_path() -> Result<PathBuf, AppError> {
65    let cache = crate::paths::AppPaths::resolve(None)
66        .map(|p| p.models.parent().map(|m| m.to_path_buf()))
67        .ok()
68        .flatten()
69        .unwrap_or_else(std::env::temp_dir);
70    std::fs::create_dir_all(&cache).map_err(AppError::Io)?;
71    Ok(cache.join(format!("enrich-schema-{}.json", std::process::id())))
72}
73
/// Models accepted by Codex CLI when using ChatGPT Pro OAuth.
///
/// Mirrored from `~/.codex/models_cache.json` (which the official CLI
/// refreshes on every login). This list is intentionally narrow; passing
/// a model not in this set with `--mode codex` returns
/// `AppError::Validation` BEFORE any OAuth turn is spent.
///
/// This is a static snapshot: `validate_codex_model` checks against it
/// verbatim, so it has to be updated by hand when the accepted set changes.
pub const CODEX_PRO_OAUTH_MODELS: &[&str] = &[
    "codex-auto-review",
    "gpt-5.3-codex-spark",
    "gpt-5.4",
    "gpt-5.4-mini",
    "gpt-5.5",
];
87
88/// Validates the requested model against [`CODEX_PRO_OAUTH_MODELS`].
89///
90/// # Errors
91/// Returns [`AppError::Validation`] listing the accepted models when the
92/// caller supplied a model outside the whitelist.
93pub fn validate_codex_model(model: Option<&str>) -> Result<(), AppError> {
94    let Some(m) = model else {
95        return Ok(()); // no override; codex picks its default
96    };
97    if CODEX_PRO_OAUTH_MODELS.contains(&m) {
98        Ok(())
99    } else {
100        Err(AppError::Validation(format!(
101            "--codex-model {m:?} is not supported with ChatGPT Pro OAuth. \
102             Accepted: {}",
103            CODEX_PRO_OAUTH_MODELS.join(", ")
104        )))
105    }
106}
107
108/// Returns the list of models accepted by Codex with ChatGPT Pro OAuth.
109///
110/// Tries to read `~/.codex/models_cache.json` (which the official CLI
111/// refreshes on every login) and falls back to the static
112/// [`CODEX_PRO_OAUTH_MODELS`] constant when the file is missing or
113/// malformed. The returned `Vec<String>` is the union of both sources,
114/// de-duplicated.
115pub fn list_codex_models() -> Vec<String> {
116    use std::collections::BTreeSet;
117    let mut out: BTreeSet<String> = CODEX_PRO_OAUTH_MODELS
118        .iter()
119        .map(|s| s.to_string())
120        .collect();
121
122    if let Some(home) = std::env::var_os("HOME") {
123        let path = std::path::Path::new(&home)
124            .join(".codex")
125            .join("models_cache.json");
126        if let Ok(content) = std::fs::read_to_string(&path) {
127            // The file is a JSON object whose keys are model ids.
128            // Use serde_json::Value to traverse safely without depending
129            // on a precise schema.
130            if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
131                if let Some(obj) = value.as_object() {
132                    for key in obj.keys() {
133                        out.insert(key.clone());
134                    }
135                } else if let Some(arr) = value.as_array() {
136                    for v in arr {
137                        if let Some(s) = v.as_str() {
138                            out.insert(s.to_string());
139                        }
140                    }
141                }
142            }
143        }
144    }
145    out.into_iter().collect()
146}
147
148/// Suggests the closest codex OAuth model to a user-supplied substring
149/// (G33). Returns `None` when no candidate is close enough.
150///
151/// Match strategy: exact substring containment wins; otherwise Levenshtein
152/// distance below `max_distance = max(2, query.len() / 3)`.
153pub fn suggest_codex_model(query: &str) -> Option<String> {
154    let query_lc = query.to_ascii_lowercase();
155    let models = list_codex_model_lc();
156
157    // Exact substring match wins.
158    for m in &models {
159        if m.contains(&query_lc) {
160            return Some(m.clone());
161        }
162    }
163
164    // Levenshtein fallback.
165    let max_distance = (query.len() / 3).max(2);
166    let mut best: Option<(usize, String)> = None;
167    for m in &models {
168        let d = levenshtein(query_lc.as_str(), m.as_str());
169        if d <= max_distance && best.as_ref().is_none_or(|(bd, _)| d < *bd) {
170            best = Some((d, m.clone()));
171        }
172    }
173    best.map(|(_, m)| m)
174}
175
176fn list_codex_model_lc() -> Vec<String> {
177    list_codex_models()
178        .into_iter()
179        .map(|s| s.to_ascii_lowercase())
180        .collect()
181}
182
/// Levenshtein edit distance between `a` and `b`, counted in `char`s.
///
/// Insertions, deletions and substitutions each cost 1. Uses a single
/// rolling row of the dynamic-programming table.
fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Against an empty string the distance is the length of the other one.
    if source.is_empty() || target.is_empty() {
        return source.len().max(target.len());
    }

    // row[j] holds the distance between the source prefix processed so far
    // and the first j chars of the target.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (i, &sc) in source.iter().enumerate() {
        // Value of the previous row at column j (the "diagonal" neighbour).
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &tc) in target.iter().enumerate() {
            // Previous row at column j + 1, saved before it is overwritten.
            let above = row[j + 1];
            let substitution = diagonal + usize::from(sc != tc);
            row[j + 1] = substitution.min(above + 1).min(row[j] + 1);
            diagonal = above;
        }
    }
    row[target.len()]
}
204
205/// Builds the `codex exec` command with the canonical hardening flags.
206///
207/// G31 + OAuth-only hardening (v1.0.69, mandated by gaps.md lines 41-49):
208/// the command ALWAYS uses the OAuth `auth.json` flow. The flag set is
209/// the canonical one documented in gaps.md Fix A:
210///
211/// ```text
212/// codex exec \
213///   -c mcp_servers='{}' \
214///   --json --output-schema <SCHEMA> \
215///   --ephemeral \
216///   --skip-git-repo-check \
217///   --sandbox read-only \
218///   --ignore-user-config \
219///   --ignore-rules \
220///   --ask-for-approval never \
221///   -m <MODEL> \
222///   -
223/// ```
224///
225/// The combination zeroes MCP servers (via two complementary mechanisms:
226/// the inline `-c mcp_servers='{}'` override AND `--ignore-user-config`),
227/// disables user-defined rules, and never asks for interactive approval.
228///
229/// **`OPENAI_API_KEY` is FORBIDDEN** in the spawned environment (gaps.md:48).
230/// OAuth flows via `~/.codex/auth.json` and `CODEX_ACCESS_TOKEN` only.
231pub fn build_codex_command(args: &CodexSpawnArgs<'_>) -> Command {
232    let full_prompt = format!("{}\n\n{}", args.prompt, args.input_text);
233
234    // OAuth-only guard (gaps.md:48). If `OPENAI_API_KEY` is set in the
235    // environment we MUST abort — that is the API-key path which is
236    // explicitly PROHIBITED. Use the OAuth `auth.json` flow exclusively.
237    if let Ok(_key) = std::env::var("OPENAI_API_KEY") {
238        let mut cmd = Command::new("false");
239        cmd.env_clear();
240        cmd.env("PATH", "/nonexistent");
241        cmd.arg("--oauth-only-violation-openai-api-key-set");
242        return cmd;
243    }
244
245    // Write the JSON schema to a path the caller controls. Callers should
246    // pass a path under the cache dir (see [`trusted_schema_path`]).
247    std::fs::write(&args.schema_path, args.json_schema).ok();
248
249    let mut cmd = Command::new(args.binary);
250    cmd.env_clear();
251    // OAuth flow: `CODEX_ACCESS_TOKEN` (Bearer) and `CODEX_HOME` (auth.json
252    // location) are whitelisted. `OPENAI_API_KEY` is INTENTIONALLY ABSENT.
253    for var in &[
254        "PATH",
255        "HOME",
256        "USER",
257        "SHELL",
258        "TERM",
259        "LANG",
260        "XDG_CONFIG_HOME",
261        "XDG_DATA_HOME",
262        "XDG_RUNTIME_DIR",
263        "XDG_CACHE_HOME",
264        "CODEX_ACCESS_TOKEN",
265        "CODEX_HOME",
266        "TMPDIR",
267        "TMP",
268        "TEMP",
269        "DYLD_FALLBACK_LIBRARY_PATH",
270    ] {
271        if let Ok(val) = std::env::var(var) {
272            cmd.env(var, val);
273        }
274    }
275
276    #[cfg(windows)]
277    for var in &[
278        "LOCALAPPDATA",
279        "APPDATA",
280        "USERPROFILE",
281        "SystemRoot",
282        "COMSPEC",
283        "PATHEXT",
284    ] {
285        if let Ok(val) = std::env::var(var) {
286            cmd.env(var, val);
287        }
288    }
289
290    cmd.arg("exec")
291        .arg("-c")
292        .arg("mcp_servers='{}'")
293        .arg("--json")
294        .arg("--output-schema")
295        .arg(&args.schema_path)
296        .arg("--ephemeral")
297        .arg("--skip-git-repo-check")
298        .arg("--sandbox")
299        .arg("read-only")
300        .arg("--ignore-user-config")
301        .arg("--ignore-rules")
302        .arg("--ask-for-approval")
303        .arg("never");
304
305    if let Some(m) = args.model {
306        cmd.arg("-m").arg(m);
307    }
308
309    // `-` means: read the prompt from stdin (Codex Paperclip pattern)
310    cmd.arg("-");
311
312    cmd.stdin(Stdio::piped())
313        .stdout(Stdio::piped())
314        .stderr(Stdio::piped());
315    // Keep the prompt alive for the stdin thread spawned in `spawn_codex`.
316    let _ = full_prompt; // captured by closure below
317
318    cmd
319}
320
321/// Parses JSONL output from `codex exec --json`.
322///
323/// Event format (DOTS notation):
324/// - `thread.started` — session init
325/// - `turn.started` — model turn begins
326/// - `item.completed` — message or tool call; last `agent_message` wins
327/// - `turn.completed` — includes usage stats
328/// - `turn.failed` — error with optional rate-limit indicator
329/// - `error` — schema or validation error
330///
331/// G32 (v1.0.69): this function is the single source of truth for JSONL
332/// parsing. Both `enrich` and `ingest --mode codex` consume it.
333pub fn parse_codex_jsonl(stdout: &str) -> Result<CodexResult, AppError> {
334    let mut last_agent_text: Option<String> = None;
335    let mut usage: Option<CodexUsage> = None;
336    let mut rate_limited = false;
337    let mut schema_error = false;
338    let mut turn_failed = false;
339    let mut failed_message = String::new();
340
341    for line in stdout.lines() {
342        let line = line.trim();
343        if line.is_empty() {
344            continue;
345        }
346
347        let event: serde_json::Value = match serde_json::from_str(line) {
348            Ok(v) => v,
349            Err(_) => {
350                tracing::warn!(target: "codex_spawn", line, "skipping malformed JSONL line");
351                continue;
352            }
353        };
354
355        let event_type = match event.get("type").and_then(|t| t.as_str()) {
356            Some(t) => t,
357            None => continue,
358        };
359
360        match event_type {
361            "item.completed" => {
362                if let Some(item) = event.get("item") {
363                    if item.get("type").and_then(|t| t.as_str()) == Some("agent_message") {
364                        if let Some(text) = item.get("text").and_then(|t| t.as_str()) {
365                            last_agent_text = Some(text.to_string());
366                        }
367                    }
368                }
369            }
370            "turn.completed" => {
371                if let Some(u) = event.get("usage") {
372                    // Skip events that lack the recognised token fields
373                    // (e.g. partial broadcasts with `{}`) so the last
374                    // populated usage wins instead of being overwritten
375                    // by an empty one.
376                    let is_populated = u
377                        .get("input_tokens")
378                        .and_then(|v| v.as_u64())
379                        .map(|n| n > 0)
380                        .unwrap_or(false)
381                        || u.get("output_tokens")
382                            .and_then(|v| v.as_u64())
383                            .map(|n| n > 0)
384                            .unwrap_or(false);
385                    if is_populated {
386                        if let Ok(parsed) = serde_json::from_value::<CodexUsage>(u.clone()) {
387                            usage = Some(parsed);
388                        }
389                    }
390                }
391            }
392            "turn.failed" => {
393                turn_failed = true;
394                if let Some(err) = event.get("error") {
395                    let msg = err
396                        .get("message")
397                        .and_then(|m| m.as_str())
398                        .unwrap_or("unknown error");
399                    failed_message = msg.to_string();
400                    if msg.contains("rate_limit")
401                        || msg.contains("429")
402                        || msg.contains("Too Many Requests")
403                    {
404                        rate_limited = true;
405                    }
406                }
407            }
408            "error" => {
409                if let Some(msg) = event.get("message").and_then(|m| m.as_str()) {
410                    if msg.contains("invalid_json_schema") || msg.contains("schema") {
411                        schema_error = true;
412                    }
413                }
414            }
415            _ => {}
416        }
417    }
418
419    let text = last_agent_text.ok_or_else(|| {
420        AppError::Validation(format!(
421            "no agent_message in codex JSONL output (rate_limited={rate_limited}, schema_error={schema_error}, turn_failed={turn_failed})"
422        ))
423    })?;
424
425    if turn_failed {
426        return Err(AppError::Validation(format!(
427            "codex turn failed: {failed_message}"
428        )));
429    }
430    if schema_error {
431        return Err(AppError::Validation(
432            "codex reported invalid_json_schema; check the --output-schema file".to_string(),
433        ));
434    }
435    if rate_limited {
436        return Err(AppError::Validation(format!(
437            "codex rate-limited: {failed_message}"
438        )));
439    }
440
441    let extraction = parse_extraction_text(&text)?;
442    Ok(CodexResult {
443        extraction,
444        usage,
445        rate_limited,
446        schema_error,
447        turn_failed,
448        failed_message,
449    })
450}
451
452/// Parses the agent_message text as an `ExtractionResult` JSON payload.
453///
454/// The schema is shared by both `enrich` and `ingest --mode codex`; the
455/// `text` is the JSON value the assistant returned, not a wrapper object.
456pub fn parse_extraction_text(text: &str) -> Result<ExtractionResult, AppError> {
457    let value: serde_json::Value = serde_json::from_str(text).map_err(|e| {
458        AppError::Validation(format!("failed to parse codex agent_message as JSON: {e}"))
459    })?;
460    let obj = value.as_object().ok_or_else(|| {
461        AppError::Validation("codex agent_message is not a JSON object".to_string())
462    })?;
463
464    let mut entities: Vec<NewEntity> = Vec::new();
465    if let Some(arr) = obj.get("entities").and_then(|v| v.as_array()) {
466        for e in arr {
467            if let Some(name) = e.get("name").and_then(|v| v.as_str()) {
468                // Accept either "type" or "entity_type" from the LLM payload
469                // and fall back to "concept" when the LLM omits it.
470                let entity_type_str = e
471                    .get("type")
472                    .or_else(|| e.get("entity_type"))
473                    .and_then(|v| v.as_str())
474                    .unwrap_or("concept");
475                let entity_type = serde_json::from_value::<crate::entity_type::EntityType>(
476                    serde_json::Value::String(entity_type_str.to_string()),
477                )
478                .unwrap_or(crate::entity_type::EntityType::Concept);
479                entities.push(NewEntity {
480                    name: name.to_string(),
481                    entity_type,
482                    description: None,
483                });
484            }
485        }
486    }
487
488    let mut relationships: Vec<NewRelationship> = Vec::new();
489    if let Some(arr) = obj.get("relationships").and_then(|v| v.as_array()) {
490        for r in arr {
491            let from = r.get("source").or_else(|| r.get("from"));
492            let to = r.get("target").or_else(|| r.get("to"));
493            let rel = r.get("relation").and_then(|v| v.as_str());
494            if let (Some(from_v), Some(to_v), Some(rel_v)) = (
495                from.and_then(|v| v.as_str()),
496                to.and_then(|v| v.as_str()),
497                rel,
498            ) {
499                relationships.push(NewRelationship {
500                    source: from_v.to_string(),
501                    target: to_v.to_string(),
502                    relation: rel_v.to_string(),
503                    strength: r.get("strength").and_then(|v| v.as_f64()).unwrap_or(0.5),
504                    description: None,
505                });
506            }
507        }
508    }
509
510    let urls: Vec<ExtractedUrl> = obj
511        .get("urls")
512        .and_then(|v| v.as_array())
513        .map(|arr| {
514            arr.iter()
515                .filter_map(|u| {
516                    Some(ExtractedUrl {
517                        url: u.get("url")?.as_str()?.to_string(),
518                        offset: u.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize,
519                    })
520                })
521                .collect()
522        })
523        .unwrap_or_default();
524
525    Ok(ExtractionResult {
526        entities,
527        relationships,
528        relationships_truncated: obj
529            .get("relationships_truncated")
530            .and_then(|v| v.as_bool())
531            .unwrap_or(false),
532        extraction_method: obj
533            .get("extraction_method")
534            .and_then(|v| v.as_str())
535            .unwrap_or("codex")
536            .to_string(),
537        urls,
538    })
539}
540
/// Unit tests for the JSONL parser, the model whitelist helpers and the
/// command builder.
///
/// Every test that calls `build_codex_command` is tagged
/// `#[serial_test::serial(env)]` and puts `OPENAI_API_KEY` into a known
/// state first: the builder's output depends on that process-wide variable,
/// and one of the tests sets it.
#[cfg(test)]
mod tests {
    use super::*;

    // One complete, successful codex run: a reasoning item, the final agent
    // message, a populated usage event and a trailing empty usage event.
    const SAMPLE_JSONL: &str = r#"{"type":"thread.started","thread_id":"abc"}
{"type":"turn.started"}
{"type":"item.completed","item":{"type":"reasoning","text":"thinking"}}
{"type":"item.completed","item":{"type":"agent_message","text":"{\"entities\":[{\"name\":\"alpha\",\"type\":\"concept\"}],\"relationships\":[{\"source\":\"alpha\",\"target\":\"beta\",\"relation\":\"uses\",\"strength\":0.7}],\"extraction_method\":\"codex\",\"urls\":[]}"}}
{"type":"turn.completed","usage":{"input_tokens":120,"output_tokens":45}}
{"type":"turn.completed","usage":{}}
"#;

    #[test]
    fn parse_codex_jsonl_extracts_last_agent_message() {
        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
        assert_eq!(result.extraction.entities.len(), 1);
        assert_eq!(result.extraction.entities[0].name, "alpha");
        assert_eq!(result.extraction.relationships.len(), 1);
        assert_eq!(result.extraction.relationships[0].relation, "uses");
        assert!((result.extraction.relationships[0].strength - 0.7).abs() < 1e-6);
    }

    /// The trailing `{}` usage event must not overwrite the populated one.
    #[test]
    fn parse_codex_jsonl_collects_usage() {
        let result = parse_codex_jsonl(SAMPLE_JSONL).expect("parse must succeed");
        let usage = result.usage.expect("usage must be populated");
        assert_eq!(usage.input_tokens, 120);
        assert_eq!(usage.output_tokens, 45);
    }

    #[test]
    fn parse_codex_jsonl_detects_rate_limit() {
        let r = parse_codex_jsonl(
            "{\"type\":\"turn.failed\",\"error\":{\"message\":\"rate_limit: 429 too many\"}}\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{}\"}}",
        );
        assert!(matches!(r, Err(AppError::Validation(_))));
    }

    #[test]
    fn parse_codex_jsonl_handles_no_agent_message() {
        let r = parse_codex_jsonl("{\"type\":\"thread.started\"}");
        assert!(matches!(r, Err(AppError::Validation(_))));
    }

    #[test]
    fn parse_codex_jsonl_skips_malformed_lines() {
        let r = parse_codex_jsonl(
            "{not valid json\n{\"type\":\"item.completed\",\"item\":{\"type\":\"agent_message\",\"text\":\"{\\\"entities\\\":[],\\\"relationships\\\":[],\\\"extraction_method\\\":\\\"codex\\\"}\"}}",
        );
        assert!(r.is_ok(), "malformed lines must be skipped, got {r:?}");
    }

    #[test]
    fn validate_codex_model_accepts_known() {
        assert!(validate_codex_model(Some("gpt-5.5")).is_ok());
        assert!(validate_codex_model(Some("gpt-5.4")).is_ok());
        assert!(validate_codex_model(None).is_ok()); // no override
    }

    #[test]
    fn validate_codex_model_rejects_unknown() {
        let err = validate_codex_model(Some("gpt-4")).unwrap_err();
        let msg = format!("{err}");
        assert!(msg.contains("not supported"));
        assert!(msg.contains("gpt-5.5"));
    }

    #[test]
    fn list_codex_models_includes_all_static_whitelist() {
        let models = list_codex_models();
        for m in CODEX_PRO_OAUTH_MODELS {
            assert!(models.contains(&m.to_string()), "missing {m} in {models:?}");
        }
    }

    #[test]
    fn suggest_codex_model_substring_match() {
        let s = suggest_codex_model("gpt-5");
        assert!(s.is_some(), "must suggest a gpt-5.x model");
    }

    #[test]
    fn suggest_codex_model_fuzzy_match() {
        // 'gpt5.5' has no hyphen; should still suggest 'gpt-5.5'.
        let s = suggest_codex_model("gpt5.5");
        assert!(s.is_some(), "fuzzy must suggest gpt-5.5 for 'gpt5.5'");
        assert_eq!(s.unwrap(), "gpt-5.5");
    }

    #[test]
    fn suggest_codex_model_unrelated_returns_none() {
        let s = suggest_codex_model("totally-unrelated-zzz");
        assert!(s.is_none());
    }

    /// Serialised with the other env-dependent tests and run with
    /// `OPENAI_API_KEY` cleared: with the key present the builder returns
    /// the `false` placeholder and none of the flags below would be found.
    #[test]
    #[serial_test::serial(env)]
    fn build_codex_command_includes_hardening_flags() {
        // SAFETY: unit test; env mutation is serialised with the other
        // `serial(env)` tests.
        unsafe {
            std::env::remove_var("OPENAI_API_KEY");
        }
        let args = CodexSpawnArgs {
            binary: Path::new("/bin/true"),
            prompt: "p",
            json_schema: "{}",
            input_text: "i",
            model: Some("gpt-5.5"),
            timeout_secs: 60,
            schema_path: std::env::temp_dir().join("test-schema.json"),
        };
        let cmd = build_codex_command(&args);
        let collected: Vec<String> = cmd
            .get_args()
            .filter_map(|a| a.to_str().map(|s| s.to_string()))
            .collect();
        for required in &[
            "exec",
            "--json",
            "--output-schema",
            "--ephemeral",
            "--skip-git-repo-check",
            "--sandbox",
            "read-only",
            "--ignore-user-config",
            "--ignore-rules",
            "-m",
            "gpt-5.5",
            "-",
        ] {
            assert!(
                collected.iter().any(|a| a == required),
                "missing flag {required} in {collected:?}"
            );
        }
    }

    #[test]
    fn list_codex_models_dedupes_with_cache_file() {
        // Ensure the union with the cache file (when present) does not
        // produce duplicates. We can't actually write a cache file in
        // a test, so we just verify the static path is dedup'd.
        let models = list_codex_models();
        let unique: std::collections::HashSet<_> = models.iter().collect();
        assert_eq!(unique.len(), models.len(), "list_codex_models must dedupe");
    }

    /// OAuth-only conformance test (gaps.md:41-49, v1.0.69 mandate).
    /// Verifies that `build_codex_command` always emits `-c mcp_servers='{}'`,
    /// `--ignore-user-config`, `--ask-for-approval never` and does NOT
    /// whitelist `OPENAI_API_KEY` in the env_clear whitelist.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_oauth_only_mandatory_flags() {
        // SAFETY: unit test
        unsafe {
            std::env::remove_var("OPENAI_API_KEY");
        }
        let schema = std::env::temp_dir().join("codex-test-schema.json");
        let _ = std::fs::remove_file(&schema);
        let args = CodexSpawnArgs {
            binary: std::path::Path::new("/usr/bin/false"),
            prompt: "p",
            json_schema: "{}",
            input_text: "i",
            model: Some("gpt-5.4-mini"),
            timeout_secs: 60,
            schema_path: schema.clone(),
        };
        let cmd = build_codex_command(&args);
        let argv: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        // Mandatory flags from gaps.md lines 233-238
        assert!(argv.contains(&"-c"), "must have -c (gaps.md:234)");
        assert!(
            argv.contains(&"mcp_servers='{}'"),
            "must have mcp_servers override (gaps.md:234)"
        );
        assert!(
            argv.contains(&"--ignore-user-config"),
            "must have --ignore-user-config (gaps.md:266)"
        );
        assert!(
            argv.contains(&"--ask-for-approval"),
            "must have --ask-for-approval never (gaps.md:237)"
        );
        assert!(
            argv.contains(&"--sandbox"),
            "must have --sandbox read-only (G31)"
        );
        assert!(argv.contains(&"--ephemeral"), "must have --ephemeral (G31)");
        assert!(
            argv.contains(&"--skip-git-repo-check"),
            "must have --skip-git-repo-check (G31)"
        );
        assert!(
            argv.contains(&"--ignore-rules"),
            "must have --ignore-rules (G31)"
        );
    }

    /// OAuth-only guard: when `OPENAI_API_KEY` is in the environment,
    /// `build_codex_command` MUST abort the spawn (return a `false`
    /// command), NOT pass the key through to the child.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_aborts_when_openai_api_key_set() {
        // SAFETY: unit test
        unsafe {
            std::env::set_var("OPENAI_API_KEY", "sk-violation-test");
        }
        let schema = std::env::temp_dir().join("codex-test-schema-abort.json");
        let _ = std::fs::remove_file(&schema);
        let args = CodexSpawnArgs {
            binary: std::path::Path::new("/usr/bin/codex"),
            prompt: "p",
            json_schema: "{}",
            input_text: "i",
            model: Some("gpt-5.4-mini"),
            timeout_secs: 60,
            schema_path: schema.clone(),
        };
        let cmd = build_codex_command(&args);
        let program = cmd.get_program().to_string_lossy().to_string();
        let argv: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        assert_eq!(
            program, "false",
            "when OPENAI_API_KEY is set, build_codex_command must abort"
        );
        assert!(
            argv.contains(&"--oauth-only-violation-openai-api-key-set"),
            "aborted command must carry violation marker"
        );
        // SAFETY: unit test; restores the cleared state for later tests.
        unsafe {
            std::env::remove_var("OPENAI_API_KEY");
        }
    }
}