Skip to main content

cai_ingest/
codex.rs

1//! Codex CLI history parser
2
3use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::path::{Path, PathBuf};
11use tracing::debug;
12
13/// Codex CLI history entry format (JSONL)
14#[derive(Debug, Serialize, Deserialize)]
15struct CodexEntry {
16    /// Unique ID
17    id: Option<String>,
18    /// Timestamp (ISO 8601 or Unix timestamp)
19    timestamp: Option<String>,
20    /// User prompt
21    prompt: String,
22    /// Assistant response
23    response: Option<String>,
24    /// Optional file context
25    file: Option<String>,
26    /// Optional language
27    language: Option<String>,
28    /// Session ID
29    session_id: Option<String>,
30}
31
/// Parser for Codex CLI history.
///
/// Holds only the location of the JSONL history file; the file is opened
/// each time the history is parsed, not when the parser is constructed.
#[derive(Debug, Clone)]
pub struct CodexParser {
    /// Path to history file
    history_path: PathBuf,
}
37
38impl CodexParser {
39    /// Create a new Codex parser
40    ///
41    /// # Arguments
42    /// * `history_path` - Path to ~/.codex/history.jsonl file
43    pub fn new<P: AsRef<Path>>(history_path: P) -> Self {
44        Self {
45            history_path: history_path.as_ref().to_path_buf(),
46        }
47    }
48
49    /// Create parser with default path
50    pub fn with_default_path() -> Result<Self, IngestError> {
51        let home = dirs::home_dir()
52            .ok_or_else(|| IngestError::PathNotFound("Home directory not found".to_string()))?;
53        Ok(Self::new(home.join(".codex/history.jsonl")))
54    }
55
56    /// Parse all entries from the history file
57    pub fn parse_all(&self) -> Result<Vec<Entry>, IngestError> {
58        let file = File::open(&self.history_path).map_err(|e| {
59            IngestError::PathNotFound(format!("{}: {}", self.history_path.display(), e))
60        })?;
61
62        let reader = BufReader::new(file);
63        let mut entries = Vec::new();
64        let mut line_num = 0;
65
66        for line in reader.lines() {
67            line_num += 1;
68            let line = line.map_err(|e| {
69                IngestError::InvalidFormat(format!("read line {}: {}", line_num, e))
70            })?;
71
72            if line.trim().is_empty() {
73                continue;
74            }
75
76            debug!("Parsing Codex entry line {}", line_num);
77            match self.parse_line(&line, line_num) {
78                Ok(entry) => entries.push(entry),
79                Err(e) => {
80                    tracing::warn!("Failed to parse line {}: {}", line_num, e);
81                }
82            }
83        }
84
85        if entries.is_empty() {
86            return Err(IngestError::NoFilesFound(
87                self.history_path.display().to_string(),
88            ));
89        }
90
91        Ok(entries)
92    }
93
94    /// Parse a single JSONL line
95    fn parse_line(&self, line: &str, line_num: usize) -> Result<Entry, IngestError> {
96        let codex_entry: CodexEntry = serde_json::from_str(line)
97            .map_err(|e| IngestError::InvalidFormat(format!("line {}: {}", line_num, e)))?;
98
99        let id = codex_entry
100            .id
101            .unwrap_or_else(|| format!("codex-line-{}", line_num));
102
103        let timestamp = parse_codex_timestamp(&codex_entry.timestamp);
104        let response = codex_entry.response.unwrap_or_default();
105
106        let mut extra = HashMap::new();
107        if let Some(sid) = &codex_entry.session_id {
108            extra.insert("session_id".to_string(), sid.clone());
109        }
110
111        Ok(Entry {
112            id,
113            source: Source::Codex,
114            timestamp,
115            prompt: codex_entry.prompt,
116            response,
117            metadata: Metadata {
118                file_path: codex_entry.file,
119                repo_url: None,
120                commit_hash: None,
121                language: codex_entry.language,
122                extra,
123            },
124        })
125    }
126}
127
128fn parse_codex_timestamp(ts: &Option<String>) -> DateTime<Utc> {
129    ts.as_ref()
130        .and_then(|s| {
131            // Try ISO 8601 first
132            DateTime::parse_from_rfc3339(s)
133                .ok()
134                .map(|dt: DateTime<chrono::FixedOffset>| dt.with_timezone(&Utc))
135                .or_else(|| {
136                    // Try Unix timestamp (seconds)
137                    s.parse::<i64>()
138                        .ok()
139                        .map(|secs| DateTime::from_timestamp(secs, 0).unwrap_or_else(Utc::now))
140                })
141        })
142        .unwrap_or_else(Utc::now)
143}
144
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Full and minimal records are both parsed; optional fields get their fallbacks.
    #[test]
    fn test_parse_codex_history() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        let jsonl = r#"{"id":"entry1","timestamp":"2024-01-15T10:30:00Z","prompt":"write a function","response":"def foo(): pass","file":"main.py","language":"python"}
{"prompt":"another question","response":"answer"}
"#;

        fs::write(&history_path, jsonl).unwrap();

        let parser = CodexParser::new(&history_path);
        let entries = parser.parse_all().unwrap();

        assert_eq!(entries.len(), 2);

        let entry1 = &entries[0];
        assert_eq!(entry1.id, "entry1");
        assert_eq!(entry1.source, Source::Codex);
        assert_eq!(entry1.prompt, "write a function");
        assert_eq!(entry1.response, "def foo(): pass");
        assert_eq!(entry1.metadata.file_path, Some("main.py".to_string()));
        assert_eq!(entry1.metadata.language, Some("python".to_string()));
        // 2024-01-15T10:30:00Z expressed as Unix seconds.
        assert_eq!(entry1.timestamp.timestamp(), 1_705_314_600);

        let entry2 = &entries[1];
        // The fallback id embeds the 1-based line number of the record.
        assert_eq!(entry2.id, "codex-line-2");
        assert_eq!(entry2.prompt, "another question");
    }

    /// A numeric string timestamp is interpreted as Unix seconds.
    #[test]
    fn test_parse_unix_timestamp() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        // Unix timestamp for 2024-01-15 11:50:00 UTC
        let jsonl = r#"{"prompt":"test","timestamp":"1705319400"}"#;

        fs::write(&history_path, jsonl).unwrap();

        let parser = CodexParser::new(&history_path);
        let entries = parser.parse_all().unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].prompt, "test");
        // Without this check the test would also pass when parsing silently
        // fell back to the current time.
        assert_eq!(entries[0].timestamp.timestamp(), 1_705_319_400);
    }

    /// Malformed and blank lines are skipped without aborting the import.
    #[test]
    fn test_skips_malformed_and_blank_lines() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        let jsonl = "not json\n\n{\"prompt\":\"kept\"}\n";

        fs::write(&history_path, jsonl).unwrap();

        let parser = CodexParser::new(&history_path);
        let entries = parser.parse_all().unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].id, "codex-line-3");
        assert_eq!(entries[0].prompt, "kept");
        assert_eq!(entries[0].response, "");
    }

    /// A history file that yields no entries is reported as an error.
    #[test]
    fn test_empty_history_is_error() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        fs::write(&history_path, "").unwrap();

        let parser = CodexParser::new(&history_path);
        let result = parser.parse_all();

        assert!(matches!(result, Err(IngestError::NoFilesFound(_))));
    }
}