Skip to main content

normalize_chat_sessions/formats/
codex.rs

1//! OpenAI Codex CLI JSONL format parser.
2
3use super::{LogFormat, SessionFile, peek_lines};
4use crate::{ContentBlock, Message, Role, Session, TokenUsage, Turn};
5use serde_json::Value;
6use std::collections::HashMap;
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
/// OpenAI Codex CLI session log format (JSONL).
///
/// A stateless unit type: all behavior lives in its [`LogFormat`]
/// implementation, which looks for `*.jsonl` logs under
/// `~/.codex/sessions/YYYY/MM/DD/` and parses them into [`Session`] values.
pub struct CodexFormat;
13
14impl LogFormat for CodexFormat {
15    fn name(&self) -> &'static str {
16        "codex"
17    }
18
19    fn sessions_dir(&self, _project: Option<&Path>) -> PathBuf {
20        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into());
21        PathBuf::from(home).join(".codex/sessions")
22    }
23
24    fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> {
25        let dir = self.sessions_dir(project);
26        // Codex stores sessions in ~/.codex/sessions/YYYY/MM/DD/*.jsonl
27        let mut sessions = Vec::new();
28        // Walk year directories
29        if let Ok(years) = std::fs::read_dir(&dir) {
30            for year in years.filter_map(|e| e.ok()) {
31                if !year.path().is_dir() {
32                    continue;
33                }
34                // Walk month directories
35                if let Ok(months) = std::fs::read_dir(year.path()) {
36                    for month in months.filter_map(|e| e.ok()) {
37                        if !month.path().is_dir() {
38                            continue;
39                        }
40                        // Walk day directories
41                        if let Ok(days) = std::fs::read_dir(month.path()) {
42                            for day in days.filter_map(|e| e.ok()) {
43                                if !day.path().is_dir() {
44                                    continue;
45                                }
46                                // Find .jsonl files
47                                if let Ok(files) = std::fs::read_dir(day.path()) {
48                                    for file in files.filter_map(|e| e.ok()) {
49                                        let path = file.path();
50                                        if path.extension().and_then(|e| e.to_str())
51                                            == Some("jsonl")
52                                        {
53                                            if let Ok(meta) = path.metadata() {
54                                                if let Ok(mtime) = meta.modified() {
55                                                    sessions.push(SessionFile { path, mtime });
56                                                }
57                                            }
58                                        }
59                                    }
60                                }
61                            }
62                        }
63                    }
64                }
65            }
66        }
67        sessions
68    }
69
70    fn detect(&self, path: &Path) -> f64 {
71        // Check extension
72        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
73        if ext != "jsonl" {
74            return 0.0;
75        }
76
77        // Peek at first few lines
78        for line in peek_lines(path, 5) {
79            if let Ok(entry) = serde_json::from_str::<Value>(&line) {
80                // Codex has type field with session_meta, response_item, event_msg
81                if let Some(t) = entry.get("type").and_then(|v| v.as_str()) {
82                    if t == "session_meta" {
83                        // Check for codex-specific originator
84                        if let Some(originator) = entry
85                            .get("payload")
86                            .and_then(|p| p.get("originator"))
87                            .and_then(|v| v.as_str())
88                        {
89                            if originator.contains("codex") {
90                                return 1.0;
91                            }
92                        }
93                    }
94                }
95            }
96        }
97        0.0
98    }
99
100    fn parse(&self, path: &Path) -> Result<Session, String> {
101        let file = File::open(path).map_err(|e| e.to_string())?;
102        let reader = BufReader::new(file);
103
104        let mut session = Session::new(path.to_path_buf(), self.name());
105        let mut current_turn = Turn::default();
106        let mut pending_tool_calls: HashMap<String, (String, Value)> = HashMap::new();
107
108        for line in reader.lines() {
109            let line = line.map_err(|e| e.to_string())?;
110            if line.trim().is_empty() {
111                continue;
112            }
113
114            let Ok(entry) = serde_json::from_str::<Value>(&line) else {
115                continue;
116            };
117
118            let entry_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("");
119
120            // Extract metadata from session_meta
121            if entry_type == "session_meta" {
122                if let Some(payload) = entry.get("payload") {
123                    if session.metadata.session_id.is_none() {
124                        session.metadata.session_id = payload
125                            .get("session_id")
126                            .and_then(|v| v.as_str())
127                            .map(String::from);
128                    }
129                    if session.metadata.model.is_none() {
130                        session.metadata.model = payload
131                            .get("model")
132                            .and_then(|v| v.as_str())
133                            .map(String::from);
134                    }
135                }
136            }
137
138            let Some(payload) = entry.get("payload") else {
139                continue;
140            };
141
142            let payload_type = payload.get("type").and_then(|v| v.as_str()).unwrap_or("");
143
144            match payload_type {
145                "user_message" => {
146                    // Flush previous turn
147                    if !current_turn.messages.is_empty() {
148                        session.turns.push(std::mem::take(&mut current_turn));
149                    }
150
151                    let text = payload
152                        .get("content")
153                        .and_then(|v| v.as_str())
154                        .unwrap_or("")
155                        .to_string();
156
157                    current_turn.messages.push(Message {
158                        role: Role::User,
159                        content: vec![ContentBlock::Text { text }],
160                        timestamp: entry
161                            .get("timestamp")
162                            .and_then(|v| v.as_str())
163                            .map(String::from),
164                    });
165                }
166                "message" => {
167                    // Assistant text response
168                    let text = payload
169                        .get("content")
170                        .and_then(|v| v.as_str())
171                        .unwrap_or("")
172                        .to_string();
173
174                    if !text.is_empty() {
175                        current_turn.messages.push(Message {
176                            role: Role::Assistant,
177                            content: vec![ContentBlock::Text { text }],
178                            timestamp: entry
179                                .get("timestamp")
180                                .and_then(|v| v.as_str())
181                                .map(String::from),
182                        });
183                    }
184                }
185                "function_call" => {
186                    let call_id = payload
187                        .get("call_id")
188                        .and_then(|v| v.as_str())
189                        .unwrap_or("")
190                        .to_string();
191                    let name = payload
192                        .get("name")
193                        .and_then(|v| v.as_str())
194                        .unwrap_or("")
195                        .to_string();
196                    let args_str = payload
197                        .get("arguments")
198                        .and_then(|v| v.as_str())
199                        .unwrap_or("{}");
200                    let input: Value =
201                        serde_json::from_str(args_str).unwrap_or(Value::Object(Default::default()));
202
203                    // Store for later pairing with result
204                    pending_tool_calls.insert(call_id.clone(), (name.clone(), input.clone()));
205
206                    current_turn.messages.push(Message {
207                        role: Role::Assistant,
208                        content: vec![ContentBlock::ToolUse {
209                            id: call_id,
210                            name,
211                            input,
212                        }],
213                        timestamp: entry
214                            .get("timestamp")
215                            .and_then(|v| v.as_str())
216                            .map(String::from),
217                    });
218                }
219                "function_call_output" => {
220                    let call_id = payload
221                        .get("call_id")
222                        .and_then(|v| v.as_str())
223                        .unwrap_or("")
224                        .to_string();
225                    let output = payload
226                        .get("output")
227                        .and_then(|v| v.as_str())
228                        .unwrap_or("")
229                        .to_string();
230                    let is_error = output.contains("Exit code: 1")
231                        || output.starts_with("Error:")
232                        || output.contains("\nError:");
233
234                    current_turn.messages.push(Message {
235                        role: Role::User,
236                        content: vec![ContentBlock::ToolResult {
237                            tool_use_id: call_id,
238                            content: output,
239                            is_error,
240                        }],
241                        timestamp: entry
242                            .get("timestamp")
243                            .and_then(|v| v.as_str())
244                            .map(String::from),
245                    });
246                }
247                "token_count" => {
248                    // Extract final token usage
249                    if let Some(info) = payload.get("info") {
250                        if let Some(total) = info.get("total_token_usage") {
251                            current_turn.token_usage = Some(TokenUsage {
252                                input: total
253                                    .get("input_tokens")
254                                    .and_then(|v| v.as_u64())
255                                    .unwrap_or(0),
256                                output: total
257                                    .get("output_tokens")
258                                    .and_then(|v| v.as_u64())
259                                    .unwrap_or(0)
260                                    + total
261                                        .get("reasoning_output_tokens")
262                                        .and_then(|v| v.as_u64())
263                                        .unwrap_or(0),
264                                cache_read: total
265                                    .get("cached_input_tokens")
266                                    .and_then(|v| v.as_u64()),
267                                cache_create: None,
268                            });
269                        }
270                    }
271                }
272                _ => {}
273            }
274        }
275
276        // Flush final turn
277        if !current_turn.messages.is_empty() {
278            session.turns.push(current_turn);
279        }
280
281        // Set provider
282        session.metadata.provider = Some("openai".to_string());
283
284        Ok(session)
285    }
286}