lore_cli/capture/watchers/
codex.rs

1//! Codex CLI session parser.
2//!
3//! Parses session files from OpenAI's Codex CLI tool. Sessions are stored in
4//! JSONL format at `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
5//!
6//! Each line in a JSONL file has a `type` field:
7//! - `session_meta`: Contains session metadata (id, cwd, model, git info)
8//! - `response_item`: Contains messages with role and content
9
10use anyhow::{Context, Result};
11use chrono::{DateTime, Utc};
12use serde::Deserialize;
13use std::fs::File;
14use std::io::{BufRead, BufReader};
15use std::path::{Path, PathBuf};
16use uuid::Uuid;
17
18use crate::storage::models::{Message, MessageContent, MessageRole, Session};
19
20use super::{Watcher, WatcherInfo};
21
/// Session watcher for the OpenAI Codex CLI.
///
/// A zero-sized handle: all behaviour lives in its `Watcher` implementation,
/// which locates `rollout-*.jsonl` files under
/// `~/.codex/sessions/YYYY/MM/DD/` and parses each one into a session.
pub struct CodexWatcher;
27
28impl Watcher for CodexWatcher {
29    fn info(&self) -> WatcherInfo {
30        WatcherInfo {
31            name: "codex",
32            description: "OpenAI Codex CLI",
33            default_paths: vec![codex_sessions_dir()],
34        }
35    }
36
37    fn is_available(&self) -> bool {
38        codex_sessions_dir().exists()
39    }
40
41    fn find_sources(&self) -> Result<Vec<PathBuf>> {
42        find_codex_session_files()
43    }
44
45    fn parse_source(&self, path: &Path) -> Result<Vec<(Session, Vec<Message>)>> {
46        let parsed = parse_codex_session_file(path)?;
47        if parsed.messages.is_empty() {
48            return Ok(vec![]);
49        }
50        let (session, messages) = parsed.to_storage_models();
51        Ok(vec![(session, messages)])
52    }
53
54    fn watch_paths(&self) -> Vec<PathBuf> {
55        vec![codex_sessions_dir()]
56    }
57}
58
59/// Returns the path to the Codex sessions directory.
60///
61/// This is typically `~/.codex/sessions/`.
62fn codex_sessions_dir() -> PathBuf {
63    dirs::home_dir()
64        .unwrap_or_else(|| PathBuf::from("."))
65        .join(".codex")
66        .join("sessions")
67}
68
69/// Raw session metadata from Codex JSONL files.
70#[derive(Debug, Deserialize)]
71struct RawSessionMeta {
72    id: String,
73    #[allow(dead_code)]
74    timestamp: String,
75    cwd: String,
76    #[serde(default)]
77    cli_version: Option<String>,
78    #[serde(default)]
79    model_provider: Option<String>,
80    #[serde(default)]
81    git: Option<RawGitInfo>,
82}
83
84/// Git information from session metadata.
85#[derive(Debug, Deserialize)]
86struct RawGitInfo {
87    #[serde(default)]
88    branch: Option<String>,
89}
90
91/// Raw entry from Codex JSONL files.
92#[derive(Debug, Deserialize)]
93struct RawEntry {
94    timestamp: String,
95    #[serde(rename = "type")]
96    entry_type: String,
97    #[serde(default)]
98    payload: Option<serde_json::Value>,
99}
100
101/// Raw response item payload.
102#[derive(Debug, Deserialize)]
103struct RawResponseItem {
104    #[serde(rename = "type")]
105    item_type: Option<String>,
106    role: Option<String>,
107    #[serde(default)]
108    content: Vec<RawContentItem>,
109}
110
111/// Raw content item within a response.
112#[derive(Debug, Deserialize)]
113struct RawContentItem {
114    #[serde(rename = "type")]
115    content_type: String,
116    #[serde(default)]
117    text: Option<String>,
118}
119
120/// Parses a Codex JSONL session file.
121///
122/// Reads each line of the file and extracts session metadata and messages.
123/// Skips malformed lines rather than failing the entire parse.
124///
125/// # Errors
126///
127/// Returns an error if the file cannot be opened.
128pub fn parse_codex_session_file(path: &Path) -> Result<ParsedCodexSession> {
129    let file = File::open(path).context("Failed to open Codex session file")?;
130    let reader = BufReader::new(file);
131
132    let mut session_id: Option<String> = None;
133    let mut cli_version: Option<String> = None;
134    let mut cwd: Option<String> = None;
135    let mut git_branch: Option<String> = None;
136    let mut model_provider: Option<String> = None;
137    let mut messages: Vec<ParsedCodexMessage> = Vec::new();
138
139    for (line_num, line) in reader.lines().enumerate() {
140        let line = match line {
141            Ok(l) => l,
142            Err(e) => {
143                tracing::debug!("Failed to read line {}: {}", line_num + 1, e);
144                continue;
145            }
146        };
147
148        if line.trim().is_empty() {
149            continue;
150        }
151
152        let entry: RawEntry = match serde_json::from_str(&line) {
153            Ok(e) => e,
154            Err(e) => {
155                tracing::debug!("Skipping unparseable line {}: {}", line_num + 1, e);
156                continue;
157            }
158        };
159
160        match entry.entry_type.as_str() {
161            "session_meta" => {
162                if let Some(payload) = entry.payload {
163                    if let Ok(meta) = serde_json::from_value::<RawSessionMeta>(payload) {
164                        if session_id.is_none() {
165                            session_id = Some(meta.id);
166                        }
167                        if cli_version.is_none() {
168                            cli_version = meta.cli_version;
169                        }
170                        if cwd.is_none() {
171                            cwd = Some(meta.cwd);
172                        }
173                        if model_provider.is_none() {
174                            model_provider = meta.model_provider;
175                        }
176                        if git_branch.is_none() {
177                            git_branch = meta.git.and_then(|g| g.branch);
178                        }
179                    }
180                }
181            }
182            "response_item" => {
183                if let Some(payload) = entry.payload {
184                    if let Ok(item) = serde_json::from_value::<RawResponseItem>(payload) {
185                        // Only process message types
186                        if item.item_type.as_deref() != Some("message") {
187                            continue;
188                        }
189
190                        let role = match item.role.as_deref() {
191                            Some("user") => MessageRole::User,
192                            Some("assistant") => MessageRole::Assistant,
193                            Some("system") => MessageRole::System,
194                            _ => continue,
195                        };
196
197                        // Extract text content from content array
198                        let text: String = item
199                            .content
200                            .iter()
201                            .filter_map(|c| {
202                                if c.content_type == "input_text" || c.content_type == "text" {
203                                    c.text.clone()
204                                } else {
205                                    None
206                                }
207                            })
208                            .collect::<Vec<_>>()
209                            .join("\n");
210
211                        if text.trim().is_empty() {
212                            continue;
213                        }
214
215                        let timestamp = DateTime::parse_from_rfc3339(&entry.timestamp)
216                            .map(|t| t.with_timezone(&Utc))
217                            .unwrap_or_else(|_| Utc::now());
218
219                        messages.push(ParsedCodexMessage {
220                            timestamp,
221                            role,
222                            content: text,
223                        });
224                    }
225                }
226            }
227            _ => {
228                // Skip other entry types
229            }
230        }
231    }
232
233    Ok(ParsedCodexSession {
234        session_id: session_id.unwrap_or_else(|| {
235            path.file_stem()
236                .and_then(|s| s.to_str())
237                .unwrap_or("unknown")
238                .to_string()
239        }),
240        cli_version,
241        cwd: cwd.unwrap_or_else(|| ".".to_string()),
242        git_branch,
243        model_provider,
244        messages,
245        source_path: path.to_string_lossy().to_string(),
246    })
247}
248
249/// Intermediate representation of a parsed Codex session.
250#[derive(Debug)]
251pub struct ParsedCodexSession {
252    pub session_id: String,
253    pub cli_version: Option<String>,
254    pub cwd: String,
255    pub git_branch: Option<String>,
256    pub model_provider: Option<String>,
257    pub messages: Vec<ParsedCodexMessage>,
258    pub source_path: String,
259}
260
261impl ParsedCodexSession {
262    /// Converts this parsed session to storage-ready models.
263    pub fn to_storage_models(&self) -> (Session, Vec<Message>) {
264        let session_uuid = Uuid::parse_str(&self.session_id).unwrap_or_else(|_| Uuid::new_v4());
265
266        let started_at = self
267            .messages
268            .first()
269            .map(|m| m.timestamp)
270            .unwrap_or_else(Utc::now);
271
272        let ended_at = self.messages.last().map(|m| m.timestamp);
273
274        let session = Session {
275            id: session_uuid,
276            tool: "codex".to_string(),
277            tool_version: self.cli_version.clone(),
278            started_at,
279            ended_at,
280            model: self.model_provider.clone(),
281            working_directory: self.cwd.clone(),
282            git_branch: self.git_branch.clone(),
283            source_path: Some(self.source_path.clone()),
284            message_count: self.messages.len() as i32,
285            machine_id: crate::storage::get_machine_id(),
286        };
287
288        let messages: Vec<Message> = self
289            .messages
290            .iter()
291            .enumerate()
292            .map(|(idx, m)| Message {
293                id: Uuid::new_v4(),
294                session_id: session_uuid,
295                parent_id: None,
296                index: idx as i32,
297                timestamp: m.timestamp,
298                role: m.role.clone(),
299                content: MessageContent::Text(m.content.clone()),
300                model: self.model_provider.clone(),
301                git_branch: self.git_branch.clone(),
302                cwd: Some(self.cwd.clone()),
303            })
304            .collect();
305
306        (session, messages)
307    }
308}
309
310/// Intermediate representation of a parsed Codex message.
311#[derive(Debug)]
312pub struct ParsedCodexMessage {
313    pub timestamp: DateTime<Utc>,
314    pub role: MessageRole,
315    pub content: String,
316}
317
318/// Discovers all Codex session files.
319///
320/// Scans `~/.codex/sessions/YYYY/MM/DD/` for `rollout-*.jsonl` files.
321pub fn find_codex_session_files() -> Result<Vec<PathBuf>> {
322    let sessions_dir = codex_sessions_dir();
323
324    if !sessions_dir.exists() {
325        return Ok(Vec::new());
326    }
327
328    let mut files = Vec::new();
329
330    // Walk the directory tree: sessions/YYYY/MM/DD/rollout-*.jsonl
331    for year_entry in std::fs::read_dir(&sessions_dir)? {
332        let year_entry = year_entry?;
333        let year_path = year_entry.path();
334        if !year_path.is_dir() {
335            continue;
336        }
337
338        for month_entry in std::fs::read_dir(&year_path)? {
339            let month_entry = month_entry?;
340            let month_path = month_entry.path();
341            if !month_path.is_dir() {
342                continue;
343            }
344
345            for day_entry in std::fs::read_dir(&month_path)? {
346                let day_entry = day_entry?;
347                let day_path = day_entry.path();
348                if !day_path.is_dir() {
349                    continue;
350                }
351
352                for file_entry in std::fs::read_dir(&day_path)? {
353                    let file_entry = file_entry?;
354                    let file_path = file_entry.path();
355
356                    if let Some(name) = file_path.file_name().and_then(|n| n.to_str()) {
357                        if name.starts_with("rollout-") && name.ends_with(".jsonl") {
358                            files.push(file_path);
359                        }
360                    }
361                }
362            }
363        }
364    }
365
366    Ok(files)
367}
368
#[cfg(test)]
mod tests {
    //! Unit tests for the Codex session parser and watcher.
    //!
    //! Fixtures are written to temporary files as JSONL lines built by the
    //! `make_*` helpers below.

    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Session id used by most fixtures; it is a syntactically valid UUID.
    const SESSION_ID: &str = "019b33ab-179f-7802-88a6-16557b4b7603";

    /// Creates a temporary JSONL file with given lines.
    fn create_temp_session_file(lines: &[&str]) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("Failed to create temp file");
        for line in lines {
            writeln!(file, "{line}").expect("Failed to write line");
        }
        file.flush().expect("Failed to flush");
        file
    }

    /// Generate a session_meta line.
    ///
    /// The arguments are interpolated verbatim, so they must not contain
    /// characters that need JSON escaping.
    fn make_session_meta(session_id: &str, cwd: &str, version: &str) -> String {
        format!(
            r#"{{"timestamp":"2025-12-18T22:53:29.406Z","type":"session_meta","payload":{{"id":"{session_id}","timestamp":"2025-12-18T22:53:29.377Z","cwd":"{cwd}","originator":"codex_cli_rs","cli_version":"{version}","model_provider":"openai","git":{{"branch":"main"}}}}}}"#
        )
    }

    /// Generate a user response_item line.
    fn make_user_message(content: &str) -> String {
        format!(
            r#"{{"timestamp":"2025-12-18T22:54:00.000Z","type":"response_item","payload":{{"type":"message","role":"user","content":[{{"type":"input_text","text":"{content}"}}]}}}}"#
        )
    }

    /// Generate an assistant response_item line.
    fn make_assistant_message(content: &str) -> String {
        format!(
            r#"{{"timestamp":"2025-12-18T22:55:00.000Z","type":"response_item","payload":{{"type":"message","role":"assistant","content":[{{"type":"text","text":"{content}"}}]}}}}"#
        )
    }

    #[test]
    fn test_watcher_info() {
        let watcher = CodexWatcher;
        let info = watcher.info();

        assert_eq!(info.name, "codex");
        assert_eq!(info.description, "OpenAI Codex CLI");
        assert!(!info.default_paths.is_empty());
        assert!(info.default_paths[0].to_string_lossy().contains(".codex"));
    }

    #[test]
    fn test_watcher_watch_paths() {
        let watcher = CodexWatcher;
        let paths = watcher.watch_paths();

        assert!(!paths.is_empty());
        assert!(paths[0].to_string_lossy().contains(".codex"));
    }

    #[test]
    fn test_parse_session_meta() {
        let meta_line = make_session_meta(SESSION_ID, "/Users/test/project", "0.63.0");

        let file = create_temp_session_file(&[&meta_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.session_id, SESSION_ID);
        assert_eq!(parsed.cli_version, Some("0.63.0".to_string()));
        assert_eq!(parsed.cwd, "/Users/test/project");
        assert_eq!(parsed.model_provider, Some("openai".to_string()));
        assert_eq!(parsed.git_branch, Some("main".to_string()));
    }

    #[test]
    fn test_parse_user_message() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello, can you help me?");

        let file = create_temp_session_file(&[&meta_line, &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
        assert_eq!(parsed.messages[0].content, "Hello, can you help me?");
    }

    #[test]
    fn test_parse_assistant_message() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let assistant_line = make_assistant_message("Sure, I can help!");

        let file = create_temp_session_file(&[&meta_line, &assistant_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::Assistant);
        assert_eq!(parsed.messages[0].content, "Sure, I can help!");
    }

    #[test]
    fn test_parse_conversation() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello");
        let assistant_line = make_assistant_message("Hi there!");

        let file = create_temp_session_file(&[&meta_line, &user_line, &assistant_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 2);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
        assert_eq!(parsed.messages[1].role, MessageRole::Assistant);
    }

    #[test]
    fn test_to_storage_models() {
        let meta_line = make_session_meta(SESSION_ID, "/test/project", "0.63.0");
        let user_line = make_user_message("Hello");
        let assistant_line = make_assistant_message("Hi!");

        let file = create_temp_session_file(&[&meta_line, &user_line, &assistant_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");
        let (session, messages) = parsed.to_storage_models();

        assert_eq!(session.tool, "codex");
        assert_eq!(session.tool_version, Some("0.63.0".to_string()));
        assert_eq!(session.working_directory, "/test/project");
        assert_eq!(session.git_branch, Some("main".to_string()));
        assert_eq!(session.model, Some("openai".to_string()));
        assert_eq!(session.message_count, 2);

        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0].role, MessageRole::User);
        assert_eq!(messages[1].role, MessageRole::Assistant);
        assert_eq!(messages[0].index, 0);
        assert_eq!(messages[1].index, 1);
    }

    #[test]
    fn test_empty_lines_skipped() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&["", &meta_line, "  ", &user_line, ""]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
    }

    #[test]
    fn test_invalid_json_skipped() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello");

        let file =
            create_temp_session_file(&["invalid json", &meta_line, "{not valid", &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.session_id, SESSION_ID);
    }

    #[test]
    fn test_non_message_response_items_skipped() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        // A response_item with type other than "message"
        let other_item = r#"{"timestamp":"2025-12-18T22:54:00.000Z","type":"response_item","payload":{"type":"function_call","name":"test"}}"#;
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&[&meta_line, other_item, &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
    }

    #[test]
    fn test_empty_content_skipped() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let empty_content = r#"{"timestamp":"2025-12-18T22:54:00.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[]}}"#;
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&[&meta_line, empty_content, &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
    }

    #[test]
    fn test_find_session_files_returns_empty_when_missing() {
        // This runs against the real home directory, so it can only check
        // that discovery succeeds, not what it finds.
        let result = find_codex_session_files();
        assert!(result.is_ok());
        // May or may not find files depending on system
    }

    #[test]
    fn test_watcher_parse_source() {
        let watcher = CodexWatcher;
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&[&meta_line, &user_line]);
        let result = watcher
            .parse_source(file.path())
            .expect("Should parse successfully");

        assert_eq!(result.len(), 1);
        let (session, messages) = &result[0];
        assert_eq!(session.tool, "codex");
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_watcher_parse_source_empty_session() {
        let watcher = CodexWatcher;
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");

        // Only metadata, no messages
        let file = create_temp_session_file(&[&meta_line]);
        let result = watcher
            .parse_source(file.path())
            .expect("Should parse successfully");

        assert!(result.is_empty());
    }

    #[test]
    fn test_session_id_fallback_to_filename() {
        // File with no session_meta
        let user_line = make_user_message("Hello");
        let file = create_temp_session_file(&[&user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        // Should fall back to the file stem, not merely to some non-empty
        // value such as the "unknown" placeholder.
        let expected_stem = file
            .path()
            .file_stem()
            .and_then(|s| s.to_str())
            .expect("temp file name should be valid UTF-8");
        assert_eq!(parsed.session_id, expected_stem);
    }

    #[test]
    fn test_uuid_session_id_parsing() {
        let meta_line = make_session_meta(SESSION_ID, "/test", "0.63.0");
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&[&meta_line, &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");
        let (session, _) = parsed.to_storage_models();

        // The session ID should be parsed as a valid UUID
        assert_eq!(session.id.to_string(), SESSION_ID);
    }

    #[test]
    fn test_invalid_uuid_generates_new() {
        let meta_line = r#"{"timestamp":"2025-12-18T22:53:29.406Z","type":"session_meta","payload":{"id":"not-a-uuid","timestamp":"2025-12-18T22:53:29.377Z","cwd":"/test","cli_version":"0.63.0"}}"#;
        let user_line = make_user_message("Hello");

        let file = create_temp_session_file(&[meta_line, &user_line]);
        let parsed = parse_codex_session_file(file.path()).expect("Failed to parse");

        // The raw id is kept as-is on the parsed session...
        assert_eq!(parsed.session_id, "not-a-uuid");

        let (session, _) = parsed.to_storage_models();

        // ...and the storage model still gets a valid (newly generated) UUID.
        assert!(!session.id.is_nil());
    }
}