1use crate::error::IngestError;
4use cai_core::{Entry, Metadata, Source};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::path::{Path, PathBuf};
11use tracing::debug;
12
13#[derive(Debug, Serialize, Deserialize)]
15struct CodexEntry {
16 id: Option<String>,
18 timestamp: Option<String>,
20 prompt: String,
22 response: Option<String>,
24 file: Option<String>,
26 language: Option<String>,
28 session_id: Option<String>,
30}
31
/// Parser for a Codex history file in JSON Lines format.
///
/// Build one with [`CodexParser::new`] for an explicit path, or with
/// [`CodexParser::with_default_path`] for `.codex/history.jsonl` under the
/// home directory.
pub struct CodexParser {
    /// Location of the JSONL history file that `parse_all` opens.
    history_path: PathBuf,
}
37
38impl CodexParser {
39 pub fn new<P: AsRef<Path>>(history_path: P) -> Self {
44 Self {
45 history_path: history_path.as_ref().to_path_buf(),
46 }
47 }
48
49 pub fn with_default_path() -> Result<Self, IngestError> {
51 let home = dirs::home_dir()
52 .ok_or_else(|| IngestError::PathNotFound("Home directory not found".to_string()))?;
53 Ok(Self::new(home.join(".codex/history.jsonl")))
54 }
55
56 pub fn parse_all(&self) -> Result<Vec<Entry>, IngestError> {
58 let file = File::open(&self.history_path).map_err(|e| {
59 IngestError::PathNotFound(format!("{}: {}", self.history_path.display(), e))
60 })?;
61
62 let reader = BufReader::new(file);
63 let mut entries = Vec::new();
64 let mut line_num = 0;
65
66 for line in reader.lines() {
67 line_num += 1;
68 let line = line.map_err(|e| {
69 IngestError::InvalidFormat(format!("read line {}: {}", line_num, e))
70 })?;
71
72 if line.trim().is_empty() {
73 continue;
74 }
75
76 debug!("Parsing Codex entry line {}", line_num);
77 match self.parse_line(&line, line_num) {
78 Ok(entry) => entries.push(entry),
79 Err(e) => {
80 tracing::warn!("Failed to parse line {}: {}", line_num, e);
81 }
82 }
83 }
84
85 if entries.is_empty() {
86 return Err(IngestError::NoFilesFound(
87 self.history_path.display().to_string(),
88 ));
89 }
90
91 Ok(entries)
92 }
93
94 fn parse_line(&self, line: &str, line_num: usize) -> Result<Entry, IngestError> {
96 let codex_entry: CodexEntry = serde_json::from_str(line)
97 .map_err(|e| IngestError::InvalidFormat(format!("line {}: {}", line_num, e)))?;
98
99 let id = codex_entry
100 .id
101 .unwrap_or_else(|| format!("codex-line-{}", line_num));
102
103 let timestamp = parse_codex_timestamp(&codex_entry.timestamp);
104 let response = codex_entry.response.unwrap_or_default();
105
106 let mut extra = HashMap::new();
107 if let Some(sid) = &codex_entry.session_id {
108 extra.insert("session_id".to_string(), sid.clone());
109 }
110
111 Ok(Entry {
112 id,
113 source: Source::Codex,
114 timestamp,
115 prompt: codex_entry.prompt,
116 response,
117 metadata: Metadata {
118 file_path: codex_entry.file,
119 repo_url: None,
120 commit_hash: None,
121 language: codex_entry.language,
122 extra,
123 },
124 })
125 }
126}
127
128fn parse_codex_timestamp(ts: &Option<String>) -> DateTime<Utc> {
129 ts.as_ref()
130 .and_then(|s| {
131 DateTime::parse_from_rfc3339(s)
133 .ok()
134 .map(|dt: DateTime<chrono::FixedOffset>| dt.with_timezone(&Utc))
135 .or_else(|| {
136 s.parse::<i64>()
138 .ok()
139 .map(|secs| DateTime::from_timestamp(secs, 0).unwrap_or_else(Utc::now))
140 })
141 })
142 .unwrap_or_else(Utc::now)
143}
144
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// A fully populated record and a minimal record are both parsed, and the
    /// minimal one receives the line-number based fallback id.
    #[test]
    fn test_parse_codex_history() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        let jsonl = r#"{"id":"entry1","timestamp":"2024-01-15T10:30:00Z","prompt":"write a function","response":"def foo(): pass","file":"main.py","language":"python"}
{"prompt":"another question","response":"answer"}
"#;

        fs::write(&history_path, jsonl).unwrap();

        let parser = CodexParser::new(&history_path);
        let entries = parser.parse_all().unwrap();

        assert_eq!(entries.len(), 2);

        let entry1 = &entries[0];
        assert_eq!(entry1.id, "entry1");
        assert_eq!(entry1.source, Source::Codex);
        assert_eq!(entry1.prompt, "write a function");
        assert_eq!(entry1.response, "def foo(): pass");
        assert_eq!(entry1.metadata.file_path, Some("main.py".to_string()));
        assert_eq!(entry1.metadata.language, Some("python".to_string()));
        // 2024-01-15T10:30:00Z expressed as Unix seconds.
        assert_eq!(entry1.timestamp.timestamp(), 1_705_314_600);

        let entry2 = &entries[1];
        // The record sits on the second line of the file, so the fallback id
        // is fully determined.
        assert_eq!(entry2.id, "codex-line-2");
        assert_eq!(entry2.prompt, "another question");
        assert_eq!(entry2.response, "answer");
    }

    /// A timestamp given as a string of Unix seconds is converted to that
    /// exact instant rather than silently replaced by the current time.
    #[test]
    fn test_parse_unix_timestamp() {
        let temp_dir = TempDir::new().unwrap();
        let history_path = temp_dir.path().join("history.jsonl");

        let jsonl = r#"{"prompt":"test","timestamp":"1705319400"}"#;

        fs::write(&history_path, jsonl).unwrap();

        let parser = CodexParser::new(&history_path);
        let entries = parser.parse_all().unwrap();

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].prompt, "test");
        assert_eq!(entries[0].timestamp.timestamp(), 1_705_319_400);
    }
}