mi6_core/input/codex_session/
scanner.rs1use std::path::{Path, PathBuf};
7
8use crate::model::Storage;
9use crate::model::error::{ScanError, TranscriptError};
10
11use super::{CodexSessionParser, FilePosition, extract_session_id_from_filename};
12
/// Summary of a single transcript scan.
///
/// The `Default` value has zero counts and no session ID.
#[derive(Clone, Debug, Default)]
pub struct ScanResult {
    /// Number of events inserted into storage during the scan.
    pub inserted: usize,
    /// Number of parse errors reported by the parser for the scanned range.
    pub parse_errors: u64,
    /// Session ID extracted from the scanned file's name, when a scan ran.
    pub session_id: Option<String>,
}
23
24pub struct CodexSessionScanner<'a, S: Storage> {
32 storage: &'a S,
33 machine_id: String,
34}
35
36impl<'a, S: Storage> CodexSessionScanner<'a, S> {
37 pub fn new(storage: &'a S, machine_id: impl Into<String>) -> Self {
39 Self {
40 storage,
41 machine_id: machine_id.into(),
42 }
43 }
44
45 pub fn scan_file(&self, path: &Path) -> Result<ScanResult, ScanError> {
54 let filename = path.file_name().and_then(|n| n.to_str()).ok_or_else(|| {
56 ScanError::Parse(TranscriptError::Io(std::io::Error::new(
57 std::io::ErrorKind::InvalidInput,
58 "Invalid filename",
59 )))
60 })?;
61
62 let session_id = extract_session_id_from_filename(filename).ok_or_else(|| {
63 ScanError::Parse(TranscriptError::Io(std::io::Error::new(
64 std::io::ErrorKind::InvalidInput,
65 "Could not extract session ID from filename",
66 )))
67 })?;
68
69 let start_position = self
71 .storage
72 .get_transcript_position(path)?
73 .map(|pos| FilePosition {
74 offset: pos.offset,
75 line_number: pos.line_number,
76 last_key: pos.last_uuid,
77 })
78 .unwrap_or_default();
79
80 let parser = CodexSessionParser::new(&self.machine_id);
82 let result = parser.parse_incremental(path, &session_id, &start_position)?;
83
84 let parse_errors = result.parse_errors;
85
86 let mut inserted = 0;
88 for event in result.events {
89 self.storage.insert(&event)?;
90 inserted += 1;
91 }
92
93 if let Some(path_str) = path.to_str() {
97 let _ = self.storage.update_session_transcript_path(
99 &self.machine_id,
100 &session_id,
101 path_str,
102 );
103 }
104
105 let transcript_position = crate::input::transcript::FilePosition {
107 offset: result.position.offset,
108 line_number: result.position.line_number,
109 last_uuid: result.position.last_key,
110 };
111 self.storage
112 .set_transcript_position(path, &transcript_position)?;
113
114 Ok(ScanResult {
115 inserted,
116 parse_errors,
117 session_id: Some(session_id),
118 })
119 }
120
121 pub fn scan_session(&self, session_id: &str) -> Result<ScanResult, ScanError> {
128 let path = find_session_file(session_id)?;
129 let result = self.scan_file(&path)?;
130
131 if let Some(path_str) = path.to_str() {
136 let _ =
137 self.storage
138 .update_session_transcript_path(&self.machine_id, session_id, path_str);
139 }
140
141 Ok(result)
142 }
143}
144
145pub fn find_session_file(session_id: &str) -> Result<PathBuf, ScanError> {
149 let sessions_dir = dirs::home_dir()
150 .ok_or_else(|| {
151 ScanError::Parse(TranscriptError::Io(std::io::Error::new(
152 std::io::ErrorKind::NotFound,
153 "Could not determine home directory",
154 )))
155 })?
156 .join(".codex")
157 .join("sessions");
158
159 if !sessions_dir.exists() {
160 return Err(ScanError::Parse(TranscriptError::Io(std::io::Error::new(
161 std::io::ErrorKind::NotFound,
162 "Codex sessions directory not found",
163 ))));
164 }
165
166 let year_entries =
169 std::fs::read_dir(&sessions_dir).map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?;
170
171 for year_entry in year_entries {
172 let year_path = year_entry
173 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?
174 .path();
175 if !year_path.is_dir() {
176 continue;
177 }
178
179 let month_entries =
180 std::fs::read_dir(&year_path).map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?;
181
182 for month_entry in month_entries {
183 let month_path = month_entry
184 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?
185 .path();
186 if !month_path.is_dir() {
187 continue;
188 }
189
190 let day_entries = std::fs::read_dir(&month_path)
191 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?;
192
193 for day_entry in day_entries {
194 let day_path = day_entry
195 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?
196 .path();
197 if !day_path.is_dir() {
198 continue;
199 }
200
201 let file_entries = std::fs::read_dir(&day_path)
202 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?;
203
204 for file_entry in file_entries {
205 let file_path = file_entry
206 .map_err(|e| ScanError::Parse(TranscriptError::Io(e)))?
207 .path();
208 if let Some(filename) = file_path.file_name().and_then(|n| n.to_str())
209 && filename.ends_with(".jsonl")
210 && filename.contains(session_id)
211 {
212 return Ok(file_path);
213 }
214 }
215 }
216 }
217 }
218
219 Err(ScanError::Parse(TranscriptError::Io(std::io::Error::new(
220 std::io::ErrorKind::NotFound,
221 format!("Session file not found for ID: {}", session_id),
222 ))))
223}
224
225pub fn codex_sessions_dir() -> Option<PathBuf> {
227 dirs::home_dir().map(|h| h.join(".codex").join("sessions"))
228}
229
230pub fn list_session_files_in_dir(dir: &Path) -> Vec<(String, PathBuf)> {
234 let mut results = Vec::new();
235
236 if let Ok(entries) = std::fs::read_dir(dir) {
237 for entry in entries.flatten() {
238 let path = entry.path();
239 if path.is_file()
240 && let Some(filename) = path.file_name().and_then(|n| n.to_str())
241 && filename.ends_with(".jsonl")
242 && let Some(session_id) = extract_session_id_from_filename(filename)
243 {
244 results.push((session_id, path));
245 }
246 }
247 }
248
249 results
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty directory yields no session files.
    #[test]
    fn test_list_session_files_in_dir_empty() {
        let dir = tempfile::tempdir().unwrap();
        assert!(list_session_files_in_dir(dir.path()).is_empty());
    }

    /// Only `.jsonl` rollout files are listed, each with its session ID.
    #[test]
    fn test_list_session_files_in_dir_with_files() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        // Two session transcripts plus one unrelated file that must be ignored.
        let fixtures = [
            (
                "rollout-2026-01-08T17-00-06-019ba044-90fa-7b30-be9c-6b7b601599cd.jsonl",
                "{}",
            ),
            (
                "rollout-2026-01-08T16-00-00-abcd1234-5678-90ab-cdef-1234567890ab.jsonl",
                "{}",
            ),
            ("other.txt", "not a session"),
        ];
        for (name, contents) in fixtures {
            std::fs::write(root.join(name), contents).unwrap();
        }

        let found = list_session_files_in_dir(root);
        assert_eq!(found.len(), 2);

        let ids: Vec<&str> = found.iter().map(|(id, _)| id.as_str()).collect();
        for expected in [
            "019ba044-90fa-7b30-be9c-6b7b601599cd",
            "abcd1234-5678-90ab-cdef-1234567890ab",
        ] {
            assert!(ids.contains(&expected));
        }
    }

    /// The sessions directory resolves and ends in `.codex/sessions`.
    #[test]
    fn test_codex_sessions_dir() {
        let dir = codex_sessions_dir();
        assert!(dir.is_some());
        assert!(dir.is_some_and(|path| path.ends_with(".codex/sessions")));
    }
}