Skip to main content

toolpath_codex/
reader.rs

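//! Parse Codex rollout JSONL files.
//!
//! The writer is append-only but runs in the background, so a crashed
//! Codex process may leave the final line only partially written. By
//! default we skip unparseable lines and surface them as warnings on
//! stderr rather than failing the whole read; setting the
//! `CODEX_ROLLOUT_STRICT` environment variable turns a malformed JSON
//! line into a hard error instead.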
7
8use crate::error::{ConvoError, Result};
9use crate::types::{RolloutLine, Session};
10use std::fs::File;
11use std::io::{BufRead, BufReader};
12use std::path::{Path, PathBuf};
13
/// Stateless entry point for reading Codex rollout files.
///
/// This is a unit struct: every operation is an associated function
/// (for example [`RolloutReader::read_session`]), so no instance is needed.
pub struct RolloutReader;
15
16impl RolloutReader {
17    /// Read every line of a rollout file into a [`Session`].
18    ///
19    /// The session id is taken from the first line's `session_meta`
20    /// payload if present; otherwise from the filename stem.
21    pub fn read_session<P: AsRef<Path>>(path: P) -> Result<Session> {
22        let path = path.as_ref();
23        if !path.exists() {
24            return Err(ConvoError::SessionNotFound(path.display().to_string()));
25        }
26
27        let file = File::open(path)?;
28        let reader = BufReader::new(file);
29        let mut lines: Vec<RolloutLine> = Vec::new();
30        for (idx, raw) in reader.lines().enumerate() {
31            let raw = match raw {
32                Ok(s) => s,
33                Err(e) => {
34                    eprintln!(
35                        "Warning: IO error reading {} line {}: {}",
36                        path.display(),
37                        idx + 1,
38                        e
39                    );
40                    continue;
41                }
42            };
43            if raw.trim().is_empty() {
44                continue;
45            }
46            match serde_json::from_str::<RolloutLine>(&raw) {
47                Ok(line) => lines.push(line),
48                Err(e) => {
49                    // Tolerate a single truncated last line (common after crashes);
50                    // warn about anything else.
51                    if std::env::var("CODEX_ROLLOUT_STRICT").is_ok() {
52                        return Err(ConvoError::Json(e));
53                    }
54                    eprintln!(
55                        "Warning: unparseable rollout line {} in {}: {}",
56                        idx + 1,
57                        path.file_name().and_then(|n| n.to_str()).unwrap_or("<?>"),
58                        e
59                    );
60                }
61            }
62        }
63
64        let id = Self::derive_session_id(&lines, path);
65        Ok(Session {
66            id,
67            file_path: path.to_path_buf(),
68            lines,
69        })
70    }
71
72    /// Peek just the first `session_meta` payload without fully parsing
73    /// the rest of the file. Returns the session id if found.
74    pub fn peek_session_id<P: AsRef<Path>>(path: P) -> Option<String> {
75        let file = File::open(path).ok()?;
76        let mut reader = BufReader::new(file);
77        let mut first = String::new();
78        reader.read_line(&mut first).ok()?;
79        let line: RolloutLine = serde_json::from_str(first.trim()).ok()?;
80        if line.kind != "session_meta" {
81            return None;
82        }
83        line.payload
84            .get("id")
85            .and_then(|v| v.as_str())
86            .map(str::to_string)
87    }
88
89    /// Return the byte-length of a rollout file.
90    pub fn file_size<P: AsRef<Path>>(path: P) -> Result<u64> {
91        let path = path.as_ref();
92        if !path.exists() {
93            return Err(ConvoError::SessionNotFound(path.display().to_string()));
94        }
95        Ok(std::fs::metadata(path)?.len())
96    }
97
98    fn derive_session_id(lines: &[RolloutLine], path: &Path) -> String {
99        // Prefer the session_meta payload.
100        if let Some(first) = lines.first()
101            && first.kind == "session_meta"
102            && let Some(id) = first.payload.get("id").and_then(|v| v.as_str())
103        {
104            return id.to_string();
105        }
106        // Fall back to the UUID suffix of the filename stem.
107        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
108            // Filename pattern: rollout-YYYY-MM-DDThh-mm-ss-<uuid>
109            if let Some(uuid_start) = find_uuid_start(stem) {
110                return stem[uuid_start..].to_string();
111            }
112            return stem.to_string();
113        }
114        "unknown".to_string()
115    }
116}
117
/// Heuristic: return the byte offset of the first 36-byte window of `stem`
/// that has the textual UUID shape (8-4-4-4-12 hex digits separated by `-`),
/// or `None` if there is no such window.
///
/// In normal filenames (`rollout-YYYY-MM-DDThh-mm-ss-<uuid>`) the UUID
/// starts at offset 28, but the scan does not rely on that.
fn find_uuid_start(stem: &str) -> Option<usize> {
    const UUID_LEN: usize = 36;
    // Stems shorter than a UUID cannot contain one.
    let last_start = stem.len().checked_sub(UUID_LEN)?;
    (0..=last_start).find(|&start| {
        // `str::get` yields `None` when either end of the window falls inside
        // a multi-byte character, so non-ASCII stems are skipped over instead
        // of panicking on a bad slice boundary.
        stem.get(start..start + UUID_LEN)
            .is_some_and(is_uuid_shape)
    })
}

/// Return `true` when `s` is exactly 36 bytes long, has `-` at byte
/// positions 8, 13, 18 and 23, and ASCII hex digits everywhere else.
fn is_uuid_shape(s: &str) -> bool {
    const DASH_POSITIONS: [usize; 4] = [8, 13, 18, 23];
    s.len() == 36
        && s.bytes().enumerate().all(|(pos, byte)| {
            if DASH_POSITIONS.contains(&pos) {
                byte == b'-'
            } else {
                byte.is_ascii_hexdigit()
            }
        })
}
157
/// Alias for [`PathBuf`], exposed so consumers can name rollout paths
/// without importing `PathBuf` themselves.
pub type RolloutPath = PathBuf;
160
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Four well-formed rollout lines (`session_meta`, `turn_context`,
    /// `event_msg`, `response_item`) joined by newlines, with no trailing
    /// newline.
    fn sample_rollout() -> String {
        [
            r#"{"timestamp":"2026-04-20T16:44:37.772Z","type":"session_meta","payload":{"id":"019dabc6-8fef-7681-a054-b5bb75fcb97d","timestamp":"2026-04-20T16:43:30.171Z","cwd":"/tmp/proj","originator":"codex-tui","cli_version":"0.118.0","source":"cli"}}"#,
            r#"{"timestamp":"2026-04-20T16:44:37.773Z","type":"turn_context","payload":{"turn_id":"019dabc7","cwd":"/tmp/proj"}}"#,
            r#"{"timestamp":"2026-04-20T16:44:37.775Z","type":"event_msg","payload":{"type":"task_started","turn_id":"019dabc7"}}"#,
            r#"{"timestamp":"2026-04-20T16:44:38.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"hello"}]}}"#,
        ]
        .join("\n")
    }

    /// Write `body` to a fresh temporary file and return its handle; the
    /// file is removed when the handle is dropped.
    fn write_fixture(body: &str) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(body.as_bytes()).unwrap();
        f.flush().unwrap();
        f
    }

    #[test]
    fn read_session_basic() {
        let f = write_fixture(&sample_rollout());
        let s = RolloutReader::read_session(f.path()).unwrap();
        assert_eq!(s.id, "019dabc6-8fef-7681-a054-b5bb75fcb97d");
        assert_eq!(s.lines.len(), 4);
        assert!(s.meta().is_some());
    }

    #[test]
    fn read_session_nonexistent_errors() {
        let err = RolloutReader::read_session("/nonexistent").unwrap_err();
        assert!(matches!(err, ConvoError::SessionNotFound(_)));
    }

    /// Serializes access to `CODEX_ROLLOUT_STRICT` across tests in this
    /// module. Two tests probe `read_session` with opposing strictness
    /// expectations; without serialization, cargo test's threaded
    /// runner can observe the env var set by one test during another.
    ///
    /// A poisoned lock is recovered rather than propagated, so one failing
    /// test does not cascade into the other.
    fn strict_env_lock() -> std::sync::MutexGuard<'static, ()> {
        use std::sync::{Mutex, OnceLock};
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(|p| p.into_inner())
    }

    #[test]
    fn read_session_handles_truncated_last_line() {
        let _g = strict_env_lock();
        // Belt-and-braces: even under the lock, make sure the env var
        // is clear before we observe lenient behavior.
        // SAFETY: `strict_env_lock` serializes every test in this module that
        // mutates this variable. NOTE(review): threads outside this lock that
        // touch the environment concurrently are not covered.
        unsafe { std::env::remove_var("CODEX_ROLLOUT_STRICT") };
        // Four good lines followed by a truncated one — reader skips it and warns.
        let body = sample_rollout() + "\n{\"timestamp\":\"broken"; // truncated
        let f = write_fixture(&body);
        let s = RolloutReader::read_session(f.path()).unwrap();
        assert_eq!(s.lines.len(), 4, "truncated line dropped, others kept");
    }

    #[test]
    fn read_session_respects_strict_env() {
        let _g = strict_env_lock();
        let body = sample_rollout() + "\n{\"timestamp\":\"broken";
        let f = write_fixture(&body);
        // SAFETY: `strict_env_lock` serializes every test in this module that
        // mutates this variable. NOTE(review): threads outside this lock that
        // touch the environment concurrently are not covered.
        unsafe { std::env::set_var("CODEX_ROLLOUT_STRICT", "1") };
        let result = RolloutReader::read_session(f.path());
        // Clear the variable before asserting, so a failed expectation
        // cannot leave strict mode switched on for later tests.
        // SAFETY: same serialization argument as for `set_var` above.
        unsafe { std::env::remove_var("CODEX_ROLLOUT_STRICT") };
        let err = result.unwrap_err();
        assert!(matches!(err, ConvoError::Json(_)));
    }

    #[test]
    fn peek_session_id_reads_first_line_only() {
        let f = write_fixture(&sample_rollout());
        let id = RolloutReader::peek_session_id(f.path()).unwrap();
        assert_eq!(id, "019dabc6-8fef-7681-a054-b5bb75fcb97d");
    }

    #[test]
    fn peek_session_id_missing_when_first_line_not_meta() {
        let body = r#"{"timestamp":"t","type":"event_msg","payload":{"type":"x"}}"#;
        let f = write_fixture(body);
        assert!(RolloutReader::peek_session_id(f.path()).is_none());
    }

    #[test]
    fn session_started_at_and_last_activity() {
        let f = write_fixture(&sample_rollout());
        let s = RolloutReader::read_session(f.path()).unwrap();
        assert!(s.started_at().is_some());
        assert!(s.last_activity() >= s.started_at());
    }

    #[test]
    fn session_first_user_text() {
        let f = write_fixture(&sample_rollout());
        let s = RolloutReader::read_session(f.path()).unwrap();
        assert_eq!(s.first_user_text().as_deref(), Some("hello"));
    }

    #[test]
    fn file_size_works() {
        let f = write_fixture(&sample_rollout());
        let size = RolloutReader::file_size(f.path()).unwrap();
        assert!(size > 0);
    }

    #[test]
    fn is_uuid_shape_accepts_v7() {
        assert!(is_uuid_shape("019dabc6-8fef-7681-a054-b5bb75fcb97d"));
        assert!(!is_uuid_shape("019dabc6-8fef-7681-a054-b5bb75fcb97")); // too short
        assert!(!is_uuid_shape("zzz"));
    }

    #[test]
    fn derive_session_id_falls_back_to_stem_uuid() {
        let body = r#"{"timestamp":"t","type":"event_msg","payload":{"type":"x"}}"#;
        // A private temp directory gives the fixture a unique location and
        // removes it on drop, even when an assertion below fails.
        let dir = tempfile::tempdir().unwrap();
        let path = dir
            .path()
            .join("rollout-2026-04-20T10-00-00-019dabc6-8fef-7681-a054-b5bb75fcb97d.jsonl");
        std::fs::write(&path, body).unwrap();
        let s = RolloutReader::read_session(&path).unwrap();
        assert_eq!(s.id, "019dabc6-8fef-7681-a054-b5bb75fcb97d");
    }
}