Skip to main content

opensession_core/
jsonl.rs

1//! HAIL JSONL format: streaming serialization/deserialization
2//!
3//! A `.hail.jsonl` file has the structure:
4//! ```jsonl
5//! {"type":"header","version":"hail-1.0.0","session_id":"...","agent":{...},"context":{...}}
6//! {"type":"event","event_id":"e1","timestamp":"...","event_type":{...},"content":{...},...}
7//! {"type":"event","event_id":"e2","timestamp":"...","event_type":{...},"content":{...},...}
8//! {"type":"stats","event_count":42,"message_count":10,...}
9//! ```
10//!
11//! The header line contains session metadata (no events).
12//! Each event is one line.
13//! The last line is aggregate stats (optional on write, recomputed on read if missing).
14
15use crate::trace::{Agent, Event, Session, SessionContext, Stats};
16use serde::{Deserialize, Serialize};
17use std::io::{self, BufRead, Write};
18
19/// A single line in a HAIL JSONL file
20#[derive(Debug, Clone, Serialize, Deserialize)]
21#[serde(tag = "type")]
22#[non_exhaustive]
23pub enum HailLine {
24    /// First line: session metadata
25    #[serde(rename = "header")]
26    Header {
27        version: String,
28        session_id: String,
29        agent: Agent,
30        context: SessionContext,
31    },
32    /// Middle lines: one event per line
33    #[serde(rename = "event")]
34    Event(Event),
35    /// Last line: aggregate stats
36    #[serde(rename = "stats")]
37    Stats(Stats),
38}
39
40/// Error types for JSONL operations
41#[derive(Debug, thiserror::Error)]
42#[non_exhaustive]
43pub enum JsonlError {
44    #[error("IO error: {0}")]
45    Io(#[from] io::Error),
46    #[error("JSON error at line {line}: {source}")]
47    Json {
48        line: usize,
49        source: serde_json::Error,
50    },
51    #[error("Missing header line")]
52    MissingHeader,
53    #[error("Unexpected line type at line {0}: expected header")]
54    UnexpectedLineType(usize),
55}
56
57/// Write a Session as HAIL JSONL to a writer
58pub fn write_jsonl<W: Write>(session: &Session, mut writer: W) -> Result<(), JsonlError> {
59    // Line 1: header
60    let header = HailLine::Header {
61        version: session.version.clone(),
62        session_id: session.session_id.clone(),
63        agent: session.agent.clone(),
64        context: session.context.clone(),
65    };
66    serde_json::to_writer(&mut writer, &header)
67        .map_err(|e| JsonlError::Json { line: 1, source: e })?;
68    writer.write_all(b"\n")?;
69
70    // Lines 2..N: events
71    for (i, event) in session.events.iter().enumerate() {
72        let line = HailLine::Event(event.clone());
73        serde_json::to_writer(&mut writer, &line).map_err(|e| JsonlError::Json {
74            line: i + 2,
75            source: e,
76        })?;
77        writer.write_all(b"\n")?;
78    }
79
80    // Last line: stats
81    let stats_line = HailLine::Stats(session.stats.clone());
82    serde_json::to_writer(&mut writer, &stats_line).map_err(|e| JsonlError::Json {
83        line: session.events.len() + 2,
84        source: e,
85    })?;
86    writer.write_all(b"\n")?;
87
88    Ok(())
89}
90
91/// Write a Session as HAIL JSONL to a String
92pub fn to_jsonl_string(session: &Session) -> Result<String, JsonlError> {
93    let mut buf = Vec::new();
94    write_jsonl(session, &mut buf)?;
95    // Safe: serde_json always produces valid UTF-8
96    Ok(String::from_utf8(buf).unwrap())
97}
98
99/// Read a Session from HAIL JSONL reader
100pub fn read_jsonl<R: BufRead>(reader: R) -> Result<Session, JsonlError> {
101    let mut lines = reader.lines();
102
103    // Line 1: header
104    let header_str = lines.next().ok_or(JsonlError::MissingHeader)??;
105    let header: HailLine =
106        serde_json::from_str(&header_str).map_err(|e| JsonlError::Json { line: 1, source: e })?;
107
108    let (version, session_id, agent, context) = match header {
109        HailLine::Header {
110            version,
111            session_id,
112            agent,
113            context,
114        } => (version, session_id, agent, context),
115        _ => return Err(JsonlError::UnexpectedLineType(1)),
116    };
117
118    let mut events = Vec::new();
119    let mut stats = None;
120    let mut line_num = 1usize;
121
122    for line_result in lines {
123        line_num += 1;
124        let line_str = line_result?;
125        if line_str.is_empty() {
126            continue;
127        }
128
129        let hail_line: HailLine =
130            serde_json::from_str(&line_str).map_err(|e| JsonlError::Json {
131                line: line_num,
132                source: e,
133            })?;
134
135        match hail_line {
136            HailLine::Event(event) => events.push(event),
137            HailLine::Stats(s) => stats = Some(s),
138            HailLine::Header { .. } => {
139                // Ignore duplicate headers
140            }
141        }
142    }
143
144    let has_stats = stats.is_some();
145    let mut session = Session {
146        version,
147        session_id,
148        agent,
149        context,
150        events,
151        stats: stats.unwrap_or_default(),
152    };
153
154    // If no stats line was present, recompute
155    if !has_stats {
156        session.recompute_stats();
157    }
158
159    Ok(session)
160}
161
162/// Read a Session from a HAIL JSONL string
163pub fn from_jsonl_str(s: &str) -> Result<Session, JsonlError> {
164    read_jsonl(io::BufReader::new(s.as_bytes()))
165}
166
167/// Read only the header (first line) from HAIL JSONL — useful for listing sessions
168/// without loading all events
169pub fn read_header<R: BufRead>(
170    reader: R,
171) -> Result<(String, String, Agent, SessionContext), JsonlError> {
172    let mut lines = reader.lines();
173    let header_str = lines.next().ok_or(JsonlError::MissingHeader)??;
174    let header: HailLine =
175        serde_json::from_str(&header_str).map_err(|e| JsonlError::Json { line: 1, source: e })?;
176
177    match header {
178        HailLine::Header {
179            version,
180            session_id,
181            agent,
182            context,
183        } => Ok((version, session_id, agent, context)),
184        _ => Err(JsonlError::UnexpectedLineType(1)),
185    }
186}
187
188/// Read header + stats (first and last line) without loading events.
189/// Returns (version, session_id, agent, context, stats_or_none)
190pub fn read_header_and_stats(
191    data: &str,
192) -> Result<(String, String, Agent, SessionContext, Option<Stats>), JsonlError> {
193    let mut lines = data.lines();
194
195    // First line: header
196    let header_str = lines.next().ok_or(JsonlError::MissingHeader)?;
197    let header: HailLine =
198        serde_json::from_str(header_str).map_err(|e| JsonlError::Json { line: 1, source: e })?;
199
200    let (version, session_id, agent, context) = match header {
201        HailLine::Header {
202            version,
203            session_id,
204            agent,
205            context,
206        } => (version, session_id, agent, context),
207        _ => return Err(JsonlError::UnexpectedLineType(1)),
208    };
209
210    // Try to read last non-empty line for stats
211    let mut last_line = None;
212    let mut line_num = 1usize;
213    for line in lines {
214        line_num += 1;
215        if !line.is_empty() {
216            last_line = Some((line_num, line));
217        }
218    }
219
220    let stats = if let Some((_ln, last)) = last_line {
221        match serde_json::from_str::<HailLine>(last) {
222            Ok(HailLine::Stats(s)) => Some(s),
223            Ok(_) => None,
224            Err(_) => None, // Last line isn't stats, that's ok (will recompute)
225        }
226    } else {
227        None
228    };
229
230    Ok((version, session_id, agent, context, stats))
231}
232
#[cfg(test)]
mod tests {
    use super::*;
    use crate::trace::{Content, EventType};
    use chrono::Utc;
    use std::collections::HashMap;

    /// Build a session with a title and three events (user message, agent
    /// message, file read carrying task id "t1"), with stats recomputed.
    fn make_test_session() -> Session {
        let mut session = Session::new(
            "test-jsonl-123".to_string(),
            Agent {
                provider: "anthropic".to_string(),
                model: "claude-opus-4-6".to_string(),
                tool: "claude-code".to_string(),
                tool_version: Some("1.2.3".to_string()),
            },
        );
        session.context.title = Some("Test JSONL session".to_string());

        let ts = Utc::now();
        session.events.push(Event {
            event_id: "e1".to_string(),
            timestamp: ts,
            event_type: EventType::UserMessage,
            task_id: None,
            content: Content::text("Hello, can you help me?"),
            duration_ms: None,
            attributes: HashMap::new(),
        });
        session.events.push(Event {
            event_id: "e2".to_string(),
            timestamp: ts,
            event_type: EventType::AgentMessage,
            task_id: None,
            content: Content::text("Sure! What do you need?"),
            duration_ms: None,
            attributes: HashMap::new(),
        });
        session.events.push(Event {
            event_id: "e3".to_string(),
            timestamp: ts,
            event_type: EventType::FileRead {
                path: "/tmp/test.rs".to_string(),
            },
            task_id: Some("t1".to_string()),
            content: Content::code("fn main() {}", Some("rust".to_string())),
            duration_ms: Some(50),
            attributes: HashMap::new(),
        });

        session.recompute_stats();
        session
    }

    #[test]
    fn test_jsonl_roundtrip() {
        let session = make_test_session();
        let jsonl = to_jsonl_string(&session).unwrap();

        // Should have exactly 5 lines (header + 3 events + stats)
        let lines: Vec<&str> = jsonl.trim().lines().collect();
        assert_eq!(lines.len(), 5);

        // First line should be header
        assert!(lines[0].contains("\"type\":\"header\""));
        assert!(lines[0].contains("hail-1.0.0"));

        // Middle lines should be events
        assert!(lines[1].contains("\"type\":\"event\""));
        assert!(lines[2].contains("\"type\":\"event\""));
        assert!(lines[3].contains("\"type\":\"event\""));

        // Last line should be stats
        assert!(lines[4].contains("\"type\":\"stats\""));

        // Roundtrip
        let parsed = from_jsonl_str(&jsonl).unwrap();
        assert_eq!(parsed.version, "hail-1.0.0");
        assert_eq!(parsed.session_id, "test-jsonl-123");
        assert_eq!(parsed.events.len(), 3);
        assert_eq!(parsed.stats.message_count, 2);
        assert_eq!(parsed.stats.tool_call_count, 1);
        assert_eq!(parsed.stats.event_count, 3);
        assert_eq!(parsed.agent.tool, "claude-code");
        assert_eq!(parsed.context.title, Some("Test JSONL session".to_string()));
    }

    #[test]
    fn test_jsonl_empty_session() {
        let session = Session::new(
            "empty-session".to_string(),
            Agent {
                provider: "openai".to_string(),
                model: "gpt-4o".to_string(),
                tool: "codex".to_string(),
                tool_version: None,
            },
        );

        let jsonl = to_jsonl_string(&session).unwrap();
        let lines: Vec<&str> = jsonl.trim().lines().collect();
        assert_eq!(lines.len(), 2); // header + stats only

        let parsed = from_jsonl_str(&jsonl).unwrap();
        assert_eq!(parsed.events.len(), 0);
        assert_eq!(parsed.stats.event_count, 0);
    }

    #[test]
    fn test_read_header_only() {
        let session = make_test_session();
        let jsonl = to_jsonl_string(&session).unwrap();

        let (version, session_id, agent, context) =
            read_header(io::BufReader::new(jsonl.as_bytes())).unwrap();

        assert_eq!(version, "hail-1.0.0");
        assert_eq!(session_id, "test-jsonl-123");
        assert_eq!(agent.tool, "claude-code");
        assert_eq!(context.title, Some("Test JSONL session".to_string()));
    }

    #[test]
    fn test_read_header_and_stats() {
        let session = make_test_session();
        let jsonl = to_jsonl_string(&session).unwrap();

        let (version, session_id, _agent, _context, stats) = read_header_and_stats(&jsonl).unwrap();

        assert_eq!(version, "hail-1.0.0");
        assert_eq!(session_id, "test-jsonl-123");
        let stats = stats.unwrap();
        assert_eq!(stats.event_count, 3);
        assert_eq!(stats.message_count, 2);
    }

    #[test]
    fn test_missing_stats_recomputes() {
        // Manually construct JSONL without stats line
        let session = make_test_session();
        let jsonl = to_jsonl_string(&session).unwrap();

        // Remove last line (stats): keep the header and the 3 events
        let without_stats: String = jsonl.lines().take(4).collect::<Vec<_>>().join("\n") + "\n";

        let parsed = from_jsonl_str(&without_stats).unwrap();
        assert_eq!(parsed.stats.event_count, 3);
        assert_eq!(parsed.stats.message_count, 2);
    }

    #[test]
    fn test_hailline_serde_tag() {
        let header = HailLine::Header {
            version: "hail-1.0.0".to_string(),
            session_id: "s1".to_string(),
            agent: Agent {
                provider: "test".to_string(),
                model: "test".to_string(),
                tool: "test".to_string(),
                tool_version: None,
            },
            context: SessionContext::default(),
        };

        let json = serde_json::to_string(&header).unwrap();
        assert!(json.contains("\"type\":\"header\""));

        let parsed: HailLine = serde_json::from_str(&json).unwrap();
        match parsed {
            HailLine::Header { version, .. } => assert_eq!(version, "hail-1.0.0"),
            _ => panic!("Expected Header"),
        }
    }

    #[test]
    fn test_jsonl_preserves_task_ids() {
        let session = make_test_session();
        let jsonl = to_jsonl_string(&session).unwrap();
        let parsed = from_jsonl_str(&jsonl).unwrap();

        // Event e3 has task_id "t1"
        assert_eq!(parsed.events[2].task_id, Some("t1".to_string()));
        // Events e1, e2 have no task_id
        assert_eq!(parsed.events[0].task_id, None);
        assert_eq!(parsed.events[1].task_id, None);
    }
}