Skip to main content

tj_core/session/
parser.rs

//! Parser for Claude Code session JSONL files.
//!
//! Each line is a JSON object whose `type` field determines its structure.
//! We only keep `user`, `assistant`, and `summary` entries for backfill; every
//! other entry type is dropped while parsing.
5
6use serde::Deserialize;
7
8/// Top-level entry in a Claude Code session JSONL file.
9/// Uses untagged enum because the `type` field values don't map cleanly to Rust enum variants.
10#[derive(Debug, Clone)]
11pub enum SessionEntry {
12    User(UserEntry),
13    Assistant(AssistantEntry),
14    Summary(SummaryEntry),
15    /// Entries we don't need for backfill (attachment, system, queue-operation, last-prompt).
16    Other,
17}
18
19#[derive(Debug, Clone, Deserialize)]
20#[serde(rename_all = "camelCase")]
21pub struct UserEntry {
22    pub uuid: String,
23    pub timestamp: String,
24    pub session_id: Option<String>,
25    pub message: Option<UserMessage>,
26    #[serde(default)]
27    pub cwd: Option<String>,
28}
29
30#[derive(Debug, Clone, Deserialize)]
31pub struct UserMessage {
32    pub content: serde_json::Value, // String or array of content blocks
33}
34
35#[derive(Debug, Clone, Deserialize)]
36#[serde(rename_all = "camelCase")]
37pub struct AssistantEntry {
38    pub uuid: String,
39    pub timestamp: String,
40    pub session_id: Option<String>,
41    pub message: Option<AssistantMessage>,
42}
43
44#[derive(Debug, Clone, Deserialize)]
45pub struct AssistantMessage {
46    pub content: Vec<ContentBlock>,
47    #[serde(default)]
48    pub model: Option<String>,
49    #[serde(default)]
50    pub stop_reason: Option<String>,
51}
52
53#[derive(Debug, Clone, Deserialize)]
54#[serde(tag = "type", rename_all = "snake_case")]
55pub enum ContentBlock {
56    Text {
57        text: String,
58    },
59    ToolUse {
60        name: String,
61        input: serde_json::Value,
62    },
63    ToolResult {
64        #[serde(default)]
65        content: serde_json::Value,
66    },
67    Thinking {
68        #[serde(default)]
69        thinking: Option<String>,
70    },
71    #[serde(other)]
72    Unknown,
73}
74
75#[derive(Debug, Clone, Deserialize)]
76pub struct SummaryEntry {
77    pub summary: String,
78    #[serde(default)]
79    pub timestamp: Option<String>,
80}
81
82/// A parsed session with all meaningful entries extracted.
83#[derive(Debug, Clone)]
84pub struct ParsedSession {
85    pub session_id: String,
86    pub file_path: String,
87    pub entries: Vec<SessionEntry>,
88    pub first_timestamp: Option<String>,
89    pub last_timestamp: Option<String>,
90}
91
92impl ParsedSession {
93    /// Extract the first user message text as a potential task title.
94    pub fn first_user_text(&self) -> Option<String> {
95        for entry in &self.entries {
96            if let SessionEntry::User(u) = entry {
97                let text = extract_user_text(u)?;
98                if !text.trim().is_empty() {
99                    return Some(text);
100                }
101            }
102        }
103        None
104    }
105
106    /// Extract session summary if present.
107    pub fn summary(&self) -> Option<&str> {
108        for entry in &self.entries {
109            if let SessionEntry::Summary(s) = entry {
110                return Some(&s.summary);
111            }
112        }
113        None
114    }
115
116    /// Count user messages.
117    pub fn user_message_count(&self) -> usize {
118        self.entries
119            .iter()
120            .filter(|e| matches!(e, SessionEntry::User(_)))
121            .count()
122    }
123
124    /// Count assistant messages.
125    pub fn assistant_message_count(&self) -> usize {
126        self.entries
127            .iter()
128            .filter(|e| matches!(e, SessionEntry::Assistant(_)))
129            .count()
130    }
131}
132
133/// Parse a Claude Code session JSONL file into structured entries.
134pub fn parse_session(path: &std::path::Path) -> anyhow::Result<ParsedSession> {
135    let file_name = path
136        .file_stem()
137        .and_then(|s| s.to_str())
138        .unwrap_or("unknown")
139        .to_string();
140
141    let file = std::fs::File::open(path)?;
142    let reader = std::io::BufReader::new(file);
143
144    let mut entries = Vec::new();
145    let mut first_ts: Option<String> = None;
146    let mut last_ts: Option<String> = None;
147
148    use std::io::BufRead;
149    for line in reader.lines() {
150        let line = line?;
151        if line.trim().is_empty() {
152            continue;
153        }
154        let raw: serde_json::Value = match serde_json::from_str(&line) {
155            Ok(v) => v,
156            Err(_) => continue, // skip malformed lines
157        };
158
159        let entry_type = raw.get("type").and_then(|v| v.as_str()).unwrap_or("");
160        let timestamp = raw
161            .get("timestamp")
162            .and_then(|v| v.as_str())
163            .map(String::from);
164
165        if let Some(ref ts) = timestamp {
166            if first_ts.is_none() {
167                first_ts = Some(ts.clone());
168            }
169            last_ts = Some(ts.clone());
170        }
171
172        let entry = match entry_type {
173            "user" => match serde_json::from_value::<UserEntry>(raw) {
174                Ok(u) => SessionEntry::User(u),
175                Err(_) => SessionEntry::Other,
176            },
177            "assistant" => match serde_json::from_value::<AssistantEntry>(raw) {
178                Ok(a) => SessionEntry::Assistant(a),
179                Err(_) => SessionEntry::Other,
180            },
181            "summary" => match serde_json::from_value::<SummaryEntry>(raw) {
182                Ok(s) => SessionEntry::Summary(s),
183                Err(_) => SessionEntry::Other,
184            },
185            _ => SessionEntry::Other,
186        };
187
188        // Only keep meaningful entries.
189        if !matches!(entry, SessionEntry::Other) {
190            entries.push(entry);
191        }
192    }
193
194    Ok(ParsedSession {
195        session_id: file_name,
196        file_path: path.to_string_lossy().into_owned(),
197        entries,
198        first_timestamp: first_ts,
199        last_timestamp: last_ts,
200    })
201}
202
203/// Extract text content from a user message.
204/// Content can be a plain string or an array of content blocks.
205pub fn extract_user_text(entry: &UserEntry) -> Option<String> {
206    let msg = entry.message.as_ref()?;
207    match &msg.content {
208        serde_json::Value::String(s) => Some(s.clone()),
209        serde_json::Value::Array(arr) => {
210            let texts: Vec<&str> = arr
211                .iter()
212                .filter_map(|block| {
213                    if block.get("type")?.as_str()? == "text" {
214                        block.get("text")?.as_str()
215                    } else {
216                        None
217                    }
218                })
219                .collect();
220            if texts.is_empty() {
221                None
222            } else {
223                Some(texts.join("\n"))
224            }
225        }
226        _ => None,
227    }
228}
229
230/// Extract all text blocks from an assistant message.
231pub fn extract_assistant_texts(entry: &AssistantEntry) -> Vec<String> {
232    let Some(msg) = &entry.message else {
233        return vec![];
234    };
235    msg.content
236        .iter()
237        .filter_map(|block| match block {
238            ContentBlock::Text { text } => Some(text.clone()),
239            _ => None,
240        })
241        .collect()
242}
243
244/// Extract tool_use calls from an assistant message.
245pub fn extract_tool_uses(entry: &AssistantEntry) -> Vec<(String, serde_json::Value)> {
246    let Some(msg) = &entry.message else {
247        return vec![];
248    };
249    msg.content
250        .iter()
251        .filter_map(|block| match block {
252            ContentBlock::ToolUse { name, input } => Some((name.clone(), input.clone())),
253            _ => None,
254        })
255        .collect()
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    // --- shared fixtures ---

    /// Deserializes `json` via an intermediate `Value`, the same route
    /// `parse_session` takes.
    fn from_json<T: serde::de::DeserializeOwned>(json: &str) -> T {
        let raw: serde_json::Value = serde_json::from_str(json).unwrap();
        serde_json::from_value(raw).unwrap()
    }

    /// Writes `lines`, newline-joined, to `name` inside a fresh temp dir.
    /// Keep the returned guard alive for as long as the path is in use.
    fn write_session(name: &str, lines: &[&str]) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(name);
        std::fs::write(&path, lines.join("\n")).unwrap();
        (dir, path)
    }

    /// User entry with the given raw content; `None` means no message at all.
    fn user(uuid: &str, timestamp: &str, content: Option<serde_json::Value>) -> UserEntry {
        UserEntry {
            uuid: uuid.into(),
            timestamp: timestamp.into(),
            session_id: None,
            message: content.map(|content| UserMessage { content }),
            cwd: None,
        }
    }

    /// Assistant entry with the given blocks; `None` means no message at all.
    fn assistant(uuid: &str, timestamp: &str, blocks: Option<Vec<ContentBlock>>) -> AssistantEntry {
        AssistantEntry {
            uuid: uuid.into(),
            timestamp: timestamp.into(),
            session_id: None,
            message: blocks.map(|content| AssistantMessage {
                content,
                model: None,
                stop_reason: None,
            }),
        }
    }

    /// Summary entry with the given text and optional timestamp.
    fn summary_entry(summary: &str, timestamp: Option<&str>) -> SummaryEntry {
        SummaryEntry {
            summary: summary.into(),
            timestamp: timestamp.map(String::from),
        }
    }

    /// In-memory session `s1` holding `entries`, with no timestamps recorded.
    fn session_of(entries: Vec<SessionEntry>) -> ParsedSession {
        ParsedSession {
            session_id: "s1".into(),
            file_path: "/tmp/s1.jsonl".into(),
            entries,
            first_timestamp: None,
            last_timestamp: None,
        }
    }

    fn text(body: &str) -> ContentBlock {
        ContentBlock::Text { text: body.into() }
    }

    fn tool_use(name: &str, input: serde_json::Value) -> ContentBlock {
        ContentBlock::ToolUse {
            name: name.into(),
            input,
        }
    }

    // --- deserialization of single entries ---

    #[test]
    fn parse_user_string_content() {
        let entry: UserEntry = from_json(
            r#"{"type":"user","uuid":"abc","timestamp":"2026-01-01T00:00:00Z","message":{"content":"hello world"}}"#,
        );
        let text = extract_user_text(&entry).unwrap();
        assert_eq!(text, "hello world");
    }

    #[test]
    fn parse_user_array_content() {
        let entry: UserEntry = from_json(
            r#"{"type":"user","uuid":"abc","timestamp":"2026-01-01T00:00:00Z","message":{"content":[{"type":"text","text":"fix the bug"}]}}"#,
        );
        let text = extract_user_text(&entry).unwrap();
        assert_eq!(text, "fix the bug");
    }

    #[test]
    fn parse_assistant_with_tool_use() {
        let entry: AssistantEntry = from_json(
            r#"{"type":"assistant","uuid":"def","timestamp":"2026-01-01T00:00:01Z","message":{"content":[{"type":"text","text":"Let me check"},{"type":"tool_use","name":"Read","input":{"file_path":"/tmp/x"}}]}}"#,
        );
        let texts = extract_assistant_texts(&entry);
        assert_eq!(texts, vec!["Let me check"]);
        let tools = extract_tool_uses(&entry);
        assert_eq!(tools.len(), 1);
        assert_eq!(tools[0].0, "Read");
    }

    // --- parse_session() with tempfile ---

    #[test]
    fn parse_session_with_valid_jsonl() {
        let (_dir, path) = write_session(
            "abc123.jsonl",
            &[
                r#"{"type":"user","uuid":"u1","timestamp":"2026-01-01T00:00:00Z","message":{"content":"hello"}}"#,
                r#"{"type":"assistant","uuid":"a1","timestamp":"2026-01-01T00:00:01Z","message":{"content":[{"type":"text","text":"hi there"}]}}"#,
                r#"{"type":"summary","summary":"This session was about greeting.","timestamp":"2026-01-01T00:00:02Z"}"#,
            ],
        );

        let session = parse_session(&path).unwrap();
        assert_eq!(session.session_id, "abc123");
        assert_eq!(session.entries.len(), 3);
        assert_eq!(session.first_timestamp.as_deref(), Some("2026-01-01T00:00:00Z"));
        assert_eq!(session.last_timestamp.as_deref(), Some("2026-01-01T00:00:02Z"));
    }

    #[test]
    fn parse_session_skips_empty_and_malformed_lines() {
        let (_dir, path) = write_session(
            "sess.jsonl",
            &[
                "",
                "not-json-at-all",
                r#"{"type":"user","uuid":"u1","timestamp":"2026-01-01T00:00:00Z","message":{"content":"valid"}}"#,
                "   ",
                r#"{"type":"unknown_type","data":"ignored"}"#,
            ],
        );

        let session = parse_session(&path).unwrap();
        // Only the valid user entry should be kept; unknown types are SessionEntry::Other and filtered out.
        assert_eq!(session.entries.len(), 1);
    }

    #[test]
    fn parse_session_empty_file() {
        let (_dir, path) = write_session("empty.jsonl", &[]);

        let session = parse_session(&path).unwrap();
        assert!(session.entries.is_empty());
        assert!(session.first_timestamp.is_none());
        assert!(session.last_timestamp.is_none());
    }

    #[test]
    fn parse_session_nonexistent_file() {
        let result = parse_session(std::path::Path::new("/nonexistent/path.jsonl"));
        assert!(result.is_err());
    }

    #[test]
    fn parse_session_session_id_from_filename() {
        let (_dir, path) = write_session("my-session-id.jsonl", &[]);

        let session = parse_session(&path).unwrap();
        assert_eq!(session.session_id, "my-session-id");
    }

    // --- ParsedSession::first_user_text() edge cases ---

    #[test]
    fn first_user_text_returns_none_when_no_users() {
        let session = session_of(vec![SessionEntry::Assistant(assistant(
            "a1",
            "2026-01-01T00:00:00Z",
            Some(vec![text("hello")]),
        ))]);
        assert!(session.first_user_text().is_none());
    }

    #[test]
    fn first_user_text_skips_empty_messages() {
        let session = session_of(vec![
            SessionEntry::User(user(
                "u1",
                "2026-01-01T00:00:00Z",
                Some(serde_json::json!("   ")),
            )),
            SessionEntry::User(user(
                "u2",
                "2026-01-01T00:00:01Z",
                Some(serde_json::json!("actual text")),
            )),
        ]);
        assert_eq!(session.first_user_text().unwrap(), "actual text");
    }

    #[test]
    fn first_user_text_with_xml_tagged_content() {
        // first_user_text does NOT strip XML tags — it returns raw text.
        let session = session_of(vec![SessionEntry::User(user(
            "u1",
            "2026-01-01T00:00:00Z",
            Some(serde_json::json!("<command-name>init</command-name> Setup project")),
        ))]);
        let text = session.first_user_text().unwrap();
        assert!(text.contains("<command-name>"));
        assert!(text.contains("Setup project"));
    }

    #[test]
    fn first_user_text_no_message() {
        let session = session_of(vec![SessionEntry::User(user(
            "u1",
            "2026-01-01T00:00:00Z",
            None,
        ))]);
        assert!(session.first_user_text().is_none());
    }

    // --- ParsedSession::summary() ---

    #[test]
    fn summary_returns_none_when_no_summary_entry() {
        let session = session_of(vec![SessionEntry::User(user(
            "u1",
            "2026-01-01T00:00:00Z",
            None,
        ))]);
        assert!(session.summary().is_none());
    }

    #[test]
    fn summary_returns_first_summary_text() {
        let session = session_of(vec![
            SessionEntry::Summary(summary_entry(
                "Worked on tests",
                Some("2026-01-01T00:00:00Z"),
            )),
            SessionEntry::Summary(summary_entry("Second summary ignored", None)),
        ]);
        assert_eq!(session.summary().unwrap(), "Worked on tests");
    }

    // --- user_message_count / assistant_message_count ---

    #[test]
    fn message_counts() {
        let session = session_of(vec![
            SessionEntry::User(user("u1", "t", None)),
            SessionEntry::User(user("u2", "t", None)),
            SessionEntry::Assistant(assistant("a1", "t", None)),
            SessionEntry::Summary(summary_entry("s", None)),
        ]);
        assert_eq!(session.user_message_count(), 2);
        assert_eq!(session.assistant_message_count(), 1);
    }

    #[test]
    fn message_counts_empty_session() {
        let session = session_of(vec![]);
        assert_eq!(session.user_message_count(), 0);
        assert_eq!(session.assistant_message_count(), 0);
    }

    // --- extract_user_text() edge cases ---

    #[test]
    fn extract_user_text_null_content() {
        let entry = user("u1", "t", Some(serde_json::Value::Null));
        assert!(extract_user_text(&entry).is_none());
    }

    #[test]
    fn extract_user_text_empty_array() {
        let entry = user("u1", "t", Some(serde_json::json!([])));
        assert!(extract_user_text(&entry).is_none());
    }

    #[test]
    fn extract_user_text_array_no_text_blocks() {
        let entry = user(
            "u1",
            "t",
            Some(serde_json::json!([{"type": "image", "url": "http://example.com/img.png"}])),
        );
        assert!(extract_user_text(&entry).is_none());
    }

    #[test]
    fn extract_user_text_multiple_text_blocks_joined() {
        let entry = user(
            "u1",
            "t",
            Some(serde_json::json!([
                {"type": "text", "text": "first"},
                {"type": "text", "text": "second"}
            ])),
        );
        assert_eq!(extract_user_text(&entry).unwrap(), "first\nsecond");
    }

    // --- extract_assistant_texts() edge cases ---

    #[test]
    fn extract_assistant_texts_no_message() {
        let entry = assistant("a1", "t", None);
        assert!(extract_assistant_texts(&entry).is_empty());
    }

    #[test]
    fn extract_assistant_texts_filters_out_thinking_and_tool_result() {
        let entry = assistant(
            "a1",
            "t",
            Some(vec![
                ContentBlock::Thinking { thinking: Some("internal thought".into()) },
                text("visible text"),
                ContentBlock::ToolResult { content: serde_json::json!("result data") },
                tool_use("Read", serde_json::json!({})),
                text("more text"),
            ]),
        );
        let texts = extract_assistant_texts(&entry);
        assert_eq!(texts, vec!["visible text", "more text"]);
    }

    #[test]
    fn extract_assistant_texts_empty_content() {
        let entry = assistant("a1", "t", Some(vec![]));
        assert!(extract_assistant_texts(&entry).is_empty());
    }

    // --- extract_tool_uses() filtering ---

    #[test]
    fn extract_tool_uses_no_message() {
        let entry = assistant("a1", "t", None);
        assert!(extract_tool_uses(&entry).is_empty());
    }

    #[test]
    fn extract_tool_uses_only_returns_tool_use_blocks() {
        let entry = assistant(
            "a1",
            "t",
            Some(vec![
                text("Let me help"),
                tool_use("Write", serde_json::json!({"file_path": "/tmp/a"})),
                ContentBlock::Thinking { thinking: None },
                tool_use("Bash", serde_json::json!({"command": "ls"})),
                ContentBlock::ToolResult { content: serde_json::json!(null) },
            ]),
        );
        let tools = extract_tool_uses(&entry);
        assert_eq!(tools.len(), 2);
        assert_eq!(tools[0].0, "Write");
        assert_eq!(tools[1].0, "Bash");
    }

    #[test]
    fn extract_tool_uses_preserves_input() {
        let entry = assistant(
            "a1",
            "t",
            Some(vec![tool_use(
                "Edit",
                serde_json::json!({"file_path": "/src/main.rs", "old_string": "foo", "new_string": "bar"}),
            )]),
        );
        let tools = extract_tool_uses(&entry);
        assert_eq!(tools[0].1["file_path"], "/src/main.rs");
        assert_eq!(tools[0].1["old_string"], "foo");
    }
}
724}