1use std::collections::HashMap;
7
8use chrono::Utc;
9use serde::Deserialize;
10
11use crate::model::{Event, EventBuilder, EventType};
12use crate::util::truncate_string;
13
/// Name and input of a single tool invocation.
///
/// `TranscriptEntry::into_events` records one of these for every `tool_use`
/// content block it sees, so that a later `tool_result` block can be enriched
/// with the name (and, for `Bash`, the command) of the tool that produced it.
#[derive(Debug, Clone)]
pub struct ToolInfo {
    /// Tool name as it appears in the `tool_use` block (e.g. `"Bash"`).
    pub name: String,
    /// Raw JSON input passed to the tool.
    pub input: serde_json::Value,
}
22
/// Tool invocations keyed by tool-use id (the `id` of a `tool_use` block, which
/// a `tool_result` block refers to through `tool_use_id`).
pub type ToolInfoMap = HashMap<String, ToolInfo>;
25
/// A single transcript entry, deserialized from a JSON object whose keys are
/// camelCase (`sessionId`, `parentUuid`, `isSidechain`, ...).
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TranscriptEntry {
    /// Identifier of this entry; this is what `dedup_key` returns.
    pub uuid: String,

    /// `uuid` of the parent entry, when the entry has one.
    #[serde(default)]
    pub parent_uuid: Option<String>,

    /// Session the entry belongs to; copied onto every produced event.
    pub session_id: String,

    /// Timestamp string. `into_events` parses it as RFC 3339 and silently
    /// drops it when parsing fails.
    pub timestamp: String,

    /// Entry kind, read from the JSON key `type`. Only `"user"` and
    /// `"assistant"` entries produce events.
    #[serde(rename = "type")]
    pub entry_type: String,

    /// Sidechain flag, copied onto every produced event. `false` when the key
    /// is absent.
    #[serde(default)]
    pub is_sidechain: bool,

    /// Message payload; entries without one produce no events.
    #[serde(default)]
    pub message: Option<TranscriptMessage>,

    /// Working directory, copied onto produced events when present.
    #[serde(default)]
    pub cwd: Option<String>,

    /// Git branch, copied onto produced events when present.
    #[serde(default)]
    pub git_branch: Option<String>,

    /// Optional `version` value of the entry. Not read by `into_events`.
    #[serde(default)]
    pub version: Option<String>,

    /// Optional `slug` value of the entry. Not read by `into_events`.
    #[serde(default)]
    pub slug: Option<String>,

    /// Optional raw `toolUseResult` JSON of the entry. Not read by
    /// `into_events`, which takes tool output from the `tool_result` content
    /// block instead.
    #[serde(default)]
    pub tool_use_result: Option<serde_json::Value>,
}
76
/// The `message` object nested inside a [`TranscriptEntry`].
#[derive(Debug, Clone, Deserialize)]
pub struct TranscriptMessage {
    /// Message role string (`"user"` or `"assistant"` in the fixtures of this
    /// file). Event conversion dispatches on the entry's `type`, not on this.
    pub role: String,

    /// Model name; copied onto the `ApiRequest` event when present.
    #[serde(default)]
    pub model: Option<String>,

    /// Optional message id. Not read by `TranscriptEntry::into_events`.
    #[serde(default)]
    pub id: Option<String>,

    /// Token accounting for the message, when reported.
    #[serde(default)]
    pub usage: Option<TokenUsage>,

    /// Message content: a bare JSON string or an array of content blocks.
    /// A missing key or JSON `null` deserializes to `None`.
    #[serde(default, deserialize_with = "deserialize_message_content")]
    pub content: Option<MessageContent>,
}
103
/// Content of a message, which the transcript encodes in one of two shapes.
#[derive(Debug, Clone, PartialEq)]
pub enum MessageContent {
    /// Content given as a bare JSON string.
    Text(String),
    /// Content given as a JSON array of typed content blocks.
    Blocks(Vec<ContentBlock>),
}
116
117impl MessageContent {
118 pub fn text(&self) -> Option<&str> {
122 match self {
123 MessageContent::Text(s) => Some(s.as_str()),
124 MessageContent::Blocks(blocks) => blocks.iter().find_map(|b| {
125 if let ContentBlock::Text { text } = b {
126 Some(text.as_str())
127 } else {
128 None
129 }
130 }),
131 }
132 }
133
134 pub fn blocks(&self) -> Option<&[ContentBlock]> {
136 match self {
137 MessageContent::Text(_) => None,
138 MessageContent::Blocks(blocks) => Some(blocks),
139 }
140 }
141}
142
143fn deserialize_message_content<'de, D>(deserializer: D) -> Result<Option<MessageContent>, D::Error>
145where
146 D: serde::Deserializer<'de>,
147{
148 use serde::de::{self, Visitor};
149
150 struct MessageContentVisitor;
151
152 impl<'de> Visitor<'de> for MessageContentVisitor {
153 type Value = Option<MessageContent>;
154
155 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156 formatter.write_str("a string, an array of content blocks, or null")
157 }
158
159 fn visit_none<E>(self) -> Result<Self::Value, E>
160 where
161 E: de::Error,
162 {
163 Ok(None)
164 }
165
166 fn visit_unit<E>(self) -> Result<Self::Value, E>
167 where
168 E: de::Error,
169 {
170 Ok(None)
171 }
172
173 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
174 where
175 E: de::Error,
176 {
177 Ok(Some(MessageContent::Text(value.to_string())))
178 }
179
180 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
181 where
182 E: de::Error,
183 {
184 Ok(Some(MessageContent::Text(value)))
185 }
186
187 fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
188 where
189 A: de::SeqAccess<'de>,
190 {
191 let blocks: Vec<ContentBlock> =
192 Deserialize::deserialize(de::value::SeqAccessDeserializer::new(seq))?;
193 Ok(Some(MessageContent::Blocks(blocks)))
194 }
195 }
196
197 deserializer.deserialize_any(MessageContentVisitor)
198}
199
/// Token accounting reported in a message's `usage` object (snake_case keys).
///
/// `TranscriptEntry::into_events` only emits an `ApiRequest` event when at
/// least one of `input_tokens` / `output_tokens` is present.
#[derive(Debug, Clone, Deserialize)]
pub struct TokenUsage {
    /// Input token count; treated as 0 on the event when absent.
    pub input_tokens: Option<i64>,
    /// Output token count; treated as 0 on the event when absent.
    pub output_tokens: Option<i64>,

    /// Cache-read input tokens; forwarded to the event only when present.
    #[serde(default)]
    pub cache_read_input_tokens: Option<i64>,

    /// Cache-creation input tokens; forwarded to the event (as cache-write
    /// tokens) only when present.
    #[serde(default)]
    pub cache_creation_input_tokens: Option<i64>,

    /// Optional breakdown of cache-creation tokens. Not read by
    /// `TranscriptEntry::into_events`.
    #[serde(default)]
    pub cache_creation: Option<CacheCreationDetails>,
}
216
/// Breakdown of cache-creation tokens found under `usage.cache_creation`.
///
/// NOTE(review): the field names suggest a split by cache lifetime (5 minutes
/// vs. 1 hour); nothing in this file reads these values, so confirm against
/// the producer of the transcript before relying on that meaning.
#[derive(Debug, Clone, Deserialize)]
pub struct CacheCreationDetails {
    /// Value of `ephemeral_5m_input_tokens`, when present.
    #[serde(default)]
    pub ephemeral_5m_input_tokens: Option<i64>,

    /// Value of `ephemeral_1h_input_tokens`, when present.
    #[serde(default)]
    pub ephemeral_1h_input_tokens: Option<i64>,
}
226
/// One element of an array-shaped message content.
///
/// Internally tagged by the JSON key `type`, with snake_case tags: `text`,
/// `tool_use`, `tool_result` and `thinking`. There is no catch-all variant, so
/// a block with any other `type` makes deserialization of the enclosing
/// message fail.
#[derive(Debug, Clone, PartialEq, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {
    /// Plain text.
    Text { text: String },

    /// A tool invocation requested by the assistant.
    ToolUse {
        /// Tool-use id; `ToolResult::tool_use_id` refers back to it.
        id: String,
        /// Tool name (e.g. `"Bash"`).
        name: String,
        /// Raw JSON input for the tool; JSON `null` when the key is absent.
        #[serde(default)]
        input: serde_json::Value,
    },

    /// The result of an earlier tool invocation.
    ToolResult {
        /// Id of the `ToolUse` block this result answers.
        tool_use_id: String,
        /// Raw JSON result; JSON `null` when the key is absent.
        #[serde(default)]
        content: serde_json::Value,
    },

    /// A thinking block.
    Thinking {
        /// Thinking text; empty when the key is absent.
        #[serde(default)]
        thinking: String,
    },
}
255
256impl TranscriptEntry {
257 pub fn into_events(self, machine_id: &str, tool_info_map: &mut ToolInfoMap) -> Vec<Event> {
268 let mut events = Vec::new();
269
270 let timestamp = chrono::DateTime::parse_from_rfc3339(&self.timestamp)
272 .ok()
273 .map(|dt| dt.with_timezone(&Utc));
274
275 let base_builder = || {
276 let mut b = EventBuilder::new(machine_id, EventType::ApiRequest, &self.session_id)
277 .source("transcript")
278 .framework("claude")
279 .is_sidechain(self.is_sidechain);
280
281 if let Some(ts) = timestamp {
282 b = b.timestamp(ts);
283 }
284 if let Some(ref cwd) = self.cwd {
285 b = b.cwd(cwd.clone());
286 }
287 if let Some(ref branch) = self.git_branch {
288 b = b.git_branch(branch.clone());
289 }
290
291 let mut metadata = serde_json::Map::new();
293 metadata.insert("uuid".into(), serde_json::json!(&self.uuid));
294 if let Some(ref parent) = self.parent_uuid {
295 metadata.insert("parent_uuid".into(), serde_json::json!(parent));
296 }
297 b = b.metadata(serde_json::Value::Object(metadata).to_string());
298
299 b
300 };
301
302 match self.entry_type.as_str() {
303 "assistant" => {
304 if let Some(ref message) = self.message {
305 if let Some(ref usage) = message.usage
307 && (usage.input_tokens.is_some() || usage.output_tokens.is_some())
308 {
309 let mut builder = base_builder();
310
311 let input = usage.input_tokens.unwrap_or(0);
313 let output = usage.output_tokens.unwrap_or(0);
314 builder = builder.tokens(input, output);
315
316 if let Some(cache_read) = usage.cache_read_input_tokens {
318 builder = builder.tokens_cache_read_opt(Some(cache_read));
319 }
320 if let Some(cache_write) = usage.cache_creation_input_tokens {
321 builder = builder.tokens_cache_write_opt(Some(cache_write));
322 }
323
324 if let Some(ref model) = message.model {
326 builder = builder.model(model.clone());
327 }
328
329 events.push(builder.build());
330 }
331
332 if let Some(ref content) = message.content
334 && let Some(blocks) = content.blocks()
335 {
336 for block in blocks {
337 if let ContentBlock::ToolUse { id, name, input } = block {
338 tool_info_map.insert(
340 id.clone(),
341 ToolInfo {
342 name: name.clone(),
343 input: input.clone(),
344 },
345 );
346
347 let builder = EventBuilder::new(
348 machine_id,
349 EventType::PreToolUse,
350 &self.session_id,
351 )
352 .source("transcript")
353 .framework("claude")
354 .is_sidechain(self.is_sidechain)
355 .tool(id.clone(), name.clone())
356 .timestamp_opt(timestamp)
357 .cwd_opt(self.cwd.clone())
358 .git_branch_opt(self.git_branch.clone());
359
360 events.push(builder.build());
361 }
362 }
363 }
364 }
365 }
366
367 "user" => {
368 let message = self.message.as_ref();
370
371 let prompt_text = message
375 .and_then(|m| m.content.as_ref())
376 .and_then(|c| c.text())
377 .map(|s| s.to_string());
378
379 if prompt_text.is_some() {
382 let mut builder = EventBuilder::new(
383 machine_id,
384 EventType::UserPromptSubmit,
385 &self.session_id,
386 )
387 .source("transcript")
388 .framework("claude")
389 .is_sidechain(self.is_sidechain)
390 .timestamp_opt(timestamp);
391
392 if let Some(ref cwd) = self.cwd {
393 builder = builder.cwd(cwd.clone());
394 }
395 if let Some(ref branch) = self.git_branch {
396 builder = builder.git_branch(branch.clone());
397 }
398
399 if let Some(text) = prompt_text {
401 let truncated = truncate_string(&text, 1000);
402 builder =
403 builder.payload(serde_json::json!({"prompt": truncated}).to_string());
404 }
405
406 events.push(builder.build());
407 }
408
409 if let Some(ref message) = self.message
411 && let Some(ref content) = message.content
412 && let Some(blocks) = content.blocks()
413 {
414 for block in blocks {
415 if let ContentBlock::ToolResult {
416 tool_use_id,
417 content: result_content,
418 } = block
419 {
420 let mut builder = EventBuilder::new(
421 machine_id,
422 EventType::PostToolUse,
423 &self.session_id,
424 )
425 .source("transcript")
426 .framework("claude")
427 .is_sidechain(self.is_sidechain)
428 .tool_use_id(tool_use_id.clone())
429 .timestamp_opt(timestamp)
430 .cwd_opt(self.cwd.clone())
431 .git_branch_opt(self.git_branch.clone());
432
433 if let Some(tool_info) = tool_info_map.get(tool_use_id) {
435 builder = builder.tool_name(tool_info.name.clone());
438
439 if tool_info.name == "Bash" {
441 if let Some(command) = tool_info.input.get("command") {
443 builder = builder.payload(
444 serde_json::json!({ "command": command }).to_string(),
445 );
446 }
447
448 builder = builder.metadata(
450 serde_json::json!({ "output": result_content }).to_string(),
451 );
452 }
453 }
454
455 events.push(builder.build());
456 }
457 }
458 }
459 }
460
461 _ => {
462 }
464 }
465
466 events
467 }
468
469 pub fn dedup_key(&self) -> &str {
471 &self.uuid
472 }
473
474 pub fn should_skip(&self) -> bool {
476 self.entry_type != "user" && self.entry_type != "assistant"
478 }
479}
480
// Unit tests for transcript deserialization and for the entry -> event
// conversion. Every test parses a JSON fixture into a `TranscriptEntry` and
// inspects either the parsed value or the events `into_events` produces.
#[cfg(test)]
mod tests {
    use super::*;

    /// An assistant entry with usage yields exactly one `ApiRequest` event
    /// carrying the token counts, cache counts and model from the message.
    #[test]
    fn test_parse_assistant_with_usage() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-123",
            "parentUuid": "entry-122",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "cwd": "/projects/test",
            "message": {
                "role": "assistant",
                "model": "claude-opus-4-5-20251101",
                "usage": {
                    "input_tokens": 1000,
                    "output_tokens": 500,
                    "cache_read_input_tokens": 200,
                    "cache_creation_input_tokens": 100
                }
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert_eq!(entry.session_id, "test-session");
        assert_eq!(entry.uuid, "entry-123");
        assert!(!entry.is_sidechain);

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert_eq!(events.len(), 1);

        let event = &events[0];
        assert_eq!(event.event_type, EventType::ApiRequest);
        assert_eq!(event.session_id, "test-session");
        assert_eq!(event.source, Some("transcript".to_string()));
        assert_eq!(event.is_sidechain, Some(false));
        assert_eq!(event.tokens_input, Some(1000));
        assert_eq!(event.tokens_output, Some(500));
        assert_eq!(event.tokens_cache_read, Some(200));
        assert_eq!(event.tokens_cache_write, Some(100));
        assert_eq!(event.model, Some("claude-opus-4-5-20251101".to_string()));
    }

    /// `isSidechain: true` is parsed and propagated onto the produced event.
    #[test]
    fn test_parse_sidechain_entry() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-456",
            "isSidechain": true,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                }
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert!(entry.is_sidechain);

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert_eq!(events[0].is_sidechain, Some(true));
    }

    /// A user entry whose content is an array with a text block yields one
    /// `UserPromptSubmit` event.
    #[test]
    fn test_parse_user_message() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-789",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello, Claude!"}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);
    }

    /// Content given as a bare JSON string parses as `MessageContent::Text`
    /// and ends up as the `prompt` of the `UserPromptSubmit` payload.
    #[test]
    fn test_parse_user_message_string_content() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-string",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": "ok"
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();

        // The parsed content must be the string variant, with no blocks.
        let message = entry.message.as_ref().unwrap();
        let content = message.content.as_ref().unwrap();
        assert!(matches!(content, MessageContent::Text(_)));
        assert_eq!(content.text(), Some("ok"));
        assert!(content.blocks().is_none());

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        let payload: serde_json::Value =
            serde_json::from_str(events[0].payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["prompt"], "ok");
    }

    /// Content given as a JSON array parses as `MessageContent::Blocks`, and
    /// `text()` returns the text of its text block.
    #[test]
    fn test_message_content_blocks_variant() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-blocks",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello from blocks!"}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let message = entry.message.as_ref().unwrap();
        let content = message.content.as_ref().unwrap();

        assert!(matches!(content, MessageContent::Blocks(_)));
        assert_eq!(content.text(), Some("Hello from blocks!"));
        assert!(content.blocks().is_some());
        assert_eq!(content.blocks().unwrap().len(), 1);
    }

    /// An assistant entry with usage and a `tool_use` block yields an
    /// `ApiRequest` followed by a `PreToolUse`, and records the tool use in
    /// the tool-info map.
    #[test]
    fn test_parse_tool_use() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        // Order matters: the API request comes first, then the tool use.
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].event_type, EventType::ApiRequest);
        assert_eq!(events[1].event_type, EventType::PreToolUse);
        assert_eq!(events[1].tool_use_id, Some("toolu_123".to_string()));
        assert_eq!(events[1].tool_name, Some("Bash".to_string()));

        assert!(tool_info_map.contains_key("toolu_123"));
        assert_eq!(tool_info_map["toolu_123"].name, "Bash");
    }

    /// A Bash `tool_result` following its `tool_use` yields a `PostToolUse`
    /// event enriched with the tool name, the command (payload) and the
    /// output (metadata).
    #[test]
    fn test_parse_tool_result_with_enrichment() {
        // First feed the tool use so the map knows about `toolu_123`.
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls -la"}}
                ]
            }
        }"#;

        let tool_use_entry: TranscriptEntry = serde_json::from_str(tool_use_json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let _ = tool_use_entry.into_events("machine-1", &mut tool_info_map);

        // Then the result, processed against the same map.
        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file1.txt\nfile2.txt"}
                ]
            }
        }"#;

        let result_entry: TranscriptEntry = serde_json::from_str(tool_result_json).unwrap();
        let events = result_entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.event_type, EventType::PostToolUse);
        assert_eq!(event.tool_use_id, Some("toolu_123".to_string()));

        assert_eq!(event.tool_name, Some("Bash".to_string()));

        assert!(event.payload.is_some());
        let payload: serde_json::Value =
            serde_json::from_str(event.payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["command"], "ls -la");

        assert!(event.metadata.is_some());
        let metadata: serde_json::Value =
            serde_json::from_str(event.metadata.as_ref().unwrap()).unwrap();
        assert_eq!(metadata["output"], "file1.txt\nfile2.txt");
    }

    /// A result for a non-Bash tool gets the tool name but neither payload
    /// nor metadata.
    #[test]
    fn test_parse_non_bash_tool_no_payload() {
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": { "input_tokens": 100, "output_tokens": 50 },
                "content": [
                    {"type": "tool_use", "id": "toolu_read", "name": "Read", "input": {"file_path": "/test.txt"}}
                ]
            }
        }"#;

        let tool_use_entry: TranscriptEntry = serde_json::from_str(tool_use_json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let _ = tool_use_entry.into_events("machine-1", &mut tool_info_map);

        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_read", "content": "file contents here"}
                ]
            }
        }"#;

        let result_entry: TranscriptEntry = serde_json::from_str(tool_result_json).unwrap();
        let events = result_entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        let event = &events[0];

        assert_eq!(event.tool_name, Some("Read".to_string()));

        assert!(event.payload.is_none());
        assert!(event.metadata.is_none());
    }

    /// Entry types other than `user` / `assistant` are flagged by
    /// `should_skip` and produce no events.
    #[test]
    fn test_skip_non_message_entries() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "snapshot-1",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "file-history-snapshot"
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert!(entry.should_skip());

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert!(events.is_empty());
    }

    /// Truncating a prompt whose limit falls inside a multi-byte character
    /// must not panic and must still yield a well-formed payload.
    #[test]
    fn test_user_message_with_multibyte_utf8_truncation() {
        // 999 ASCII bytes followed by a 2-byte character: 1001 bytes in total,
        // so the 1000 limit passed to `truncate_string` lands in the middle of
        // the final character.
        let long_prompt = format!("{}{}", "a".repeat(999), "é");
        let json = format!(
            r#"{{
                "sessionId": "test-session",
                "uuid": "entry-utf8",
                "isSidechain": false,
                "timestamp": "2025-12-25T21:30:31.390Z",
                "type": "user",
                "message": {{
                    "role": "user",
                    "content": [
                        {{"type": "text", "text": "{}"}}
                    ]
                }}
            }}"#,
            long_prompt
        );

        let entry: TranscriptEntry = serde_json::from_str(&json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        let payload: serde_json::Value =
            serde_json::from_str(events[0].payload.as_ref().unwrap()).unwrap();
        let prompt = payload["prompt"].as_str().unwrap();
        assert!(prompt.ends_with("..."));
        // Expected shape: the 999 ASCII bytes plus the 3-byte "..." suffix.
        // NOTE(review): this pins the behavior of `truncate_string`, which is
        // defined outside this file.
        assert_eq!(prompt.len(), 1002);
    }
}