1use std::collections::HashMap;
7
8use chrono::Utc;
9use serde::Deserialize;
10
11use crate::model::{Event, EventBuilder, EventType};
12use crate::util::truncate_string;
13
/// Name and input of a single tool invocation.
///
/// `TranscriptEntry::into_events` records one of these for every `tool_use`
/// block it sees (keyed by the block's id) and reads it back when the matching
/// `tool_result` block is processed.
#[derive(Debug, Clone)]
pub struct ToolInfo {
    /// Tool name copied from the `tool_use` block.
    pub name: String,
    /// Raw JSON `input` copied from the `tool_use` block.
    pub input: serde_json::Value,
}
22
/// Lookup table from a tool-use id to the [`ToolInfo`] recorded for that call.
pub type ToolInfoMap = HashMap<String, ToolInfo>;
25
/// One deserialized transcript record.
///
/// Field names are mapped from camelCase JSON keys (`sessionId`, `parentUuid`,
/// `isSidechain`, `gitBranch`, `toolUseResult`, ...); the JSON `type` key is
/// stored in [`TranscriptEntry::entry_type`]. Every field marked
/// `#[serde(default)]` may be absent from the input.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TranscriptEntry {
    /// Identifier of this entry; returned by `dedup_key`.
    pub uuid: String,

    /// Identifier of the parent entry, when present.
    #[serde(default)]
    pub parent_uuid: Option<String>,

    /// Session identifier attached to every event built from this entry.
    pub session_id: String,

    /// Timestamp text; `into_events` tries to parse it as RFC 3339.
    pub timestamp: String,

    /// Value of the JSON `type` key. `into_events` only acts on `"user"` and
    /// `"assistant"`.
    #[serde(rename = "type")]
    pub entry_type: String,

    /// Sidechain flag copied onto every event; `false` when the key is missing.
    #[serde(default)]
    pub is_sidechain: bool,

    /// Message body, when the entry carries one.
    #[serde(default)]
    pub message: Option<TranscriptMessage>,

    /// Working directory recorded with the entry, when present.
    #[serde(default)]
    pub cwd: Option<String>,

    /// Git branch recorded with the entry, when present.
    #[serde(default)]
    pub git_branch: Option<String>,

    /// Parsed from the `version` key; not read by the code visible in this file.
    #[serde(default)]
    pub version: Option<String>,

    /// Parsed from the `slug` key; not read by the code visible in this file.
    #[serde(default)]
    pub slug: Option<String>,

    /// Parsed from the `toolUseResult` key as raw JSON; not read by the code
    /// visible in this file.
    #[serde(default)]
    pub tool_use_result: Option<serde_json::Value>,
}
76
/// The `message` object nested inside a [`TranscriptEntry`].
///
/// Unlike the enclosing entry, no key renaming is applied here: JSON keys match
/// the field names as written.
#[derive(Debug, Clone, Deserialize)]
pub struct TranscriptMessage {
    /// Value of the `role` key (the tests in this file use `"user"` and
    /// `"assistant"`).
    pub role: String,

    /// Model name, when present. `into_events` ignores the literal value
    /// `"<synthetic>"`.
    #[serde(default)]
    pub model: Option<String>,

    /// Message identifier, when present; not read by the code visible in this file.
    #[serde(default)]
    pub id: Option<String>,

    /// Token accounting for the message, when present.
    #[serde(default)]
    pub usage: Option<TokenUsage>,

    /// Message content: a plain string, an array of content blocks, or absent/null.
    /// The custom deserializer is what accepts both the string and the array shape.
    #[serde(default, deserialize_with = "deserialize_message_content")]
    pub content: Option<MessageContent>,
}
103
/// The two shapes a message's `content` value can take.
///
/// There is no derived `Deserialize`; values are produced by
/// `deserialize_message_content`.
#[derive(Debug, Clone, PartialEq)]
pub enum MessageContent {
    /// Content given as a single JSON string.
    Text(String),
    /// Content given as a JSON array of content blocks.
    Blocks(Vec<ContentBlock>),
}
116
117impl MessageContent {
118 pub fn text(&self) -> Option<&str> {
122 match self {
123 MessageContent::Text(s) => Some(s.as_str()),
124 MessageContent::Blocks(blocks) => blocks.iter().find_map(|b| {
125 if let ContentBlock::Text { text } = b {
126 Some(text.as_str())
127 } else {
128 None
129 }
130 }),
131 }
132 }
133
134 pub fn blocks(&self) -> Option<&[ContentBlock]> {
136 match self {
137 MessageContent::Text(_) => None,
138 MessageContent::Blocks(blocks) => Some(blocks),
139 }
140 }
141}
142
143fn deserialize_message_content<'de, D>(deserializer: D) -> Result<Option<MessageContent>, D::Error>
145where
146 D: serde::Deserializer<'de>,
147{
148 use serde::de::{self, Visitor};
149
150 struct MessageContentVisitor;
151
152 impl<'de> Visitor<'de> for MessageContentVisitor {
153 type Value = Option<MessageContent>;
154
155 fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156 formatter.write_str("a string, an array of content blocks, or null")
157 }
158
159 fn visit_none<E>(self) -> Result<Self::Value, E>
160 where
161 E: de::Error,
162 {
163 Ok(None)
164 }
165
166 fn visit_unit<E>(self) -> Result<Self::Value, E>
167 where
168 E: de::Error,
169 {
170 Ok(None)
171 }
172
173 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
174 where
175 E: de::Error,
176 {
177 Ok(Some(MessageContent::Text(value.to_string())))
178 }
179
180 fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
181 where
182 E: de::Error,
183 {
184 Ok(Some(MessageContent::Text(value)))
185 }
186
187 fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
188 where
189 A: de::SeqAccess<'de>,
190 {
191 let blocks: Vec<ContentBlock> =
192 Deserialize::deserialize(de::value::SeqAccessDeserializer::new(seq))?;
193 Ok(Some(MessageContent::Blocks(blocks)))
194 }
195 }
196
197 deserializer.deserialize_any(MessageContentVisitor)
198}
199
/// The `usage` object of a message: token counts, all optional.
#[derive(Debug, Clone, Deserialize)]
pub struct TokenUsage {
    /// Input token count. `into_events` treats a missing value as 0 when the
    /// output count is present.
    pub input_tokens: Option<i64>,
    /// Output token count. `into_events` treats a missing value as 0 when the
    /// input count is present.
    pub output_tokens: Option<i64>,

    /// Tokens read from cache; forwarded to the event as its cache-read count.
    #[serde(default)]
    pub cache_read_input_tokens: Option<i64>,

    /// Tokens written to cache; forwarded to the event as its cache-write count.
    #[serde(default)]
    pub cache_creation_input_tokens: Option<i64>,

    /// Nested `cache_creation` object; not read by the code visible in this file.
    #[serde(default)]
    pub cache_creation: Option<CacheCreationDetails>,
}
216
/// The `cache_creation` object nested inside [`TokenUsage`].
///
/// Both counts are optional and are not read by the code visible in this file.
#[derive(Debug, Clone, Deserialize)]
pub struct CacheCreationDetails {
    /// Value of the `ephemeral_5m_input_tokens` key, when present.
    #[serde(default)]
    pub ephemeral_5m_input_tokens: Option<i64>,

    /// Value of the `ephemeral_1h_input_tokens` key, when present.
    #[serde(default)]
    pub ephemeral_1h_input_tokens: Option<i64>,
}
226
/// One element of a message's content array.
///
/// The variant is selected by the JSON `type` key, with snake_case tags:
/// `"text"`, `"tool_use"`, `"tool_result"` and `"thinking"`.
///
/// NOTE(review): there is no catch-all variant, so a block whose `type` is not
/// one of these four has nothing to deserialize into and the enclosing content
/// array fails to parse. Confirm whether callers tolerate that (for example by
/// skipping unparseable lines) or whether a `#[serde(other)]` unit variant
/// should be added.
#[derive(Debug, Clone, PartialEq, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {
    /// Plain text.
    Text { text: String },

    /// A tool call. `input` falls back to its default (JSON null) when the key
    /// is missing.
    ToolUse {
        id: String,
        name: String,
        #[serde(default)]
        input: serde_json::Value,
    },

    /// The result of a tool call, linked to it by `tool_use_id`. `content` is
    /// kept as raw JSON and falls back to JSON null when the key is missing.
    ToolResult {
        tool_use_id: String,
        #[serde(default)]
        content: serde_json::Value,
    },

    /// A thinking block; `thinking` is the empty string when the key is missing.
    Thinking {
        #[serde(default)]
        thinking: String,
    },
}
255
256impl TranscriptEntry {
257 pub fn into_events(self, machine_id: &str, tool_info_map: &mut ToolInfoMap) -> Vec<Event> {
268 let mut events = Vec::new();
269
270 let timestamp = chrono::DateTime::parse_from_rfc3339(&self.timestamp)
272 .ok()
273 .map(|dt| dt.with_timezone(&Utc));
274
275 let base_builder = || {
276 let mut b = EventBuilder::new(machine_id, EventType::ApiRequest, &self.session_id)
277 .source("transcript")
278 .framework("claude")
279 .is_sidechain(self.is_sidechain);
280
281 if let Some(ts) = timestamp {
282 b = b.timestamp(ts);
283 }
284 if let Some(ref cwd) = self.cwd {
285 b = b.cwd(cwd.clone());
286 }
287 if let Some(ref branch) = self.git_branch {
288 b = b.git_branch(branch.clone());
289 }
290
291 let mut metadata = serde_json::Map::new();
293 metadata.insert("uuid".into(), serde_json::json!(&self.uuid));
294 if let Some(ref parent) = self.parent_uuid {
295 metadata.insert("parent_uuid".into(), serde_json::json!(parent));
296 }
297 b = b.metadata(serde_json::Value::Object(metadata).to_string());
298
299 b
300 };
301
302 match self.entry_type.as_str() {
303 "assistant" => {
304 if let Some(ref message) = self.message {
305 if let Some(ref usage) = message.usage
307 && (usage.input_tokens.is_some() || usage.output_tokens.is_some())
308 {
309 let mut builder = base_builder();
310
311 let input = usage.input_tokens.unwrap_or(0);
313 let output = usage.output_tokens.unwrap_or(0);
314 builder = builder.tokens(input, output);
315
316 if let Some(cache_read) = usage.cache_read_input_tokens {
318 builder = builder.tokens_cache_read_opt(Some(cache_read));
319 }
320 if let Some(cache_write) = usage.cache_creation_input_tokens {
321 builder = builder.tokens_cache_write_opt(Some(cache_write));
322 }
323
324 if let Some(ref model) = message.model
326 && model != "<synthetic>"
327 {
328 builder = builder.model(model.clone());
329 }
330
331 events.push(builder.build());
332 }
333
334 if let Some(ref content) = message.content
336 && let Some(blocks) = content.blocks()
337 {
338 for block in blocks {
339 if let ContentBlock::ToolUse { id, name, input } = block {
340 tool_info_map.insert(
342 id.clone(),
343 ToolInfo {
344 name: name.clone(),
345 input: input.clone(),
346 },
347 );
348
349 let builder = EventBuilder::new(
350 machine_id,
351 EventType::PreToolUse,
352 &self.session_id,
353 )
354 .source("transcript")
355 .framework("claude")
356 .is_sidechain(self.is_sidechain)
357 .tool(id.clone(), name.clone())
358 .timestamp_opt(timestamp)
359 .cwd_opt(self.cwd.clone())
360 .git_branch_opt(self.git_branch.clone());
361
362 events.push(builder.build());
363 }
364 }
365 }
366 }
367 }
368
369 "user" => {
370 let message = self.message.as_ref();
372
373 let prompt_text = message
377 .and_then(|m| m.content.as_ref())
378 .and_then(|c| c.text())
379 .map(|s| s.to_string());
380
381 if prompt_text.is_some() {
384 let mut builder = EventBuilder::new(
385 machine_id,
386 EventType::UserPromptSubmit,
387 &self.session_id,
388 )
389 .source("transcript")
390 .framework("claude")
391 .is_sidechain(self.is_sidechain)
392 .timestamp_opt(timestamp);
393
394 if let Some(ref cwd) = self.cwd {
395 builder = builder.cwd(cwd.clone());
396 }
397 if let Some(ref branch) = self.git_branch {
398 builder = builder.git_branch(branch.clone());
399 }
400
401 if let Some(text) = prompt_text {
403 let truncated = truncate_string(&text, 1000);
404 builder =
405 builder.payload(serde_json::json!({"prompt": truncated}).to_string());
406 }
407
408 events.push(builder.build());
409 }
410
411 if let Some(ref message) = self.message
413 && let Some(ref content) = message.content
414 && let Some(blocks) = content.blocks()
415 {
416 for block in blocks {
417 if let ContentBlock::ToolResult {
418 tool_use_id,
419 content: result_content,
420 } = block
421 {
422 let mut builder = EventBuilder::new(
423 machine_id,
424 EventType::PostToolUse,
425 &self.session_id,
426 )
427 .source("transcript")
428 .framework("claude")
429 .is_sidechain(self.is_sidechain)
430 .tool_use_id(tool_use_id.clone())
431 .timestamp_opt(timestamp)
432 .cwd_opt(self.cwd.clone())
433 .git_branch_opt(self.git_branch.clone());
434
435 if let Some(tool_info) = tool_info_map.get(tool_use_id) {
437 builder = builder.tool_name(tool_info.name.clone());
440
441 if tool_info.name == "Bash" {
443 if let Some(command) = tool_info.input.get("command") {
445 builder = builder.payload(
446 serde_json::json!({ "command": command }).to_string(),
447 );
448 }
449
450 builder = builder.metadata(
452 serde_json::json!({ "output": result_content }).to_string(),
453 );
454 }
455 }
456
457 events.push(builder.build());
458 }
459 }
460 }
461 }
462
463 _ => {
464 }
466 }
467
468 events
469 }
470
471 pub fn dedup_key(&self) -> &str {
473 &self.uuid
474 }
475
476 pub fn should_skip(&self) -> bool {
478 self.entry_type != "user" && self.entry_type != "assistant"
480 }
481}
482
// Unit tests for transcript parsing and event conversion. Each test feeds a
// JSON fixture through `serde_json` and, where relevant, `into_events`.
#[cfg(test)]
mod tests {
    use super::*;

    // An assistant entry with full usage yields a single ApiRequest event that
    // carries the token counts, cache counts and model.
    #[test]
    fn test_parse_assistant_with_usage() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-123",
            "parentUuid": "entry-122",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "cwd": "/projects/test",
            "message": {
                "role": "assistant",
                "model": "claude-opus-4-5-20251101",
                "usage": {
                    "input_tokens": 1000,
                    "output_tokens": 500,
                    "cache_read_input_tokens": 200,
                    "cache_creation_input_tokens": 100
                }
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert_eq!(entry.session_id, "test-session");
        assert_eq!(entry.uuid, "entry-123");
        assert!(!entry.is_sidechain);

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert_eq!(events.len(), 1);

        let event = &events[0];
        assert_eq!(event.event_type, EventType::ApiRequest);
        assert_eq!(event.session_id, "test-session");
        assert_eq!(event.source, Some("transcript".to_string()));
        assert_eq!(event.is_sidechain, Some(false));
        assert_eq!(event.tokens_input, Some(1000));
        assert_eq!(event.tokens_output, Some(500));
        assert_eq!(event.tokens_cache_read, Some(200));
        assert_eq!(event.tokens_cache_write, Some(100));
        assert_eq!(event.model, Some("claude-opus-4-5-20251101".to_string()));
    }

    // The sidechain flag of the entry is propagated onto the emitted event.
    #[test]
    fn test_parse_sidechain_entry() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-456",
            "isSidechain": true,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                }
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert!(entry.is_sidechain);

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert_eq!(events[0].is_sidechain, Some(true));
    }

    // A "<synthetic>" model still produces an ApiRequest event (the usage has
    // token counts, even if zero), but the model is left unset.
    #[test]
    fn test_synthetic_model_is_filtered() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-synthetic",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "model": "<synthetic>",
                "usage": {
                    "input_tokens": 0,
                    "output_tokens": 0
                },
                "content": [
                    {"type": "text", "text": "API Error: 529 overloaded"}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::ApiRequest);
        assert_eq!(events[0].model, None);
    }

    // A user entry whose content is an array with a text block yields one
    // UserPromptSubmit event.
    #[test]
    fn test_parse_user_message() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-789",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello, Claude!"}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);
    }

    // Content given as a bare JSON string parses to `MessageContent::Text` and
    // still yields a UserPromptSubmit event whose payload holds the prompt.
    #[test]
    fn test_parse_user_message_string_content() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-string",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": "ok"
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();

        let message = entry.message.as_ref().unwrap();
        let content = message.content.as_ref().unwrap();
        assert!(matches!(content, MessageContent::Text(_)));
        assert_eq!(content.text(), Some("ok"));
        assert!(content.blocks().is_none());

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        let payload: serde_json::Value =
            serde_json::from_str(events[0].payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["prompt"], "ok");
    }

    // Content given as an array parses to `MessageContent::Blocks`; `text()`
    // returns the text of the text block and `blocks()` exposes the array.
    #[test]
    fn test_message_content_blocks_variant() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-blocks",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello from blocks!"}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let message = entry.message.as_ref().unwrap();
        let content = message.content.as_ref().unwrap();

        assert!(matches!(content, MessageContent::Blocks(_)));
        assert_eq!(content.text(), Some("Hello from blocks!"));
        assert!(content.blocks().is_some());
        assert_eq!(content.blocks().unwrap().len(), 1);
    }

    // An assistant entry with usage and one tool_use block yields an ApiRequest
    // event followed by a PreToolUse event, and records the tool in the map.
    #[test]
    fn test_parse_tool_use() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}
                ]
            }
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 2);
        assert_eq!(events[0].event_type, EventType::ApiRequest);
        assert_eq!(events[1].event_type, EventType::PreToolUse);
        assert_eq!(events[1].tool_use_id, Some("toolu_123".to_string()));
        assert_eq!(events[1].tool_name, Some("Bash".to_string()));

        assert!(tool_info_map.contains_key("toolu_123"));
        assert_eq!(tool_info_map["toolu_123"].name, "Bash");
    }

    // A Bash tool result that follows its tool_use (same map) is enriched with
    // the tool name, the command as payload and the output as metadata.
    #[test]
    fn test_parse_tool_result_with_enrichment() {
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls -la"}}
                ]
            }
        }"#;

        // First pass only populates the map; its events are not inspected.
        let tool_use_entry: TranscriptEntry = serde_json::from_str(tool_use_json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let _ = tool_use_entry.into_events("machine-1", &mut tool_info_map);

        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file1.txt\nfile2.txt"}
                ]
            }
        }"#;

        let result_entry: TranscriptEntry = serde_json::from_str(tool_result_json).unwrap();
        let events = result_entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.event_type, EventType::PostToolUse);
        assert_eq!(event.tool_use_id, Some("toolu_123".to_string()));

        assert_eq!(event.tool_name, Some("Bash".to_string()));

        assert!(event.payload.is_some());
        let payload: serde_json::Value =
            serde_json::from_str(event.payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["command"], "ls -la");

        assert!(event.metadata.is_some());
        let metadata: serde_json::Value =
            serde_json::from_str(event.metadata.as_ref().unwrap()).unwrap();
        assert_eq!(metadata["output"], "file1.txt\nfile2.txt");
    }

    // A result for a tool other than Bash gets the tool name but neither
    // payload nor metadata.
    #[test]
    fn test_parse_non_bash_tool_no_payload() {
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": { "input_tokens": 100, "output_tokens": 50 },
                "content": [
                    {"type": "tool_use", "id": "toolu_read", "name": "Read", "input": {"file_path": "/test.txt"}}
                ]
            }
        }"#;

        let tool_use_entry: TranscriptEntry = serde_json::from_str(tool_use_json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let _ = tool_use_entry.into_events("machine-1", &mut tool_info_map);

        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_read", "content": "file contents here"}
                ]
            }
        }"#;

        let result_entry: TranscriptEntry = serde_json::from_str(tool_result_json).unwrap();
        let events = result_entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        let event = &events[0];

        assert_eq!(event.tool_name, Some("Read".to_string()));

        assert!(event.payload.is_none());
        assert!(event.metadata.is_none());
    }

    // Entry types other than "user"/"assistant" are flagged by `should_skip`
    // and produce no events.
    #[test]
    fn test_skip_non_message_entries() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "snapshot-1",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "file-history-snapshot"
        }"#;

        let entry: TranscriptEntry = serde_json::from_str(json).unwrap();
        assert!(entry.should_skip());

        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);
        assert!(events.is_empty());
    }

    // Truncating a prompt whose cut point would fall inside a multi-byte
    // character must not panic.
    #[test]
    fn test_user_message_with_multibyte_utf8_truncation() {
        // 999 ASCII bytes followed by a multi-byte `é`, so the prompt is longer
        // than the 1000 limit passed to `truncate_string`.
        let long_prompt = format!("{}{}", "a".repeat(999), "é");
        let json = format!(
            r#"{{
                "sessionId": "test-session",
                "uuid": "entry-utf8",
                "isSidechain": false,
                "timestamp": "2025-12-25T21:30:31.390Z",
                "type": "user",
                "message": {{
                    "role": "user",
                    "content": [
                        {{"type": "text", "text": "{}"}}
                    ]
                }}
            }}"#,
            long_prompt
        );

        let entry: TranscriptEntry = serde_json::from_str(&json).unwrap();
        let mut tool_info_map = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        let payload: serde_json::Value =
            serde_json::from_str(events[0].payload.as_ref().unwrap()).unwrap();
        let prompt = payload["prompt"].as_str().unwrap();
        assert!(prompt.ends_with("..."));
        // 1002 bytes: presumably the 999 ASCII bytes plus "...", with the
        // multi-byte character dropped whole — depends on `truncate_string`.
        assert_eq!(prompt.len(), 1002);
    }
}