mi6_core/input/transcript/
entry.rs

//! Transcript entry types and parsing.
//!
//! Claude Code stores conversation data in JSONL files at:
//! `~/.claude/projects/<project-path>/<session-id>.jsonl`
5
6use std::collections::HashMap;
7
8use chrono::Utc;
9use serde::Deserialize;
10
11use crate::model::{Event, EventBuilder, EventType};
12use crate::util::truncate_string;
13
14/// Information about a tool use, tracked for enriching PostToolUse events.
15#[derive(Debug, Clone)]
16pub struct ToolInfo {
17    /// The tool name (e.g., "Bash", "Read", "Write")
18    pub name: String,
19    /// The tool input (command for Bash, etc.)
20    pub input: serde_json::Value,
21}
22
23/// Map from tool_use_id to tool info.
24pub type ToolInfoMap = HashMap<String, ToolInfo>;
25
26/// A single entry in a Claude Code transcript JSONL file.
27#[derive(Debug, Clone, Deserialize)]
28#[serde(rename_all = "camelCase")]
29pub struct TranscriptEntry {
30    /// Unique identifier for this entry
31    pub uuid: String,
32
33    /// Links to previous entry (for conversation tree)
34    #[serde(default)]
35    pub parent_uuid: Option<String>,
36
37    /// Session identifier
38    pub session_id: String,
39
40    /// Entry timestamp (ISO 8601)
41    pub timestamp: String,
42
43    /// Entry type: "user" or "assistant"
44    #[serde(rename = "type")]
45    pub entry_type: String,
46
47    /// Whether this is a sidechain request (subagent/auxiliary)
48    /// CRITICAL for context size tracking - only main chain counts
49    #[serde(default)]
50    pub is_sidechain: bool,
51
52    /// Message content
53    #[serde(default)]
54    pub message: Option<TranscriptMessage>,
55
56    /// Working directory
57    #[serde(default)]
58    pub cwd: Option<String>,
59
60    /// Git branch
61    #[serde(default)]
62    pub git_branch: Option<String>,
63
64    /// Claude Code version
65    #[serde(default)]
66    pub version: Option<String>,
67
68    /// Human-readable session slug
69    #[serde(default)]
70    pub slug: Option<String>,
71
72    /// Tool use result (for user entries with tool results)
73    #[serde(default)]
74    pub tool_use_result: Option<serde_json::Value>,
75}
76
77/// Message content within a transcript entry.
78#[derive(Debug, Clone, Deserialize)]
79pub struct TranscriptMessage {
80    /// Role: "user" or "assistant"
81    pub role: String,
82
83    /// Model used (for assistant messages)
84    #[serde(default)]
85    pub model: Option<String>,
86
87    /// Message ID (for assistant messages)
88    #[serde(default)]
89    pub id: Option<String>,
90
91    /// Token usage (for assistant messages)
92    #[serde(default)]
93    pub usage: Option<TokenUsage>,
94
95    /// Content - can be either a plain string (for user prompts) or
96    /// an array of content blocks (for tool results, assistant messages).
97    /// Claude Code sends initial user prompts as plain strings like `"content": "ok"`,
98    /// but tool results and complex messages use array format like
99    /// `"content": [{"type": "text", "text": "..."}]`.
100    #[serde(default, deserialize_with = "deserialize_message_content")]
101    pub content: Option<MessageContent>,
102}
103
104/// Message content that can be either a plain string or an array of content blocks.
105///
106/// Claude Code uses different formats depending on the message type:
107/// - Initial user prompts: `"content": "hello"` (plain string)
108/// - Tool results/complex messages: `"content": [{"type": "text", "text": "..."}]` (array)
109#[derive(Debug, Clone, PartialEq)]
110pub enum MessageContent {
111    /// Plain text content (used for simple user prompts)
112    Text(String),
113    /// Array of content blocks (used for tool results, assistant messages)
114    Blocks(Vec<ContentBlock>),
115}
116
117impl MessageContent {
118    /// Extract plain text from the content, if available.
119    /// For `Text` variant, returns the string directly.
120    /// For `Blocks` variant, finds the first text block and returns its content.
121    pub fn text(&self) -> Option<&str> {
122        match self {
123            MessageContent::Text(s) => Some(s.as_str()),
124            MessageContent::Blocks(blocks) => blocks.iter().find_map(|b| {
125                if let ContentBlock::Text { text } = b {
126                    Some(text.as_str())
127                } else {
128                    None
129                }
130            }),
131        }
132    }
133
134    /// Get content blocks, if this is the Blocks variant.
135    pub fn blocks(&self) -> Option<&[ContentBlock]> {
136        match self {
137            MessageContent::Text(_) => None,
138            MessageContent::Blocks(blocks) => Some(blocks),
139        }
140    }
141}
142
143/// Custom deserializer for MessageContent that handles both string and array formats.
144fn deserialize_message_content<'de, D>(deserializer: D) -> Result<Option<MessageContent>, D::Error>
145where
146    D: serde::Deserializer<'de>,
147{
148    use serde::de::{self, Visitor};
149
150    struct MessageContentVisitor;
151
152    impl<'de> Visitor<'de> for MessageContentVisitor {
153        type Value = Option<MessageContent>;
154
155        fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156            formatter.write_str("a string, an array of content blocks, or null")
157        }
158
159        fn visit_none<E>(self) -> Result<Self::Value, E>
160        where
161            E: de::Error,
162        {
163            Ok(None)
164        }
165
166        fn visit_unit<E>(self) -> Result<Self::Value, E>
167        where
168            E: de::Error,
169        {
170            Ok(None)
171        }
172
173        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
174        where
175            E: de::Error,
176        {
177            Ok(Some(MessageContent::Text(value.to_string())))
178        }
179
180        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
181        where
182            E: de::Error,
183        {
184            Ok(Some(MessageContent::Text(value)))
185        }
186
187        fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
188        where
189            A: de::SeqAccess<'de>,
190        {
191            let blocks: Vec<ContentBlock> =
192                Deserialize::deserialize(de::value::SeqAccessDeserializer::new(seq))?;
193            Ok(Some(MessageContent::Blocks(blocks)))
194        }
195    }
196
197    deserializer.deserialize_any(MessageContentVisitor)
198}
199
200/// Token usage statistics from API response.
201#[derive(Debug, Clone, Deserialize)]
202pub struct TokenUsage {
203    pub input_tokens: Option<i64>,
204    pub output_tokens: Option<i64>,
205
206    #[serde(default)]
207    pub cache_read_input_tokens: Option<i64>,
208
209    #[serde(default)]
210    pub cache_creation_input_tokens: Option<i64>,
211
212    /// Detailed cache creation info
213    #[serde(default)]
214    pub cache_creation: Option<CacheCreationDetails>,
215}
216
217/// Detailed cache creation statistics.
218#[derive(Debug, Clone, Deserialize)]
219pub struct CacheCreationDetails {
220    #[serde(default)]
221    pub ephemeral_5m_input_tokens: Option<i64>,
222
223    #[serde(default)]
224    pub ephemeral_1h_input_tokens: Option<i64>,
225}
226
227/// Content block types in messages.
228#[derive(Debug, Clone, PartialEq, Deserialize)]
229#[serde(tag = "type", rename_all = "snake_case")]
230pub enum ContentBlock {
231    /// Text content
232    Text { text: String },
233
234    /// Tool use request
235    ToolUse {
236        id: String,
237        name: String,
238        #[serde(default)]
239        input: serde_json::Value,
240    },
241
242    /// Tool result (in user messages)
243    ToolResult {
244        tool_use_id: String,
245        #[serde(default)]
246        content: serde_json::Value,
247    },
248
249    /// Thinking block (extended thinking)
250    Thinking {
251        #[serde(default)]
252        thinking: String,
253    },
254}
255
256impl TranscriptEntry {
257    /// Convert this transcript entry to Event(s).
258    ///
259    /// A single entry may produce multiple events:
260    /// - Assistant message with usage → ApiRequest
261    /// - User message → UserPromptSubmit
262    /// - Tool use blocks → PreToolUse events
263    /// - Tool result blocks → PostToolUse events
264    ///
265    /// The `tool_info_map` is used to track tool_use_id → (tool_name, input) mappings
266    /// across entries, so that PostToolUse events can include the tool name and command.
267    pub fn into_events(self, machine_id: &str, tool_info_map: &mut ToolInfoMap) -> Vec<Event> {
268        let mut events = Vec::new();
269
270        // Parse timestamp
271        let timestamp = chrono::DateTime::parse_from_rfc3339(&self.timestamp)
272            .ok()
273            .map(|dt| dt.with_timezone(&Utc));
274
275        let base_builder = || {
276            let mut b = EventBuilder::new(machine_id, EventType::ApiRequest, &self.session_id)
277                .source("transcript")
278                .framework("claude")
279                .is_sidechain(self.is_sidechain);
280
281            if let Some(ts) = timestamp {
282                b = b.timestamp(ts);
283            }
284            if let Some(ref cwd) = self.cwd {
285                b = b.cwd(cwd.clone());
286            }
287            if let Some(ref branch) = self.git_branch {
288                b = b.git_branch(branch.clone());
289            }
290
291            // Store uuid and parent_uuid in metadata
292            let mut metadata = serde_json::Map::new();
293            metadata.insert("uuid".into(), serde_json::json!(&self.uuid));
294            if let Some(ref parent) = self.parent_uuid {
295                metadata.insert("parent_uuid".into(), serde_json::json!(parent));
296            }
297            b = b.metadata(serde_json::Value::Object(metadata).to_string());
298
299            b
300        };
301
302        match self.entry_type.as_str() {
303            "assistant" => {
304                if let Some(ref message) = self.message {
305                    // Create ApiRequest event if we have usage data
306                    if let Some(ref usage) = message.usage
307                        && (usage.input_tokens.is_some() || usage.output_tokens.is_some())
308                    {
309                        let mut builder = base_builder();
310
311                        // Set tokens
312                        let input = usage.input_tokens.unwrap_or(0);
313                        let output = usage.output_tokens.unwrap_or(0);
314                        builder = builder.tokens(input, output);
315
316                        // Set cache tokens
317                        if let Some(cache_read) = usage.cache_read_input_tokens {
318                            builder = builder.tokens_cache_read_opt(Some(cache_read));
319                        }
320                        if let Some(cache_write) = usage.cache_creation_input_tokens {
321                            builder = builder.tokens_cache_write_opt(Some(cache_write));
322                        }
323
324                        // Set model (skip synthetic placeholder used for errors/timeouts)
325                        if let Some(ref model) = message.model
326                            && model != "<synthetic>"
327                        {
328                            builder = builder.model(model.clone());
329                        }
330
331                        events.push(builder.build());
332                    }
333
334                    // Create PreToolUse events for tool_use blocks
335                    if let Some(ref content) = message.content
336                        && let Some(blocks) = content.blocks()
337                    {
338                        for block in blocks {
339                            if let ContentBlock::ToolUse { id, name, input } = block {
340                                // Register tool info for later PostToolUse events
341                                tool_info_map.insert(
342                                    id.clone(),
343                                    ToolInfo {
344                                        name: name.clone(),
345                                        input: input.clone(),
346                                    },
347                                );
348
349                                let builder = EventBuilder::new(
350                                    machine_id,
351                                    EventType::PreToolUse,
352                                    &self.session_id,
353                                )
354                                .source("transcript")
355                                .framework("claude")
356                                .is_sidechain(self.is_sidechain)
357                                .tool(id.clone(), name.clone())
358                                .timestamp_opt(timestamp)
359                                .cwd_opt(self.cwd.clone())
360                                .git_branch_opt(self.git_branch.clone());
361
362                                events.push(builder.build());
363                            }
364                        }
365                    }
366                }
367            }
368
369            "user" => {
370                // Create UserPromptSubmit for user messages
371                let message = self.message.as_ref();
372
373                // Extract prompt text if available.
374                // MessageContent handles both plain string format (e.g., "content": "ok")
375                // and array format (e.g., "content": [{"type": "text", "text": "ok"}])
376                let prompt_text = message
377                    .and_then(|m| m.content.as_ref())
378                    .and_then(|c| c.text())
379                    .map(|s| s.to_string());
380
381                // Only create UserPromptSubmit if there's text content
382                // (tool_result entries don't need a UserPromptSubmit)
383                if prompt_text.is_some() {
384                    let mut builder = EventBuilder::new(
385                        machine_id,
386                        EventType::UserPromptSubmit,
387                        &self.session_id,
388                    )
389                    .source("transcript")
390                    .framework("claude")
391                    .is_sidechain(self.is_sidechain)
392                    .timestamp_opt(timestamp);
393
394                    if let Some(ref cwd) = self.cwd {
395                        builder = builder.cwd(cwd.clone());
396                    }
397                    if let Some(ref branch) = self.git_branch {
398                        builder = builder.git_branch(branch.clone());
399                    }
400
401                    // Store prompt in payload (truncated if very long)
402                    if let Some(text) = prompt_text {
403                        let truncated = truncate_string(&text, 1000);
404                        builder =
405                            builder.payload(serde_json::json!({"prompt": truncated}).to_string());
406                    }
407
408                    events.push(builder.build());
409                }
410
411                // Create PostToolUse events for tool_result blocks
412                if let Some(ref message) = self.message
413                    && let Some(ref content) = message.content
414                    && let Some(blocks) = content.blocks()
415                {
416                    for block in blocks {
417                        if let ContentBlock::ToolResult {
418                            tool_use_id,
419                            content: result_content,
420                        } = block
421                        {
422                            let mut builder = EventBuilder::new(
423                                machine_id,
424                                EventType::PostToolUse,
425                                &self.session_id,
426                            )
427                            .source("transcript")
428                            .framework("claude")
429                            .is_sidechain(self.is_sidechain)
430                            .tool_use_id(tool_use_id.clone())
431                            .timestamp_opt(timestamp)
432                            .cwd_opt(self.cwd.clone())
433                            .git_branch_opt(self.git_branch.clone());
434
435                            // Look up tool info from the map to enrich PostToolUse events
436                            if let Some(tool_info) = tool_info_map.get(tool_use_id) {
437                                // Set tool_name so GitHub context extraction can check for Bash
438                                // (tool_use_id is already set above, so only set tool_name here)
439                                builder = builder.tool_name(tool_info.name.clone());
440
441                                // Only include command/output for Bash tools (needed for GitHub context extraction)
442                                if tool_info.name == "Bash" {
443                                    // Include command in payload
444                                    if let Some(command) = tool_info.input.get("command") {
445                                        builder = builder.payload(
446                                            serde_json::json!({ "command": command }).to_string(),
447                                        );
448                                    }
449
450                                    // Include output in metadata for context extraction
451                                    builder = builder.metadata(
452                                        serde_json::json!({ "output": result_content }).to_string(),
453                                    );
454                                }
455                            }
456
457                            events.push(builder.build());
458                        }
459                    }
460                }
461            }
462
463            _ => {
464                // Skip other entry types (file-history-snapshot, etc.)
465            }
466        }
467
468        events
469    }
470
471    /// Get the unique identifier for deduplication.
472    pub fn dedup_key(&self) -> &str {
473        &self.uuid
474    }
475
476    /// Check if this entry should be skipped (non-conversation data).
477    pub fn should_skip(&self) -> bool {
478        // Skip file-history-snapshot and other non-message entries
479        self.entry_type != "user" && self.entry_type != "assistant"
480    }
481}
482
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse one transcript line; a malformed fixture is a bug in the test.
    fn parse(json: &str) -> TranscriptEntry {
        serde_json::from_str(json).unwrap()
    }

    /// Parse `json` and convert it with a fresh, empty tool info map.
    fn convert(json: &str) -> Vec<Event> {
        parse(json).into_events("machine-1", &mut ToolInfoMap::default())
    }

    /// Decode an optional JSON string field (payload or metadata) of an event.
    fn decode(field: &Option<String>) -> serde_json::Value {
        serde_json::from_str(field.as_ref().unwrap()).unwrap()
    }

    #[test]
    fn test_parse_assistant_with_usage() {
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-123",
            "parentUuid": "entry-122",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "cwd": "/projects/test",
            "message": {
                "role": "assistant",
                "model": "claude-opus-4-5-20251101",
                "usage": {
                    "input_tokens": 1000,
                    "output_tokens": 500,
                    "cache_read_input_tokens": 200,
                    "cache_creation_input_tokens": 100
                }
            }
        }"#,
        );

        // The raw entry is parsed as expected.
        assert_eq!(entry.session_id, "test-session");
        assert_eq!(entry.uuid, "entry-123");
        assert!(!entry.is_sidechain);

        let mut tools = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tools);
        assert_eq!(events.len(), 1);

        // The single event carries every usage figure and the model.
        let api_request = &events[0];
        assert_eq!(api_request.event_type, EventType::ApiRequest);
        assert_eq!(api_request.session_id, "test-session");
        assert_eq!(api_request.source, Some("transcript".to_string()));
        assert_eq!(api_request.is_sidechain, Some(false));
        assert_eq!(api_request.tokens_input, Some(1000));
        assert_eq!(api_request.tokens_output, Some(500));
        assert_eq!(api_request.tokens_cache_read, Some(200));
        assert_eq!(api_request.tokens_cache_write, Some(100));
        assert_eq!(
            api_request.model,
            Some("claude-opus-4-5-20251101".to_string())
        );
    }

    #[test]
    fn test_parse_sidechain_entry() {
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-456",
            "isSidechain": true,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                }
            }
        }"#,
        );
        assert!(entry.is_sidechain);

        let mut tools = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tools);
        assert_eq!(events[0].is_sidechain, Some(true));
    }

    #[test]
    fn test_synthetic_model_is_filtered() {
        // "<synthetic>" is the placeholder Claude Code writes for error
        // messages, rate limits, timeouts, and synthetic responses; it must
        // never be recorded as the actual model.
        let events = convert(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-synthetic",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "model": "<synthetic>",
                "usage": {
                    "input_tokens": 0,
                    "output_tokens": 0
                },
                "content": [
                    {"type": "text", "text": "API Error: 529 overloaded"}
                ]
            }
        }"#,
        );

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::ApiRequest);
        // The placeholder is dropped, leaving no model at all.
        assert_eq!(events[0].model, None);
    }

    #[test]
    fn test_parse_user_message() {
        let events = convert(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-789",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello, Claude!"}
                ]
            }
        }"#,
        );

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);
    }

    #[test]
    fn test_parse_user_message_string_content() {
        // Initial user prompts arrive with `content` as a plain string rather
        // than an array of content blocks. This is the shape that previously
        // caused prompts to be silently dropped.
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-string",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": "ok"
            }
        }"#,
        );

        // The content must come out as the Text variant.
        {
            let content = entry
                .message
                .as_ref()
                .unwrap()
                .content
                .as_ref()
                .unwrap();
            assert!(matches!(content, MessageContent::Text(_)));
            assert_eq!(content.text(), Some("ok"));
            assert!(content.blocks().is_none());
        }

        // Event generation must work for this shape too.
        let mut tools = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tools);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        // The prompt ends up in the payload.
        let payload = decode(&events[0].payload);
        assert_eq!(payload["prompt"], "ok");
    }

    #[test]
    fn test_message_content_blocks_variant() {
        // Exercise the Blocks variant of MessageContent.
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-blocks",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello from blocks!"}
                ]
            }
        }"#,
        );
        let content = entry
            .message
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap();

        // It must be the Blocks variant, with one text block.
        assert!(matches!(content, MessageContent::Blocks(_)));
        assert_eq!(content.text(), Some("Hello from blocks!"));
        assert!(content.blocks().is_some());
        assert_eq!(content.blocks().unwrap().len(), 1);
    }

    #[test]
    fn test_parse_tool_use() {
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}
                ]
            }
        }"#,
        );
        let mut tools = ToolInfoMap::default();
        let events = entry.into_events("machine-1", &mut tools);

        // Expect ApiRequest followed by PreToolUse.
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].event_type, EventType::ApiRequest);
        assert_eq!(events[1].event_type, EventType::PreToolUse);
        assert_eq!(events[1].tool_use_id, Some("toolu_123".to_string()));
        assert_eq!(events[1].tool_name, Some("Bash".to_string()));

        // The tool must have been registered in the map.
        assert!(tools.contains_key("toolu_123"));
        assert_eq!(tools["toolu_123"].name, "Bash");
    }

    #[test]
    fn test_parse_tool_result_with_enrichment() {
        // Step one: feed a ToolUse entry through to populate the map.
        let mut tools = ToolInfoMap::default();
        let _ = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls -la"}}
                ]
            }
        }"#,
        )
        .into_events("machine-1", &mut tools);

        // Step two: convert the matching tool result entry.
        let events = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file1.txt\nfile2.txt"}
                ]
            }
        }"#,
        )
        .into_events("machine-1", &mut tools);

        // Only PostToolUse is expected (no text, hence no UserPromptSubmit).
        assert_eq!(events.len(), 1);
        let post = &events[0];
        assert_eq!(post.event_type, EventType::PostToolUse);
        assert_eq!(post.tool_use_id, Some("toolu_123".to_string()));

        // tool_name comes from the map.
        assert_eq!(post.tool_name, Some("Bash".to_string()));

        // The payload carries the command.
        assert!(post.payload.is_some());
        let payload = decode(&post.payload);
        assert_eq!(payload["command"], "ls -la");

        // The metadata carries the output.
        assert!(post.metadata.is_some());
        let metadata = decode(&post.metadata);
        assert_eq!(metadata["output"], "file1.txt\nfile2.txt");
    }

    #[test]
    fn test_parse_non_bash_tool_no_payload() {
        // Tools other than Bash get a tool_name but neither payload nor metadata.
        let mut tools = ToolInfoMap::default();
        let _ = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": { "input_tokens": 100, "output_tokens": 50 },
                "content": [
                    {"type": "tool_use", "id": "toolu_read", "name": "Read", "input": {"file_path": "/test.txt"}}
                ]
            }
        }"#,
        )
        .into_events("machine-1", &mut tools);

        let events = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_read", "content": "file contents here"}
                ]
            }
        }"#,
        )
        .into_events("machine-1", &mut tools);

        assert_eq!(events.len(), 1);
        let post = &events[0];

        // Every tool gets its name recorded...
        assert_eq!(post.tool_name, Some("Read".to_string()));

        // ...but only Bash gets payload and metadata.
        assert!(post.payload.is_none());
        assert!(post.metadata.is_none());
    }

    #[test]
    fn test_skip_non_message_entries() {
        let entry = parse(
            r#"{
            "sessionId": "test-session",
            "uuid": "snapshot-1",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "file-history-snapshot"
        }"#,
        );
        assert!(entry.should_skip());

        let mut tools = ToolInfoMap::default();
        assert!(entry.into_events("machine-1", &mut tools).is_empty());
    }

    #[test]
    fn test_user_message_with_multibyte_utf8_truncation() {
        // 999 ASCII characters followed by one two-byte character, so that
        // byte 1000 falls in the middle of a character (999 + 2 = 1001 bytes).
        let long_prompt = format!("{}{}", "a".repeat(999), "é");

        let json = format!(
            r#"{{
            "sessionId": "test-session",
            "uuid": "entry-utf8",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {{
                "role": "user",
                "content": [
                    {{"type": "text", "text": "{}"}}
                ]
            }}
        }}"#,
            long_prompt
        );

        // Converting must NOT panic even though byte 1000 is inside "é".
        let events = convert(&json);

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        // The truncated prompt has to end on a valid char boundary:
        // 999 'a's followed by "..." (cut at byte 999, since byte 1000 is mid-character).
        let payload = decode(&events[0].payload);
        let prompt = payload["prompt"].as_str().unwrap();
        assert!(prompt.ends_with("..."));
        assert_eq!(prompt.len(), 1002); // 999 + "..."
    }
}
886        assert_eq!(prompt.len(), 1002); // 999 + "..."
887    }
888}