mi6_core/input/transcript/
entry.rs

1//! Transcript entry types and parsing.
2//!
3//! Claude Code stores conversation data in JSONL files at:
4//! `~/.claude/projects/<project-path>/<session-id>.jsonl`
5
6use std::collections::HashMap;
7
8use chrono::Utc;
9use serde::Deserialize;
10
11use crate::model::{Event, EventBuilder, EventType};
12use crate::util::truncate_string;
13
14/// Information about a tool use, tracked for enriching PostToolUse events.
15#[derive(Debug, Clone)]
16pub struct ToolInfo {
17    /// The tool name (e.g., "Bash", "Read", "Write")
18    pub name: String,
19    /// The tool input (command for Bash, etc.)
20    pub input: serde_json::Value,
21}
22
23/// Map from tool_use_id to tool info.
24pub type ToolInfoMap = HashMap<String, ToolInfo>;
25
26/// A single entry in a Claude Code transcript JSONL file.
27#[derive(Debug, Clone, Deserialize)]
28#[serde(rename_all = "camelCase")]
29pub struct TranscriptEntry {
30    /// Unique identifier for this entry
31    pub uuid: String,
32
33    /// Links to previous entry (for conversation tree)
34    #[serde(default)]
35    pub parent_uuid: Option<String>,
36
37    /// Session identifier
38    pub session_id: String,
39
40    /// Entry timestamp (ISO 8601)
41    pub timestamp: String,
42
43    /// Entry type: "user" or "assistant"
44    #[serde(rename = "type")]
45    pub entry_type: String,
46
47    /// Whether this is a sidechain request (subagent/auxiliary)
48    /// CRITICAL for context size tracking - only main chain counts
49    #[serde(default)]
50    pub is_sidechain: bool,
51
52    /// Message content
53    #[serde(default)]
54    pub message: Option<TranscriptMessage>,
55
56    /// Working directory
57    #[serde(default)]
58    pub cwd: Option<String>,
59
60    /// Git branch
61    #[serde(default)]
62    pub git_branch: Option<String>,
63
64    /// Claude Code version
65    #[serde(default)]
66    pub version: Option<String>,
67
68    /// Human-readable session slug
69    #[serde(default)]
70    pub slug: Option<String>,
71
72    /// Tool use result (for user entries with tool results)
73    #[serde(default)]
74    pub tool_use_result: Option<serde_json::Value>,
75}
76
77/// Message content within a transcript entry.
78#[derive(Debug, Clone, Deserialize)]
79pub struct TranscriptMessage {
80    /// Role: "user" or "assistant"
81    pub role: String,
82
83    /// Model used (for assistant messages)
84    #[serde(default)]
85    pub model: Option<String>,
86
87    /// Message ID (for assistant messages)
88    #[serde(default)]
89    pub id: Option<String>,
90
91    /// Token usage (for assistant messages)
92    #[serde(default)]
93    pub usage: Option<TokenUsage>,
94
95    /// Content - can be either a plain string (for user prompts) or
96    /// an array of content blocks (for tool results, assistant messages).
97    /// Claude Code sends initial user prompts as plain strings like `"content": "ok"`,
98    /// but tool results and complex messages use array format like
99    /// `"content": [{"type": "text", "text": "..."}]`.
100    #[serde(default, deserialize_with = "deserialize_message_content")]
101    pub content: Option<MessageContent>,
102}
103
104/// Message content that can be either a plain string or an array of content blocks.
105///
106/// Claude Code uses different formats depending on the message type:
107/// - Initial user prompts: `"content": "hello"` (plain string)
108/// - Tool results/complex messages: `"content": [{"type": "text", "text": "..."}]` (array)
109#[derive(Debug, Clone, PartialEq)]
110pub enum MessageContent {
111    /// Plain text content (used for simple user prompts)
112    Text(String),
113    /// Array of content blocks (used for tool results, assistant messages)
114    Blocks(Vec<ContentBlock>),
115}
116
117impl MessageContent {
118    /// Extract plain text from the content, if available.
119    /// For `Text` variant, returns the string directly.
120    /// For `Blocks` variant, finds the first text block and returns its content.
121    pub fn text(&self) -> Option<&str> {
122        match self {
123            MessageContent::Text(s) => Some(s.as_str()),
124            MessageContent::Blocks(blocks) => blocks.iter().find_map(|b| {
125                if let ContentBlock::Text { text } = b {
126                    Some(text.as_str())
127                } else {
128                    None
129                }
130            }),
131        }
132    }
133
134    /// Get content blocks, if this is the Blocks variant.
135    pub fn blocks(&self) -> Option<&[ContentBlock]> {
136        match self {
137            MessageContent::Text(_) => None,
138            MessageContent::Blocks(blocks) => Some(blocks),
139        }
140    }
141}
142
143/// Custom deserializer for MessageContent that handles both string and array formats.
144fn deserialize_message_content<'de, D>(deserializer: D) -> Result<Option<MessageContent>, D::Error>
145where
146    D: serde::Deserializer<'de>,
147{
148    use serde::de::{self, Visitor};
149
150    struct MessageContentVisitor;
151
152    impl<'de> Visitor<'de> for MessageContentVisitor {
153        type Value = Option<MessageContent>;
154
155        fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156            formatter.write_str("a string, an array of content blocks, or null")
157        }
158
159        fn visit_none<E>(self) -> Result<Self::Value, E>
160        where
161            E: de::Error,
162        {
163            Ok(None)
164        }
165
166        fn visit_unit<E>(self) -> Result<Self::Value, E>
167        where
168            E: de::Error,
169        {
170            Ok(None)
171        }
172
173        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
174        where
175            E: de::Error,
176        {
177            Ok(Some(MessageContent::Text(value.to_string())))
178        }
179
180        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
181        where
182            E: de::Error,
183        {
184            Ok(Some(MessageContent::Text(value)))
185        }
186
187        fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
188        where
189            A: de::SeqAccess<'de>,
190        {
191            let blocks: Vec<ContentBlock> =
192                Deserialize::deserialize(de::value::SeqAccessDeserializer::new(seq))?;
193            Ok(Some(MessageContent::Blocks(blocks)))
194        }
195    }
196
197    deserializer.deserialize_any(MessageContentVisitor)
198}
199
200/// Token usage statistics from API response.
201#[derive(Debug, Clone, Deserialize)]
202pub struct TokenUsage {
203    pub input_tokens: Option<i64>,
204    pub output_tokens: Option<i64>,
205
206    #[serde(default)]
207    pub cache_read_input_tokens: Option<i64>,
208
209    #[serde(default)]
210    pub cache_creation_input_tokens: Option<i64>,
211
212    /// Detailed cache creation info
213    #[serde(default)]
214    pub cache_creation: Option<CacheCreationDetails>,
215}
216
217/// Detailed cache creation statistics.
218#[derive(Debug, Clone, Deserialize)]
219pub struct CacheCreationDetails {
220    #[serde(default)]
221    pub ephemeral_5m_input_tokens: Option<i64>,
222
223    #[serde(default)]
224    pub ephemeral_1h_input_tokens: Option<i64>,
225}
226
227/// Content block types in messages.
228#[derive(Debug, Clone, PartialEq, Deserialize)]
229#[serde(tag = "type", rename_all = "snake_case")]
230pub enum ContentBlock {
231    /// Text content
232    Text { text: String },
233
234    /// Tool use request
235    ToolUse {
236        id: String,
237        name: String,
238        #[serde(default)]
239        input: serde_json::Value,
240    },
241
242    /// Tool result (in user messages)
243    ToolResult {
244        tool_use_id: String,
245        #[serde(default)]
246        content: serde_json::Value,
247    },
248
249    /// Thinking block (extended thinking)
250    Thinking {
251        #[serde(default)]
252        thinking: String,
253    },
254}
255
256impl TranscriptEntry {
257    /// Convert this transcript entry to Event(s).
258    ///
259    /// A single entry may produce multiple events:
260    /// - Assistant message with usage → ApiRequest
261    /// - User message → UserPromptSubmit
262    /// - Tool use blocks → PreToolUse events
263    /// - Tool result blocks → PostToolUse events
264    ///
265    /// The `tool_info_map` is used to track tool_use_id → (tool_name, input) mappings
266    /// across entries, so that PostToolUse events can include the tool name and command.
267    pub fn into_events(self, machine_id: &str, tool_info_map: &mut ToolInfoMap) -> Vec<Event> {
268        let mut events = Vec::new();
269
270        // Parse timestamp
271        let timestamp = chrono::DateTime::parse_from_rfc3339(&self.timestamp)
272            .ok()
273            .map(|dt| dt.with_timezone(&Utc));
274
275        let base_builder = || {
276            let mut b = EventBuilder::new(machine_id, EventType::ApiRequest, &self.session_id)
277                .source("transcript")
278                .framework("claude")
279                .is_sidechain(self.is_sidechain);
280
281            if let Some(ts) = timestamp {
282                b = b.timestamp(ts);
283            }
284            if let Some(ref cwd) = self.cwd {
285                b = b.cwd(cwd.clone());
286            }
287            if let Some(ref branch) = self.git_branch {
288                b = b.git_branch(branch.clone());
289            }
290
291            // Store uuid and parent_uuid in metadata
292            let mut metadata = serde_json::Map::new();
293            metadata.insert("uuid".into(), serde_json::json!(&self.uuid));
294            if let Some(ref parent) = self.parent_uuid {
295                metadata.insert("parent_uuid".into(), serde_json::json!(parent));
296            }
297            b = b.metadata(serde_json::Value::Object(metadata).to_string());
298
299            b
300        };
301
302        match self.entry_type.as_str() {
303            "assistant" => {
304                if let Some(ref message) = self.message {
305                    // Create ApiRequest event if we have usage data
306                    if let Some(ref usage) = message.usage
307                        && (usage.input_tokens.is_some() || usage.output_tokens.is_some())
308                    {
309                        let mut builder = base_builder();
310
311                        // Set tokens
312                        let input = usage.input_tokens.unwrap_or(0);
313                        let output = usage.output_tokens.unwrap_or(0);
314                        builder = builder.tokens(input, output);
315
316                        // Set cache tokens
317                        if let Some(cache_read) = usage.cache_read_input_tokens {
318                            builder = builder.tokens_cache_read_opt(Some(cache_read));
319                        }
320                        if let Some(cache_write) = usage.cache_creation_input_tokens {
321                            builder = builder.tokens_cache_write_opt(Some(cache_write));
322                        }
323
324                        // Set model
325                        if let Some(ref model) = message.model {
326                            builder = builder.model(model.clone());
327                        }
328
329                        events.push(builder.build());
330                    }
331
332                    // Create PreToolUse events for tool_use blocks
333                    if let Some(ref content) = message.content
334                        && let Some(blocks) = content.blocks()
335                    {
336                        for block in blocks {
337                            if let ContentBlock::ToolUse { id, name, input } = block {
338                                // Register tool info for later PostToolUse events
339                                tool_info_map.insert(
340                                    id.clone(),
341                                    ToolInfo {
342                                        name: name.clone(),
343                                        input: input.clone(),
344                                    },
345                                );
346
347                                let builder = EventBuilder::new(
348                                    machine_id,
349                                    EventType::PreToolUse,
350                                    &self.session_id,
351                                )
352                                .source("transcript")
353                                .framework("claude")
354                                .is_sidechain(self.is_sidechain)
355                                .tool(id.clone(), name.clone())
356                                .timestamp_opt(timestamp)
357                                .cwd_opt(self.cwd.clone())
358                                .git_branch_opt(self.git_branch.clone());
359
360                                events.push(builder.build());
361                            }
362                        }
363                    }
364                }
365            }
366
367            "user" => {
368                // Create UserPromptSubmit for user messages
369                let message = self.message.as_ref();
370
371                // Extract prompt text if available.
372                // MessageContent handles both plain string format (e.g., "content": "ok")
373                // and array format (e.g., "content": [{"type": "text", "text": "ok"}])
374                let prompt_text = message
375                    .and_then(|m| m.content.as_ref())
376                    .and_then(|c| c.text())
377                    .map(|s| s.to_string());
378
379                // Only create UserPromptSubmit if there's text content
380                // (tool_result entries don't need a UserPromptSubmit)
381                if prompt_text.is_some() {
382                    let mut builder = EventBuilder::new(
383                        machine_id,
384                        EventType::UserPromptSubmit,
385                        &self.session_id,
386                    )
387                    .source("transcript")
388                    .framework("claude")
389                    .is_sidechain(self.is_sidechain)
390                    .timestamp_opt(timestamp);
391
392                    if let Some(ref cwd) = self.cwd {
393                        builder = builder.cwd(cwd.clone());
394                    }
395                    if let Some(ref branch) = self.git_branch {
396                        builder = builder.git_branch(branch.clone());
397                    }
398
399                    // Store prompt in payload (truncated if very long)
400                    if let Some(text) = prompt_text {
401                        let truncated = truncate_string(&text, 1000);
402                        builder =
403                            builder.payload(serde_json::json!({"prompt": truncated}).to_string());
404                    }
405
406                    events.push(builder.build());
407                }
408
409                // Create PostToolUse events for tool_result blocks
410                if let Some(ref message) = self.message
411                    && let Some(ref content) = message.content
412                    && let Some(blocks) = content.blocks()
413                {
414                    for block in blocks {
415                        if let ContentBlock::ToolResult {
416                            tool_use_id,
417                            content: result_content,
418                        } = block
419                        {
420                            let mut builder = EventBuilder::new(
421                                machine_id,
422                                EventType::PostToolUse,
423                                &self.session_id,
424                            )
425                            .source("transcript")
426                            .framework("claude")
427                            .is_sidechain(self.is_sidechain)
428                            .tool_use_id(tool_use_id.clone())
429                            .timestamp_opt(timestamp)
430                            .cwd_opt(self.cwd.clone())
431                            .git_branch_opt(self.git_branch.clone());
432
433                            // Look up tool info from the map to enrich PostToolUse events
434                            if let Some(tool_info) = tool_info_map.get(tool_use_id) {
435                                // Set tool_name so GitHub context extraction can check for Bash
436                                // (tool_use_id is already set above, so only set tool_name here)
437                                builder = builder.tool_name(tool_info.name.clone());
438
439                                // Only include command/output for Bash tools (needed for GitHub context extraction)
440                                if tool_info.name == "Bash" {
441                                    // Include command in payload
442                                    if let Some(command) = tool_info.input.get("command") {
443                                        builder = builder.payload(
444                                            serde_json::json!({ "command": command }).to_string(),
445                                        );
446                                    }
447
448                                    // Include output in metadata for context extraction
449                                    builder = builder.metadata(
450                                        serde_json::json!({ "output": result_content }).to_string(),
451                                    );
452                                }
453                            }
454
455                            events.push(builder.build());
456                        }
457                    }
458                }
459            }
460
461            _ => {
462                // Skip other entry types (file-history-snapshot, etc.)
463            }
464        }
465
466        events
467    }
468
469    /// Get the unique identifier for deduplication.
470    pub fn dedup_key(&self) -> &str {
471        &self.uuid
472    }
473
474    /// Check if this entry should be skipped (non-conversation data).
475    pub fn should_skip(&self) -> bool {
476        // Skip file-history-snapshot and other non-message entries
477        self.entry_type != "user" && self.entry_type != "assistant"
478    }
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse a transcript entry from a JSON fixture, panicking on failure.
    fn parse_entry(json: &str) -> TranscriptEntry {
        serde_json::from_str(json).unwrap()
    }

    /// Convert an entry to events using a fresh, empty tool info map.
    fn events_of(entry: TranscriptEntry) -> Vec<Event> {
        let mut tool_info_map = ToolInfoMap::default();
        entry.into_events("machine-1", &mut tool_info_map)
    }

    /// Decode an event's JSON-encoded string field (payload or metadata).
    fn decode(field: &Option<String>) -> serde_json::Value {
        serde_json::from_str(field.as_ref().unwrap()).unwrap()
    }

    #[test]
    fn test_parse_assistant_with_usage() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-123",
            "parentUuid": "entry-122",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "cwd": "/projects/test",
            "message": {
                "role": "assistant",
                "model": "claude-opus-4-5-20251101",
                "usage": {
                    "input_tokens": 1000,
                    "output_tokens": 500,
                    "cache_read_input_tokens": 200,
                    "cache_creation_input_tokens": 100
                }
            }
        }"#;

        let entry = parse_entry(json);
        assert_eq!(entry.session_id, "test-session");
        assert_eq!(entry.uuid, "entry-123");
        assert!(!entry.is_sidechain);

        let events = events_of(entry);
        assert_eq!(events.len(), 1);

        let api_request = &events[0];
        assert_eq!(api_request.event_type, EventType::ApiRequest);
        assert_eq!(api_request.session_id, "test-session");
        assert_eq!(api_request.source, Some("transcript".to_string()));
        assert_eq!(api_request.is_sidechain, Some(false));
        assert_eq!(api_request.tokens_input, Some(1000));
        assert_eq!(api_request.tokens_output, Some(500));
        assert_eq!(api_request.tokens_cache_read, Some(200));
        assert_eq!(api_request.tokens_cache_write, Some(100));
        assert_eq!(
            api_request.model,
            Some("claude-opus-4-5-20251101".to_string())
        );
    }

    #[test]
    fn test_parse_sidechain_entry() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-456",
            "isSidechain": true,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                }
            }
        }"#;

        let entry = parse_entry(json);
        assert!(entry.is_sidechain);

        // The sidechain flag must be carried over onto the event.
        let events = events_of(entry);
        assert_eq!(events[0].is_sidechain, Some(true));
    }

    #[test]
    fn test_parse_user_message() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-789",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello, Claude!"}
                ]
            }
        }"#;

        let events = events_of(parse_entry(json));

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);
    }

    #[test]
    fn test_parse_user_message_string_content() {
        // Initial user prompts arrive with `content` as a bare string rather
        // than an array of blocks. This format previously caused prompts to
        // be silently dropped.
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-string",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": "ok"
            }
        }"#;

        let entry = parse_entry(json);

        // The content must have been parsed as the Text variant.
        {
            let content = entry
                .message
                .as_ref()
                .unwrap()
                .content
                .as_ref()
                .unwrap();
            assert!(matches!(content, MessageContent::Text(_)));
            assert_eq!(content.text(), Some("ok"));
            assert!(content.blocks().is_none());
        }

        // Event generation must work for this format.
        let events = events_of(entry);
        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        // The prompt ends up in the payload.
        let payload = decode(&events[0].payload);
        assert_eq!(payload["prompt"], "ok");
    }

    #[test]
    fn test_message_content_blocks_variant() {
        // Array-form content must parse as the Blocks variant.
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-blocks",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Hello from blocks!"}
                ]
            }
        }"#;

        let entry = parse_entry(json);
        let content = entry
            .message
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap();

        assert!(matches!(content, MessageContent::Blocks(_)));
        assert_eq!(content.text(), Some("Hello from blocks!"));
        assert!(content.blocks().is_some());
        assert_eq!(content.blocks().unwrap().len(), 1);
    }

    #[test]
    fn test_parse_tool_use() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls"}}
                ]
            }
        }"#;

        let mut tool_info_map = ToolInfoMap::default();
        let events = parse_entry(json).into_events("machine-1", &mut tool_info_map);

        // Expect ApiRequest followed by PreToolUse.
        assert_eq!(events.len(), 2);
        assert_eq!(events[0].event_type, EventType::ApiRequest);

        let pre_tool_use = &events[1];
        assert_eq!(pre_tool_use.event_type, EventType::PreToolUse);
        assert_eq!(pre_tool_use.tool_use_id, Some("toolu_123".to_string()));
        assert_eq!(pre_tool_use.tool_name, Some("Bash".to_string()));

        // The tool call must have been recorded in the map.
        assert!(tool_info_map.contains_key("toolu_123"));
        assert_eq!(tool_info_map["toolu_123"].name, "Bash");
    }

    #[test]
    fn test_parse_tool_result_with_enrichment() {
        // Step 1: feed a tool_use entry through to populate the map.
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50
                },
                "content": [
                    {"type": "tool_use", "id": "toolu_123", "name": "Bash", "input": {"command": "ls -la"}}
                ]
            }
        }"#;

        let mut tool_info_map = ToolInfoMap::default();
        let _ = parse_entry(tool_use_json).into_events("machine-1", &mut tool_info_map);

        // Step 2: feed the matching tool_result entry.
        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_123", "content": "file1.txt\nfile2.txt"}
                ]
            }
        }"#;

        let events = parse_entry(tool_result_json).into_events("machine-1", &mut tool_info_map);

        // Only PostToolUse: there is no text, hence no UserPromptSubmit.
        assert_eq!(events.len(), 1);
        let post_tool_use = &events[0];
        assert_eq!(post_tool_use.event_type, EventType::PostToolUse);
        assert_eq!(post_tool_use.tool_use_id, Some("toolu_123".to_string()));

        // The tool name comes from the map.
        assert_eq!(post_tool_use.tool_name, Some("Bash".to_string()));

        // The payload carries the command.
        assert!(post_tool_use.payload.is_some());
        let payload = decode(&post_tool_use.payload);
        assert_eq!(payload["command"], "ls -la");

        // The metadata carries the output.
        assert!(post_tool_use.metadata.is_some());
        let metadata = decode(&post_tool_use.metadata);
        assert_eq!(metadata["output"], "file1.txt\nfile2.txt");
    }

    #[test]
    fn test_parse_non_bash_tool_no_payload() {
        // Tools other than Bash get tool_name but neither payload nor metadata.
        let tool_use_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-tool",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "assistant",
            "message": {
                "role": "assistant",
                "usage": { "input_tokens": 100, "output_tokens": 50 },
                "content": [
                    {"type": "tool_use", "id": "toolu_read", "name": "Read", "input": {"file_path": "/test.txt"}}
                ]
            }
        }"#;

        let mut tool_info_map = ToolInfoMap::default();
        let _ = parse_entry(tool_use_json).into_events("machine-1", &mut tool_info_map);

        let tool_result_json = r#"{
            "sessionId": "test-session",
            "uuid": "entry-result",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "toolu_read", "content": "file contents here"}
                ]
            }
        }"#;

        let events = parse_entry(tool_result_json).into_events("machine-1", &mut tool_info_map);

        assert_eq!(events.len(), 1);
        let post_tool_use = &events[0];

        // The name is set for every tool...
        assert_eq!(post_tool_use.tool_name, Some("Read".to_string()));

        // ...but payload and metadata stay unset for non-Bash tools.
        assert!(post_tool_use.payload.is_none());
        assert!(post_tool_use.metadata.is_none());
    }

    #[test]
    fn test_skip_non_message_entries() {
        let json = r#"{
            "sessionId": "test-session",
            "uuid": "snapshot-1",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "file-history-snapshot"
        }"#;

        let entry = parse_entry(json);
        assert!(entry.should_skip());

        let events = events_of(entry);
        assert!(events.is_empty());
    }

    #[test]
    fn test_user_message_with_multibyte_utf8_truncation() {
        // 999 ASCII chars followed by a two-byte character: byte 1000 falls in
        // the middle of "é" (999 + 2 bytes = 1001 bytes in total).
        let long_prompt = format!("{}{}", "a".repeat(999), "é");

        let json = format!(
            r#"{{
            "sessionId": "test-session",
            "uuid": "entry-utf8",
            "isSidechain": false,
            "timestamp": "2025-12-25T21:30:31.390Z",
            "type": "user",
            "message": {{
                "role": "user",
                "content": [
                    {{"type": "text", "text": "{}"}}
                ]
            }}
        }}"#,
            long_prompt
        );

        // Must NOT panic even though the cut point lands inside "é".
        let events = events_of(parse_entry(&json));

        assert_eq!(events.len(), 1);
        assert_eq!(events[0].event_type, EventType::UserPromptSubmit);

        // The truncated prompt must end on a valid char boundary:
        // 999 'a's followed by "...".
        let payload = decode(&events[0].payload);
        let prompt = payload["prompt"].as_str().unwrap();
        assert!(prompt.ends_with("..."));
        assert_eq!(prompt.len(), 1002); // 999 + "..."
    }
}
849        assert!(prompt.ends_with("..."));
850        assert_eq!(prompt.len(), 1002); // 999 + "..."
851    }
852}