// koda_core/context_analysis.rs
1//! Context analysis — per-tool token breakdown and duplicate detection.
2//!
3//! Analyzes conversation history to identify where tokens are being spent.
4//! Used by compaction decisions, `/usage` reporting, and microcompact.
5//!
6//! ## What it reports
7//!
8//! - **Per-tool token counts** — how many tokens each tool's results consume
9//!   (Read, Grep, Bash, etc.)
10//! - **Duplicate file reads** — files read multiple times (wasted context)
11//! - **Human vs assistant split** — how much of the context is user messages
12//!   vs model responses
13//! - **Attachment sizes** — images and files pasted into the conversation
14//!
15//! ## How it's used
16//!
17//! - **Auto-compact**: triggers compaction when total tokens exceed threshold
18//! - **`/usage` command**: shows the token breakdown to the user
19//! - **Microcompact**: identifies old tool results safe to clear
20//!
21//! Inspired by Claude Code's `contextAnalysis.ts`.
22
23use std::collections::HashMap;
24use std::path::PathBuf;
25
26use crate::inference_helpers::{CHARS_PER_TOKEN, PER_MESSAGE_OVERHEAD};
27use crate::persistence::Message;
28
/// Per-tool and per-role token breakdown of a conversation context.
///
/// Produced by [`analyze_context`]. All counts are character-based
/// estimates (see `CHARS_PER_TOKEN`), not exact tokenizer output.
#[derive(Debug, Clone, Default)]
pub struct ContextAnalysis {
    /// Tokens in tool *request* blocks (assistant → tool_use), keyed by tool name.
    pub tool_request_tokens: HashMap<String, usize>,
    /// Tokens in tool *result* blocks (tool → result), keyed by tool name.
    pub tool_result_tokens: HashMap<String, usize>,
    /// Tokens in human/user messages (excluding tool results).
    pub human_tokens: usize,
    /// Tokens in assistant messages (excluding tool requests).
    pub assistant_tokens: usize,
    /// Files read more than once, with count and estimated wasted tokens.
    pub duplicate_reads: HashMap<PathBuf, DuplicateRead>,
    /// Total estimated tokens across all messages.
    pub total: usize,
}

/// Info about a duplicated file read in context.
#[derive(Debug, Clone)]
pub struct DuplicateRead {
    /// Number of times this file was read.
    pub count: usize,
    /// Estimated tokens wasted by redundant reads (all but the last).
    pub wasted_tokens: usize,
}

impl ContextAnalysis {
    /// Total tokens consumed by all tool results.
    pub fn total_tool_result_tokens(&self) -> usize {
        self.tool_result_tokens.values().sum()
    }

    /// Total tokens consumed by all tool requests.
    pub fn total_tool_request_tokens(&self) -> usize {
        self.tool_request_tokens.values().sum()
    }

    /// Total tokens wasted by duplicate file reads.
    pub fn total_duplicate_waste(&self) -> usize {
        self.duplicate_reads.values().map(|d| d.wasted_tokens).sum()
    }

    /// Percentage of total context consumed by tool results (integer, 0-100).
    pub fn tool_result_percent(&self) -> usize {
        if self.total == 0 {
            return 0;
        }
        (self.total_tool_result_tokens() * 100) / self.total
    }

    /// Percentage of total context consumed by duplicate reads (integer, 0-100).
    pub fn duplicate_read_percent(&self) -> usize {
        if self.total == 0 {
            return 0;
        }
        (self.total_duplicate_waste() * 100) / self.total
    }

    /// Top N tools by result token consumption, descending.
    ///
    /// Ties are broken alphabetically by tool name so the ordering is
    /// deterministic — `HashMap` iteration order is randomized per process,
    /// and sorting by count alone would make `/usage` output flap between
    /// runs for tools with equal token counts.
    pub fn top_tool_results(&self, n: usize) -> Vec<(&str, usize)> {
        let mut sorted: Vec<_> = self
            .tool_result_tokens
            .iter()
            .map(|(k, v)| (k.as_str(), *v))
            .collect();
        // Count descending, then name ascending for stable presentation.
        sorted.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
        sorted.truncate(n);
        sorted
    }

    /// Format a human-readable summary for `/usage` or context warnings.
    ///
    /// Sections with nothing to report (no tool results, no duplicate
    /// reads) are omitted entirely.
    pub fn summary(&self) -> String {
        let mut lines = Vec::new();
        lines.push(format!("Context: ~{} tokens", self.total));
        lines.push(format!(
            "  Human: {} | Assistant: {} | Tool results: {} ({}%)",
            self.human_tokens,
            self.assistant_tokens,
            self.total_tool_result_tokens(),
            self.tool_result_percent(),
        ));

        let top = self.top_tool_results(5);
        if !top.is_empty() {
            lines.push("  Top tool results:".to_string());
            for (name, tokens) in &top {
                let pct = if self.total > 0 {
                    (*tokens * 100) / self.total
                } else {
                    0
                };
                lines.push(format!("    {name}: ~{tokens} tokens ({pct}%)"));
            }
        }

        let waste = self.total_duplicate_waste();
        if waste > 0 {
            lines.push(format!(
                "  Duplicate reads: ~{waste} wasted tokens ({}%) across {} files",
                self.duplicate_read_percent(),
                self.duplicate_reads.len(),
            ));
        }

        lines.join("\n")
    }
}
136
137/// Analyze conversation history and produce a token breakdown.
138///
139/// Walks the message list, classifying each message by role and extracting
140/// tool names from `tool_calls` JSON and `tool_call_id` linkage.
141pub fn analyze_context(messages: &[Message]) -> ContextAnalysis {
142    let mut analysis = ContextAnalysis::default();
143
144    // Phase 1: Build a map from tool_call_id → tool_name by scanning
145    // assistant messages with tool_calls JSON.
146    let mut id_to_tool: HashMap<String, String> = HashMap::new();
147    // Track Read tool: tool_call_id → file_path (extracted from args).
148    let mut read_tool_paths: HashMap<String, PathBuf> = HashMap::new();
149
150    for msg in messages {
151        if msg.role == crate::persistence::Role::Assistant
152            && let Some(ref tc_json) = msg.tool_calls
153        {
154            extract_tool_call_ids(tc_json, &mut id_to_tool, &mut read_tool_paths);
155        }
156    }
157
158    // Phase 2: Classify each message and accumulate tokens.
159    // Also track per-file read stats for duplicate detection.
160    let mut file_read_stats: HashMap<PathBuf, FileReadAccum> = HashMap::new();
161
162    for msg in messages {
163        let tokens = estimate_message_tokens(msg);
164        analysis.total += tokens;
165
166        match msg.role {
167            crate::persistence::Role::User => {
168                analysis.human_tokens += tokens;
169            }
170            crate::persistence::Role::Assistant => {
171                if let Some(ref tc_json) = msg.tool_calls {
172                    // Split tokens between text content and tool requests.
173                    let text_tokens = msg.content.as_deref().map_or(0, estimate_str_tokens);
174                    let tool_tokens = tokens.saturating_sub(text_tokens);
175                    analysis.assistant_tokens += text_tokens;
176
177                    // Attribute tool request tokens to each tool name.
178                    distribute_tool_request_tokens(
179                        tc_json,
180                        tool_tokens,
181                        &mut analysis.tool_request_tokens,
182                    );
183                } else {
184                    analysis.assistant_tokens += tokens;
185                }
186            }
187            crate::persistence::Role::Tool => {
188                // Look up which tool produced this result.
189                let tool_name = msg
190                    .tool_call_id
191                    .as_deref()
192                    .and_then(|id| id_to_tool.get(id))
193                    .cloned()
194                    .unwrap_or_else(|| "unknown".to_string());
195
196                *analysis
197                    .tool_result_tokens
198                    .entry(tool_name.clone())
199                    .or_default() += tokens;
200
201                // Track file read stats for duplicate detection.
202                if (tool_name == "Read" || tool_name == "read")
203                    && let Some(path) = msg
204                        .tool_call_id
205                        .as_deref()
206                        .and_then(|id| read_tool_paths.get(id))
207                {
208                    let entry =
209                        file_read_stats
210                            .entry(path.clone())
211                            .or_insert_with(|| FileReadAccum {
212                                count: 0,
213                                total_tokens: 0,
214                            });
215                    entry.count += 1;
216                    entry.total_tokens += tokens;
217                }
218            }
219            crate::persistence::Role::System => {
220                // System prompt — count toward total but not any category.
221            }
222        }
223    }
224
225    // Phase 3: Compute duplicate read waste.
226    for (path, accum) in file_read_stats {
227        if accum.count > 1 {
228            let avg_tokens = accum.total_tokens / accum.count;
229            let wasted = avg_tokens * (accum.count - 1);
230            analysis.duplicate_reads.insert(
231                path,
232                DuplicateRead {
233                    count: accum.count,
234                    wasted_tokens: wasted,
235                },
236            );
237        }
238    }
239
240    analysis
241}
242
243// ---------------------------------------------------------------------------
244// Internal helpers
245// ---------------------------------------------------------------------------
246
/// Accumulator for per-file read token stats.
struct FileReadAccum {
    // Number of Read tool results observed for this file.
    count: usize,
    // Combined estimated tokens across all reads of this file.
    total_tokens: usize,
}
252
253/// Estimate tokens for a single DB message row.
254fn estimate_message_tokens(msg: &Message) -> usize {
255    let content_len = msg.content.as_deref().map_or(0, |c| c.len());
256    let tc_len = msg.tool_calls.as_deref().map_or(0, |c| c.len());
257    ((content_len + tc_len) as f64 / CHARS_PER_TOKEN) as usize + PER_MESSAGE_OVERHEAD
258}
259
260/// Estimate tokens for a raw string.
261fn estimate_str_tokens(s: &str) -> usize {
262    (s.len() as f64 / CHARS_PER_TOKEN) as usize
263}
264
265/// Parse tool_calls JSON and populate id→name + read-path maps.
266fn extract_tool_call_ids(
267    tc_json: &str,
268    id_to_tool: &mut HashMap<String, String>,
269    read_paths: &mut HashMap<String, PathBuf>,
270) {
271    let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
272        Ok(v) => v,
273        Err(_) => return,
274    };
275    for call in &calls {
276        let id = call.get("id").and_then(|v| v.as_str()).unwrap_or_default();
277        let name = call
278            .get("function_name")
279            .or_else(|| call.get("name"))
280            .and_then(|v| v.as_str())
281            .unwrap_or("unknown");
282
283        if !id.is_empty() {
284            id_to_tool.insert(id.to_string(), name.to_string());
285        }
286
287        // Extract file path for Read tool calls.
288        if (name == "Read" || name == "read")
289            && let Some(args) = call.get("arguments")
290        {
291            // `arguments` may be a JSON string or an object.
292            let args_obj: Option<serde_json::Value> = if let Some(s) = args.as_str() {
293                serde_json::from_str(s).ok()
294            } else {
295                Some(args.clone())
296            };
297            if let Some(obj) = args_obj
298                && let Some(path) = obj
299                    .get("file_path")
300                    .or_else(|| obj.get("path"))
301                    .and_then(|v| v.as_str())
302            {
303                read_paths.insert(id.to_string(), PathBuf::from(path));
304            }
305        }
306    }
307}
308
309/// Distribute tool_tokens proportionally across the tool calls in a JSON array.
310fn distribute_tool_request_tokens(
311    tc_json: &str,
312    total_tool_tokens: usize,
313    request_map: &mut HashMap<String, usize>,
314) {
315    let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
316        Ok(v) => v,
317        Err(_) => return,
318    };
319    if calls.is_empty() {
320        return;
321    }
322    let per_call = total_tool_tokens / calls.len();
323    for call in &calls {
324        let name = call
325            .get("function_name")
326            .or_else(|| call.get("name"))
327            .and_then(|v| v.as_str())
328            .unwrap_or("unknown");
329        *request_map.entry(name.to_string()).or_default() += per_call;
330    }
331}
332
// Unit tests: exercise analyze_context end-to-end with hand-built Message
// rows, covering role classification, tool attribution via tool_call_id,
// duplicate-read detection, and summary formatting.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::{Message, Role};

    /// Build a minimal `Message` row; every field not under test is zeroed.
    fn msg(
        role: Role,
        content: Option<&str>,
        tool_calls: Option<&str>,
        tool_call_id: Option<&str>,
    ) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role,
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: tool_call_id.map(String::from),
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_empty_history() {
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.total, 0);
        assert_eq!(analysis.human_tokens, 0);
        assert_eq!(analysis.assistant_tokens, 0);
        assert!(analysis.tool_result_tokens.is_empty());
        assert!(analysis.duplicate_reads.is_empty());
    }

    #[test]
    fn test_simple_conversation() {
        let messages = vec![
            msg(Role::User, Some("Hello world"), None, None),
            msg(Role::Assistant, Some("Hi there!"), None, None),
        ];
        let analysis = analyze_context(&messages);
        // Even short messages yield nonzero estimates (per-message overhead).
        assert!(analysis.total > 0);
        assert!(analysis.human_tokens > 0);
        assert!(analysis.assistant_tokens > 0);
        assert_eq!(analysis.total_tool_result_tokens(), 0);
    }

    #[test]
    fn test_tool_call_attribution() {
        // Tool result linked back to the Read tool via tool_call_id "tc_1".
        let tc_json =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc_json), None),
            msg(
                Role::Tool,
                Some("contents of foo.rs which is a pretty long file with lots of code"),
                None,
                Some("tc_1"),
            ),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(*analysis.tool_result_tokens.get("Read").unwrap() > 0);
    }

    #[test]
    fn test_duplicate_read_detection() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc3 =
            r#"[{"id":"tc_3","function_name":"Read","arguments":"{\"file_path\":\"bar.rs\"}"}]"#;

        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_1")),
            msg(Role::User, Some("Read it again"), None, None),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_2")),
            msg(Role::User, Some("Read bar.rs"), None, None),
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some("contents of bar"), None, Some("tc_3")),
        ];

        let analysis = analyze_context(&messages);

        // foo.rs was read twice → should appear in duplicate_reads
        let foo_path = PathBuf::from("foo.rs");
        assert!(analysis.duplicate_reads.contains_key(&foo_path));
        assert_eq!(analysis.duplicate_reads[&foo_path].count, 2);
        assert!(analysis.duplicate_reads[&foo_path].wasted_tokens > 0);

        // bar.rs was read once → should NOT appear
        let bar_path = PathBuf::from("bar.rs");
        assert!(!analysis.duplicate_reads.contains_key(&bar_path));
    }

    #[test]
    fn test_top_tool_results() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;

        let long_content = "x".repeat(1000);
        let short_content = "y".repeat(100);

        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&long_content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&short_content), None, Some("tc_2")),
        ];

        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(5);
        assert!(!top.is_empty());
        // Read should be first (more tokens)
        assert_eq!(top[0].0, "Read");
    }

    #[test]
    fn test_summary_format() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, Some("let me read"), Some(tc1), None),
            msg(Role::Tool, Some("file contents here"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let summary = analysis.summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        assert!(summary.contains("Tool results:"));
    }

    #[test]
    fn test_multiple_tool_calls_in_one_message() {
        // Two parallel tool calls in a single assistant message; each result
        // must be attributed to its own tool.
        let tc = r#"[
            {"id":"tc_1","function_name":"Read","arguments":"{}"},
            {"id":"tc_2","function_name":"Grep","arguments":"{}"}
        ]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("read result"), None, Some("tc_1")),
            msg(Role::Tool, Some("grep result"), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(analysis.tool_result_tokens.contains_key("Grep"));
    }

    #[test]
    fn test_total_tool_request_tokens_counted() {
        // A tool_calls JSON in an assistant message should contribute to
        // tool_request_tokens, not tool_result_tokens.
        let tc =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"big.rs\"}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("result"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_tool_request_tokens() > 0,
            "tool request tokens should be counted"
        );
    }

    #[test]
    fn test_tool_result_percent_calculation() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        // Use a large result so it registers as a meaningful percentage.
        let big_result = "x".repeat(500);
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some(&big_result), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let pct = analysis.tool_result_percent();
        assert!(pct > 0 && pct <= 100, "percent should be 1-100, got {pct}");
        // Tool result should be the dominant consumer in this exchange.
        assert!(
            pct > analysis.human_tokens * 100 / analysis.total,
            "tool result percent should exceed human percent for large results"
        );
    }

    #[test]
    fn test_tool_result_percent_zero_when_no_context() {
        // Guards against division by zero when total == 0.
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.tool_result_percent(), 0);
        assert_eq!(analysis.duplicate_read_percent(), 0);
    }

    #[test]
    fn test_total_duplicate_waste_sums_correctly() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let content = "y".repeat(200);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_duplicate_waste() > 0,
            "duplicate read of f.rs should produce non-zero waste"
        );
        // waste should equal result of the second (redundant) read
        assert_eq!(
            analysis.total_duplicate_waste(),
            analysis
                .duplicate_reads
                .values()
                .map(|d| d.wasted_tokens)
                .sum::<usize>()
        );
    }

    #[test]
    fn test_duplicate_read_percent_nonzero() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let content = "z".repeat(400);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.duplicate_read_percent() > 0,
            "duplicate reads should produce non-zero percent"
        );
    }

    #[test]
    fn test_top_tool_results_empty_when_n_zero() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("stuff"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.top_tool_results(0).is_empty());
    }

    #[test]
    fn test_top_tool_results_sorted_descending() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Bash","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Read","arguments":"{}"}]"#;
        let tc3 = r#"[{"id":"tc_3","function_name":"Grep","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&"a".repeat(100)), None, Some("tc_1")), // small
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&"b".repeat(2000)), None, Some("tc_2")), // largest
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some(&"c".repeat(500)), None, Some("tc_3")), // medium
        ];
        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(3);
        assert_eq!(top.len(), 3);
        // Descending order: Read > Grep > Bash
        assert_eq!(top[0].0, "Read");
        assert_eq!(top[1].0, "Grep");
        assert_eq!(top[2].0, "Bash");
        // Each entry should be >= the next.
        assert!(top[0].1 >= top[1].1);
        assert!(top[1].1 >= top[2].1);
    }

    #[test]
    fn test_system_tokens_counted_in_total() {
        // System messages count toward the grand total but no role bucket.
        let big_system = "S".repeat(1000);
        let messages = vec![msg(Role::System, Some(&big_system), None, None)];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total > 0,
            "system message should contribute to total token count"
        );
        assert_eq!(
            analysis.human_tokens, 0,
            "system tokens should not be counted as human"
        );
    }

    #[test]
    fn test_summary_with_no_tool_use() {
        let messages = vec![
            msg(Role::User, Some("hi"), None, None),
            msg(Role::Assistant, Some("hello"), None, None),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        // No tool section when there are no tool results.
        assert!(!summary.contains("Top tool results:"));
        assert!(!summary.contains("Duplicate reads:"));
    }

    #[test]
    fn test_summary_includes_duplicate_waste_line() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let content = "D".repeat(500);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(
            summary.contains("Duplicate reads:"),
            "summary should mention duplicate reads when present"
        );
    }
}