1use std::collections::HashMap;
24use std::path::PathBuf;
25
26use crate::inference_helpers::{CHARS_PER_TOKEN, PER_MESSAGE_OVERHEAD};
27use crate::persistence::Message;
28
/// Token-usage breakdown of a conversation's context, as produced by
/// `analyze_context`.
///
/// All counts are heuristic estimates derived from character lengths
/// (see `estimate_message_tokens`), not exact tokenizer output.
#[derive(Debug, Clone, Default)]
pub struct ContextAnalysis {
    /// Estimated tokens spent on tool-call requests, keyed by tool name.
    pub tool_request_tokens: HashMap<String, usize>,
    /// Estimated tokens spent on tool results, keyed by tool name.
    pub tool_result_tokens: HashMap<String, usize>,
    /// Estimated tokens in user messages.
    pub human_tokens: usize,
    /// Estimated tokens of assistant text (excluding tool-call payloads).
    pub assistant_tokens: usize,
    /// Files whose contents were read more than once, keyed by path.
    pub duplicate_reads: HashMap<PathBuf, DuplicateRead>,
    /// Estimated total tokens across all messages (system included).
    pub total: usize,
}
45
/// Statistics for a single file that was read more than once.
#[derive(Debug, Clone)]
pub struct DuplicateRead {
    /// How many times the file was read (only recorded when >= 2).
    pub count: usize,
    /// Estimated tokens spent on reads beyond the first.
    pub wasted_tokens: usize,
}
54
55impl ContextAnalysis {
56 pub fn total_tool_result_tokens(&self) -> usize {
58 self.tool_result_tokens.values().sum()
59 }
60
61 pub fn total_tool_request_tokens(&self) -> usize {
63 self.tool_request_tokens.values().sum()
64 }
65
66 pub fn total_duplicate_waste(&self) -> usize {
68 self.duplicate_reads.values().map(|d| d.wasted_tokens).sum()
69 }
70
71 pub fn tool_result_percent(&self) -> usize {
73 if self.total == 0 {
74 return 0;
75 }
76 (self.total_tool_result_tokens() * 100) / self.total
77 }
78
79 pub fn duplicate_read_percent(&self) -> usize {
81 if self.total == 0 {
82 return 0;
83 }
84 (self.total_duplicate_waste() * 100) / self.total
85 }
86
87 pub fn top_tool_results(&self, n: usize) -> Vec<(&str, usize)> {
89 let mut sorted: Vec<_> = self
90 .tool_result_tokens
91 .iter()
92 .map(|(k, v)| (k.as_str(), *v))
93 .collect();
94 sorted.sort_by(|a, b| b.1.cmp(&a.1));
95 sorted.truncate(n);
96 sorted
97 }
98
99 pub fn summary(&self) -> String {
101 let mut lines = Vec::new();
102 lines.push(format!("Context: ~{} tokens", self.total));
103 lines.push(format!(
104 " Human: {} | Assistant: {} | Tool results: {} ({}%)",
105 self.human_tokens,
106 self.assistant_tokens,
107 self.total_tool_result_tokens(),
108 self.tool_result_percent(),
109 ));
110
111 let top = self.top_tool_results(5);
112 if !top.is_empty() {
113 lines.push(" Top tool results:".to_string());
114 for (name, tokens) in &top {
115 let pct = if self.total > 0 {
116 (*tokens * 100) / self.total
117 } else {
118 0
119 };
120 lines.push(format!(" {name}: ~{tokens} tokens ({pct}%)"));
121 }
122 }
123
124 let waste = self.total_duplicate_waste();
125 if waste > 0 {
126 lines.push(format!(
127 " Duplicate reads: ~{waste} wasted tokens ({}%) across {} files",
128 self.duplicate_read_percent(),
129 self.duplicate_reads.len(),
130 ));
131 }
132
133 lines.join("\n")
134 }
135}
136
137pub fn analyze_context(messages: &[Message]) -> ContextAnalysis {
142 let mut analysis = ContextAnalysis::default();
143
144 let mut id_to_tool: HashMap<String, String> = HashMap::new();
147 let mut read_tool_paths: HashMap<String, PathBuf> = HashMap::new();
149
150 for msg in messages {
151 if msg.role == crate::persistence::Role::Assistant
152 && let Some(ref tc_json) = msg.tool_calls
153 {
154 extract_tool_call_ids(tc_json, &mut id_to_tool, &mut read_tool_paths);
155 }
156 }
157
158 let mut file_read_stats: HashMap<PathBuf, FileReadAccum> = HashMap::new();
161
162 for msg in messages {
163 let tokens = estimate_message_tokens(msg);
164 analysis.total += tokens;
165
166 match msg.role {
167 crate::persistence::Role::User => {
168 analysis.human_tokens += tokens;
169 }
170 crate::persistence::Role::Assistant => {
171 if let Some(ref tc_json) = msg.tool_calls {
172 let text_tokens = msg.content.as_deref().map_or(0, estimate_str_tokens);
174 let tool_tokens = tokens.saturating_sub(text_tokens);
175 analysis.assistant_tokens += text_tokens;
176
177 distribute_tool_request_tokens(
179 tc_json,
180 tool_tokens,
181 &mut analysis.tool_request_tokens,
182 );
183 } else {
184 analysis.assistant_tokens += tokens;
185 }
186 }
187 crate::persistence::Role::Tool => {
188 let tool_name = msg
190 .tool_call_id
191 .as_deref()
192 .and_then(|id| id_to_tool.get(id))
193 .cloned()
194 .unwrap_or_else(|| "unknown".to_string());
195
196 *analysis
197 .tool_result_tokens
198 .entry(tool_name.clone())
199 .or_default() += tokens;
200
201 if (tool_name == "Read" || tool_name == "read")
203 && let Some(path) = msg
204 .tool_call_id
205 .as_deref()
206 .and_then(|id| read_tool_paths.get(id))
207 {
208 let entry =
209 file_read_stats
210 .entry(path.clone())
211 .or_insert_with(|| FileReadAccum {
212 count: 0,
213 total_tokens: 0,
214 });
215 entry.count += 1;
216 entry.total_tokens += tokens;
217 }
218 }
219 crate::persistence::Role::System => {
220 }
222 }
223 }
224
225 for (path, accum) in file_read_stats {
227 if accum.count > 1 {
228 let avg_tokens = accum.total_tokens / accum.count;
229 let wasted = avg_tokens * (accum.count - 1);
230 analysis.duplicate_reads.insert(
231 path,
232 DuplicateRead {
233 count: accum.count,
234 wasted_tokens: wasted,
235 },
236 );
237 }
238 }
239
240 analysis
241}
242
/// Running totals for reads of a single file while scanning tool results.
struct FileReadAccum {
    /// Number of Read results observed for this file.
    count: usize,
    /// Sum of estimated tokens across those results.
    total_tokens: usize,
}
252
253fn estimate_message_tokens(msg: &Message) -> usize {
255 let content_len = msg.content.as_deref().map_or(0, |c| c.len());
256 let tc_len = msg.tool_calls.as_deref().map_or(0, |c| c.len());
257 ((content_len + tc_len) as f64 / CHARS_PER_TOKEN) as usize + PER_MESSAGE_OVERHEAD
258}
259
260fn estimate_str_tokens(s: &str) -> usize {
262 (s.len() as f64 / CHARS_PER_TOKEN) as usize
263}
264
265fn extract_tool_call_ids(
267 tc_json: &str,
268 id_to_tool: &mut HashMap<String, String>,
269 read_paths: &mut HashMap<String, PathBuf>,
270) {
271 let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
272 Ok(v) => v,
273 Err(_) => return,
274 };
275 for call in &calls {
276 let id = call.get("id").and_then(|v| v.as_str()).unwrap_or_default();
277 let name = call
278 .get("function_name")
279 .or_else(|| call.get("name"))
280 .and_then(|v| v.as_str())
281 .unwrap_or("unknown");
282
283 if !id.is_empty() {
284 id_to_tool.insert(id.to_string(), name.to_string());
285 }
286
287 if (name == "Read" || name == "read")
289 && let Some(args) = call.get("arguments")
290 {
291 let args_obj: Option<serde_json::Value> = if let Some(s) = args.as_str() {
293 serde_json::from_str(s).ok()
294 } else {
295 Some(args.clone())
296 };
297 if let Some(obj) = args_obj
298 && let Some(path) = obj
299 .get("file_path")
300 .or_else(|| obj.get("path"))
301 .and_then(|v| v.as_str())
302 {
303 read_paths.insert(id.to_string(), PathBuf::from(path));
304 }
305 }
306 }
307}
308
309fn distribute_tool_request_tokens(
311 tc_json: &str,
312 total_tool_tokens: usize,
313 request_map: &mut HashMap<String, usize>,
314) {
315 let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
316 Ok(v) => v,
317 Err(_) => return,
318 };
319 if calls.is_empty() {
320 return;
321 }
322 let per_call = total_tool_tokens / calls.len();
323 for call in &calls {
324 let name = call
325 .get("function_name")
326 .or_else(|| call.get("name"))
327 .and_then(|v| v.as_str())
328 .unwrap_or("unknown");
329 *request_map.entry(name.to_string()).or_default() += per_call;
330 }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::{Message, Role};

    /// Build a minimal `Message`, leaving every field the analyzer does not
    /// read at its empty/`None` default.
    fn msg(
        role: Role,
        content: Option<&str>,
        tool_calls: Option<&str>,
        tool_call_id: Option<&str>,
    ) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role,
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: tool_call_id.map(String::from),
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_empty_history() {
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.total, 0);
        assert_eq!(analysis.human_tokens, 0);
        assert_eq!(analysis.assistant_tokens, 0);
        assert!(analysis.tool_result_tokens.is_empty());
        assert!(analysis.duplicate_reads.is_empty());
    }

    #[test]
    fn test_simple_conversation() {
        let messages = vec![
            msg(Role::User, Some("Hello world"), None, None),
            msg(Role::Assistant, Some("Hi there!"), None, None),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.total > 0);
        assert!(analysis.human_tokens > 0);
        assert!(analysis.assistant_tokens > 0);
        assert_eq!(analysis.total_tool_result_tokens(), 0);
    }

    #[test]
    fn test_tool_call_attribution() {
        let tc_json =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc_json), None),
            msg(
                Role::Tool,
                Some("contents of foo.rs which is a pretty long file with lots of code"),
                None,
                Some("tc_1"),
            ),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(*analysis.tool_result_tokens.get("Read").unwrap() > 0);
    }

    #[test]
    fn test_duplicate_read_detection() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc3 =
            r#"[{"id":"tc_3","function_name":"Read","arguments":"{\"file_path\":\"bar.rs\"}"}]"#;

        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_1")),
            msg(Role::User, Some("Read it again"), None, None),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_2")),
            msg(Role::User, Some("Read bar.rs"), None, None),
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some("contents of bar"), None, Some("tc_3")),
        ];

        let analysis = analyze_context(&messages);

        // foo.rs was read twice -> flagged as a duplicate.
        let foo_path = PathBuf::from("foo.rs");
        assert!(analysis.duplicate_reads.contains_key(&foo_path));
        assert_eq!(analysis.duplicate_reads[&foo_path].count, 2);
        assert!(analysis.duplicate_reads[&foo_path].wasted_tokens > 0);

        // bar.rs was read once -> not flagged.
        let bar_path = PathBuf::from("bar.rs");
        assert!(!analysis.duplicate_reads.contains_key(&bar_path));
    }

    #[test]
    fn test_top_tool_results() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;

        let long_content = "x".repeat(1000);
        let short_content = "y".repeat(100);

        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&long_content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&short_content), None, Some("tc_2")),
        ];

        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(5);
        assert!(!top.is_empty());
        // Read produced the larger result, so it must rank first.
        assert_eq!(top[0].0, "Read");
    }

    #[test]
    fn test_summary_format() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, Some("let me read"), Some(tc1), None),
            msg(Role::Tool, Some("file contents here"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let summary = analysis.summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        assert!(summary.contains("Tool results:"));
    }

    #[test]
    fn test_multiple_tool_calls_in_one_message() {
        let tc = r#"[
            {"id":"tc_1","function_name":"Read","arguments":"{}"},
            {"id":"tc_2","function_name":"Grep","arguments":"{}"}
        ]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("read result"), None, Some("tc_1")),
            msg(Role::Tool, Some("grep result"), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(analysis.tool_result_tokens.contains_key("Grep"));
    }

    #[test]
    fn test_total_tool_request_tokens_counted() {
        let tc =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"big.rs\"}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("result"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_tool_request_tokens() > 0,
            "tool request tokens should be counted"
        );
    }

    #[test]
    fn test_tool_result_percent_calculation() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let big_result = "x".repeat(500);
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some(&big_result), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let pct = analysis.tool_result_percent();
        assert!(pct > 0 && pct <= 100, "percent should be 1-100, got {pct}");
        assert!(
            pct > analysis.human_tokens * 100 / analysis.total,
            "tool result percent should exceed human percent for large results"
        );
    }

    #[test]
    fn test_tool_result_percent_zero_when_no_context() {
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.tool_result_percent(), 0);
        assert_eq!(analysis.duplicate_read_percent(), 0);
    }

    #[test]
    fn test_total_duplicate_waste_sums_correctly() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let content = "y".repeat(200);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_duplicate_waste() > 0,
            "duplicate read of f.rs should produce non-zero waste"
        );
        // The convenience total must agree with a manual sum over entries.
        assert_eq!(
            analysis.total_duplicate_waste(),
            analysis
                .duplicate_reads
                .values()
                .map(|d| d.wasted_tokens)
                .sum::<usize>()
        );
    }

    #[test]
    fn test_duplicate_read_percent_nonzero() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let content = "z".repeat(400);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.duplicate_read_percent() > 0,
            "duplicate reads should produce non-zero percent"
        );
    }

    #[test]
    fn test_top_tool_results_empty_when_n_zero() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("stuff"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.top_tool_results(0).is_empty());
    }

    #[test]
    fn test_top_tool_results_sorted_descending() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Bash","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Read","arguments":"{}"}]"#;
        let tc3 = r#"[{"id":"tc_3","function_name":"Grep","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&"a".repeat(100)), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&"b".repeat(2000)), None, Some("tc_2")),
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some(&"c".repeat(500)), None, Some("tc_3")),
        ];
        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(3);
        assert_eq!(top.len(), 3);
        // Result sizes differ enough that the order is unambiguous.
        assert_eq!(top[0].0, "Read");
        assert_eq!(top[1].0, "Grep");
        assert_eq!(top[2].0, "Bash");
        assert!(top[0].1 >= top[1].1);
        assert!(top[1].1 >= top[2].1);
    }

    #[test]
    fn test_system_tokens_counted_in_total() {
        let big_system = "S".repeat(1000);
        let messages = vec![msg(Role::System, Some(&big_system), None, None)];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total > 0,
            "system message should contribute to total token count"
        );
        assert_eq!(
            analysis.human_tokens, 0,
            "system tokens should not be counted as human"
        );
    }

    #[test]
    fn test_summary_with_no_tool_use() {
        let messages = vec![
            msg(Role::User, Some("hi"), None, None),
            msg(Role::Assistant, Some("hello"), None, None),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        // Optional sections must be omitted when there is nothing to report.
        assert!(!summary.contains("Top tool results:"));
        assert!(!summary.contains("Duplicate reads:"));
    }

    #[test]
    fn test_summary_includes_duplicate_waste_line() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let content = "D".repeat(500);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(
            summary.contains("Duplicate reads:"),
            "summary should mention duplicate reads when present"
        );
    }
}