1use std::collections::HashMap;
24use std::path::PathBuf;
25
26use crate::inference_helpers::{CHARS_PER_TOKEN, PER_MESSAGE_OVERHEAD};
27use crate::persistence::Message;
28
/// Aggregated token-usage breakdown for a session's context window.
///
/// All values are heuristic estimates produced by [`analyze_context`], not
/// exact tokenizer counts.
#[derive(Debug, Clone, Default)]
pub struct ContextAnalysis {
    // Estimated tokens spent on assistant tool-call requests, keyed by tool name.
    pub tool_request_tokens: HashMap<String, usize>,
    // Estimated tokens spent on tool results, keyed by tool name.
    pub tool_result_tokens: HashMap<String, usize>,
    // Estimated tokens in user messages.
    pub human_tokens: usize,
    // Estimated tokens in assistant text (excluding tool-call payloads).
    pub assistant_tokens: usize,
    // Files read more than once, with count and estimated wasted tokens.
    pub duplicate_reads: HashMap<PathBuf, DuplicateRead>,
    // Estimated total tokens across all messages (system included).
    pub total: usize,
}
45
/// Statistics for a file whose contents were read into context more than once.
#[derive(Debug, Clone)]
pub struct DuplicateRead {
    // How many Read tool results were seen for this file (always >= 2).
    pub count: usize,
    // Estimated tokens spent re-reading: average result size times (count - 1).
    pub wasted_tokens: usize,
}
54
55impl ContextAnalysis {
56 pub fn total_tool_result_tokens(&self) -> usize {
58 self.tool_result_tokens.values().sum()
59 }
60
61 pub fn total_tool_request_tokens(&self) -> usize {
63 self.tool_request_tokens.values().sum()
64 }
65
66 pub fn total_duplicate_waste(&self) -> usize {
68 self.duplicate_reads.values().map(|d| d.wasted_tokens).sum()
69 }
70
71 pub fn tool_result_percent(&self) -> usize {
73 if self.total == 0 {
74 return 0;
75 }
76 (self.total_tool_result_tokens() * 100) / self.total
77 }
78
79 pub fn duplicate_read_percent(&self) -> usize {
81 if self.total == 0 {
82 return 0;
83 }
84 (self.total_duplicate_waste() * 100) / self.total
85 }
86
87 pub fn top_tool_results(&self, n: usize) -> Vec<(&str, usize)> {
89 let mut sorted: Vec<_> = self
90 .tool_result_tokens
91 .iter()
92 .map(|(k, v)| (k.as_str(), *v))
93 .collect();
94 sorted.sort_by_key(|entry| std::cmp::Reverse(entry.1));
95 sorted.truncate(n);
96 sorted
97 }
98
99 pub fn summary(&self) -> String {
101 let mut lines = Vec::new();
102 lines.push(format!("Context: ~{} tokens", self.total));
103 lines.push(format!(
104 " Human: {} | Assistant: {} | Tool results: {} ({}%)",
105 self.human_tokens,
106 self.assistant_tokens,
107 self.total_tool_result_tokens(),
108 self.tool_result_percent(),
109 ));
110
111 let top = self.top_tool_results(5);
112 if !top.is_empty() {
113 lines.push(" Top tool results:".to_string());
114 for (name, tokens) in &top {
115 let pct = (tokens * 100).checked_div(self.total).unwrap_or(0);
116 lines.push(format!(" {name}: ~{tokens} tokens ({pct}%)"));
117 }
118 }
119
120 let waste = self.total_duplicate_waste();
121 if waste > 0 {
122 lines.push(format!(
123 " Duplicate reads: ~{waste} wasted tokens ({}%) across {} files",
124 self.duplicate_read_percent(),
125 self.duplicate_reads.len(),
126 ));
127 }
128
129 lines.join("\n")
130 }
131}
132
133pub fn analyze_context(messages: &[Message]) -> ContextAnalysis {
138 let mut analysis = ContextAnalysis::default();
139
140 let mut id_to_tool: HashMap<String, String> = HashMap::new();
143 let mut read_tool_paths: HashMap<String, PathBuf> = HashMap::new();
145
146 for msg in messages {
147 if msg.role == crate::persistence::Role::Assistant
148 && let Some(ref tc_json) = msg.tool_calls
149 {
150 extract_tool_call_ids(tc_json, &mut id_to_tool, &mut read_tool_paths);
151 }
152 }
153
154 let mut file_read_stats: HashMap<PathBuf, FileReadAccum> = HashMap::new();
157
158 for msg in messages {
159 let tokens = estimate_message_tokens(msg);
160 analysis.total += tokens;
161
162 match msg.role {
163 crate::persistence::Role::User => {
164 analysis.human_tokens += tokens;
165 }
166 crate::persistence::Role::Assistant => {
167 if let Some(ref tc_json) = msg.tool_calls {
168 let text_tokens = msg.content.as_deref().map_or(0, estimate_str_tokens);
170 let tool_tokens = tokens.saturating_sub(text_tokens);
171 analysis.assistant_tokens += text_tokens;
172
173 distribute_tool_request_tokens(
175 tc_json,
176 tool_tokens,
177 &mut analysis.tool_request_tokens,
178 );
179 } else {
180 analysis.assistant_tokens += tokens;
181 }
182 }
183 crate::persistence::Role::Tool => {
184 let tool_name = msg
186 .tool_call_id
187 .as_deref()
188 .and_then(|id| id_to_tool.get(id))
189 .cloned()
190 .unwrap_or_else(|| "unknown".to_string());
191
192 *analysis
193 .tool_result_tokens
194 .entry(tool_name.clone())
195 .or_default() += tokens;
196
197 if (tool_name == "Read" || tool_name == "read")
199 && let Some(path) = msg
200 .tool_call_id
201 .as_deref()
202 .and_then(|id| read_tool_paths.get(id))
203 {
204 let entry =
205 file_read_stats
206 .entry(path.clone())
207 .or_insert_with(|| FileReadAccum {
208 count: 0,
209 total_tokens: 0,
210 });
211 entry.count += 1;
212 entry.total_tokens += tokens;
213 }
214 }
215 crate::persistence::Role::System => {
216 }
218 }
219 }
220
221 for (path, accum) in file_read_stats {
223 if accum.count > 1 {
224 let avg_tokens = accum.total_tokens / accum.count;
225 let wasted = avg_tokens * (accum.count - 1);
226 analysis.duplicate_reads.insert(
227 path,
228 DuplicateRead {
229 count: accum.count,
230 wasted_tokens: wasted,
231 },
232 );
233 }
234 }
235
236 analysis
237}
238
/// Running tally of how often one file was read and what those reads cost.
struct FileReadAccum {
    // Number of Read tool results seen for this file.
    count: usize,
    // Combined estimated tokens across all those results.
    total_tokens: usize,
}
248
249fn estimate_message_tokens(msg: &Message) -> usize {
251 let content_len = msg.content.as_deref().map_or(0, |c| c.len());
252 let tc_len = msg.tool_calls.as_deref().map_or(0, |c| c.len());
253 ((content_len + tc_len) as f64 / CHARS_PER_TOKEN) as usize + PER_MESSAGE_OVERHEAD
254}
255
256fn estimate_str_tokens(s: &str) -> usize {
258 (s.len() as f64 / CHARS_PER_TOKEN) as usize
259}
260
261fn extract_tool_call_ids(
263 tc_json: &str,
264 id_to_tool: &mut HashMap<String, String>,
265 read_paths: &mut HashMap<String, PathBuf>,
266) {
267 let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
268 Ok(v) => v,
269 Err(_) => return,
270 };
271 for call in &calls {
272 let id = call.get("id").and_then(|v| v.as_str()).unwrap_or_default();
273 let name = call
274 .get("function_name")
275 .or_else(|| call.get("name"))
276 .and_then(|v| v.as_str())
277 .unwrap_or("unknown");
278
279 if !id.is_empty() {
280 id_to_tool.insert(id.to_string(), name.to_string());
281 }
282
283 if (name == "Read" || name == "read")
285 && let Some(args) = call.get("arguments")
286 {
287 let args_obj: Option<serde_json::Value> = if let Some(s) = args.as_str() {
289 serde_json::from_str(s).ok()
290 } else {
291 Some(args.clone())
292 };
293 if let Some(obj) = args_obj
294 && let Some(path) = obj
295 .get("file_path")
296 .or_else(|| obj.get("path"))
297 .and_then(|v| v.as_str())
298 {
299 read_paths.insert(id.to_string(), PathBuf::from(path));
300 }
301 }
302 }
303}
304
305fn distribute_tool_request_tokens(
307 tc_json: &str,
308 total_tool_tokens: usize,
309 request_map: &mut HashMap<String, usize>,
310) {
311 let calls: Vec<serde_json::Value> = match serde_json::from_str(tc_json) {
312 Ok(v) => v,
313 Err(_) => return,
314 };
315 if calls.is_empty() {
316 return;
317 }
318 let per_call = total_tool_tokens / calls.len();
319 for call in &calls {
320 let name = call
321 .get("function_name")
322 .or_else(|| call.get("name"))
323 .and_then(|v| v.as_str())
324 .unwrap_or("unknown");
325 *request_map.entry(name.to_string()).or_default() += per_call;
326 }
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::{Message, Role};

    /// Build a minimal `Message` carrying only the fields the analyzer reads;
    /// everything else is zeroed/None.
    fn msg(
        role: Role,
        content: Option<&str>,
        tool_calls: Option<&str>,
        tool_call_id: Option<&str>,
    ) -> Message {
        Message {
            id: 0,
            session_id: String::new(),
            role,
            content: content.map(String::from),
            full_content: None,
            tool_calls: tool_calls.map(String::from),
            tool_call_id: tool_call_id.map(String::from),
            prompt_tokens: None,
            completion_tokens: None,
            cache_read_tokens: None,
            cache_creation_tokens: None,
            thinking_tokens: None,
            thinking_content: None,
            created_at: None,
        }
    }

    #[test]
    fn test_empty_history() {
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.total, 0);
        assert_eq!(analysis.human_tokens, 0);
        assert_eq!(analysis.assistant_tokens, 0);
        assert!(analysis.tool_result_tokens.is_empty());
        assert!(analysis.duplicate_reads.is_empty());
    }

    #[test]
    fn test_simple_conversation() {
        let messages = vec![
            msg(Role::User, Some("Hello world"), None, None),
            msg(Role::Assistant, Some("Hi there!"), None, None),
        ];
        let analysis = analyze_context(&messages);
        // Per-message overhead guarantees non-zero counts even for short text.
        assert!(analysis.total > 0);
        assert!(analysis.human_tokens > 0);
        assert!(analysis.assistant_tokens > 0);
        assert_eq!(analysis.total_tool_result_tokens(), 0);
    }

    #[test]
    fn test_tool_call_attribution() {
        // Tool result "tc_1" should be attributed to "Read" via the id map.
        let tc_json =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc_json), None),
            msg(
                Role::Tool,
                Some("contents of foo.rs which is a pretty long file with lots of code"),
                None,
                Some("tc_1"),
            ),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(*analysis.tool_result_tokens.get("Read").unwrap() > 0);
    }

    #[test]
    fn test_duplicate_read_detection() {
        // foo.rs read twice (duplicate); bar.rs read once (not a duplicate).
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"foo.rs\"}"}]"#;
        let tc3 =
            r#"[{"id":"tc_3","function_name":"Read","arguments":"{\"file_path\":\"bar.rs\"}"}]"#;

        let messages = vec![
            msg(Role::User, Some("Read foo.rs"), None, None),
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_1")),
            msg(Role::User, Some("Read it again"), None, None),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some("contents of foo"), None, Some("tc_2")),
            msg(Role::User, Some("Read bar.rs"), None, None),
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some("contents of bar"), None, Some("tc_3")),
        ];

        let analysis = analyze_context(&messages);

        let foo_path = PathBuf::from("foo.rs");
        assert!(analysis.duplicate_reads.contains_key(&foo_path));
        assert_eq!(analysis.duplicate_reads[&foo_path].count, 2);
        assert!(analysis.duplicate_reads[&foo_path].wasted_tokens > 0);

        let bar_path = PathBuf::from("bar.rs");
        assert!(!analysis.duplicate_reads.contains_key(&bar_path));
    }

    #[test]
    fn test_top_tool_results() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Bash","arguments":"{}"}]"#;

        // Read's result is 10x larger, so it must rank first.
        let long_content = "x".repeat(1000);
        let short_content = "y".repeat(100);

        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&long_content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&short_content), None, Some("tc_2")),
        ];

        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(5);
        assert!(!top.is_empty());
        assert_eq!(top[0].0, "Read");
    }

    #[test]
    fn test_summary_format() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, Some("let me read"), Some(tc1), None),
            msg(Role::Tool, Some("file contents here"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let summary = analysis.summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        assert!(summary.contains("Tool results:"));
    }

    #[test]
    fn test_multiple_tool_calls_in_one_message() {
        // One assistant message invoking two tools; each result attributes
        // to its own tool via the respective call id.
        let tc = r#"[
            {"id":"tc_1","function_name":"Read","arguments":"{}"},
            {"id":"tc_2","function_name":"Grep","arguments":"{}"}
        ]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("read result"), None, Some("tc_1")),
            msg(Role::Tool, Some("grep result"), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.tool_result_tokens.contains_key("Read"));
        assert!(analysis.tool_result_tokens.contains_key("Grep"));
    }

    #[test]
    fn test_total_tool_request_tokens_counted() {
        let tc =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"big.rs\"}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("result"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_tool_request_tokens() > 0,
            "tool request tokens should be counted"
        );
    }

    #[test]
    fn test_tool_result_percent_calculation() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let big_result = "x".repeat(500);
        let messages = vec![
            msg(Role::User, Some("hello"), None, None),
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some(&big_result), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        let pct = analysis.tool_result_percent();
        assert!(pct > 0 && pct <= 100, "percent should be 1-100, got {pct}");
        assert!(
            pct > analysis.human_tokens * 100 / analysis.total,
            "tool result percent should exceed human percent for large results"
        );
    }

    #[test]
    fn test_tool_result_percent_zero_when_no_context() {
        // Guard against division by zero on an empty history.
        let analysis = analyze_context(&[]);
        assert_eq!(analysis.tool_result_percent(), 0);
        assert_eq!(analysis.duplicate_read_percent(), 0);
    }

    #[test]
    fn test_total_duplicate_waste_sums_correctly() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"f.rs\"}"}]"#;
        let content = "y".repeat(200);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total_duplicate_waste() > 0,
            "duplicate read of f.rs should produce non-zero waste"
        );
        // The accessor must agree with a manual sum over the map.
        assert_eq!(
            analysis.total_duplicate_waste(),
            analysis
                .duplicate_reads
                .values()
                .map(|d| d.wasted_tokens)
                .sum::<usize>()
        );
    }

    #[test]
    fn test_duplicate_read_percent_nonzero() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"g.rs\"}"}]"#;
        let content = "z".repeat(400);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.duplicate_read_percent() > 0,
            "duplicate reads should produce non-zero percent"
        );
    }

    #[test]
    fn test_top_tool_results_empty_when_n_zero() {
        let tc = r#"[{"id":"tc_1","function_name":"Read","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc), None),
            msg(Role::Tool, Some("stuff"), None, Some("tc_1")),
        ];
        let analysis = analyze_context(&messages);
        assert!(analysis.top_tool_results(0).is_empty());
    }

    #[test]
    fn test_top_tool_results_sorted_descending() {
        let tc1 = r#"[{"id":"tc_1","function_name":"Bash","arguments":"{}"}]"#;
        let tc2 = r#"[{"id":"tc_2","function_name":"Read","arguments":"{}"}]"#;
        let tc3 = r#"[{"id":"tc_3","function_name":"Grep","arguments":"{}"}]"#;
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&"a".repeat(100)), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&"b".repeat(2000)), None, Some("tc_2")),
            msg(Role::Assistant, None, Some(tc3), None),
            msg(Role::Tool, Some(&"c".repeat(500)), None, Some("tc_3")),
        ];
        let analysis = analyze_context(&messages);
        let top = analysis.top_tool_results(3);
        assert_eq!(top.len(), 3);
        // Distinct result sizes make the descending order deterministic.
        assert_eq!(top[0].0, "Read");
        assert_eq!(top[1].0, "Grep");
        assert_eq!(top[2].0, "Bash");
        assert!(top[0].1 >= top[1].1);
        assert!(top[1].1 >= top[2].1);
    }

    #[test]
    fn test_system_tokens_counted_in_total() {
        let big_system = "S".repeat(1000);
        let messages = vec![msg(Role::System, Some(&big_system), None, None)];
        let analysis = analyze_context(&messages);
        assert!(
            analysis.total > 0,
            "system message should contribute to total token count"
        );
        assert_eq!(
            analysis.human_tokens, 0,
            "system tokens should not be counted as human"
        );
    }

    #[test]
    fn test_summary_with_no_tool_use() {
        // Optional summary sections must be omitted when empty.
        let messages = vec![
            msg(Role::User, Some("hi"), None, None),
            msg(Role::Assistant, Some("hello"), None, None),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(summary.contains("Context:"));
        assert!(summary.contains("Human:"));
        assert!(!summary.contains("Top tool results:"));
        assert!(!summary.contains("Duplicate reads:"));
    }

    #[test]
    fn test_summary_includes_duplicate_waste_line() {
        let tc1 =
            r#"[{"id":"tc_1","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let tc2 =
            r#"[{"id":"tc_2","function_name":"Read","arguments":"{\"file_path\":\"h.rs\"}"}]"#;
        let content = "D".repeat(500);
        let messages = vec![
            msg(Role::Assistant, None, Some(tc1), None),
            msg(Role::Tool, Some(&content), None, Some("tc_1")),
            msg(Role::Assistant, None, Some(tc2), None),
            msg(Role::Tool, Some(&content), None, Some("tc_2")),
        ];
        let summary = analyze_context(&messages).summary();
        assert!(
            summary.contains("Duplicate reads:"),
            "summary should mention duplicate reads when present"
        );
    }
}