devboy_format_pipeline/dedup.rs
1//! Hint-based deduplication cache for tool responses in multi-turn agents.
2//!
3//! When an LLM agent re-requests data it has already seen (file re-reads after
4//! unrelated edits, MCP pipeline polls, repeated status checks), the response
5//! bytes are often identical. Instead of re-emitting the full payload, the
6//! pipeline emits a compact reference hint that points at an earlier
7//! `tool_call_id` still present in the agent's context window.
8//!
9//! On anonymized Claude Code session logs, this mechanism recovers ≈33% of
10//! total tokens (see `docs/research/paper-2-mckp-format-adaptive.md`).
11//!
12//! # Design
13//!
14//! - **Content-hash fingerprint** — SHA-256/128-bit prefix. Collisions are
15//! negligible for session volumes (≤10⁵ responses).
16//! - **Bounded LRU** — capacity defaults to 5; 95% of real-world savings
17//! concentrate in the most recent 3 responses.
18//! - **Partition-scoped** — cleared on `on_compaction_boundary()` (corresponds
19//! to Claude Code's `compact_boundary` JSONL event); older references are no
20//! longer reachable from the agent's context.
21//! - **Mutation-aware** — `FileRead` entries whose path is later mutated by
22//! `FileMutate` are invalidated immediately. Prevents stale hints after an
23//! `Edit` or `Write`.
24//!
25//! # Usage
26//!
27//! ```
28//! use devboy_format_pipeline::dedup::{DedupCache, DedupDecision, ToolKind, content_hash, render_reference_hint};
29//!
30//! let mut cache = DedupCache::with_capacity(5);
31//! let body = "pipeline 12345 status=success duration=120s";
32//! let fp = content_hash(body.as_bytes());
33//!
34//! match cache.check(&fp) {
35//! DedupDecision::Hint { reference_tool_call_id } => {
36//! let hint = render_reference_hint(&reference_tool_call_id);
37//! // emit `hint` instead of `body`
38//! }
39//! DedupDecision::Fresh => {
40//! // emit `body` normally and cache it
41//! cache.insert(fp, "tc_42", ToolKind::Other, None, "Bash");
42//! }
43//! }
44//! ```
45
46use std::collections::VecDeque;
47use std::sync::Arc;
48
49use sha2::{Digest, Sha256};
50
51use crate::near_ref::{DeltaField, NearRefConfig, extract_delta};
52
53/// 128-bit content fingerprint (first 16 bytes of SHA-256).
54pub type ContentHash = [u8; 16];
55
56/// Compute the content hash used by [`DedupCache`].
57pub fn content_hash(content: &[u8]) -> ContentHash {
58 let digest = Sha256::digest(content);
59 let mut out = [0u8; 16];
60 out.copy_from_slice(&digest[..16]);
61 out
62}
63
64/// Outcome of a [`DedupCache::check`] lookup.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum DedupDecision {
67 /// Content not seen in the current partition — caller should emit the
68 /// response normally and then `insert` it into the cache.
69 Fresh,
70 /// Content matches a cached response — caller should emit a reference
71 /// hint (see [`render_reference_hint`]) instead of the full payload.
72 Hint {
73 /// Tool-call id of the cached response that the new request matches.
74 reference_tool_call_id: String,
75 },
76}
77
78/// Classification used to drive mutation-based invalidation.
79#[derive(Debug, Clone, Copy, PartialEq, Eq)]
80pub enum ToolKind {
81 /// Reads a file. Cached responses may become stale if the underlying file
82 /// is mutated and are invalidated by [`DedupCache::invalidate_file`].
83 FileRead,
84 /// Mutates a file. Does not itself need to be cached (responses are small
85 /// and rarely repeated), but triggers invalidation of matching
86 /// `FileRead` entries.
87 FileMutate,
88 /// Any other tool (Bash, MCP, Agent, Web…). No implicit cross-tool
89 /// invalidation.
90 Other,
91}
92
93impl ToolKind {
94 /// Classify by standard Claude Code tool name. Unknown names → `Other`.
95 pub fn from_tool_name(name: &str) -> Self {
96 match name {
97 "Read" | "Grep" | "Glob" | "NotebookRead" => Self::FileRead,
98 "Edit" | "Write" | "MultiEdit" | "NotebookEdit" => Self::FileMutate,
99 _ => Self::Other,
100 }
101 }
102}
103
104#[derive(Debug, Clone)]
105struct CacheEntry {
106 hash: ContentHash,
107 tool_call_id: String,
108 tool_kind: ToolKind,
109 /// Anonymized hash of the primary file-path argument (None for non-file
110 /// tools). Used as the invalidation key.
111 file_path_hash: Option<String>,
112 /// Optional cached body for Type-2 (near-duplicate) reference hints.
113 /// Populated only when the caller used `insert_with_body` (i.e. when
114 /// `dedup.near_ref_enabled` is on). `Arc<String>` keeps cloning
115 /// cheap when the cache holds multiple references to the same body.
116 body_snapshot: Option<Arc<String>>,
117 /// Tool name (anonymized) — drives Paper 3 cross-tool invalidation:
118 /// e.g. `update_issue` declares `invalidates = ["get_issue"]` in
119 /// its `ToolValueModel`, and [`DedupCache::invalidate_by_tool`]
120 /// reads this field to drop matching entries.
121 tool_name: String,
122}
123
124/// Session-scoped deduplication cache.
125///
126/// Maintains a bounded LRU of recent `(content_hash → tool_call_id)` mappings.
127/// A single instance is intended to live for one agent session (or one
128/// context partition, whichever is shorter).
129#[derive(Debug)]
130pub struct DedupCache {
131 entries: VecDeque<CacheEntry>,
132 capacity: usize,
133 partition: u64,
134}
135
136impl DedupCache {
137 /// Construct a cache with the given LRU capacity.
138 ///
139 /// Recommended default: `5`. Empirically, 95% of deduplication savings in
140 /// Claude Code logs come from the three most-recent responses; capacities
141 /// above ~5 yield diminishing returns.
142 pub fn with_capacity(capacity: usize) -> Self {
143 Self {
144 entries: VecDeque::with_capacity(capacity.max(1)),
145 capacity: capacity.max(1),
146 partition: 0,
147 }
148 }
149
150 /// Current context partition counter. Increments on each
151 /// `on_compaction_boundary()` call.
152 pub fn partition(&self) -> u64 {
153 self.partition
154 }
155
156 /// Number of entries currently cached.
157 pub fn len(&self) -> usize {
158 self.entries.len()
159 }
160
161 /// `true` if no entries are cached.
162 pub fn is_empty(&self) -> bool {
163 self.entries.is_empty()
164 }
165
166 /// LRU lookup that refreshes recency on hits.
167 ///
168 /// A match promotes the entry to the most-recently-used position, so
169 /// repeated references protect frequently-used content from eviction
170 /// when new responses are inserted.
171 pub fn check(&mut self, hash: &ContentHash) -> DedupDecision {
172 if let Some(idx) = self.entries.iter().position(|e| &e.hash == hash) {
173 // Move the matched entry to the back (most-recent position).
174 let entry = self
175 .entries
176 .remove(idx)
177 .expect("index came from VecDeque::position");
178 let reference_tool_call_id = entry.tool_call_id.clone();
179 self.entries.push_back(entry);
180 return DedupDecision::Hint {
181 reference_tool_call_id,
182 };
183 }
184 DedupDecision::Fresh
185 }
186
187 /// Insert a fresh response. Evicts the oldest entry if the cache is at
188 /// capacity.
189 ///
190 /// `tool_name` is the anonymized tool that produced the response —
191 /// used by [`Self::invalidate_by_tool`] for Paper 3 cross-tool
192 /// invalidation. Pass an empty string when the tool is unknown
193 /// (cross-tool invalidation will simply never match it).
194 pub fn insert(
195 &mut self,
196 hash: ContentHash,
197 tool_call_id: impl Into<String>,
198 tool_kind: ToolKind,
199 file_path_hash: Option<String>,
200 tool_name: impl Into<String>,
201 ) {
202 self.insert_inner(
203 hash,
204 tool_call_id.into(),
205 tool_kind,
206 file_path_hash,
207 None,
208 tool_name.into(),
209 );
210 }
211
212 /// Insert a fresh response and retain its body for Type-2
213 /// (near-duplicate) hint extraction. Used when
214 /// `dedup.near_ref_enabled` is on; otherwise prefer [`Self::insert`]
215 /// to avoid the extra allocation.
216 pub fn insert_with_body(
217 &mut self,
218 hash: ContentHash,
219 tool_call_id: impl Into<String>,
220 tool_kind: ToolKind,
221 file_path_hash: Option<String>,
222 body: Arc<String>,
223 tool_name: impl Into<String>,
224 ) {
225 self.insert_inner(
226 hash,
227 tool_call_id.into(),
228 tool_kind,
229 file_path_hash,
230 Some(body),
231 tool_name.into(),
232 );
233 }
234
235 fn insert_inner(
236 &mut self,
237 hash: ContentHash,
238 tool_call_id: String,
239 tool_kind: ToolKind,
240 file_path_hash: Option<String>,
241 body_snapshot: Option<Arc<String>>,
242 tool_name: String,
243 ) {
244 if self.entries.len() >= self.capacity {
245 self.entries.pop_front();
246 }
247 self.entries.push_back(CacheEntry {
248 hash,
249 tool_call_id,
250 tool_kind,
251 file_path_hash,
252 body_snapshot,
253 tool_name,
254 });
255 }
256
257 /// Look for a Type-2 near-duplicate match in the cache. Walks entries
258 /// from newest to oldest; the first one whose retained body produces
259 /// a valid delta against `new_body` (per [`extract_delta`]) wins.
260 /// Returns the matched `tool_call_id` plus the field-level deltas.
261 ///
262 /// Returns `None` when:
263 /// - no entry has a body snapshot,
264 /// - no entry's delta against `new_body` clears the eligibility
265 /// gate (size, scalar-only, key-set match),
266 /// - or `new_body` itself is too short to bother.
267 pub fn find_near_ref(
268 &self,
269 new_body: &str,
270 config: &NearRefConfig,
271 ) -> Option<(String, Vec<DeltaField>)> {
272 for entry in self.entries.iter().rev() {
273 let Some(body) = entry.body_snapshot.as_ref() else {
274 continue;
275 };
276 if let Some(deltas) = extract_delta(body.as_str(), new_body, config) {
277 return Some((entry.tool_call_id.clone(), deltas));
278 }
279 }
280 None
281 }
282
283 /// Invalidate all cached `FileRead` entries whose path hash matches.
284 /// Returns the number of entries dropped.
285 ///
286 /// Callers typically invoke this from a `FileMutate` tool response
287 /// handler to prevent emitting stale hints for a subsequently re-read file.
288 pub fn invalidate_file(&mut self, file_path_hash: &str) -> usize {
289 let before = self.entries.len();
290 self.entries.retain(|e| {
291 !(e.tool_kind == ToolKind::FileRead
292 && e.file_path_hash.as_deref() == Some(file_path_hash))
293 });
294 before - self.entries.len()
295 }
296
297 /// Invalidate every entry whose `tool_name` appears in
298 /// `invalidates`. Drives Paper 3 cross-tool invalidation: a writer
299 /// (`update_issue`, `Edit`, `Write`, …) declares which read tools
300 /// its `ToolValueModel.invalidates` list, and the runtime calls this
301 /// method right after the writer's response is processed.
302 ///
303 /// Returns the number of entries dropped. Empty `invalidates` is a
304 /// no-op (returns 0) — used by tools that do not affect any cache.
305 pub fn invalidate_by_tool(&mut self, invalidates: &[String]) -> usize {
306 if invalidates.is_empty() {
307 return 0;
308 }
309 let before = self.entries.len();
310 self.entries
311 .retain(|e| !invalidates.iter().any(|t| t == &e.tool_name));
312 before - self.entries.len()
313 }
314
315 /// Clear the cache and advance the partition counter.
316 ///
317 /// Called when the LLM host emits a context-compaction boundary
318 /// (Claude Code: `compact_boundary` event). Older references are no
319 /// longer reachable from the agent's context, so they must not be
320 /// referenced by new hints.
321 pub fn on_compaction_boundary(&mut self) {
322 self.entries.clear();
323 self.partition = self.partition.saturating_add(1);
324 }
325}
326
327impl Default for DedupCache {
328 /// Equivalent to `DedupCache::with_capacity(5)`.
329 fn default() -> Self {
330 Self::with_capacity(5)
331 }
332}
333
334/// Verbosity level for rendered reference hints.
335///
336/// Verbosity is tunable via `AdaptiveConfig::dedup::hint_verbosity` so
337/// deployments can trade one or two tokens of clarity for explicit
338/// byte-identity / source-tool metadata.
339#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
340pub enum HintVerbosity {
341 /// `> [ref: tc_42]` (~8 tokens)
342 Terse,
343 /// `> [ref: tc_42, byte-identical]` (~11 tokens, default)
344 #[default]
345 Standard,
346 /// `> [ref: tc_42, byte-identical, from: Read]` (~15 tokens)
347 Verbose,
348}
349
350/// Render a reference hint using the default (Standard) verbosity.
351///
352/// Kept as a convenience wrapper around [`render_reference_hint_with`]
353/// for callers that do not thread configuration yet.
354pub fn render_reference_hint(tool_call_id: &str) -> String {
355 render_reference_hint_with(tool_call_id, HintVerbosity::Standard, None)
356}
357
358/// Render a reference hint at the requested verbosity.
359///
360/// Forms (measured with `cl100k_base`):
361///
362/// ```text
363/// Terse > [ref: tc_42] ~8 tok
364/// Standard > [ref: tc_42, byte-identical] ~11 tok
365/// Verbose > [ref: tc_42, byte-identical, from: Read] ~15 tok
366/// ```
367///
368/// `source_tool`, when provided, is included only in the `Verbose`
369/// form — earlier verbosities intentionally drop it to stay terse.
370pub fn render_reference_hint_with(
371 tool_call_id: &str,
372 verbosity: HintVerbosity,
373 source_tool: Option<ToolKind>,
374) -> String {
375 match verbosity {
376 HintVerbosity::Terse => format!("> [ref: {}]", tool_call_id),
377 HintVerbosity::Standard => format!("> [ref: {}, byte-identical]", tool_call_id),
378 HintVerbosity::Verbose => {
379 let tool_tag = match source_tool {
380 Some(ToolKind::FileRead) => "file-read",
381 Some(ToolKind::FileMutate) => "file-mutate",
382 Some(ToolKind::Other) => "other",
383 None => "",
384 };
385 if tool_tag.is_empty() {
386 format!("> [ref: {}, byte-identical]", tool_call_id)
387 } else {
388 format!(
389 "> [ref: {}, byte-identical, from: {}]",
390 tool_call_id, tool_tag
391 )
392 }
393 }
394 }
395}
396
397#[cfg(test)]
398mod tests {
399 use super::*;
400
401 fn h(s: &str) -> ContentHash {
402 content_hash(s.as_bytes())
403 }
404
405 #[test]
406 fn fresh_response_then_duplicate_hits() {
407 let mut c = DedupCache::with_capacity(5);
408 let fp = h("pipeline 12345 status=success");
409 assert_eq!(c.check(&fp), DedupDecision::Fresh);
410 c.insert(fp, "tc_1", ToolKind::Other, None, "");
411 match c.check(&fp) {
412 DedupDecision::Hint {
413 reference_tool_call_id,
414 } => assert_eq!(reference_tool_call_id, "tc_1"),
415 other => panic!("expected hint, got {other:?}"),
416 }
417 }
418
419 #[test]
420 fn distinct_content_is_fresh() {
421 let mut c = DedupCache::with_capacity(5);
422 c.insert(h("A"), "tc_1", ToolKind::Other, None, "");
423 assert_eq!(c.check(&h("B")), DedupDecision::Fresh);
424 }
425
426 #[test]
427 fn lru_evicts_oldest_when_full() {
428 let mut c = DedupCache::with_capacity(2);
429 c.insert(h("one"), "tc_1", ToolKind::Other, None, "");
430 c.insert(h("two"), "tc_2", ToolKind::Other, None, "");
431 c.insert(h("three"), "tc_3", ToolKind::Other, None, "");
432 assert_eq!(c.check(&h("one")), DedupDecision::Fresh);
433 assert!(matches!(c.check(&h("two")), DedupDecision::Hint { .. }));
434 assert!(matches!(c.check(&h("three")), DedupDecision::Hint { .. }));
435 }
436
437 #[test]
438 fn mutation_invalidates_same_file_read() {
439 let mut c = DedupCache::with_capacity(5);
440 let file = "abc12345".to_string();
441 let content = h("line1\nline2");
442 c.insert(
443 content,
444 "tc_1",
445 ToolKind::FileRead,
446 Some(file.clone()),
447 "Read",
448 );
449 assert_eq!(c.invalidate_file(&file), 1);
450 assert_eq!(c.check(&content), DedupDecision::Fresh);
451 }
452
453 #[test]
454 fn mutation_on_different_file_preserves_entry() {
455 let mut c = DedupCache::with_capacity(5);
456 let content = h("irrelevant file body");
457 c.insert(
458 content,
459 "tc_1",
460 ToolKind::FileRead,
461 Some("hash_a".into()),
462 "Read",
463 );
464 assert_eq!(c.invalidate_file("hash_b"), 0);
465 assert!(matches!(c.check(&content), DedupDecision::Hint { .. }));
466 }
467
468 #[test]
469 fn compaction_boundary_clears_and_advances_partition() {
470 let mut c = DedupCache::with_capacity(5);
471 let x = h("x");
472 c.insert(x, "tc_1", ToolKind::Other, None, "");
473 assert_eq!(c.partition(), 0);
474 c.on_compaction_boundary();
475 assert_eq!(c.partition(), 1);
476 assert_eq!(c.check(&x), DedupDecision::Fresh);
477 assert!(c.is_empty());
478 }
479
480 #[test]
481 fn invalidate_by_tool_drops_matching_entries() {
482 // Paper 3 cross-tool invalidation: `update_issue` declares
483 // `invalidates = ["get_issue", "get_issues"]`. After it runs,
484 // any cached responses for those tools must be dropped.
485 let mut c = DedupCache::with_capacity(5);
486 c.insert(h("body_a"), "tc_a", ToolKind::Other, None, "get_issue");
487 c.insert(h("body_b"), "tc_b", ToolKind::Other, None, "get_issues");
488 c.insert(h("body_c"), "tc_c", ToolKind::Other, None, "get_pipeline");
489 assert_eq!(c.len(), 3);
490 let dropped = c.invalidate_by_tool(&["get_issue".to_string(), "get_issues".to_string()]);
491 assert_eq!(dropped, 2);
492 assert_eq!(c.len(), 1);
493 // get_pipeline survives.
494 assert!(matches!(c.check(&h("body_c")), DedupDecision::Hint { .. }));
495 assert_eq!(c.check(&h("body_a")), DedupDecision::Fresh);
496 assert_eq!(c.check(&h("body_b")), DedupDecision::Fresh);
497 }
498
499 #[test]
500 fn invalidate_by_tool_empty_list_is_noop() {
501 let mut c = DedupCache::with_capacity(5);
502 c.insert(h("a"), "tc_a", ToolKind::Other, None, "Bash");
503 assert_eq!(c.invalidate_by_tool(&[]), 0);
504 assert_eq!(c.len(), 1);
505 }
506
507 #[test]
508 fn tool_kind_classification() {
509 assert_eq!(ToolKind::from_tool_name("Read"), ToolKind::FileRead);
510 assert_eq!(ToolKind::from_tool_name("Grep"), ToolKind::FileRead);
511 assert_eq!(ToolKind::from_tool_name("Edit"), ToolKind::FileMutate);
512 assert_eq!(ToolKind::from_tool_name("Write"), ToolKind::FileMutate);
513 assert_eq!(ToolKind::from_tool_name("MultiEdit"), ToolKind::FileMutate);
514 assert_eq!(ToolKind::from_tool_name("Bash"), ToolKind::Other);
515 assert_eq!(
516 ToolKind::from_tool_name("mcp__x__get_issues"),
517 ToolKind::Other
518 );
519 }
520
521 #[test]
522 fn reference_hint_shape() {
523 let hint = render_reference_hint("tc_42");
524 assert!(hint.starts_with("> [ref:"));
525 assert!(hint.contains("tc_42"));
526 assert!(hint.contains("byte-identical"));
527 // Hint must stay small vs any realistic response.
528 assert!(hint.len() < 40);
529 }
530
531 #[test]
532 fn hint_verbosity_variants() {
533 let terse = render_reference_hint_with("tc_1", HintVerbosity::Terse, None);
534 assert_eq!(terse, "> [ref: tc_1]");
535
536 let standard =
537 render_reference_hint_with("tc_1", HintVerbosity::Standard, Some(ToolKind::FileRead));
538 // source_tool is dropped below Verbose
539 assert_eq!(standard, "> [ref: tc_1, byte-identical]");
540
541 let verbose =
542 render_reference_hint_with("tc_1", HintVerbosity::Verbose, Some(ToolKind::FileRead));
543 assert!(verbose.contains("byte-identical"));
544 assert!(verbose.contains("file-read"));
545 }
546
547 #[test]
548 fn capacity_zero_is_coerced_to_one() {
549 let c = DedupCache::with_capacity(0);
550 assert_eq!(c.len(), 0);
551 }
552
553 #[test]
554 fn edit_then_reread_scenario() {
555 // Canonical stale-hint scenario from the paper:
556 // 1. Read(foo.py) → content_A → cache it
557 // 2. Edit(foo.py) → invalidate foo.py entries
558 // 3. Read(foo.py) → content_B or A? — must be treated Fresh regardless
559 let mut c = DedupCache::with_capacity(5);
560 let file = "foo_hash".to_string();
561 let content_a = h("original body");
562 c.insert(
563 content_a,
564 "tc_1",
565 ToolKind::FileRead,
566 Some(file.clone()),
567 "Read",
568 );
569 // Edit invalidates
570 assert_eq!(c.invalidate_file(&file), 1);
571 // Even if the re-read content coincidentally matches, we must not hint
572 // against the stale entry (it is gone). Hence any subsequent content is Fresh.
573 assert_eq!(c.check(&content_a), DedupDecision::Fresh);
574 }
575}