Skip to main content

devboy_format_pipeline/
dedup.rs

1//! Hint-based deduplication cache for tool responses in multi-turn agents.
2//!
3//! When an LLM agent re-requests data it has already seen (file re-reads after
4//! unrelated edits, MCP pipeline polls, repeated status checks), the response
5//! bytes are often identical. Instead of re-emitting the full payload, the
6//! pipeline emits a compact reference hint that points at an earlier
7//! `tool_call_id` still present in the agent's context window.
8//!
9//! On anonymized Claude Code session logs, this mechanism recovers ≈33% of
10//! total tokens (see `docs/research/paper-2-mckp-format-adaptive.md`).
11//!
12//! # Design
13//!
14//! - **Content-hash fingerprint** — SHA-256/128-bit prefix. Collisions are
15//!   negligible for session volumes (≤10⁵ responses).
16//! - **Bounded LRU** — capacity defaults to 5; 95% of real-world savings
17//!   concentrate in the most recent 3 responses.
18//! - **Partition-scoped** — cleared on `on_compaction_boundary()` (corresponds
19//!   to Claude Code's `compact_boundary` JSONL event); older references are no
20//!   longer reachable from the agent's context.
21//! - **Mutation-aware** — `FileRead` entries whose path is later mutated by
22//!   `FileMutate` are invalidated immediately. Prevents stale hints after an
23//!   `Edit` or `Write`.
24//!
25//! # Usage
26//!
27//! ```
28//! use devboy_format_pipeline::dedup::{DedupCache, DedupDecision, ToolKind, content_hash, render_reference_hint};
29//!
30//! let mut cache = DedupCache::with_capacity(5);
31//! let body = "pipeline 12345 status=success duration=120s";
32//! let fp = content_hash(body.as_bytes());
33//!
34//! match cache.check(&fp) {
35//!     DedupDecision::Hint { reference_tool_call_id } => {
36//!         let hint = render_reference_hint(&reference_tool_call_id);
37//!         // emit `hint` instead of `body`
38//!     }
39//!     DedupDecision::Fresh => {
40//!         // emit `body` normally and cache it
41//!         cache.insert(fp, "tc_42", ToolKind::Other, None, "Bash");
42//!     }
43//! }
44//! ```
45
46use std::collections::VecDeque;
47use std::sync::Arc;
48
49use sha2::{Digest, Sha256};
50
51use crate::near_ref::{DeltaField, NearRefConfig, extract_delta};
52
53/// 128-bit content fingerprint (first 16 bytes of SHA-256).
54pub type ContentHash = [u8; 16];
55
56/// Compute the content hash used by [`DedupCache`].
57pub fn content_hash(content: &[u8]) -> ContentHash {
58    let digest = Sha256::digest(content);
59    let mut out = [0u8; 16];
60    out.copy_from_slice(&digest[..16]);
61    out
62}
63
64/// Outcome of a [`DedupCache::check`] lookup.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum DedupDecision {
67    /// Content not seen in the current partition — caller should emit the
68    /// response normally and then `insert` it into the cache.
69    Fresh,
70    /// Content matches a cached response — caller should emit a reference
71    /// hint (see [`render_reference_hint`]) instead of the full payload.
72    Hint {
73        /// Tool-call id of the cached response that the new request matches.
74        reference_tool_call_id: String,
75    },
76}
77
78/// Classification used to drive mutation-based invalidation.
79#[derive(Debug, Clone, Copy, PartialEq, Eq)]
80pub enum ToolKind {
81    /// Reads a file. Cached responses may become stale if the underlying file
82    /// is mutated and are invalidated by [`DedupCache::invalidate_file`].
83    FileRead,
84    /// Mutates a file. Does not itself need to be cached (responses are small
85    /// and rarely repeated), but triggers invalidation of matching
86    /// `FileRead` entries.
87    FileMutate,
88    /// Any other tool (Bash, MCP, Agent, Web…). No implicit cross-tool
89    /// invalidation.
90    Other,
91}
92
93impl ToolKind {
94    /// Classify by standard Claude Code tool name. Unknown names → `Other`.
95    pub fn from_tool_name(name: &str) -> Self {
96        match name {
97            "Read" | "Grep" | "Glob" | "NotebookRead" => Self::FileRead,
98            "Edit" | "Write" | "MultiEdit" | "NotebookEdit" => Self::FileMutate,
99            _ => Self::Other,
100        }
101    }
102}
103
104#[derive(Debug, Clone)]
105struct CacheEntry {
106    hash: ContentHash,
107    tool_call_id: String,
108    tool_kind: ToolKind,
109    /// Anonymized hash of the primary file-path argument (None for non-file
110    /// tools). Used as the invalidation key.
111    file_path_hash: Option<String>,
112    /// Optional cached body for Type-2 (near-duplicate) reference hints.
113    /// Populated only when the caller used `insert_with_body` (i.e. when
114    /// `dedup.near_ref_enabled` is on). `Arc<String>` keeps cloning
115    /// cheap when the cache holds multiple references to the same body.
116    body_snapshot: Option<Arc<String>>,
117    /// Tool name (anonymized) — drives Paper 3 cross-tool invalidation:
118    /// e.g. `update_issue` declares `invalidates = ["get_issue"]` in
119    /// its `ToolValueModel`, and [`DedupCache::invalidate_by_tool`]
120    /// reads this field to drop matching entries.
121    tool_name: String,
122}
123
124/// Session-scoped deduplication cache.
125///
126/// Maintains a bounded LRU of recent `(content_hash → tool_call_id)` mappings.
127/// A single instance is intended to live for one agent session (or one
128/// context partition, whichever is shorter).
129#[derive(Debug)]
130pub struct DedupCache {
131    entries: VecDeque<CacheEntry>,
132    capacity: usize,
133    partition: u64,
134}
135
136impl DedupCache {
137    /// Construct a cache with the given LRU capacity.
138    ///
139    /// Recommended default: `5`. Empirically, 95% of deduplication savings in
140    /// Claude Code logs come from the three most-recent responses; capacities
141    /// above ~5 yield diminishing returns.
142    pub fn with_capacity(capacity: usize) -> Self {
143        Self {
144            entries: VecDeque::with_capacity(capacity.max(1)),
145            capacity: capacity.max(1),
146            partition: 0,
147        }
148    }
149
150    /// Current context partition counter. Increments on each
151    /// `on_compaction_boundary()` call.
152    pub fn partition(&self) -> u64 {
153        self.partition
154    }
155
156    /// Number of entries currently cached.
157    pub fn len(&self) -> usize {
158        self.entries.len()
159    }
160
161    /// `true` if no entries are cached.
162    pub fn is_empty(&self) -> bool {
163        self.entries.is_empty()
164    }
165
166    /// LRU lookup that refreshes recency on hits.
167    ///
168    /// A match promotes the entry to the most-recently-used position, so
169    /// repeated references protect frequently-used content from eviction
170    /// when new responses are inserted.
171    pub fn check(&mut self, hash: &ContentHash) -> DedupDecision {
172        if let Some(idx) = self.entries.iter().position(|e| &e.hash == hash) {
173            // Move the matched entry to the back (most-recent position).
174            let entry = self
175                .entries
176                .remove(idx)
177                .expect("index came from VecDeque::position");
178            let reference_tool_call_id = entry.tool_call_id.clone();
179            self.entries.push_back(entry);
180            return DedupDecision::Hint {
181                reference_tool_call_id,
182            };
183        }
184        DedupDecision::Fresh
185    }
186
187    /// Insert a fresh response. Evicts the oldest entry if the cache is at
188    /// capacity.
189    ///
190    /// `tool_name` is the anonymized tool that produced the response —
191    /// used by [`Self::invalidate_by_tool`] for Paper 3 cross-tool
192    /// invalidation. Pass an empty string when the tool is unknown
193    /// (cross-tool invalidation will simply never match it).
194    pub fn insert(
195        &mut self,
196        hash: ContentHash,
197        tool_call_id: impl Into<String>,
198        tool_kind: ToolKind,
199        file_path_hash: Option<String>,
200        tool_name: impl Into<String>,
201    ) {
202        self.insert_inner(
203            hash,
204            tool_call_id.into(),
205            tool_kind,
206            file_path_hash,
207            None,
208            tool_name.into(),
209        );
210    }
211
212    /// Insert a fresh response and retain its body for Type-2
213    /// (near-duplicate) hint extraction. Used when
214    /// `dedup.near_ref_enabled` is on; otherwise prefer [`Self::insert`]
215    /// to avoid the extra allocation.
216    pub fn insert_with_body(
217        &mut self,
218        hash: ContentHash,
219        tool_call_id: impl Into<String>,
220        tool_kind: ToolKind,
221        file_path_hash: Option<String>,
222        body: Arc<String>,
223        tool_name: impl Into<String>,
224    ) {
225        self.insert_inner(
226            hash,
227            tool_call_id.into(),
228            tool_kind,
229            file_path_hash,
230            Some(body),
231            tool_name.into(),
232        );
233    }
234
235    fn insert_inner(
236        &mut self,
237        hash: ContentHash,
238        tool_call_id: String,
239        tool_kind: ToolKind,
240        file_path_hash: Option<String>,
241        body_snapshot: Option<Arc<String>>,
242        tool_name: String,
243    ) {
244        if self.entries.len() >= self.capacity {
245            self.entries.pop_front();
246        }
247        self.entries.push_back(CacheEntry {
248            hash,
249            tool_call_id,
250            tool_kind,
251            file_path_hash,
252            body_snapshot,
253            tool_name,
254        });
255    }
256
257    /// Look for a Type-2 near-duplicate match in the cache. Walks entries
258    /// from newest to oldest; the first one whose retained body produces
259    /// a valid delta against `new_body` (per [`extract_delta`]) wins.
260    /// Returns the matched `tool_call_id` plus the field-level deltas.
261    ///
262    /// Returns `None` when:
263    /// - no entry has a body snapshot,
264    /// - no entry's delta against `new_body` clears the eligibility
265    ///   gate (size, scalar-only, key-set match),
266    /// - or `new_body` itself is too short to bother.
267    pub fn find_near_ref(
268        &self,
269        new_body: &str,
270        config: &NearRefConfig,
271    ) -> Option<(String, Vec<DeltaField>)> {
272        for entry in self.entries.iter().rev() {
273            let Some(body) = entry.body_snapshot.as_ref() else {
274                continue;
275            };
276            if let Some(deltas) = extract_delta(body.as_str(), new_body, config) {
277                return Some((entry.tool_call_id.clone(), deltas));
278            }
279        }
280        None
281    }
282
283    /// Invalidate all cached `FileRead` entries whose path hash matches.
284    /// Returns the number of entries dropped.
285    ///
286    /// Callers typically invoke this from a `FileMutate` tool response
287    /// handler to prevent emitting stale hints for a subsequently re-read file.
288    pub fn invalidate_file(&mut self, file_path_hash: &str) -> usize {
289        let before = self.entries.len();
290        self.entries.retain(|e| {
291            !(e.tool_kind == ToolKind::FileRead
292                && e.file_path_hash.as_deref() == Some(file_path_hash))
293        });
294        before - self.entries.len()
295    }
296
297    /// Invalidate every entry whose `tool_name` appears in
298    /// `invalidates`. Drives Paper 3 cross-tool invalidation: a writer
299    /// (`update_issue`, `Edit`, `Write`, …) declares which read tools
300    /// its `ToolValueModel.invalidates` list, and the runtime calls this
301    /// method right after the writer's response is processed.
302    ///
303    /// Returns the number of entries dropped. Empty `invalidates` is a
304    /// no-op (returns 0) — used by tools that do not affect any cache.
305    pub fn invalidate_by_tool(&mut self, invalidates: &[String]) -> usize {
306        if invalidates.is_empty() {
307            return 0;
308        }
309        let before = self.entries.len();
310        self.entries
311            .retain(|e| !invalidates.iter().any(|t| t == &e.tool_name));
312        before - self.entries.len()
313    }
314
315    /// Clear the cache and advance the partition counter.
316    ///
317    /// Called when the LLM host emits a context-compaction boundary
318    /// (Claude Code: `compact_boundary` event). Older references are no
319    /// longer reachable from the agent's context, so they must not be
320    /// referenced by new hints.
321    pub fn on_compaction_boundary(&mut self) {
322        self.entries.clear();
323        self.partition = self.partition.saturating_add(1);
324    }
325}
326
327impl Default for DedupCache {
328    /// Equivalent to `DedupCache::with_capacity(5)`.
329    fn default() -> Self {
330        Self::with_capacity(5)
331    }
332}
333
334/// Verbosity level for rendered reference hints.
335///
336/// Verbosity is tunable via `AdaptiveConfig::dedup::hint_verbosity` so
337/// deployments can trade one or two tokens of clarity for explicit
338/// byte-identity / source-tool metadata.
339#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
340pub enum HintVerbosity {
341    /// `> [ref: tc_42]` (~8 tokens)
342    Terse,
343    /// `> [ref: tc_42, byte-identical]` (~11 tokens, default)
344    #[default]
345    Standard,
346    /// `> [ref: tc_42, byte-identical, from: Read]` (~15 tokens)
347    Verbose,
348}
349
350/// Render a reference hint using the default (Standard) verbosity.
351///
352/// Kept as a convenience wrapper around [`render_reference_hint_with`]
353/// for callers that do not thread configuration yet.
354pub fn render_reference_hint(tool_call_id: &str) -> String {
355    render_reference_hint_with(tool_call_id, HintVerbosity::Standard, None)
356}
357
358/// Render a reference hint at the requested verbosity.
359///
360/// Forms (measured with `cl100k_base`):
361///
362/// ```text
363/// Terse     > [ref: tc_42]                               ~8 tok
364/// Standard  > [ref: tc_42, byte-identical]               ~11 tok
365/// Verbose   > [ref: tc_42, byte-identical, from: Read]   ~15 tok
366/// ```
367///
368/// `source_tool`, when provided, is included only in the `Verbose`
369/// form — earlier verbosities intentionally drop it to stay terse.
370pub fn render_reference_hint_with(
371    tool_call_id: &str,
372    verbosity: HintVerbosity,
373    source_tool: Option<ToolKind>,
374) -> String {
375    match verbosity {
376        HintVerbosity::Terse => format!("> [ref: {}]", tool_call_id),
377        HintVerbosity::Standard => format!("> [ref: {}, byte-identical]", tool_call_id),
378        HintVerbosity::Verbose => {
379            let tool_tag = match source_tool {
380                Some(ToolKind::FileRead) => "file-read",
381                Some(ToolKind::FileMutate) => "file-mutate",
382                Some(ToolKind::Other) => "other",
383                None => "",
384            };
385            if tool_tag.is_empty() {
386                format!("> [ref: {}, byte-identical]", tool_call_id)
387            } else {
388                format!(
389                    "> [ref: {}, byte-identical, from: {}]",
390                    tool_call_id, tool_tag
391                )
392            }
393        }
394    }
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400
401    fn h(s: &str) -> ContentHash {
402        content_hash(s.as_bytes())
403    }
404
405    #[test]
406    fn fresh_response_then_duplicate_hits() {
407        let mut c = DedupCache::with_capacity(5);
408        let fp = h("pipeline 12345 status=success");
409        assert_eq!(c.check(&fp), DedupDecision::Fresh);
410        c.insert(fp, "tc_1", ToolKind::Other, None, "");
411        match c.check(&fp) {
412            DedupDecision::Hint {
413                reference_tool_call_id,
414            } => assert_eq!(reference_tool_call_id, "tc_1"),
415            other => panic!("expected hint, got {other:?}"),
416        }
417    }
418
419    #[test]
420    fn distinct_content_is_fresh() {
421        let mut c = DedupCache::with_capacity(5);
422        c.insert(h("A"), "tc_1", ToolKind::Other, None, "");
423        assert_eq!(c.check(&h("B")), DedupDecision::Fresh);
424    }
425
426    #[test]
427    fn lru_evicts_oldest_when_full() {
428        let mut c = DedupCache::with_capacity(2);
429        c.insert(h("one"), "tc_1", ToolKind::Other, None, "");
430        c.insert(h("two"), "tc_2", ToolKind::Other, None, "");
431        c.insert(h("three"), "tc_3", ToolKind::Other, None, "");
432        assert_eq!(c.check(&h("one")), DedupDecision::Fresh);
433        assert!(matches!(c.check(&h("two")), DedupDecision::Hint { .. }));
434        assert!(matches!(c.check(&h("three")), DedupDecision::Hint { .. }));
435    }
436
437    #[test]
438    fn mutation_invalidates_same_file_read() {
439        let mut c = DedupCache::with_capacity(5);
440        let file = "abc12345".to_string();
441        let content = h("line1\nline2");
442        c.insert(
443            content,
444            "tc_1",
445            ToolKind::FileRead,
446            Some(file.clone()),
447            "Read",
448        );
449        assert_eq!(c.invalidate_file(&file), 1);
450        assert_eq!(c.check(&content), DedupDecision::Fresh);
451    }
452
453    #[test]
454    fn mutation_on_different_file_preserves_entry() {
455        let mut c = DedupCache::with_capacity(5);
456        let content = h("irrelevant file body");
457        c.insert(
458            content,
459            "tc_1",
460            ToolKind::FileRead,
461            Some("hash_a".into()),
462            "Read",
463        );
464        assert_eq!(c.invalidate_file("hash_b"), 0);
465        assert!(matches!(c.check(&content), DedupDecision::Hint { .. }));
466    }
467
468    #[test]
469    fn compaction_boundary_clears_and_advances_partition() {
470        let mut c = DedupCache::with_capacity(5);
471        let x = h("x");
472        c.insert(x, "tc_1", ToolKind::Other, None, "");
473        assert_eq!(c.partition(), 0);
474        c.on_compaction_boundary();
475        assert_eq!(c.partition(), 1);
476        assert_eq!(c.check(&x), DedupDecision::Fresh);
477        assert!(c.is_empty());
478    }
479
480    #[test]
481    fn invalidate_by_tool_drops_matching_entries() {
482        // Paper 3 cross-tool invalidation: `update_issue` declares
483        // `invalidates = ["get_issue", "get_issues"]`. After it runs,
484        // any cached responses for those tools must be dropped.
485        let mut c = DedupCache::with_capacity(5);
486        c.insert(h("body_a"), "tc_a", ToolKind::Other, None, "get_issue");
487        c.insert(h("body_b"), "tc_b", ToolKind::Other, None, "get_issues");
488        c.insert(h("body_c"), "tc_c", ToolKind::Other, None, "get_pipeline");
489        assert_eq!(c.len(), 3);
490        let dropped = c.invalidate_by_tool(&["get_issue".to_string(), "get_issues".to_string()]);
491        assert_eq!(dropped, 2);
492        assert_eq!(c.len(), 1);
493        // get_pipeline survives.
494        assert!(matches!(c.check(&h("body_c")), DedupDecision::Hint { .. }));
495        assert_eq!(c.check(&h("body_a")), DedupDecision::Fresh);
496        assert_eq!(c.check(&h("body_b")), DedupDecision::Fresh);
497    }
498
499    #[test]
500    fn invalidate_by_tool_empty_list_is_noop() {
501        let mut c = DedupCache::with_capacity(5);
502        c.insert(h("a"), "tc_a", ToolKind::Other, None, "Bash");
503        assert_eq!(c.invalidate_by_tool(&[]), 0);
504        assert_eq!(c.len(), 1);
505    }
506
507    #[test]
508    fn tool_kind_classification() {
509        assert_eq!(ToolKind::from_tool_name("Read"), ToolKind::FileRead);
510        assert_eq!(ToolKind::from_tool_name("Grep"), ToolKind::FileRead);
511        assert_eq!(ToolKind::from_tool_name("Edit"), ToolKind::FileMutate);
512        assert_eq!(ToolKind::from_tool_name("Write"), ToolKind::FileMutate);
513        assert_eq!(ToolKind::from_tool_name("MultiEdit"), ToolKind::FileMutate);
514        assert_eq!(ToolKind::from_tool_name("Bash"), ToolKind::Other);
515        assert_eq!(
516            ToolKind::from_tool_name("mcp__x__get_issues"),
517            ToolKind::Other
518        );
519    }
520
521    #[test]
522    fn reference_hint_shape() {
523        let hint = render_reference_hint("tc_42");
524        assert!(hint.starts_with("> [ref:"));
525        assert!(hint.contains("tc_42"));
526        assert!(hint.contains("byte-identical"));
527        // Hint must stay small vs any realistic response.
528        assert!(hint.len() < 40);
529    }
530
531    #[test]
532    fn hint_verbosity_variants() {
533        let terse = render_reference_hint_with("tc_1", HintVerbosity::Terse, None);
534        assert_eq!(terse, "> [ref: tc_1]");
535
536        let standard =
537            render_reference_hint_with("tc_1", HintVerbosity::Standard, Some(ToolKind::FileRead));
538        // source_tool is dropped below Verbose
539        assert_eq!(standard, "> [ref: tc_1, byte-identical]");
540
541        let verbose =
542            render_reference_hint_with("tc_1", HintVerbosity::Verbose, Some(ToolKind::FileRead));
543        assert!(verbose.contains("byte-identical"));
544        assert!(verbose.contains("file-read"));
545    }
546
547    #[test]
548    fn capacity_zero_is_coerced_to_one() {
549        let c = DedupCache::with_capacity(0);
550        assert_eq!(c.len(), 0);
551    }
552
553    #[test]
554    fn edit_then_reread_scenario() {
555        // Canonical stale-hint scenario from the paper:
556        //   1. Read(foo.py) → content_A → cache it
557        //   2. Edit(foo.py) → invalidate foo.py entries
558        //   3. Read(foo.py) → content_B or A? — must be treated Fresh regardless
559        let mut c = DedupCache::with_capacity(5);
560        let file = "foo_hash".to_string();
561        let content_a = h("original body");
562        c.insert(
563            content_a,
564            "tc_1",
565            ToolKind::FileRead,
566            Some(file.clone()),
567            "Read",
568        );
569        // Edit invalidates
570        assert_eq!(c.invalidate_file(&file), 1);
571        // Even if the re-read content coincidentally matches, we must not hint
572        // against the stale entry (it is gone). Hence any subsequent content is Fresh.
573        assert_eq!(c.check(&content_a), DedupDecision::Fresh);
574    }
575}