use std::collections::VecDeque;
use std::sync::Arc;
use sha2::{Digest, Sha256};
use crate::near_ref::{DeltaField, NearRefConfig, extract_delta};
pub type ContentHash = [u8; 16];
pub fn content_hash(content: &[u8]) -> ContentHash {
let digest = Sha256::digest(content);
let mut out = [0u8; 16];
out.copy_from_slice(&digest[..16]);
out
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DedupDecision {
Fresh,
Hint {
reference_tool_call_id: String,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolKind {
FileRead,
FileMutate,
Other,
}
impl ToolKind {
pub fn from_tool_name(name: &str) -> Self {
match name {
"Read" | "Grep" | "Glob" | "NotebookRead" => Self::FileRead,
"Edit" | "Write" | "MultiEdit" | "NotebookEdit" => Self::FileMutate,
_ => Self::Other,
}
}
}
#[derive(Debug, Clone)]
struct CacheEntry {
hash: ContentHash,
tool_call_id: String,
tool_kind: ToolKind,
file_path_hash: Option<String>,
body_snapshot: Option<Arc<String>>,
tool_name: String,
}
#[derive(Debug)]
pub struct DedupCache {
entries: VecDeque<CacheEntry>,
capacity: usize,
partition: u64,
}
impl DedupCache {
pub fn with_capacity(capacity: usize) -> Self {
Self {
entries: VecDeque::with_capacity(capacity.max(1)),
capacity: capacity.max(1),
partition: 0,
}
}
pub fn partition(&self) -> u64 {
self.partition
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn check(&mut self, hash: &ContentHash) -> DedupDecision {
if let Some(idx) = self.entries.iter().position(|e| &e.hash == hash) {
let entry = self
.entries
.remove(idx)
.expect("index came from VecDeque::position");
let reference_tool_call_id = entry.tool_call_id.clone();
self.entries.push_back(entry);
return DedupDecision::Hint {
reference_tool_call_id,
};
}
DedupDecision::Fresh
}
pub fn insert(
&mut self,
hash: ContentHash,
tool_call_id: impl Into<String>,
tool_kind: ToolKind,
file_path_hash: Option<String>,
tool_name: impl Into<String>,
) {
self.insert_inner(
hash,
tool_call_id.into(),
tool_kind,
file_path_hash,
None,
tool_name.into(),
);
}
pub fn insert_with_body(
&mut self,
hash: ContentHash,
tool_call_id: impl Into<String>,
tool_kind: ToolKind,
file_path_hash: Option<String>,
body: Arc<String>,
tool_name: impl Into<String>,
) {
self.insert_inner(
hash,
tool_call_id.into(),
tool_kind,
file_path_hash,
Some(body),
tool_name.into(),
);
}
fn insert_inner(
&mut self,
hash: ContentHash,
tool_call_id: String,
tool_kind: ToolKind,
file_path_hash: Option<String>,
body_snapshot: Option<Arc<String>>,
tool_name: String,
) {
if self.entries.len() >= self.capacity {
self.entries.pop_front();
}
self.entries.push_back(CacheEntry {
hash,
tool_call_id,
tool_kind,
file_path_hash,
body_snapshot,
tool_name,
});
}
pub fn find_near_ref(
&self,
new_body: &str,
config: &NearRefConfig,
) -> Option<(String, Vec<DeltaField>)> {
for entry in self.entries.iter().rev() {
let Some(body) = entry.body_snapshot.as_ref() else {
continue;
};
if let Some(deltas) = extract_delta(body.as_str(), new_body, config) {
return Some((entry.tool_call_id.clone(), deltas));
}
}
None
}
pub fn invalidate_file(&mut self, file_path_hash: &str) -> usize {
let before = self.entries.len();
self.entries.retain(|e| {
!(e.tool_kind == ToolKind::FileRead
&& e.file_path_hash.as_deref() == Some(file_path_hash))
});
before - self.entries.len()
}
pub fn invalidate_by_tool(&mut self, invalidates: &[String]) -> usize {
if invalidates.is_empty() {
return 0;
}
let before = self.entries.len();
self.entries
.retain(|e| !invalidates.iter().any(|t| t == &e.tool_name));
before - self.entries.len()
}
pub fn on_compaction_boundary(&mut self) {
self.entries.clear();
self.partition = self.partition.saturating_add(1);
}
}
impl Default for DedupCache {
fn default() -> Self {
Self::with_capacity(5)
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HintVerbosity {
Terse,
#[default]
Standard,
Verbose,
}
pub fn render_reference_hint(tool_call_id: &str) -> String {
render_reference_hint_with(tool_call_id, HintVerbosity::Standard, None)
}
pub fn render_reference_hint_with(
tool_call_id: &str,
verbosity: HintVerbosity,
source_tool: Option<ToolKind>,
) -> String {
match verbosity {
HintVerbosity::Terse => format!("> [ref: {}]", tool_call_id),
HintVerbosity::Standard => format!("> [ref: {}, byte-identical]", tool_call_id),
HintVerbosity::Verbose => {
let tool_tag = match source_tool {
Some(ToolKind::FileRead) => "file-read",
Some(ToolKind::FileMutate) => "file-mutate",
Some(ToolKind::Other) => "other",
None => "",
};
if tool_tag.is_empty() {
format!("> [ref: {}, byte-identical]", tool_call_id)
} else {
format!(
"> [ref: {}, byte-identical, from: {}]",
tool_call_id, tool_tag
)
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn h(s: &str) -> ContentHash {
content_hash(s.as_bytes())
}
#[test]
fn fresh_response_then_duplicate_hits() {
let mut c = DedupCache::with_capacity(5);
let fp = h("pipeline 12345 status=success");
assert_eq!(c.check(&fp), DedupDecision::Fresh);
c.insert(fp, "tc_1", ToolKind::Other, None, "");
match c.check(&fp) {
DedupDecision::Hint {
reference_tool_call_id,
} => assert_eq!(reference_tool_call_id, "tc_1"),
other => panic!("expected hint, got {other:?}"),
}
}
#[test]
fn distinct_content_is_fresh() {
let mut c = DedupCache::with_capacity(5);
c.insert(h("A"), "tc_1", ToolKind::Other, None, "");
assert_eq!(c.check(&h("B")), DedupDecision::Fresh);
}
#[test]
fn lru_evicts_oldest_when_full() {
let mut c = DedupCache::with_capacity(2);
c.insert(h("one"), "tc_1", ToolKind::Other, None, "");
c.insert(h("two"), "tc_2", ToolKind::Other, None, "");
c.insert(h("three"), "tc_3", ToolKind::Other, None, "");
assert_eq!(c.check(&h("one")), DedupDecision::Fresh);
assert!(matches!(c.check(&h("two")), DedupDecision::Hint { .. }));
assert!(matches!(c.check(&h("three")), DedupDecision::Hint { .. }));
}
#[test]
fn mutation_invalidates_same_file_read() {
let mut c = DedupCache::with_capacity(5);
let file = "abc12345".to_string();
let content = h("line1\nline2");
c.insert(
content,
"tc_1",
ToolKind::FileRead,
Some(file.clone()),
"Read",
);
assert_eq!(c.invalidate_file(&file), 1);
assert_eq!(c.check(&content), DedupDecision::Fresh);
}
#[test]
fn mutation_on_different_file_preserves_entry() {
let mut c = DedupCache::with_capacity(5);
let content = h("irrelevant file body");
c.insert(
content,
"tc_1",
ToolKind::FileRead,
Some("hash_a".into()),
"Read",
);
assert_eq!(c.invalidate_file("hash_b"), 0);
assert!(matches!(c.check(&content), DedupDecision::Hint { .. }));
}
#[test]
fn compaction_boundary_clears_and_advances_partition() {
let mut c = DedupCache::with_capacity(5);
let x = h("x");
c.insert(x, "tc_1", ToolKind::Other, None, "");
assert_eq!(c.partition(), 0);
c.on_compaction_boundary();
assert_eq!(c.partition(), 1);
assert_eq!(c.check(&x), DedupDecision::Fresh);
assert!(c.is_empty());
}
#[test]
fn invalidate_by_tool_drops_matching_entries() {
let mut c = DedupCache::with_capacity(5);
c.insert(h("body_a"), "tc_a", ToolKind::Other, None, "get_issue");
c.insert(h("body_b"), "tc_b", ToolKind::Other, None, "get_issues");
c.insert(h("body_c"), "tc_c", ToolKind::Other, None, "get_pipeline");
assert_eq!(c.len(), 3);
let dropped = c.invalidate_by_tool(&["get_issue".to_string(), "get_issues".to_string()]);
assert_eq!(dropped, 2);
assert_eq!(c.len(), 1);
assert!(matches!(c.check(&h("body_c")), DedupDecision::Hint { .. }));
assert_eq!(c.check(&h("body_a")), DedupDecision::Fresh);
assert_eq!(c.check(&h("body_b")), DedupDecision::Fresh);
}
#[test]
fn invalidate_by_tool_empty_list_is_noop() {
let mut c = DedupCache::with_capacity(5);
c.insert(h("a"), "tc_a", ToolKind::Other, None, "Bash");
assert_eq!(c.invalidate_by_tool(&[]), 0);
assert_eq!(c.len(), 1);
}
#[test]
fn tool_kind_classification() {
assert_eq!(ToolKind::from_tool_name("Read"), ToolKind::FileRead);
assert_eq!(ToolKind::from_tool_name("Grep"), ToolKind::FileRead);
assert_eq!(ToolKind::from_tool_name("Edit"), ToolKind::FileMutate);
assert_eq!(ToolKind::from_tool_name("Write"), ToolKind::FileMutate);
assert_eq!(ToolKind::from_tool_name("MultiEdit"), ToolKind::FileMutate);
assert_eq!(ToolKind::from_tool_name("Bash"), ToolKind::Other);
assert_eq!(
ToolKind::from_tool_name("mcp__x__get_issues"),
ToolKind::Other
);
}
#[test]
fn reference_hint_shape() {
let hint = render_reference_hint("tc_42");
assert!(hint.starts_with("> [ref:"));
assert!(hint.contains("tc_42"));
assert!(hint.contains("byte-identical"));
assert!(hint.len() < 40);
}
#[test]
fn hint_verbosity_variants() {
let terse = render_reference_hint_with("tc_1", HintVerbosity::Terse, None);
assert_eq!(terse, "> [ref: tc_1]");
let standard =
render_reference_hint_with("tc_1", HintVerbosity::Standard, Some(ToolKind::FileRead));
assert_eq!(standard, "> [ref: tc_1, byte-identical]");
let verbose =
render_reference_hint_with("tc_1", HintVerbosity::Verbose, Some(ToolKind::FileRead));
assert!(verbose.contains("byte-identical"));
assert!(verbose.contains("file-read"));
}
#[test]
fn capacity_zero_is_coerced_to_one() {
let c = DedupCache::with_capacity(0);
assert_eq!(c.len(), 0);
}
#[test]
fn edit_then_reread_scenario() {
let mut c = DedupCache::with_capacity(5);
let file = "foo_hash".to_string();
let content_a = h("original body");
c.insert(
content_a,
"tc_1",
ToolKind::FileRead,
Some(file.clone()),
"Read",
);
assert_eq!(c.invalidate_file(&file), 1);
assert_eq!(c.check(&content_a), DedupDecision::Fresh);
}
}