use serde_json::Value;
use sha2::{Digest, Sha256};
pub(crate) fn canonical_prefix(messages: &[Value], user_text: &str) -> (String, String, usize) {
let boundary = boundary_pos(messages, user_text);
let hash_system = match messages.first() {
Some(m) => hash_canonical(m),
None => hash_canonical(&Value::Null),
};
let pre_region: Vec<Value> = messages.iter().take(boundary).skip(1).cloned().collect();
let hash_pre_boundary = hash_canonical(&Value::Array(pre_region));
(hash_system, hash_pre_boundary, boundary)
}
/// Summary of one provider call's inputs, as built by `provider_call_fingerprint`.
///
/// Every `*_hash` field is either a lowercase hex SHA-256 digest produced by
/// `hash_canonical` or the empty string when the hashed input is absent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ProviderCallFingerprint {
    /// Canonical hash of the first message (hash of JSON `null` for an empty list).
    pub hash_system: String,
    /// Canonical hash of the messages at indices `1..boundary_pos`.
    pub hash_pre_boundary: String,
    /// Index of the current user message, or the message count when none matched.
    pub boundary_pos: usize,
    /// Total number of messages passed in.
    pub message_count: usize,
    /// Canonical hash of the tool definitions; empty when no tools were supplied.
    pub tool_defs_hash: String,
    /// Always the empty string when built by `provider_call_fingerprint`.
    pub session_summary_hash: String,
    /// Canonical hash of the first pre-boundary `system` message whose content starts
    /// with `TASK_CONTEXT_TAIL_MARKER`; empty when there is no such message.
    pub tail_hash: String,
    /// Canonical hash of the pre-boundary messages with the tail message left out.
    pub prefix_hash_archived: String,
    /// The `force_text` flag exactly as the caller supplied it.
    pub force_text: bool,
}
/// Content prefix that identifies the task-context tail message: a `system` message
/// whose string content starts with this marker is hashed separately as the tail.
pub(crate) const TASK_CONTEXT_TAIL_MARKER: &str = "[Task Context]";
/// Builds the [`ProviderCallFingerprint`] for one provider call.
///
/// * `messages` / `user_text` are forwarded to [`canonical_prefix`] to obtain the
///   system hash, the pre-boundary hash and the boundary index.
/// * `effective_tools` is hashed as a JSON array; an empty slice yields an empty
///   `tool_defs_hash`.
/// * `force_text` is copied into the result unchanged.
///
/// Within the pre-boundary region (indices `1..boundary_pos`) the first `system`
/// message whose string content starts with [`TASK_CONTEXT_TAIL_MARKER`] is treated
/// as the tail: it is hashed on its own into `tail_hash` and left out of
/// `prefix_hash_archived`. If several messages match, only the first one is the tail.
/// `session_summary_hash` is always the empty string.
pub(crate) fn provider_call_fingerprint(
    messages: &[Value],
    user_text: &str,
    effective_tools: &[Value],
    force_text: bool,
) -> ProviderCallFingerprint {
    /// True for a `system` message whose string content begins with the tail marker.
    fn is_task_context_tail(msg: &Value) -> bool {
        let role = msg.get("role").and_then(Value::as_str);
        let content = msg.get("content").and_then(Value::as_str);
        role == Some("system")
            && content.is_some_and(|text| text.starts_with(TASK_CONTEXT_TAIL_MARKER))
    }

    let (hash_system, hash_pre_boundary, boundary_pos) = canonical_prefix(messages, user_text);

    let tool_defs_hash = match effective_tools {
        [] => String::new(),
        tools => hash_canonical(&Value::Array(tools.to_vec())),
    };

    // Messages strictly between the first message and the boundary. Both ends are
    // clamped so the slice is always in bounds (and empty when the boundary is 0).
    let region_end = boundary_pos.min(messages.len());
    let region_start = region_end.min(1);
    let region = &messages[region_start..region_end];

    // Offset of the tail inside `region`; only ever compared against region offsets.
    let tail_offset = region.iter().position(is_task_context_tail);

    let tail_hash = match tail_offset {
        Some(offset) => hash_canonical(&region[offset]),
        None => String::new(),
    };

    let archived = region
        .iter()
        .enumerate()
        .filter(|&(offset, _)| tail_offset != Some(offset))
        .map(|(_, msg)| msg.clone())
        .collect::<Vec<Value>>();
    let prefix_hash_archived = hash_canonical(&Value::Array(archived));

    ProviderCallFingerprint {
        hash_system,
        hash_pre_boundary,
        boundary_pos,
        message_count: messages.len(),
        tool_defs_hash,
        session_summary_hash: String::new(),
        tail_hash,
        prefix_hash_archived,
        force_text,
    }
}
/// Returns the index of the current user message for `user_text`, falling back to
/// `messages.len()` when no message matches.
pub(crate) fn boundary_pos(messages: &[Value], user_text: &str) -> usize {
    match find_current_user_boundary(messages, user_text) {
        Some(index) => index,
        None => messages.len(),
    }
}
fn find_current_user_boundary(messages: &[Value], user_text: &str) -> Option<usize> {
messages.iter().enumerate().rev().find_map(|(i, m)| {
if m.get("role").and_then(|r| r.as_str()) == Some("user")
&& m.get("content").is_some_and(|content| {
crate::agent::vision::user_message_content_matches(content, user_text)
})
{
Some(i)
} else {
None
}
})
}
/// Hashes the messages that precede the current user turn.
///
/// A leading message is left out only when its role is `"system"`, so a list with and
/// without that leading system message produces the same hash. The remaining messages
/// up to (but not including) the index from [`boundary_pos`] are hashed as one JSON array.
pub(crate) fn stage_pre_boundary_hash(messages: &[Value], user_text: &str) -> String {
    let boundary = boundary_pos(messages, user_text);

    let first_role = messages
        .first()
        .and_then(|msg| msg.get("role"))
        .and_then(Value::as_str);
    let leading_skipped = match first_role {
        Some("system") => 1,
        _ => 0,
    };

    let region = messages
        .iter()
        .take(boundary)
        .skip(leading_skipped)
        .cloned()
        .collect::<Vec<Value>>();
    hash_canonical(&Value::Array(region))
}
/// Returns the lowercase hex SHA-256 digest of the canonical rendering of `value`.
///
/// The rendering comes from `write_canonical`, which emits object keys in sorted
/// order, so two values that differ only in key order hash identically.
pub(crate) fn hash_canonical(value: &Value) -> String {
    let mut rendered = String::new();
    write_canonical(value, &mut rendered);
    let digest = Sha256::digest(rendered.as_bytes());
    format!("{digest:x}")
}
/// Appends a canonical JSON rendering of `value` to `out`.
///
/// Objects are written with their keys in sorted order and no whitespace; arrays keep
/// their element order; every other value is written with its `to_string()` form.
/// Nested objects and arrays are rendered recursively.
fn write_canonical(value: &Value, out: &mut String) {
    match value {
        Value::Object(map) => {
            // Sort the entries once so each value travels with its key.
            let mut entries: Vec<(&String, &Value)> = map.iter().collect();
            entries.sort_unstable_by(|(left, _), (right, _)| left.cmp(right));

            out.push('{');
            let mut separator = "";
            for (key, member) in entries {
                out.push_str(separator);
                separator = ",";
                // Route the key through `Value::String` so it is quoted and escaped as JSON.
                out.push_str(&Value::String(key.clone()).to_string());
                out.push(':');
                write_canonical(member, out);
            }
            out.push('}');
        }
        Value::Array(items) => {
            out.push('[');
            let mut separator = "";
            for item in items {
                out.push_str(separator);
                separator = ",";
                write_canonical(item, out);
            }
            out.push(']');
        }
        scalar => out.push_str(&scalar.to_string()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Text of the in-flight user turn in [`sample_messages`].
    const CURRENT: &str = "current question";

    /// A short conversation: system prompt, one finished exchange, the current user
    /// turn, and a tool round-trip that follows it.
    fn sample_messages() -> Vec<Value> {
        vec![
            json!({"role": "system", "content": "You are a helpful assistant."}),
            json!({"role": "user", "content": "old question"}),
            json!({"role": "assistant", "content": "old answer"}),
            json!({"role": "user", "content": "current question"}),
            json!({
                "role": "assistant",
                "content": null,
                "tool_calls": [{"id": "tc1", "function": {"name": "read_file", "arguments": "{}"}}]
            }),
            json!({"role": "tool", "content": "file body", "tool_call_id": "tc1", "name": "read_file"}),
        ]
    }

    /// Builds a tool-role message.
    fn tool_message(content: &str, call_id: &str, name: &str) -> Value {
        json!({"role": "tool", "content": content, "tool_call_id": call_id, "name": name})
    }

    /// Builds an assistant message carrying a single `grep` tool call with the given id.
    fn assistant_with_call(call_id: &str) -> Value {
        json!({
            "role": "assistant",
            "content": "thinking",
            "tool_calls": [{"id": call_id, "function": {"name": "grep", "arguments": "{}"}}]
        })
    }

    /// Builds a message with the given role whose content starts with the tail marker.
    fn marked_message(role: &str, body: &str) -> Value {
        json!({
            "role": role,
            "content": format!("{TASK_CONTEXT_TAIL_MARKER}\n{body}"),
        })
    }

    /// Fingerprints `messages` against the current user turn.
    fn fingerprint(messages: &[Value], tools: &[Value], force_text: bool) -> ProviderCallFingerprint {
        provider_call_fingerprint(messages, CURRENT, tools, force_text)
    }

    /// Places `before` at index 3 (just ahead of the current user turn), replaces it
    /// with `after`, and asserts that the pre-boundary hash differs between the two.
    fn assert_pre_boundary_hash_differs(before: Value, after: Value) {
        let mut messages = sample_messages();
        messages.insert(3, before);
        let baseline = canonical_prefix(&messages, CURRENT).1;
        messages[3] = after;
        let changed = canonical_prefix(&messages, CURRENT).1;
        assert_ne!(baseline, changed);
    }

    #[test]
    fn boundary_is_last_user_message_matching_user_text() {
        let (_, _, boundary) = canonical_prefix(&sample_messages(), CURRENT);
        assert_eq!(boundary, 3);
    }

    #[test]
    fn boundary_falls_back_to_len_when_user_text_absent() {
        let messages = sample_messages();
        let (_, _, boundary) = canonical_prefix(&messages, "no such message");
        assert_eq!(boundary, messages.len());
    }

    #[test]
    fn identical_inputs_produce_identical_hashes() {
        let messages = sample_messages();
        let copy = messages.clone();
        assert_eq!(
            canonical_prefix(&messages, CURRENT),
            canonical_prefix(&copy, CURRENT)
        );
    }

    #[test]
    fn key_order_does_not_affect_hash() {
        let literal = vec![json!({"role": "system", "alpha": 1, "beta": 2, "content": "x"})];
        let parsed: Value =
            serde_json::from_str(r#"{"content":"x","beta":2,"alpha":1,"role":"system"}"#).unwrap();
        let reordered = vec![parsed];
        assert_eq!(
            canonical_prefix(&literal, "").0,
            canonical_prefix(&reordered, "").0
        );
    }

    #[test]
    fn pre_boundary_hash_changes_when_tool_calls_field_changes() {
        assert_pre_boundary_hash_differs(assistant_with_call("tcA"), assistant_with_call("tcB"));
    }

    #[test]
    fn pre_boundary_hash_changes_when_tool_call_id_changes() {
        assert_pre_boundary_hash_differs(
            tool_message("r", "id1", "t"),
            tool_message("r", "id2", "t"),
        );
    }

    #[test]
    fn pre_boundary_hash_changes_when_name_field_changes() {
        assert_pre_boundary_hash_differs(
            tool_message("r", "id", "name_a"),
            tool_message("r", "id", "name_b"),
        );
    }

    #[test]
    fn system_hash_is_independent_of_history_growth() {
        let mut messages = sample_messages();
        let system_before = canonical_prefix(&messages, CURRENT).0;
        messages.push(tool_message("more", "tc2", "x"));
        let system_after = canonical_prefix(&messages, CURRENT).0;
        assert_eq!(system_before, system_after);
    }

    #[test]
    fn tail_growth_after_boundary_does_not_flip_pre_boundary_hash() {
        let mut messages = sample_messages();
        let pre_before = canonical_prefix(&messages, CURRENT).1;
        messages.push(tool_message("more", "tc2", "x"));
        let pre_after = canonical_prefix(&messages, CURRENT).1;
        assert_eq!(pre_before, pre_after);
    }

    #[test]
    fn empty_messages_do_not_panic() {
        let (sys, pre, boundary) = canonical_prefix(&[], "anything");
        assert_eq!(boundary, 0);
        assert_eq!(sys, hash_canonical(&Value::Null));
        assert_eq!(pre, hash_canonical(&Value::Array(Vec::new())));
    }

    #[test]
    fn fingerprint_is_deterministic_across_identical_inputs() {
        let messages = sample_messages();
        let copy = messages.clone();
        let tools = vec![json!({"name": "read_file", "parameters": {}})];
        assert_eq!(
            fingerprint(&messages, &tools, false),
            fingerprint(&copy, &tools, false)
        );
    }

    #[test]
    fn force_text_keeps_tool_defs_hash_and_sets_flag() {
        let messages = sample_messages();
        let tools = vec![json!({"name": "read_file", "parameters": {}})];

        let forced = fingerprint(&messages, &tools, true);
        assert!(forced.force_text);

        let normal = fingerprint(&messages, &tools, false);
        assert!(!normal.force_text);

        assert_ne!(forced.tool_defs_hash, "");
        assert_eq!(
            forced.tool_defs_hash, normal.tool_defs_hash,
            "same tool defs must hash identically regardless of force-text mode"
        );

        let no_tools = fingerprint(&messages, &[], false);
        assert_eq!(no_tools.tool_defs_hash, "");
    }

    #[test]
    fn fingerprint_surfaces_canonical_prefix_fields() {
        let messages = sample_messages();
        let fp = fingerprint(&messages, &[], false);
        let (sys, pre, boundary) = canonical_prefix(&messages, CURRENT);
        assert_eq!(fp.hash_system, sys);
        assert_eq!(fp.hash_pre_boundary, pre);
        assert_eq!(fp.boundary_pos, boundary);
        assert_eq!(fp.message_count, messages.len());
    }

    #[test]
    fn session_summary_hash_is_always_empty_after_retirement() {
        let mut messages = sample_messages();
        let without = fingerprint(&messages, &[], false);
        assert_eq!(without.session_summary_hash, "");

        messages.insert(
            1,
            json!({"role": "system", "content": "[Session Summary]\nUser likes coffee."}),
        );
        let with_summary = fingerprint(&messages, &[], false);
        assert_eq!(
            with_summary.session_summary_hash, "",
            "session_summary_hash retired: must be empty even when summary message is present"
        );
    }

    #[test]
    fn tool_defs_hash_changes_when_schema_changes() {
        let messages = sample_messages();
        let object_schema = vec![json!({"name": "read_file", "parameters": {"type": "object"}})];
        let string_schema = vec![json!({"name": "read_file", "parameters": {"type": "string"}})];
        let with_object = fingerprint(&messages, &object_schema, false);
        let with_string = fingerprint(&messages, &string_schema, false);
        assert_ne!(with_object.tool_defs_hash, with_string.tool_defs_hash);
    }

    #[test]
    fn stage_hash_skips_leading_system_message() {
        let without_system = vec![
            json!({"role": "user", "content": "old"}),
            json!({"role": "assistant", "content": "ans"}),
            json!({"role": "user", "content": "current question"}),
        ];
        let mut with_system = vec![json!({"role": "system", "content": "sys"})];
        with_system.extend(without_system.iter().cloned());

        assert_eq!(
            stage_pre_boundary_hash(&with_system, CURRENT),
            stage_pre_boundary_hash(&without_system, CURRENT),
        );
    }

    #[test]
    fn stage_hash_changes_when_pre_boundary_content_mutates() {
        let base = vec![
            json!({"role": "user", "content": "old"}),
            json!({"role": "assistant", "content": "ans"}),
            json!({"role": "user", "content": "current question"}),
        ];
        let mut mutated = base.clone();
        mutated[1] = json!({"role": "assistant", "content": "ans (truncated…)"});
        assert_ne!(
            stage_pre_boundary_hash(&base, CURRENT),
            stage_pre_boundary_hash(&mutated, CURRENT),
        );
    }

    #[test]
    fn hash_canonical_is_order_independent_for_nested_objects() {
        let first = json!({"outer": {"b": 1, "a": 2}, "list": [{"y": 1, "x": 2}]});
        let second = json!({"list": [{"x": 2, "y": 1}], "outer": {"a": 2, "b": 1}});
        assert_eq!(hash_canonical(&first), hash_canonical(&second));
    }

    #[test]
    fn tail_hash_separates_tail_from_archived_region() {
        let mut messages = sample_messages();
        let tail_pos = boundary_pos(&messages, CURRENT);
        messages.insert(
            tail_pos,
            marked_message("system", "[Current Date & Time]\nstub"),
        );
        let original = fingerprint(&messages, &[], false);
        assert!(!original.tail_hash.is_empty(), "tail must be located and hashed");

        // Changing only the tail moves tail_hash and the full pre-boundary hash, but
        // leaves the archived hash alone.
        let mut tail_changed = messages.clone();
        tail_changed[tail_pos]["content"] =
            format!("{TASK_CONTEXT_TAIL_MARKER}\n[Current Date & Time]\nother").into();
        let after_tail_edit = fingerprint(&tail_changed, &[], false);
        assert_ne!(original.tail_hash, after_tail_edit.tail_hash);
        assert_eq!(
            original.prefix_hash_archived,
            after_tail_edit.prefix_hash_archived
        );
        assert_ne!(original.hash_pre_boundary, after_tail_edit.hash_pre_boundary);

        // Changing archived history moves the archived hash but not tail_hash.
        let mut hist_changed = messages.clone();
        hist_changed[1]["content"] = "mutated history".into();
        let after_history_edit = fingerprint(&hist_changed, &[], false);
        assert_ne!(
            original.prefix_hash_archived,
            after_history_edit.prefix_hash_archived
        );
        assert_eq!(original.tail_hash, after_history_edit.tail_hash);
    }

    #[test]
    fn no_tail_marker_means_empty_tail_hash_and_archived_equals_pre_boundary() {
        let fp = fingerprint(&sample_messages(), &[], false);
        assert!(fp.tail_hash.is_empty());
        assert_eq!(fp.prefix_hash_archived, fp.hash_pre_boundary);
    }

    #[test]
    fn session_summary_hash_is_retired() {
        let fp = fingerprint(&sample_messages(), &[], false);
        assert!(fp.session_summary_hash.is_empty());
    }

    #[test]
    fn user_role_with_tail_marker_content_is_not_treated_as_tail() {
        let mut messages = sample_messages();
        let tail_pos = boundary_pos(&messages, CURRENT);
        messages.insert(tail_pos, marked_message("user", "some injected context"));

        let fp = fingerprint(&messages, &[], false);
        assert!(
            fp.tail_hash.is_empty(),
            "user-role message with tail marker content must not be treated as the tail"
        );
        assert_eq!(
            fp.prefix_hash_archived, fp.hash_pre_boundary,
            "prefix_hash_archived must equal hash_pre_boundary when no system-role tail exists"
        );
    }
}