use once_cell::sync::Lazy;
use regex::Regex;
/// Upper bound (51,200 bytes = 50 KiB) on the per-result budget returned by
/// `compute_tool_result_budget`.
pub const DEFAULT_MAX_RESULT_BYTES: usize = 51_200;
/// Lower bound on the per-result budget returned by `compute_tool_result_budget`;
/// it applies even when the computed share of the remaining context is smaller.
pub const MIN_RESULT_BUDGET: usize = 1024;
/// Multiplier that converts a token count into a byte count.
/// NOTE(review): presumably a rough average-bytes-per-token estimate — confirm.
const BYTES_PER_TOKEN: usize = 4;
/// Minimum length, in hex digits, of a run matched by `HEX_BLOB_RE`.
const MIN_HEX_BLOB_LEN: usize = 200;
/// Matches a base64 data URI: `data:`, a run of media-type characters
/// (alphanumerics, `/`, `+`, `-`, `.`), the literal `;base64,`, and then a run of
/// base64 alphabet characters (including `=` padding).
static BASE64_URI_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"data:[a-zA-Z0-9/+\-\.]+;base64,[A-Za-z0-9+/=]+").unwrap());
/// Matches a contiguous run of at least `MIN_HEX_BLOB_LEN` hexadecimal digits
/// (either case). The pattern is built with `format!` so the threshold comes from
/// the constant; the doubled braces are `format!` escapes for the regex `{n,}` quantifier.
static HEX_BLOB_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(&format!(r"[0-9a-fA-F]{{{},}}", MIN_HEX_BLOB_LEN)).unwrap());
pub fn sanitize_tool_result(result: &str, max_bytes: usize) -> String {
let mut out = BASE64_URI_RE
.replace_all(result, |caps: ®ex::Captures| {
let len = caps[0].len();
format!("[base64 data removed, {} bytes]", len)
})
.into_owned();
out = HEX_BLOB_RE
.replace_all(&out, |caps: ®ex::Captures| {
let len = caps[0].len();
format!("[hex data removed, {} chars]", len)
})
.into_owned();
if out.len() > max_bytes {
let total = out.len();
out.truncate(max_bytes);
while !out.is_char_boundary(out.len()) {
out.pop();
}
out.push_str(&format!("\n...[truncated, {} total bytes]", total));
}
out
}
/// Computes how many bytes each pending tool result may occupy.
///
/// The unused part of the context window (`context_limit - current_usage_tokens`,
/// floored at zero) is converted to bytes with `BYTES_PER_TOKEN`, divided evenly
/// among the pending results, and clamped to
/// `MIN_RESULT_BUDGET..=DEFAULT_MAX_RESULT_BYTES`.
///
/// # Parameters
/// * `context_limit` – total context size, in tokens.
/// * `current_usage_tokens` – tokens already consumed; values above
///   `context_limit` are treated as "no space left".
/// * `pending_result_count` – number of results sharing the remaining space;
///   `0` is treated as `1` so the division is always defined.
///
/// # Returns
/// The per-result budget in bytes, never below `MIN_RESULT_BUDGET` and never
/// above `DEFAULT_MAX_RESULT_BYTES`.
pub fn compute_tool_result_budget(
    context_limit: usize,
    current_usage_tokens: usize,
    pending_result_count: usize,
) -> usize {
    let remaining_tokens = context_limit.saturating_sub(current_usage_tokens);
    // Saturate instead of overflowing: a huge `context_limit` would otherwise
    // panic in debug builds or wrap to a tiny value in release builds. The result
    // is clamped below anyway, so saturation cannot change a legitimate answer.
    let remaining_bytes = remaining_tokens.saturating_mul(BYTES_PER_TOKEN);
    let count = pending_result_count.max(1);
    (remaining_bytes / count).clamp(MIN_RESULT_BUDGET, DEFAULT_MAX_RESULT_BYTES)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- sanitize_tool_result ------------------------------------------------

    /// Plain prose has nothing to strip and fits the budget, so it comes back verbatim.
    #[test]
    fn test_no_change_for_normal_text() {
        let plain = "Hello, world! This is a normal tool result.";
        let sanitized = sanitize_tool_result(plain, DEFAULT_MAX_RESULT_BYTES);
        assert_eq!(sanitized, plain);
    }

    /// A data URI is replaced by a placeholder while the surrounding text survives.
    #[test]
    fn test_strips_base64_data_uri() {
        let payload = "A".repeat(500);
        let raw = format!("before data:image/png;base64,{} after", payload);

        let sanitized = sanitize_tool_result(&raw, DEFAULT_MAX_RESULT_BYTES);

        assert!(!sanitized.contains(&payload));
        for expected in ["[base64 data removed,", "before", "after"] {
            assert!(sanitized.contains(expected));
        }
    }

    /// A 480-digit hex run exceeds the blob threshold and is replaced by a placeholder.
    #[test]
    fn test_strips_hex_blob() {
        let blob = "a1b2c3d4e5f6".repeat(40);
        let raw = format!("prefix {} suffix", blob);

        let sanitized = sanitize_tool_result(&raw, DEFAULT_MAX_RESULT_BYTES);

        assert!(!sanitized.contains(&blob));
        for expected in ["[hex data removed,", "prefix", "suffix"] {
            assert!(sanitized.contains(expected));
        }
    }

    /// Short hex strings (e.g. hashes) are below the threshold and must be kept.
    #[test]
    fn test_short_hex_not_stripped() {
        let short_hex = "abcdef1234";
        let raw = format!("hash: {}", short_hex);

        let sanitized = sanitize_tool_result(&raw, DEFAULT_MAX_RESULT_BYTES);

        assert!(sanitized.contains(short_hex));
    }

    /// Oversized output is cut down and annotated with its pre-truncation size.
    #[test]
    fn test_truncation() {
        let long_text = "x".repeat(1000);

        let sanitized = sanitize_tool_result(&long_text, 100);

        assert!(sanitized.len() < 200);
        assert!(sanitized.contains("[truncated, 1000 total bytes]"));
    }

    /// Empty input yields empty output.
    #[test]
    fn test_empty_input() {
        let sanitized = sanitize_tool_result("", DEFAULT_MAX_RESULT_BYTES);
        assert_eq!(sanitized, "");
    }

    /// Every data URI in the input gets its own placeholder.
    #[test]
    fn test_multiple_base64_uris() {
        let payload = "Q".repeat(100);
        let raw = format!(
            "img1: data:image/png;base64,{} and img2: data:application/pdf;base64,{}",
            payload, payload
        );

        let sanitized = sanitize_tool_result(&raw, DEFAULT_MAX_RESULT_BYTES);

        assert!(!sanitized.contains(&payload));
        let placeholder_count = sanitized.matches("[base64 data removed,").count();
        assert_eq!(placeholder_count, 2);
    }

    // ---- compute_tool_result_budget -------------------------------------------

    /// With lots of free context the budget is capped at the maximum.
    #[test]
    fn test_compute_budget_plenty_of_space() {
        assert_eq!(
            compute_tool_result_budget(100_000, 10_000, 1),
            DEFAULT_MAX_RESULT_BYTES
        );
    }

    /// 1,000 free tokens translate to 4,000 bytes, strictly between both bounds.
    #[test]
    fn test_compute_budget_tight_space() {
        let per_result = compute_tool_result_budget(100_000, 99_000, 1);
        assert_eq!(per_result, 4000);
        assert!(per_result > MIN_RESULT_BUDGET);
        assert!(per_result < DEFAULT_MAX_RESULT_BYTES);
    }

    /// A full or over-full context still yields the minimum budget.
    #[test]
    fn test_compute_budget_no_space() {
        for used_tokens in [100_000, 120_000] {
            let per_result = compute_tool_result_budget(100_000, used_tokens, 1);
            assert_eq!(per_result, MIN_RESULT_BUDGET);
        }
    }

    /// The remaining space is split evenly across pending results.
    #[test]
    fn test_compute_budget_multiple_results() {
        assert_eq!(compute_tool_result_budget(100_000, 90_000, 4), 10_000);
    }

    /// A single pending result receives the whole remaining space.
    #[test]
    fn test_compute_budget_single_result() {
        assert_eq!(compute_tool_result_budget(100_000, 95_000, 1), 20_000);
    }

    /// A pending count of zero behaves like a count of one.
    #[test]
    fn test_compute_budget_zero_results() {
        assert_eq!(
            compute_tool_result_budget(100_000, 50_000, 0),
            DEFAULT_MAX_RESULT_BYTES
        );
    }

    /// However little space is left per result, the floor is respected.
    #[test]
    fn test_compute_budget_never_below_minimum() {
        let cases = [(1000, 999, 10), (1000, 1000, 100)];
        for (limit, used, pending) in cases {
            let per_result = compute_tool_result_budget(limit, used, pending);
            assert_eq!(per_result, MIN_RESULT_BUDGET);
        }
    }
}