use super::ToolOutputCompressionConfig;
use regex_lite::Regex;
use std::sync::LazyLock;
/// Outcome of running the safe filter pipeline over one tool output.
#[derive(Debug)]
pub(super) struct SafeFilterResult {
/// The text after every applicable filter has been applied.
pub(super) output: String,
/// `true` when the secret-redaction step changed the text.
pub(super) redacted: bool,
/// `true` when the text was shortened by the size cap or replaced by the
/// binary-suppression placeholder.
pub(super) capped: bool,
/// `true` when the text was replaced by the binary-suppression placeholder.
pub(super) binary_suppressed: bool,
/// Names of the filters that actually changed the text, in the order they ran.
pub(super) strategies: Vec<String>,
}
pub(super) fn apply_safe_filters(
raw_output: &str,
config: &ToolOutputCompressionConfig,
) -> SafeFilterResult {
let mut output = raw_output.to_string();
let mut redacted = false;
let mut capped = false;
let mut binary_suppressed = false;
let mut strategies = Vec::new();
let stripped = strip_ansi(&output);
if stripped != output {
output = stripped;
strategies.push("strip_ansi".to_string());
}
if config.redact_secrets {
let redacted_output = redact_secrets(&output);
if redacted_output != output {
output = redacted_output;
redacted = true;
strategies.push("redact_secrets".to_string());
}
}
if looks_binary(&output) {
let bytes = output.len();
output = format!("[Binary output suppressed: {bytes} bytes]");
capped = true;
binary_suppressed = true;
strategies.push("binary_suppression".to_string());
return SafeFilterResult {
output,
redacted,
capped,
binary_suppressed,
strategies,
};
}
let cap_outcome = cap_output(&output, config.max_output_bytes);
if cap_outcome.output != output {
output = cap_outcome.output;
capped = true;
strategies.push("cap_oversized".to_string());
if cap_outcome.error_window {
strategies.push("cap_error_window".to_string());
}
}
SafeFilterResult {
output,
redacted,
capped,
binary_suppressed,
strategies,
}
}
/// Matches ANSI/ECMA-48 CSI control sequences: `ESC [`, any number of parameter
/// bytes (0x30–0x3F, i.e. `0-9 : ; < = > ?`), any number of intermediate bytes
/// (0x20–0x2F) and a single final byte (0x40–0x7E).
///
/// The parameter class covers the full 0x30–0x3F range so that colon-separated
/// SGR parameters (e.g. `ESC[38:5:196m`) and private-marker sequences
/// (e.g. `ESC[>4;2m`) are matched as well.
static ANSI_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\x1b\[[0-?]*[ -/]*[@-~]"#).expect("valid ansi regex"));
/// Thin wrapper that forwards `output` to
/// `crate::secret_redaction::redact_text_best_effort` and returns its result.
///
/// NOTE(review): what is treated as a secret is decided entirely by that helper,
/// which is not visible from this file.
pub(crate) fn redact_secrets(output: &str) -> String {
crate::secret_redaction::redact_text_best_effort(output)
}
/// Heuristically decides whether `output` is binary data rather than text.
///
/// Any NUL character makes the answer `true` immediately. Otherwise the text is
/// considered binary when it has more than 32 characters and more than 5% of
/// them (integer arithmetic) are control characters other than newline,
/// carriage return, tab and ESC.
fn looks_binary(output: &str) -> bool {
    let mut total = 0usize;
    let mut suspicious = 0usize;

    for ch in output.chars() {
        if ch == '\0' {
            return true;
        }
        total += 1;
        let tolerated = matches!(ch, '\n' | '\r' | '\t' | '\x1b');
        if ch.is_control() && !tolerated {
            suspicious += 1;
        }
    }

    // `total > 32` also guarantees the divisor is non-zero.
    total > 32 && suspicious * 100 / total > 5
}
/// Returns a copy of `output` with every match of `ANSI_RE` removed.
fn strip_ansi(output: &str) -> String {
    let cleaned = ANSI_RE.replace_all(output, "");
    cleaned.into_owned()
}
/// Maximum number of error-signal lines examined by `first_uncovered_error_signal`
/// before it gives up and reports that nothing uncovered was found.
const ERROR_SIGNAL_SCAN_LIMIT: usize = 64;
/// Percentage of the byte budget given to the head segment in the error-aware layout.
const ERROR_AWARE_HEAD_PCT: usize = 45;
/// Percentage of the byte budget given to the error window in the error-aware layout;
/// the tail segment receives whatever remains after head and window.
const ERROR_AWARE_WINDOW_PCT: usize = 20;
/// Result of `cap_output`: the resulting text plus which layout produced it.
struct CapOutcome {
/// The text to use; an unchanged copy of the input when no cap was applied.
output: String,
/// `true` only when the error-aware head/window/tail layout was used.
error_window: bool,
}
/// Shortens `output` so that roughly `max_bytes` bytes of the original survive.
///
/// A `max_bytes` of zero disables capping, and text already within the budget
/// is returned unchanged. Otherwise the default layout keeps the first 3/5 of
/// the budget and the last 2/5, separated by a marker line stating how many
/// bytes were dropped. When an error-signal line would fall in the dropped
/// middle, the error-aware layout is tried first and, if it succeeds, the
/// outcome is flagged with `error_window`.
fn cap_output(output: &str, max_bytes: usize) -> CapOutcome {
    let within_budget = max_bytes == 0 || output.len() <= max_bytes;
    if within_budget {
        return CapOutcome {
            output: output.to_owned(),
            error_window: false,
        };
    }

    let head_budget = max_bytes.saturating_mul(3) / 5;
    let tail_budget = max_bytes.saturating_sub(head_budget);
    let tail_begin = output.len().saturating_sub(tail_budget);

    let windowed = first_uncovered_error_signal(output, head_budget, tail_begin)
        .and_then(|signal_start| cap_output_error_aware(output, max_bytes, signal_start));

    match windowed {
        Some(text) => CapOutcome {
            output: text,
            error_window: true,
        },
        None => {
            let head = take_bytes_on_char_boundary(output, head_budget);
            let tail = take_last_bytes_on_char_boundary(output, tail_budget);
            let removed = output.len().saturating_sub(head.len() + tail.len());
            CapOutcome {
                output: format!("{head}\n[Tool output capped: {removed} bytes removed]\n{tail}"),
                error_window: false,
            }
        }
    }
}
/// Builds a head / error-window / tail rendering of `output`.
///
/// The budget `max_bytes` is split using `ERROR_AWARE_HEAD_PCT` for the head,
/// `ERROR_AWARE_WINDOW_PCT` for the window and the remainder for the tail. The
/// window starts at `signal_start`, pushed forward so it never overlaps the
/// head and pulled back so it never starts past the tail. Segments are joined
/// with newlines, with a marker line for every non-empty gap between them.
///
/// Returns `None` when the window budget is zero or the window would be empty.
fn cap_output_error_aware(output: &str, max_bytes: usize, signal_start: usize) -> Option<String> {
    let window_limit = max_bytes.saturating_mul(ERROR_AWARE_WINDOW_PCT) / 100;
    if window_limit == 0 {
        return None;
    }
    let head_limit = max_bytes.saturating_mul(ERROR_AWARE_HEAD_PCT) / 100;
    let tail_limit = max_bytes.saturating_sub(head_limit.saturating_add(window_limit));

    let head = take_bytes_on_char_boundary(output, head_limit);
    let tail = take_last_bytes_on_char_boundary(output, tail_limit);
    let tail_start = output.len() - tail.len();

    let window_start = signal_start.max(head.len()).min(tail_start);
    let window = take_bytes_on_char_boundary(&output[window_start..tail_start], window_limit);
    if window.is_empty() {
        return None;
    }

    let window_end = window_start + window.len();
    let head_gap = window_start - head.len();
    let tail_gap = tail_start - window_end;

    let head_marker = if head_gap > 0 {
        Some(format!("[Tool output capped: {head_gap} bytes removed]"))
    } else {
        None
    };
    let tail_marker = if tail_gap > 0 {
        Some(format!("[Tool output capped: {tail_gap} bytes removed]"))
    } else {
        None
    };

    let segments: Vec<String> = vec![Some(head), head_marker, Some(window), tail_marker, Some(tail)]
        .into_iter()
        .flatten()
        .collect();
    Some(segments.join("\n"))
}
/// Finds the first error-signal line that a plain head/tail cap would drop.
///
/// A line counts as covered by the head when it ends at or before byte
/// `head_limit`, and as covered by the tail when it starts at or after byte
/// `tail_start`. At most `ERROR_SIGNAL_SCAN_LIMIT` error-signal lines are
/// considered.
///
/// Returns the byte offset at which the uncovered line starts, or `None` when
/// there is no such line within the limit.
fn first_uncovered_error_signal(
    output: &str,
    head_limit: usize,
    tail_start: usize,
) -> Option<usize> {
    output
        .split_inclusive('\n')
        // Pair every line (without its trailing newline) with its starting byte offset.
        .scan(0usize, |offset, raw| {
            let start = *offset;
            *offset += raw.len();
            Some((start, raw.strip_suffix('\n').unwrap_or(raw)))
        })
        .filter(|&(_, text)| is_error_signal_line(text))
        .take(ERROR_SIGNAL_SCAN_LIMIT)
        .find(|&(start, text)| start + text.len() > head_limit && start < tail_start)
        .map(|(start, _)| start)
}
fn is_error_signal_line(line: &str) -> bool {
let trimmed = line.trim_start();
has_ascii_ci_prefix(trimmed, "error")
|| has_ascii_ci_prefix(trimmed, "fatal:")
|| has_ascii_ci_prefix(trimmed, "panic")
|| line.contains("error:")
|| line.contains("panicked at")
|| line.contains("Traceback (most recent call last)")
|| line.contains("Exception")
|| line.contains("FAILED")
|| line.contains("assertion")
}
/// Returns `true` when `value` starts with `prefix`, ignoring ASCII case.
///
/// The comparison is done on raw bytes, so it never splits `value` at a
/// position that would matter for UTF-8 validity.
fn has_ascii_ci_prefix(value: &str, prefix: &str) -> bool {
    value
        .as_bytes()
        .get(..prefix.len())
        .is_some_and(|lead| lead.eq_ignore_ascii_case(prefix.as_bytes()))
}
/// Returns the longest prefix of `value` that is at most `limit` bytes long and
/// ends on a UTF-8 character boundary.
fn take_bytes_on_char_boundary(value: &str, limit: usize) -> String {
    let cut = value
        .char_indices()
        .map(|(idx, ch)| idx + ch.len_utf8())
        // Character end offsets are strictly increasing, so stop at the first overshoot.
        .take_while(|&char_end| char_end <= limit)
        .last()
        .unwrap_or(0);
    value[..cut].to_owned()
}
/// Returns the longest suffix of `value` that is at most `limit` bytes long and
/// starts on a UTF-8 character boundary.
///
/// When `value` already fits within `limit` it is returned whole. Otherwise the
/// cut point is probed directly near the end of the string, so the cost does
/// not depend on how much text is being discarded.
fn take_last_bytes_on_char_boundary(value: &str, limit: usize) -> String {
    if value.len() <= limit {
        return value.to_string();
    }
    // `value.len() > limit` holds here, so the subtraction cannot underflow.
    let mut start = value.len() - limit;
    // Step over UTF-8 continuation bytes (at most three). `value.len()` is
    // always a boundary, so the loop is guaranteed to stop.
    while !value.is_char_boundary(start) {
        start += 1;
    }
    value[start..].to_string()
}