tool-output-truncate 0.1.0

Truncate tool output (file reads, command runs, search hits) before adding to LLM message history. Char-aware head/middle/tail strategies with a configurable elision marker. Zero deps.
Documentation
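//! Character-aware truncation.
//!
//! All boundaries are computed on Unicode codepoint (`char`) indices, so a
//! multi-byte UTF-8 sequence is never split. Boundaries are not
//! grapheme-aware: a cluster built from several codepoints (e.g. a base
//! letter plus a combining mark) can still be divided. Char counting is O(n),
//! but the input is bounded by tool-output size; in practice this is a few ms
//! at worst.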

/// Render the placeholder inserted where text was removed.
///
/// `omitted` is the number of characters that were dropped; it is printed in
/// decimal inside the marker. The marker is surrounded by two newlines on
/// each side.
fn elision(omitted: usize) -> String {
    let mut marker = String::from("\n\n[");
    marker.push_str(&omitted.to_string());
    marker.push_str(" chars truncated]\n\n");
    marker
}

/// Number of `char`s (Unicode scalar values) in `s`.
///
/// A multi-byte character counts once, so the result can be smaller than
/// `s.len()`, which is measured in bytes.
fn char_len(s: &str) -> usize {
    s.char_indices().count()
}

/// Byte offset at which the `n`th char of `s` (counting from the front,
/// zero-based) begins.
///
/// Callers are expected to pass `n <= char_len(s)`. When `s` has `n` or fewer
/// chars there is no such char and the result is `s.len()`, i.e. the offset
/// just past the end of the string.
fn nth_char_boundary_from_front(s: &str, n: usize) -> usize {
    // `nth(0)` yields offset 0 for a non-empty string and falls back to
    // `s.len()` (also 0) for an empty one, so `n == 0` needs no special case.
    s.char_indices()
        .map(|(offset, _)| offset)
        .nth(n)
        .unwrap_or(s.len())
}

/// Byte offset at which the last `n` chars of `s` begin.
///
/// Callers are expected to pass `n <= char_len(s)`. For `n == 0` the result
/// is `s.len()` (an empty suffix). If `n` is at least the number of chars in
/// `s`, the result is `0` (the whole string).
fn nth_char_boundary_from_back(s: &str, n: usize) -> usize {
    if n == 0 {
        return s.len();
    }
    // Walk backwards from the end so only the trailing `n` chars are decoded,
    // instead of counting the whole string and then scanning from the front.
    // `nth(n - 1)` on the reversed iterator is the `n`th char from the back;
    // running out of chars means the suffix covers the entire string.
    s.char_indices()
        .rev()
        .nth(n - 1)
        .map_or(0, |(offset, _)| offset)
}

/// Retain at most `max_chars` characters from the front of `s`.
///
/// When `s` is longer than `max_chars`, the kept prefix is followed by an
/// elision marker reporting how many characters were dropped. A string that
/// already fits is returned as an unchanged copy.
pub fn truncate_head(s: &str, max_chars: usize) -> String {
    let total = char_len(s);
    match total.checked_sub(max_chars) {
        // `None`: budget exceeds the length; `Some(0)`: exact fit.
        None | Some(0) => s.to_owned(),
        Some(omitted) => {
            let (kept, _) = s.split_at(nth_char_boundary_from_front(s, max_chars));
            let mut out = String::from(kept);
            out.push_str(&elision(omitted));
            out
        }
    }
}

/// Retain at most `max_chars` characters from the back of `s`.
///
/// When `s` is longer than `max_chars`, the output starts with an elision
/// marker reporting how many characters were dropped, followed by the kept
/// suffix. A string that already fits is returned as an unchanged copy.
pub fn truncate_tail(s: &str, max_chars: usize) -> String {
    let total = char_len(s);
    if max_chars >= total {
        return s.to_owned();
    }
    let (_, kept) = s.split_at(nth_char_boundary_from_back(s, max_chars));
    let mut out = elision(total - max_chars);
    out.push_str(kept);
    out
}

/// Retain at most `max_chars` characters of `s`, split between its start and
/// its end, with an elision marker in place of the removed middle.
///
/// The budget is divided in half; when `max_chars` is odd the extra
/// character goes to the tail. `max_chars` covers retained text only (the
/// marker itself adds ~30 chars of overhead). A string that already fits is
/// returned as an unchanged copy.
pub fn truncate_middle(s: &str, max_chars: usize) -> String {
    let total = char_len(s);
    if max_chars >= total {
        return s.to_owned();
    }
    let head_chars = max_chars / 2;
    let tail_chars = max_chars - head_chars;
    let prefix = &s[..nth_char_boundary_from_front(s, head_chars)];
    let suffix = &s[nth_char_boundary_from_back(s, tail_chars)..];
    // head_chars + tail_chars == max_chars, so the dropped middle is exactly
    // total - max_chars characters long.
    let marker = elision(total - max_chars);
    [prefix, marker.as_str(), suffix].concat()
}

#[cfg(test)]
mod tests {
    //! Unit tests for the three truncation strategies: passthrough cases,
    //! which side of the input is kept, the reported omitted count, and
    //! multi-byte safety.

    use super::*;

    #[test]
    fn passthrough_when_under_cap() {
        assert_eq!(truncate_head("hello", 100), "hello");
        assert_eq!(truncate_tail("hello", 100), "hello");
        assert_eq!(truncate_middle("hello", 100), "hello");
    }

    #[test]
    fn passthrough_when_exactly_at_cap() {
        assert_eq!(truncate_head("hello", 5), "hello");
        assert_eq!(truncate_tail("hello", 5), "hello");
        assert_eq!(truncate_middle("hello", 5), "hello");
    }

    #[test]
    fn truncate_head_keeps_prefix() {
        let s = "abcdefghij";
        let out = truncate_head(s, 4);
        assert!(out.starts_with("abcd"));
        assert!(out.contains("6 chars truncated"));
    }

    #[test]
    fn truncate_tail_keeps_suffix() {
        let s = "abcdefghij";
        let out = truncate_tail(s, 4);
        assert!(out.ends_with("ghij"));
        assert!(out.contains("6 chars truncated"));
    }

    #[test]
    fn truncate_middle_keeps_both_ends() {
        let s = "abcdefghij";
        let out = truncate_middle(s, 4);
        assert!(out.starts_with("ab"));
        assert!(out.ends_with("ij"));
        assert!(out.contains("6 chars truncated"));
    }

    #[test]
    fn handles_multibyte_chars_safely() {
        let s = "\u{1f980}\u{1f980}\u{1f980}\u{1f980}\u{1f980}\u{1f980}\u{1f980}\u{1f980}";
        // 8 crabs, each 4 bytes -> 32 bytes total but 8 chars.
        let out = truncate_head(s, 3);
        // Compare the whole output: exactly three intact crabs are kept and
        // nothing else leaks through. (A `String` is always valid UTF-8, so
        // checking char boundaries on the result would prove nothing.)
        assert_eq!(
            out,
            "\u{1f980}\u{1f980}\u{1f980}\n\n[5 chars truncated]\n\n"
        );
    }

    #[test]
    fn tail_and_middle_handle_multibyte_chars() {
        // Mixed 1-, 2-, 3- and 4-byte chars: a, e-acute, a CJK char, a crab,
        // then b and the same three again -> 8 chars total.
        let s = "a\u{e9}\u{65e5}\u{1f980}b\u{e9}\u{65e5}\u{1f980}";

        // Last three chars survive.
        assert_eq!(
            truncate_tail(s, 3),
            "\n\n[5 chars truncated]\n\n\u{e9}\u{65e5}\u{1f980}"
        );

        // max=3 -> head=1, tail=2
        assert_eq!(
            truncate_middle(s, 3),
            "a\n\n[5 chars truncated]\n\n\u{65e5}\u{1f980}"
        );
    }

    #[test]
    fn middle_with_odd_budget() {
        // max=5 -> head=2, tail=3
        let out = truncate_middle("0123456789", 5);
        assert!(out.starts_with("01"));
        assert!(out.ends_with("789"));
    }

    #[test]
    fn empty_input_passthrough() {
        assert_eq!(truncate_head("", 10), "");
        assert_eq!(truncate_tail("", 10), "");
        assert_eq!(truncate_middle("", 10), "");
    }

    #[test]
    fn zero_max_chars_keeps_nothing() {
        // With a zero budget every strategy must drop all of the input and
        // report the full length as omitted.
        let outputs = [
            truncate_head("hello world", 0),
            truncate_tail("hello world", 0),
            truncate_middle("hello world", 0),
        ];
        for out in &outputs {
            assert!(!out.contains("hello"));
            assert!(!out.contains("world"));
            assert!(out.contains("11 chars truncated"));
        }
    }

    #[test]
    fn omitted_count_is_accurate() {
        // 1000 chars total, keep 100 -> 900 omitted
        let s = "x".repeat(1000);
        let out = truncate_head(&s, 100);
        assert!(out.contains("900 chars truncated"));
    }
}