// hematite/agent/truncation.rs

//! Grounded Output Truncation Module.
//! Ports the "Middle-Truncation" patterns from Codex-RS to ensure
//! Hematite preserves exit codes and headers while providing line metadata.
4pub fn formatted_truncate(content: &str, max_bytes: usize) -> String {
5    if content.len() <= max_bytes {
6        return content.to_string();
7    }
8
9    let total_lines = content.lines().count();
10    let truncated = truncate_middle(content, max_bytes);
11
12    format!(
13        "[TRUNCATED: total lines: {}]\n{}\n[... middle truncated to fit budget ...]\n{}",
14        total_lines, truncated.head, truncated.tail
15    )
16}
17
/// The two pieces of text kept by [`truncate_middle`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TruncatedOutput {
    /// Leading portion of the input (the whole input when nothing was cut).
    pub head: String,
    /// Trailing portion of the input; empty when nothing was cut.
    pub tail: String,
}
22
23/// Truncate a string by keeping the beginning and end, removing the middle.
24/// Ensures UTF-8 safety by finding valid character boundaries.
25pub fn truncate_middle(content: &str, max_bytes: usize) -> TruncatedOutput {
26    if content.len() <= max_bytes {
27        return TruncatedOutput {
28            head: content.to_string(),
29            tail: String::new(),
30        };
31    }
32
33    // Keep 40% at the start, 40% at the end (roughly).
34    let head_size = (max_bytes as f32 * 0.4) as usize;
35    let tail_size = (max_bytes as f32 * 0.4) as usize;
36
37    // Find valid UTF-8 boundaries
38    let head_boundary = find_valid_boundary_forward(content, head_size);
39    let tail_boundary = find_valid_boundary_backward(content, content.len() - tail_size);
40
41    TruncatedOutput {
42        head: content[..head_boundary].to_string(),
43        tail: content[tail_boundary..].to_string(),
44    }
45}
46
/// Returns the largest byte offset `<= target` that is a UTF-8 character
/// boundary of `content`.
///
/// Despite the name, the search moves toward the start of the string: it is
/// used for the end of the head slice, which must not grow past `target`.
fn find_valid_boundary_forward(content: &str, target: usize) -> usize {
    // Offset 0 is always a character boundary, so the search always succeeds;
    // the fallback only exists to satisfy the type.
    (0..=target)
        .rev()
        .find(|&offset| content.is_char_boundary(offset))
        .unwrap_or(0)
}
54
/// Returns the smallest byte offset `>= target` that is a UTF-8 character
/// boundary of `content`, never exceeding `content.len()`.
///
/// Despite the name, the search moves toward the end of the string: it is
/// used for the start of the tail slice, which must not begin before `target`.
/// A `target` past the end of `content` is clamped to `content.len()` so the
/// result is always a valid slicing index.
fn find_valid_boundary_backward(content: &str, target: usize) -> usize {
    let end = content.len();
    // `end` is always a character boundary, so the search always succeeds;
    // the fallback only exists to satisfy the type.
    (target.min(end)..=end)
        .find(|&offset| content.is_char_boundary(offset))
        .unwrap_or(end)
}
62
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII input: each side receives 40% of the budget, rounded down.
    #[test]
    fn test_middle_truncation() {
        // 4 bytes budget -> 40% is 1.6 bytes -> 1 byte head, 1 byte tail
        let TruncatedOutput { head, tail } = truncate_middle("1234567890", 4);
        assert_eq!(head, "1");
        assert_eq!(tail, "0");
    }

    /// Multi-byte input: cut points must land on character boundaries.
    #[test]
    fn test_utf8_boundary_safety() {
        // Each crab is 4 bytes, 20 bytes in total.
        let crabs = "🦀🦀🦀🦀🦀";
        // 10 bytes budget -> 4 byte head, 4 byte tail
        let TruncatedOutput { head, tail } = truncate_middle(crabs, 10);
        assert_eq!(head, "🦀");
        assert_eq!(tail, "🦀");
    }
}