Skip to main content

hematite/agent/
truncation.rs

1/// Grounded Output Truncation Module.
2/// Ports the "Middle-Truncation" patterns from Codex-RS to ensure
3/// Hematite preserves exit codes and headers while providing line metadata.
4
5pub fn formatted_truncate(content: &str, max_bytes: usize) -> String {
6    if content.len() <= max_bytes {
7        return content.to_string();
8    }
9
10    let total_lines = content.lines().count();
11    let truncated = truncate_middle(content, max_bytes);
12
13    format!(
14        "[TRUNCATED: total lines: {}]\n{}\n[... middle truncated to fit budget ...]\n{}",
15        total_lines, truncated.head, truncated.tail
16    )
17}
18
/// The two pieces of text that survive a middle truncation.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TruncatedOutput {
    /// Text kept from the beginning of the original content.
    pub head: String,
    /// Text kept from the end of the original content.
    pub tail: String,
}
23
24/// Truncate a string by keeping the beginning and end, removing the middle.
25/// Ensures UTF-8 safety by finding valid character boundaries.
26pub fn truncate_middle(content: &str, max_bytes: usize) -> TruncatedOutput {
27    if content.len() <= max_bytes {
28        return TruncatedOutput {
29            head: content.to_string(),
30            tail: String::new(),
31        };
32    }
33
34    // Keep 40% at the start, 40% at the end (roughly).
35    let head_size = (max_bytes as f32 * 0.4) as usize;
36    let tail_size = (max_bytes as f32 * 0.4) as usize;
37
38    // Find valid UTF-8 boundaries
39    let head_boundary = find_valid_boundary_forward(content, head_size);
40    let tail_boundary = find_valid_boundary_backward(content, content.len() - tail_size);
41
42    TruncatedOutput {
43        head: content[..head_boundary].to_string(),
44        tail: content[tail_boundary..].to_string(),
45    }
46}
47
/// Returns the largest UTF-8 character boundary of `content` that is `<= target`.
///
/// The search moves towards the start of the string, so the returned index
/// never exceeds `target`. A `target` past the end of `content` yields
/// `content.len()`.
fn find_valid_boundary_forward(content: &str, target: usize) -> usize {
    // Clamp first so an oversized target costs nothing instead of counting
    // down one byte at a time from `target`.
    let start = target.min(content.len());
    (0..=start)
        .rev()
        .find(|&idx| content.is_char_boundary(idx))
        // Index 0 is always a boundary, so the search cannot actually fail.
        .unwrap_or(0)
}
55
/// Returns the smallest UTF-8 character boundary of `content` that is `>= target`.
///
/// The search moves towards the end of the string. A `target` past the end of
/// `content` is clamped to `content.len()`, so the result is always a valid
/// slice index for `content`.
fn find_valid_boundary_backward(content: &str, target: usize) -> usize {
    let end = content.len();
    // Without the clamp an oversized target would be returned unchanged and
    // panic as soon as it is used to slice `content`.
    let start = target.min(end);
    (start..=end)
        .find(|&idx| content.is_char_boundary(idx))
        // `content.len()` is always a boundary, so the search cannot actually fail.
        .unwrap_or(end)
}
63
#[cfg(test)]
mod tests {
    use super::*;

    /// A 4-byte budget leaves 40% = 1.6 bytes per side, i.e. one byte of head
    /// and one byte of tail.
    #[test]
    fn test_middle_truncation() {
        let TruncatedOutput { head, tail } = truncate_middle("1234567890", 4);
        assert_eq!(head, "1");
        assert_eq!(tail, "0");
    }

    /// A 10-byte budget leaves 4 bytes per side, which is exactly one crab.
    #[test]
    fn test_utf8_boundary_safety() {
        // Each crab is 4 bytes, 20 bytes in total.
        let crabs = "🦀🦀🦀🦀🦀";
        let TruncatedOutput { head, tail } = truncate_middle(crabs, 10);
        assert_eq!(head, "🦀");
        assert_eq!(tail, "🦀");
    }
}