Skip to main content

hematite/agent/
truncation.rs

//! Grounded Output Truncation Module.
//! Ports the "Middle-Truncation" patterns from Codex-RS to ensure
//! Hematite preserves exit codes and headers while providing line metadata.
4pub fn formatted_truncate(content: &str, max_bytes: usize) -> String {
5    if content.len() <= max_bytes {
6        return content.to_string();
7    }
8
9    let total_lines = content.lines().count();
10    let truncated = truncate_middle(content, max_bytes);
11
12    format!(
13        "[TRUNCATED: total lines: {}]\n{}\n[... middle truncated to fit budget ...]\n{}",
14        total_lines, truncated.head, truncated.tail
15    )
16}
17
/// The two pieces of text that survive a middle truncation.
///
/// Produced by [`truncate_middle`]. The standard traits are derived so the
/// value can be logged, duplicated, and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TruncatedOutput {
    /// Leading portion of the original text.
    pub head: String,
    /// Trailing portion of the original text; empty when nothing was removed.
    pub tail: String,
}
22
23/// Truncate a string by keeping the beginning and end, removing the middle.
24/// Ensures UTF-8 safety by finding valid character boundaries.
25pub fn truncate_middle(content: &str, max_bytes: usize) -> TruncatedOutput {
26    if content.len() <= max_bytes {
27        return TruncatedOutput {
28            head: content.to_string(),
29            tail: String::new(),
30        };
31    }
32
33    // Keep 40% at the start, 40% at the end (roughly).
34    let head_size = (max_bytes as f32 * 0.4) as usize;
35    let tail_size = (max_bytes as f32 * 0.4) as usize;
36
37    // Find valid UTF-8 boundaries
38    let head_boundary = find_valid_boundary_forward(content, head_size);
39    let tail_boundary = find_valid_boundary_backward(content, content.len() - tail_size);
40
41    TruncatedOutput {
42        head: content[..head_boundary].to_string(),
43        tail: content[tail_boundary..].to_string(),
44    }
45}
46
47/// Returns the longest prefix of `s` that is at most `max_bytes` long and ends on a UTF-8 boundary.
48pub fn safe_head(s: &str, max_bytes: usize) -> &str {
49    let end = find_valid_boundary_forward(s, max_bytes.min(s.len()));
50    &s[..end]
51}
52
/// Borrows a suffix of `s` that is at most `max_bytes` bytes long and starts on a character boundary.
///
/// When `s` already fits in `max_bytes` the whole string is returned.
/// Otherwise the start offset is advanced until it lands on a UTF-8 character
/// boundary, which can make the suffix shorter than `max_bytes`.
pub fn safe_tail(s: &str, max_bytes: usize) -> &str {
    let mut start = match s.len().checked_sub(max_bytes) {
        Some(offset) => offset,
        // `max_bytes` exceeds the length: nothing needs to be cut.
        None => return s,
    };

    // `s.len()` is always a boundary, so this loop is guaranteed to stop.
    while !s.is_char_boundary(start) {
        start += 1;
    }

    &s[start..]
}
65
/// Returns the largest UTF-8 character boundary of `content` that is `<= target`.
///
/// Used to end a *head* slice: the offset only ever moves toward the start of
/// the string, so the resulting prefix never exceeds `target` bytes.
///
/// A `target` beyond the end of `content` is clamped to `content.len()`.
fn find_valid_boundary_forward(content: &str, target: usize) -> usize {
    // Clamp first: every index past the end is a non-boundary, so an
    // oversized target would otherwise be walked down one byte at a time.
    let mut pos = target.min(content.len());
    // Index 0 is always a boundary, so this cannot underflow.
    while !content.is_char_boundary(pos) {
        pos -= 1;
    }
    pos
}
73
/// Returns the smallest UTF-8 character boundary of `content` that is `>= target`.
///
/// Used to start a *tail* slice: the offset only ever moves toward the end of
/// the string, so the resulting suffix never grows beyond what `target` allows.
///
/// A `target` beyond the end of `content` is clamped to `content.len()`, so
/// the result is always a valid slicing index.
fn find_valid_boundary_backward(content: &str, target: usize) -> usize {
    // Without the clamp an out-of-range target would be returned as-is and
    // panic as soon as the caller slices with it.
    let mut pos = target.min(content.len());
    // `content.len()` is always a boundary, so this loop terminates.
    while !content.is_char_boundary(pos) {
        pos += 1;
    }
    pos
}
81
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII input: each side gets 40% of the budget, truncated toward zero.
    #[test]
    fn test_middle_truncation() {
        let input = "1234567890";
        let result = truncate_middle(input, 4);
        // 4 bytes budget -> 40% is 1.6 bytes -> 1 byte head, 1 byte tail
        assert_eq!(result.head, "1");
        assert_eq!(result.tail, "0");
    }

    /// Multi-byte input: cuts must land on character boundaries.
    #[test]
    fn test_utf8_boundary_safety() {
        // U+1F980 (crab emoji) is written as an escape so the test does not
        // depend on how this file is encoded. Each crab is 4 bytes in UTF-8,
        // 20 bytes in total.
        let crab = "\u{1F980}";
        let input = crab.repeat(5);
        assert_eq!(input.len(), 20);

        let result = truncate_middle(&input, 10);
        // 10 bytes budget -> 4 byte head, 4 byte tail
        assert_eq!(result.head, crab);
        assert_eq!(result.tail, crab);
    }
}