Skip to main content

deepstrike_core/context/
text.rs

//! UTF-8-safe text truncation for context render and compression.
//!
//! All byte-index cuts must land on `char` boundaries — slicing a `str`
//! mid-scalar panics at runtime, in debug and release builds alike.
5
/// Returns the longest prefix of `text` that fits in `max_bytes` UTF-8 bytes.
///
/// The cut never lands inside a multi-byte scalar value: when `max_bytes`
/// falls in the middle of a character, the prefix is shortened to the
/// preceding `char` boundary. If `text` already fits, it is returned unchanged.
pub fn truncate_bytes_at_char_boundary(text: &str, max_bytes: usize) -> &str {
    if max_bytes >= text.len() {
        return text;
    }
    // Walk backwards from the budget to the nearest boundary. Index 0 is
    // always a char boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    &text[..cut]
}
18
19/// Truncate `text` to at most `max_bytes` bytes on a char boundary and append `suffix`.
20pub fn truncate_with_suffix(text: &str, max_bytes: usize, suffix: &str) -> String {
21    let prefix = truncate_bytes_at_char_boundary(text, max_bytes);
22    format!("{prefix}{suffix}")
23}
24
/// Proportional byte budget for render-time truncation.
///
/// Scales the byte length of `text` by `remaining / total_tokens`, i.e. the
/// share of the message's estimated tokens that should be kept.
///
/// Returns `0` when either `total_tokens` or `remaining` is zero. Otherwise
/// the result is at least one byte (for non-empty `text`) and never more than
/// `text.len()`, even if `remaining` exceeds `total_tokens`.
///
/// The result is a raw byte count and is not aligned to a `char` boundary.
pub fn proportional_byte_keep(text: &str, total_tokens: u32, remaining: u32) -> usize {
    if total_tokens == 0 || remaining == 0 {
        return 0;
    }
    // Widen before multiplying: `len * remaining` needs up to 96 bits on
    // 64-bit targets and easily exceeds 32 bits on 32-bit targets, where a
    // `usize` product would panic (debug) or wrap to a bogus budget (release).
    let len = text.len() as u128;
    let scaled = len * u128::from(remaining) / u128::from(total_tokens);
    // Clamp while still wide; the clamped value is <= `text.len()`, so the
    // narrowing cast below is lossless.
    let keep = scaled.max(1).min(len);
    keep as usize
}
34
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn truncate_respects_char_boundary_for_cjk() {
        // Each of the four characters is 3 bytes (12 total), so byte 5 sits
        // inside 好 and the cut must fall back to byte 3.
        let cjk = "你好世界";
        let clipped = truncate_bytes_at_char_boundary(cjk, 5);
        assert_eq!(clipped, "你");
        // A budget equal to the full length leaves the input untouched.
        let untouched = truncate_bytes_at_char_boundary(cjk, 12);
        assert_eq!(untouched, cjk);
    }

    #[test]
    fn truncate_with_suffix_on_cjk() {
        // The suffix follows the boundary-aligned prefix.
        assert_eq!(truncate_with_suffix("你好世界", 5, "…"), "你…");
    }

    #[test]
    fn proportional_keep_never_exceeds_len() {
        let sample = "你好";
        let byte_len = sample.len();
        // Keeping every token keeps every byte (6 for two 3-byte characters).
        assert_eq!(proportional_byte_keep(sample, 10, 10), 6);
        // A partial budget stays within the text length.
        assert!(proportional_byte_keep(sample, 10, 3) <= byte_len);
    }
}