/// Returns the longest prefix of `s` that fits in `max_bytes` bytes without
/// splitting a UTF-8 character.
///
/// When `s` already fits it is returned unchanged. Otherwise the cut is placed
/// at the end of the last character that lies entirely within the first
/// `max_bytes` bytes, so the result may be shorter than `max_bytes` (and is
/// empty when not even the first character fits).
pub fn safe_truncate(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }

    // Character end offsets are strictly increasing, so the last one that
    // still satisfies the limit is the furthest safe cut point.
    let cut = s
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&end| end <= max_bytes)
        .last()
        .unwrap_or(0);

    &s[..cut]
}
/// Rounds `target_pos` down to the nearest UTF-8 character start in `s`.
///
/// Returns `s.len()` when `target_pos` is at or past the end of the string.
/// Otherwise returns the byte offset of the character that starts at, or
/// contains, `target_pos`, so the result is always a valid slice index.
pub fn find_char_boundary(s: &str, target_pos: usize) -> usize {
    if target_pos >= s.len() {
        return s.len();
    }

    // Walk the character starts in order and remember the last one that does
    // not overshoot the target; offset 0 is the answer when `target_pos` is 0.
    let mut boundary = 0;
    for (start, _) in s.char_indices() {
        if start > target_pos {
            break;
        }
        boundary = start;
    }
    boundary
}
/// Extracts the text around a match without ever slicing through a UTF-8
/// character.
///
/// # Arguments
///
/// * `content` - the full text being searched.
/// * `match_pos` - byte offset at which the match starts.
/// * `match_len` - length of the match in bytes.
/// * `context_before` - number of bytes of context wanted before the match.
/// * `context_after` - number of bytes of context wanted after the match.
///
/// # Returns
///
/// `(snippet, needs_prefix, needs_suffix)` where `needs_prefix` is `true` when
/// the snippet does not start at the beginning of `content` and `needs_suffix`
/// is `true` when it does not reach the end of `content`.
///
/// The requested window is widened to character boundaries: the start is
/// rounded down to a character start, and the end is moved to the first
/// character start strictly after the requested end offset. The snippet
/// therefore includes the character that begins at (or contains) that offset.
///
/// All offset arithmetic saturates, so oversized lengths or context sizes
/// clamp to the bounds of `content` instead of overflowing.
pub fn extract_snippet_safe(
    content: &str,
    match_pos: usize,
    match_len: usize,
    context_before: usize,
    context_after: usize,
) -> (String, bool, bool) {
    let byte_start = match_pos.saturating_sub(context_before);
    // Saturating adds: a plain `+` panics on overflow in checked builds and
    // wraps otherwise, which would produce an end offset before the start.
    let byte_end = match_pos
        .saturating_add(match_len)
        .saturating_add(context_after)
        .min(content.len());

    // Rounds down to a character start (0 stays 0, past-the-end becomes len).
    let start = find_char_boundary(content, byte_start);

    let end = if byte_end >= content.len() {
        content.len()
    } else {
        // First character start strictly after the requested end; falls back
        // to the end of the string when the window ends inside the last char.
        content
            .char_indices()
            .map(|(i, _)| i)
            .find(|&i| i > byte_end)
            .unwrap_or(content.len())
    };

    let snippet = content[start..end].to_string();
    let needs_prefix = start > 0;
    let needs_suffix = end < content.len();
    (snippet, needs_prefix, needs_suffix)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// In ASCII text every byte offset is a character boundary.
    #[test]
    fn test_safe_truncate_ascii() {
        let text = "Hello, World!";
        let cases = [(5, "Hello"), (100, "Hello, World!"), (0, "")];
        for &(limit, expected) in &cases {
            assert_eq!(safe_truncate(text, limit), expected);
        }
    }

    /// `世` occupies bytes 6..9, so limits 7 and 8 fall inside it and must
    /// fall back to the boundary at 6.
    #[test]
    fn test_safe_truncate_unicode() {
        let text = "Hello 世界";
        let cases = [
            (5, "Hello"),
            (6, "Hello "),
            (7, "Hello "),
            (8, "Hello "),
            (9, "Hello 世"),
        ];
        for &(limit, expected) in &cases {
            assert_eq!(safe_truncate(text, limit), expected);
        }
    }

    /// `👋` occupies bytes 3..7; a limit of 4 lands inside it.
    #[test]
    fn test_safe_truncate_emoji() {
        let text = "Hi 👋 there";
        let cases = [(2, "Hi"), (3, "Hi "), (4, "Hi "), (7, "Hi 👋")];
        for &(limit, expected) in &cases {
            assert_eq!(safe_truncate(text, limit), expected);
        }
    }

    /// Offsets inside a multi-byte character round down to its start, and
    /// offsets past the end clamp to the string length.
    #[test]
    fn test_find_char_boundary() {
        let text = "Hello 世界";
        let cases = [(0, 0), (5, 5), (7, 6), (100, text.len())];
        for &(target, expected) in &cases {
            assert_eq!(find_char_boundary(text, target), expected);
        }
    }

    /// The window ends at byte 20, and the character starting there (`j`) is
    /// included in the snippet.
    #[test]
    fn test_extract_snippet_safe() {
        let content = "The quick brown fox jumps over the lazy dog";
        let (text, has_prefix, has_suffix) = extract_snippet_safe(content, 10, 5, 6, 5);
        assert_eq!(text, "quick brown fox j");
        assert!(has_prefix);
        assert!(has_suffix);
    }

    /// The match covers both multi-byte characters; neither may be split.
    #[test]
    fn test_extract_snippet_safe_unicode() {
        let content = "Hello 世界, this is a test";
        let (text, has_prefix, has_suffix) = extract_snippet_safe(content, 6, 6, 3, 3);
        assert!(text.contains("世界"));
        assert!(has_prefix);
        assert!(has_suffix);
    }
}