search-semantically 0.1.10

Embeddable semantic code search with multi-signal POEM ranking
Documentation
/// Shortens `s` so that the returned string occupies at most `max_bytes` bytes.
///
/// * When `s` already fits (`s.len() <= max_bytes`) it is returned as an owned copy.
/// * When `max_bytes` is greater than 3, the text is cut so that a trailing `"..."`
///   still fits inside the budget, and the ellipsis is appended.
/// * When `max_bytes` is 3 or less there is no room for text plus an ellipsis, so
///   the text is cut to the budget and no ellipsis is added.
///
/// The cut never splits a UTF-8 sequence: if the ideal cut offset lies inside a
/// multi-byte character, the cut moves backwards to the start of that character.
/// As a consequence the result may be shorter than `max_bytes`, and may be empty.
pub fn truncate_with_ellipsis(s: &str, max_bytes: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.len() <= max_bytes {
        return s.to_owned();
    }

    // Split the byte budget between kept text and the suffix. Budgets too small
    // to hold anything besides the ellipsis get plain truncation instead.
    let (text_budget, suffix) = if max_bytes <= ELLIPSIS.len() {
        (max_bytes, "")
    } else {
        (max_bytes - ELLIPSIS.len(), ELLIPSIS)
    };

    // Largest char boundary not exceeding the text budget. Offset 0 is always a
    // boundary, so the search cannot come up empty; the fallback is defensive.
    let cut = (0..=text_budget)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + suffix.len());
    shortened.push_str(&s[..cut]);
    shortened.push_str(suffix);
    shortened
}

#[cfg(test)]
mod tests {
    //! Unit tests for `truncate_with_ellipsis`.
    //!
    //! Assertions pin the exact output wherever it is known. A returned `String`
    //! is valid UTF-8 by construction, so checking validity proves nothing; what
    //! matters is *where* the cut landed, and only an exact comparison shows that.

    use super::*;

    /// Input shorter than the budget comes back untouched.
    #[test]
    fn short_string_unchanged() {
        assert_eq!(truncate_with_ellipsis("hello", 10), "hello");
    }

    /// Input exactly as long as the budget is not truncated.
    #[test]
    fn exact_length_unchanged() {
        assert_eq!(truncate_with_ellipsis("hello", 5), "hello");
    }

    /// ASCII input uses the full budget: 117 bytes of text plus the ellipsis.
    #[test]
    fn long_ascii_truncated() {
        let s = "a".repeat(200);
        let result = truncate_with_ellipsis(&s, 120);
        assert_eq!(result.len(), 120);
        assert_eq!(result, format!("{}...", "a".repeat(117)));
    }

    /// A 3-byte character straddling the cut is dropped whole.
    #[test]
    fn multibyte_at_cut_is_valid_utf8() {
        // U+2019 RIGHT SINGLE QUOTATION MARK is 3 bytes: 0xE2 0x80 0x99.
        // It occupies bytes 115..=117, so target=117 is its last continuation
        // byte and the cut must retreat to 115 (the start of the character).
        let prefix = "a".repeat(115);
        let s = format!("{prefix}\u{2019}trailing text to make it long enough");
        let result = truncate_with_ellipsis(&s, 120);
        assert!(result.len() <= 120);
        assert_eq!(result, format!("{prefix}..."));
    }

    /// A 4-byte character straddling the cut is dropped whole.
    #[test]
    fn emoji_at_cut_is_valid_utf8() {
        // U+1F600 is 4 bytes: F0 9F 98 80. Place it so target=117 lands inside.
        // bytes 0..=115 = 'b', bytes 116..=119 = emoji. target=117 is a continuation
        // byte; loop must walk back to 116 (boundary at start of emoji).
        let prefix = "b".repeat(116);
        let s = format!("{prefix}\u{1F600}more text here");
        let result = truncate_with_ellipsis(&s, 120);
        assert_eq!(result, format!("{}...", "b".repeat(116)));
    }

    /// Budgets of 3 bytes or fewer truncate plainly, without an ellipsis.
    #[test]
    fn max_bytes_less_than_or_equal_3_no_ellipsis() {
        let s = "hello world";
        assert_eq!(truncate_with_ellipsis(s, 1), "h");
        assert_eq!(truncate_with_ellipsis(s, 2), "he");
        // 3 is the boundary value: still plain truncation, not "...".
        assert_eq!(truncate_with_ellipsis(s, 3), "hel");
        // One byte more and the ellipsis form takes over.
        assert_eq!(truncate_with_ellipsis(s, 4), "h...");
    }

    /// With a tiny budget and a leading multi-byte character nothing fits.
    #[test]
    fn max_bytes_le_3_with_multibyte_walks_back_to_zero() {
        // max_bytes=2 with a 3-byte leading char: byte 2 is a continuation byte,
        // loop walks back to 0, returning "".
        let result = truncate_with_ellipsis("\u{2019}hello", 2);
        assert_eq!(result, "");
    }

    /// A zero budget yields an empty string for non-empty input.
    #[test]
    fn max_bytes_zero() {
        let result = truncate_with_ellipsis("hello", 0);
        assert_eq!(result, "");
    }
}