char_fns 0.1.1

unicode character-indexed string manipulation
Documentation
//! This crate provides the `CharFns` trait, which is implemented
//! for `str` and provides methods for Unicode character-indexed
//! string manipulation.

/// Provides methods for Unicode character-indexed string
/// manipulation.
///
/// Every `index` and `len` taken by these methods counts characters,
/// not bytes. The `str` implementation in this file counts Rust `char`s.
pub trait CharFns {

    /// Returns the number of Unicode characters in the string.
    ///
    /// This is generally different from the byte length as soon as the
    /// string contains non-ASCII text.
    fn char_len(&self) -> usize;

    /// Splits the string at the Unicode character index.
    ///
    /// Returns `(before, after)`, where `before` holds the first `index`
    /// characters and `after` holds the rest. Both halves borrow from
    /// `self`. In the `str` implementation an `index` at or past the end
    /// of the string yields the whole string and `""`.
    fn char_split(&self, index: usize) -> (&str, &str);

    /// Replaces a range of Unicode characters with a new substring.
    ///
    /// The range starts at character `index` and covers `len` characters;
    /// `text` is put in its place and the result is returned as a new
    /// `String`. A `len` of zero is a pure insertion and an empty `text`
    /// is a pure deletion. In the `str` implementation a range that runs
    /// past the end of the string is cut off at the end.
    fn char_replace(&self, index: usize, len: usize, text: &str) -> String;
}

impl CharFns for str {
    // One pass over the decoded characters; the result counts `char`s,
    // not bytes.
    fn char_len(&self) -> usize {
        self.chars().count()
    }

    // `split_at` works on byte offsets, so the character index is
    // translated into the byte offset of that character first.
    fn char_split(&self, index: usize) -> (&str, &str) {
        let offset = byte_index(self, index);
        self.split_at(offset)
    }

    // Cut the string into head / replaced range / tail by two character
    // splits, then glue head, the new text and tail back together.
    fn char_replace(&self, index: usize, len: usize, text: &str) -> String {
        let (head, rest) = self.char_split(index);
        // The first half of this split is the range being replaced; only
        // what follows it is kept.
        let tail = rest.char_split(len).1;
        [head, text, tail].concat()
    }
}

/// Converts a character index into the byte offset at which that
/// character starts in `string`.
///
/// When `char_index` is at or beyond the number of characters, the
/// byte length of `string` is returned, i.e. the offset one past the
/// last character.
#[inline]
fn byte_index(string: &str, char_index: usize) -> usize {
    string
        .char_indices()
        // Each item is (byte offset of the char, char); pick the one at
        // the requested character position.
        .nth(char_index)
        // Running off the end means the position is past the last
        // character, so fall back to the end of the string.
        .map_or(string.len(), |(offset, _)| offset)
}

#[cfg(test)]
mod tests {
    use super::*;

    // All non-ASCII fixtures are written as `\u{...}` escapes so that this
    // file stays pure ASCII and the test data cannot be damaged by a
    // re-encoding of the source. The particular glyphs do not matter; what
    // the tests rely on is the mix of 1-, 2-, 3- and 4-byte characters.

    /// Two 4-byte characters.
    const FACES: &str = "\u{1F601}\u{1F600}";

    /// Nine characters: three 4-byte, one 2-byte, four ASCII, one 3-byte.
    const MIXED: &str = "\u{1F917}\u{1F40B}\u{1F420}\u{F1}Xwow\u{20AE}";

    /// Six characters: "he", three 3-byte symbols, "o".
    const SETS: &str = "he\u{2205}\u{2286}\u{2287}o";

    /// Three characters used as the replacement text: two 3-byte, one ASCII.
    const PATCH: &str = "\u{2286}\u{2287}o";

    #[test]
    fn test_len() {
        assert_eq!("".char_len(), 0);
        assert_eq!("hello".char_len(), 5);
        assert_eq!(FACES.char_len(), 2);
    }

    #[test]
    fn test_split() {
        assert_eq!("".char_split(0), ("", ""));
        assert_eq!("hello".char_split(2), ("he", "llo"));
        assert_eq!(
            MIXED.char_split(5),
            ("\u{1F917}\u{1F40B}\u{1F420}\u{F1}X", "wow\u{20AE}")
        );
    }

    #[test]
    fn test_replace_empty() {
        assert_eq!("".char_replace(0, 0, ""), "");
        assert_eq!("".char_replace(0, 0, "\u{20A2}\u{20B8}"), "\u{20A2}\u{20B8}");
    }

    #[test]
    fn test_replace_delete_only() {
        // Delete at the start, in the middle and at the end.
        assert_eq!(SETS.char_replace(0, 3, ""), "\u{2286}\u{2287}o");
        assert_eq!(SETS.char_replace(2, 2, ""), "he\u{2287}o");
        assert_eq!(SETS.char_replace(4, 2, ""), "he\u{2205}\u{2286}");
    }

    #[test]
    fn test_replace_insert_only() {
        // Insert before the first character, after the first character and
        // after the last character (index 9 == character count).
        assert_eq!(
            MIXED.char_replace(0, 0, PATCH),
            "\u{2286}\u{2287}o\u{1F917}\u{1F40B}\u{1F420}\u{F1}Xwow\u{20AE}"
        );
        assert_eq!(
            MIXED.char_replace(1, 0, PATCH),
            "\u{1F917}\u{2286}\u{2287}o\u{1F40B}\u{1F420}\u{F1}Xwow\u{20AE}"
        );
        assert_eq!(
            MIXED.char_replace(9, 0, PATCH),
            "\u{1F917}\u{1F40B}\u{1F420}\u{F1}Xwow\u{20AE}\u{2286}\u{2287}o"
        );
    }

    #[test]
    fn test_replace_delete_and_insert() {
        // Replace a prefix, an inner range and everything after the first
        // character.
        assert_eq!(
            MIXED.char_replace(0, 2, PATCH),
            "\u{2286}\u{2287}o\u{1F420}\u{F1}Xwow\u{20AE}"
        );
        assert_eq!(
            MIXED.char_replace(4, 4, PATCH),
            "\u{1F917}\u{1F40B}\u{1F420}\u{F1}\u{2286}\u{2287}o\u{20AE}"
        );
        assert_eq!(MIXED.char_replace(1, 8, PATCH), "\u{1F917}\u{2286}\u{2287}o");
    }
}