sedregex 0.2.5

Sed-like regex library
Documentation
use crate::{cow_appender::CowAppender, str_ext::StrExt};
use std::borrow::Cow;

/// Extracts the part of `string` lying between byte offset `begin` and the next unescaped
/// slash (`/`), turning every escaped slash (`\/`) met on the way into a plain `/`.
///
/// The returned pair is the extracted text and a byte position:
///
/// * if an unescaped slash is found, the text ends right before it (the slash itself is not
///   included) and the position is the offset of that slash;
/// * if no unescaped slash is found, the text is everything from `begin` to the end of the
///   string and the position equals `string.len()`.
///
/// A backslash that is not immediately followed by a slash is kept as is.
fn split_escape(string: &str, begin: usize) -> (Cow<str>, usize) {
    let end = string.len();
    let mut appender = CowAppender::new(string, begin);
    let mut cursor = begin;
    while cursor < end {
        let slash = match find_at(string, cursor, '/') {
            Some(found) => found,
            None => break,
        };
        match string.previous(slash) {
            Some('\\') => {
                // Escaped slash: take the text up to the backslash (leaving the backslash
                // out), then the slash itself, and resume the search right after it.
                appender.append(cursor, slash - 1);
                appender.push(slash);
                cursor = slash + 1;
            }
            _ => {
                // Unescaped slash: everything gathered so far is the result.
                appender.append(cursor, slash);
                return (appender.into_inner(), slash);
            }
        }
    }
    // No unescaped slash left, so the remainder of the string is the result.
    appender.append(cursor, end);
    (appender.into_inner(), end)
}

/// Searches `string` for the character `c`, looking only at the part that starts at byte
/// offset `begin`.
///
/// Returns the byte offset of the first match, measured from the start of `string` (not from
/// `begin`), or `None` if `c` does not occur at or after `begin`.
///
/// # Panics
///
/// Will panic if `begin` is past the end of the given string or does not fall on a UTF-8
/// character boundary.
fn find_at(string: &str, begin: usize, c: char) -> Option<usize> {
    let tail = string.split_at(begin).1;
    // `find` reports an offset relative to `tail`; shift it back into `string` coordinates.
    let offset = tail.find(c)?;
    Some(begin + offset)
}

/// An iterator over the parts of a string separated by unescaped slashes (`/`).
///
/// `original` is the string being split; `position` is the byte offset in `original` at which
/// the next part starts.
#[derive(Debug, Clone)]
pub struct SlashSplitIter<'a> {
    original: &'a str,
    position: usize,
}

impl<'a> From<&'a str> for SlashSplitIter<'a> {
    fn from(string: &'a str) -> Self {
        SlashSplitIter {
            original: string,
            position: 0,
        }
    }
}

impl<'a> Iterator for SlashSplitIter<'a> {
    type Item = Cow<'a, str>;

    /// Yields the next part of the string, or `None` once the current position has reached
    /// (or passed) the end of the string.
    ///
    /// As a consequence an empty input yields nothing, and the empty part following a
    /// trailing slash is not yielded either.
    fn next(&mut self) -> Option<Self::Item> {
        let total = self.original.len();
        if self.position < total {
            let (chunk, slash_at) = split_escape(self.original, self.position);
            // Continue right after the slash; if the addition overflows, jump to the end of
            // the string so that the iteration stops.
            self.position = slash_at.checked_add(1).unwrap_or(total);
            Some(chunk)
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `split_escape(data, begin)` returns `text` and `position` for every
    /// `(begin, text, position)` triple in `cases`.
    fn assert_splits(data: &str, cases: &[(usize, &str, usize)]) {
        for &(begin, text, position) in cases {
            assert_eq!(split_escape(data, begin), (text.into(), position));
        }
    }

    /// Checks that iterating over `data` yields exactly the parts in `expected` and then
    /// stops.
    fn assert_parts(data: &str, expected: &[&str]) {
        let parts: Vec<_> = SlashSplitIter::from(data).collect();
        assert_eq!(parts, expected);
    }

    #[test]
    fn split_first_empty() {
        assert_splits("/something/", &[(0, "", 0)]);
    }

    #[test]
    fn split_once() {
        assert_splits(
            r"s/ahaha\/lol/wut/g",
            &[
                (0, "s", 1),
                (1, "", 1),
                (2, "ahaha/lol", 12),
                (3, "haha/lol", 12),
                (16, "", 16),
                (17, "g", 18),
            ],
        );
    }

    #[test]
    fn split_once_long_char() {
        assert_splits(
            r"s/ва大\/хех/wяt/g",
            &[
                (0, "s", 1),
                (1, "", 1),
                (2, "ва大/хех", 17),
                (18, "wяt", 22),
                (22, "", 22),
                (23, "g", 24),
            ],
        );
    }

    #[test]
    fn split_once_last() {
        assert_splits(r"last\//", &[(0, "last/", 6)]);
    }

    #[test]
    fn iter() {
        assert_parts(
            r"a/fir\/st/second/th\/ird/fo\/rth/last",
            &["a", "fir/st", "second", "th/ird", "fo/rth", "last"],
        );
    }

    #[test]
    fn iter_long_chars() {
        assert_parts(
            r"a/фя\/st/界/th\/ird/fo\/rth/last",
            &["a", "фя/st", "", "th/ird", "fo/rth", "last"],
        );
    }

    #[test]
    fn iter_first_empty() {
        assert_parts(
            r"/fir\/st/second/th\/ird/fo\/rth/last",
            &["", "fir/st", "second", "th/ird", "fo/rth", "last"],
        );
    }

    #[test]
    fn iter_last_empty() {
        // A trailing unescaped slash does not produce a final empty part.
        assert_parts("something/last/", &["something", "last"]);
        // A trailing escaped slash belongs to the last part.
        assert_parts(r"something/世\/", &["something", "世/"]);
    }
}