rustextile 1.0.2

Textile markup language parser for Rust
Documentation
use fancy_regex::{Regex, Captures, CaptureMatches, Match};

/// Message handed to `expect` by the `fregex!` macro; it becomes the panic
/// message when a pattern fails to compile.
pub(crate) const INVALID_REGEX: &str = "An invalid regular expression";

/// Compiles the given pattern with `Regex::new` and unwraps the result,
/// panicking with `crate::regextra::INVALID_REGEX` when compilation fails.
///
/// `Regex` is looked up where the macro is expanded, so the caller has to
/// have it in scope.
macro_rules! fregex {
    ($pattern:expr) => {
        Regex::new($pattern).expect(crate::regextra::INVALID_REGEX)
    };
}

pub(crate) use fregex;


/// Iterator over the pieces of a string split by a regular expression;
/// created by `split_with_capture`.
///
/// At the moment of writing `fancy_regex` does not support splitting
/// strings with regular expressions "out of the box".
/// Hence this iterator, which can not only split strings the usual way
/// (ignoring the delimiters), but also yield the text captured by the
/// groups of the delimiter pattern in between the surrounding chunks.
pub(crate) struct ReSplitWithCapture<'r, 't> {
    /// Byte offset in `text` just past the most recently consumed match.
    last_end: usize,
    /// The string being split.
    text: &'t str,
    /// Result of the latest pull from `find_iter`; `None` before the first
    /// pull and once the matches have run out.
    last_match: Option<
        std::result::Result<Captures<'t>, fancy_regex::Error>,
    >,
    /// Underlying iterator over the matches of the delimiter pattern.
    find_iter: CaptureMatches<'r, 't>,
    /// `true` when the next call to `next` has to pull a new match from
    /// `find_iter`; `false` while capture groups of `last_match` are still
    /// waiting to be yielded.
    ready_for_next: bool,
    /// Set when the trailing remainder of `text` is yielded; afterwards
    /// the iterator only returns `None`.
    is_exhausted: bool,
    /// Index of the next capture group of `last_match` to yield.
    next_group_index: usize,
}

impl<'r, 't> Iterator for ReSplitWithCapture<'r, 't> {
    type Item = &'t str;

    /// Yields the next piece of the split text.
    ///
    /// For every match of the delimiter pattern the iterator produces the
    /// text between the previous match and this one, followed by one item
    /// per capture group of the pattern, in group order. A group that did
    /// not participate in the match is yielded as an empty string, so the
    /// number of items per match stays constant. After the last match the
    /// remaining tail of the text is yielded once, and then `None`.
    ///
    /// A matching error reported by `fancy_regex` ends the iteration the
    /// same way as running out of matches does: the rest of the text is
    /// returned as the final piece.
    fn next(&mut self) -> Option<Self::Item> {
        if self.is_exhausted {
            return None;
        }
        if self.ready_for_next {
            self.last_match = self.find_iter.next();
        }
        let caps = match &self.last_match {
            Some(Ok(caps)) => caps,
            Some(Err(_)) | None => {
                // No (usable) match is left: hand out the tail and stop.
                self.is_exhausted = true;
                return self.text.get(self.last_end..);
            }
        };

        if self.ready_for_next {
            // A fresh match: emit the text in front of it first. The
            // capture groups, if the pattern has any, follow on the
            // subsequent calls.
            self.ready_for_next = caps.len() == 1;
            self.next_group_index = 1;
            match caps.get(0) {
                Some(whole_match) => {
                    let chunk =
                        &self.text[self.last_end..whole_match.start()];
                    self.last_end = whole_match.end();
                    Some(chunk)
                }
                None => unreachable!("group 0 is the whole match"),
            }
        } else {
            // `get` returns `None` for a group that took no part in the
            // match (e.g. the unused side of an alternation); that is a
            // legitimate outcome, not a broken invariant.
            let group = caps.get(self.next_group_index);
            self.next_group_index += 1;
            if self.next_group_index == caps.len() {
                // No more groups left to return
                self.ready_for_next = true;
            }
            Some(group.map_or("", |g| g.as_str()))
        }
    }
}

/// Lazily splits `text` on every match of the regular expression `re`.
///
/// The returned iterator yields the text between consecutive matches and,
/// when `re` contains capture groups, the text captured by those groups
/// right after the chunk that precedes the match. A pattern without groups
/// therefore behaves like a plain split, while a pattern with groups also
/// reports the delimiters, similar to Python's `re.split`.
pub(crate) fn split_with_capture<'r, 't>(
    re: &'r Regex,
    text: &'t str,
) -> ReSplitWithCapture<'r, 't> {
    let find_iter = re.captures_iter(text);
    ReSplitWithCapture {
        find_iter,
        text,
        last_match: None,
        last_end: 0,
        next_group_index: 0,
        ready_for_next: true,
        is_exhausted: false,
    }
}


/// Applies a sequence of regex replacements to `text`, one after another.
///
/// Every item of `changes` is a `(pattern, replacement)` pair; all matches
/// of the pattern in the current text are replaced before moving on to the
/// next pair. The text is only reallocated by a pair that actually changes
/// it, and is converted into an owned `String` once, at the very end.
pub(crate) fn multi_replace<'a, I, S>(
    text: std::borrow::Cow<str>, changes: I,
) -> String
where I: Iterator<Item=(&'a Regex, S)> + 'a,
      S: AsRef<str> + 'a
{
    use std::borrow::Cow;

    let mut current = text;
    for (pattern, replacement) in changes {
        // A borrowed result means nothing matched, so `current` is kept.
        let rewritten =
            match pattern.replace_all(&current, replacement.as_ref()) {
                Cow::Owned(fresh) => Some(fresh),
                Cow::Borrowed(_) => None,
            };
        if let Some(fresh) = rewritten {
            current = Cow::Owned(fresh);
        }
    }
    current.into_owned()
}

/// Applies several regex patterns to `text` in sequence, replacing the
/// matches of every pattern with the same `one_replacement` (a string or
/// anything else that implements `fancy_regex::Replacer`, e.g. a function).
///
/// The text is only reallocated by a pattern that actually changes it, and
/// is converted into an owned `String` once, at the very end.
pub(crate) fn multi_replace_with_one<'a, I, R>(
    text: std::borrow::Cow<str>, changes: I,
    mut one_replacement: R
) -> String
where I: Iterator<Item=&'a Regex> + 'a,
      R: fancy_regex::Replacer,
{
    use std::borrow::Cow;

    let mut current = text;
    for pattern in changes {
        // A borrowed result means nothing matched, so `current` is kept.
        let rewritten =
            match pattern.replace_all(&current, one_replacement.by_ref()) {
                Cow::Owned(fresh) => Some(fresh),
                Cow::Borrowed(_) => None,
            };
        if let Some(fresh) = rewritten {
            current = Cow::Owned(fresh);
        }
    }
    current.into_owned()
}


/// Returns the text of `input` when there is a match, and an empty string
/// otherwise.
pub(crate) fn unwrap_or_empty(input: Option<Match>) -> &str {
    match input {
        Some(found) => found.as_str(),
        None => "",
    }
}


#[cfg(test)]
mod test {
    use fancy_regex::Regex;

    /// Compiles `pattern` and collects every piece that
    /// `split_with_capture` produces for `text`.
    fn split_all<'t>(pattern: &str, text: &'t str) -> Vec<&'t str> {
        let re = Regex::new(pattern).unwrap();
        let pieces: Vec<&'t str> =
            super::split_with_capture(&re, text).collect();
        pieces
    }

    #[test]
    fn test_split_with_delimiters() {
        let text =
            "This has become a \"new norm\" for everyone in the \"the circle\"";

        // The quote is captured by a group, so it is reported between
        // the surrounding chunks.
        let with_delimiters = split_all(r#"\s?(")\s?"#, text);
        assert_eq!(
            with_delimiters,
            [
                "This has become a",
                "\"",
                "new norm",
                "\"",
                "for everyone in the",
                "\"",
                "the circle",
                "\"",
                ""
            ]
        );

        // Split without capturing the delimiter
        let without_delimiters = split_all(r#"\s?"\s?"#, text);
        assert_eq!(
            without_delimiters,
            [
                "This has become a",
                "new norm",
                "for everyone in the",
                "the circle",
                ""
            ]
        );
    }
}