sedregex 0.2.5

Sed-like regex library
Documentation
//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
//!
//! [[Release docs]](https://docs.rs/sedregex/)
//!
//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
//!
//! A simple sed-like library that uses regex under the hood.
//!
//! ## Usage
//!
//! There are basically two public interfaces which you can enjoy:
//!
//! * [`find_and_replace`], which is useful for one-time processing of a bunch of commands,
//! * and [`ReplaceCommand`] when you need to run the same command multiple times.
//!
//! ## Examples
//!
//! Let's jump straight to the examples!
//!
//! ```
//! use sedregex::{find_and_replace, ReplaceCommand};
//!     // Both case-insensitive and global:
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         ReplaceCommand::new("s/lol/wut/ig").unwrap().execute("Please stop Lols oh lol."),
//!     );
//!
//!     // Multiple commands in one go:
//!     assert_eq!(
//!         "Please stop nos oh no.",
//!         find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
//!     );
//!
//!     // Same, but skipping the `s` character.
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
//!     );
//!
//!     // Skipping the flags.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
//!     );
//!
//!     // Skipping the last slash.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
//!     );
//!
//!     // Escaping a slash in a replace part.
//!     assert_eq!(
//!         r"Please stop wut/s oh wut/.",
//!         find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
//!     );
//!
//!     // Some regex stuff.
//!     // Also note the lack of the trailing slash: it's optional!
//!     assert_eq!(
//!         "Second, First",
//!         find_and_replace(
//!             "First, Second",
//!             &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
//!         ).unwrap()
//!     );
//!
//!     // Ok let's go with some simple regex stuff.
//!     assert_eq!(
//!         "Some weird typo",
//!         find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
//!     );
//! ```
//!
//! ## License
//!
//! Licensed under either of
//!
//!  * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
//!  * MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
//!
//! at your option.
//!
//! ### Contribution
//!
//! Unless you explicitly state otherwise, any contribution intentionally submitted
//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
//! additional terms or conditions.

#![deny(missing_docs)]

mod command;
mod cow_appender;
mod regex_flags;
mod replace_command;
mod replace_data;
mod splitting_iter;
mod str_ext;

pub use crate::{regex_flags::RegexFlag, replace_command::ReplaceCommand};

use std::{
    borrow::Cow,
    fmt::{self, Display},
};

/// An error that might happen during the parsing.
///
/// This is the error type returned by [`find_and_replace`]. Its `Display`
/// implementation renders a human-readable "Parsing error: ..." message, and
/// for the [`RegexError`](ErrorKind::RegexError) variant the wrapped
/// `regex::Error` is available through `std::error::Error::source`.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
    /// A given string doesn't have enough segments.
    NotEnoughSegments,
    /// Unknown regex command has been detected.
    ///
    /// The payload is printed verbatim in the error message; presumably it is
    /// the unrecognized command text as found in the input.
    UnknownCommand(String),
    /// Unknown regex flag has been detected.
    ///
    /// The payload is printed verbatim in the error message; presumably it is
    /// the offending flag character.
    UnknownFlag(char),
    /// Regex parsing/compiling error.
    ///
    /// Wraps the underlying `regex::Error`.
    RegexError(regex::Error),
}

impl Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use ErrorKind::*;
        match self {
            NotEnoughSegments => write!(
                f,
                "Parsing error: a given string doesn't have enough segments"
            ),
            UnknownCommand(cmd) => write!(
                f,
                "Parsing error: unknown regex command '{}' has been detected",
                cmd
            ),
            UnknownFlag(flag) => write!(
                f,
                "Parsing error: unknown regex flag '{}' has been detected",
                flag
            ),
            RegexError(_err) => write!(f, "Parsing error: regex parsing/compiling error"),
        }
    }
}

impl std::error::Error for ErrorKind {
    /// Returns the wrapped `regex::Error` for the `RegexError` variant and
    /// `None` for every other variant, which carry no underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ErrorKind::RegexError(inner) => Some(inner),
            // Listed explicitly (no `_` arm) so that adding a new variant
            // forces a decision here at compile time.
            ErrorKind::NotEnoughSegments
            | ErrorKind::UnknownCommand(_)
            | ErrorKind::UnknownFlag(_) => None,
        }
    }
}

/// Applies a sequence of sed-like replace commands (`s/regex/replace/flags`) to a text.
///
/// # Usage notes
///
/// * Delimiter slashes (`/`) could be escaped by a backslash: `\/`
/// * For the list of supported commands please refer to the [`Command`](enum.Command.html) enum
/// * For the list of supported flags please refer to the [`RegexFlag`](enum.RegexFlag.html) enum
/// * Commands run in the order given; each one operates on the output of the
///   one before it.
/// * When no commands are given the input text is returned as is (borrowed).
///
/// # Errors
///
/// Processing stops at the first command that [`ReplaceCommand::new`] rejects
/// and that error is returned; the remaining commands are neither parsed nor
/// executed.
///
/// For examples please see the crate's main documentation page.
pub fn find_and_replace<I>(text: &str, commands: I) -> Result<Cow<str>, ErrorKind>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    // Starts out borrowed; each command consumes the current value and
    // yields the next one.
    let mut current = Cow::Borrowed(text);
    for cmd in commands {
        current = ReplaceCommand::new(cmd.as_ref())?.execute(current);
    }
    Ok(current)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line fixture shared by the multi-line tests below.
    const LOREM: &str = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
 the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;

    /// A global replace collapses every word into an underscore.
    #[test]
    fn replace_simple() {
        let result = find_and_replace("a very short text", &[r"s/\w+/_/g"]).unwrap();
        assert_eq!("_ _ _ _", result);
    }

    /// Non-word characters are left untouched by a global `\w+` replace.
    #[test]
    fn replace_simple2() {
        let result = find_and_replace(r"a$ very%% sh@ort! tex\w+t", &[r"s/\w+/_/g"]).unwrap();
        assert_eq!(r"_$ _%% _@_! _\_+_", result);
    }

    /// Without the `g` flag only the first match is replaced.
    #[test]
    fn replace_simple3() {
        let result = find_and_replace(r"a$ very%% sh@ort! tex\w+t", &[r"s/\w+/_/"]).unwrap();
        assert_eq!(r"_$ very%% sh@ort! tex\w+t", result);
    }

    /// A global replace works across line breaks and keeps them in place.
    #[test]
    fn replace_multiline_text() {
        let result = find_and_replace(LOREM, &[r"s/\w+/_/g"]).unwrap();
        assert_eq!(
            "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _",
            result
        );
    }

    /// Several commands are applied in sequence, each on the previous output.
    #[test]
    fn process_multiline_text_multicommand() {
        let pipeline = &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"];
        let result = find_and_replace(LOREM, pipeline).unwrap();
        assert_eq!("LI\n.LI\n\'\n====,\n.\nI", result);
    }
}