//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
//!
//! [[Release docs]](https://docs.rs/sedregex/)
//!
//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
//!
//! A simple sed-like library that uses regex under the hood.
//!
//! ## Examples
//!
//! Let's jump straight to the examples!
//!
//! ```
//! extern crate sedregex;
//! use sedregex::find_and_replace;
//! fn main() {
//! // Both case-insensitive and global:
//! assert_eq!(
//! "Please stop wuts oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig"]).unwrap()
//! );
//!
//! // Multiple commands in one go:
//! assert_eq!(
//! "Please stop nos oh no.",
//! find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
//! );
//!
//! // Same, but skipping the `s` character.
//! assert_eq!(
//! "Please stop wuts oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
//! );
//!
//! // Skipping the flags.
//! assert_eq!(
//! "Please stop Lols oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
//! );
//!
//! // Skipping the last slash.
//! assert_eq!(
//! "Please stop Lols oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
//! );
//!
//! // Escaping a slash in a replace part.
//! assert_eq!(
//! r"Please stop wut/s oh wut/.",
//! find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
//! );
//!
//! // Some regex stuff.
//! // Also note the lack of the trailing slash: it's optional!
//! assert_eq!(
//! "Second, First",
//! find_and_replace(
//! "First, Second",
//! &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
//! ).unwrap()
//! );
//!
//! // Ok let's go with some simple regex stuff.
//! assert_eq!(
//! "Some weird typo",
//! find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
//! );
//! }
//! ```
//!
//! ## License
//!
//! Licensed under either of
//!
//! * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
//! * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
//!
//! at your option.
//!
//! ### Contribution
//!
//! Unless you explicitly state otherwise, any contribution intentionally submitted
//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
//! additional terms or conditions.
#![deny(missing_docs)]
extern crate regex;
mod command;
mod cow_appender;
mod regex_flags;
mod splitting;
mod splitting_iter;
mod str_ext;
pub use command::Command;
pub use regex_flags::RegexFlag;
use regex::Regex;
use splitting::split_for_replace;
use std::borrow::Cow;
/// An error that might happen while parsing a replace command.
///
/// This is the error type returned by `find_and_replace`.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
/// The given command string does not have enough segments.
NotEnoughSegments,
/// An unknown regex command has been detected.
///
/// NOTE(review): the payload is presumably the offending command text; confirm against the
/// parsing code that constructs this variant.
UnknownCommand(String),
/// An unknown regex flag has been detected.
///
/// NOTE(review): the payload is presumably the offending flag character; confirm against the
/// parsing code that constructs this variant.
UnknownFlag(char),
/// Regex parsing/compiling error; wraps the underlying `regex::Error`.
RegexError(regex::Error),
}
/// Runs one or more sed-like replace commands of the form `s/regex/replace/flags` over `text`.
///
/// Every command is parsed, compiled into a regex and applied in the order given; each command
/// operates on the output of the one before it. When the parsed flags report a global
/// replacement, every match is replaced (`Regex::replace_all`), otherwise only the first one
/// (`Regex::replace`).
///
/// # Usage notes
///
/// * A slash (`/`) can be escaped with a backslash: `\/`
/// * The supported commands are listed in the [`Command`](enum.Command.html) enum
/// * The supported flags are listed in the [`RegexFlag`](enum.RegexFlag.html) enum
///
/// # Errors
///
/// Returns an [`ErrorKind`](enum.ErrorKind.html) as soon as a command fails to be split into its
/// parts or its regex fails to build; the remaining commands are not executed.
///
/// For examples please see the crate's main documentation page.
pub fn find_and_replace<'a, I>(text: &'a str, commands: I) -> Result<Cow<'a, str>, ErrorKind>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    // Starts out borrowed; `regex_cow` only switches to an owned string when it has to.
    let mut current = Cow::Borrowed(text);
    for command in commands {
        let parsed = split_for_replace(command.as_ref())?;
        let pattern = parsed.build_regex()?;
        current = if parsed.flags.is_global() {
            regex_cow(current, pattern, Regex::replace_all, parsed.with.as_ref())
        } else {
            regex_cow(current, pattern, Regex::replace, parsed.with.as_ref())
        };
    }
    Ok(current)
}
/// A helper function that tries to keep borrowed as borrowed while applying a regex.
///
/// * `s` - the text to process, either borrowed for `'a` or already owned.
/// * `re` - the compiled regex handed to `f`.
/// * `f` - the replacement routine; `find_and_replace` passes `Regex::replace` or
///   `Regex::replace_all`.
/// * `rep` - the replacement forwarded to `f`.
///
/// For a borrowed input the result of `f` is returned as is, so it stays borrowed when `f` did
/// not have to allocate. For an owned input the result always has to be owned, because it cannot
/// borrow from the local `String`. If `f` hands back the untouched input (a borrow of exactly
/// the same slice), the existing `String` is reused instead of being copied.
fn regex_cow<'a, R, F>(s: Cow<'a, str>, re: Regex, f: F, rep: R) -> Cow<'a, str>
where
    for<'r, 't> F: FnOnce(&'r Regex, &'t str, R) -> Cow<'t, str>,
    R: regex::Replacer,
{
    match s {
        Cow::Borrowed(b) => f(&re, b, rep),
        Cow::Owned(o) => {
            // `None` means "nothing changed": `f` returned a borrow covering the very same bytes
            // as `o` (same address and length), so `o` can be handed back without a copy.
            // Anything else is turned into an owned string, which ends the borrow of `o`.
            let replaced = match f(&re, &o, rep) {
                Cow::Borrowed(same) if std::ptr::eq(same, o.as_str()) => None,
                other => Some(other.into_owned()),
            };
            Cow::Owned(replaced.unwrap_or(o))
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line sample text shared by the multi-line tests.
    ///
    /// It is assembled from one literal per line so that every line break and the single
    /// leading space on the fourth line are spelled out explicitly. The expected output of
    /// `replace_multiline_text` contains that leading space, so the input has to carry it too.
    const LOREM_IPSUM: &str = concat!(
        "Lorem Ipsum is simply dummy text\n",
        "of the printing and typesetting industry. Lorem Ipsum\n",
        "has been the industry's standard dummy text ever since\n",
        " the 1500s, when an unknown printer took a galley of\n",
        "type and scrambled it to make a type specimen book.\n",
        "It has"
    );

    /// Runs `commands` over `input` and checks the outcome against `expected`.
    fn assert_replaced(input: &str, commands: &[&str], expected: &str) {
        let actual = find_and_replace(input, commands).unwrap();
        assert_eq!(expected, actual);
    }

    /// A global replacement touches every word.
    #[test]
    fn replace_simple() {
        assert_replaced("a very short text", &[r"s/\w+/_/g"], "_ _ _ _");
    }

    /// Non-word characters in the input are left untouched by a global `\w+` replacement.
    #[test]
    fn replace_simple2() {
        assert_replaced(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/g"],
            r"_$ _%% _@_! _\_+_",
        );
    }

    /// Without the `g` flag only the first match is replaced.
    #[test]
    fn replace_simple3() {
        assert_replaced(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/"],
            r"_$ very%% sh@ort! tex\w+t",
        );
    }

    /// Line breaks and leading whitespace survive a global word replacement.
    #[test]
    fn replace_multiline_text() {
        assert_replaced(
            LOREM_IPSUM,
            &[r"s/\w+/_/g"],
            "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _",
        );
    }

    /// Several commands are applied one after another, each on the previous result.
    #[test]
    fn process_multiline_text_multicommand() {
        assert_replaced(
            LOREM_IPSUM,
            &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"],
            "LI\n.LI\n\'\n====,\n.\nI",
        );
    }
}