//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
//!
//! [[Release docs]](https://docs.rs/sedregex/)
//!
//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
//!
//! A simple sed-like library that uses regex under the hood.
//!
//! ## Usage
//!
//! There are basically two public interfaces which you can enjoy:
//!
//! * [`find_and_replace`], which is useful for one-time processing of a bunch of commands,
//! * and [`ReplaceCommand`] when you need to run the same command multiple times.
//!
//! ## Examples
//!
//! Let's jump straight to the examples!
//!
//! ```
//! use sedregex::{find_and_replace, ReplaceCommand};
//! // Both case-insensitive and global:
//! assert_eq!(
//! "Please stop wuts oh wut.",
//! ReplaceCommand::new("s/lol/wut/ig").unwrap().execute("Please stop Lols oh lol."),
//! );
//!
//! // Multiple commands in one go:
//! assert_eq!(
//! "Please stop nos oh no.",
//! find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
//! );
//!
//! // Same, but skipping the `s` character.
//! assert_eq!(
//! "Please stop wuts oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
//! );
//!
//! // Skipping the flags.
//! assert_eq!(
//! "Please stop Lols oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
//! );
//!
//! // Skipping the last slash.
//! assert_eq!(
//! "Please stop Lols oh wut.",
//! find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
//! );
//!
//! // Escaping a slash in a replace part.
//! assert_eq!(
//! r"Please stop wut/s oh wut/.",
//! find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
//! );
//!
//! // Some regex stuff.
//! // Also note the lack of the trailing slash: it's optional!
//! assert_eq!(
//! "Second, First",
//! find_and_replace(
//! "First, Second",
//! &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
//! ).unwrap()
//! );
//!
//! // Ok let's go with some simple regex stuff.
//! assert_eq!(
//! "Some weird typo",
//! find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
//! );
//! ```
//!
//! ## License
//!
//! Licensed under either of
//!
//! * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
//! * MIT license ([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)
//!
//! at your option.
//!
//! ### Contribution
//!
//! Unless you explicitly state otherwise, any contribution intentionally submitted
//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
//! additional terms or conditions.
#![deny(missing_docs)]
mod command;
mod cow_appender;
mod regex_flags;
mod replace_command;
mod replace_data;
mod splitting_iter;
mod str_ext;
pub use crate::{regex_flags::RegexFlag, replace_command::ReplaceCommand};
use std::{
borrow::Cow,
fmt::{self, Display},
};
/// An error that might happen during the parsing of a replace command.
///
/// The `Display` implementation yields a short human-readable message for each variant.
/// For [`ErrorKind::RegexError`] that message does not include the details of the wrapped
/// error; the wrapped `regex::Error` is exposed through `std::error::Error::source` instead.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
/// A given string doesn't have enough segments.
NotEnoughSegments,
/// Unknown regex command has been detected.
///
/// The payload is the text that is reported as the unknown command in the error message.
UnknownCommand(String),
/// Unknown regex flag has been detected.
///
/// The payload is the character that is reported as the unknown flag in the error message.
UnknownFlag(char),
/// Regex parsing/compiling error.
///
/// Wraps the underlying `regex::Error`.
RegexError(regex::Error),
}
impl Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use ErrorKind::*;
match self {
NotEnoughSegments => write!(
f,
"Parsing error: a given string doesn't have enough segments"
),
UnknownCommand(cmd) => write!(
f,
"Parsing error: unknown regex command '{}' has been detected",
cmd
),
UnknownFlag(flag) => write!(
f,
"Parsing error: unknown regex flag '{}' has been detected",
flag
),
RegexError(_err) => write!(f, "Parsing error: regex parsing/compiling error"),
}
}
}
// Only `RegexError` wraps a lower-level error, so it is the only variant with a source.
impl std::error::Error for ErrorKind {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ErrorKind::RegexError(inner) => Some(inner),
            // The remaining variants originate in this crate and carry no underlying cause.
            ErrorKind::NotEnoughSegments => None,
            ErrorKind::UnknownCommand(_) => None,
            ErrorKind::UnknownFlag(_) => None,
        }
    }
}
/// Applies a sequence of regex replace commands of the form `s/regex/replace/flags` to `text`.
///
/// Each command is parsed into a [`ReplaceCommand`] and executed in iteration order, every
/// command working on the result produced by the previous one. When `commands` yields nothing,
/// the input is returned unchanged as a borrowed [`Cow`].
///
/// # Usage notes
///
/// * Delimiter slashes (`/`) could be escaped by a backslash: `\/`
/// * The supported commands are listed on the [`Command`](enum.Command.html) enum
/// * The supported flags are listed on the [`RegexFlag`](enum.RegexFlag.html) enum
///
/// # Errors
///
/// Returns the [`ErrorKind`] reported by [`ReplaceCommand::new`] for the first command that
/// cannot be constructed; the commands after it are not processed.
///
/// # Examples
///
/// Please see the crate's main documentation page.
pub fn find_and_replace<I>(text: &str, commands: I) -> Result<Cow<str>, ErrorKind>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    // Start from a borrow of the input; the value is replaced by each command's output.
    let mut current = Cow::Borrowed(text);
    for command in commands {
        // `?` stops at the first command that fails to parse.
        current = ReplaceCommand::new(command.as_ref())?.execute(current);
    }
    Ok(current)
}
// Unit tests that drive `find_and_replace` through the crate's public entry point.
#[cfg(test)]
mod tests {
use super::*;
// With the `g` flag every run of word characters is replaced by `_`.
#[test]
fn replace_simple() {
let input = "a very short text";
let commands = &[r"s/\w+/_/g"];
let expected = "_ _ _ _";
let actual = find_and_replace(input, commands).unwrap();
assert_eq!(expected, actual);
}
// Non-word characters (`$`, `%`, `@`, `!`, `\`, `+`) are kept as they are; only the word runs
// between them become `_`.
#[test]
fn replace_simple2() {
let input = r"a$ very%% sh@ort! tex\w+t";
let commands = &[r"s/\w+/_/g"];
let expected = r"_$ _%% _@_! _\_+_";
let actual = find_and_replace(input, commands).unwrap();
assert_eq!(expected, actual);
}
// Same input as above but without the `g` flag: only the first match is replaced.
#[test]
fn replace_simple3() {
let input = r"a$ very%% sh@ort! tex\w+t";
let commands = &[r"s/\w+/_/"];
let expected = r"_$ very%% sh@ort! tex\w+t";
let actual = find_and_replace(input, commands).unwrap();
assert_eq!(expected, actual);
}
// A global replacement over an input that spans several lines; newlines and punctuation are
// expected to be preserved.
// NOTE(review): `expected` has a leading space on its fourth line (`\n _ _,`) while the
// fourth line of `input` as written here starts directly with `the` -- confirm the
// whitespace inside the raw string literal is intact.
#[test]
fn replace_multiline_text() {
let input = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;
let commands = &[r"s/\w+/_/g"];
let expected = "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _";
let actual = find_and_replace(input, commands).unwrap();
assert_eq!(expected, actual);
}
// Four commands chained on a multi-line input, each one working on the previous result:
// lowercase runs -> `_`, each digit or `+` -> `=`, `_` removed, runs of spaces removed.
#[test]
fn process_multiline_text_multicommand() {
let input = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;
let commands = &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"];
let expected = "LI\n.LI\n\'\n====,\n.\nI";
let actual = find_and_replace(input, commands).unwrap();
assert_eq!(expected, actual);
}
}