1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
//!
//! [[Release docs]](https://docs.rs/sedregex/)
//!
//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
//!
//! A simple sed-like library that uses regex under the hood.
//!
//! ## Usage
//!
//! There are basically two public interfaces which you can enjoy:
//!
//! * [`find_and_replace`], which is useful for one-time processing of a bunch of commands,
//! * and [`ReplaceCommand`] when you need to run the same command multiple times.
//!
//! ## Examples
//!
//! Let's jump straight to the examples!
//!
//! ```
//! use sedregex::{find_and_replace, ReplaceCommand};
//!     // Both case-insensitive and global:
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         ReplaceCommand::new("s/lol/wut/ig").unwrap().execute("Please stop Lols oh lol."),
//!     );
//!
//!     // Multiple commands in one go:
//!     assert_eq!(
//!         "Please stop nos oh no.",
//!         find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
//!     );
//!
//!     // Same, but skipping the `s` character.
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
//!     );
//!
//!     // Skipping the flags.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
//!     );
//!
//!     // Skipping the last slash.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
//!     );
//!
//!     // Escaping a slash in a replace part.
//!     assert_eq!(
//!         r"Please stop wut/s oh wut/.",
//!         find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
//!     );
//!
//!     // Some regex stuff.
//!     // Also note the lack of the trailing slash: it's optional!
//!     assert_eq!(
//!         "Second, First",
//!         find_and_replace(
//!             "First, Second",
//!             &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
//!         ).unwrap()
//!     );
//!
//!     // Ok let's go with some simple regex stuff.
//!     assert_eq!(
//!         "Some weird typo",
//!         find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
//!     );
//! ```
//!
//! ## License
//!
//! Licensed under either of
//!
//!  * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
//!  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
//!
//! at your option.
//!
//! ### Contribution
//!
//! Unless you explicitly state otherwise, any contribution intentionally submitted
//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
//! additional terms or conditions.

#![deny(missing_docs)]

mod command;
mod cow_appender;
mod regex_flags;
mod replace_command;
mod replace_data;
mod splitting_iter;
mod str_ext;

pub use crate::{regex_flags::RegexFlag, replace_command::ReplaceCommand};

use std::{
    borrow::Cow,
    fmt::{self, Display},
};

/// An error that might happen during the parsing.
///
/// This is the error type returned by [`find_and_replace`]. It implements
/// `Display`, where every message is prefixed with `Parsing error: `, and
/// `std::error::Error`, whose `source()` is only populated for the
/// `RegexError` variant.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
    /// A given string doesn't have enough segments.
    NotEnoughSegments,
    /// Unknown regex command has been detected.
    ///
    /// The payload is the command text that is echoed in the error message.
    UnknownCommand(String),
    /// Unknown regex flag has been detected.
    ///
    /// The payload is the flag character that is echoed in the error message.
    UnknownFlag(char),
    /// Regex parsing/compiling error.
    ///
    /// Wraps the underlying `regex::Error`; it is not printed by `Display` but is
    /// reachable through `std::error::Error::source`.
    RegexError(regex::Error),
}

impl Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use ErrorKind::*;
        match self {
            NotEnoughSegments => write!(
                f,
                "Parsing error: a given string doesn't have enough segments"
            ),
            UnknownCommand(cmd) => write!(
                f,
                "Parsing error: unknown regex command '{}' has been detected",
                cmd
            ),
            UnknownFlag(flag) => write!(
                f,
                "Parsing error: unknown regex flag '{}' has been detected",
                flag
            ),
            RegexError(_err) => write!(f, "Parsing error: regex parsing/compiling error"),
        }
    }
}

impl std::error::Error for ErrorKind {
    /// Returns the lower-level cause of this error, if there is one.
    ///
    /// Only `RegexError` carries a cause (the wrapped `regex::Error`); every other
    /// variant yields `None`. The match is kept exhaustive so that a newly added
    /// variant has to be handled here explicitly.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            ErrorKind::RegexError(inner) => Some(inner),
            ErrorKind::NotEnoughSegments => None,
            ErrorKind::UnknownCommand(_) => None,
            ErrorKind::UnknownFlag(_) => None,
        }
    }
}

/// Applies a sequence of sed-like replace commands (`s/regex/replace/flags`) to `text`.
///
/// Every command string is turned into a [`ReplaceCommand`] and executed; the output of
/// one command is fed as the input to the next one. When no commands are given, `text`
/// is handed back borrowed and unchanged.
///
/// # Usage notes
///
/// * A delimiter slash (`/`) can be escaped with a backslash: `\/`
/// * The supported commands are listed in the [`Command`](enum.Command.html) enum
/// * The supported flags are listed in the [`RegexFlag`](enum.RegexFlag.html) enum
///
/// # Errors
///
/// Returns the [`ErrorKind`] reported by [`ReplaceCommand::new`] for the first command
/// that cannot be constructed; the commands after it are not processed.
///
/// For examples please see the crate's main documentation page.
pub fn find_and_replace<I>(text: &str, commands: I) -> Result<Cow<str>, ErrorKind>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    // Starts out borrowing the input; stays borrowed for as long as `execute` keeps it so.
    let mut current = Cow::Borrowed(text);
    for cmd in commands {
        // `?` stops at the first malformed command, leaving the rest untouched.
        let replacer = ReplaceCommand::new(cmd.as_ref())?;
        current = replacer.execute(current);
    }
    Ok(current)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line sample shared by the multi-line tests; note the leading space on
    /// the fourth line, which the expected outputs rely on.
    const LOREM: &str = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
 the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;

    /// Runs `commands` over `input` via `find_and_replace` and compares the outcome
    /// with `expected`, panicking if any command is rejected.
    fn check(input: &str, commands: &[&str], expected: &str) {
        let actual = find_and_replace(input, commands).unwrap();
        assert_eq!(expected, actual);
    }

    /// A global replace rewrites every word.
    #[test]
    fn replace_simple() {
        check("a very short text", &[r"s/\w+/_/g"], "_ _ _ _");
    }

    /// A global replace leaves non-word characters in place.
    #[test]
    fn replace_simple2() {
        check(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/g"],
            r"_$ _%% _@_! _\_+_",
        );
    }

    /// Without the `g` flag only the first match is replaced.
    #[test]
    fn replace_simple3() {
        check(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/"],
            r"_$ very%% sh@ort! tex\w+t",
        );
    }

    /// A global replace works across line breaks.
    #[test]
    fn replace_multiline_text() {
        check(
            LOREM,
            &[r"s/\w+/_/g"],
            "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _",
        );
    }

    /// Several commands are applied in order, each one on the previous result.
    #[test]
    fn process_multiline_text_multicommand() {
        check(
            LOREM,
            &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"],
            "LI\n.LI\n\'\n====,\n.\nI",
        );
    }
}