1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
//!
//! [[Release docs]](https://docs.rs/sedregex/)
//!
//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
//!
//! A simple sed-like library that uses regex under the hood.
//!
//! ## Examples
//!
//! Let's jump straight to the examples!
//!
//! ```
//! extern crate sedregex;
//! use sedregex::find_and_replace;
//! fn main() {
//!     // Both case-insensitive and global:
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig"]).unwrap()
//!     );
//!
//!     // Multiple commands in one go:
//!     assert_eq!(
//!         "Please stop nos oh no.",
//!         find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
//!     );
//!
//!     // Same, but skipping the `s` character.
//!     assert_eq!(
//!         "Please stop wuts oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
//!     );
//!
//!     // Skipping the flags.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
//!     );
//!
//!     // Skipping the last slash.
//!     assert_eq!(
//!         "Please stop Lols oh wut.",
//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
//!     );
//!
//!     // Escaping a slash in a replace part.
//!     assert_eq!(
//!         r"Please stop wut/s oh wut/.",
//!         find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
//!     );
//!
//!     // Some regex stuff.
//!     // Also note the lack of the trailing slash: it's optional!
//!     assert_eq!(
//!         "Second, First",
//!         find_and_replace(
//!             "First, Second",
//!             &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
//!         ).unwrap()
//!     );
//!
//!     // Ok let's go with some simple regex stuff.
//!     assert_eq!(
//!         "Some weird typo",
//!         find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
//!     );
//! }
//! ```
//!
//! ## License
//!
//! Licensed under either of
//!
//!  * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
//!  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
//!
//! at your option.
//!
//! ### Contribution
//!
//! Unless you explicitly state otherwise, any contribution intentionally submitted
//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
//! additional terms or conditions.

#![deny(missing_docs)]

extern crate regex;

mod command;
mod cow_appender;
mod regex_flags;
mod splitting;
mod splitting_iter;
mod str_ext;

pub use command::Command;
pub use regex_flags::RegexFlag;

use regex::Regex;
use splitting::split_for_replace;
use std::borrow::Cow;

/// An error that might happen while parsing a replace command or building its regex.
#[derive(Debug, PartialEq)]
pub enum ErrorKind {
    /// The given string does not have enough segments.
    NotEnoughSegments,
    /// An unknown regex command has been detected.
    UnknownCommand(String),
    /// An unknown regex flag has been detected.
    UnknownFlag(char),
    /// Regex parsing/compiling error.
    RegexError(regex::Error),
}

/// Runs a sequence of sed-like replace commands (`s/regex/replace/flags`) over `text`.
///
/// # Usage notes
///
/// * A slash (`/`) can be escaped with a backslash: `\/`
/// * Supported commands are listed in the [`Command`](enum.Command.html) enum
/// * Supported flags are listed in the [`RegexFlag`](enum.RegexFlag.html) enum
/// * Commands are applied in order, each one operating on the output of the one before it.
///
/// # Errors
///
/// Processing stops at the first command that fails to parse or whose regex fails to build, and
/// the corresponding [`ErrorKind`](enum.ErrorKind.html) is returned; later commands are not run.
///
/// For examples please see the crate's main documentation page.
pub fn find_and_replace<'a, I>(text: &'a str, commands: I) -> Result<Cow<'a, str>, ErrorKind>
where
    I: IntoIterator,
    I::Item: AsRef<str>,
{
    // Starts out borrowing the caller's text; each command consumes the current value and
    // produces the next one.
    let mut current = Cow::Borrowed(text);
    for cmd in commands {
        let replace_data = split_for_replace(cmd.as_ref())?;
        let re = replace_data.build_regex()?;
        current = if replace_data.flags.is_global() {
            // Global flag: substitute every match.
            regex_cow(current, re, Regex::replace_all, replace_data.with.as_ref())
        } else {
            // No global flag: substitute the first match only.
            regex_cow(current, re, Regex::replace, replace_data.with.as_ref())
        };
    }
    Ok(current)
}

/// A helper function that applies a regex replacement routine to `s` while copying as little
/// as possible.
///
/// * `s` is the text to process, either borrowed or owned.
/// * `re` is the compiled regex handed to `f`.
/// * `f` is the replacement routine; the callers in this file pass `Regex::replace` or
///   `Regex::replace_all`.
/// * `rep` is the replacer forwarded to `f`.
///
/// Borrowed input is passed straight through, so the result stays borrowed whenever `f` returns
/// a borrow. For owned input the result is always owned; if `f` hands back the very same slice
/// it was given (i.e. nothing was replaced), the original `String` is reused instead of being
/// cloned.
fn regex_cow<'a, R, F>(s: Cow<'a, str>, re: Regex, f: F, rep: R) -> Cow<'a, str>
where
    for<'r, 't> F: FnOnce(&'r Regex, &'t str, R) -> Cow<'t, str>,
    R: regex::Replacer,
{
    match s {
        Cow::Borrowed(b) => f(&re, b, rep),
        Cow::Owned(o) => {
            // The result of `f` may borrow from `o`, so it is reduced to an `Option<String>`
            // first; only then can `o` itself be moved into the return value.
            let replaced = match f(&re, &o, rep) {
                // Same address and length as the input: the text is unchanged, so the existing
                // allocation can be kept as is.
                Cow::Borrowed(same) if std::ptr::eq(same, o.as_str()) => None,
                other => Some(other.into_owned()),
            };
            Cow::Owned(replaced.unwrap_or(o))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line sample text shared by the multi-line tests below.
    const LOREM_IPSUM: &str = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
 the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;

    /// A global replace turns every word into an underscore.
    #[test]
    fn replace_simple() {
        let replaced = find_and_replace("a very short text", &[r"s/\w+/_/g"]).unwrap();
        assert_eq!("_ _ _ _", replaced);
    }

    /// A global replace leaves the non-word characters of the input untouched.
    #[test]
    fn replace_simple2() {
        let replaced =
            find_and_replace(r"a$ very%% sh@ort! tex\w+t", &[r"s/\w+/_/g"]).unwrap();
        assert_eq!(r"_$ _%% _@_! _\_+_", replaced);
    }

    /// Without the `g` flag only the first match is replaced.
    #[test]
    fn replace_simple3() {
        let replaced = find_and_replace(r"a$ very%% sh@ort! tex\w+t", &[r"s/\w+/_/"]).unwrap();
        assert_eq!(r"_$ very%% sh@ort! tex\w+t", replaced);
    }

    /// A global replace works across line breaks.
    #[test]
    fn replace_multiline_text() {
        let replaced = find_and_replace(LOREM_IPSUM, &[r"s/\w+/_/g"]).unwrap();
        assert_eq!(
            "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _",
            replaced
        );
    }

    /// Several commands are applied one after another, each on the previous result.
    #[test]
    fn process_multiline_text_multicommand() {
        let replaced = find_and_replace(
            LOREM_IPSUM,
            &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"],
        )
        .unwrap();
        assert_eq!("LI\n.LI\n\'\n====,\n.\nI", replaced);
    }
}