sedregex/
lib.rs

1//! [![pipeline status](https://gitlab.com/mexus/sedregex/badges/master/pipeline.svg)](https://gitlab.com/mexus/sedregex/commits/master)
2//! [![crates.io](https://img.shields.io/crates/v/sedregex.svg)](https://crates.io/crates/sedregex)
3//! [![docs.rs](https://docs.rs/sedregex/badge.svg)](https://docs.rs/sedregex)
4//!
5//! [[Release docs]](https://docs.rs/sedregex/)
6//!
7//! [[Master docs]](https://mexus.gitlab.io/sedregex/sedregex/)
8//!
9//! A simple sed-like library that uses regex under the hood.
10//!
11//! ## Usage
12//!
13//! There are basically two public interfaces which you can enjoy:
14//!
15//! * [`find_and_replace`], which is useful for one-time processing of a bunch of commands,
16//! * and [`ReplaceCommand`] when you need to run the same command multiple times.
17//!
18//! ## Examples
19//!
20//! Let's jump straight to the examples!
21//!
22//! ```
23//! use sedregex::{find_and_replace, ReplaceCommand};
24//!     // Both case-insensitive and global:
25//!     assert_eq!(
26//!         "Please stop wuts oh wut.",
27//!         ReplaceCommand::new("s/lol/wut/ig").unwrap().execute("Please stop Lols oh lol."),
28//!     );
29//!
30//!     // Multiple commands in one go:
31//!     assert_eq!(
32//!         "Please stop nos oh no.",
33//!         find_and_replace("Please stop Lols oh lol.", &["s/lol/wut/ig", "s/wut/no/g"]).unwrap()
34//!     );
35//!
36//!     // Same, but skipping the `s` character.
37//!     assert_eq!(
38//!         "Please stop wuts oh wut.",
39//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/ig"]).unwrap()
40//!     );
41//!
42//!     // Skipping the flags.
43//!     assert_eq!(
44//!         "Please stop Lols oh wut.",
45//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut/"]).unwrap()
46//!     );
47//!
48//!     // Skipping the last slash.
49//!     assert_eq!(
50//!         "Please stop Lols oh wut.",
51//!         find_and_replace("Please stop Lols oh lol.", &["/lol/wut"]).unwrap()
52//!     );
53//!
54//!     // Escaping a slash in a replace part.
55//!     assert_eq!(
56//!         r"Please stop wut/s oh wut/.",
57//!         find_and_replace("Please stop Lols oh lol.", &[r"s/lol/wut\//gi"]).unwrap()
58//!     );
59//!
60//!     // Some regex stuff.
//!     // Also note the lack of the trailing slash: it's optional!
62//!     assert_eq!(
63//!         "Second, First",
64//!         find_and_replace(
65//!             "First, Second",
66//!             &[r"s/(?P<first>[^,\s]+),\s+(?P<last>\S+)/$last, $first"],
67//!         ).unwrap()
68//!     );
69//!
70//!     // Ok let's go with some simple regex stuff.
71//!     assert_eq!(
72//!         "Some weird typo",
73//!         find_and_replace("Some wierd typo", &[r"s/ie/ei/"]).unwrap()
74//!     );
75//! ```
76//!
77//! ## License
78//!
79//! Licensed under either of
80//!
81//!  * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
82//!  * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
83//!
84//! at your option.
85//!
86//! ### Contribution
87//!
88//! Unless you explicitly state otherwise, any contribution intentionally submitted
89//! for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
90//! additional terms or conditions.
91
92#![deny(missing_docs)]
93
94mod command;
95mod cow_appender;
96mod regex_flags;
97mod replace_command;
98mod replace_data;
99mod splitting_iter;
100mod str_ext;
101
102pub use crate::{regex_flags::RegexFlag, replace_command::ReplaceCommand};
103
104use std::{
105    borrow::Cow,
106    fmt::{self, Display},
107};
108
109/// An error that might happen during the parsing.
110#[derive(Debug, PartialEq)]
111pub enum ErrorKind {
112    /// A given string doesn't have enough segments.
113    NotEnoughSegments,
114    /// Unknown regex command has been detected.
115    UnknownCommand(String),
116    /// Unknown regex flag has been detected.
117    UnknownFlag(char),
118    /// Regex parsing/compiling error.
119    RegexError(regex::Error),
120}
121
122impl Display for ErrorKind {
123    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
124        use ErrorKind::*;
125        match self {
126            NotEnoughSegments => write!(
127                f,
128                "Parsing error: a given string doesn't have enough segments"
129            ),
130            UnknownCommand(cmd) => write!(
131                f,
132                "Parsing error: unknown regex command '{}' has been detected",
133                cmd
134            ),
135            UnknownFlag(flag) => write!(
136                f,
137                "Parsing error: unknown regex flag '{}' has been detected",
138                flag
139            ),
140            RegexError(_err) => write!(f, "Parsing error: regex parsing/compiling error"),
141        }
142    }
143}
144
145impl std::error::Error for ErrorKind {
146    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
147        match self {
148            ErrorKind::NotEnoughSegments | ErrorKind::UnknownCommand(_) | ErrorKind::UnknownFlag(_) => None,
149            ErrorKind::RegexError(r) => Some(r),
150        }
151    }
152}
153
154/// Parses and executes regex replace commands in a form of `s/regex/replace/flags`.
155///
156/// # Usage notes
157///
158/// * Delimiter slashes (`/`) could be escaped by a backslash: `\/`
159/// * For the list of supported commands please refer to the [`Command`](enum.Command.html) enum
160/// * For the list of supported flags please refer to the [`RegexFlag`](enum.RegexFlag.html) enum
161/// * If multiple commands are given they are executed one after another on a result produced by a
162///   previous command.
163///
164/// For examples please see the crate's main documentation page.
165pub fn find_and_replace<I>(text: &str, commands: I) -> Result<Cow<str>, ErrorKind>
166where
167    I: IntoIterator,
168    I::Item: AsRef<str>,
169{
170    commands
171        .into_iter()
172        .try_fold(Cow::Borrowed(text), |text, cmd| {
173            let replacer = ReplaceCommand::new(cmd.as_ref())?;
174            Ok(replacer.execute(text))
175        })
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line fixture shared by the multi-line tests. The fourth line starts with a space
    /// on purpose.
    const LOREM_IPSUM: &str = r#"Lorem Ipsum is simply dummy text
of the printing and typesetting industry. Lorem Ipsum
has been the industry's standard dummy text ever since
 the 1500s, when an unknown printer took a galley of
type and scrambled it to make a type specimen book.
It has"#;

    /// Runs `commands` over `input` and checks that the outcome equals `expected`.
    fn assert_replaced(input: &str, commands: &[&str], expected: &str) {
        let actual = find_and_replace(input, commands).unwrap();
        assert_eq!(expected, actual);
    }

    /// A global replacement touches every word.
    #[test]
    fn replace_simple() {
        assert_replaced("a very short text", &[r"s/\w+/_/g"], "_ _ _ _");
    }

    /// Non-word characters are left in place by a global word replacement.
    #[test]
    fn replace_simple2() {
        assert_replaced(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/g"],
            r"_$ _%% _@_! _\_+_",
        );
    }

    /// Without the `g` flag only the first match is replaced.
    #[test]
    fn replace_simple3() {
        assert_replaced(
            r"a$ very%% sh@ort! tex\w+t",
            &[r"s/\w+/_/"],
            r"_$ very%% sh@ort! tex\w+t",
        );
    }

    /// A global replacement works across line breaks.
    #[test]
    fn replace_multiline_text() {
        assert_replaced(
            LOREM_IPSUM,
            &[r"s/\w+/_/g"],
            "_ _ _ _ _ _\n_ _ _ _ _ _. _ _\n_ _ _ _\'_ _ _ _ _ _\n _ _, _ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ _ _ _.\n_ _",
        );
    }

    /// Several commands are chained, each one working on the previous result.
    #[test]
    fn process_multiline_text_multicommand() {
        assert_replaced(
            LOREM_IPSUM,
            &[r"s/[a-z]+/_/g", r"s/[0-9+]/=/g", r"s/_//g", r"s/ +//g"],
            "LI\n.LI\n\'\n====,\n.\nI",
        );
    }
}
sedregex/lib.rs

sedregex/
lib.rs