fancy_regex/
expand.rs

1use alloc::borrow::Cow;
2use alloc::string::{String, ToString};
3use alloc::vec::Vec;
4
5use crate::parse::{parse_decimal, parse_id, ParsedId};
6use crate::{Captures, CompileError, Error, ParseError, Regex};
7
/// A set of options for expanding a template string using the contents
/// of capture groups.
///
/// The options describe the surface syntax of a template: which character
/// introduces a group reference and how a group name is delimited.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Expander {
    /// Character that introduces a group reference. Writing it twice in a
    /// template produces one literal occurrence of it.
    sub_char: char,
    /// Text that opens a delimited group name directly after `sub_char`.
    open: &'static str,
    /// Text that closes a delimited group name.
    close: &'static str,
    /// Whether a group name may also follow `sub_char` directly, without
    /// the `open`/`close` delimiters.
    allow_undelimited_name: bool,
}
17
18impl Default for Expander {
19    /// Returns the default expander used by [`Captures::expand`].
20    ///
21    /// [`Captures::expand`]: struct.Captures.html#expand
22    fn default() -> Self {
23        Expander {
24            sub_char: '$',
25            open: "{",
26            close: "}",
27            allow_undelimited_name: true,
28        }
29    }
30}
31
32impl Expander {
33    /// Returns an expander that uses Python-compatible syntax.
34    ///
35    /// Expands all instances of `\num` or `\g<name>` in `replacement`
36    /// to the corresponding capture group `num` or `name`, and writes
37    /// them to the `dst` buffer given.
38    ///
39    /// `name` may be an integer corresponding to the index of the
40    /// capture group (counted by order of opening parenthesis where `\0` is the
41    /// entire match) or it can be a name (consisting of letters, digits or
42    /// underscores) corresponding to a named capture group.
43    ///
44    /// `num` must be an integer corresponding to the index of the
45    /// capture group.
46    ///
47    /// If `num` or `name` isn't a valid capture group (whether the name doesn't exist
48    /// or isn't a valid index), then it is replaced with the empty string.
49    ///
50    /// The longest possible number is used. e.g., `\10` looks up capture
51    /// group 10 and not capture group 1 followed by a literal 0.
52    ///
53    /// To write a literal `\`, use `\\`.
54    pub fn python() -> Expander {
55        Expander {
56            sub_char: '\\',
57            open: "g<",
58            close: ">",
59            allow_undelimited_name: false,
60        }
61    }
62
63    /// Checks `template` for errors.  The following conditions are checked for:
64    ///
65    /// - A reference to a numbered group that does not exist in `regex`
66    /// - A reference to a numbered group (other than 0) when `regex` contains named groups
67    /// - A reference to a named group that does not occur in `regex`
68    /// - An opening group name delimiter without a closing delimiter
69    /// - Using an empty string as a group name
70    pub fn check(&self, template: &str, regex: &Regex) -> crate::Result<()> {
71        let on_group_num = |num| {
72            if num == 0 {
73                Ok(())
74            } else if !regex.named_groups.is_empty() {
75                Err(Error::CompileError(CompileError::NamedBackrefOnly))
76            } else if num < regex.captures_len() {
77                Ok(())
78            } else {
79                Err(Error::CompileError(CompileError::InvalidBackref(num)))
80            }
81        };
82        self.exec(template, |step| match step {
83            Step::Char(_) => Ok(()),
84            Step::GroupName(name) => {
85                if regex.named_groups.contains_key(name) {
86                    Ok(())
87                } else if let Ok(num) = name.parse() {
88                    on_group_num(num)
89                } else {
90                    Err(Error::CompileError(CompileError::InvalidGroupNameBackref(
91                        name.to_string(),
92                    )))
93                }
94            }
95            Step::GroupNum(num) => on_group_num(num),
96            Step::Error => Err(Error::ParseError(
97                0,
98                ParseError::GeneralParseError(
99                    "parse error in template while expanding".to_string(),
100                ),
101            )),
102        })
103    }
104
105    /// Escapes the substitution character in `text` so it appears literally
106    /// in the output of `expansion`.
107    ///
108    /// ```
109    /// assert_eq!(
110    ///     fancy_regex::Expander::default().escape("Has a literal $ sign."),
111    ///     "Has a literal $$ sign.",
112    /// );
113    /// ```
114    pub fn escape<'a>(&self, text: &'a str) -> Cow<'a, str> {
115        if text.contains(self.sub_char) {
116            let mut quoted = String::with_capacity(self.sub_char.len_utf8() * 2);
117            quoted.push(self.sub_char);
118            quoted.push(self.sub_char);
119            Cow::Owned(text.replace(self.sub_char, &quoted))
120        } else {
121            Cow::Borrowed(text)
122        }
123    }
124
125    #[doc(hidden)]
126    #[deprecated(since = "0.4.0", note = "Use `escape` instead.")]
127    pub fn quote<'a>(&self, text: &'a str) -> Cow<'a, str> {
128        self.escape(text)
129    }
130
131    /// Expands the template string `template` using the syntax defined
132    /// by this expander and the values of capture groups from `captures`.
133    pub fn expansion(&self, template: &str, captures: &Captures<'_>) -> String {
134        let mut cursor = Vec::with_capacity(template.len());
135        #[cfg(feature = "std")]
136        self.write_expansion(&mut cursor, template, captures)
137            .expect("expansion succeeded");
138        #[cfg(not(feature = "std"))]
139        self.write_expansion_vec(&mut cursor, template, captures)
140            .expect("expansion succeeded");
141        String::from_utf8(cursor).expect("expansion is UTF-8")
142    }
143
144    /// Appends the expansion produced by `expansion` to `dst`.  Potentially more efficient
145    /// than calling `expansion` directly and appending to an existing string.
146    pub fn append_expansion(&self, dst: &mut String, template: &str, captures: &Captures<'_>) {
147        let mut cursor = core::mem::take(dst).into_bytes();
148        #[cfg(feature = "std")]
149        self.write_expansion(&mut cursor, template, captures)
150            .expect("expansion succeeded");
151        #[cfg(not(feature = "std"))]
152        self.write_expansion_vec(&mut cursor, template, captures)
153            .expect("expansion succeeded");
154        *dst = String::from_utf8(cursor).expect("expansion is UTF-8");
155    }
156
157    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
158    /// than calling `expansion` directly and writing the result.
159    #[cfg(feature = "std")]
160    pub fn write_expansion(
161        &self,
162        mut dst: impl std::io::Write,
163        template: &str,
164        captures: &Captures<'_>,
165    ) -> std::io::Result<()> {
166        self.exec(template, |step| match step {
167            Step::Char(c) => write!(dst, "{}", c),
168            Step::GroupName(name) => {
169                if let Some(m) = captures.name(name) {
170                    write!(dst, "{}", m.as_str())
171                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
172                    write!(dst, "{}", m.as_str())
173                } else {
174                    Ok(())
175                }
176            }
177            Step::GroupNum(num) => {
178                if let Some(m) = captures.get(num) {
179                    write!(dst, "{}", m.as_str())
180                } else {
181                    Ok(())
182                }
183            }
184            Step::Error => Ok(()),
185        })
186    }
187
188    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
189    /// than calling `expansion` directly and writing the result.
190    pub fn write_expansion_vec(
191        &self,
192        dst: &mut Vec<u8>,
193        template: &str,
194        captures: &Captures<'_>,
195    ) -> core::fmt::Result {
196        self.exec(template, |step| match step {
197            Step::Char(c) => {
198                dst.extend(c.to_string().as_bytes());
199                Ok(())
200            }
201            Step::GroupName(name) => {
202                if let Some(m) = captures.name(name) {
203                    dst.extend(m.as_str().as_bytes());
204                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
205                    dst.extend(m.as_str().as_bytes());
206                }
207                Ok(())
208            }
209            Step::GroupNum(num) => {
210                if let Some(m) = captures.get(num) {
211                    dst.extend(m.as_str().as_bytes());
212                }
213                Ok(())
214            }
215            Step::Error => Ok(()),
216        })
217    }
218
219    fn exec<'t, E>(
220        &self,
221        template: &'t str,
222        mut f: impl FnMut(Step<'t>) -> Result<(), E>,
223    ) -> Result<(), E> {
224        debug_assert!(!self.open.is_empty());
225        debug_assert!(!self.close.is_empty());
226        let mut iter = template.chars();
227        while let Some(c) = iter.next() {
228            if c == self.sub_char {
229                let tail = iter.as_str();
230                let skip = if tail.starts_with(self.sub_char) {
231                    f(Step::Char(self.sub_char))?;
232                    1
233                } else if let Some(ParsedId {
234                    id,
235                    relative: None,
236                    skip,
237                }) = parse_id(tail, self.open, self.close, false).or_else(|| {
238                    if self.allow_undelimited_name {
239                        parse_id(tail, "", "", false)
240                    } else {
241                        None
242                    }
243                }) {
244                    f(Step::GroupName(id))?;
245                    skip
246                } else if let Some((skip, num)) = parse_decimal(tail, 0) {
247                    f(Step::GroupNum(num))?;
248                    skip
249                } else {
250                    f(Step::Error)?;
251                    f(Step::Char(self.sub_char))?;
252                    0
253                };
254                iter = iter.as_str()[skip..].chars();
255            } else {
256                f(Step::Char(c))?;
257            }
258        }
259        Ok(())
260    }
261}
262
/// One unit reported by `Expander::exec` while it scans a template.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Step<'a> {
    /// A literal character to copy to the output.
    Char(char),
    /// A reference parsed as a group name; the slice borrows from the
    /// template. The name may consist only of digits, in which case callers
    /// fall back to treating it as a group index.
    GroupName(&'a str),
    /// A reference parsed as a decimal group index.
    GroupNum(usize),
    /// The substitution character was not followed by a valid reference.
    Error,
}