fancy_regex/
expand.rs

1use alloc::borrow::Cow;
2use alloc::string::{String, ToString};
3use alloc::vec::Vec;
4
5use crate::parse::{parse_decimal, parse_id, ParsedId};
6use crate::{Captures, CompileError, Error, ParseError, Regex};
7
/// A set of options for expanding a template string using the contents
/// of capture groups.
///
/// An expander only describes the substitution syntax; it holds no
/// reference to a regex or to captures, so it is cheap to clone and compare.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Expander {
    /// Character that introduces a group reference in a template.  Writing
    /// it twice in a row produces a single literal occurrence.
    sub_char: char,
    /// Delimiter that opens a delimited group name after `sub_char`.
    open: &'static str,
    /// Delimiter that closes a delimited group name.
    close: &'static str,
    /// Whether a group name may also follow `sub_char` directly, without
    /// the `open`/`close` delimiters.
    allow_undelimited_name: bool,
}
17
18impl Default for Expander {
19    /// Returns the default expander used by [`Captures::expand`].
20    ///
21    /// [`Captures::expand`]: struct.Captures.html#expand
22    fn default() -> Self {
23        Expander {
24            sub_char: '$',
25            open: "{",
26            close: "}",
27            allow_undelimited_name: true,
28        }
29    }
30}
31
32impl Expander {
33    /// Returns an expander that uses Python-compatible syntax.
34    ///
35    /// Expands all instances of `\num` or `\g<name>` in `replacement`
36    /// to the corresponding capture group `num` or `name`, and writes
37    /// them to the `dst` buffer given.
38    ///
39    /// `name` may be an integer corresponding to the index of the
40    /// capture group (counted by order of opening parenthesis where `\0` is the
41    /// entire match) or it can be a name (consisting of letters, digits or
42    /// underscores) corresponding to a named capture group.
43    ///
44    /// `num` must be an integer corresponding to the index of the
45    /// capture group.
46    ///
47    /// If `num` or `name` isn't a valid capture group (whether the name doesn't exist
48    /// or isn't a valid index), then it is replaced with the empty string.
49    ///
50    /// The longest possible number is used. e.g., `\10` looks up capture
51    /// group 10 and not capture group 1 followed by a literal 0.
52    ///
53    /// To write a literal `\`, use `\\`.
54    pub fn python() -> Expander {
55        Expander {
56            sub_char: '\\',
57            open: "g<",
58            close: ">",
59            allow_undelimited_name: false,
60        }
61    }
62
63    /// Checks `template` for errors.  The following conditions are checked for:
64    ///
65    /// - A reference to a numbered group that does not exist in `regex`
66    /// - A reference to a numbered group (other than 0) when `regex` contains named groups
67    /// - A reference to a named group that does not occur in `regex`
68    /// - An opening group name delimiter without a closing delimiter
69    /// - Using an empty string as a group name
70    pub fn check(&self, template: &str, regex: &Regex) -> crate::Result<()> {
71        let on_group_num = |num| {
72            if num == 0 {
73                Ok(())
74            } else if !regex.named_groups.is_empty() {
75                Err(Error::CompileError(CompileError::NamedBackrefOnly))
76            } else if num < regex.captures_len() {
77                Ok(())
78            } else {
79                Err(Error::CompileError(CompileError::InvalidBackref(num)))
80            }
81        };
82        self.exec(template, |step| match step {
83            Step::Char(_) => Ok(()),
84            Step::GroupName(name) => {
85                if regex.named_groups.contains_key(name) {
86                    Ok(())
87                } else if let Ok(num) = name.parse() {
88                    on_group_num(num)
89                } else {
90                    Err(Error::CompileError(CompileError::InvalidGroupNameBackref(
91                        name.to_string(),
92                    )))
93                }
94            }
95            Step::GroupNum(num) => on_group_num(num),
96            Step::Error => Err(Error::ParseError(
97                0,
98                ParseError::GeneralParseError(
99                    "parse error in template while expanding".to_string(),
100                ),
101            )),
102        })
103    }
104
105    /// Escapes the substitution character in `text` so it appears literally
106    /// in the output of `expansion`.
107    ///
108    /// ```
109    /// assert_eq!(
110    ///     fancy_regex::Expander::default().escape("Has a literal $ sign."),
111    ///     "Has a literal $$ sign.",
112    /// );
113    /// ```
114    pub fn escape<'a>(&self, text: &'a str) -> Cow<'a, str> {
115        if text.contains(self.sub_char) {
116            let mut quoted = String::with_capacity(self.sub_char.len_utf8() * 2);
117            quoted.push(self.sub_char);
118            quoted.push(self.sub_char);
119            Cow::Owned(text.replace(self.sub_char, &quoted))
120        } else {
121            Cow::Borrowed(text)
122        }
123    }
124
125    #[doc(hidden)]
126    #[deprecated(since = "0.4.0", note = "Use `escape` instead.")]
127    pub fn quote<'a>(&self, text: &'a str) -> Cow<'a, str> {
128        self.escape(text)
129    }
130
131    /// Expands the template string `template` using the syntax defined
132    /// by this expander and the values of capture groups from `captures`.
133    pub fn expansion(&self, template: &str, captures: &Captures<'_>) -> String {
134        let mut cursor = Vec::with_capacity(template.len());
135        #[cfg(feature = "std")]
136        self.write_expansion(&mut cursor, template, captures)
137            .expect("expansion succeeded");
138        #[cfg(not(feature = "std"))]
139        self.write_expansion_vec(&mut cursor, template, captures)
140            .expect("expansion succeeded");
141        String::from_utf8(cursor).expect("expansion is UTF-8")
142    }
143
144    /// Appends the expansion produced by `expansion` to `dst`.  Potentially more efficient
145    /// than calling `expansion` directly and appending to an existing string.
146    pub fn append_expansion(&self, dst: &mut String, template: &str, captures: &Captures<'_>) {
147        let mut cursor = core::mem::take(dst).into_bytes();
148        #[cfg(feature = "std")]
149        self.write_expansion(&mut cursor, template, captures)
150            .expect("expansion succeeded");
151        #[cfg(not(feature = "std"))]
152        self.write_expansion_vec(&mut cursor, template, captures)
153            .expect("expansion succeeded");
154        *dst = String::from_utf8(cursor).expect("expansion is UTF-8");
155    }
156
157    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
158    /// than calling `expansion` directly and writing the result.
159    #[cfg(feature = "std")]
160    pub fn write_expansion(
161        &self,
162        mut dst: impl std::io::Write,
163        template: &str,
164        captures: &Captures<'_>,
165    ) -> std::io::Result<()> {
166        self.exec(template, |step| match step {
167            Step::Char(c) => write!(dst, "{}", c),
168            Step::GroupName(name) => {
169                if let Some(m) = captures.name(name) {
170                    write!(dst, "{}", m.as_str())
171                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
172                    write!(dst, "{}", m.as_str())
173                } else {
174                    Ok(())
175                }
176            }
177            Step::GroupNum(num) => {
178                if let Some(m) = captures.get(num) {
179                    write!(dst, "{}", m.as_str())
180                } else {
181                    Ok(())
182                }
183            }
184            Step::Error => Ok(()),
185        })
186    }
187
188    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
189    /// than calling `expansion` directly and writing the result.
190    pub fn write_expansion_vec(
191        &self,
192        dst: &mut Vec<u8>,
193        template: &str,
194        captures: &Captures<'_>,
195    ) -> core::fmt::Result {
196        self.exec(template, |step| match step {
197            Step::Char(c) => Ok(dst.extend(c.to_string().as_bytes())),
198            Step::GroupName(name) => {
199                if let Some(m) = captures.name(name) {
200                    Ok(dst.extend(m.as_str().as_bytes()))
201                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
202                    Ok(dst.extend(m.as_str().as_bytes()))
203                } else {
204                    Ok(())
205                }
206            }
207            Step::GroupNum(num) => {
208                if let Some(m) = captures.get(num) {
209                    Ok(dst.extend(m.as_str().as_bytes()))
210                } else {
211                    Ok(())
212                }
213            }
214            Step::Error => Ok(()),
215        })
216    }
217
218    fn exec<'t, E>(
219        &self,
220        template: &'t str,
221        mut f: impl FnMut(Step<'t>) -> Result<(), E>,
222    ) -> Result<(), E> {
223        debug_assert!(!self.open.is_empty());
224        debug_assert!(!self.close.is_empty());
225        let mut iter = template.chars();
226        while let Some(c) = iter.next() {
227            if c == self.sub_char {
228                let tail = iter.as_str();
229                let skip = if tail.starts_with(self.sub_char) {
230                    f(Step::Char(self.sub_char))?;
231                    1
232                } else if let Some(ParsedId {
233                    id,
234                    relative: None,
235                    skip,
236                }) = parse_id(tail, self.open, self.close, false).or_else(|| {
237                    if self.allow_undelimited_name {
238                        parse_id(tail, "", "", false)
239                    } else {
240                        None
241                    }
242                }) {
243                    f(Step::GroupName(id))?;
244                    skip
245                } else if let Some((skip, num)) = parse_decimal(tail, 0) {
246                    f(Step::GroupNum(num))?;
247                    skip
248                } else {
249                    f(Step::Error)?;
250                    f(Step::Char(self.sub_char))?;
251                    0
252                };
253                iter = iter.as_str()[skip..].chars();
254            } else {
255                f(Step::Char(c))?;
256            }
257        }
258        Ok(())
259    }
260}
261
/// One piece of a template, as reported by `Expander::exec` to its callback.
enum Step<'a> {
    /// A character to emit literally.
    Char(char),
    /// A reference to a group by the given text, borrowed from the template.
    /// Consumers try it as a group name first and as a group number second.
    GroupName(&'a str),
    /// A reference to a group by number.
    GroupNum(usize),
    /// A substitution character that is not followed by a valid reference.
    Error,
}