fancy_regex/
expand.rs

1use alloc::borrow::Cow;
2use alloc::boxed::Box;
3use alloc::string::{String, ToString};
4use alloc::vec::Vec;
5
6use crate::parse::{parse_decimal, parse_id, ParsedId};
7use crate::{Captures, CompileError, Error, ParseError, Regex};
8
/// A set of options for expanding a template string using the contents
/// of capture groups.
///
/// The options describe the template syntax: which character introduces a
/// group reference and which delimiters may surround a group name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Expander {
    /// Character that introduces a group reference; writing it twice in a
    /// template produces one literal occurrence.
    sub_char: char,
    /// Delimiter that opens a delimited group name (must not be empty).
    open: &'static str,
    /// Delimiter that closes a delimited group name (must not be empty).
    close: &'static str,
    /// Whether a group name may follow `sub_char` directly, without the
    /// `open`/`close` delimiters.
    allow_undelimited_name: bool,
}
18
19impl Default for Expander {
20    /// Returns the default expander used by [`Captures::expand`].
21    ///
22    /// [`Captures::expand`]: struct.Captures.html#expand
23    fn default() -> Self {
24        Expander {
25            sub_char: '$',
26            open: "{",
27            close: "}",
28            allow_undelimited_name: true,
29        }
30    }
31}
32
33impl Expander {
34    /// Returns an expander that uses Python-compatible syntax.
35    ///
36    /// Expands all instances of `\num` or `\g<name>` in `replacement`
37    /// to the corresponding capture group `num` or `name`, and writes
38    /// them to the `dst` buffer given.
39    ///
40    /// `name` may be an integer corresponding to the index of the
41    /// capture group (counted by order of opening parenthesis where `\0` is the
42    /// entire match) or it can be a name (consisting of letters, digits or
43    /// underscores) corresponding to a named capture group.
44    ///
45    /// `num` must be an integer corresponding to the index of the
46    /// capture group.
47    ///
48    /// If `num` or `name` isn't a valid capture group (whether the name doesn't exist
49    /// or isn't a valid index), then it is replaced with the empty string.
50    ///
51    /// The longest possible number is used. e.g., `\10` looks up capture
52    /// group 10 and not capture group 1 followed by a literal 0.
53    ///
54    /// To write a literal `\`, use `\\`.
55    pub fn python() -> Expander {
56        Expander {
57            sub_char: '\\',
58            open: "g<",
59            close: ">",
60            allow_undelimited_name: false,
61        }
62    }
63
64    /// Checks `template` for errors.  The following conditions are checked for:
65    ///
66    /// - A reference to a numbered group that does not exist in `regex`
67    /// - A reference to a numbered group (other than 0) when `regex` contains named groups
68    /// - A reference to a named group that does not occur in `regex`
69    /// - An opening group name delimiter without a closing delimiter
70    /// - Using an empty string as a group name
71    pub fn check(&self, template: &str, regex: &Regex) -> crate::Result<()> {
72        let on_group_num = |num| {
73            if num == 0 {
74                Ok(())
75            } else if !regex.named_groups.is_empty() {
76                Err(Error::CompileError(Box::new(
77                    CompileError::NamedBackrefOnly,
78                )))
79            } else if num < regex.captures_len() {
80                Ok(())
81            } else {
82                Err(Error::CompileError(Box::new(CompileError::InvalidBackref(
83                    num,
84                ))))
85            }
86        };
87        self.exec(template, |step| match step {
88            Step::Char(_) => Ok(()),
89            Step::GroupName(name) => {
90                if regex.named_groups.contains_key(name) {
91                    Ok(())
92                } else if let Ok(num) = name.parse() {
93                    on_group_num(num)
94                } else {
95                    Err(Error::CompileError(Box::new(
96                        CompileError::InvalidGroupNameBackref(name.to_string()),
97                    )))
98                }
99            }
100            Step::GroupNum(num) => on_group_num(num),
101            Step::Error => Err(Error::ParseError(
102                0,
103                ParseError::GeneralParseError(
104                    "parse error in template while expanding".to_string(),
105                ),
106            )),
107        })
108    }
109
110    /// Escapes the substitution character in `text` so it appears literally
111    /// in the output of `expansion`.
112    ///
113    /// ```
114    /// assert_eq!(
115    ///     fancy_regex::Expander::default().escape("Has a literal $ sign."),
116    ///     "Has a literal $$ sign.",
117    /// );
118    /// ```
119    pub fn escape<'a>(&self, text: &'a str) -> Cow<'a, str> {
120        if text.contains(self.sub_char) {
121            let mut quoted = String::with_capacity(self.sub_char.len_utf8() * 2);
122            quoted.push(self.sub_char);
123            quoted.push(self.sub_char);
124            Cow::Owned(text.replace(self.sub_char, &quoted))
125        } else {
126            Cow::Borrowed(text)
127        }
128    }
129
130    #[doc(hidden)]
131    #[deprecated(since = "0.4.0", note = "Use `escape` instead.")]
132    pub fn quote<'a>(&self, text: &'a str) -> Cow<'a, str> {
133        self.escape(text)
134    }
135
136    /// Expands the template string `template` using the syntax defined
137    /// by this expander and the values of capture groups from `captures`.
138    pub fn expansion(&self, template: &str, captures: &Captures<'_>) -> String {
139        let mut cursor = Vec::with_capacity(template.len());
140        #[cfg(feature = "std")]
141        self.write_expansion(&mut cursor, template, captures)
142            .expect("expansion succeeded");
143        #[cfg(not(feature = "std"))]
144        self.write_expansion_vec(&mut cursor, template, captures)
145            .expect("expansion succeeded");
146        String::from_utf8(cursor).expect("expansion is UTF-8")
147    }
148
149    /// Appends the expansion produced by `expansion` to `dst`.  Potentially more efficient
150    /// than calling `expansion` directly and appending to an existing string.
151    pub fn append_expansion(&self, dst: &mut String, template: &str, captures: &Captures<'_>) {
152        let mut cursor = core::mem::take(dst).into_bytes();
153        #[cfg(feature = "std")]
154        self.write_expansion(&mut cursor, template, captures)
155            .expect("expansion succeeded");
156        #[cfg(not(feature = "std"))]
157        self.write_expansion_vec(&mut cursor, template, captures)
158            .expect("expansion succeeded");
159        *dst = String::from_utf8(cursor).expect("expansion is UTF-8");
160    }
161
162    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
163    /// than calling `expansion` directly and writing the result.
164    #[cfg(feature = "std")]
165    pub fn write_expansion(
166        &self,
167        mut dst: impl std::io::Write,
168        template: &str,
169        captures: &Captures<'_>,
170    ) -> std::io::Result<()> {
171        self.exec(template, |step| match step {
172            Step::Char(c) => write!(dst, "{}", c),
173            Step::GroupName(name) => {
174                if let Some(m) = captures.name(name) {
175                    write!(dst, "{}", m.as_str())
176                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
177                    write!(dst, "{}", m.as_str())
178                } else {
179                    Ok(())
180                }
181            }
182            Step::GroupNum(num) => {
183                if let Some(m) = captures.get(num) {
184                    write!(dst, "{}", m.as_str())
185                } else {
186                    Ok(())
187                }
188            }
189            Step::Error => Ok(()),
190        })
191    }
192
193    /// Writes the expansion produced by `expansion` to `dst`.  Potentially more efficient
194    /// than calling `expansion` directly and writing the result.
195    pub fn write_expansion_vec(
196        &self,
197        dst: &mut Vec<u8>,
198        template: &str,
199        captures: &Captures<'_>,
200    ) -> core::fmt::Result {
201        self.exec(template, |step| match step {
202            Step::Char(c) => {
203                dst.extend(c.to_string().as_bytes());
204                Ok(())
205            }
206            Step::GroupName(name) => {
207                if let Some(m) = captures.name(name) {
208                    dst.extend(m.as_str().as_bytes());
209                } else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
210                    dst.extend(m.as_str().as_bytes());
211                }
212                Ok(())
213            }
214            Step::GroupNum(num) => {
215                if let Some(m) = captures.get(num) {
216                    dst.extend(m.as_str().as_bytes());
217                }
218                Ok(())
219            }
220            Step::Error => Ok(()),
221        })
222    }
223
224    fn exec<'t, E>(
225        &self,
226        template: &'t str,
227        mut f: impl FnMut(Step<'t>) -> Result<(), E>,
228    ) -> Result<(), E> {
229        debug_assert!(!self.open.is_empty());
230        debug_assert!(!self.close.is_empty());
231        let mut iter = template.chars();
232        while let Some(c) = iter.next() {
233            if c == self.sub_char {
234                let tail = iter.as_str();
235                let skip = if tail.starts_with(self.sub_char) {
236                    f(Step::Char(self.sub_char))?;
237                    1
238                } else if let Some(ParsedId {
239                    id,
240                    relative: None,
241                    skip,
242                }) = parse_id(tail, self.open, self.close, false).or_else(|| {
243                    if self.allow_undelimited_name {
244                        parse_id(tail, "", "", false)
245                    } else {
246                        None
247                    }
248                }) {
249                    f(Step::GroupName(id))?;
250                    skip
251                } else if let Some((skip, num)) = parse_decimal(tail, 0) {
252                    f(Step::GroupNum(num))?;
253                    skip
254                } else {
255                    f(Step::Error)?;
256                    f(Step::Char(self.sub_char))?;
257                    0
258                };
259                iter = iter.as_str()[skip..].chars();
260            } else {
261                f(Step::Char(c))?;
262            }
263        }
264        Ok(())
265    }
266}
267
/// One unit of work reported by `Expander::exec` while it scans a template.
enum Step<'a> {
    /// A literal character to emit unchanged.
    Char(char),
    /// A group reference given as an identifier. Consumers first look it up
    /// as a group name and then try to parse it as a numeric index.
    GroupName(&'a str),
    /// A group reference given as a decimal index.
    GroupNum(usize),
    /// The substitution character was not followed by anything that parses
    /// as a group reference.
    Error,
}