grep_matcher/
interpolate.rs

1use memchr::memchr;
2
3/// Interpolate capture references in `replacement` and write the interpolation
4/// result to `dst`. References in `replacement` take the form of $N or $name,
5/// where `N` is a capture group index and `name` is a capture group name. The
6/// function provided, `name_to_index`, maps capture group names to indices.
7///
8/// The `append` function given is responsible for writing the replacement
9/// to the `dst` buffer. That is, it is called with the capture group index
10/// of a capture group reference and is expected to resolve the index to its
11/// corresponding matched text. If no such match exists, then `append` should
12/// not write anything to its given buffer.
13#[inline]
14pub fn interpolate<A, N>(
15    mut replacement: &[u8],
16    mut append: A,
17    mut name_to_index: N,
18    dst: &mut Vec<u8>,
19) where
20    A: FnMut(usize, &mut Vec<u8>),
21    N: FnMut(&str) -> Option<usize>,
22{
23    while !replacement.is_empty() {
24        match memchr(b'$', replacement) {
25            None => break,
26            Some(i) => {
27                dst.extend(&replacement[..i]);
28                replacement = &replacement[i..];
29            }
30        }
31        if replacement.get(1).map_or(false, |&b| b == b'$') {
32            dst.push(b'$');
33            replacement = &replacement[2..];
34            continue;
35        }
36        debug_assert!(!replacement.is_empty());
37        let cap_ref = match find_cap_ref(replacement) {
38            Some(cap_ref) => cap_ref,
39            None => {
40                dst.push(b'$');
41                replacement = &replacement[1..];
42                continue;
43            }
44        };
45        replacement = &replacement[cap_ref.end..];
46        match cap_ref.cap {
47            Ref::Number(i) => append(i, dst),
48            Ref::Named(name) => {
49                if let Some(i) = name_to_index(name) {
50                    append(i, dst);
51                }
52            }
53        }
54    }
55    dst.extend(replacement);
56}
57
58/// `CaptureRef` represents a reference to a capture group inside some text.
59/// The reference is either a capture group name or a number.
60///
61/// It is also tagged with the position in the text immediately proceeding the
62/// capture reference.
63#[derive(Clone, Copy, Debug, Eq, PartialEq)]
64struct CaptureRef<'a> {
65    cap: Ref<'a>,
66    end: usize,
67}
68
/// Identifies a capture group either by name or by index.
///
/// This is what references such as `$2`, `$foo` and `${foo}` refer to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Ref<'a> {
    /// A capture group referred to by name.
    Named(&'a str),
    /// A capture group referred to by index.
    Number(usize),
}

/// Builds a named reference from a string slice.
impl<'a> From<&'a str> for Ref<'a> {
    #[inline]
    fn from(name: &'a str) -> Self {
        Self::Named(name)
    }
}

/// Builds a numbered reference from an index.
impl From<usize> for Ref<'static> {
    #[inline]
    fn from(index: usize) -> Self {
        Self::Number(index)
    }
}
91
92/// Parses a possible reference to a capture group name in the given text,
93/// starting at the beginning of `replacement`.
94///
95/// If no such valid reference could be found, None is returned.
96#[inline]
97fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
98    let mut i = 0;
99    if replacement.len() <= 1 || replacement[0] != b'$' {
100        return None;
101    }
102    let mut brace = false;
103    i += 1;
104    if replacement[i] == b'{' {
105        brace = true;
106        i += 1;
107    }
108    let mut cap_end = i;
109    while replacement.get(cap_end).map_or(false, is_valid_cap_letter) {
110        cap_end += 1;
111    }
112    if cap_end == i {
113        return None;
114    }
115    // We just verified that the range 0..cap_end is valid ASCII, so it must
116    // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
117    // check with an unchecked conversion or by parsing the number straight
118    // from &[u8].
119    let cap = std::str::from_utf8(&replacement[i..cap_end])
120        .expect("valid UTF-8 capture name");
121    if brace {
122        if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
123            return None;
124        }
125        cap_end += 1;
126    }
127    Some(CaptureRef {
128        cap: match cap.parse::<u32>() {
129            Ok(i) => Ref::Number(i as usize),
130            Err(_) => Ref::Named(cap),
131        },
132        end: cap_end,
133    })
134}
135
/// Reports whether the given byte may appear in a capture name, i.e. whether
/// it is an ASCII letter, an ASCII digit or an underscore.
#[inline]
fn is_valid_cap_letter(b: &u8) -> bool {
    b.is_ascii_alphanumeric() || *b == b'_'
}
144
#[cfg(test)]
mod tests {
    use super::{CaptureRef, find_cap_ref, interpolate};

    // Declares a test named `$name` that runs `find_cap_ref` on `$text`.
    // With two arguments the test expects `None`; with a third argument it
    // expects `Some` of that capture reference.
    macro_rules! find {
        (@check $name:ident, $text:expr, $expected:expr) => {
            #[test]
            fn $name() {
                assert_eq!($expected, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr) => {
            find!(@check $name, $text, None);
        };
        ($name:ident, $text:expr, $capref:expr) => {
            find!(@check $name, $text, Some($capref));
        };
    }

    // Shorthand for a `CaptureRef` built from a name or a number and the
    // offset at which the reference ends.
    macro_rules! c {
        ($target:expr, $end:expr) => {
            CaptureRef { cap: $target.into(), end: $end }
        };
    }

    // Valid references, unbraced and braced.
    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    // Digits followed by a letter form a name, not a number.
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    // Braces end the reference before the trailing `a`.
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    // Unclosed braces are not references.
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    // The reference must start at the first byte.
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");

    // Runs `interpolate` with a name table and a list of capture texts
    // (indexed by group number) and returns the output as a `String`.
    fn interpolate_string(
        mut name_to_index: Vec<(&'static str, usize)>,
        caps: Vec<&'static str>,
        replacement: &str,
    ) -> String {
        // The lookup closure binary-searches by name, so sort by name first.
        name_to_index.sort_by_key(|&(name, _)| name);

        let write_group = |index: usize, out: &mut Vec<u8>| {
            if let Some(text) = caps.get(index) {
                out.extend(text.as_bytes());
            }
        };
        let lookup = |name: &str| -> Option<usize> {
            name_to_index
                .binary_search_by_key(&name, |&(key, _)| key)
                .ok()
                .map(|pos| name_to_index[pos].1)
        };

        let mut dst = vec![];
        interpolate(replacement.as_bytes(), write_group, lookup, &mut dst);
        String::from_utf8(dst).unwrap()
    }

    // Declares a test named `$name` asserting that interpolating `$hay` with
    // the given name table and capture texts yields `$expected`.
    macro_rules! interp {
        ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => {
            #[test]
            fn $name() {
                let actual = interpolate_string($map, $caps, $hay);
                assert_eq!($expected, actual);
            }
        };
    }

    // A named reference surrounded by spaces.
    interp!(
        interp1,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo test",
        "test xxx test",
    );

    // `$footest` names a group that is not in the table.
    interp!(
        interp2,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$footest",
        "test",
    );

    // Braces delimit the name from the text that follows.
    interp!(
        interp3,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${foo}test",
        "testxxxtest",
    );

    // `$2test` is taken as the name `2test`, which is not in the table.
    interp!(
        interp4,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$2test",
        "test",
    );

    // Braces delimit the number from the text that follows.
    interp!(
        interp5,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${2}test",
        "testxxxtest",
    );

    // `$$` yields a literal `$`.
    interp!(
        interp6,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $$foo test",
        "test $foo test",
    );

    // A reference at the end of the input.
    interp!(
        interp7,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo",
        "test xxx",
    );

    // A reference at the start of the input.
    interp!(
        interp8,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "$foo test",
        "xxx test",
    );

    // Two adjacent references.
    interp!(
        interp9,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $bar$foo",
        "test yyyxxx",
    );

    // A lone `$` is kept as is.
    interp!(
        interp10,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $ test",
        "test $ test",
    );

    // Empty braces are kept as is.
    interp!(
        interp11,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${} test",
        "test ${} test",
    );

    // Braces holding only a space are kept as is.
    interp!(
        interp12,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${ } test",
        "test ${ } test",
    );

    // A space inside the braces makes the reference invalid.
    interp!(
        interp13,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a b} test",
        "test ${a b} test",
    );

    // A valid braced reference to a name that is not in the table.
    interp!(
        interp14,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a} test",
        "test  test",
    );
}
329        "test  test",
330    );
331}