cucumber_expressions/expand/
parameters.rs

1// Copyright (c) 2021-2025  Brendan Molloy <brendan@bbqsrc.net>,
2//                          Ilya Solovyiov <ilya.solovyiov@gmail.com>,
3//                          Kai Ren <tyranron@gmail.com>
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Support for [custom][1] [`Parameter`]s.
12//!
13//! [1]: https://github.com/cucumber/cucumber-expressions#custom-parameter-types
14
15use std::{collections::HashMap, fmt::Display, iter, str, vec};
16
17use either::Either;
18use nom::{AsChar, Input};
19
20use crate::{expand::OwnedChars, Parameter, SingleExpression};
21
22use super::{
23    Expression, IntoRegexCharIter, ParameterError, ParameterIter,
24    SingleExpressionIter,
25};
26
/// [Cucumber Expressions][0] [AST] `Element` bundled with the [custom][1]
/// `Parameters` that should be consulted while expanding it.
///
/// A [`Parameter`] normally expands into exactly one capturing group of the
/// resulting [`Regex`]. Whenever a custom [`Parameter`] brings several
/// capturing groups of its own, those groups are named following the
/// `__{parameter_id}_{group_id}` scheme, so all of them can be traced back to
/// the single [`Parameter`] they belong to.
///
/// [`Regex`]: regex::Regex
/// [0]: https://github.com/cucumber/cucumber-expressions#readme
/// [1]: https://github.com/cucumber/cucumber-expressions#custom-parameter-types
/// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
#[derive(Clone, Copy, Debug)]
pub struct WithCustom<Element, Parameters> {
    /// Already parsed [Cucumber Expressions][0] [AST] element to be expanded.
    ///
    /// [0]: https://github.com/cucumber/cucumber-expressions#readme
    /// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
    pub element: Element,

    /// Custom `Parameters`, consulted on top of the [default ones][1], when
    /// the `Element` is expanded into a [`Regex`].
    ///
    /// [`Regex`]: regex::Regex
    /// [1]: https://github.com/cucumber/cucumber-expressions#parameter-types
    pub parameters: Parameters,
}
54
55/// Provider of custom [`Parameter`]s.
56pub trait Provider<I> {
57    /// `<`[`Value`]` as `[`Input`]`>::`[`Item`].
58    ///
59    /// [`Item`]: Input::Item
60    /// [`Value`]: Self::Value
61    type Item: AsChar;
62
63    /// Value matcher to be used in a [`Regex`].
64    ///
65    /// Usually, a [`Parameter`] is represented by a single [`Regex`] capturing
66    /// group. In case there are multiple capturing groups, they will be named
67    /// like `__{parameter_id}_{group_id}`. This is done to identify multiple
68    /// capturing groups related to a single [`Parameter`].
69    ///
70    /// [`Regex`]: regex::Regex
71    type Value: Input<Item = Self::Item>;
72
73    /// Returns a [`Value`] matcher corresponding to the given `input`, if any.
74    ///
75    /// [`Value`]: Self::Value
76    fn get(&self, input: &I) -> Option<Self::Value>;
77}
78
79impl<'p, I, Key, Value, S> Provider<I> for &'p HashMap<Key, Value, S>
80where
81    I: Input,
82    <I as Input>::Item: AsChar,
83    Key: AsRef<str>,
84    Value: AsRef<str>,
85{
86    type Item = char;
87    type Value = &'p str;
88
89    fn get(&self, input: &I) -> Option<Self::Value> {
90        self.iter().find_map(|(k, v)| {
91            k.as_ref()
92                .chars()
93                .eq(input.iter_elements().map(AsChar::as_char))
94                .then(|| v.as_ref())
95        })
96    }
97}
98
99impl<I, Pars> IntoRegexCharIter<I> for WithCustom<Expression<I>, Pars>
100where
101    I: Clone + Display + Input,
102    <I as Input>::Item: AsChar,
103    Pars: Clone + Provider<I>,
104    <Pars as Provider<I>>::Value: Input,
105{
106    type Iter = ExpressionWithParsIter<I, Pars>;
107
108    fn into_regex_char_iter(self) -> Self::Iter {
109        let add_pars: fn(_) -> _ = |(item, parameters)| WithCustom {
110            element: item,
111            parameters,
112        };
113        let into_regex_char_iter: fn(_) -> _ =
114            IntoRegexCharIter::into_regex_char_iter;
115        iter::once(Ok('^'))
116            .chain(
117                self.element
118                    .0
119                    .into_iter()
120                    .zip(iter::repeat(self.parameters))
121                    .map(add_pars)
122                    .flat_map(into_regex_char_iter),
123            )
124            .chain(iter::once(Ok('$')))
125    }
126}
127
128// TODO: Replace with TAIT, once stabilized:
129//       https://github.com/rust-lang/rust/issues/63063
130/// [`IntoRegexCharIter::Iter`] for [`WithCustom`]`<`[`Expression`]`>`.
131type ExpressionWithParsIter<I, P> = iter::Chain<
132    iter::Chain<
133        iter::Once<Result<char, ParameterError<I>>>,
134        iter::FlatMap<
135            iter::Map<
136                iter::Zip<vec::IntoIter<SingleExpression<I>>, iter::Repeat<P>>,
137                fn(
138                    (SingleExpression<I>, P),
139                ) -> WithCustom<SingleExpression<I>, P>,
140            >,
141            SingleExprWithParsIter<I, P>,
142            fn(
143                WithCustom<SingleExpression<I>, P>,
144            ) -> SingleExprWithParsIter<I, P>,
145        >,
146    >,
147    iter::Once<Result<char, ParameterError<I>>>,
148>;
149
150impl<I, Pars> IntoRegexCharIter<I> for WithCustom<SingleExpression<I>, Pars>
151where
152    I: Clone + Display + Input,
153    <I as Input>::Item: AsChar,
154    Pars: Provider<I>,
155    <Pars as Provider<I>>::Value: Input,
156{
157    type Iter = SingleExprWithParsIter<I, Pars>;
158
159    fn into_regex_char_iter(self) -> Self::Iter {
160        use Either::{Left, Right};
161
162        if let SingleExpression::Parameter(item) = self.element {
163            Left(
164                WithCustom {
165                    element: item,
166                    parameters: self.parameters,
167                }
168                .into_regex_char_iter(),
169            )
170        } else {
171            Right(self.element.into_regex_char_iter())
172        }
173    }
174}
175
176// TODO: Replace with TAIT, once stabilized:
177//       https://github.com/rust-lang/rust/issues/63063
178/// [`IntoRegexCharIter::Iter`] for
179/// [`WithCustom`]`<`[`SingleExpression`]`>`.
180type SingleExprWithParsIter<I, P> = Either<
181    <WithCustom<Parameter<I>, P> as IntoRegexCharIter<I>>::Iter,
182    SingleExpressionIter<I>,
183>;
184
185impl<I, P> IntoRegexCharIter<I> for WithCustom<Parameter<I>, P>
186where
187    I: Clone + Display + Input,
188    <I as Input>::Item: AsChar,
189    P: Provider<I>,
190    <P as Provider<I>>::Value: Input,
191{
192    type Iter = WithParsIter<I, P>;
193
194    fn into_regex_char_iter(self) -> Self::Iter {
195        use Either::{Left, Right};
196
197        let id = self.element.id;
198
199        match self.parameters.get(&self.element) {
200            None => Right(Left(self.element.into_regex_char_iter())),
201            Some(v) => {
202                // We try to find '(' inside regex. If unsuccessfully, we can be
203                // sure that the regex has no groups, so we can skip parsing.
204                let parsed = v
205                    .iter_elements()
206                    .any(|c| c.as_char() == '(')
207                    .then(|| {
208                        let re = v
209                            .iter_elements()
210                            .map(AsChar::as_char)
211                            .collect::<String>();
212                        let hir = regex_syntax::Parser::new()
213                            .parse(&re)
214                            .map_err(|err| (self.element.input, re, err))?;
215                        Ok(regex_hir::has_capture_groups(&hir).then_some(hir))
216                    })
217                    .transpose();
218                let parsed = match parsed {
219                    Ok(hir) => hir.flatten(),
220                    Err((parameter, re, err)) => {
221                        return Left(iter::once(Err(
222                            ParameterError::RenameRegexGroup {
223                                parameter,
224                                re,
225                                err: Box::new(err),
226                            },
227                        )));
228                    }
229                };
230
231                parsed.map_or_else(
232                    || {
233                        let ok: fn(_) -> _ =
234                            |c: <P::Value as Input>::Item| Ok(c.as_char());
235                        Right(Right(Right(
236                            iter::once(Ok('('))
237                                .chain(v.iter_elements().map(ok))
238                                .chain(iter::once(Ok(')'))),
239                        )))
240                    },
241                    |cur_hir| {
242                        let ok: fn(_) -> _ = Ok;
243                        let new_hir =
244                            regex_hir::rename_capture_groups(cur_hir, id);
245                        Right(Right(Left(
246                            "(?:"
247                                .chars()
248                                .map(ok)
249                                .chain(
250                                    OwnedChars::new(new_hir.to_string())
251                                        .map(ok),
252                                )
253                                .chain(iter::once(Ok(')'))),
254                        )))
255                    },
256                )
257            }
258        }
259    }
260}
261
262// TODO: Replace with TAIT, once stabilized:
263//       https://github.com/rust-lang/rust/issues/63063
264/// [`IntoRegexCharIter::Iter`] for [`WithCustom`]`<`[`Parameter`]`>`.
265type WithParsIter<I, P> = Either<
266    iter::Once<Result<char, ParameterError<I>>>,
267    Either<
268        ParameterIter<I>,
269        Either<
270            iter::Chain<
271                iter::Chain<
272                    iter::Map<
273                        str::Chars<'static>,
274                        fn(char) -> Result<char, ParameterError<I>>,
275                    >,
276                    iter::Map<
277                        OwnedChars,
278                        fn(char) -> Result<char, ParameterError<I>>,
279                    >,
280                >,
281                iter::Once<Result<char, ParameterError<I>>>,
282            >,
283            iter::Chain<
284                iter::Chain<
285                    iter::Once<Result<char, ParameterError<I>>>,
286                    iter::Map<
287                        <<P as Provider<I>>::Value as Input>::Iter,
288                        fn(
289                            <<P as Provider<I>>::Value as Input>::Item,
290                        )
291                            -> Result<char, ParameterError<I>>,
292                    >,
293                >,
294                iter::Once<Result<char, ParameterError<I>>>,
295            >,
296        >,
297    >,
298>;
299
300/// Helpers to work with [`Regex`]es [`Hir`].
301///
302/// [`Hir`]: regex_syntax::hir::Hir
303/// [`Regex`]: regex::Regex
304mod regex_hir {
305    use std::mem;
306
307    use regex_syntax::hir::{Hir, HirKind};
308
309    /// Checks whether the given [`Regex`] [`Hir`] contains any capturing
310    /// groups.
311    ///
312    /// [`Regex`]: regex::Regex
313    pub(super) fn has_capture_groups(hir: &Hir) -> bool {
314        match hir.kind() {
315            HirKind::Empty
316            | HirKind::Literal(_)
317            | HirKind::Class(_)
318            | HirKind::Look(_)
319            | HirKind::Repetition(_) => false,
320            HirKind::Capture(_) => true,
321            HirKind::Concat(inner) | HirKind::Alternation(inner) => {
322                inner.iter().any(has_capture_groups)
323            }
324        }
325    }
326
327    /// Renames capturing groups in the given [`Hir`] via
328    /// `__{parameter_id}_{group_id}` naming scheme.
329    pub(super) fn rename_capture_groups(hir: Hir, parameter_id: usize) -> Hir {
330        rename_groups_inner(hir, parameter_id, &mut 0)
331    }
332
333    /// Renames capturing groups in the given [`Hir`] via
334    /// `__{parameter_id}_{group_id}` naming scheme, using the provided
335    /// `group_id_indexer`.
336    fn rename_groups_inner(
337        hir: Hir,
338        parameter_id: usize,
339        group_id_indexer: &mut usize,
340    ) -> Hir {
341        match hir.into_kind() {
342            HirKind::Empty => Hir::empty(),
343            HirKind::Literal(lit) => Hir::literal(lit.0),
344            HirKind::Class(cl) => Hir::class(cl),
345            HirKind::Look(l) => Hir::look(l),
346            HirKind::Repetition(rep) => Hir::repetition(rep),
347            HirKind::Capture(mut capture) => {
348                capture.name =
349                    Some(format!("__{parameter_id}_{group_id_indexer}").into());
350                *group_id_indexer += 1;
351
352                let inner_hir =
353                    mem::replace(capture.sub.as_mut(), Hir::empty());
354                drop(mem::replace(
355                    capture.sub.as_mut(),
356                    rename_groups_inner(
357                        inner_hir,
358                        parameter_id,
359                        group_id_indexer,
360                    ),
361                ));
362
363                Hir::capture(capture)
364            }
365            HirKind::Concat(concat) => Hir::concat(
366                concat
367                    .into_iter()
368                    .map(|h| {
369                        rename_groups_inner(h, parameter_id, group_id_indexer)
370                    })
371                    .collect(),
372            ),
373            HirKind::Alternation(alt) => Hir::alternation(
374                alt.into_iter()
375                    .map(|h| {
376                        rename_groups_inner(h, parameter_id, group_id_indexer)
377                    })
378                    .collect(),
379            ),
380        }
381    }
382}
383
#[cfg(test)]
mod spec {
    use crate::expand::Error;

    use super::{Expression, HashMap, ParameterError};

    /// Custom regex without groups is wrapped into a single capturing group.
    #[test]
    fn custom_parameter() {
        let custom = HashMap::from([("custom", "custom")]);

        let regex = Expression::regex_with_parameters("{custom}", &custom)
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(regex.as_str(), "^(custom)$");
    }

    /// Groups of a custom regex are renamed per parameter occurrence.
    #[test]
    fn custom_parameter_with_groups() {
        let custom = HashMap::from([("custom", "\"(custom)\"|'(custom)'")]);

        let regex =
            Expression::regex_with_parameters("{custom} {custom}", &custom)
                .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(
            regex.as_str(),
            "^(?:(?:(?:\"(?P<__0_0>(?:custom))\")\
                    |(?:'(?P<__0_1>(?:custom))'))) \
              (?:(?:(?:\"(?P<__1_0>(?:custom))\")\
                    |(?:'(?P<__1_1>(?:custom))')))$",
        );
    }

    /// Default parameters are still expanded when custom ones are provided.
    #[test]
    fn default_parameter() {
        let custom = HashMap::from([("custom", "custom")]);

        let regex = Expression::regex_with_parameters("{}", &custom)
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(regex.as_str(), "^(.*)$");
    }

    /// Parameter that is neither default nor custom is reported as an error.
    #[test]
    fn unknown_parameter() {
        let custom = HashMap::<String, String>::new();

        let err =
            Expression::regex_with_parameters("{custom}", &custom).unwrap_err();

        match err {
            Error::Expansion(ParameterError::NotFound(not_found)) => {
                assert_eq!(*not_found, "custom");
            }
            e @ (Error::Regex(_) | Error::Parsing(_) | Error::Expansion(_)) => {
                panic!("wrong err: {e}")
            }
        }
    }
}