cucumber_expressions/expand/
parameters.rs

// Copyright (c) 2021-2025  Brendan Molloy <brendan@bbqsrc.net>,
//                          Ilya Solovyiov <ilya.solovyiov@gmail.com>,
//                          Kai Ren <tyranron@gmail.com>
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10
//! Support for [custom][1] [`Parameter`]s.
//!
//! [1]: https://github.com/cucumber/cucumber-expressions#custom-parameter-types
14
15use std::{collections::HashMap, fmt::Display, iter, str, vec};
16
17use either::Either;
18use nom::{AsChar, Input};
19
20use super::{
21    Expression, IntoRegexCharIter, ParameterError, ParameterIter,
22    SingleExpressionIter,
23};
24use crate::{Parameter, SingleExpression, expand::OwnedChars};
25
/// Pairing of a [Cucumber Expressions][0] [AST] `Element` with the
/// [custom][1] `Parameters` that have to be considered when expanding it.
///
/// Normally, one [`Parameter`] corresponds to exactly one capturing group of
/// the resulting [`Regex`]. Whenever the regular expression of a [`Parameter`]
/// brings capturing groups of its own, those are named following the
/// `__{parameter_id}_{group_id}` scheme, so all the groups belonging to the
/// same [`Parameter`] may be told apart.
///
/// [`Regex`]: regex::Regex
/// [0]: https://github.com/cucumber/cucumber-expressions#readme
/// [1]: https://github.com/cucumber/cucumber-expressions#custom-parameter-types
/// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
#[derive(Clone, Copy, Debug)]
pub struct WithCustom<Element, Parameters> {
    /// Already parsed piece of a [Cucumber Expressions][0] [AST] to be
    /// expanded.
    ///
    /// [0]: https://github.com/cucumber/cucumber-expressions#readme
    /// [AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
    pub element: Element,

    /// Source of custom `Parameters`, looked up on top of the
    /// [default ones][1] while the `Element` is turned into a [`Regex`].
    ///
    /// [`Regex`]: regex::Regex
    /// [1]: https://github.com/cucumber/cucumber-expressions#parameter-types
    pub parameters: Parameters,
}
53
54/// Provider of custom [`Parameter`]s.
55pub trait Provider<I> {
56    /// `<`[`Value`]` as `[`Input`]`>::`[`Item`].
57    ///
58    /// [`Item`]: Input::Item
59    /// [`Value`]: Self::Value
60    type Item: AsChar;
61
62    /// Value matcher to be used in a [`Regex`].
63    ///
64    /// Usually, a [`Parameter`] is represented by a single [`Regex`] capturing
65    /// group. In case there are multiple capturing groups, they will be named
66    /// like `__{parameter_id}_{group_id}`. This is done to identify multiple
67    /// capturing groups related to a single [`Parameter`].
68    ///
69    /// [`Regex`]: regex::Regex
70    type Value: Input<Item = Self::Item>;
71
72    /// Returns a [`Value`] matcher corresponding to the given `input`, if any.
73    ///
74    /// [`Value`]: Self::Value
75    fn get(&self, input: &I) -> Option<Self::Value>;
76}
77
78impl<'p, I, Key, Value, S> Provider<I> for &'p HashMap<Key, Value, S>
79where
80    I: Input,
81    <I as Input>::Item: AsChar,
82    Key: AsRef<str>,
83    Value: AsRef<str>,
84{
85    type Item = char;
86    type Value = &'p str;
87
88    fn get(&self, input: &I) -> Option<Self::Value> {
89        self.iter().find_map(|(k, v)| {
90            k.as_ref()
91                .chars()
92                .eq(input.iter_elements().map(AsChar::as_char))
93                .then(|| v.as_ref())
94        })
95    }
96}
97
98impl<I, Pars> IntoRegexCharIter<I> for WithCustom<Expression<I>, Pars>
99where
100    I: Clone + Display + Input,
101    <I as Input>::Item: AsChar,
102    Pars: Clone + Provider<I>,
103    <Pars as Provider<I>>::Value: Input,
104{
105    type Iter = ExpressionWithParsIter<I, Pars>;
106
107    fn into_regex_char_iter(self) -> Self::Iter {
108        let add_pars: fn(_) -> _ =
109            |(item, parameters)| WithCustom { element: item, parameters };
110        let into_regex_char_iter: fn(_) -> _ =
111            IntoRegexCharIter::into_regex_char_iter;
112        iter::once(Ok('^'))
113            .chain(
114                self.element
115                    .0
116                    .into_iter()
117                    .zip(iter::repeat(self.parameters))
118                    .map(add_pars)
119                    .flat_map(into_regex_char_iter),
120            )
121            .chain(iter::once(Ok('$')))
122    }
123}
124
125// TODO: Replace with TAIT, once stabilized:
126//       https://github.com/rust-lang/rust/issues/63063
127/// [`IntoRegexCharIter::Iter`] for [`WithCustom`]`<`[`Expression`]`>`.
128type ExpressionWithParsIter<I, P> = iter::Chain<
129    iter::Chain<
130        iter::Once<Result<char, ParameterError<I>>>,
131        iter::FlatMap<
132            iter::Map<
133                iter::Zip<vec::IntoIter<SingleExpression<I>>, iter::Repeat<P>>,
134                fn(
135                    (SingleExpression<I>, P),
136                ) -> WithCustom<SingleExpression<I>, P>,
137            >,
138            SingleExprWithParsIter<I, P>,
139            fn(
140                WithCustom<SingleExpression<I>, P>,
141            ) -> SingleExprWithParsIter<I, P>,
142        >,
143    >,
144    iter::Once<Result<char, ParameterError<I>>>,
145>;
146
147impl<I, Pars> IntoRegexCharIter<I> for WithCustom<SingleExpression<I>, Pars>
148where
149    I: Clone + Display + Input,
150    <I as Input>::Item: AsChar,
151    Pars: Provider<I>,
152    <Pars as Provider<I>>::Value: Input,
153{
154    type Iter = SingleExprWithParsIter<I, Pars>;
155
156    fn into_regex_char_iter(self) -> Self::Iter {
157        use Either::{Left, Right};
158
159        if let SingleExpression::Parameter(item) = self.element {
160            Left(
161                WithCustom { element: item, parameters: self.parameters }
162                    .into_regex_char_iter(),
163            )
164        } else {
165            Right(self.element.into_regex_char_iter())
166        }
167    }
168}
169
170// TODO: Replace with TAIT, once stabilized:
171//       https://github.com/rust-lang/rust/issues/63063
172/// [`IntoRegexCharIter::Iter`] for
173/// [`WithCustom`]`<`[`SingleExpression`]`>`.
174type SingleExprWithParsIter<I, P> = Either<
175    <WithCustom<Parameter<I>, P> as IntoRegexCharIter<I>>::Iter,
176    SingleExpressionIter<I>,
177>;
178
179impl<I, P> IntoRegexCharIter<I> for WithCustom<Parameter<I>, P>
180where
181    I: Clone + Display + Input,
182    <I as Input>::Item: AsChar,
183    P: Provider<I>,
184    <P as Provider<I>>::Value: Input,
185{
186    type Iter = WithParsIter<I, P>;
187
188    fn into_regex_char_iter(self) -> Self::Iter {
189        use Either::{Left, Right};
190
191        let id = self.element.id;
192
193        match self.parameters.get(&self.element) {
194            None => Right(Left(self.element.into_regex_char_iter())),
195            Some(v) => {
196                // We try to find '(' inside regex. If unsuccessfully, we can be
197                // sure that the regex has no groups, so we can skip parsing.
198                let parsed = v
199                    .iter_elements()
200                    .any(|c| c.as_char() == '(')
201                    .then(|| {
202                        let re = v
203                            .iter_elements()
204                            .map(AsChar::as_char)
205                            .collect::<String>();
206                        let hir = regex_syntax::Parser::new()
207                            .parse(&re)
208                            .map_err(|err| (self.element.input, re, err))?;
209                        Ok(regex_hir::has_capture_groups(&hir).then_some(hir))
210                    })
211                    .transpose();
212                let parsed = match parsed {
213                    Ok(hir) => hir.flatten(),
214                    Err((parameter, re, err)) => {
215                        return Left(iter::once(Err(
216                            ParameterError::RenameRegexGroup {
217                                parameter,
218                                re,
219                                err: Box::new(err),
220                            },
221                        )));
222                    }
223                };
224
225                parsed.map_or_else(
226                    || {
227                        let ok: fn(_) -> _ =
228                            |c: <P::Value as Input>::Item| Ok(c.as_char());
229                        Right(Right(Right(
230                            iter::once(Ok('('))
231                                .chain(v.iter_elements().map(ok))
232                                .chain(iter::once(Ok(')'))),
233                        )))
234                    },
235                    |cur_hir| {
236                        let ok: fn(_) -> _ = Ok;
237                        let new_hir =
238                            regex_hir::rename_capture_groups(cur_hir, id);
239                        Right(Right(Left(
240                            "(?:"
241                                .chars()
242                                .map(ok)
243                                .chain(
244                                    OwnedChars::new(new_hir.to_string())
245                                        .map(ok),
246                                )
247                                .chain(iter::once(Ok(')'))),
248                        )))
249                    },
250                )
251            }
252        }
253    }
254}
255
256// TODO: Replace with TAIT, once stabilized:
257//       https://github.com/rust-lang/rust/issues/63063
258/// [`IntoRegexCharIter::Iter`] for [`WithCustom`]`<`[`Parameter`]`>`.
259type WithParsIter<I, P> = Either<
260    iter::Once<Result<char, ParameterError<I>>>,
261    Either<
262        ParameterIter<I>,
263        Either<
264            iter::Chain<
265                iter::Chain<
266                    iter::Map<
267                        str::Chars<'static>,
268                        fn(char) -> Result<char, ParameterError<I>>,
269                    >,
270                    iter::Map<
271                        OwnedChars,
272                        fn(char) -> Result<char, ParameterError<I>>,
273                    >,
274                >,
275                iter::Once<Result<char, ParameterError<I>>>,
276            >,
277            iter::Chain<
278                iter::Chain<
279                    iter::Once<Result<char, ParameterError<I>>>,
280                    iter::Map<
281                        <<P as Provider<I>>::Value as Input>::Iter,
282                        fn(
283                            <<P as Provider<I>>::Value as Input>::Item,
284                        )
285                            -> Result<char, ParameterError<I>>,
286                    >,
287                >,
288                iter::Once<Result<char, ParameterError<I>>>,
289            >,
290        >,
291    >,
292>;
293
294/// Helpers to work with [`Regex`]es [`Hir`].
295///
296/// [`Hir`]: regex_syntax::hir::Hir
297/// [`Regex`]: regex::Regex
298mod regex_hir {
299    use std::mem;
300
301    use regex_syntax::hir::{Hir, HirKind};
302
303    /// Checks whether the given [`Regex`] [`Hir`] contains any capturing
304    /// groups.
305    ///
306    /// [`Regex`]: regex::Regex
307    pub(super) fn has_capture_groups(hir: &Hir) -> bool {
308        match hir.kind() {
309            HirKind::Empty
310            | HirKind::Literal(_)
311            | HirKind::Class(_)
312            | HirKind::Look(_)
313            | HirKind::Repetition(_) => false,
314            HirKind::Capture(_) => true,
315            HirKind::Concat(inner) | HirKind::Alternation(inner) => {
316                inner.iter().any(has_capture_groups)
317            }
318        }
319    }
320
321    /// Renames capturing groups in the given [`Hir`] via
322    /// `__{parameter_id}_{group_id}` naming scheme.
323    pub(super) fn rename_capture_groups(hir: Hir, parameter_id: usize) -> Hir {
324        rename_groups_inner(hir, parameter_id, &mut 0)
325    }
326
327    /// Renames capturing groups in the given [`Hir`] via
328    /// `__{parameter_id}_{group_id}` naming scheme, using the provided
329    /// `group_id_indexer`.
330    fn rename_groups_inner(
331        hir: Hir,
332        parameter_id: usize,
333        group_id_indexer: &mut usize,
334    ) -> Hir {
335        match hir.into_kind() {
336            HirKind::Empty => Hir::empty(),
337            HirKind::Literal(lit) => Hir::literal(lit.0),
338            HirKind::Class(cl) => Hir::class(cl),
339            HirKind::Look(l) => Hir::look(l),
340            HirKind::Repetition(rep) => Hir::repetition(rep),
341            HirKind::Capture(mut capture) => {
342                capture.name =
343                    Some(format!("__{parameter_id}_{group_id_indexer}").into());
344                *group_id_indexer += 1;
345
346                let inner_hir =
347                    mem::replace(capture.sub.as_mut(), Hir::empty());
348                drop(mem::replace(
349                    capture.sub.as_mut(),
350                    rename_groups_inner(
351                        inner_hir,
352                        parameter_id,
353                        group_id_indexer,
354                    ),
355                ));
356
357                Hir::capture(capture)
358            }
359            HirKind::Concat(concat) => Hir::concat(
360                concat
361                    .into_iter()
362                    .map(|h| {
363                        rename_groups_inner(h, parameter_id, group_id_indexer)
364                    })
365                    .collect(),
366            ),
367            HirKind::Alternation(alt) => Hir::alternation(
368                alt.into_iter()
369                    .map(|h| {
370                        rename_groups_inner(h, parameter_id, group_id_indexer)
371                    })
372                    .collect(),
373            ),
374        }
375    }
376}
377
#[cfg(test)]
mod spec {
    use super::{Expression, HashMap, ParameterError};
    use crate::expand::Error;

    // A custom parameter without capturing groups becomes a single group.
    #[test]
    fn custom_parameter() {
        let custom = HashMap::from([("custom", "custom")]);

        match Expression::regex_with_parameters("{custom}", &custom) {
            Ok(regex) => assert_eq!(regex.as_str(), "^(custom)$"),
            Err(e) => panic!("failed: {e}"),
        }
    }

    // Capturing groups of a custom parameter are renamed per parameter
    // occurrence.
    #[test]
    fn custom_parameter_with_groups() {
        let custom = HashMap::from([("custom", "\"(custom)\"|'(custom)'")]);

        let regex =
            match Expression::regex_with_parameters("{custom} {custom}", &custom)
            {
                Ok(regex) => regex,
                Err(e) => panic!("failed: {e}"),
            };

        assert_eq!(
            regex.as_str(),
            "^(?:(?:(?:\"(?P<__0_0>(?:custom))\")\
                    |(?:'(?P<__0_1>(?:custom))'))) \
              (?:(?:(?:\"(?P<__1_0>(?:custom))\")\
                    |(?:'(?P<__1_1>(?:custom))')))$",
        );
    }

    // An anonymous parameter is not affected by custom ones.
    #[test]
    fn default_parameter() {
        let custom = HashMap::from([("custom", "custom")]);

        match Expression::regex_with_parameters("{}", &custom) {
            Ok(regex) => assert_eq!(regex.as_str(), "^(.*)$"),
            Err(e) => panic!("failed: {e}"),
        }
    }

    // A parameter unknown to the provider is reported as not found.
    #[test]
    fn unknown_parameter() {
        let empty = HashMap::<String, String>::new();

        let err =
            Expression::regex_with_parameters("{custom}", &empty).unwrap_err();
        match err {
            Error::Expansion(ParameterError::NotFound(not_found)) => {
                assert_eq!(*not_found, "custom");
            }
            e @ (Error::Regex(_) | Error::Parsing(_) | Error::Expansion(_)) => {
                panic!("wrong err: {e}")
            }
        }
    }
}