1pub mod parameters;
21
22use std::{iter, str, vec};
23
24use derive_more::with_trait::{Debug, Display, Error as StdError, From};
25use either::Either;
26use nom::{AsChar, Input};
27use regex::Regex;
28
29use crate::{
30 parse, Alternation, Alternative, Expression, Optional, Parameter,
31 SingleAlternation, SingleExpression, Spanned,
32};
33
34pub use self::parameters::{
35 Provider as ParametersProvider, WithCustom as WithCustomParameters,
36};
37
38impl<'s> Expression<Spanned<'s>> {
39 pub fn regex<I: AsRef<str> + ?Sized>(
67 input: &'s I,
68 ) -> Result<Regex, Error<Spanned<'s>>> {
69 let re_str = Expression::parse(input)?
70 .into_regex_char_iter()
71 .collect::<Result<String, _>>()?;
72 Regex::new(&re_str).map_err(Into::into)
73 }
74
75 pub fn regex_with_parameters<I, Parameters>(
106 input: &'s I,
107 parameters: Parameters,
108 ) -> Result<Regex, Error<Spanned<'s>>>
109 where
110 I: AsRef<str> + ?Sized,
111 Parameters: Clone + ParametersProvider<Spanned<'s>>,
112 Parameters::Value: Input,
113 <Parameters::Value as Input>::Item: AsChar,
114 {
115 let re_str = Expression::parse(input)?
116 .with_parameters(parameters)
117 .into_regex_char_iter()
118 .collect::<Result<String, _>>()?;
119 Regex::new(&re_str).map_err(Into::into)
120 }
121
122 pub const fn with_parameters<P: ParametersProvider<Spanned<'s>>>(
128 self,
129 parameters: P,
130 ) -> WithCustomParameters<Self, P> {
131 WithCustomParameters {
132 element: self,
133 parameters,
134 }
135 }
136}
137
138#[derive(Clone, Debug, Display, From, StdError)]
143pub enum Error<Input> {
144 #[display("Parsing failed: {_0}")]
146 Parsing(parse::Error<Input>),
147
148 #[display("Failed to expand regex: {_0}")]
150 Expansion(ParameterError<Input>),
151
152 #[display("Regex creation failed: {_0}")]
154 Regex(regex::Error),
155}
156
157#[derive(Clone, Debug, Display, StdError)]
159pub enum ParameterError<Input> {
160 #[display("Parameter `{_0}` not found")]
162 NotFound(Input),
163
164 #[display(
166 "Failed to rename capturing groups in regex `{re}` of \
167 parameter `{parameter}`: {err}"
168 )]
169 RenameRegexGroup {
170 parameter: Input,
172
173 re: String,
175
176 err: Box<regex_syntax::Error>,
180 },
181}
182
183pub trait IntoRegexCharIter<I> {
190 type Iter: Iterator<Item = Result<char, ParameterError<I>>>;
192
193 fn into_regex_char_iter(self) -> Self::Iter;
198}
199
200impl<I> IntoRegexCharIter<I> for Expression<I>
201where
202 I: Clone + Display + Input,
203 <I as Input>::Item: AsChar,
204{
205 type Iter = ExpressionIter<I>;
206
207 fn into_regex_char_iter(self) -> Self::Iter {
208 let into_regex_char_iter: fn(_) -> _ =
209 IntoRegexCharIter::into_regex_char_iter;
210
211 iter::once(Ok('^'))
212 .chain(self.0.into_iter().flat_map(into_regex_char_iter))
213 .chain(iter::once(Ok('$')))
214 }
215}
216
217type ExpressionIter<I> = iter::Chain<
221 iter::Chain<
222 iter::Once<Result<char, ParameterError<I>>>,
223 iter::FlatMap<
224 vec::IntoIter<SingleExpression<I>>,
225 <SingleExpression<I> as IntoRegexCharIter<I>>::Iter,
226 fn(
227 SingleExpression<I>,
228 )
229 -> <SingleExpression<I> as IntoRegexCharIter<I>>::Iter,
230 >,
231 >,
232 iter::Once<Result<char, ParameterError<I>>>,
233>;
234
235impl<I> IntoRegexCharIter<I> for SingleExpression<I>
236where
237 I: Clone + Display + Input,
238 <I as Input>::Item: AsChar,
239{
240 type Iter = SingleExpressionIter<I>;
241
242 fn into_regex_char_iter(self) -> Self::Iter {
243 use Either::{Left, Right};
244
245 let ok: fn(_) -> _ = Ok;
246 let as_char: fn(_) -> _ = AsChar::as_char;
247
248 match self {
249 Self::Alternation(alt) => Left(alt.into_regex_char_iter()),
250 Self::Optional(opt) => Right(Left(opt.into_regex_char_iter())),
251 Self::Parameter(p) => Right(Right(Left(p.into_regex_char_iter()))),
252 Self::Text(t) | Self::Whitespaces(t) => Right(Right(Right(
253 EscapeForRegex::new(t.iter_elements().map(as_char)).map(ok),
254 ))),
255 }
256 }
257}
258
259type SingleExpressionIter<I> = Either<
263 <Alternation<I> as IntoRegexCharIter<I>>::Iter,
264 Either<
265 <Optional<I> as IntoRegexCharIter<I>>::Iter,
266 Either<
267 <Parameter<I> as IntoRegexCharIter<I>>::Iter,
268 iter::Map<
269 EscapeForRegex<
270 iter::Map<
271 <I as Input>::Iter,
272 fn(<I as Input>::Item) -> char,
273 >,
274 >,
275 MapOkChar<I>,
276 >,
277 >,
278 >,
279>;
280
281impl<I> IntoRegexCharIter<I> for Alternation<I>
282where
283 I: Display + Input,
284 <I as Input>::Item: AsChar,
285{
286 type Iter = AlternationIter<I>;
287
288 fn into_regex_char_iter(self) -> Self::Iter {
289 let ok: fn(_) -> _ = Ok;
290 let single_alt: fn(SingleAlternation<I>) -> _ = |alt| {
291 let into_regex_char_iter: fn(_) -> _ =
292 IntoRegexCharIter::into_regex_char_iter;
293
294 alt.into_iter()
295 .flat_map(into_regex_char_iter)
296 .chain(iter::once(Ok('|')))
297 };
298
299 "(?:"
300 .chars()
301 .map(ok)
302 .chain(SkipLast::new(self.0.into_iter().flat_map(single_alt)))
303 .chain(iter::once(Ok(')')))
304 }
305}
306
307type AlternationIter<I> = iter::Chain<
311 iter::Chain<
312 iter::Map<str::Chars<'static>, MapOkChar<I>>,
313 SkipLast<
314 iter::FlatMap<
315 vec::IntoIter<SingleAlternation<I>>,
316 AlternationIterInner<I>,
317 fn(SingleAlternation<I>) -> AlternationIterInner<I>,
318 >,
319 >,
320 >,
321 iter::Once<Result<char, ParameterError<I>>>,
322>;
323
324type AlternationIterInner<I> = iter::Chain<
328 iter::FlatMap<
329 vec::IntoIter<Alternative<I>>,
330 <Alternative<I> as IntoRegexCharIter<I>>::Iter,
331 fn(Alternative<I>) -> <Alternative<I> as IntoRegexCharIter<I>>::Iter,
332 >,
333 iter::Once<Result<char, ParameterError<I>>>,
334>;
335
336impl<I> IntoRegexCharIter<I> for Alternative<I>
337where
338 I: Display + Input,
339 <I as Input>::Item: AsChar,
340{
341 type Iter = AlternativeIter<I>;
342
343 fn into_regex_char_iter(self) -> Self::Iter {
344 use Either::{Left, Right};
345
346 let as_char: fn(<I as Input>::Item) -> char = AsChar::as_char;
347
348 match self {
349 Self::Optional(opt) => Left(opt.into_regex_char_iter()),
350 Self::Text(text) => Right(
351 EscapeForRegex::new(text.iter_elements().map(as_char)).map(Ok),
352 ),
353 }
354 }
355}
356
357type AlternativeIter<I> = Either<
361 <Optional<I> as IntoRegexCharIter<I>>::Iter,
362 iter::Map<
363 EscapeForRegex<
364 iter::Map<<I as Input>::Iter, fn(<I as Input>::Item) -> char>,
365 >,
366 MapOkChar<I>,
367 >,
368>;
369
370impl<I> IntoRegexCharIter<I> for Optional<I>
371where
372 I: Display + Input,
373 <I as Input>::Item: AsChar,
374{
375 type Iter = OptionalIter<I>;
376
377 fn into_regex_char_iter(self) -> Self::Iter {
378 let as_char: fn(<I as Input>::Item) -> char = AsChar::as_char;
379
380 "(?:"
381 .chars()
382 .chain(EscapeForRegex::new(self.0.iter_elements().map(as_char)))
383 .chain(")?".chars())
384 .map(Ok)
385 }
386}
387
388type OptionalIter<I> = iter::Map<
392 iter::Chain<
393 iter::Chain<
394 str::Chars<'static>,
395 EscapeForRegex<
396 iter::Map<<I as Input>::Iter, fn(<I as Input>::Item) -> char>,
397 >,
398 >,
399 str::Chars<'static>,
400 >,
401 MapOkChar<I>,
402>;
403
404type MapOkChar<I> = fn(char) -> Result<char, ParameterError<I>>;
406
407impl<I> IntoRegexCharIter<I> for Parameter<I>
408where
409 I: Clone + Display + Input,
410 <I as Input>::Item: AsChar,
411{
412 type Iter = ParameterIter<I>;
413
414 fn into_regex_char_iter(self) -> Self::Iter {
415 use Either::{Left, Right};
416
417 let eq = |i: &I, str: &str| {
418 i.iter_elements().map(AsChar::as_char).eq(str.chars())
419 };
420
421 if eq(&self.input, "int") {
422 Left(Left(r"((?:-?\d+)|(?:\d+))".chars().map(Ok)))
423 } else if eq(&self.input, "float") {
424 Left(Left(
432 "([+-]?(?:inf\
433 |NaN\
434 |(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
435 ))"
436 .chars()
437 .map(Ok),
438 ))
439 } else if eq(&self.input, "word") {
440 Left(Left(r"([^\s]+)".chars().map(Ok)))
441 } else if eq(&self.input, "string") {
442 Left(Right(
443 OwnedChars::new(format!(
444 "(?:\
445 \"(?P<__{id}_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
446 |'(?P<__{id}_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
447 )",
448 id = self.id,
449 ))
450 .map(Ok),
451 ))
452 } else if eq(&self.input, "") {
453 Left(Left("(.*)".chars().map(Ok)))
454 } else {
455 Right(iter::once(Err(ParameterError::NotFound(self.input))))
456 }
457 }
458}
459
460type ParameterIter<I> = Either<
464 Either<
465 iter::Map<
466 str::Chars<'static>,
467 fn(char) -> Result<char, ParameterError<I>>,
468 >,
469 iter::Map<OwnedChars, fn(char) -> Result<char, ParameterError<I>>>,
470 >,
471 iter::Once<Result<char, ParameterError<I>>>,
472>;
473
474#[derive(Debug)]
478pub struct SkipLast<Iter: Iterator> {
479 iter: iter::Peekable<Iter>,
483}
484
485impl<Iter> Clone for SkipLast<Iter>
486where
487 Iter: Clone + Iterator,
488 Iter::Item: Clone,
489{
490 fn clone(&self) -> Self {
491 Self {
492 iter: self.iter.clone(),
493 }
494 }
495}
496
497impl<Iter: Iterator> SkipLast<Iter> {
498 pub fn new(iter: Iter) -> Self {
500 Self {
501 iter: iter.peekable(),
502 }
503 }
504}
505
506impl<Iter> Iterator for SkipLast<Iter>
507where
508 Iter: Iterator,
509{
510 type Item = Iter::Item;
511
512 fn next(&mut self) -> Option<Self::Item> {
513 let next = self.iter.next();
514 (self.iter.peek().is_some()).then_some(next).flatten()
515 }
516}
517
/// [`Iterator`] over the [`char`]s of an owned [`String`].
#[derive(Clone, Debug)]
pub struct OwnedChars {
    /// Iterated [`String`].
    str: String,

    /// Byte offset in the [`String`] of the next [`char`] to yield.
    ///
    /// Always lies on a [`char`] boundary.
    cur: usize,
}

impl OwnedChars {
    /// Creates a new [`OwnedChars`] [`Iterator`] over the given [`String`].
    #[must_use]
    pub const fn new(str: String) -> Self {
        Self { str, cur: 0 }
    }
}

impl Iterator for OwnedChars {
    type Item = char;

    /// Yields the next [`char`] in constant time.
    fn next(&mut self) -> Option<Self::Item> {
        // Decoding from the remembered byte offset avoids rescanning the
        // string from its start on every call, which made a full iteration
        // quadratic.
        let char = self.str.get(self.cur..)?.chars().next()?;
        self.cur += char.len_utf8();
        Some(char)
    }
}
547
/// [`Iterator`] adapter preparing plain text for being embedded into a
/// regex.
///
/// - Each of `^`, `$`, `[`, `]`, `(`, `)`, `\`, `{`, `}`, `.`, `|`, `?`,
///   `*`, `+` that is not already preceded by a `\` is prefixed with one.
/// - A `\` followed by one of those characters is kept together with it.
/// - A `\` followed by any other character is dropped, leaving only that
///   character.
/// - A trailing lone `\` ends the iteration without being yielded.
#[derive(Clone, Debug)]
pub struct EscapeForRegex<Iter: Iterator> {
    /// Wrapped [`Iterator`], made peekable to look behind a `\`.
    iter: iter::Peekable<Iter>,

    /// Character to yield on the next call, right after the `\` that has
    /// just been yielded for it.
    was_escaped: Option<Iter::Item>,
}

impl<Iter: Iterator> EscapeForRegex<Iter> {
    /// Creates a new [`EscapeForRegex`] adapter over the given [`Iterator`].
    pub fn new(iter: Iter) -> Self {
        Self {
            iter: iter.peekable(),
            was_escaped: None,
        }
    }
}

impl<Iter> Iterator for EscapeForRegex<Iter>
where
    Iter: Iterator<Item = char>,
{
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        let should_be_escaped = |c: char| "^$[]()\\{}.|?*+".contains(c);

        // Second half of an escape pair whose `\` was yielded last time.
        if let Some(escaped) = self.was_escaped.take() {
            return Some(escaped);
        }

        loop {
            return match self.iter.next() {
                Some('\\') => {
                    // A trailing lone `\` ends the iteration here.
                    let c = *self.iter.peek()?;
                    if should_be_escaped(c) {
                        // Already escaped in the input: keep the pair.
                        self.was_escaped = self.iter.next();
                        Some('\\')
                    } else {
                        // Escaping means nothing in a regex for this
                        // character: drop the `\` and go on with it.
                        continue;
                    }
                }
                Some(c) if should_be_escaped(c) => {
                    self.was_escaped = Some(c);
                    Some('\\')
                }
                Some(c) => Some(c),
                None => None,
            };
        }
    }
}
616
/// Tests pinning the exact regexes produced by [`Expression::regex()`] and
/// what they match.
#[cfg(test)]
mod spec {
    use super::{Error, Expression, ParameterError};

    #[test]
    fn alternation_with_optional() {
        let expr = Expression::regex("a/b(c)")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^(?:a|b(?:c)?)$");
    }

    #[test]
    fn alternation() {
        let expr = Expression::regex("a/b c/d/e")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^(?:a|b) (?:c|d|e)$");
        assert!(expr.is_match("a c"));
        assert!(expr.is_match("b e"));
        assert!(!expr.is_match("c e"));
        assert!(!expr.is_match("a"));
        assert!(!expr.is_match("a "));
    }

    #[test]
    fn empty() {
        let expr =
            Expression::regex("").unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^$");
        assert!(expr.is_match(""));
        assert!(!expr.is_match("a"));
    }

    #[test]
    fn escape_regex_characters() {
        let expr = Expression::regex(r"^$[]\()\{}\\.|?*+")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), r"^\^\$\[\]\(\)\{\}\\\.\|\?\*\+$");
        assert!(expr.is_match("^$[](){}\\.|?*+"));
    }

    #[test]
    fn optional() {
        let expr =
            Expression::regex("(a)").unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^(?:a)?$");
        assert!(expr.is_match(""));
        assert!(expr.is_match("a"));
        assert!(!expr.is_match("b"));
    }

    #[test]
    fn parameter_int() {
        let expr = Expression::regex("{int}")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^((?:-?\\d+)|(?:\\d+))$");
        assert!(expr.is_match("123"));
        assert!(expr.is_match("-123"));
        assert!(!expr.is_match("+123"));
        assert!(!expr.is_match("123."));
    }

    #[test]
    fn parameter_float() {
        let expr = Expression::regex("{float}")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(
            expr.as_str(),
            "^([+-]?(?:inf\
                      |NaN\
                      |(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
                    ))$",
        );
        assert!(expr.is_match("+1"));
        assert!(expr.is_match(".1"));
        assert!(expr.is_match("-.1"));
        assert!(expr.is_match("-1."));
        assert!(expr.is_match("-1.1E+1"));
        assert!(expr.is_match("-inf"));
        assert!(expr.is_match("NaN"));
    }

    #[test]
    fn parameter_word() {
        let expr = Expression::regex("{word}")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^([^\\s]+)$");
        assert!(expr.is_match("test"));
        assert!(expr.is_match("\"test\""));
        assert!(!expr.is_match("with space"));
    }

    #[test]
    fn parameter_string() {
        let expr = Expression::regex("{string}")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(
            expr.as_str(),
            "^(?:\
                \"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              )$",
        );
        assert!(expr.is_match("\"\""));
        assert!(expr.is_match("''"));
        assert!(expr.is_match("'with \"'"));
        assert!(expr.is_match("\"with '\""));
        assert!(expr.is_match("\"with \\\" escaped\""));
        assert!(expr.is_match("'with \\' escaped'"));
        assert!(!expr.is_match("word"));
    }

    #[test]
    fn multiple_string_parameters() {
        let expr = Expression::regex("{string} {string}")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(
            expr.as_str(),
            "^(?:\
                \"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              ) (?:\
                \"(?P<__1_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__1_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              )$",
        );
        assert!(expr.is_match("\"\" ''"));
        assert!(expr.is_match("'' \"\""));
        assert!(expr.is_match("'with \"' \"\""));
        assert!(expr.is_match("\"with '\" '\"'"));
        assert!(expr.is_match("\"with \\\" escaped\" 'with \\' escaped'"));
        assert!(expr.is_match("'with \\' escaped' \"with \\\" escaped\""));
    }

    #[test]
    fn parameter_all() {
        let expr =
            Expression::regex("{}").unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^(.*)$");
        assert!(expr.is_match("anything matches"));
    }

    #[test]
    fn text() {
        let expr =
            Expression::regex("a").unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^a$");
        assert!(expr.is_match("a"));
        assert!(!expr.is_match("b"));
        assert!(!expr.is_match("ab"));
    }

    #[test]
    fn unicode() {
        let expr = Expression::regex("Привет, Мир(ы)!")
            .unwrap_or_else(|e| panic!("failed: {e}"));

        assert_eq!(expr.as_str(), "^Привет, Мир(?:ы)?!$");
        assert!(expr.is_match("Привет, Мир!"));
        assert!(expr.is_match("Привет, Миры!"));
        assert!(!expr.is_match("Hello world"));
    }

    #[test]
    fn unknown_parameter() {
        match Expression::regex("{custom}").unwrap_err() {
            Error::Expansion(ParameterError::NotFound(not_found)) => {
                assert_eq!(*not_found, "custom");
            }
            e @ (Error::Parsing(_) | Error::Regex(_) | Error::Expansion(_)) => {
                panic!("wrong err: {e}");
            }
        }
    }
}