1pub mod parameters;
21
22use std::{iter, str, vec};
23
24use derive_more::with_trait::{Debug, Display, Error as StdError, From};
25use either::Either;
26use nom::{AsChar, Input};
27use regex::Regex;
28
29pub use self::parameters::{
30 Provider as ParametersProvider, WithCustom as WithCustomParameters,
31};
32use crate::{
33 Alternation, Alternative, Expression, Optional, Parameter,
34 SingleAlternation, SingleExpression, Spanned, parse,
35};
36
37impl<'s> Expression<Spanned<'s>> {
38 pub fn regex<I: AsRef<str> + ?Sized>(
66 input: &'s I,
67 ) -> Result<Regex, Error<Spanned<'s>>> {
68 let re_str = Expression::parse(input)?
69 .into_regex_char_iter()
70 .collect::<Result<String, _>>()?;
71 Regex::new(&re_str).map_err(Into::into)
72 }
73
74 pub fn regex_with_parameters<I, Parameters>(
103 input: &'s I,
104 parameters: Parameters,
105 ) -> Result<Regex, Error<Spanned<'s>>>
106 where
107 I: AsRef<str> + ?Sized,
108 Parameters: Clone + ParametersProvider<Spanned<'s>>,
109 Parameters::Value: Input,
110 <Parameters::Value as Input>::Item: AsChar,
111 {
112 let re_str = Expression::parse(input)?
113 .with_parameters(parameters)
114 .into_regex_char_iter()
115 .collect::<Result<String, _>>()?;
116 Regex::new(&re_str).map_err(Into::into)
117 }
118
119 pub const fn with_parameters<P: ParametersProvider<Spanned<'s>>>(
125 self,
126 parameters: P,
127 ) -> WithCustomParameters<Self, P> {
128 WithCustomParameters { element: self, parameters }
129 }
130}
131
132#[derive(Clone, Debug, Display, From, StdError)]
137pub enum Error<Input> {
138 #[display("Parsing failed: {_0}")]
140 Parsing(parse::Error<Input>),
141
142 #[display("Failed to expand regex: {_0}")]
144 Expansion(ParameterError<Input>),
145
146 #[display("Regex creation failed: {_0}")]
148 Regex(regex::Error),
149}
150
151#[derive(Clone, Debug, Display, StdError)]
153pub enum ParameterError<Input> {
154 #[display("Parameter `{_0}` not found")]
156 NotFound(Input),
157
158 #[display(
160 "Failed to rename capturing groups in regex `{re}` of \
161 parameter `{parameter}`: {err}"
162 )]
163 RenameRegexGroup {
164 parameter: Input,
166
167 re: String,
169
170 err: Box<regex_syntax::Error>,
174 },
175}
176
177pub trait IntoRegexCharIter<I> {
184 type Iter: Iterator<Item = Result<char, ParameterError<I>>>;
186
187 fn into_regex_char_iter(self) -> Self::Iter;
192}
193
194impl<I> IntoRegexCharIter<I> for Expression<I>
195where
196 I: Clone + Display + Input,
197 <I as Input>::Item: AsChar,
198{
199 type Iter = ExpressionIter<I>;
200
201 fn into_regex_char_iter(self) -> Self::Iter {
202 let into_regex_char_iter: fn(_) -> _ =
203 IntoRegexCharIter::into_regex_char_iter;
204
205 iter::once(Ok('^'))
206 .chain(self.0.into_iter().flat_map(into_regex_char_iter))
207 .chain(iter::once(Ok('$')))
208 }
209}
210
211type ExpressionIter<I> = iter::Chain<
215 iter::Chain<
216 iter::Once<Result<char, ParameterError<I>>>,
217 iter::FlatMap<
218 vec::IntoIter<SingleExpression<I>>,
219 <SingleExpression<I> as IntoRegexCharIter<I>>::Iter,
220 fn(
221 SingleExpression<I>,
222 )
223 -> <SingleExpression<I> as IntoRegexCharIter<I>>::Iter,
224 >,
225 >,
226 iter::Once<Result<char, ParameterError<I>>>,
227>;
228
229impl<I> IntoRegexCharIter<I> for SingleExpression<I>
230where
231 I: Clone + Display + Input,
232 <I as Input>::Item: AsChar,
233{
234 type Iter = SingleExpressionIter<I>;
235
236 fn into_regex_char_iter(self) -> Self::Iter {
237 use Either::{Left, Right};
238
239 let ok: fn(_) -> _ = Ok;
240 let as_char: fn(_) -> _ = AsChar::as_char;
241
242 match self {
243 Self::Alternation(alt) => Left(alt.into_regex_char_iter()),
244 Self::Optional(opt) => Right(Left(opt.into_regex_char_iter())),
245 Self::Parameter(p) => Right(Right(Left(p.into_regex_char_iter()))),
246 Self::Text(t) | Self::Whitespaces(t) => Right(Right(Right(
247 EscapeForRegex::new(t.iter_elements().map(as_char)).map(ok),
248 ))),
249 }
250 }
251}
252
253type SingleExpressionIter<I> = Either<
257 <Alternation<I> as IntoRegexCharIter<I>>::Iter,
258 Either<
259 <Optional<I> as IntoRegexCharIter<I>>::Iter,
260 Either<
261 <Parameter<I> as IntoRegexCharIter<I>>::Iter,
262 iter::Map<
263 EscapeForRegex<
264 iter::Map<
265 <I as Input>::Iter,
266 fn(<I as Input>::Item) -> char,
267 >,
268 >,
269 MapOkChar<I>,
270 >,
271 >,
272 >,
273>;
274
275impl<I> IntoRegexCharIter<I> for Alternation<I>
276where
277 I: Display + Input,
278 <I as Input>::Item: AsChar,
279{
280 type Iter = AlternationIter<I>;
281
282 fn into_regex_char_iter(self) -> Self::Iter {
283 let ok: fn(_) -> _ = Ok;
284 let single_alt: fn(SingleAlternation<I>) -> _ = |alt| {
285 let into_regex_char_iter: fn(_) -> _ =
286 IntoRegexCharIter::into_regex_char_iter;
287
288 alt.into_iter()
289 .flat_map(into_regex_char_iter)
290 .chain(iter::once(Ok('|')))
291 };
292
293 "(?:"
294 .chars()
295 .map(ok)
296 .chain(SkipLast::new(self.0.into_iter().flat_map(single_alt)))
297 .chain(iter::once(Ok(')')))
298 }
299}
300
301type AlternationIter<I> = iter::Chain<
305 iter::Chain<
306 iter::Map<str::Chars<'static>, MapOkChar<I>>,
307 SkipLast<
308 iter::FlatMap<
309 vec::IntoIter<SingleAlternation<I>>,
310 AlternationIterInner<I>,
311 fn(SingleAlternation<I>) -> AlternationIterInner<I>,
312 >,
313 >,
314 >,
315 iter::Once<Result<char, ParameterError<I>>>,
316>;
317
318type AlternationIterInner<I> = iter::Chain<
322 iter::FlatMap<
323 vec::IntoIter<Alternative<I>>,
324 <Alternative<I> as IntoRegexCharIter<I>>::Iter,
325 fn(Alternative<I>) -> <Alternative<I> as IntoRegexCharIter<I>>::Iter,
326 >,
327 iter::Once<Result<char, ParameterError<I>>>,
328>;
329
330impl<I> IntoRegexCharIter<I> for Alternative<I>
331where
332 I: Display + Input,
333 <I as Input>::Item: AsChar,
334{
335 type Iter = AlternativeIter<I>;
336
337 fn into_regex_char_iter(self) -> Self::Iter {
338 use Either::{Left, Right};
339
340 let as_char: fn(<I as Input>::Item) -> char = AsChar::as_char;
341
342 match self {
343 Self::Optional(opt) => Left(opt.into_regex_char_iter()),
344 Self::Text(text) => Right(
345 EscapeForRegex::new(text.iter_elements().map(as_char)).map(Ok),
346 ),
347 }
348 }
349}
350
351type AlternativeIter<I> = Either<
355 <Optional<I> as IntoRegexCharIter<I>>::Iter,
356 iter::Map<
357 EscapeForRegex<
358 iter::Map<<I as Input>::Iter, fn(<I as Input>::Item) -> char>,
359 >,
360 MapOkChar<I>,
361 >,
362>;
363
364impl<I> IntoRegexCharIter<I> for Optional<I>
365where
366 I: Display + Input,
367 <I as Input>::Item: AsChar,
368{
369 type Iter = OptionalIter<I>;
370
371 fn into_regex_char_iter(self) -> Self::Iter {
372 let as_char: fn(<I as Input>::Item) -> char = AsChar::as_char;
373
374 "(?:"
375 .chars()
376 .chain(EscapeForRegex::new(self.0.iter_elements().map(as_char)))
377 .chain(")?".chars())
378 .map(Ok)
379 }
380}
381
382type OptionalIter<I> = iter::Map<
386 iter::Chain<
387 iter::Chain<
388 str::Chars<'static>,
389 EscapeForRegex<
390 iter::Map<<I as Input>::Iter, fn(<I as Input>::Item) -> char>,
391 >,
392 >,
393 str::Chars<'static>,
394 >,
395 MapOkChar<I>,
396>;
397
398type MapOkChar<I> = fn(char) -> Result<char, ParameterError<I>>;
400
401impl<I> IntoRegexCharIter<I> for Parameter<I>
402where
403 I: Clone + Display + Input,
404 <I as Input>::Item: AsChar,
405{
406 type Iter = ParameterIter<I>;
407
408 fn into_regex_char_iter(self) -> Self::Iter {
409 use Either::{Left, Right};
410
411 let eq = |i: &I, str: &str| {
412 i.iter_elements().map(AsChar::as_char).eq(str.chars())
413 };
414
415 if eq(&self.input, "int") {
416 Left(Left(r"((?:-?\d+)|(?:\d+))".chars().map(Ok)))
417 } else if eq(&self.input, "float") {
418 Left(Left(
426 "([+-]?(?:inf\
427 |NaN\
428 |(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
429 ))"
430 .chars()
431 .map(Ok),
432 ))
433 } else if eq(&self.input, "word") {
434 Left(Left(r"([^\s]+)".chars().map(Ok)))
435 } else if eq(&self.input, "string") {
436 Left(Right(
437 OwnedChars::new(format!(
438 "(?:\
439 \"(?P<__{id}_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
440 |'(?P<__{id}_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
441 )",
442 id = self.id,
443 ))
444 .map(Ok),
445 ))
446 } else if eq(&self.input, "") {
447 Left(Left("(.*)".chars().map(Ok)))
448 } else {
449 Right(iter::once(Err(ParameterError::NotFound(self.input))))
450 }
451 }
452}
453
454type ParameterIter<I> = Either<
458 Either<
459 iter::Map<
460 str::Chars<'static>,
461 fn(char) -> Result<char, ParameterError<I>>,
462 >,
463 iter::Map<OwnedChars, fn(char) -> Result<char, ParameterError<I>>>,
464 >,
465 iter::Once<Result<char, ParameterError<I>>>,
466>;
467
468#[derive(Debug)]
472pub struct SkipLast<Iter: Iterator> {
473 iter: iter::Peekable<Iter>,
477}
478
479impl<Iter> Clone for SkipLast<Iter>
480where
481 Iter: Clone + Iterator,
482 Iter::Item: Clone,
483{
484 fn clone(&self) -> Self {
485 Self { iter: self.iter.clone() }
486 }
487}
488
489impl<Iter: Iterator> SkipLast<Iter> {
490 pub fn new(iter: Iter) -> Self {
492 Self { iter: iter.peekable() }
493 }
494}
495
496impl<Iter> Iterator for SkipLast<Iter>
497where
498 Iter: Iterator,
499{
500 type Item = Iter::Item;
501
502 fn next(&mut self) -> Option<Self::Item> {
503 let next = self.iter.next();
504 (self.iter.peek().is_some()).then_some(next).flatten()
505 }
506}
507
/// [`Iterator`] over the [`char`]s of an owned [`String`].
///
/// It's the owning counterpart of [`str::chars()`], which borrows the
/// string it iterates over.
#[derive(Clone, Debug)]
pub struct OwnedChars {
    /// [`String`] being iterated over.
    str: String,

    /// Byte offset in the `str` of the next [`char`] to yield.
    ///
    /// Always lies on a [`char`] boundary and never exceeds `str.len()`.
    cur: usize,
}

impl OwnedChars {
    /// Creates a new [`OwnedChars`] iterating over the given [`String`] from
    /// its very beginning.
    #[must_use]
    pub const fn new(str: String) -> Self {
        Self { str, cur: 0 }
    }
}

impl Iterator for OwnedChars {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        // Decoding starts right at the stored byte offset, so each call is
        // O(1). Re-scanning the string from its start on every call would
        // make a full iteration quadratic.
        let ch = self.str.get(self.cur..)?.chars().next()?;
        self.cur += ch.len_utf8();
        Some(ch)
    }
}
537
/// [`Iterator`] adapter escaping regex special characters in a stream of
/// [`char`]s.
///
/// - A special character (one of `^$[]()\{}.|?*+`) is prefixed with `\`.
/// - A `\` followed by a special character is kept as the escape of that
///   character, so the pair is yielded unchanged.
/// - A `\` followed by any other character is dropped.
/// - A `\` being the very last character ends the iteration.
#[derive(Clone, Debug)]
pub struct EscapeForRegex<Iter: Iterator> {
    /// Wrapped [`Iterator`], made peekable to look behind a `\`.
    iter: iter::Peekable<Iter>,

    /// Character to yield on the next call, right after the `\` already
    /// yielded for it.
    was_escaped: Option<Iter::Item>,
}

impl<Iter: Iterator> EscapeForRegex<Iter> {
    /// Wraps the given [`Iterator`] into a new [`EscapeForRegex`].
    pub fn new(iter: Iter) -> Self {
        let iter = iter.peekable();
        Self { iter, was_escaped: None }
    }
}

impl<Iter> Iterator for EscapeForRegex<Iter>
where
    Iter: Iterator<Item = char>,
{
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        /// Characters having a special meaning in a regex.
        const SPECIAL: &str = "^$[]()\\{}.|?*+";

        // Second half of an escape sequence started on the previous call.
        if let Some(pending) = self.was_escaped.take() {
            return Some(pending);
        }

        loop {
            let current = self.iter.next()?;

            if current == '\\' {
                let following = *self.iter.peek()?;
                if SPECIAL.contains(following) {
                    self.was_escaped = self.iter.next();
                    return Some('\\');
                }
                // This `\` escapes nothing special, so it is dropped and
                // the following character is examined on its own.
            } else if SPECIAL.contains(current) {
                self.was_escaped = Some(current);
                return Some('\\');
            } else {
                return Some(current);
            }
        }
    }
}
603
#[cfg(test)]
mod spec {
    use super::{Error, Expression, ParameterError};

    /// Compiles the given expression into a regex, failing the current test
    /// if that's impossible.
    #[track_caller]
    fn compile(input: &str) -> regex::Regex {
        match Expression::regex(input) {
            Ok(re) => re,
            Err(e) => panic!("failed: {e}"),
        }
    }

    #[test]
    fn alternation_with_optional() {
        let re = compile("a/b(c)");

        assert_eq!(re.as_str(), "^(?:a|b(?:c)?)$");
    }

    #[test]
    fn alternation() {
        let re = compile("a/b c/d/e");

        assert_eq!(re.as_str(), "^(?:a|b) (?:c|d|e)$");
        for accepted in ["a c", "b e"] {
            assert!(re.is_match(accepted), "`{accepted}` must match");
        }
        for rejected in ["c e", "a", "a "] {
            assert!(!re.is_match(rejected), "`{rejected}` must not match");
        }
    }

    #[test]
    fn empty() {
        let re = compile("");

        assert_eq!(re.as_str(), "^$");
        assert!(re.is_match(""));
        assert!(!re.is_match("a"));
    }

    #[test]
    fn escape_regex_characters() {
        let re = compile(r"^$[]\()\{}\\.|?*+");

        assert_eq!(re.as_str(), r"^\^\$\[\]\(\)\{\}\\\.\|\?\*\+$");
        assert!(re.is_match("^$[](){}\\.|?*+"));
    }

    #[test]
    fn optional() {
        let re = compile("(a)");

        assert_eq!(re.as_str(), "^(?:a)?$");
        assert!(re.is_match(""));
        assert!(re.is_match("a"));
        assert!(!re.is_match("b"));
    }

    #[test]
    fn parameter_int() {
        let re = compile("{int}");

        assert_eq!(re.as_str(), "^((?:-?\\d+)|(?:\\d+))$");
        for accepted in ["123", "-123"] {
            assert!(re.is_match(accepted), "`{accepted}` must match");
        }
        for rejected in ["+123", "123."] {
            assert!(!re.is_match(rejected), "`{rejected}` must not match");
        }
    }

    #[test]
    fn parameter_float() {
        let re = compile("{float}");

        assert_eq!(
            re.as_str(),
            "^([+-]?(?:inf\
                      |NaN\
                      |(?:\\d+|\\d+\\.\\d*|\\d*\\.\\d+)(?:[eE][+-]?\\d+)?\
                    ))$",
        );
        for accepted in ["+1", ".1", "-.1", "-1.", "-1.1E+1", "-inf", "NaN"] {
            assert!(re.is_match(accepted), "`{accepted}` must match");
        }
    }

    #[test]
    fn parameter_word() {
        let re = compile("{word}");

        assert_eq!(re.as_str(), "^([^\\s]+)$");
        assert!(re.is_match("test"));
        assert!(re.is_match("\"test\""));
        assert!(!re.is_match("with space"));
    }

    #[test]
    fn parameter_string() {
        let re = compile("{string}");

        assert_eq!(
            re.as_str(),
            "^(?:\
                \"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              )$",
        );
        let accepted = [
            "\"\"",
            "''",
            "'with \"'",
            "\"with '\"",
            "\"with \\\" escaped\"",
            "'with \\' escaped'",
        ];
        for input in accepted {
            assert!(re.is_match(input), "`{input}` must match");
        }
        assert!(!re.is_match("word"));
    }

    #[test]
    fn multiple_string_parameters() {
        let re = compile("{string} {string}");

        assert_eq!(
            re.as_str(),
            "^(?:\
                \"(?P<__0_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__0_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              ) (?:\
                \"(?P<__1_0>[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"\
                |'(?P<__1_1>[^'\\\\]*(?:\\\\.[^'\\\\]*)*)'\
              )$",
        );
        let accepted = [
            "\"\" ''",
            "'' \"\"",
            "'with \"' \"\"",
            "\"with '\" '\"'",
            "\"with \\\" escaped\" 'with \\' escaped'",
            "'with \\' escaped' \"with \\\" escaped\"",
        ];
        for input in accepted {
            assert!(re.is_match(input), "`{input}` must match");
        }
    }

    #[test]
    fn parameter_all() {
        let re = compile("{}");

        assert_eq!(re.as_str(), "^(.*)$");
        assert!(re.is_match("anything matches"));
    }

    #[test]
    fn text() {
        let re = compile("a");

        assert_eq!(re.as_str(), "^a$");
        assert!(re.is_match("a"));
        assert!(!re.is_match("b"));
        assert!(!re.is_match("ab"));
    }

    #[test]
    fn unicode() {
        let re = compile("Привет, Мир(ы)!");

        assert_eq!(re.as_str(), "^Привет, Мир(?:ы)?!$");
        assert!(re.is_match("Привет, Мир!"));
        assert!(re.is_match("Привет, Миры!"));
        assert!(!re.is_match("Hello world"));
    }

    #[test]
    fn unknown_parameter() {
        let err = Expression::regex("{custom}").unwrap_err();

        match err {
            Error::Expansion(ParameterError::NotFound(not_found)) => {
                assert_eq!(*not_found, "custom");
            }
            // Variants are listed explicitly, so adding a new one forces
            // this test to be revisited.
            e @ (Error::Parsing(_) | Error::Regex(_) | Error::Expansion(_)) => {
                panic!("wrong err: {e}");
            }
        }
    }
}