sequoia_openpgp/regex/
mod.rs

1//! OpenPGP regex parser.
2//!
3//! OpenPGP defines a [regular expression language].  It is used with
4//! [trust signatures] to scope the trust that they extend.
5//!
6//!   [regular expression language]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
7//!   [trust signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
8//!
9//! Compared with most regular expression languages, OpenPGP's is
10//! quite simple.  In particular, it only includes the following
11//! features:
12//!
13//!   - Alternations using `|`,
14//!   - Grouping using `(` and `)`,
15//!   - The `*`, `+`, and `?` glob operators,
//!   - The `^` and `$` anchors,
//!   - The `.` operator, positive *non-empty* ranges
18//!     (e.g. `[a-zA-Z]`) and negative *non-empty* ranges (`[^@]`), and
19//!   - The backslash operator to escape special characters (except
20//!     in ranges).
21//!
22//! The regular expression engine defined in this module implements
23//! that language with two differences.  The first difference is that
24//! the compiler only works on UTF-8 strings (not bytes).  The second
25//! difference is that ranges in character classes are between UTF-8
26//! characters, not just ASCII characters.
27//!
28//! # Data Structures
29//!
30//! This module defines two data structures.  [`Regex`] encapsulates a
31//! valid regular expression, and provides methods to check whether
32//! the regular expression matches a string or a [`UserID`].
33//! [`RegexSet`] is similar, but encapsulates zero or more regular
34//! expressions, which may or may not be valid.  Its match methods
35//! return `true` if there are no regular expressions, or, if there is
36//! at least one regular expression, they return whether at least one
37//! of the regular expressions matches it.  `RegexSet`'s matcher
38//! handles invalid regular expressions by considering them to be
39//! regular expressions that don't match anything.  These semantics
40//! are consistent with a trust signature's scoping rules.  Further,
41//! strings that contain control characters never match.  This
42//! behavior can be overridden using [`Regex::disable_sanitizations`]
43//! and [`RegexSet::disable_sanitizations`].
44//!
45//!   [`UserID`]: crate::packet::UserID
46//!   [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
47//!   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
48//!
49//! # Scoped Trust Signatures
50//!
51//! To create a trust signature, you create a signature whose [type]
52//! is either [GenericCertification], [PersonaCertification],
53//! [CasualCertification], or [PositiveCertification], and add a
54//! [Trust Signature] subpacket using, for instance, the
55//! [`SignatureBuilder::set_trust_signature`] method.
56//!
57//!   [type]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.1
58//!   [GenericCertification]: crate::types::SignatureType::GenericCertification
59//!   [PersonaCertification]: crate::types::SignatureType::PersonaCertification
60//!   [CasualCertification]: crate::types::SignatureType::CasualCertification
61//!   [PositiveCertification]: crate::types::SignatureType::PositiveCertification
62//!   [Trust Signature]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
63//!   [`SignatureBuilder::set_trust_signature`]: crate::packet::signature::SignatureBuilder::set_trust_signature()
64//!
65//! To scope a trust signature, you add a [Regular Expression
66//! subpacket] to it using
67//! [`SignatureBuilder::set_regular_expression`] or
68//! [`SignatureBuilder::add_regular_expression`].
69//!
70//! To extract any regular expressions, you can use
71//! [`SubpacketAreas::regular_expressions`].
72//!
73//!   [Regular Expression subpacket]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.22
74//!   [`SignatureBuilder::set_regular_expression`]: crate::packet::signature::SignatureBuilder::set_regular_expression()
75//!   [`SignatureBuilder::add_regular_expression`]: crate::packet::signature::SignatureBuilder::add_regular_expression()
76//!   [`SubpacketAreas::regular_expressions`]: crate::packet::signature::subpacket::SubpacketAreas::regular_expressions()
77//!
78//! # Caveat Emptor
79//!
80//! Note: GnuPG has [very limited regular expression support].  In
81//! particular, it only recognizes regular expressions with the
82//! following form:
83//!
84//!   [very limited regular expression support]: https://dev.gnupg.org/source/gnupg/browse/master/g10/trustdb.c;15e065dee891eef9545556f210b4199107999869$1558
85//!
86//! ```text
87//! <[^>]+[@.]example\.com>$
88//! ```
89//!
90//! Further, it escapes any operators between the `<[^>]+[@.]` and the
91//! `>$` except `.` and `\`.  Otherwise, GnuPG treats the regular
92//! expression as a literal domain (e.g., `example.com`).
93//!
94//! Further, until [version 2.2.22] (released in August 2020), GnuPG
95//! did not support regular expressions on Windows, and other systems
96//! that don't include `regcomp`.  On these systems, if a trust
97//! signature included a regular expression, GnuPG conservatively
98//! considered the whole trust signature to match nothing.
99//!
100//!   [version 2.2.22]: https://dev.gnupg.org/T5030
101//!
102//! # Examples
103//!
104//! A CA signs two certificates, one for Alice, who works at
105//! `example.com`, and one for Bob, who is associated with `some.org`.
106//! Carol then creates a trust signature for the CA, which she scopes
107//! to `example.org` and `example.com`.  We then confirm that Carol
108//! can use the CA to authenticate Alice, but not Bob.
109//!
110//! ```
111//! use sequoia_openpgp as openpgp;
112//! use openpgp::cert::prelude::*;
113//! use openpgp::packet::prelude::*;
114//! use openpgp::policy::StandardPolicy;
115//! use openpgp::regex::RegexSet;
116//! use openpgp::types::SignatureType;
117//!
118//! # fn main() -> openpgp::Result<()> {
119//! let p = &StandardPolicy::new();
120//!
121//! let (ca, _)
122//!     = CertBuilder::general_purpose(Some("OpenPGP CA <openpgp-ca@example.com>"))
123//!         .generate()?;
124//! let mut ca_signer = ca.primary_key().key().clone()
125//!     .parts_into_secret()?.into_keypair()?;
126//! let ca_userid = ca.with_policy(p, None)?
127//!     .userids().nth(0).expect("Added a User ID").userid();
128//!
129//! // The CA certifies "Alice <alice@example.com>".
130//! let (alice, _)
131//!     = CertBuilder::general_purpose(Some("Alice <alice@example.com>"))
132//!         .generate()?;
133//! let alice_userid = alice.with_policy(p, None)?
134//!     .userids().nth(0).expect("Added a User ID").userid();
135//! let alice_certification = SignatureBuilder::new(SignatureType::GenericCertification)
136//!     .sign_userid_binding(
137//!         &mut ca_signer,
138//!         alice.primary_key().component(),
139//!         alice_userid)?;
140//! let alice = alice.insert_packets(alice_certification.clone())?.0;
141//! # assert!(alice.clone().into_packets().any(|p| {
142//! #   match p {
143//! #       Packet::Signature(sig) => sig == alice_certification,
144//! #       _ => false,
145//! #   }
146//! # }));
147//!
148//! // The CA certifies "Bob <bob@some.org>".
149//! let (bob, _)
150//!     = CertBuilder::general_purpose(Some("Bob <bob@some.org>"))
151//!         .generate()?;
152//! let bob_userid = bob.with_policy(p, None)?
153//!     .userids().nth(0).expect("Added a User ID").userid();
154//! let bob_certification = SignatureBuilder::new(SignatureType::GenericCertification)
155//!     .sign_userid_binding(
156//!         &mut ca_signer,
157//!         bob.primary_key().component(),
158//!         bob_userid)?;
159//! let bob = bob.insert_packets(bob_certification.clone())?.0;
160//! # assert!(bob.clone().into_packets().any(|p| {
161//! #   match p {
162//! #       Packet::Signature(sig) => sig == bob_certification,
163//! #       _ => false,
164//! #   }
165//! # }));
166//!
167//!
168//! // Carol tsigns the CA's certificate.
169//! let (carol, _)
170//!     = CertBuilder::general_purpose(Some("Carol <carol@another.net>"))
171//!         .generate()?;
172//! let mut carol_signer = carol.primary_key().key().clone()
173//!     .parts_into_secret()?.into_keypair()?;
174//!
175//! let ca_tsig = SignatureBuilder::new(SignatureType::GenericCertification)
176//!     .set_trust_signature(2, 120)?
177//!     .set_regular_expression("<[^>]+[@.]example\\.org>$")?
178//!     .add_regular_expression("<[^>]+[@.]example\\.com>$")?
179//!     .sign_userid_binding(
180//!         &mut carol_signer,
181//!         ca.primary_key().component(),
182//!         ca_userid)?;
183//! let ca = ca.insert_packets(ca_tsig.clone())?.0;
184//! # assert!(ca.clone().into_packets().any(|p| {
185//! #   match p {
186//! #       Packet::Signature(sig) => sig == ca_tsig,
187//! #       _ => false,
188//! #   }
189//! # }));
190//!
191//!
192//! // Carol now tries to authenticate Alice and Bob's certificates
193//! // using the CA as a trusted introducer based on `ca_tsig`.
194//! let res = RegexSet::from_signature(&ca_tsig)?;
195//!
196//! // Should be able to authenticate Alice.
197//! let alice_ua = alice.with_policy(p, None)?
198//!     .userids().nth(0).expect("Added a User ID");
199//! # assert!(res.matches_userid(alice_ua.userid()));
200//! let mut authenticated = false;
201//! for c in alice_ua.certifications() {
202//!     if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
203//!         if c.clone().verify_userid_binding(
204//!             ca.primary_key().key(),
205//!             alice.primary_key().key(),
206//!             alice_ua.userid()).is_ok()
207//!         {
208//!             authenticated |= res.matches_userid(alice_ua.userid());
209//!         }
210//!     }
211//! }
212//! assert!(authenticated);
213//!
//! // But, although the CA has certified Bob's key, Carol doesn't rely
//! // on it, because Bob's email address ("bob@some.org") is out of
//! // scope (some.org is neither example.org nor example.com).
217//! let bob_ua = bob.with_policy(p, None)?
218//!     .userids().nth(0).expect("Added a User ID");
219//! # assert!(! res.matches_userid(bob_ua.userid()));
220//! let mut have_certification = false;
221//! let mut authenticated = false;
222//! for c in bob_ua.certifications() {
223//!     if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
224//!         if c.clone().verify_userid_binding(
225//!             ca.primary_key().key(),
226//!             bob.primary_key().key(),
227//!             bob_ua.userid()).is_ok()
228//!         {
229//!             have_certification = true;
230//!             authenticated |= res.matches_userid(bob_ua.userid());
231//!         }
232//!     }
233//! }
234//! assert!(have_certification);
235//! assert!(! authenticated);
236//! # Ok(()) }
237//! ```
238
239use std::borrow::Borrow;
240use std::fmt;
241
242use lalrpop_util::ParseError;
243use regex_syntax::hir::{self, Hir};
244
245use crate::Error;
246use crate::Result;
247use crate::packet::prelude::*;
248use crate::types::SignatureType;
249
250pub(crate) mod lexer;
251lalrpop_util::lalrpop_mod!(
252    #[allow(clippy::all)]
253    #[allow(unused_parens)]
254    grammar,
255    "/regex/grammar.rs"
256);
257
258pub(crate) use self::lexer::Token;
259pub(crate) use self::lexer::{Lexer, LexicalError};
260
261const TRACE: bool = false;
262
263// Convert tokens into strings.
264//
265// Unfortunately, we can't implement From, because we don't define
266// ParseError in this crate.
267pub(crate) fn parse_error_downcast(e: ParseError<usize, Token, LexicalError>)
268    -> ParseError<usize, String, LexicalError>
269{
270    match e {
271        ParseError::UnrecognizedToken {
272            token: (start, t, end),
273            expected,
274        } => ParseError::UnrecognizedToken {
275            token: (start, t.into(), end),
276            expected,
277        },
278
279        ParseError::ExtraToken {
280            token: (start, t, end),
281        } => ParseError::ExtraToken {
282            token: (start, t.into(), end),
283        },
284
285        ParseError::InvalidToken { location }
286        => ParseError::InvalidToken { location },
287
288        ParseError::User { error }
289        => ParseError::User { error },
290
291        ParseError::UnrecognizedEof { location, expected }
292        => ParseError::UnrecognizedEof { location, expected },
293    }
294}
295
296// Used by grammar.lalrpop to generate a regex class (e.g. '[a-ce]').
297fn generate_class(caret: bool, chars: impl Iterator<Item=char>) -> Hir
298{
299    tracer!(TRACE, "generate_class");
300
301    // Dealing with ranges is a bit tricky.  We need to examine three
302    // tokens.  If the middle one is a dash, it's a range.
303
304    let chars: Vec<Option<char>> = chars
305        // Pad it out so what we can use windows to get three
306        // characters at a time, and be sure to process all
307        // characters.
308        .map(Some)
309        .chain(std::iter::once(None))
310        .chain(std::iter::once(None))
311        .collect();
312    if chars.len() == 2 {
313        // The grammar doesn't allow an empty class.
314        unreachable!();
315    } else {
316        let r = chars
317            .windows(3)
318            .scan(0,
319                  |skip: &mut usize, x: &[Option<char>]|
320                      // Scan stops if the result is None.
321                      // filter_map keeps only those elements that
322                      // are Some.
323                      -> Option<Option<hir::ClassUnicodeRange>>
324                  {
325                      if *skip > 0 {
326                          *skip -= 1;
327                          t!("Skipping: {:?} (skip now: {})", x, skip);
328                          Some(None)
329                      } else {
330                          match (x[0], x[1], x[2]) {
331                              (Some(a), Some('-'), Some(c)) => {
332                                  // We've got a real range.
333                                  *skip = 2;
334                                  t!("range for '{}-{}'", a, c);
335                                  Some(Some(hir::ClassUnicodeRange::new(a, c)))
336                              }
337                              (Some(a), _, _) => {
338                                  t!("range for '{}'", a);
339                                  Some(Some(hir::ClassUnicodeRange::new(a, a)))
340                              }
341                              (None, _, _) => unreachable!(),
342                          }
343                      }
344                  })
345            .flatten();
346        let mut class = hir::Class::Unicode(hir::ClassUnicode::new(r));
347        if caret {
348            class.negate();
349        }
350        Hir::class(class)
351    }
352}
353
/// A compiled OpenPGP regular expression for matching UTF-8 encoded
/// strings.
///
/// A `Regex` contains a regular expression compiled according to the
/// rules defined in [Section 8 of RFC 9580] modulo two differences.
/// First, the compiler only works on UTF-8 strings (not bytes).
/// Second, ranges in character classes are between UTF-8 characters,
/// not just ASCII characters.  Further, by default, strings that
/// don't pass a sanity check (in particular, include Unicode control
/// characters) never match.  This behavior can be customized using
/// [`Regex::disable_sanitizations`].
///
///   [Section 8 of RFC 9580]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
///   [trust signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
///   [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
///
/// Regular expressions are used to scope the trust that [trust
/// signatures] extend.
///
/// When working with trust signatures, you'll usually want to use the
/// [`RegexSet`] data structure, which already implements the correct
/// semantics.
///
/// See the [module-level documentation] for more details.
///
///   [module-level documentation]: self
///
/// # A note on equality
///
/// We define equality on `Regex` as the equality of the uncompiled
/// regular expression given to the constructor and whether
/// sanitizations are enabled.
#[derive(Clone, Debug)]
pub struct Regex {
    /// The original regular expression.
    ///
    /// Equality is defined using this and `disable_sanitizations`.
    re: String,
    /// The compiled regular expression that matching is delegated
    /// to.
    ///
    /// This does not take part in equality.
    regex: regex::Regex,
    /// Whether the control-character sanity check is skipped when
    /// matching.
    ///
    /// The constructors set this to `false`, i.e., the check is
    /// enabled.
    disable_sanitizations: bool,
}
assert_send_and_sync!(Regex);
397
398impl PartialEq for Regex {
399    fn eq(&self, other: &Self) -> bool {
400        self.re == other.re
401            && self.disable_sanitizations == other.disable_sanitizations
402    }
403}
404
405impl Eq for Regex {}
406
407impl Regex {
408    /// Parses and compiles the regular expression.
409    ///
410    /// By default, strings that don't pass a sanity check (in
411    /// particular, include Unicode control characters) never match.
412    /// This behavior can be customized using
413    /// [`Regex::disable_sanitizations`].
414    ///
415    ///   [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
416    pub fn new(re: &str) -> Result<Self>
417    {
418        let lexer = Lexer::new(re);
419        let hir = match grammar::RegexParser::new().parse(re, lexer) {
420            Ok(hir) => hir,
421            Err(err) => return Err(parse_error_downcast(err).into()),
422        };
423
424        // Converting the Hir to a string and the compiling that is
425        // apparently the canonical way to convert a Hir to a Regex
426        // (at least it is what rip-grep does), which the author of
427        // regex also wrote.  See
428        // ripgrep/crates/regex/src/config.rs:ConfiguredHir::regex.
429        let regex = regex::RegexBuilder::new(&hir.to_string())
430            .build()?;
431
432        Ok(Self {
433            re: re.into(),
434            regex,
435            disable_sanitizations: false,
436        })
437    }
438
439    /// Parses and compiles the regular expression.
440    ///
441    /// Returns an error if `re` is not a valid UTF-8 string.
442    ///
443    /// By default, strings that don't pass a sanity check (in
444    /// particular, include Unicode control characters) never match.
445    /// This behavior can be customized using
446    /// [`Regex::disable_sanitizations`].
447    ///
448    ///   [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
449    pub fn from_bytes(re: &[u8]) -> Result<Self> {
450        Self::new(std::str::from_utf8(re)?)
451    }
452
453    /// Returns the string-representation of the regular expression.
454    pub fn as_str(&self) -> &str {
455        &self.re
456    }
457
458    /// Controls whether matched strings must pass a sanity check.
459    ///
460    /// If `false` (the default), i.e., sanity checks are enabled, and
461    /// the string doesn't pass the sanity check (in particular, it
462    /// contains a Unicode control character according to
463    /// [`char::is_control`], including newlines and an embedded `NUL`
464    /// byte), this returns `false`.
465    ///
466    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
467    pub fn disable_sanitizations(&mut self, disabled: bool) {
468        self.disable_sanitizations = disabled;
469    }
470
471    /// Returns whether the regular expression matches the string.
472    ///
473    /// If sanity checks are enabled (the default) and the string
474    /// doesn't pass the sanity check (in particular, it contains a
475    /// Unicode control character according to [`char::is_control`],
476    /// including newlines and an embedded `NUL` byte), this returns
477    /// `false`.
478    ///
479    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
480    pub fn is_match(&self, s: &str) -> bool {
481        if ! self.disable_sanitizations && s.chars().any(char::is_control) {
482            return false;
483        }
484
485        self.is_match_clean(s)
486    }
487
488    // is_match, but without the sanity check.
489    fn is_match_clean(&self, s: &str) -> bool {
490        self.regex.is_match(s)
491    }
492
493    /// Returns whether the regular expression matches the User ID.
494    ///
495    /// If the User ID is not a valid UTF-8 string, this returns
496    /// `false`.
497    ///
498    /// If sanity checks are enabled (the default) and the string
499    /// doesn't pass the sanity check (in particular, it contains a
500    /// Unicode control character according to [`char::is_control`],
501    /// including newlines and an embedded `NUL` byte), this returns
502    /// `false`.
503    ///
504    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
505    pub fn matches_userid(&self, u: &UserID) -> bool {
506        if let Ok(u) = std::str::from_utf8(u.value()) {
507            self.is_match(u)
508        } else {
509            false
510        }
511    }
512}
513
/// The compiled state of a [`RegexSet`].
///
/// `RegexSet`'s constructors pick the variant based on how many
/// regular expressions were supplied, and how many of them were
/// valid.
#[derive(Clone, Debug)]
enum RegexSet_ {
    /// At least one regular expression was valid.
    ///
    /// The wrapped `Regex` is compiled from the alternation of all
    /// valid regular expressions; its `re` field is left empty.
    Regex(Regex),
    /// At least one regular expression was supplied, but none of
    /// them was valid.
    Invalid,
    /// No regular expressions were supplied.
    Everything,
}
assert_send_and_sync!(RegexSet_);
521
/// A set of regular expressions.
///
/// A `RegexSet` encapsulates a set of regular expressions.  The
/// regular expressions are compiled according to the rules defined in
/// [Section 8 of RFC 9580] modulo two differences.  First, the
/// compiler only works on UTF-8 strings (not bytes).  Second, ranges
/// in character classes are between UTF-8 characters, not just ASCII
/// characters.  Further, by default, strings that don't pass a sanity
/// check (in particular, include Unicode control characters) never
/// match.  This behavior can be customized using
/// [`RegexSet::disable_sanitizations`].
///
///   [Section 8 of RFC 9580]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
///   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
///
/// `RegexSet` implements the semantics of [regular expressions] used
/// in [Trust Signatures].  In particular, a `RegexSet` makes it
/// easier to deal with trust signatures that:
///
///   - Contain multiple Regular Expression subpackets,
///   - Have no Regular Expression subpackets, and/or
///   - Include one or more Regular Expression subpackets that are invalid.
///
///   [regular expressions]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.22
///   [Trust Signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
///
/// `RegexSet` compiles each regular expression individually.  If
/// there are no regular expressions, the `RegexSet` matches
/// everything.  If a regular expression is invalid, `RegexSet` treats
/// it as if it doesn't match anything.  Thus, if all regular
/// expressions are invalid, the `RegexSet` matches nothing (not
/// everything!).
///
/// See the [module-level documentation] for more details.
///
///   [module-level documentation]: self
///
/// # A note on equality
///
/// We define equality on `RegexSet` as the equality of the uncompiled
/// regular expressions given to the constructor and whether
/// sanitizations are enabled.
#[derive(Clone)]
pub struct RegexSet {
    /// The original regular expressions.
    ///
    /// Equality is defined using this and `disable_sanitizations`.
    re_bytes: Vec<Vec<u8>>,
    /// The compiled form of the regular expressions.
    ///
    /// This does not take part in equality.
    re_set: RegexSet_,
    /// Whether sanitizations are disabled.
    ///
    /// The constructors set this to `false`, i.e., sanitizations
    /// are enabled.
    disable_sanitizations: bool,
}
assert_send_and_sync!(RegexSet);
574
575impl PartialEq for RegexSet {
576    fn eq(&self, other: &Self) -> bool {
577        self.re_bytes == other.re_bytes
578            && self.disable_sanitizations == other.disable_sanitizations
579    }
580}
581
582impl Eq for RegexSet {}
583
584impl fmt::Debug for RegexSet {
585    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
586        let mut d = f.debug_struct("RegexSet");
587        match self.re_set {
588            RegexSet_::Everything => {
589                d.field("regex", &"<Everything>")
590            }
591            RegexSet_::Invalid => {
592                d.field("regex", &"<Invalid>")
593            }
594            RegexSet_::Regex(ref r) => {
595                d.field("regex", &r.regex)
596            }
597        }
598        .field("sanitizations", &!self.disable_sanitizations)
599            .finish()
600    }
601}
602
603impl RegexSet {
604    /// Parses and compiles the regular expressions.
605    ///
606    /// Invalid regular expressions do not cause this to fail.  See
607    /// [`RegexSet`]'s top-level documentation for details.
608    ///
609    ///
610    /// By default, strings that don't pass a sanity check (in
611    /// particular, include Unicode control characters) never match.
612    /// This behavior can be customized using
613    /// [`RegexSet::disable_sanitizations`].
614    ///
615    ///   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
616    ///
617    /// # Examples
618    ///
619    /// ```
620    /// use sequoia_openpgp as openpgp;
621    /// use openpgp::regex::RegexSet;
622    ///
623    /// # fn main() -> openpgp::Result<()> {
624    /// // Extract the regex and compile it.
625    /// let res = &[
626    ///     "<[^>]+[@.]example\\.org>$",
627    ///     // Invalid.
628    ///     "[..",
629    /// ];
630    ///
631    /// let res = RegexSet::new(res)?;
632    ///
633    /// assert!(res.is_match("Alice <alice@example.org>"));
634    /// assert!(! res.is_match("Bob <bob@example.com>"));
635    /// # Ok(()) }
636    /// ```
637    pub fn new<'a, RE, I>(res: I) -> Result<Self>
638    where RE: Borrow<&'a str>,
639          I: IntoIterator<Item=RE>,
640    {
641        tracer!(TRACE, "RegexSet::new");
642
643        let mut regexes = Vec::with_capacity(2);
644        let mut had_good = false;
645        let mut had_bad = false;
646
647        let mut re_bytes = Vec::new();
648        for re in res {
649            let re = re.borrow();
650            re_bytes.push(re.as_bytes().into());
651
652            let lexer = Lexer::new(re);
653            match grammar::RegexParser::new().parse(re, lexer) {
654                Ok(hir) => {
655                    had_good = true;
656                    regexes.push(hir);
657                }
658                Err(err) => {
659                    had_bad = true;
660                    t!("Compiling {:?}: {}", re, err);
661                }
662            }
663        }
664
665        if had_bad && ! had_good {
666            t!("All regular expressions were invalid.");
667            Ok(RegexSet {
668                re_bytes,
669                re_set: RegexSet_::Invalid,
670                disable_sanitizations: false,
671            })
672        } else if ! had_bad && ! had_good {
673            // Match everything.
674            t!("No regular expressions provided.");
675            Ok(RegexSet {
676                re_bytes,
677                re_set: RegexSet_::Everything,
678                disable_sanitizations: false,
679            })
680        } else {
681            // Match any of the regular expressions.
682            Ok(RegexSet {
683                re_bytes,
684                re_set: RegexSet_::Regex(
685                    Regex {
686                        re: String::new(),
687                        regex: regex::RegexBuilder::new(
688                            &Hir::alternation(regexes).to_string())
689                            .build()?,
690                        disable_sanitizations: false,
691                    }),
692                disable_sanitizations: false,
693            })
694        }
695    }
696
697    /// Parses and compiles the regular expressions.
698    ///
699    /// The regular expressions are first converted to UTF-8 strings.
700    /// Byte sequences that are not valid UTF-8 strings are considered
701    /// to be invalid regular expressions.  Invalid regular
702    /// expressions do not cause this to fail.  See [`RegexSet`]'s
703    /// top-level documentation for details.
704    ///
705    ///
706    /// By default, strings that don't pass a sanity check (in
707    /// particular, include Unicode control characters) never match.
708    /// This behavior can be customized using
709    /// [`RegexSet::disable_sanitizations`].
710    ///
711    ///   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
712    ///
713    /// # Examples
714    ///
715    /// ```
716    /// use sequoia_openpgp as openpgp;
717    /// use openpgp::regex::RegexSet;
718    ///
719    /// # fn main() -> openpgp::Result<()> {
720    /// // A valid and an invalid UTF-8 byte sequence.  The invalid
721    /// // sequence doesn't match anything.  But, that doesn't impact
722    /// // the other regular expressions.
723    /// let res: &[ &[u8] ] = &[
724    ///     &b"<[^>]+[@.]example\\.org>$"[..],
725    ///     // Invalid UTF-8.
726    ///     &b"\xC3\x28"[..],
727    /// ];
728    /// assert!(std::str::from_utf8(res[0]).is_ok());
729    /// assert!(std::str::from_utf8(res[1]).is_err());
730    ///
731    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
732    ///
733    /// assert!(re_set.is_match("Alice <alice@example.org>"));
734    /// assert!(! re_set.is_match("Bob <bob@example.com>"));
735    ///
736    /// // If we only have invalid UTF-8 strings, then nothing
737    /// // matches.
738    /// let res: &[ &[u8] ] = &[
739    ///     // Invalid UTF-8.
740    ///     &b"\xC3\x28"[..],
741    /// ];
742    /// assert!(std::str::from_utf8(res[0]).is_err());
743    ///
744    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
745    ///
746    /// assert!(! re_set.is_match("Alice <alice@example.org>"));
747    /// assert!(! re_set.is_match("Bob <bob@example.com>"));
748    ///
749    ///
750    /// // But, if we have no regular expressions, everything matches.
751    /// let res: &[ &[u8] ] = &[];
752    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
753    ///
754    /// assert!(re_set.is_match("Alice <alice@example.org>"));
755    /// assert!(re_set.is_match("Bob <bob@example.com>"));
756    /// # Ok(()) }
757    /// ```
758    pub fn from_bytes<'a, I, RE>(res: I) -> Result<Self>
759    where I: IntoIterator<Item=RE>,
760          RE: Borrow<&'a [u8]>,
761    {
762        let mut have_valid_utf8 = false;
763        let mut have_invalid_utf8 = false;
764        let mut re_bytes = Vec::new();
765        let re_set = Self::new(
766            res
767                .into_iter()
768                .scan((&mut have_valid_utf8, &mut have_invalid_utf8),
769                      |(valid, invalid), re|
770                      {
771                          re_bytes.push(re.borrow().to_vec());
772                          if let Ok(re) = std::str::from_utf8(re.borrow()) {
773                              **valid = true;
774                              Some(Some(re))
775                          } else {
776                              **invalid = true;
777                              Some(None)
778                          }
779                      })
780                .flatten());
781
782        if !have_valid_utf8 && have_invalid_utf8 {
783            // None of the strings were valid UTF-8.  Reject
784            // everything.
785            Ok(RegexSet {
786                re_bytes,
787                re_set: RegexSet_::Invalid,
788                disable_sanitizations: false,
789            })
790        } else {
791            // We had nothing or at least one string was valid UTF-8.
792            // RegexSet::new did the right thing.
793            re_set.map(|mut r| { r.re_bytes = re_bytes; r })
794        }
795    }
796
797    /// Returns the bytes-representation of the regular expressions.
798    pub fn as_bytes(&self) -> &[Vec<u8>] {
799        &self.re_bytes
800    }
801
802    /// Creates a `RegexSet` from the regular expressions stored in a
803    /// trust signature.
804    ///
805    /// This method is a convenience function, which extracts any
806    /// regular expressions from a [Trust Signature] and wraps them in a
807    /// `RegexSet`.
808    ///
809    ///   [Trust Signature]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
810    ///
811    /// If the signature is not a valid trust signature (its [type] is
812    /// [GenericCertification], [PersonaCertification],
813    /// [CasualCertification], or [PositiveCertification], and the
814    /// [Trust Signature] subpacket is present), this returns an
815    /// error.
816    ///
817    ///   [type]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.1
818    ///   [GenericCertification]: crate::types::SignatureType::GenericCertification
819    ///   [PersonaCertification]: crate::types::SignatureType::PersonaCertification
820    ///   [CasualCertification]: crate::types::SignatureType::CasualCertification
821    ///   [PositiveCertification]: crate::types::SignatureType::PositiveCertification
822    ///
823    /// By default, strings that don't pass a sanity check (in
824    /// particular, include Unicode control characters) never match.
825    /// This behavior can be customized using
826    /// [`RegexSet::disable_sanitizations`].
827    ///
828    ///   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
829    ///
830    /// # Examples
831    ///
832    /// ```
833    /// use sequoia_openpgp as openpgp;
834    /// # use openpgp::cert::prelude::*;
835    /// # use openpgp::packet::prelude::*;
836    /// # use openpgp::policy::StandardPolicy;
837    /// use openpgp::regex::RegexSet;
838    /// # use openpgp::types::SignatureType;
839    /// #
840    /// # fn main() -> openpgp::Result<()> {
841    /// # let p = &StandardPolicy::new();
842    /// #
843    /// # let (alice, _)
844    /// #     = CertBuilder::general_purpose(Some("Alice <alice@example.org>"))
845    /// #         .generate()?;
846    /// # let mut alices_signer = alice.primary_key().key().clone()
847    /// #     .parts_into_secret()?.into_keypair()?;
848    /// #
849    /// # let (example_com, _)
850    /// #     = CertBuilder::general_purpose(Some("OpenPGP CA <openpgp-ca@example.com>"))
851    /// #         .generate()?;
852    /// # let example_com_userid = example_com.with_policy(p, None)?
853    /// #     .userids().nth(0).expect("Added a User ID").userid();
854    /// #
855    /// # let certification = SignatureBuilder::new(SignatureType::GenericCertification)
856    /// #     .set_trust_signature(1, 120)?
857    /// #     .set_regular_expression("<[^>]+[@.]example\\.org>$")?
858    /// #     .add_regular_expression("<[^>]+[@.]example\\.com>$")?
859    /// #     .sign_userid_binding(
860    /// #         &mut alices_signer,
861    /// #         example_com.primary_key().component(),
862    /// #         example_com_userid)?;
863    ///
864    /// // certification is a trust signature, which contains two regular
865    /// // expressions: one that matches all mail addresses for 'example.org'
866    /// // and another that matches all mail addresses for 'example.com'.
867    /// let certification: &Signature = // ...;
868    /// # &certification;
869    ///
870    /// // Extract the regex and compile it.
871    /// let res = RegexSet::from_signature(certification)?;
872    ///
873    /// // Some positive examples.
874    /// assert!(res.is_match("Alice <alice@example.org>"));
875    /// assert!(res.is_match("Bob <bob@example.com>"));
876    ///
877    /// // Wrong domain.
878    /// assert!(! res.is_match("Carol <carol@acme.com>"));
879    ///
880    /// // The standard regex, "<[^>]+[@.]example\\.org>$" only matches
881    /// // email addresses wrapped in <>.
882    /// assert!(! res.is_match("dave@example.com"));
883    ///
884    /// // And, it is case-sensitive.
885    /// assert!(res.is_match("Ellen <ellen@example.com>"));
886    /// assert!(! res.is_match("Ellen <ellen@EXAMPLE.COM>"));
887    /// # Ok(()) }
888    /// ```
889    pub fn from_signature(sig: &Signature) -> Result<Self>
890    {
891        use SignatureType::*;
892        match sig.typ() {
893            GenericCertification => (),
894            PersonaCertification => (),
895            CasualCertification => (),
896            PositiveCertification => (),
897            t => return Err(
898                Error::InvalidArgument(
899                    format!(
900                        "Expected a certification signature, found a {}",
901                        t))
902                    .into()),
903        }
904
905        if sig.trust_signature().is_none() {
906            return Err(
907                Error::InvalidArgument(
908                    "Expected a trust signature, \
909                     but the signature does not include \
910                     a valid Trust Signature subpacket".into())
911                    .into());
912        }
913
914        Self::from_bytes(sig.regular_expressions())
915    }
916
917    /// Returns a `RegexSet` that matches everything.
918    ///
919    /// Note: sanitizations are still enabled.  So, to really match
920    /// everything, you still need to call
921    /// [`RegexSet::disable_sanitizations`].
922    ///
923    ///   [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
924    ///
925    /// This can be used to optimize the evaluation of scoping rules
926    /// along a path: if a `RegexSet` matches everything, then it
927    /// doesn't further constrain the path.
928    pub fn everything() -> Self
929    {
930        Self {
931            re_bytes: vec![vec![]],
932            re_set: RegexSet_::Everything,
933            disable_sanitizations: false,
934        }
935    }
936
937    /// Returns whether a `RegexSet` matches everything.
938    ///
939    /// Normally, this only returns true if the `RegexSet` was created
940    /// using [`RegexSet::everything`].  [`RegexSet::new`],
941    /// [`RegexSet::from_bytes`], [`RegexSet::from_signature`] do
942    /// detect some regular expressions that match everything (e.g.,
943    /// if no regular expressions are supplied).  But, they do not
944    /// guarantee that a `RegexSet` containing a regular expression
945    /// like `.?`, which does in fact match everything, is detected as
946    /// matching everything.
947    ///
948    ///   [`RegexSet::everything`]: RegexSet::everything()
    ///   [`RegexSet::new`]: RegexSet::new()
950    ///   [`RegexSet::from_bytes`]: RegexSet::from_bytes()
951    ///   [`RegexSet::from_signature`]: RegexSet::from_signature()
952    ///
953    /// # Examples
954    ///
955    /// ```
956    /// use sequoia_openpgp as openpgp;
957    /// use openpgp::regex::RegexSet;
958    ///
959    /// # fn main() -> openpgp::Result<()> {
960    /// assert!(RegexSet::everything().matches_everything());
961    /// let empty: &[ &str ] = &[];
962    /// assert!(RegexSet::new(empty)?.matches_everything());
963    ///
964    /// // A regular expression that matches everything.  But
965    /// // `RegexSet` returns false, because it can't detect it.
966    /// let res: &[ &str ] = &[
967    ///     &".?"[..],
968    /// ];
969    /// let re_set = RegexSet::new(res.into_iter())?;
970    /// assert!(! re_set.matches_everything());
971    /// # Ok(()) }
972    /// ```
973    pub fn matches_everything(&self) -> bool {
974        matches!(self.re_set, RegexSet_::Everything)
975    }
976
977    /// Controls whether strings with control characters are allowed.
978    ///
    /// If `false` (the default), i.e., sanity checks are enabled, and
    /// a string doesn't pass the sanity check (in particular, it
    /// contains a Unicode control character according to
    /// [`char::is_control`], including newlines and an embedded `NUL`
    /// byte), the match methods return `false` for that string.
984    ///
985    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
986    pub fn disable_sanitizations(&mut self, allowed: bool) {
987        self.disable_sanitizations = allowed;
988        if let RegexSet_::Regex(ref mut re) = self.re_set {
989            re.disable_sanitizations(allowed);
990        }
991    }
992
993    /// Returns whether the regular expression set matches the string.
994    ///
995    /// If sanity checks are enabled (the default) and the string
996    /// doesn't pass the sanity check (in particular, it contains a
997    /// Unicode control character according to [`char::is_control`],
998    /// including newlines and an embedded `NUL` byte), this returns
999    /// `false`.
1000    ///
1001    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
1002    ///
1003    /// If the `RegexSet` contains one or more regular expressions,
1004    /// this method returns whether at least one of the regular
1005    /// expressions matches.  Invalid regular expressions never match.
1006    ///
1007    /// If the `RegexSet` does not contain any regular expressions
1008    /// (valid or otherwise), this method returns `true`.
1009    ///
1010    /// # Examples
1011    ///
1012    /// ```
1013    /// use sequoia_openpgp as openpgp;
1014    /// use openpgp::regex::RegexSet;
1015    ///
1016    /// # fn main() -> openpgp::Result<()> {
1017    /// // A regular expression that matches anything.  (Note: this is
1018    /// // equivalent to providing no regular expressions.)
1019    /// let res: &[ &str ] = &[
1020    ///     &""[..],
1021    /// ];
1022    /// let re_set = RegexSet::new(res.into_iter())?;
1023    ///
1024    /// assert!(re_set.is_match("Alice Lovelace <alice@example.org>"));
1025    ///
1026    /// // If a User ID has an embedded control character, it doesn't
1027    /// // match.
1028    /// assert!(! re_set.is_match("Alice <alice@example.org>\0"));
1029    /// # Ok(()) }
1030    /// ```
1031    pub fn is_match(&self, s: &str) -> bool {
1032        if ! self.disable_sanitizations && s.chars().any(char::is_control) {
1033            return false;
1034        }
1035
1036        match self.re_set {
1037            RegexSet_::Regex(ref re) =>
1038                re.is_match_clean(s),
1039            RegexSet_::Invalid =>
1040                false,
1041            RegexSet_::Everything =>
1042                true,
1043        }
1044    }
1045
    /// Returns whether the regular expression set matches the User ID.
1047    ///
1048    /// If the User ID is not a valid UTF-8 string, this returns `false`.
1049    ///
1050    /// If sanity checks are enabled (the default) and the string
1051    /// doesn't pass the sanity check (in particular, it contains a
1052    /// Unicode control character according to [`char::is_control`],
1053    /// including newlines and an embedded `NUL` byte), this returns
1054    /// `false`.
1055    ///
1056    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
1057    ///
1058    /// If the `RegexSet` contains one or more regular expressions,
1059    /// this method returns whether at least one of the regular
1060    /// expressions matches.  Invalid regular expressions never match.
1061    ///
1062    /// If the `RegexSet` does not contain any regular expressions
1063    /// (valid or otherwise), this method returns `true`.
1064    ///
1065    /// # Examples
1066    ///
1067    /// ```
1068    /// use sequoia_openpgp as openpgp;
1069    /// use openpgp::packet::UserID;
1070    /// use openpgp::regex::RegexSet;
1071    ///
1072    /// # fn main() -> openpgp::Result<()> {
1073    /// // A regular expression that matches anything.  (Note: this is
1074    /// // equivalent to providing no regular expressions.)
1075    /// let res: &[ &str ] = &[
1076    ///     "",
1077    /// ];
1078    /// let re_set = RegexSet::new(res.into_iter())?;
1079    ///
1080    /// assert!(re_set.matches_userid(
1081    ///     &UserID::from(&b"Alice Lovelace <alice@example.org>"[..])));
1082    ///
1083    /// // If a User ID is not valid UTF-8, it never matches.
1084    /// assert!(! re_set.matches_userid(
1085    ///     &UserID::from(&b"Alice \xC3\x28 Lovelace <alice@example.org>"[..])));
1086    ///
1087    /// // If a User ID has an embedded control character, it doesn't
1088    /// // match.
1089    /// assert!(! re_set.matches_userid(
1090    ///     &UserID::from(&b"Alice <alice@example.org>\0"[..])));
1091    /// # Ok(()) }
1092    /// ```
1093    pub fn matches_userid(&self, u: &UserID) -> bool
1094    {
1095        if let Ok(u) = std::str::from_utf8(u.value()) {
1096            self.is_match(u)
1097        } else {
1098            false
1099        }
1100    }
1101}
1102
1103#[cfg(test)]
1104mod tests {
1105    use super::*;
1106
1107    #[test]
1108    fn regex() -> Result<()> {
1109        fn a(regex: &str, matches: &[(bool, &str)]) {
1110            eprint!("{} -> ", regex);
1111            let mut compiled = Regex::new(regex).unwrap();
1112            compiled.disable_sanitizations(true);
1113            eprintln!("{:?}", compiled);
1114            for &(matches, text) in matches {
1115                assert_eq!(matches, compiled.is_match(text),
1116                           "regex: {}\n text: {:?} should{} match",
1117                           regex, text, if matches { "" } else { " not" });
1118            }
1119        }
1120        fn f(regex: &str) {
1121            eprint!("{} -> ", regex);
1122            let compiled = Regex::new(regex);
1123            assert!(compiled.is_err());
1124            eprintln!("failed (expected)");
1125        }
1126
1127        // Test an important corner case: the + should only apply to
1128        // the b!  See: https://github.com/rust-lang/regex/issues/731
1129        a("xab+y", &[
1130            (true, "xaby"),
1131            (true, "xabby"),
1132            (false, "xababy"),
1133        ]);
1134        a("x(ab+)y", &[
1135            (false, "xy"),
1136            (false, "xay"),
1137            (true, "xaby"),
1138            (true, "xabby"),
1139            (true, "xabbby"),
1140            (false, "xababy"),
1141        ]);
1142        // But here the + matches "ab", not just the "b".
1143        a("x(ab)+y", &[
1144            (false, "xy"),
1145            (true, "xaby"),
1146            (false, "xabby"),
1147            (true, "xababy"),
1148            (true, "xabababy"),
1149            (false, "x(ab)y"),
1150        ]);
1151
1152
1153
1154        a("", &[
1155            (true, "s"),
1156            (true, "ss"),
1157        ]);
1158        a("s", &[
1159            (true, "s"),
1160            (true, "ss"),
1161            (false, "a"),
1162            (true, "hello, my prettiessss"),
1163            (false, "S"),
1164        ]);
1165        a("ss", &[
1166            (false, "s"),
1167            (true, "ss"),
1168            (true, "sss"),
1169            (false, "this has lots of ses, but not two ses together"),
1170            (true, "halloss"),
1171        ]);
1172
1173        a("a|b", &[
1174            (true, "a"),
1175            (true, "b"),
1176            (false, "c"),
1177            (true, "xxxaxxxbxxx"),
1178        ]);
1179        a("a|b|c", &[
1180            (true, "a"),
1181            (true, "b"),
1182            (true, "c"),
1183            (false, "d"),
1184            (true, "xxxaxxxbxxx"),
1185        ]);
1186        // This should match anything.
1187        a("|a", &[
1188            (true, "a"),
1189            (true, "b"),
1190        ]);
1191        a("a|", &[
1192            (true, "a"),
1193            (true, "b"),
1194        ]);
1195        a("|a|b", &[
1196            (true, "a"),
1197            (true, "b"),
1198            (true, "c"),
1199        ]);
1200        a("|a|b|c|d", &[
1201            (true, "a"),
1202            (true, "b"),
1203            (true, "c"),
1204            (true, "d"),
1205            (true, "eeee"),
1206        ]);
1207        a("a|b|", &[
1208            (true, "a"),
1209            (true, "b"),
1210            (true, "c"),
1211        ]);
1212        a("a|b|c|", &[
1213            (true, "a"),
1214            (true, "b"),
1215            (true, "c"),
1216            (true, "d"),
1217            (true, "eeee"),
1218        ]);
1219        a("|", &[
1220            (true, "a"),
1221            (true, "b"),
1222            (true, "c"),
1223            (true, "d"),
1224            (true, "eeee"),
1225        ]);
1226        a("|a|", &[
1227            (true, "a"),
1228            (true, "b"),
1229            (true, "c"),
1230            (true, "d"),
1231            (true, "eeee"),
1232        ]);
1233        a("|a|b|", &[
1234            (true, "a"),
1235            (true, "b"),
1236            (true, "c"),
1237            (true, "d"),
1238            (true, "eeee"),
1239        ]);
1240        // A nested empty.
1241        a("(a|)|b", &[
1242            (true, "a"),
1243            (true, "b"),
1244        ]);
1245        // empty+
1246        a("(a|b|()+)", &[
1247            (true, "a"),
1248            (true, "b"),
1249        ]);
1250        // (empty)+
1251        a("(a|b|(())+)", &[
1252            (true, "a"),
1253            (true, "b"),
1254        ]);
1255        // Multiple empty branches.
1256        a("(a|b|(()())())", &[
1257            (true, "a"),
1258            (true, "b"),
1259        ]);
1260        a("(a|b|(()())())|", &[
1261            (true, "a"),
1262            (true, "b"),
1263        ]);
1264
1265        // This is: "ab" or "cd", not a followed by b or c followed by d:
1266        //
1267        //   A regular expression is zero or more branches, separated by '|'.
1268        //   ...
1269        //   A branch is zero or more pieces, concatenated.
1270        //   ...
1271        //   A piece is an atom
1272        //   ...
1273        //   An atom is... a single character.
1274        a("ab|cd", &[
1275            (true, "abd"),
1276            (true, "acd"),
1277            (true, "abcd"),
1278            (false, "ad"),
1279            (false, "b"),
1280            (false, "c"),
1281            (false, "bb"),
1282        ]);
1283
1284        a("a*", &[
1285            (true, ""),
1286            (true, "a"),
1287            (true, "aa"),
1288            (true, "b"),
1289        ]);
1290        a("xa*y", &[
1291            (true, "xy"),
1292            (true, "xay"),
1293            (true, "xaay"),
1294            (false, "y"),
1295            (false, "ay"),
1296            (false, "aay"),
1297            (false, "x y"),
1298            (false, "x ay"),
1299            (false, "x aay"),
1300        ]);
1301        f("*");
1302
1303        a("a+", &[
1304            (false, ""),
1305            (true, "a"),
1306            (true, "aa"),
1307            (false, "b"),
1308            (true, "baab"),
1309            (true, "by ab"),
1310            (true, "baa b"),
1311        ]);
1312        a("ab+", &[
1313            (false, ""),
1314            (false, "a"),
1315            (false, "b"),
1316            (true, "ab"),
1317            (false, "bb"),
1318            (true, "baab"),
1319            (true, "by ab"),
1320            (false, "baa b"),
1321        ]);
1322        f("+");
1323
1324        a("a?", &[
1325            (true, ""),
1326            (true, "a"),
1327            (true, "aa"),
1328            (true, "aaa"),
1329            (true, "b"),
1330            (true, "baab"),
1331            (true, "by ab"),
1332            (true, "baa b"),
1333        ]);
1334        a("xa?y", &[
1335            (false, ""),
1336            (true, "xy"),
1337            (false, "a"),
1338            (true, "xay"),
1339            (false, "aa"),
1340            (false, "xaay"),
1341            (false, "b"),
1342            (false, "bxaayb"),
1343            (true, "by xayb"),
1344            (true, "baxay b"),
1345        ]);
1346        f("?");
1347
1348        f("a*?");
1349        a("a*b?c+", &[
1350            (false, ""),
1351            (true, "c"),
1352            (true, "abc"),
1353            (true, "aabbcc"),
1354            (false, "aab"),
1355            (true, "aaaaaabcccccccc"),
1356        ]);
1357        f("a?*+");
1358
1359        a("a?|b+", &[
1360            (true, ""),
1361            (true, "aaa"),
1362            (true, "bbb"),
1363            (true, "abaa"),
1364        ]);
1365        a("a+|b+", &[
1366            (false, ""),
1367            (true, "a"),
1368            (true, "aaa"),
1369            (true, "b"),
1370            (true, "bbb"),
1371            (true, "abaa"),
1372        ]);
1373        a("a+|b+|c+", &[
1374            (false, ""),
1375            (true, "a"),
1376            (true, "aaa"),
1377            (true, "b"),
1378            (true, "bbb"),
1379            (true, "abaa"),
1380            (true, "c"),
1381            (true, "ccc"),
1382            (true, "abaaccc"),
1383        ]);
1384        a("xa+|b+|c+y", &[
1385            (false, ""),
1386            (true, "xa"),
1387            (true, "xaa"),
1388            (true, "b"),
1389            (true, "bb"),
1390            (true, "cy"),
1391            (true, "ccy"),
1392
1393            (false, "a"),
1394            (false, "aaa"),
1395            (false, "c"),
1396            (false, "ccc"),
1397        ]);
1398        a("xa+y|sb+u", &[
1399            (false, ""),
1400            (true, "xay"),
1401            (true, "xaay"),
1402            (true, "sbu"),
1403            (true, "sbbu"),
1404            (true, "xysbu"),
1405
1406            (false, "a"),
1407            (false, "aaa"),
1408            (false, "xyu"),
1409            (false, "ccc"),
1410        ]);
1411        a("a*|a+|ab+cd+|", &[
1412            (true, ""),
1413        ]);
1414
1415        a("()", &[
1416            (true, ""),
1417            (true, "xyzzy"),
1418        ]);
1419        a("(())", &[
1420            (true, ""),
1421            (true, "xyzzy"),
1422        ]);
1423        a("((()))", &[
1424            (true, ""),
1425            (true, "xyzzy"),
1426        ]);
1427        f("((())");
1428        f("((())))");
1429        a("(a)", &[
1430            (true, "a"),
1431            (true, "(a)"),
1432            (false, "b"),
1433        ]);
1434        a("x(a)y", &[
1435            (false, "xy"),
1436            (true, "xay"),
1437            (false, "x(a)y"),
1438            (true, "(xay)"),
1439            (false, "a"),
1440            (false, "yax"),
1441        ]);
1442        a("x(ab)y", &[
1443            (false, "xy"),
1444            (false, "xay"),
1445            (false, "xby"),
1446            (true, "xaby"),
1447            (false, "x(ab)y"),
1448            (true, "(xaby)"),
1449        ]);
1450        a("x(ab)(cd)y", &[
1451            (true, "xabcdy"),
1452            (true, "zxabcdyz"),
1453        ]);
1454        a("a(bc)d(ef)g", &[
1455            (true, "abcdefg"),
1456            (true, "xabcdefgy"),
1457            (false, "xa(bc)d(ef)gy"),
1458        ]);
1459        a("a((bc))d((ef))g", &[
1460            (true, "abcdefg"),
1461            (true, "xabcdefgy"),
1462            (false, "xa(bc)d(ef)gy"),
1463        ]);
1464        a("a(b(c)d)e", &[
1465            (true, "abcde"),
1466            (true, "xabcdey"),
1467            (false, "xa(b(c)d)ey"),
1468        ]);
1469        a("x(a|b)y", &[
1470            (false, "xy"),
1471            (true, "xay"),
1472            (true, "xby"),
1473            (false, "xaay"),
1474            (false, "xbby"),
1475            (false, "xaby"),
1476            (false, "xaaby"),
1477            (false, "xabby"),
1478            (false, "xaabby"),
1479            (false, "xcy"),
1480        ]);
1481        a("x(a|bc)y", &[
1482            (false, "xy"),
1483            (true, "xay"),
1484            (false, "xby"),
1485            (true, "xbcy"),
1486            (false, "xaay"),
1487            (false, "xbby"),
1488            (false, "xaby"),
1489            (false, "xabcy"),
1490            (false, "xabby"),
1491            (false, "xaabby"),
1492            (false, "xcy"),
1493            (false, "xacy"),
1494        ]);
1495        a("x(a|b|c)y", &[
1496            (false, "xy"),
1497            (true, "xay"),
1498            (true, "xby"),
1499            (true, "xcy"),
1500            (false, "xaay"),
1501            (false, "xbby"),
1502            (false, "xaby"),
1503            (false, "xabcy"),
1504            (false, "xabby"),
1505            (false, "xaabby"),
1506            (false, "xacy"),
1507        ]);
1508        a("x(a|b)(c|d)y", &[
1509            (false, "xy"),
1510            (false, "xay"),
1511            (false, "xby"),
1512            (false, "xcy"),
1513            (false, "xdy"),
1514            (false, "xaay"),
1515            (false, "xbby"),
1516            (false, "xccy"),
1517            (false, "xddy"),
1518            (false, "xaby"),
1519            (false, "xcdy"),
1520            (true, "xacy"),
1521            (true, "xady"),
1522            (true, "xbcy"),
1523            (true, "xbdy"),
1524            (false, "xabcy"),
1525            (false, "xabby"),
1526            (false, "xaabby"),
1527        ]);
1528        a("x(a+|b+)y", &[
1529            (false, "xy"),
1530            (true, "xay"),
1531            (true, "xby"),
1532            (true, "xaay"),
1533            (true, "xbby"),
1534            (false, "xaby"),
1535            (false, "xaaby"),
1536            (false, "xabby"),
1537            (false, "xaabby"),
1538            (false, "xcy"),
1539        ]);
1540
1541        a(".", &[
1542            (false, ""),
1543            (true, "a"),
1544            (true, "ab"),
1545            (true, "ab\nc"),
1546            (true, "ab.c"),
1547        ]);
1548        a("x.y", &[
1549            (false, ""),
1550            (false, "xy"),
1551            (true, "xay"),
1552            (true, "x\ny"),
1553            (true, "x.y"),
1554            (false, "x..y"),
1555        ]);
1556
1557        a("^", &[
1558            (true, ""),
1559            (true, "xx"),
1560        ]);
1561        a("^abc", &[
1562            (false, ""),
1563            (true, "abcdef"),
1564            (false, "xabcdef"),
1565            (false, "\nabcdef"),
1566        ]);
1567        a("(^abc|^def)", &[
1568            (false, ""),
1569            (true, "abcd"),
1570            (true, "defg"),
1571            (false, "xabcd"),
1572            (false, "xdefg"),
1573            (false, "^abc"),
1574            (false, "^(abc|def)"),
1575            (false, "\nabcdef"),
1576        ]);
1577        a("(^abc|def)", &[
1578            (false, ""),
1579            (true, "abcd"),
1580            (true, "defg"),
1581            (false, "xabcd"),
1582            (true, "xdefg"),
1583            (false, "^abc"),
1584            (true, "^(abc|def)"),
1585            (false, "\nabcde"),
1586        ]);
1587        a("^^", &[
1588            (true, ""),
1589            (true, "abcdef"),
1590        ]);
1591        a("^abc^", &[
1592            (false, ""),
1593            (false, "abcdef"),
1594            (false, "xabcdef"),
1595            (false, "abc\n"),
1596            (false, "\nabc\n"),
1597            (false, "^abc^"),
1598        ]);
1599
1600        a("$", &[
1601            (true, ""),
1602            (true, "abc"),
1603        ]);
1604        a("abc$", &[
1605            (false, ""),
1606            (true, "abc"),
1607            (false, "abcx"),
1608            (false, "abc\n"),
1609            (false, "abc$"),
1610        ]);
1611        a("abc$$", &[
1612            (false, ""),
1613            (true, "abc"),
1614            (false, "abcx"),
1615            (false, "abc\n"),
1616            (false, "abc$"),
1617        ]);
1618        a("(abc$)x", &[
1619            (false, ""),
1620            (false, "abc"),
1621            (false, "abcx"),
1622            (false, "abc\nx"),
1623            (false, "abc$x"),
1624        ]);
1625        a("abc$|def$", &[
1626            (false, ""),
1627            (true, "abc"),
1628            (false, "abcx"),
1629            (false, "abc\n"),
1630            (false, "abc$"),
1631            (true, "def"),
1632            (false, "defx"),
1633            (false, "def\n"),
1634            (false, "def$"),
1635            (true, "abcdef"),
1636        ]);
1637
1638        a("\\|", &[
1639            (true, "|"),
1640            (false, ""),
1641            (false, "a"),
1642        ]);
1643        a("\\*", &[
1644            (true, "*"),
1645            (false, ""),
1646            (false, "a"),
1647        ]);
1648        a("\\+", &[
1649            (true, "+"),
1650            (false, ""),
1651            (false, "a"),
1652        ]);
1653        a("\\?", &[
1654            (true, "?"),
1655            (false, ""),
1656            (false, "a"),
1657        ]);
1658        a("\\.", &[
1659            (true, "."),
1660            (false, ""),
1661            (false, "a"),
1662        ]);
1663        a("\\^", &[
1664            (true, "^"),
1665            (false, ""),
1666            (false, "a"),
1667        ]);
1668        a("\\$", &[
1669            (true, "$"),
1670            (false, ""),
1671            (false, "a"),
1672        ]);
1673        a("\\\\", &[
1674            (true, "\\"),
1675            (false, ""),
1676            (false, "a"),
1677        ]);
1678        a("\\[", &[
1679            (true, "["),
1680            (false, ""),
1681            (false, "a"),
1682        ]);
1683        a("\\]", &[
1684            (true, "]"),
1685            (false, ""),
1686            (false, "a"),
1687        ]);
1688        a("\\-", &[
1689            (true, "-"),
1690            (false, ""),
1691            (false, "a"),
1692        ]);
1693        f("\\");
1694
1695        a("[a]", &[
1696            (true, "a"),
1697            (false, "b"),
1698        ]);
1699        a("[abc]", &[
1700            (true, "a"),
1701            (true, "b"),
1702            (true, "c"),
1703            (false, "d"),
1704        ]);
1705        a("[a-c]", &[
1706            (true, "a"),
1707            (true, "b"),
1708            (true, "c"),
1709            (false, "d"),
1710        ]);
1711        a("[xa-c]", &[
1712            (true, "a"),
1713            (true, "b"),
1714            (true, "c"),
1715            (true, "x"),
1716            (false, "d"),
1717        ]);
1718        a("[a-cxyz]", &[
1719            (true, "a"),
1720            (true, "b"),
1721            (true, "c"),
1722            (true, "x"),
1723            (false, "d"),
1724        ]);
1725        a("[a-c]x", &[
1726            (false, "a"),
1727            (false, "b"),
1728            (false, "c"),
1729            (false, "x"),
1730            (true, "ax"),
1731            (true, "bx"),
1732            (true, "cx"),
1733            (false, "d"),
1734            (false, "dx"),
1735        ]);
1736        a("[a-cxy]", &[
1737            (true, "a"),
1738            (true, "b"),
1739            (true, "c"),
1740            (true, "x"),
1741            (true, "y"),
1742            (false, "d"),
1743        ]);
1744        a("[a-c]xy", &[
1745            (false, "a"),
1746            (false, "b"),
1747            (false, "c"),
1748            (false, "x"),
1749            (false, "ax"),
1750            (false, "bx"),
1751            (false, "cx"),
1752            (true, "axy"),
1753            (true, "bxy"),
1754            (true, "cxy"),
1755            (false, "d"),
1756        ]);
1757        a("[a-cxyz]", &[
1758            (true, "a"),
1759            (true, "b"),
1760            (true, "c"),
1761            (true, "x"),
1762            (true, "y"),
1763            (true, "z"),
1764            (false, "d"),
1765        ]);
1766        a("[a-c]xyz", &[
1767            (false, "a"),
1768            (false, "b"),
1769            (false, "c"),
1770            (false, "x"),
1771            (false, "ax"),
1772            (false, "bx"),
1773            (false, "cx"),
1774            (false, "axy"),
1775            (false, "bxy"),
1776            (false, "cxy"),
1777            (true, "axyz"),
1778            (true, "bxyz"),
1779            (true, "cxyz"),
1780            (false, "d"),
1781        ]);
1782        a("xyz[a-c]", &[
1783            (false, "a"),
1784            (false, "b"),
1785            (false, "c"),
1786            (false, "x"),
1787            (false, "xa"),
1788            (false, "xb"),
1789            (false, "xc"),
1790            (false, "xya"),
1791            (false, "xyb"),
1792            (false, "xyc"),
1793            (true, "xyza"),
1794            (true, "xyzb"),
1795            (true, "xyzc"),
1796            (false, "d"),
1797        ]);
1798        a("[xyza-c]", &[
1799            (true, "a"),
1800            (true, "b"),
1801            (true, "c"),
1802            (true, "x"),
1803            (true, "y"),
1804            (true, "z"),
1805            (false, "d"),
1806        ]);
1807        a("[xya-cyz]", &[
1808            (true, "a"),
1809            (true, "b"),
1810            (true, "c"),
1811            (true, "x"),
1812            (true, "y"),
1813            (true, "z"),
1814            (false, "d"),
1815        ]);
1816        a("[x-za-c]", &[
1817            (true, "a"),
1818            (true, "b"),
1819            (true, "c"),
1820            (true, "x"),
1821            (true, "y"),
1822            (true, "z"),
1823            (false, "d"),
1824        ]);
1825        a("[x-zmna-c]", &[
1826            (true, "a"),
1827            (true, "b"),
1828            (true, "c"),
1829            (true, "x"),
1830            (true, "y"),
1831            (true, "z"),
1832            (true, "m"),
1833            (true, "n"),
1834            (false, "d"),
1835        ]);
1836        a("[-]", &[
1837            (true, "-"),
1838            (false, "d"),
1839        ]);
1840        a("[a-]", &[
1841            (true, "-"),
1842            (true, "a"),
1843            (false, "d"),
1844        ]);
1845        a("[-b]", &[
1846            (true, "-"),
1847            (true, "b"),
1848            (false, "d"),
1849        ]);
1850        a("[-bd-g]", &[
1851            (false, "a"),
1852            (true, "-"),
1853            (true, "b"),
1854            (true, "d"),
1855            (true, "f"),
1856        ]);
1857        a("[bd-g-]", &[
1858            (false, "a"),
1859            (true, "-"),
1860            (true, "b"),
1861            (true, "d"),
1862            (true, "f"),
1863        ]);
1864        // Backwards ranges.
1865        a("[9-0]", &[
1866            (false, "a"),
1867            (false, "-"),
1868            (true, "9"),
1869            (true, "0"),
1870            (true, "5"),
1871        ]);
1872
1873        a("[^a]", &[
1874            (false, "a"),
1875            (true, "b"),
1876        ]);
1877        a("[^abc]", &[
1878            (false, "a"),
1879            (false, "b"),
1880            (false, "c"),
1881            (true, "d"),
1882        ]);
1883        a("[^a-c]", &[
1884            (false, "a"),
1885            (false, "b"),
1886            (false, "c"),
1887            (true, "d"),
1888        ]);
1889        a("[^xa-c]", &[
1890            (false, "a"),
1891            (false, "b"),
1892            (false, "c"),
1893            (false, "x"),
1894            (true, "d"),
1895        ]);
1896        a("[^a-cxyz]", &[
1897            (false, "a"),
1898            (false, "b"),
1899            (false, "c"),
1900            (false, "x"),
1901            (true, "d"),
1902        ]);
1903        a("[^a-c]x", &[
1904            (false, "a"),
1905            (false, "b"),
1906            (false, "c"),
1907            (false, "x"),
1908            (false, "ax"),
1909            (false, "bx"),
1910            (false, "cx"),
1911            (false, "d"),
1912            (true, "dx"),
1913        ]);
1914        a("[^a-cxy]", &[
1915            (false, "a"),
1916            (false, "b"),
1917            (false, "c"),
1918            (false, "x"),
1919            (false, "y"),
1920            (true, "d"),
1921        ]);
1922        a("[^a-c]xy", &[
1923            (false, "a"),
1924            (false, "b"),
1925            (false, "c"),
1926            (false, "x"),
1927            (false, "ax"),
1928            (false, "bx"),
1929            (false, "cx"),
1930            (false, "axy"),
1931            (false, "bxy"),
1932            (false, "cxy"),
1933            (true, "dxy"),
1934            (false, "d"),
1935        ]);
1936        a("[^a-cxyz]", &[
1937            (false, "a"),
1938            (false, "b"),
1939            (false, "c"),
1940            (false, "x"),
1941            (false, "y"),
1942            (false, "z"),
1943            (true, "d"),
1944        ]);
1945        a("[^a-c]xyz", &[
1946            (false, "a"),
1947            (false, "b"),
1948            (false, "c"),
1949            (false, "x"),
1950            (false, "ax"),
1951            (false, "bx"),
1952            (false, "cx"),
1953            (false, "axy"),
1954            (false, "bxy"),
1955            (false, "cxy"),
1956            (false, "axyz"),
1957            (false, "bxyz"),
1958            (false, "cxyz"),
1959            (true, "dxyz"),
1960            (false, "d"),
1961        ]);
1962        a("xyz[^a-c]", &[
1963            (false, "a"),
1964            (false, "b"),
1965            (false, "c"),
1966            (false, "x"),
1967            (false, "xa"),
1968            (false, "xb"),
1969            (false, "xc"),
1970            (false, "xya"),
1971            (false, "xyb"),
1972            (false, "xyc"),
1973            (false, "xyza"),
1974            (false, "xyzb"),
1975            (false, "xyzc"),
1976            (true, "xyzd"),
1977            (false, "d"),
1978        ]);
1979        a("[^xyza-c]", &[
1980            (false, "a"),
1981            (false, "b"),
1982            (false, "c"),
1983            (false, "x"),
1984            (false, "y"),
1985            (false, "z"),
1986            (true, "d"),
1987        ]);
1988        a("[^xya-cyz]", &[
1989            (false, "a"),
1990            (false, "b"),
1991            (false, "c"),
1992            (false, "x"),
1993            (false, "y"),
1994            (false, "z"),
1995            (true, "d"),
1996        ]);
1997        a("[^x-za-c]", &[
1998            (false, "a"),
1999            (false, "b"),
2000            (false, "c"),
2001            (false, "x"),
2002            (false, "y"),
2003            (false, "z"),
2004            (true, "d"),
2005        ]);
2006        a("[^x-zmna-c]", &[
2007            (false, "a"),
2008            (false, "b"),
2009            (false, "c"),
2010            (false, "x"),
2011            (false, "y"),
2012            (false, "z"),
2013            (false, "m"),
2014            (false, "n"),
2015            (true, "d"),
2016        ]);
2017        a("[^-]", &[
2018            (false, "-"),
2019            (true, "d"),
2020        ]);
2021        a("[^a-]", &[
2022            (false, "-"),
2023            (false, "a"),
2024            (true, "d"),
2025        ]);
2026        a("[^-b]", &[
2027            (false, "-"),
2028            (false, "b"),
2029            (true, "d"),
2030        ]);
2031        a("[^-bd-g]", &[
2032            (true, "a"),
2033            (false, "-"),
2034            (false, "b"),
2035            (false, "d"),
2036            (false, "f"),
2037        ]);
2038        a("[^bd-g-]", &[
2039            (true, "a"),
2040            (false, "-"),
2041            (false, "b"),
2042            (false, "d"),
2043            (false, "f"),
2044        ]);
2045
2046        a("[a|b]", &[
2047            (true, "a"),
2048            (true, "|"),
2049            (false, "c"),
2050        ]);
2051        a("[a\\|b]", &[
2052            (true, "a"),
2053            (true, "|"),
2054            (true, "\\"),
2055            (false, "c"),
2056        ]);
2057        a("[a(b]", &[
2058            (true, "a"),
2059            (true, "("),
2060            (false, "c"),
2061        ]);
2062        a("[a)b]", &[
2063            (true, "a"),
2064            (true, ")"),
2065            (false, "c"),
2066        ]);
2067        a("[a^b]", &[
2068            (true, "a"),
2069            (true, "^"),
2070            (false, "c"),
2071        ]);
2072
2073        f("[]");
2074        f("[^]");
2075        a("[^]]", &[
2076            (true, "a"),
2077            (false, "]"),
2078            (true, "^"),
2079        ]);
2080        a("[]]", &[
2081            (false, "a"),
2082            (true, "]"),
2083        ]);
2084        // Matches [ or ].
2085        a("[][]", &[
2086            (false, "a"),
2087            (true, "["),
2088            (true, "]"),
2089        ]);
2090        // Matches anything but [ or ].
2091        a("[^][]", &[
2092            (true, "a"),
2093            (false, "["),
2094            (false, "]"),
2095        ]);
2096        // Anything but ^.
2097        a("[^^]", &[
2098            (true, "a"),
2099            (false, "^"),
2100            (true, "c"),
2101        ]);
2102
        // Make sure - is recognized as an atom when it is not part of
        // a range.  That is: outside of brackets, a-z is the literal
        // sequence a, -, z.  It matches "a-z", but it matches neither
        // a, -, nor z on their own, nor c (it's not a range).
2106        a("a-z", &[
2107            (true, "a-z"),
2108            (false, "a"),
2109            (false, "-"),
2110            (false, "z"),
2111            (false, "c"),
2112        ]);
2113
2114        a("a|-|z", &[
2115            (true, "a"),
2116            (true, "-"),
2117            (true, "z"),
2118            (false, "c"),
2119        ]);
2120
2121        Ok(())
2122    }
2123
2124    #[test]
2125    fn regex_set() -> Result<()> {
2126        let re = RegexSet::new(&[ "ab", "cd" ])?;
2127        assert!(re.is_match("ab"));
2128        assert!(re.is_match("cdef"));
2129        assert!(!re.is_match("xxx"));
2130
2131        // Try to make sure one re does not leak into another.
2132        let re = RegexSet::new(&[ "cd$", "^ab" ])?;
2133        assert!(re.is_match("abxx"));
2134        assert!(! re.is_match("xabxx"));
2135        assert!(re.is_match("xxcd"));
2136        assert!(! re.is_match("xxcdx"));
2137        assert!(re.is_match("abcdx"));
2138
2139        // Invalid regular expressions should be ignored.
2140        let re = RegexSet::new(&[ "[ab", "cd]", "x" ])?;
2141        assert!(!re.is_match("a"));
2142        assert!(!re.is_match("ab"));
2143        assert!(!re.is_match("[ab"));
2144        assert!(!re.is_match("c"));
2145        assert!(!re.is_match("cd"));
2146        assert!(!re.is_match("cd]"));
2147        assert!(re.is_match("x"));
2148
2149        // If all regular expressions are invalid, nothing should
2150        // match.
2151        let re = RegexSet::new(&[ "[ab", "cd]" ])?;
2152        assert!(!re.is_match("a"));
2153        assert!(!re.is_match("ab"));
2154        assert!(!re.is_match("[ab"));
2155        assert!(!re.is_match("c"));
2156        assert!(!re.is_match("cd"));
2157        assert!(!re.is_match("cd]"));
2158        assert!(!re.is_match("x"));
2159
2160        // If there are no regular expressions, everything should
2161        // match.
2162        let s: [&str; 0] = [];
2163        let re = RegexSet::new(&s)?;
2164        assert!(re.is_match("a"));
2165        assert!(re.is_match("ab"));
2166        assert!(re.is_match("[ab"));
2167        assert!(re.is_match("c"));
2168        assert!(re.is_match("cd"));
2169        assert!(re.is_match("cd]"));
2170        assert!(re.is_match("x"));
2171
2172        // The empty branch of the alternation should match everything.
2173        let re = RegexSet::new(&[ "ab|", "cd" ])?;
2174        assert!(re.is_match("a"));
2175        assert!(re.is_match("b"));
2176        assert!(re.is_match("x"));
2177        assert!(re.is_match("xyx"));
2178        assert!(re.is_match(""));
2179
2180        Ok(())
2181    }
2182
2183    #[test]
2184    fn regex_set_sequoia() -> Result<()> {
2185        let re = RegexSet::new(&["<[^>]+[@.]sequoia-pgp\\.org>$"])?;
2186        dbg!(&re);
2187        assert!(re.is_match("<justus@sequoia-pgp.org>"));
2188        assert!(!re.is_match("<justus@gnupg.org>"));
2189        Ok(())
2190    }
2191
2192    #[test]
2193    fn regex_set_sequoia_nodash() -> Result<()> {
2194        let re = RegexSet::new(&["<[^>]+[@.]sequoiapgp\\.org>$"])?;
2195        dbg!(&re);
2196        assert!(re.is_match("<justus@sequoiapgp.org>"));
2197        assert!(!re.is_match("<justus@gnupg.org>"));
2198        Ok(())
2199    }
2200}