sequoia_openpgp/regex/mod.rs
1//! OpenPGP regex parser.
2//!
3//! OpenPGP defines a [regular expression language]. It is used with
4//! [trust signatures] to scope the trust that they extend.
5//!
6//! [regular expression language]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
7//! [trust signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
8//!
9//! Compared with most regular expression languages, OpenPGP's is
10//! quite simple. In particular, it only includes the following
11//! features:
12//!
13//! - Alternations using `|`,
14//! - Grouping using `(` and `)`,
15//! - The `*`, `+`, and `?` glob operators,
//! - The `^` and `$` anchors,
17//! - The '.' operator, positive *non-empty* ranges
18//! (e.g. `[a-zA-Z]`) and negative *non-empty* ranges (`[^@]`), and
19//! - The backslash operator to escape special characters (except
20//! in ranges).
21//!
22//! The regular expression engine defined in this module implements
23//! that language with two differences. The first difference is that
24//! the compiler only works on UTF-8 strings (not bytes). The second
25//! difference is that ranges in character classes are between UTF-8
26//! characters, not just ASCII characters.
27//!
28//! # Data Structures
29//!
30//! This module defines two data structures. [`Regex`] encapsulates a
31//! valid regular expression, and provides methods to check whether
32//! the regular expression matches a string or a [`UserID`].
33//! [`RegexSet`] is similar, but encapsulates zero or more regular
34//! expressions, which may or may not be valid. Its match methods
35//! return `true` if there are no regular expressions, or, if there is
36//! at least one regular expression, they return whether at least one
37//! of the regular expressions matches it. `RegexSet`'s matcher
38//! handles invalid regular expressions by considering them to be
39//! regular expressions that don't match anything. These semantics
40//! are consistent with a trust signature's scoping rules. Further,
41//! strings that contain control characters never match. This
42//! behavior can be overridden using [`Regex::disable_sanitizations`]
43//! and [`RegexSet::disable_sanitizations`].
44//!
45//! [`UserID`]: crate::packet::UserID
46//! [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
47//! [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
48//!
49//! # Scoped Trust Signatures
50//!
51//! To create a trust signature, you create a signature whose [type]
52//! is either [GenericCertification], [PersonaCertification],
53//! [CasualCertification], or [PositiveCertification], and add a
54//! [Trust Signature] subpacket using, for instance, the
55//! [`SignatureBuilder::set_trust_signature`] method.
56//!
57//! [type]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.1
58//! [GenericCertification]: crate::types::SignatureType::GenericCertification
59//! [PersonaCertification]: crate::types::SignatureType::PersonaCertification
60//! [CasualCertification]: crate::types::SignatureType::CasualCertification
61//! [PositiveCertification]: crate::types::SignatureType::PositiveCertification
62//! [Trust Signature]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
63//! [`SignatureBuilder::set_trust_signature`]: crate::packet::signature::SignatureBuilder::set_trust_signature()
64//!
65//! To scope a trust signature, you add a [Regular Expression
66//! subpacket] to it using
67//! [`SignatureBuilder::set_regular_expression`] or
68//! [`SignatureBuilder::add_regular_expression`].
69//!
70//! To extract any regular expressions, you can use
71//! [`SubpacketAreas::regular_expressions`].
72//!
73//! [Regular Expression subpacket]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.22
74//! [`SignatureBuilder::set_regular_expression`]: crate::packet::signature::SignatureBuilder::set_regular_expression()
75//! [`SignatureBuilder::add_regular_expression`]: crate::packet::signature::SignatureBuilder::add_regular_expression()
76//! [`SubpacketAreas::regular_expressions`]: crate::packet::signature::subpacket::SubpacketAreas::regular_expressions()
77//!
78//! # Caveat Emptor
79//!
80//! Note: GnuPG has [very limited regular expression support]. In
81//! particular, it only recognizes regular expressions with the
82//! following form:
83//!
84//! [very limited regular expression support]: https://dev.gnupg.org/source/gnupg/browse/master/g10/trustdb.c;15e065dee891eef9545556f210b4199107999869$1558
85//!
86//! ```text
87//! <[^>]+[@.]example\.com>$
88//! ```
89//!
90//! Further, it escapes any operators between the `<[^>]+[@.]` and the
91//! `>$` except `.` and `\`. Otherwise, GnuPG treats the regular
92//! expression as a literal domain (e.g., `example.com`).
93//!
94//! Further, until [version 2.2.22] (released in August 2020), GnuPG
95//! did not support regular expressions on Windows, and other systems
96//! that don't include `regcomp`. On these systems, if a trust
97//! signature included a regular expression, GnuPG conservatively
98//! considered the whole trust signature to match nothing.
99//!
100//! [version 2.2.22]: https://dev.gnupg.org/T5030
101//!
102//! # Examples
103//!
104//! A CA signs two certificates, one for Alice, who works at
105//! `example.com`, and one for Bob, who is associated with `some.org`.
106//! Carol then creates a trust signature for the CA, which she scopes
107//! to `example.org` and `example.com`. We then confirm that Carol
108//! can use the CA to authenticate Alice, but not Bob.
109//!
110//! ```
111//! use sequoia_openpgp as openpgp;
112//! use openpgp::cert::prelude::*;
113//! use openpgp::packet::prelude::*;
114//! use openpgp::policy::StandardPolicy;
115//! use openpgp::regex::RegexSet;
116//! use openpgp::types::SignatureType;
117//!
118//! # fn main() -> openpgp::Result<()> {
119//! let p = &StandardPolicy::new();
120//!
121//! let (ca, _)
122//! = CertBuilder::general_purpose(Some("OpenPGP CA <openpgp-ca@example.com>"))
123//! .generate()?;
124//! let mut ca_signer = ca.primary_key().key().clone()
125//! .parts_into_secret()?.into_keypair()?;
126//! let ca_userid = ca.with_policy(p, None)?
127//! .userids().nth(0).expect("Added a User ID").userid();
128//!
129//! // The CA certifies "Alice <alice@example.com>".
130//! let (alice, _)
131//! = CertBuilder::general_purpose(Some("Alice <alice@example.com>"))
132//! .generate()?;
133//! let alice_userid = alice.with_policy(p, None)?
134//! .userids().nth(0).expect("Added a User ID").userid();
135//! let alice_certification = SignatureBuilder::new(SignatureType::GenericCertification)
136//! .sign_userid_binding(
137//! &mut ca_signer,
138//! alice.primary_key().component(),
139//! alice_userid)?;
140//! let alice = alice.insert_packets(alice_certification.clone())?.0;
141//! # assert!(alice.clone().into_packets().any(|p| {
142//! # match p {
143//! # Packet::Signature(sig) => sig == alice_certification,
144//! # _ => false,
145//! # }
146//! # }));
147//!
148//! // The CA certifies "Bob <bob@some.org>".
149//! let (bob, _)
150//! = CertBuilder::general_purpose(Some("Bob <bob@some.org>"))
151//! .generate()?;
152//! let bob_userid = bob.with_policy(p, None)?
153//! .userids().nth(0).expect("Added a User ID").userid();
154//! let bob_certification = SignatureBuilder::new(SignatureType::GenericCertification)
155//! .sign_userid_binding(
156//! &mut ca_signer,
157//! bob.primary_key().component(),
158//! bob_userid)?;
159//! let bob = bob.insert_packets(bob_certification.clone())?.0;
160//! # assert!(bob.clone().into_packets().any(|p| {
161//! # match p {
162//! # Packet::Signature(sig) => sig == bob_certification,
163//! # _ => false,
164//! # }
165//! # }));
166//!
167//!
168//! // Carol tsigns the CA's certificate.
169//! let (carol, _)
170//! = CertBuilder::general_purpose(Some("Carol <carol@another.net>"))
171//! .generate()?;
172//! let mut carol_signer = carol.primary_key().key().clone()
173//! .parts_into_secret()?.into_keypair()?;
174//!
175//! let ca_tsig = SignatureBuilder::new(SignatureType::GenericCertification)
176//! .set_trust_signature(2, 120)?
177//! .set_regular_expression("<[^>]+[@.]example\\.org>$")?
178//! .add_regular_expression("<[^>]+[@.]example\\.com>$")?
179//! .sign_userid_binding(
180//! &mut carol_signer,
181//! ca.primary_key().component(),
182//! ca_userid)?;
183//! let ca = ca.insert_packets(ca_tsig.clone())?.0;
184//! # assert!(ca.clone().into_packets().any(|p| {
185//! # match p {
186//! # Packet::Signature(sig) => sig == ca_tsig,
187//! # _ => false,
188//! # }
189//! # }));
190//!
191//!
192//! // Carol now tries to authenticate Alice and Bob's certificates
193//! // using the CA as a trusted introducer based on `ca_tsig`.
194//! let res = RegexSet::from_signature(&ca_tsig)?;
195//!
196//! // Should be able to authenticate Alice.
197//! let alice_ua = alice.with_policy(p, None)?
198//! .userids().nth(0).expect("Added a User ID");
199//! # assert!(res.matches_userid(alice_ua.userid()));
200//! let mut authenticated = false;
201//! for c in alice_ua.certifications() {
202//! if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
203//! if c.clone().verify_userid_binding(
204//! ca.primary_key().key(),
205//! alice.primary_key().key(),
206//! alice_ua.userid()).is_ok()
207//! {
208//! authenticated |= res.matches_userid(alice_ua.userid());
209//! }
210//! }
211//! }
212//! assert!(authenticated);
213//!
214//! // But, although the CA has certified Bob's key, Carol doesn't rely
215//! // on it, because Bob's email address ("bob@some.org") is out of
216//! // scope (some.org, not example.com).
217//! let bob_ua = bob.with_policy(p, None)?
218//! .userids().nth(0).expect("Added a User ID");
219//! # assert!(! res.matches_userid(bob_ua.userid()));
220//! let mut have_certification = false;
221//! let mut authenticated = false;
222//! for c in bob_ua.certifications() {
223//! if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
224//! if c.clone().verify_userid_binding(
225//! ca.primary_key().key(),
226//! bob.primary_key().key(),
227//! bob_ua.userid()).is_ok()
228//! {
229//! have_certification = true;
230//! authenticated |= res.matches_userid(bob_ua.userid());
231//! }
232//! }
233//! }
234//! assert!(have_certification);
235//! assert!(! authenticated);
236//! # Ok(()) }
237//! ```
238
239use std::borrow::Borrow;
240use std::fmt;
241
242use lalrpop_util::ParseError;
243use regex_syntax::hir::{self, Hir};
244
245use crate::Error;
246use crate::Result;
247use crate::packet::prelude::*;
248use crate::types::SignatureType;
249
250pub(crate) mod lexer;
// Declares the module `grammar`, which holds the parser that LALRPOP
// generates from this module's grammar file.  The lints are
// presumably silenced because the code is machine-generated.
lalrpop_util::lalrpop_mod!(
    #[allow(clippy::all)]
    #[allow(unused_parens)]
    grammar,
    "/regex/grammar.rs"
);
257
258pub(crate) use self::lexer::Token;
259pub(crate) use self::lexer::{Lexer, LexicalError};
260
// Handed to `tracer!` by the functions in this module.  Presumably
// switches their `t!` trace output on or off; confirm against the
// macro's definition.
const TRACE: bool = false;
262
263// Convert tokens into strings.
264//
265// Unfortunately, we can't implement From, because we don't define
266// ParseError in this crate.
267pub(crate) fn parse_error_downcast(e: ParseError<usize, Token, LexicalError>)
268 -> ParseError<usize, String, LexicalError>
269{
270 match e {
271 ParseError::UnrecognizedToken {
272 token: (start, t, end),
273 expected,
274 } => ParseError::UnrecognizedToken {
275 token: (start, t.into(), end),
276 expected,
277 },
278
279 ParseError::ExtraToken {
280 token: (start, t, end),
281 } => ParseError::ExtraToken {
282 token: (start, t.into(), end),
283 },
284
285 ParseError::InvalidToken { location }
286 => ParseError::InvalidToken { location },
287
288 ParseError::User { error }
289 => ParseError::User { error },
290
291 ParseError::UnrecognizedEof { location, expected }
292 => ParseError::UnrecognizedEof { location, expected },
293 }
294}
295
296// Used by grammar.lalrpop to generate a regex class (e.g. '[a-ce]').
297fn generate_class(caret: bool, chars: impl Iterator<Item=char>) -> Hir
298{
299 tracer!(TRACE, "generate_class");
300
301 // Dealing with ranges is a bit tricky. We need to examine three
302 // tokens. If the middle one is a dash, it's a range.
303
304 let chars: Vec<Option<char>> = chars
305 // Pad it out so what we can use windows to get three
306 // characters at a time, and be sure to process all
307 // characters.
308 .map(Some)
309 .chain(std::iter::once(None))
310 .chain(std::iter::once(None))
311 .collect();
312 if chars.len() == 2 {
313 // The grammar doesn't allow an empty class.
314 unreachable!();
315 } else {
316 let r = chars
317 .windows(3)
318 .scan(0,
319 |skip: &mut usize, x: &[Option<char>]|
320 // Scan stops if the result is None.
321 // filter_map keeps only those elements that
322 // are Some.
323 -> Option<Option<hir::ClassUnicodeRange>>
324 {
325 if *skip > 0 {
326 *skip -= 1;
327 t!("Skipping: {:?} (skip now: {})", x, skip);
328 Some(None)
329 } else {
330 match (x[0], x[1], x[2]) {
331 (Some(a), Some('-'), Some(c)) => {
332 // We've got a real range.
333 *skip = 2;
334 t!("range for '{}-{}'", a, c);
335 Some(Some(hir::ClassUnicodeRange::new(a, c)))
336 }
337 (Some(a), _, _) => {
338 t!("range for '{}'", a);
339 Some(Some(hir::ClassUnicodeRange::new(a, a)))
340 }
341 (None, _, _) => unreachable!(),
342 }
343 }
344 })
345 .flatten();
346 let mut class = hir::Class::Unicode(hir::ClassUnicode::new(r));
347 if caret {
348 class.negate();
349 }
350 Hir::class(class)
351 }
352}
353
354/// A compiled OpenPGP regular expression for matching UTF-8 encoded
355/// strings.
356///
357/// A `Regex` contains a regular expression compiled according to the
358/// rules defined in [Section 8 of RFC 9580] modulo two differences.
359/// First, the compiler only works on UTF-8 strings (not bytes).
360/// Second, ranges in character classes are between UTF-8 characters,
361/// not just ASCII characters. Further, by default, strings that
362/// don't pass a sanity check (in particular, include Unicode control
363/// characters) never match. This behavior can be customized using
364/// [`Regex::disable_sanitizations`].
365///
366/// [Section 8 of RFC 9580]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
367/// [trust signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
368/// [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
369///
370/// Regular expressions are used to scope the trust that [trust
371/// signatures] extend.
372///
373/// When working with trust signatures, you'll usually want to use the
374/// [`RegexSet`] data structure, which already implements the correct
375/// semantics.
376///
377///
378/// See the [module-level documentation] for more details.
379///
380/// [module-level documentation]: self
381///
382/// # A note on equality
383///
384/// We define equality on `Regex` as the equality of the uncompiled
385/// regular expression given to the constructor and whether
386/// sanitizations are enabled.
#[derive(Clone, Debug)]
pub struct Regex {
    /// The original regular expression.
    ///
    /// Equality is defined using this and `disable_sanitizations`.
    re: String,
    /// The compiled regular expression, which does the actual
    /// matching.
    regex: regex::Regex,
    /// If set, `is_match` does not reject strings that contain
    /// control characters.
    disable_sanitizations: bool,
}
// Presumably a compile-time check that `Regex` is `Send` and `Sync`;
// see the macro's definition.
assert_send_and_sync!(Regex);
397
398impl PartialEq for Regex {
399 fn eq(&self, other: &Self) -> bool {
400 self.re == other.re
401 && self.disable_sanitizations == other.disable_sanitizations
402 }
403}
404
405impl Eq for Regex {}
406
407impl Regex {
408 /// Parses and compiles the regular expression.
409 ///
410 /// By default, strings that don't pass a sanity check (in
411 /// particular, include Unicode control characters) never match.
412 /// This behavior can be customized using
413 /// [`Regex::disable_sanitizations`].
414 ///
415 /// [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
416 pub fn new(re: &str) -> Result<Self>
417 {
418 let lexer = Lexer::new(re);
419 let hir = match grammar::RegexParser::new().parse(re, lexer) {
420 Ok(hir) => hir,
421 Err(err) => return Err(parse_error_downcast(err).into()),
422 };
423
424 // Converting the Hir to a string and the compiling that is
425 // apparently the canonical way to convert a Hir to a Regex
426 // (at least it is what rip-grep does), which the author of
427 // regex also wrote. See
428 // ripgrep/crates/regex/src/config.rs:ConfiguredHir::regex.
429 let regex = regex::RegexBuilder::new(&hir.to_string())
430 .build()?;
431
432 Ok(Self {
433 re: re.into(),
434 regex,
435 disable_sanitizations: false,
436 })
437 }
438
439 /// Parses and compiles the regular expression.
440 ///
441 /// Returns an error if `re` is not a valid UTF-8 string.
442 ///
443 /// By default, strings that don't pass a sanity check (in
444 /// particular, include Unicode control characters) never match.
445 /// This behavior can be customized using
446 /// [`Regex::disable_sanitizations`].
447 ///
448 /// [`Regex::disable_sanitizations`]: Regex::disable_sanitizations()
449 pub fn from_bytes(re: &[u8]) -> Result<Self> {
450 Self::new(std::str::from_utf8(re)?)
451 }
452
453 /// Returns the string-representation of the regular expression.
454 pub fn as_str(&self) -> &str {
455 &self.re
456 }
457
458 /// Controls whether matched strings must pass a sanity check.
459 ///
460 /// If `false` (the default), i.e., sanity checks are enabled, and
461 /// the string doesn't pass the sanity check (in particular, it
462 /// contains a Unicode control character according to
463 /// [`char::is_control`], including newlines and an embedded `NUL`
464 /// byte), this returns `false`.
465 ///
466 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
467 pub fn disable_sanitizations(&mut self, disabled: bool) {
468 self.disable_sanitizations = disabled;
469 }
470
471 /// Returns whether the regular expression matches the string.
472 ///
473 /// If sanity checks are enabled (the default) and the string
474 /// doesn't pass the sanity check (in particular, it contains a
475 /// Unicode control character according to [`char::is_control`],
476 /// including newlines and an embedded `NUL` byte), this returns
477 /// `false`.
478 ///
479 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
480 pub fn is_match(&self, s: &str) -> bool {
481 if ! self.disable_sanitizations && s.chars().any(char::is_control) {
482 return false;
483 }
484
485 self.is_match_clean(s)
486 }
487
488 // is_match, but without the sanity check.
489 fn is_match_clean(&self, s: &str) -> bool {
490 self.regex.is_match(s)
491 }
492
493 /// Returns whether the regular expression matches the User ID.
494 ///
495 /// If the User ID is not a valid UTF-8 string, this returns
496 /// `false`.
497 ///
498 /// If sanity checks are enabled (the default) and the string
499 /// doesn't pass the sanity check (in particular, it contains a
500 /// Unicode control character according to [`char::is_control`],
501 /// including newlines and an embedded `NUL` byte), this returns
502 /// `false`.
503 ///
504 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
505 pub fn matches_userid(&self, u: &UserID) -> bool {
506 if let Ok(u) = std::str::from_utf8(u.value()) {
507 self.is_match(u)
508 } else {
509 false
510 }
511 }
512}
513
/// What the regular expressions in a `RegexSet` compiled to.
#[derive(Clone, Debug)]
enum RegexSet_ {
    /// At least one regular expression could be parsed.  The `Regex`
    /// is the alternation of all of those that could.
    Regex(Regex),
    /// Regular expressions were supplied, but none was usable.
    Invalid,
    /// No regular expressions were supplied, or the set was created
    /// using `RegexSet::everything`.
    Everything,
}
assert_send_and_sync!(RegexSet_);
521
522/// A set of regular expressions.
523///
524/// A `RegexSet` encapsulates a set of regular expressions. The
525/// regular expressions are compiled according to the rules defined in
526/// [Section 8 of RFC 9580] modulo two differences. First, the
527/// compiler only works on UTF-8 strings (not bytes). Second, ranges
528/// in character classes are between UTF-8 characters, not just ASCII
529/// characters. Further, by default, strings that don't pass a sanity
530/// check (in particular, include Unicode control characters) never
531/// match. This behavior can be customized using
532/// [`RegexSet::disable_sanitizations`].
533///
534/// [Section 8 of RFC 9580]: https://www.rfc-editor.org/rfc/rfc9580.html#section-8
535/// [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
536///
537/// `RegexSet` implements the semantics of [regular expression]s used
538/// in [Trust Signatures]. In particular, a `RegexSet` makes it
539/// easier to deal with trust signatures that:
540///
/// - Contain multiple Regular Expression subpackets,
542/// - Have no Regular Expression subpackets, and/or
543/// - Include one or more Regular Expression subpackets that are invalid.
544///
/// [regular expression]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.22
546/// [Trust Signatures]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
547///
548/// `RegexSet` compiles each regular expression individually. If
549/// there are no regular expressions, the `RegexSet` matches
550/// everything. If a regular expression is invalid, `RegexSet` treats
551/// it as if it doesn't match anything. Thus, if all regular
552/// expressions are invalid, the `RegexSet` matches nothing (not
553/// everything!).
554///
555/// See the [module-level documentation] for more details.
556///
557/// [module-level documentation]: self
558///
559/// # A note on equality
560///
561/// We define equality on `RegexSet` as the equality of the uncompiled
562/// regular expressions given to the constructor and whether
563/// sanitizations are enabled.
#[derive(Clone)]
pub struct RegexSet {
    /// The original regular expressions.
    ///
    /// Equality is defined using this and `disable_sanitizations`.
    re_bytes: Vec<Vec<u8>>,
    /// What the regular expressions compiled to.
    re_set: RegexSet_,
    /// Whether sanitizations are disabled.  The constructors set
    /// this to `false`.
    disable_sanitizations: bool,
}
assert_send_and_sync!(RegexSet);
574
575impl PartialEq for RegexSet {
576 fn eq(&self, other: &Self) -> bool {
577 self.re_bytes == other.re_bytes
578 && self.disable_sanitizations == other.disable_sanitizations
579 }
580}
581
582impl Eq for RegexSet {}
583
584impl fmt::Debug for RegexSet {
585 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
586 let mut d = f.debug_struct("RegexSet");
587 match self.re_set {
588 RegexSet_::Everything => {
589 d.field("regex", &"<Everything>")
590 }
591 RegexSet_::Invalid => {
592 d.field("regex", &"<Invalid>")
593 }
594 RegexSet_::Regex(ref r) => {
595 d.field("regex", &r.regex)
596 }
597 }
598 .field("sanitizations", &!self.disable_sanitizations)
599 .finish()
600 }
601}
602
603impl RegexSet {
604 /// Parses and compiles the regular expressions.
605 ///
606 /// Invalid regular expressions do not cause this to fail. See
607 /// [`RegexSet`]'s top-level documentation for details.
608 ///
609 ///
610 /// By default, strings that don't pass a sanity check (in
611 /// particular, include Unicode control characters) never match.
612 /// This behavior can be customized using
613 /// [`RegexSet::disable_sanitizations`].
614 ///
615 /// [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
616 ///
617 /// # Examples
618 ///
619 /// ```
620 /// use sequoia_openpgp as openpgp;
621 /// use openpgp::regex::RegexSet;
622 ///
623 /// # fn main() -> openpgp::Result<()> {
624 /// // Extract the regex and compile it.
625 /// let res = &[
626 /// "<[^>]+[@.]example\\.org>$",
627 /// // Invalid.
628 /// "[..",
629 /// ];
630 ///
631 /// let res = RegexSet::new(res)?;
632 ///
633 /// assert!(res.is_match("Alice <alice@example.org>"));
634 /// assert!(! res.is_match("Bob <bob@example.com>"));
635 /// # Ok(()) }
636 /// ```
637 pub fn new<'a, RE, I>(res: I) -> Result<Self>
638 where RE: Borrow<&'a str>,
639 I: IntoIterator<Item=RE>,
640 {
641 tracer!(TRACE, "RegexSet::new");
642
643 let mut regexes = Vec::with_capacity(2);
644 let mut had_good = false;
645 let mut had_bad = false;
646
647 let mut re_bytes = Vec::new();
648 for re in res {
649 let re = re.borrow();
650 re_bytes.push(re.as_bytes().into());
651
652 let lexer = Lexer::new(re);
653 match grammar::RegexParser::new().parse(re, lexer) {
654 Ok(hir) => {
655 had_good = true;
656 regexes.push(hir);
657 }
658 Err(err) => {
659 had_bad = true;
660 t!("Compiling {:?}: {}", re, err);
661 }
662 }
663 }
664
665 if had_bad && ! had_good {
666 t!("All regular expressions were invalid.");
667 Ok(RegexSet {
668 re_bytes,
669 re_set: RegexSet_::Invalid,
670 disable_sanitizations: false,
671 })
672 } else if ! had_bad && ! had_good {
673 // Match everything.
674 t!("No regular expressions provided.");
675 Ok(RegexSet {
676 re_bytes,
677 re_set: RegexSet_::Everything,
678 disable_sanitizations: false,
679 })
680 } else {
681 // Match any of the regular expressions.
682 Ok(RegexSet {
683 re_bytes,
684 re_set: RegexSet_::Regex(
685 Regex {
686 re: String::new(),
687 regex: regex::RegexBuilder::new(
688 &Hir::alternation(regexes).to_string())
689 .build()?,
690 disable_sanitizations: false,
691 }),
692 disable_sanitizations: false,
693 })
694 }
695 }
696
697 /// Parses and compiles the regular expressions.
698 ///
699 /// The regular expressions are first converted to UTF-8 strings.
700 /// Byte sequences that are not valid UTF-8 strings are considered
701 /// to be invalid regular expressions. Invalid regular
702 /// expressions do not cause this to fail. See [`RegexSet`]'s
703 /// top-level documentation for details.
704 ///
705 ///
706 /// By default, strings that don't pass a sanity check (in
707 /// particular, include Unicode control characters) never match.
708 /// This behavior can be customized using
709 /// [`RegexSet::disable_sanitizations`].
710 ///
711 /// [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
712 ///
713 /// # Examples
714 ///
715 /// ```
716 /// use sequoia_openpgp as openpgp;
717 /// use openpgp::regex::RegexSet;
718 ///
719 /// # fn main() -> openpgp::Result<()> {
720 /// // A valid and an invalid UTF-8 byte sequence. The invalid
721 /// // sequence doesn't match anything. But, that doesn't impact
722 /// // the other regular expressions.
723 /// let res: &[ &[u8] ] = &[
724 /// &b"<[^>]+[@.]example\\.org>$"[..],
725 /// // Invalid UTF-8.
726 /// &b"\xC3\x28"[..],
727 /// ];
728 /// assert!(std::str::from_utf8(res[0]).is_ok());
729 /// assert!(std::str::from_utf8(res[1]).is_err());
730 ///
731 /// let re_set = RegexSet::from_bytes(res.into_iter())?;
732 ///
733 /// assert!(re_set.is_match("Alice <alice@example.org>"));
734 /// assert!(! re_set.is_match("Bob <bob@example.com>"));
735 ///
736 /// // If we only have invalid UTF-8 strings, then nothing
737 /// // matches.
738 /// let res: &[ &[u8] ] = &[
739 /// // Invalid UTF-8.
740 /// &b"\xC3\x28"[..],
741 /// ];
742 /// assert!(std::str::from_utf8(res[0]).is_err());
743 ///
744 /// let re_set = RegexSet::from_bytes(res.into_iter())?;
745 ///
746 /// assert!(! re_set.is_match("Alice <alice@example.org>"));
747 /// assert!(! re_set.is_match("Bob <bob@example.com>"));
748 ///
749 ///
750 /// // But, if we have no regular expressions, everything matches.
751 /// let res: &[ &[u8] ] = &[];
752 /// let re_set = RegexSet::from_bytes(res.into_iter())?;
753 ///
754 /// assert!(re_set.is_match("Alice <alice@example.org>"));
755 /// assert!(re_set.is_match("Bob <bob@example.com>"));
756 /// # Ok(()) }
757 /// ```
758 pub fn from_bytes<'a, I, RE>(res: I) -> Result<Self>
759 where I: IntoIterator<Item=RE>,
760 RE: Borrow<&'a [u8]>,
761 {
762 let mut have_valid_utf8 = false;
763 let mut have_invalid_utf8 = false;
764 let mut re_bytes = Vec::new();
765 let re_set = Self::new(
766 res
767 .into_iter()
768 .scan((&mut have_valid_utf8, &mut have_invalid_utf8),
769 |(valid, invalid), re|
770 {
771 re_bytes.push(re.borrow().to_vec());
772 if let Ok(re) = std::str::from_utf8(re.borrow()) {
773 **valid = true;
774 Some(Some(re))
775 } else {
776 **invalid = true;
777 Some(None)
778 }
779 })
780 .flatten());
781
782 if !have_valid_utf8 && have_invalid_utf8 {
783 // None of the strings were valid UTF-8. Reject
784 // everything.
785 Ok(RegexSet {
786 re_bytes,
787 re_set: RegexSet_::Invalid,
788 disable_sanitizations: false,
789 })
790 } else {
791 // We had nothing or at least one string was valid UTF-8.
792 // RegexSet::new did the right thing.
793 re_set.map(|mut r| { r.re_bytes = re_bytes; r })
794 }
795 }
796
797 /// Returns the bytes-representation of the regular expressions.
798 pub fn as_bytes(&self) -> &[Vec<u8>] {
799 &self.re_bytes
800 }
801
802 /// Creates a `RegexSet` from the regular expressions stored in a
803 /// trust signature.
804 ///
805 /// This method is a convenience function, which extracts any
806 /// regular expressions from a [Trust Signature] and wraps them in a
807 /// `RegexSet`.
808 ///
809 /// [Trust Signature]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.3.21
810 ///
811 /// If the signature is not a valid trust signature (its [type] is
812 /// [GenericCertification], [PersonaCertification],
813 /// [CasualCertification], or [PositiveCertification], and the
814 /// [Trust Signature] subpacket is present), this returns an
815 /// error.
816 ///
817 /// [type]: https://www.rfc-editor.org/rfc/rfc9580.html#section-5.2.1
818 /// [GenericCertification]: crate::types::SignatureType::GenericCertification
819 /// [PersonaCertification]: crate::types::SignatureType::PersonaCertification
820 /// [CasualCertification]: crate::types::SignatureType::CasualCertification
821 /// [PositiveCertification]: crate::types::SignatureType::PositiveCertification
822 ///
823 /// By default, strings that don't pass a sanity check (in
824 /// particular, include Unicode control characters) never match.
825 /// This behavior can be customized using
826 /// [`RegexSet::disable_sanitizations`].
827 ///
828 /// [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
829 ///
830 /// # Examples
831 ///
832 /// ```
833 /// use sequoia_openpgp as openpgp;
834 /// # use openpgp::cert::prelude::*;
835 /// # use openpgp::packet::prelude::*;
836 /// # use openpgp::policy::StandardPolicy;
837 /// use openpgp::regex::RegexSet;
838 /// # use openpgp::types::SignatureType;
839 /// #
840 /// # fn main() -> openpgp::Result<()> {
841 /// # let p = &StandardPolicy::new();
842 /// #
843 /// # let (alice, _)
844 /// # = CertBuilder::general_purpose(Some("Alice <alice@example.org>"))
845 /// # .generate()?;
846 /// # let mut alices_signer = alice.primary_key().key().clone()
847 /// # .parts_into_secret()?.into_keypair()?;
848 /// #
849 /// # let (example_com, _)
850 /// # = CertBuilder::general_purpose(Some("OpenPGP CA <openpgp-ca@example.com>"))
851 /// # .generate()?;
852 /// # let example_com_userid = example_com.with_policy(p, None)?
853 /// # .userids().nth(0).expect("Added a User ID").userid();
854 /// #
855 /// # let certification = SignatureBuilder::new(SignatureType::GenericCertification)
856 /// # .set_trust_signature(1, 120)?
857 /// # .set_regular_expression("<[^>]+[@.]example\\.org>$")?
858 /// # .add_regular_expression("<[^>]+[@.]example\\.com>$")?
859 /// # .sign_userid_binding(
860 /// # &mut alices_signer,
861 /// # example_com.primary_key().component(),
862 /// # example_com_userid)?;
863 ///
864 /// // certification is a trust signature, which contains two regular
865 /// // expressions: one that matches all mail addresses for 'example.org'
866 /// // and another that matches all mail addresses for 'example.com'.
867 /// let certification: &Signature = // ...;
868 /// # &certification;
869 ///
870 /// // Extract the regex and compile it.
871 /// let res = RegexSet::from_signature(certification)?;
872 ///
873 /// // Some positive examples.
874 /// assert!(res.is_match("Alice <alice@example.org>"));
875 /// assert!(res.is_match("Bob <bob@example.com>"));
876 ///
877 /// // Wrong domain.
878 /// assert!(! res.is_match("Carol <carol@acme.com>"));
879 ///
880 /// // The standard regex, "<[^>]+[@.]example\\.org>$" only matches
881 /// // email addresses wrapped in <>.
882 /// assert!(! res.is_match("dave@example.com"));
883 ///
884 /// // And, it is case-sensitive.
885 /// assert!(res.is_match("Ellen <ellen@example.com>"));
886 /// assert!(! res.is_match("Ellen <ellen@EXAMPLE.COM>"));
887 /// # Ok(()) }
888 /// ```
889 pub fn from_signature(sig: &Signature) -> Result<Self>
890 {
891 use SignatureType::*;
892 match sig.typ() {
893 GenericCertification => (),
894 PersonaCertification => (),
895 CasualCertification => (),
896 PositiveCertification => (),
897 t => return Err(
898 Error::InvalidArgument(
899 format!(
900 "Expected a certification signature, found a {}",
901 t))
902 .into()),
903 }
904
905 if sig.trust_signature().is_none() {
906 return Err(
907 Error::InvalidArgument(
908 "Expected a trust signature, \
909 but the signature does not include \
910 a valid Trust Signature subpacket".into())
911 .into());
912 }
913
914 Self::from_bytes(sig.regular_expressions())
915 }
916
917 /// Returns a `RegexSet` that matches everything.
918 ///
919 /// Note: sanitizations are still enabled. So, to really match
920 /// everything, you still need to call
921 /// [`RegexSet::disable_sanitizations`].
922 ///
923 /// [`RegexSet::disable_sanitizations`]: RegexSet::disable_sanitizations()
924 ///
925 /// This can be used to optimize the evaluation of scoping rules
926 /// along a path: if a `RegexSet` matches everything, then it
927 /// doesn't further constrain the path.
928 pub fn everything() -> Self
929 {
930 Self {
931 re_bytes: vec![vec![]],
932 re_set: RegexSet_::Everything,
933 disable_sanitizations: false,
934 }
935 }
936
937 /// Returns whether a `RegexSet` matches everything.
938 ///
939 /// Normally, this only returns true if the `RegexSet` was created
940 /// using [`RegexSet::everything`]. [`RegexSet::new`],
941 /// [`RegexSet::from_bytes`], [`RegexSet::from_signature`] do
942 /// detect some regular expressions that match everything (e.g.,
943 /// if no regular expressions are supplied). But, they do not
944 /// guarantee that a `RegexSet` containing a regular expression
945 /// like `.?`, which does in fact match everything, is detected as
946 /// matching everything.
947 ///
948 /// [`RegexSet::everything`]: RegexSet::everything()
949 /// [`RegexSet::new`]: RegexSet::everything()
950 /// [`RegexSet::from_bytes`]: RegexSet::from_bytes()
951 /// [`RegexSet::from_signature`]: RegexSet::from_signature()
952 ///
953 /// # Examples
954 ///
955 /// ```
956 /// use sequoia_openpgp as openpgp;
957 /// use openpgp::regex::RegexSet;
958 ///
959 /// # fn main() -> openpgp::Result<()> {
960 /// assert!(RegexSet::everything().matches_everything());
961 /// let empty: &[ &str ] = &[];
962 /// assert!(RegexSet::new(empty)?.matches_everything());
963 ///
964 /// // A regular expression that matches everything. But
965 /// // `RegexSet` returns false, because it can't detect it.
966 /// let res: &[ &str ] = &[
967 /// &".?"[..],
968 /// ];
969 /// let re_set = RegexSet::new(res.into_iter())?;
970 /// assert!(! re_set.matches_everything());
971 /// # Ok(()) }
972 /// ```
973 pub fn matches_everything(&self) -> bool {
974 matches!(self.re_set, RegexSet_::Everything)
975 }
976
977 /// Controls whether strings with control characters are allowed.
978 ///
979 /// If `false` (the default), i.e., sanity checks are enabled, and
980 /// the string doesn't pass the sanity check (in particular, it
981 /// contains a Unicode control character according to
982 /// [`char::is_control`], including newlines and an embedded `NUL`
983 /// byte), this returns `false`.
984 ///
985 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
986 pub fn disable_sanitizations(&mut self, allowed: bool) {
987 self.disable_sanitizations = allowed;
988 if let RegexSet_::Regex(ref mut re) = self.re_set {
989 re.disable_sanitizations(allowed);
990 }
991 }
992
993 /// Returns whether the regular expression set matches the string.
994 ///
995 /// If sanity checks are enabled (the default) and the string
996 /// doesn't pass the sanity check (in particular, it contains a
997 /// Unicode control character according to [`char::is_control`],
998 /// including newlines and an embedded `NUL` byte), this returns
999 /// `false`.
1000 ///
1001 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
1002 ///
1003 /// If the `RegexSet` contains one or more regular expressions,
1004 /// this method returns whether at least one of the regular
1005 /// expressions matches. Invalid regular expressions never match.
1006 ///
1007 /// If the `RegexSet` does not contain any regular expressions
1008 /// (valid or otherwise), this method returns `true`.
1009 ///
1010 /// # Examples
1011 ///
1012 /// ```
1013 /// use sequoia_openpgp as openpgp;
1014 /// use openpgp::regex::RegexSet;
1015 ///
1016 /// # fn main() -> openpgp::Result<()> {
1017 /// // A regular expression that matches anything. (Note: this is
1018 /// // equivalent to providing no regular expressions.)
1019 /// let res: &[ &str ] = &[
1020 /// &""[..],
1021 /// ];
1022 /// let re_set = RegexSet::new(res.into_iter())?;
1023 ///
1024 /// assert!(re_set.is_match("Alice Lovelace <alice@example.org>"));
1025 ///
1026 /// // If a User ID has an embedded control character, it doesn't
1027 /// // match.
1028 /// assert!(! re_set.is_match("Alice <alice@example.org>\0"));
1029 /// # Ok(()) }
1030 /// ```
1031 pub fn is_match(&self, s: &str) -> bool {
1032 if ! self.disable_sanitizations && s.chars().any(char::is_control) {
1033 return false;
1034 }
1035
1036 match self.re_set {
1037 RegexSet_::Regex(ref re) =>
1038 re.is_match_clean(s),
1039 RegexSet_::Invalid =>
1040 false,
1041 RegexSet_::Everything =>
1042 true,
1043 }
1044 }
1045
1046 /// Returns whether the regular expression matches the User ID.
1047 ///
1048 /// If the User ID is not a valid UTF-8 string, this returns `false`.
1049 ///
1050 /// If sanity checks are enabled (the default) and the string
1051 /// doesn't pass the sanity check (in particular, it contains a
1052 /// Unicode control character according to [`char::is_control`],
1053 /// including newlines and an embedded `NUL` byte), this returns
1054 /// `false`.
1055 ///
1056 /// [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
1057 ///
1058 /// If the `RegexSet` contains one or more regular expressions,
1059 /// this method returns whether at least one of the regular
1060 /// expressions matches. Invalid regular expressions never match.
1061 ///
1062 /// If the `RegexSet` does not contain any regular expressions
1063 /// (valid or otherwise), this method returns `true`.
1064 ///
1065 /// # Examples
1066 ///
1067 /// ```
1068 /// use sequoia_openpgp as openpgp;
1069 /// use openpgp::packet::UserID;
1070 /// use openpgp::regex::RegexSet;
1071 ///
1072 /// # fn main() -> openpgp::Result<()> {
1073 /// // A regular expression that matches anything. (Note: this is
1074 /// // equivalent to providing no regular expressions.)
1075 /// let res: &[ &str ] = &[
1076 /// "",
1077 /// ];
1078 /// let re_set = RegexSet::new(res.into_iter())?;
1079 ///
1080 /// assert!(re_set.matches_userid(
1081 /// &UserID::from(&b"Alice Lovelace <alice@example.org>"[..])));
1082 ///
1083 /// // If a User ID is not valid UTF-8, it never matches.
1084 /// assert!(! re_set.matches_userid(
1085 /// &UserID::from(&b"Alice \xC3\x28 Lovelace <alice@example.org>"[..])));
1086 ///
1087 /// // If a User ID has an embedded control character, it doesn't
1088 /// // match.
1089 /// assert!(! re_set.matches_userid(
1090 /// &UserID::from(&b"Alice <alice@example.org>\0"[..])));
1091 /// # Ok(()) }
1092 /// ```
1093 pub fn matches_userid(&self, u: &UserID) -> bool
1094 {
1095 if let Ok(u) = std::str::from_utf8(u.value()) {
1096 self.is_match(u)
1097 } else {
1098 false
1099 }
1100 }
1101}
1102
1103#[cfg(test)]
1104mod tests {
1105 use super::*;
1106
1107 #[test]
1108 fn regex() -> Result<()> {
1109 fn a(regex: &str, matches: &[(bool, &str)]) {
1110 eprint!("{} -> ", regex);
1111 let mut compiled = Regex::new(regex).unwrap();
1112 compiled.disable_sanitizations(true);
1113 eprintln!("{:?}", compiled);
1114 for &(matches, text) in matches {
1115 assert_eq!(matches, compiled.is_match(text),
1116 "regex: {}\n text: {:?} should{} match",
1117 regex, text, if matches { "" } else { " not" });
1118 }
1119 }
1120 fn f(regex: &str) {
1121 eprint!("{} -> ", regex);
1122 let compiled = Regex::new(regex);
1123 assert!(compiled.is_err());
1124 eprintln!("failed (expected)");
1125 }
1126
1127 // Test an important corner case: the + should only apply to
1128 // the b! See: https://github.com/rust-lang/regex/issues/731
1129 a("xab+y", &[
1130 (true, "xaby"),
1131 (true, "xabby"),
1132 (false, "xababy"),
1133 ]);
1134 a("x(ab+)y", &[
1135 (false, "xy"),
1136 (false, "xay"),
1137 (true, "xaby"),
1138 (true, "xabby"),
1139 (true, "xabbby"),
1140 (false, "xababy"),
1141 ]);
1142 // But here the + matches "ab", not just the "b".
1143 a("x(ab)+y", &[
1144 (false, "xy"),
1145 (true, "xaby"),
1146 (false, "xabby"),
1147 (true, "xababy"),
1148 (true, "xabababy"),
1149 (false, "x(ab)y"),
1150 ]);
1151
1152
1153
1154 a("", &[
1155 (true, "s"),
1156 (true, "ss"),
1157 ]);
1158 a("s", &[
1159 (true, "s"),
1160 (true, "ss"),
1161 (false, "a"),
1162 (true, "hello, my prettiessss"),
1163 (false, "S"),
1164 ]);
1165 a("ss", &[
1166 (false, "s"),
1167 (true, "ss"),
1168 (true, "sss"),
1169 (false, "this has lots of ses, but not two ses together"),
1170 (true, "halloss"),
1171 ]);
1172
1173 a("a|b", &[
1174 (true, "a"),
1175 (true, "b"),
1176 (false, "c"),
1177 (true, "xxxaxxxbxxx"),
1178 ]);
1179 a("a|b|c", &[
1180 (true, "a"),
1181 (true, "b"),
1182 (true, "c"),
1183 (false, "d"),
1184 (true, "xxxaxxxbxxx"),
1185 ]);
1186 // This should match anything.
1187 a("|a", &[
1188 (true, "a"),
1189 (true, "b"),
1190 ]);
1191 a("a|", &[
1192 (true, "a"),
1193 (true, "b"),
1194 ]);
1195 a("|a|b", &[
1196 (true, "a"),
1197 (true, "b"),
1198 (true, "c"),
1199 ]);
1200 a("|a|b|c|d", &[
1201 (true, "a"),
1202 (true, "b"),
1203 (true, "c"),
1204 (true, "d"),
1205 (true, "eeee"),
1206 ]);
1207 a("a|b|", &[
1208 (true, "a"),
1209 (true, "b"),
1210 (true, "c"),
1211 ]);
1212 a("a|b|c|", &[
1213 (true, "a"),
1214 (true, "b"),
1215 (true, "c"),
1216 (true, "d"),
1217 (true, "eeee"),
1218 ]);
1219 a("|", &[
1220 (true, "a"),
1221 (true, "b"),
1222 (true, "c"),
1223 (true, "d"),
1224 (true, "eeee"),
1225 ]);
1226 a("|a|", &[
1227 (true, "a"),
1228 (true, "b"),
1229 (true, "c"),
1230 (true, "d"),
1231 (true, "eeee"),
1232 ]);
1233 a("|a|b|", &[
1234 (true, "a"),
1235 (true, "b"),
1236 (true, "c"),
1237 (true, "d"),
1238 (true, "eeee"),
1239 ]);
1240 // A nested empty.
1241 a("(a|)|b", &[
1242 (true, "a"),
1243 (true, "b"),
1244 ]);
1245 // empty+
1246 a("(a|b|()+)", &[
1247 (true, "a"),
1248 (true, "b"),
1249 ]);
1250 // (empty)+
1251 a("(a|b|(())+)", &[
1252 (true, "a"),
1253 (true, "b"),
1254 ]);
1255 // Multiple empty branches.
1256 a("(a|b|(()())())", &[
1257 (true, "a"),
1258 (true, "b"),
1259 ]);
1260 a("(a|b|(()())())|", &[
1261 (true, "a"),
1262 (true, "b"),
1263 ]);
1264
1265 // This is: "ab" or "cd", not a followed by b or c followed by d:
1266 //
1267 // A regular expression is zero or more branches, separated by '|'.
1268 // ...
1269 // A branch is zero or more pieces, concatenated.
1270 // ...
1271 // A piece is an atom
1272 // ...
1273 // An atom is... a single character.
1274 a("ab|cd", &[
1275 (true, "abd"),
1276 (true, "acd"),
1277 (true, "abcd"),
1278 (false, "ad"),
1279 (false, "b"),
1280 (false, "c"),
1281 (false, "bb"),
1282 ]);
1283
1284 a("a*", &[
1285 (true, ""),
1286 (true, "a"),
1287 (true, "aa"),
1288 (true, "b"),
1289 ]);
1290 a("xa*y", &[
1291 (true, "xy"),
1292 (true, "xay"),
1293 (true, "xaay"),
1294 (false, "y"),
1295 (false, "ay"),
1296 (false, "aay"),
1297 (false, "x y"),
1298 (false, "x ay"),
1299 (false, "x aay"),
1300 ]);
1301 f("*");
1302
1303 a("a+", &[
1304 (false, ""),
1305 (true, "a"),
1306 (true, "aa"),
1307 (false, "b"),
1308 (true, "baab"),
1309 (true, "by ab"),
1310 (true, "baa b"),
1311 ]);
1312 a("ab+", &[
1313 (false, ""),
1314 (false, "a"),
1315 (false, "b"),
1316 (true, "ab"),
1317 (false, "bb"),
1318 (true, "baab"),
1319 (true, "by ab"),
1320 (false, "baa b"),
1321 ]);
1322 f("+");
1323
1324 a("a?", &[
1325 (true, ""),
1326 (true, "a"),
1327 (true, "aa"),
1328 (true, "aaa"),
1329 (true, "b"),
1330 (true, "baab"),
1331 (true, "by ab"),
1332 (true, "baa b"),
1333 ]);
1334 a("xa?y", &[
1335 (false, ""),
1336 (true, "xy"),
1337 (false, "a"),
1338 (true, "xay"),
1339 (false, "aa"),
1340 (false, "xaay"),
1341 (false, "b"),
1342 (false, "bxaayb"),
1343 (true, "by xayb"),
1344 (true, "baxay b"),
1345 ]);
1346 f("?");
1347
1348 f("a*?");
1349 a("a*b?c+", &[
1350 (false, ""),
1351 (true, "c"),
1352 (true, "abc"),
1353 (true, "aabbcc"),
1354 (false, "aab"),
1355 (true, "aaaaaabcccccccc"),
1356 ]);
1357 f("a?*+");
1358
1359 a("a?|b+", &[
1360 (true, ""),
1361 (true, "aaa"),
1362 (true, "bbb"),
1363 (true, "abaa"),
1364 ]);
1365 a("a+|b+", &[
1366 (false, ""),
1367 (true, "a"),
1368 (true, "aaa"),
1369 (true, "b"),
1370 (true, "bbb"),
1371 (true, "abaa"),
1372 ]);
1373 a("a+|b+|c+", &[
1374 (false, ""),
1375 (true, "a"),
1376 (true, "aaa"),
1377 (true, "b"),
1378 (true, "bbb"),
1379 (true, "abaa"),
1380 (true, "c"),
1381 (true, "ccc"),
1382 (true, "abaaccc"),
1383 ]);
1384 a("xa+|b+|c+y", &[
1385 (false, ""),
1386 (true, "xa"),
1387 (true, "xaa"),
1388 (true, "b"),
1389 (true, "bb"),
1390 (true, "cy"),
1391 (true, "ccy"),
1392
1393 (false, "a"),
1394 (false, "aaa"),
1395 (false, "c"),
1396 (false, "ccc"),
1397 ]);
1398 a("xa+y|sb+u", &[
1399 (false, ""),
1400 (true, "xay"),
1401 (true, "xaay"),
1402 (true, "sbu"),
1403 (true, "sbbu"),
1404 (true, "xysbu"),
1405
1406 (false, "a"),
1407 (false, "aaa"),
1408 (false, "xyu"),
1409 (false, "ccc"),
1410 ]);
1411 a("a*|a+|ab+cd+|", &[
1412 (true, ""),
1413 ]);
1414
1415 a("()", &[
1416 (true, ""),
1417 (true, "xyzzy"),
1418 ]);
1419 a("(())", &[
1420 (true, ""),
1421 (true, "xyzzy"),
1422 ]);
1423 a("((()))", &[
1424 (true, ""),
1425 (true, "xyzzy"),
1426 ]);
1427 f("((())");
1428 f("((())))");
1429 a("(a)", &[
1430 (true, "a"),
1431 (true, "(a)"),
1432 (false, "b"),
1433 ]);
1434 a("x(a)y", &[
1435 (false, "xy"),
1436 (true, "xay"),
1437 (false, "x(a)y"),
1438 (true, "(xay)"),
1439 (false, "a"),
1440 (false, "yax"),
1441 ]);
1442 a("x(ab)y", &[
1443 (false, "xy"),
1444 (false, "xay"),
1445 (false, "xby"),
1446 (true, "xaby"),
1447 (false, "x(ab)y"),
1448 (true, "(xaby)"),
1449 ]);
1450 a("x(ab)(cd)y", &[
1451 (true, "xabcdy"),
1452 (true, "zxabcdyz"),
1453 ]);
1454 a("a(bc)d(ef)g", &[
1455 (true, "abcdefg"),
1456 (true, "xabcdefgy"),
1457 (false, "xa(bc)d(ef)gy"),
1458 ]);
1459 a("a((bc))d((ef))g", &[
1460 (true, "abcdefg"),
1461 (true, "xabcdefgy"),
1462 (false, "xa(bc)d(ef)gy"),
1463 ]);
1464 a("a(b(c)d)e", &[
1465 (true, "abcde"),
1466 (true, "xabcdey"),
1467 (false, "xa(b(c)d)ey"),
1468 ]);
1469 a("x(a|b)y", &[
1470 (false, "xy"),
1471 (true, "xay"),
1472 (true, "xby"),
1473 (false, "xaay"),
1474 (false, "xbby"),
1475 (false, "xaby"),
1476 (false, "xaaby"),
1477 (false, "xabby"),
1478 (false, "xaabby"),
1479 (false, "xcy"),
1480 ]);
1481 a("x(a|bc)y", &[
1482 (false, "xy"),
1483 (true, "xay"),
1484 (false, "xby"),
1485 (true, "xbcy"),
1486 (false, "xaay"),
1487 (false, "xbby"),
1488 (false, "xaby"),
1489 (false, "xabcy"),
1490 (false, "xabby"),
1491 (false, "xaabby"),
1492 (false, "xcy"),
1493 (false, "xacy"),
1494 ]);
1495 a("x(a|b|c)y", &[
1496 (false, "xy"),
1497 (true, "xay"),
1498 (true, "xby"),
1499 (true, "xcy"),
1500 (false, "xaay"),
1501 (false, "xbby"),
1502 (false, "xaby"),
1503 (false, "xabcy"),
1504 (false, "xabby"),
1505 (false, "xaabby"),
1506 (false, "xacy"),
1507 ]);
1508 a("x(a|b)(c|d)y", &[
1509 (false, "xy"),
1510 (false, "xay"),
1511 (false, "xby"),
1512 (false, "xcy"),
1513 (false, "xdy"),
1514 (false, "xaay"),
1515 (false, "xbby"),
1516 (false, "xccy"),
1517 (false, "xddy"),
1518 (false, "xaby"),
1519 (false, "xcdy"),
1520 (true, "xacy"),
1521 (true, "xady"),
1522 (true, "xbcy"),
1523 (true, "xbdy"),
1524 (false, "xabcy"),
1525 (false, "xabby"),
1526 (false, "xaabby"),
1527 ]);
1528 a("x(a+|b+)y", &[
1529 (false, "xy"),
1530 (true, "xay"),
1531 (true, "xby"),
1532 (true, "xaay"),
1533 (true, "xbby"),
1534 (false, "xaby"),
1535 (false, "xaaby"),
1536 (false, "xabby"),
1537 (false, "xaabby"),
1538 (false, "xcy"),
1539 ]);
1540
1541 a(".", &[
1542 (false, ""),
1543 (true, "a"),
1544 (true, "ab"),
1545 (true, "ab\nc"),
1546 (true, "ab.c"),
1547 ]);
1548 a("x.y", &[
1549 (false, ""),
1550 (false, "xy"),
1551 (true, "xay"),
1552 (true, "x\ny"),
1553 (true, "x.y"),
1554 (false, "x..y"),
1555 ]);
1556
1557 a("^", &[
1558 (true, ""),
1559 (true, "xx"),
1560 ]);
1561 a("^abc", &[
1562 (false, ""),
1563 (true, "abcdef"),
1564 (false, "xabcdef"),
1565 (false, "\nabcdef"),
1566 ]);
1567 a("(^abc|^def)", &[
1568 (false, ""),
1569 (true, "abcd"),
1570 (true, "defg"),
1571 (false, "xabcd"),
1572 (false, "xdefg"),
1573 (false, "^abc"),
1574 (false, "^(abc|def)"),
1575 (false, "\nabcdef"),
1576 ]);
1577 a("(^abc|def)", &[
1578 (false, ""),
1579 (true, "abcd"),
1580 (true, "defg"),
1581 (false, "xabcd"),
1582 (true, "xdefg"),
1583 (false, "^abc"),
1584 (true, "^(abc|def)"),
1585 (false, "\nabcde"),
1586 ]);
1587 a("^^", &[
1588 (true, ""),
1589 (true, "abcdef"),
1590 ]);
1591 a("^abc^", &[
1592 (false, ""),
1593 (false, "abcdef"),
1594 (false, "xabcdef"),
1595 (false, "abc\n"),
1596 (false, "\nabc\n"),
1597 (false, "^abc^"),
1598 ]);
1599
1600 a("$", &[
1601 (true, ""),
1602 (true, "abc"),
1603 ]);
1604 a("abc$", &[
1605 (false, ""),
1606 (true, "abc"),
1607 (false, "abcx"),
1608 (false, "abc\n"),
1609 (false, "abc$"),
1610 ]);
1611 a("abc$$", &[
1612 (false, ""),
1613 (true, "abc"),
1614 (false, "abcx"),
1615 (false, "abc\n"),
1616 (false, "abc$"),
1617 ]);
1618 a("(abc$)x", &[
1619 (false, ""),
1620 (false, "abc"),
1621 (false, "abcx"),
1622 (false, "abc\nx"),
1623 (false, "abc$x"),
1624 ]);
1625 a("abc$|def$", &[
1626 (false, ""),
1627 (true, "abc"),
1628 (false, "abcx"),
1629 (false, "abc\n"),
1630 (false, "abc$"),
1631 (true, "def"),
1632 (false, "defx"),
1633 (false, "def\n"),
1634 (false, "def$"),
1635 (true, "abcdef"),
1636 ]);
1637
1638 a("\\|", &[
1639 (true, "|"),
1640 (false, ""),
1641 (false, "a"),
1642 ]);
1643 a("\\*", &[
1644 (true, "*"),
1645 (false, ""),
1646 (false, "a"),
1647 ]);
1648 a("\\+", &[
1649 (true, "+"),
1650 (false, ""),
1651 (false, "a"),
1652 ]);
1653 a("\\?", &[
1654 (true, "?"),
1655 (false, ""),
1656 (false, "a"),
1657 ]);
1658 a("\\.", &[
1659 (true, "."),
1660 (false, ""),
1661 (false, "a"),
1662 ]);
1663 a("\\^", &[
1664 (true, "^"),
1665 (false, ""),
1666 (false, "a"),
1667 ]);
1668 a("\\$", &[
1669 (true, "$"),
1670 (false, ""),
1671 (false, "a"),
1672 ]);
1673 a("\\\\", &[
1674 (true, "\\"),
1675 (false, ""),
1676 (false, "a"),
1677 ]);
1678 a("\\[", &[
1679 (true, "["),
1680 (false, ""),
1681 (false, "a"),
1682 ]);
1683 a("\\]", &[
1684 (true, "]"),
1685 (false, ""),
1686 (false, "a"),
1687 ]);
1688 a("\\-", &[
1689 (true, "-"),
1690 (false, ""),
1691 (false, "a"),
1692 ]);
1693 f("\\");
1694
1695 a("[a]", &[
1696 (true, "a"),
1697 (false, "b"),
1698 ]);
1699 a("[abc]", &[
1700 (true, "a"),
1701 (true, "b"),
1702 (true, "c"),
1703 (false, "d"),
1704 ]);
1705 a("[a-c]", &[
1706 (true, "a"),
1707 (true, "b"),
1708 (true, "c"),
1709 (false, "d"),
1710 ]);
1711 a("[xa-c]", &[
1712 (true, "a"),
1713 (true, "b"),
1714 (true, "c"),
1715 (true, "x"),
1716 (false, "d"),
1717 ]);
1718 a("[a-cxyz]", &[
1719 (true, "a"),
1720 (true, "b"),
1721 (true, "c"),
1722 (true, "x"),
1723 (false, "d"),
1724 ]);
1725 a("[a-c]x", &[
1726 (false, "a"),
1727 (false, "b"),
1728 (false, "c"),
1729 (false, "x"),
1730 (true, "ax"),
1731 (true, "bx"),
1732 (true, "cx"),
1733 (false, "d"),
1734 (false, "dx"),
1735 ]);
1736 a("[a-cxy]", &[
1737 (true, "a"),
1738 (true, "b"),
1739 (true, "c"),
1740 (true, "x"),
1741 (true, "y"),
1742 (false, "d"),
1743 ]);
1744 a("[a-c]xy", &[
1745 (false, "a"),
1746 (false, "b"),
1747 (false, "c"),
1748 (false, "x"),
1749 (false, "ax"),
1750 (false, "bx"),
1751 (false, "cx"),
1752 (true, "axy"),
1753 (true, "bxy"),
1754 (true, "cxy"),
1755 (false, "d"),
1756 ]);
1757 a("[a-cxyz]", &[
1758 (true, "a"),
1759 (true, "b"),
1760 (true, "c"),
1761 (true, "x"),
1762 (true, "y"),
1763 (true, "z"),
1764 (false, "d"),
1765 ]);
1766 a("[a-c]xyz", &[
1767 (false, "a"),
1768 (false, "b"),
1769 (false, "c"),
1770 (false, "x"),
1771 (false, "ax"),
1772 (false, "bx"),
1773 (false, "cx"),
1774 (false, "axy"),
1775 (false, "bxy"),
1776 (false, "cxy"),
1777 (true, "axyz"),
1778 (true, "bxyz"),
1779 (true, "cxyz"),
1780 (false, "d"),
1781 ]);
1782 a("xyz[a-c]", &[
1783 (false, "a"),
1784 (false, "b"),
1785 (false, "c"),
1786 (false, "x"),
1787 (false, "xa"),
1788 (false, "xb"),
1789 (false, "xc"),
1790 (false, "xya"),
1791 (false, "xyb"),
1792 (false, "xyc"),
1793 (true, "xyza"),
1794 (true, "xyzb"),
1795 (true, "xyzc"),
1796 (false, "d"),
1797 ]);
1798 a("[xyza-c]", &[
1799 (true, "a"),
1800 (true, "b"),
1801 (true, "c"),
1802 (true, "x"),
1803 (true, "y"),
1804 (true, "z"),
1805 (false, "d"),
1806 ]);
1807 a("[xya-cyz]", &[
1808 (true, "a"),
1809 (true, "b"),
1810 (true, "c"),
1811 (true, "x"),
1812 (true, "y"),
1813 (true, "z"),
1814 (false, "d"),
1815 ]);
1816 a("[x-za-c]", &[
1817 (true, "a"),
1818 (true, "b"),
1819 (true, "c"),
1820 (true, "x"),
1821 (true, "y"),
1822 (true, "z"),
1823 (false, "d"),
1824 ]);
1825 a("[x-zmna-c]", &[
1826 (true, "a"),
1827 (true, "b"),
1828 (true, "c"),
1829 (true, "x"),
1830 (true, "y"),
1831 (true, "z"),
1832 (true, "m"),
1833 (true, "n"),
1834 (false, "d"),
1835 ]);
1836 a("[-]", &[
1837 (true, "-"),
1838 (false, "d"),
1839 ]);
1840 a("[a-]", &[
1841 (true, "-"),
1842 (true, "a"),
1843 (false, "d"),
1844 ]);
1845 a("[-b]", &[
1846 (true, "-"),
1847 (true, "b"),
1848 (false, "d"),
1849 ]);
1850 a("[-bd-g]", &[
1851 (false, "a"),
1852 (true, "-"),
1853 (true, "b"),
1854 (true, "d"),
1855 (true, "f"),
1856 ]);
1857 a("[bd-g-]", &[
1858 (false, "a"),
1859 (true, "-"),
1860 (true, "b"),
1861 (true, "d"),
1862 (true, "f"),
1863 ]);
1864 // Backwards ranges.
1865 a("[9-0]", &[
1866 (false, "a"),
1867 (false, "-"),
1868 (true, "9"),
1869 (true, "0"),
1870 (true, "5"),
1871 ]);
1872
1873 a("[^a]", &[
1874 (false, "a"),
1875 (true, "b"),
1876 ]);
1877 a("[^abc]", &[
1878 (false, "a"),
1879 (false, "b"),
1880 (false, "c"),
1881 (true, "d"),
1882 ]);
1883 a("[^a-c]", &[
1884 (false, "a"),
1885 (false, "b"),
1886 (false, "c"),
1887 (true, "d"),
1888 ]);
1889 a("[^xa-c]", &[
1890 (false, "a"),
1891 (false, "b"),
1892 (false, "c"),
1893 (false, "x"),
1894 (true, "d"),
1895 ]);
1896 a("[^a-cxyz]", &[
1897 (false, "a"),
1898 (false, "b"),
1899 (false, "c"),
1900 (false, "x"),
1901 (true, "d"),
1902 ]);
1903 a("[^a-c]x", &[
1904 (false, "a"),
1905 (false, "b"),
1906 (false, "c"),
1907 (false, "x"),
1908 (false, "ax"),
1909 (false, "bx"),
1910 (false, "cx"),
1911 (false, "d"),
1912 (true, "dx"),
1913 ]);
1914 a("[^a-cxy]", &[
1915 (false, "a"),
1916 (false, "b"),
1917 (false, "c"),
1918 (false, "x"),
1919 (false, "y"),
1920 (true, "d"),
1921 ]);
1922 a("[^a-c]xy", &[
1923 (false, "a"),
1924 (false, "b"),
1925 (false, "c"),
1926 (false, "x"),
1927 (false, "ax"),
1928 (false, "bx"),
1929 (false, "cx"),
1930 (false, "axy"),
1931 (false, "bxy"),
1932 (false, "cxy"),
1933 (true, "dxy"),
1934 (false, "d"),
1935 ]);
1936 a("[^a-cxyz]", &[
1937 (false, "a"),
1938 (false, "b"),
1939 (false, "c"),
1940 (false, "x"),
1941 (false, "y"),
1942 (false, "z"),
1943 (true, "d"),
1944 ]);
1945 a("[^a-c]xyz", &[
1946 (false, "a"),
1947 (false, "b"),
1948 (false, "c"),
1949 (false, "x"),
1950 (false, "ax"),
1951 (false, "bx"),
1952 (false, "cx"),
1953 (false, "axy"),
1954 (false, "bxy"),
1955 (false, "cxy"),
1956 (false, "axyz"),
1957 (false, "bxyz"),
1958 (false, "cxyz"),
1959 (true, "dxyz"),
1960 (false, "d"),
1961 ]);
1962 a("xyz[^a-c]", &[
1963 (false, "a"),
1964 (false, "b"),
1965 (false, "c"),
1966 (false, "x"),
1967 (false, "xa"),
1968 (false, "xb"),
1969 (false, "xc"),
1970 (false, "xya"),
1971 (false, "xyb"),
1972 (false, "xyc"),
1973 (false, "xyza"),
1974 (false, "xyzb"),
1975 (false, "xyzc"),
1976 (true, "xyzd"),
1977 (false, "d"),
1978 ]);
1979 a("[^xyza-c]", &[
1980 (false, "a"),
1981 (false, "b"),
1982 (false, "c"),
1983 (false, "x"),
1984 (false, "y"),
1985 (false, "z"),
1986 (true, "d"),
1987 ]);
1988 a("[^xya-cyz]", &[
1989 (false, "a"),
1990 (false, "b"),
1991 (false, "c"),
1992 (false, "x"),
1993 (false, "y"),
1994 (false, "z"),
1995 (true, "d"),
1996 ]);
1997 a("[^x-za-c]", &[
1998 (false, "a"),
1999 (false, "b"),
2000 (false, "c"),
2001 (false, "x"),
2002 (false, "y"),
2003 (false, "z"),
2004 (true, "d"),
2005 ]);
2006 a("[^x-zmna-c]", &[
2007 (false, "a"),
2008 (false, "b"),
2009 (false, "c"),
2010 (false, "x"),
2011 (false, "y"),
2012 (false, "z"),
2013 (false, "m"),
2014 (false, "n"),
2015 (true, "d"),
2016 ]);
2017 a("[^-]", &[
2018 (false, "-"),
2019 (true, "d"),
2020 ]);
2021 a("[^a-]", &[
2022 (false, "-"),
2023 (false, "a"),
2024 (true, "d"),
2025 ]);
2026 a("[^-b]", &[
2027 (false, "-"),
2028 (false, "b"),
2029 (true, "d"),
2030 ]);
2031 a("[^-bd-g]", &[
2032 (true, "a"),
2033 (false, "-"),
2034 (false, "b"),
2035 (false, "d"),
2036 (false, "f"),
2037 ]);
2038 a("[^bd-g-]", &[
2039 (true, "a"),
2040 (false, "-"),
2041 (false, "b"),
2042 (false, "d"),
2043 (false, "f"),
2044 ]);
2045
2046 a("[a|b]", &[
2047 (true, "a"),
2048 (true, "|"),
2049 (false, "c"),
2050 ]);
2051 a("[a\\|b]", &[
2052 (true, "a"),
2053 (true, "|"),
2054 (true, "\\"),
2055 (false, "c"),
2056 ]);
2057 a("[a(b]", &[
2058 (true, "a"),
2059 (true, "("),
2060 (false, "c"),
2061 ]);
2062 a("[a)b]", &[
2063 (true, "a"),
2064 (true, ")"),
2065 (false, "c"),
2066 ]);
2067 a("[a^b]", &[
2068 (true, "a"),
2069 (true, "^"),
2070 (false, "c"),
2071 ]);
2072
2073 f("[]");
2074 f("[^]");
2075 a("[^]]", &[
2076 (true, "a"),
2077 (false, "]"),
2078 (true, "^"),
2079 ]);
2080 a("[]]", &[
2081 (false, "a"),
2082 (true, "]"),
2083 ]);
2084 // Matches [ or ].
2085 a("[][]", &[
2086 (false, "a"),
2087 (true, "["),
2088 (true, "]"),
2089 ]);
2090 // Matches anything but [ or ].
2091 a("[^][]", &[
2092 (true, "a"),
2093 (false, "["),
2094 (false, "]"),
2095 ]);
2096 // Anything but ^.
2097 a("[^^]", &[
2098 (true, "a"),
2099 (false, "^"),
2100 (true, "c"),
2101 ]);
2102
2103 // Make sure - is recognized as an atom when it is not part of
2104 // a range. That is: a-z matches a or - or z, but it doesn't
2105 // match b (it's not a range).
2106 a("a-z", &[
2107 (true, "a-z"),
2108 (false, "a"),
2109 (false, "-"),
2110 (false, "z"),
2111 (false, "c"),
2112 ]);
2113
2114 a("a|-|z", &[
2115 (true, "a"),
2116 (true, "-"),
2117 (true, "z"),
2118 (false, "c"),
2119 ]);
2120
2121 Ok(())
2122 }
2123
2124 #[test]
2125 fn regex_set() -> Result<()> {
2126 let re = RegexSet::new(&[ "ab", "cd" ])?;
2127 assert!(re.is_match("ab"));
2128 assert!(re.is_match("cdef"));
2129 assert!(!re.is_match("xxx"));
2130
2131 // Try to make sure one re does not leak into another.
2132 let re = RegexSet::new(&[ "cd$", "^ab" ])?;
2133 assert!(re.is_match("abxx"));
2134 assert!(! re.is_match("xabxx"));
2135 assert!(re.is_match("xxcd"));
2136 assert!(! re.is_match("xxcdx"));
2137 assert!(re.is_match("abcdx"));
2138
2139 // Invalid regular expressions should be ignored.
2140 let re = RegexSet::new(&[ "[ab", "cd]", "x" ])?;
2141 assert!(!re.is_match("a"));
2142 assert!(!re.is_match("ab"));
2143 assert!(!re.is_match("[ab"));
2144 assert!(!re.is_match("c"));
2145 assert!(!re.is_match("cd"));
2146 assert!(!re.is_match("cd]"));
2147 assert!(re.is_match("x"));
2148
2149 // If all regular expressions are invalid, nothing should
2150 // match.
2151 let re = RegexSet::new(&[ "[ab", "cd]" ])?;
2152 assert!(!re.is_match("a"));
2153 assert!(!re.is_match("ab"));
2154 assert!(!re.is_match("[ab"));
2155 assert!(!re.is_match("c"));
2156 assert!(!re.is_match("cd"));
2157 assert!(!re.is_match("cd]"));
2158 assert!(!re.is_match("x"));
2159
2160 // If there are no regular expressions, everything should
2161 // match.
2162 let s: [&str; 0] = [];
2163 let re = RegexSet::new(&s)?;
2164 assert!(re.is_match("a"));
2165 assert!(re.is_match("ab"));
2166 assert!(re.is_match("[ab"));
2167 assert!(re.is_match("c"));
2168 assert!(re.is_match("cd"));
2169 assert!(re.is_match("cd]"));
2170 assert!(re.is_match("x"));
2171
2172 // The empty branch of the alternation should match everything.
2173 let re = RegexSet::new(&[ "ab|", "cd" ])?;
2174 assert!(re.is_match("a"));
2175 assert!(re.is_match("b"));
2176 assert!(re.is_match("x"));
2177 assert!(re.is_match("xyx"));
2178 assert!(re.is_match(""));
2179
2180 Ok(())
2181 }
2182
2183 #[test]
2184 fn regex_set_sequoia() -> Result<()> {
2185 let re = RegexSet::new(&["<[^>]+[@.]sequoia-pgp\\.org>$"])?;
2186 dbg!(&re);
2187 assert!(re.is_match("<justus@sequoia-pgp.org>"));
2188 assert!(!re.is_match("<justus@gnupg.org>"));
2189 Ok(())
2190 }
2191
2192 #[test]
2193 fn regex_set_sequoia_nodash() -> Result<()> {
2194 let re = RegexSet::new(&["<[^>]+[@.]sequoiapgp\\.org>$"])?;
2195 dbg!(&re);
2196 assert!(re.is_match("<justus@sequoiapgp.org>"));
2197 assert!(!re.is_match("<justus@gnupg.org>"));
2198 Ok(())
2199 }
2200}