ua_parser/
lib.rs

1#![deny(unsafe_code)]
2#![warn(missing_docs)]
3#![allow(clippy::empty_docs)]
4#![doc = include_str!("../README.md")]
5
6use regex::Captures;
7use serde::Deserialize;
8
9pub use regex_filtered::{BuildError, ParseError};
10
11mod resolvers;
12
/// Error returned if the conversion of [`Regexes`] to [`Extractor`]
/// fails.
///
/// It is also the error type of [`user_agent::Builder::push`] and
/// [`user_agent::Builder::push_all`].
#[derive(Debug)]
pub enum Error {
    /// Compilation failed because one of the input regexes could not
    /// be parsed or processed.
    ParseError(ParseError),
    /// Compilation failed because one of the prefilters could not be
    /// built.
    BuildError(BuildError),
    /// A replacement template requires a group missing from the regex.
    // NOTE(review): the payload is presumably the index of the missing
    // group; it is produced by the resolvers module — confirm there.
    MissingGroup(usize),
}
26impl std::error::Error for Error {
27    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
28        match self {
29            Error::ParseError(p) => Some(p),
30            Error::BuildError(b) => Some(b),
31            Error::MissingGroup(_) => None,
32        }
33    }
34}
35impl std::fmt::Display for Error {
36    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37        write!(f, "{self:?}")
38    }
39}
40impl From<ParseError> for Error {
41    fn from(value: ParseError) -> Self {
42        Self::ParseError(value)
43    }
44}
45impl From<BuildError> for Error {
46    fn from(value: BuildError) -> Self {
47        Self::BuildError(value)
48    }
49}
50
/// Deserialization target for the parser descriptors, can be used
/// with the relevant serde implementation to load from `regexes.yaml`
/// or a conversion thereof.
///
/// Can then be compiled to a full [`Extractor`] (through its
/// [`TryFrom`] implementation), or an individual list of parsers can
/// be converted to the corresponding extractor.
#[allow(missing_docs)]
#[derive(Deserialize)]
pub struct Regexes<'a> {
    /// Parser descriptors compiled into [`Extractor::ua`].
    pub user_agent_parsers: Vec<user_agent::Parser<'a>>,
    /// Parser descriptors compiled into [`Extractor::os`].
    pub os_parsers: Vec<os::Parser<'a>>,
    /// Parser descriptors compiled into [`Extractor::dev`].
    pub device_parsers: Vec<device::Parser<'a>>,
}
64
65impl<'a> TryFrom<Regexes<'a>> for Extractor<'a> {
66    type Error = Error;
67    /// Compile parsed regexes to the corresponding full extractor.
68    ///
69    /// Prefer using individual builder / extractors if you don't need
70    /// all three domains extracted, as creating the individual
71    /// extractors does have a cost.
72    fn try_from(r: Regexes<'a>) -> Result<Self, Error> {
73        let ua = r
74            .user_agent_parsers
75            .into_iter()
76            .try_fold(user_agent::Builder::new(), |b, p| b.push(p))?
77            .build()?;
78        let os = r
79            .os_parsers
80            .into_iter()
81            .try_fold(os::Builder::new(), |b, p| b.push(p))?
82            .build()?;
83        let dev = r
84            .device_parsers
85            .into_iter()
86            .try_fold(device::Builder::new(), |b, p| b.push(p))?
87            .build()?;
88        Ok(Extractor { ua, os, dev })
89    }
90}
91
/// Full extractor, simply delegates to the underlying individual
/// extractors for the actual job.
///
/// The sub-extractors are public, so a single domain can be queried
/// directly without running the other two.
#[allow(missing_docs)]
pub struct Extractor<'a> {
    /// User agent (browser family and version) extractor.
    pub ua: user_agent::Extractor<'a>,
    /// Operating system extractor.
    pub os: os::Extractor<'a>,
    /// Device extractor.
    pub dev: device::Extractor<'a>,
}
100impl<'a> Extractor<'a> {
101    /// Performs the extraction on every sub-extractor in sequence.
102    pub fn extract(
103        &'a self,
104        ua: &'a str,
105    ) -> (
106        Option<user_agent::ValueRef<'a>>,
107        Option<os::ValueRef<'a>>,
108        Option<device::ValueRef<'a>>,
109    ) {
110        (
111            self.ua.extract(ua),
112            self.os.extract(ua),
113            self.dev.extract(ua),
114        )
115    }
116}
117
118/// User agent module.
119///
120/// The user agent is the representation of the browser, in UAP lingo
121/// the user agent is composed of a *family* (the browser project) and
122/// a *version* of up to 4 segments.
123pub mod user_agent {
124    use serde::Deserialize;
125    use std::borrow::Cow;
126
127    use crate::resolvers::{FallbackResolver, FamilyResolver};
128    use regex_filtered::BuildError;
129
    /// Individual user agent parser description. Plain data which can
    /// be deserialized from serde-compatible storage, or created
    /// literally (e.g. using a conversion or build script).
    ///
    /// Only [`Self::regex`] is mandatory data, every replacement is
    /// optional.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex to check the UA against, if the regex matches the
        /// parser applies.
        pub regex: Cow<'a, str>,
        /// If set, used for the [`ValueRef::family`] field. If it
        /// contains a `$1` placeholder, that is replaced by the value
        /// of the first match group.
        ///
        /// If unset, the first match group is used directly.
        pub family_replacement: Option<Cow<'a, str>>,
        /// If set, provides the value of the major version number
        /// ([`ValueRef::major`]), otherwise the second match group is
        /// used.
        pub v1_replacement: Option<Cow<'a, str>>,
        /// If set, provides the value of the minor version number
        /// ([`ValueRef::minor`]), otherwise the third match group is
        /// used.
        pub v2_replacement: Option<Cow<'a, str>>,
        /// If set, provides the value of the patch version number
        /// ([`ValueRef::patch`]), otherwise the fourth match group is
        /// used.
        pub v3_replacement: Option<Cow<'a, str>>,
        /// If set, provides the value of the minor patch version
        /// number ([`ValueRef::patch_minor`]), otherwise the fifth
        /// match group is used.
        pub v4_replacement: Option<Cow<'a, str>>,
    }
157
    /// Resolvers built for a single [`Parser`], in order: family,
    /// then the four version segments (major, minor, patch, patch
    /// minor).
    type Repl<'a> = (
        FamilyResolver<'a>,
        // Per spec, should actually be restrict-templated (same as
        // family but for indexes 2-5 instead of 1).
        FallbackResolver<'a>,
        FallbackResolver<'a>,
        FallbackResolver<'a>,
        FallbackResolver<'a>,
    );
167
    /// Extractor builder, used to `push` parsers into before building
    /// the extractor.
    // NOTE(review): the derived `Default` goes through
    // `regex_filtered::Builder::default()` while `new()` uses
    // `new_atom_len(3)`; whether the two configurations are the same
    // cannot be told from this file — confirm.
    #[derive(Default)]
    pub struct Builder<'a> {
        /// Accumulates the (rewritten) regexes of the pushed parsers.
        builder: regex_filtered::Builder,
        /// Resolvers of the pushed parsers, one entry per parser, in
        /// push order.
        repl: Vec<Repl<'a>>,
    }
175    impl<'a> Builder<'a> {
176        /// Initialise an empty builder.
177        pub fn new() -> Self {
178            Self {
179                builder: regex_filtered::Builder::new_atom_len(3),
180                repl: Vec::new(),
181            }
182        }
183
184        /// Build the extractor, may be called without pushing any
185        /// parser in though that is not very useful.
186        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
187            let Self { builder, repl } = self;
188
189            Ok(Extractor {
190                matcher: builder.build()?,
191                repl,
192            })
193        }
194
195        /// Pushes a parser into the builder, may fail if the
196        /// [`Parser::regex`] is invalid.
197        pub fn push(mut self, ua: Parser<'a>) -> Result<Self, super::Error> {
198            self.builder = self.builder.push(&super::rewrite_regex(&ua.regex))?;
199            let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
200            // number of groups in regex, excluding implicit entire match group
201            let groups = r.captures_len() - 1;
202            self.repl.push((
203                FamilyResolver::new(ua.family_replacement, groups)?,
204                FallbackResolver::new(ua.v1_replacement, groups, 2),
205                FallbackResolver::new(ua.v2_replacement, groups, 3),
206                FallbackResolver::new(ua.v3_replacement, groups, 4),
207                FallbackResolver::new(ua.v4_replacement, groups, 5),
208            ));
209            Ok(self)
210        }
211
212        /// Bulk loading of parsers into the builder.
213        pub fn push_all<I>(self, ua: I) -> Result<Self, super::Error>
214        where
215            I: IntoIterator<Item = Parser<'a>>,
216        {
217            ua.into_iter().try_fold(self, |s, p| s.push(p))
218        }
219    }
220
    /// User Agent extractor, created through [`Builder::build`].
    pub struct Extractor<'a> {
        /// Compiled regex set, yields the index and regex of the
        /// parsers matching an input.
        matcher: regex_filtered::Regexes,
        /// Resolvers of each parser, indexed by the parser index the
        /// matcher reports.
        repl: Vec<Repl<'a>>,
    }
226    impl<'a> Extractor<'a> {
227        /// Tries the loaded [`Parser`], upon finding the first
228        /// matching [`Parser`] performs data extraction following its
229        /// replacement directives and returns the result.
230        ///
231        /// Returns [`None`] if:
232        ///
233        /// - no matching parser was found
234        /// - the match does not have any matching groups *and*
235        ///   [`Parser::family_replacement`] is unset
236        /// - [`Parser::family_replacement`] has a substitution
237        ///   but there is no group in the regex
238        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
239            let (idx, re) = self.matcher.matching(ua).next()?;
240            let c = re.captures(ua)?;
241
242            let (f, v1, v2, v3, v4) = &self.repl[idx];
243
244            Some(ValueRef {
245                family: f.resolve(&c),
246                major: v1.resolve(&c),
247                minor: v2.resolve(&c),
248                patch: v3.resolve(&c),
249                patch_minor: v4.resolve(&c),
250            })
251        }
252    }
    /// Borrowed extracted value, borrows the content of the original
    /// parser or the content of the user agent string, unless a
    /// replacement is performed (which is only possible for the
    /// [`family`](Self::family), the only field able to hold owned
    /// data).
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct ValueRef<'a> {
        /// Family of the user agent, see [`Parser::family_replacement`].
        pub family: Cow<'a, str>,
        /// Major version number, see [`Parser::v1_replacement`].
        pub major: Option<&'a str>,
        /// Minor version number, see [`Parser::v2_replacement`].
        pub minor: Option<&'a str>,
        /// Patch version number, see [`Parser::v3_replacement`].
        pub patch: Option<&'a str>,
        /// Minor patch version number, see [`Parser::v4_replacement`].
        pub patch_minor: Option<&'a str>,
    }
269
270    impl ValueRef<'_> {
271        /// Converts the borrowed result into an owned one,
272        /// independent from both the extractor and the user agent
273        /// string.
274        pub fn into_owned(self) -> Value {
275            Value {
276                family: self.family.into_owned(),
277                major: self.major.map(|c| c.to_string()),
278                minor: self.minor.map(|c| c.to_string()),
279                patch: self.patch.map(|c| c.to_string()),
280                patch_minor: self.patch_minor.map(|c| c.to_string()),
281            }
282        }
283    }
284
    /// Owned extracted value, identical to [`ValueRef`] but not
    /// linked to either the UA string or the extractor.
    ///
    /// Obtained through [`ValueRef::into_owned`].
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct Value {
        /// Owned copy of [`ValueRef::family`].
        pub family: String,
        /// Owned copy of [`ValueRef::major`].
        pub major: Option<String>,
        /// Owned copy of [`ValueRef::minor`].
        pub minor: Option<String>,
        /// Owned copy of [`ValueRef::patch`].
        pub patch: Option<String>,
        /// Owned copy of [`ValueRef::patch_minor`].
        pub patch_minor: Option<String>,
    }
300}
301
302/// OS extraction module
303pub mod os {
304    use serde::Deserialize;
305    use std::borrow::Cow;
306
307    use regex_filtered::{BuildError, ParseError};
308
309    use crate::resolvers::{OptResolver, Resolver};
310
    /// OS parser configuration. Plain data, can be deserialized or
    /// created literally.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex pattern matched against the user agent string, and
        /// source of the groups the replacements can refer to.
        pub regex: Cow<'a, str>,
        /// Replacement for the [`ValueRef::os`], must be set if there
        /// is no capture in the [`Self::regex`], if there are
        /// captures may be fully templated (with `$n` placeholders
        /// for any group of the [`Self::regex`]).
        pub os_replacement: Option<Cow<'a, str>>,
        /// Replacement for the [`ValueRef::major`], may be fully templated.
        pub os_v1_replacement: Option<Cow<'a, str>>,
        /// Replacement for the [`ValueRef::minor`], may be fully templated.
        pub os_v2_replacement: Option<Cow<'a, str>>,
        /// Replacement for the [`ValueRef::patch`], may be fully templated.
        pub os_v3_replacement: Option<Cow<'a, str>>,
        /// Replacement for the [`ValueRef::patch_minor`], may be fully templated.
        pub os_v4_replacement: Option<Cow<'a, str>>,
    }
    /// Builder for [`Extractor`].
    // NOTE(review): the derived `Default` goes through
    // `regex_filtered::Builder::default()` while `new()` uses
    // `new_atom_len(3)`; whether the two configurations are the same
    // cannot be told from this file — confirm.
    #[derive(Default)]
    pub struct Builder<'a> {
        /// Accumulates the (rewritten) regexes of the pushed parsers.
        builder: regex_filtered::Builder,
        /// Resolvers of the pushed parsers, one entry per parser in
        /// push order: OS name, then major, minor, patch and patch
        /// minor versions.
        repl: Vec<(
            Resolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
        )>,
    }
342    impl<'a> Builder<'a> {
343        ///
344        pub fn new() -> Self {
345            Self {
346                builder: regex_filtered::Builder::new_atom_len(3),
347                repl: Vec::new(),
348            }
349        }
350
351        /// Builds the [`Extractor`], may fail if building the
352        /// prefilter fails.
353        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
354            let Self { builder, repl } = self;
355
356            Ok(Extractor {
357                matcher: builder.build()?,
358                repl,
359            })
360        }
361
362        /// Add a [`Parser`] configuration, fails if the regex can not
363        /// be parsed, or if [`Parser::os_replacement`] is missing and
364        /// the regex has no groups.
365        pub fn push(mut self, os: Parser<'a>) -> Result<Self, ParseError> {
366            self.builder = self.builder.push(&super::rewrite_regex(&os.regex))?;
367            let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
368            // number of groups in regex, excluding implicit entire match group
369            let groups = r.captures_len() - 1;
370            self.repl.push((
371                Resolver::new(os.os_replacement, groups, 1),
372                OptResolver::new(os.os_v1_replacement, groups, 2),
373                OptResolver::new(os.os_v2_replacement, groups, 3),
374                OptResolver::new(os.os_v3_replacement, groups, 4),
375                OptResolver::new(os.os_v4_replacement, groups, 5),
376            ));
377            Ok(self)
378        }
379
380        /// Bulk loading of parsers into the builder.
381        pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
382        where
383            I: IntoIterator<Item = Parser<'a>>,
384        {
385            ua.into_iter().try_fold(self, |s, p| s.push(p))
386        }
387    }
388
    /// OS extractor structure, created through [`Builder::build`].
    pub struct Extractor<'a> {
        /// Compiled regex set, yields the index and regex of the
        /// parsers matching an input.
        matcher: regex_filtered::Regexes,
        /// Resolvers of each parser, indexed by the parser index the
        /// matcher reports: OS name, then major, minor, patch and
        /// patch minor versions.
        repl: Vec<(
            Resolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
            OptResolver<'a>,
        )>,
    }
400    impl<'a> Extractor<'a> {
401        /// Matches & extracts the OS data for this user agent,
402        /// returns `None` if the UA string could not be matched.
403        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
404            let (idx, re) = self.matcher.matching(ua).next()?;
405            let c = re.captures(ua)?;
406
407            let (o, v1, v2, v3, v4) = &self.repl[idx];
408
409            Some(ValueRef {
410                os: o.resolve(&c),
411                major: v1.resolve(&c),
412                minor: v2.resolve(&c),
413                patch: v3.resolve(&c),
414                patch_minor: v4.resolve(&c),
415            })
416        }
417    }
418
    /// An OS extraction result, every field may either borrow or own
    /// its content.
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct ValueRef<'a> {
        /// Operating system name, see [`Parser::os_replacement`].
        pub os: Cow<'a, str>,
        /// Major version, see [`Parser::os_v1_replacement`].
        pub major: Option<Cow<'a, str>>,
        /// Minor version, see [`Parser::os_v2_replacement`].
        pub minor: Option<Cow<'a, str>>,
        /// Patch version, see [`Parser::os_v3_replacement`].
        pub patch: Option<Cow<'a, str>>,
        /// Minor patch version, see [`Parser::os_v4_replacement`].
        pub patch_minor: Option<Cow<'a, str>>,
    }
433
434    impl ValueRef<'_> {
435        /// Converts a [`ValueRef`] into a [`Value`] to avoid lifetime
436        /// concerns, may need to allocate and copy any data currently
437        /// borrowed from a [`Parser`] or user agent string.
438        pub fn into_owned(self) -> Value {
439            Value {
440                os: self.os.into_owned(),
441                major: self.major.map(|c| c.into_owned()),
442                minor: self.minor.map(|c| c.into_owned()),
443                patch: self.patch.map(|c| c.into_owned()),
444                patch_minor: self.patch_minor.map(|c| c.into_owned()),
445            }
446        }
447    }
448
    /// Owned version of [`ValueRef`], obtained through
    /// [`ValueRef::into_owned`].
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct Value {
        /// Owned copy of [`ValueRef::os`].
        pub os: String,
        /// Owned copy of [`ValueRef::major`].
        pub major: Option<String>,
        /// Owned copy of [`ValueRef::minor`].
        pub minor: Option<String>,
        /// Owned copy of [`ValueRef::patch`].
        pub patch: Option<String>,
        /// Owned copy of [`ValueRef::patch_minor`].
        pub patch_minor: Option<String>,
    }
463}
464
465/// Extraction module for the device data of the user agent string.
466pub mod device {
467    use serde::Deserialize;
468    use std::borrow::Cow;
469
470    use regex_filtered::{BuildError, ParseError};
471
472    use crate::resolvers::{OptResolver, Resolver};
473
    /// Regex flags, can be set on a device [`Parser`] through
    /// [`Parser::regex_flag`].
    #[derive(Deserialize, PartialEq, Eq)]
    pub enum Flag {
        /// Enables case-insensitive regex matching, deserializes from
        /// the string `"i"`
        #[serde(rename = "i")]
        IgnoreCase,
    }
    /// Device parser description. Plain data, can be deserialized or
    /// created literally.
    #[derive(Deserialize, Default)]
    pub struct Parser<'a> {
        /// Regex pattern to use for matching and data extraction.
        pub regex: Cow<'a, str>,
        /// Configuration flags for the regex, if any.
        ///
        /// [`Flag::IgnoreCase`] makes the regex case-insensitive.
        pub regex_flag: Option<Flag>,
        /// Device replacement data, fully templated, must be present
        /// *or* the regex must have at least one group, which will be
        /// used instead.
        pub device_replacement: Option<Cow<'a, str>>,
        /// Brand replacement data, fully templated, optional, if
        /// missing there is no fallback.
        pub brand_replacement: Option<Cow<'a, str>>,
        /// Model replacement data, fully templated, optional, if
        /// missing will be replaced by the first group if the regex
        /// has one.
        pub model_replacement: Option<Cow<'a, str>>,
    }
501
    /// Extractor builder.
    // NOTE(review): the derived `Default` goes through
    // `regex_filtered::Builder::default()` while `new()` uses
    // `new_atom_len(2)`; whether the two configurations are the same
    // cannot be told from this file — confirm.
    #[derive(Default)]
    pub struct Builder<'a> {
        /// Accumulates the (rewritten) regexes of the pushed parsers.
        builder: regex_filtered::Builder,
        /// Resolvers of the pushed parsers, one entry per parser in
        /// push order: device, brand, model.
        repl: Vec<(Resolver<'a>, OptResolver<'a>, OptResolver<'a>)>,
    }
508    impl<'a> Builder<'a> {
509        /// Creates a builder in the default configurtion, which is
510        /// the only configuration.
511        pub fn new() -> Self {
512            Self {
513                builder: regex_filtered::Builder::new_atom_len(2),
514                repl: Vec::new(),
515            }
516        }
517
518        /// Builds an Extractor, may fail if compiling the prefilter fails.
519        pub fn build(self) -> Result<Extractor<'a>, BuildError> {
520            let Self { builder, repl } = self;
521
522            Ok(Extractor {
523                matcher: builder.build()?,
524                repl,
525            })
526        }
527
528        /// Add a parser to the set, may fail if parsing the regex
529        /// fails *or* if [`Parser::device_replacement`] is unset and
530        /// [`Parser::regex`] does not have at least one group, or a
531        /// templated [`Parser::device_replacement`] requests groups
532        /// which [`Parser::regex`] is missing.
533        pub fn push(mut self, device: Parser<'a>) -> Result<Self, ParseError> {
534            self.builder = self.builder.push_opt(
535                &super::rewrite_regex(&device.regex),
536                regex_filtered::Options::new()
537                    .case_insensitive(device.regex_flag == Some(Flag::IgnoreCase)),
538            )?;
539            let r = &self.builder.regexes()[self.builder.regexes().len() - 1];
540            // number of groups in regex, excluding implicit entire match group
541            let groups = r.captures_len() - 1;
542            self.repl.push((
543                Resolver::new(device.device_replacement, groups, 1),
544                OptResolver::new(device.brand_replacement, 0, 999),
545                OptResolver::new(device.model_replacement, groups, 1),
546            ));
547            Ok(self)
548        }
549
550        /// Bulk loading of parsers into the builder.
551        pub fn push_all<I>(self, ua: I) -> Result<Self, ParseError>
552        where
553            I: IntoIterator<Item = Parser<'a>>,
554        {
555            ua.into_iter().try_fold(self, |s, p| s.push(p))
556        }
557    }
558
    /// Device extractor object, created through [`Builder::build`].
    pub struct Extractor<'a> {
        /// Compiled regex set, yields the index and regex of the
        /// parsers matching an input.
        matcher: regex_filtered::Regexes,
        /// Resolvers of each parser, indexed by the parser index the
        /// matcher reports: device, brand, model.
        repl: Vec<(Resolver<'a>, OptResolver<'a>, OptResolver<'a>)>,
    }
564    impl<'a> Extractor<'a> {
565        /// Perform data extraction from the user agent string,
566        /// returns `None` if no regex in the [`Extractor`] matches
567        /// the input.
568        pub fn extract(&'a self, ua: &'a str) -> Option<ValueRef<'a>> {
569            let (idx, re) = self.matcher.matching(ua).next()?;
570            let c = re.captures(ua)?;
571
572            let (d, v1, v2) = &self.repl[idx];
573
574            Some(ValueRef {
575                device: d.resolve(&c),
576                brand: v1.resolve(&c),
577                model: v2.resolve(&c),
578            })
579        }
580    }
581
    /// Extracted device content, may borrow from one of the
    /// [`Parser`] or from the user agent string.
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct ValueRef<'a> {
        /// Device name, see [`Parser::device_replacement`].
        pub device: Cow<'a, str>,
        /// Device brand, see [`Parser::brand_replacement`].
        pub brand: Option<Cow<'a, str>>,
        /// Device model, see [`Parser::model_replacement`].
        pub model: Option<Cow<'a, str>>,
    }
593
594    impl ValueRef<'_> {
595        /// Converts [`Self`] to an owned [`Value`] getting rid of
596        /// borrowing concerns, may need to allocate and copy if any
597        /// of the attributes actually borrows from a [`Parser`] or
598        /// the user agent string.
599        pub fn into_owned(self) -> Value {
600            Value {
601                device: self.device.into_owned(),
602                brand: self.brand.map(|c| c.into_owned()),
603                model: self.model.map(|c| c.into_owned()),
604            }
605        }
606    }
607
    /// Owned version of [`ValueRef`], obtained through
    /// [`ValueRef::into_owned`].
    #[derive(PartialEq, Eq, Default, Debug)]
    pub struct Value {
        /// Owned copy of [`ValueRef::device`].
        pub device: String,
        /// Owned copy of [`ValueRef::brand`].
        pub brand: Option<String>,
        /// Owned copy of [`ValueRef::model`].
        pub model: Option<String>,
    }
618}
619
/// Rewrites a regex's character classes to ascii and bounded
/// repetitions to unbounded, the second to reduce regex memory
/// requirements, and the first for both that and to better match the
/// (inferred) semantics intended for ua-parser.
///
/// Specifically:
///
/// - the escapes `\d`, `\D`, `\w` and `\W` become `[0-9]`, `[^0-9]`,
///   `[A-Za-z0-9_]` and `[^A-Za-z0-9_]`, including inside character
///   classes;
/// - outside of character classes, the repetitions `{0,N}` and
///   `{1,N}` become `*` and `+` when `N` has at least three digits,
///   any other repetition is left alone.
///
/// Returns the input, borrowed, when nothing had to be rewritten or
/// when the pattern starts with an unescaped `{` (which repeats
/// nothing, so the pattern is left for the regex engine to reject).
fn rewrite_regex(re: &str) -> std::borrow::Cow<'_, str> {
    // start of the section of `re` which has not been copied to `out`
    // yet, remains 0 for as long as nothing was rewritten
    let mut from = 0;
    let mut out = String::new();

    let mut it = re.char_indices();
    // whether the previous character was an unescaped backslash
    let mut escape = false;
    // nesting depth of character classes, 0 is outside of any class
    let mut inclass: usize = 0;
    'main: while let Some((idx, c)) = it.next() {
        match c {
            '\\' if !escape => {
                escape = true;
                continue;
            }
            '{' if !escape && inclass == 0 => {
                if idx == 0 {
                    // we're repeating nothing, this regex is broken, bail
                    return re.into();
                }
                // we don't need to loop, we only want to replace {0, ...} and {1, ...}
                let lower = match it.next() {
                    Some((_, lower @ ('0' | '1'))) => lower,
                    _ => continue,
                };

                if !matches!(it.next(), Some((_, ','))) {
                    continue;
                }

                // count the digits of the upper bound, only bounds of
                // three digits or more are worth rewriting
                let mut digits = 0;
                for (ri, rc) in it.by_ref() {
                    match rc {
                        '}' if digits > 2 => {
                            // here idx is the index of the start of
                            // the range and ri is the end of range
                            out.push_str(&re[from..idx]);
                            from = ri + 1;
                            out.push_str(if lower == '0' { "*" } else { "+" });
                            break;
                        }
                        d if d.is_ascii_digit() => {
                            digits += 1;
                        }
                        _ => continue 'main,
                    }
                }
            }
            '[' if !escape => {
                inclass += 1;
            }
            ']' if !escape => {
                // a `]` which closes nothing is a plain literal, it
                // must not push the depth below zero otherwise the
                // class tracking is off for the rest of the pattern
                inclass = inclass.saturating_sub(1);
            }
            // no need for special cases because regex allows nesting
            // character classes, whereas js or python don't \o/
            'd' | 'D' | 'w' | 'W' if escape => {
                let class = match c {
                    'd' => "[0-9]",
                    'D' => "[^0-9]",
                    'w' => "[A-Za-z0-9_]",
                    _ => "[^A-Za-z0-9_]",
                };
                // idx is the class letter so idx-1 is \\, and we want
                // to exclude it
                out.push_str(&re[from..idx - 1]);
                from = idx + 1;
                out.push_str(class);
            }
            _ => (),
        }
        escape = false;
    }

    if from == 0 {
        re.into()
    } else {
        out.push_str(&re[from..]);
        out.into()
    }
}
713
#[cfg(test)]
mod test_rewrite_regex {
    use super::rewrite_regex as rewrite;

    /// Checks that every `(input, expected)` pair is rewritten as
    /// expected.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(rewrite(input), expected);
        }
    }

    /// Repetitions with a small or missing upper bound are kept.
    #[test]
    fn ignore_small_repetition() {
        check(&[
            (".{0,2}x", ".{0,2}x"),
            (".{0,}", ".{0,}"),
            (".{1,}", ".{1,}"),
        ]);
    }

    /// Only upper bounds of three digits or more are unbounded.
    #[test]
    fn rewrite_large_repetitions() {
        check(&[
            (".{0,20}x", ".{0,20}x"),
            ("(.{0,100})", "(.*)"),
            ("(.{1,50})", "(.{1,50})"),
            (".{1,300}x", ".+x"),
        ]);
    }

    /// Every large repetition of a pattern is rewritten, not just the
    /// first one.
    #[test]
    fn rewrite_all_repetitions() {
        check(&[
            (
                "; {0,2}(T-(?:07|[^0][0-9])[^;/]{1,100}?)(?: Build|\\) AppleWebKit)",
                "; {0,2}(T-(?:07|[^0][0-9])[^;/]+?)(?: Build|\\) AppleWebKit)",
            ),
            (
                "; {0,2}(SH\\-?[0-9][0-9][^;/]{1,100}|SBM[0-9][^;/]{1,100}?)(?: Build|\\) AppleWebKit)",
                "; {0,2}(SH\\-?[0-9][0-9][^;/]+|SBM[0-9][^;/]+?)(?: Build|\\) AppleWebKit)",
            ),
        ]);
    }

    /// Braces which do not denote a repetition are left untouched.
    #[test]
    fn ignore_non_repetitions() {
        let escaped = r"\{1,2}";
        assert_eq!(
            rewrite(escaped),
            escaped,
            "if the opening brace is escaped it's not a repetition"
        );

        let in_set = "[.{1,100}]";
        assert_eq!(
            rewrite(in_set),
            in_set,
            "inside a set it's not a repetition"
        );
    }

    /// Perl-style classes are expanded to their ascii equivalent,
    /// including inside of a set.
    #[test]
    fn rewrite_classes() {
        check(&[
            (r"\dx", "[0-9]x"),
            (r"\wx", "[A-Za-z0-9_]x"),
            (r"[\d]x", r"[[0-9]]x"),
        ]);
    }
}