posix_regex/
compile.rs

1//! The regex "compiler", which parses the regex itself.
2//! Produces a matcher ready to match input.
3
4#[cfg(feature = "no_std")]
5use alloc::collections::BTreeMap as HashMap;
6
7#[cfg(not(feature = "no_std"))]
8use std::collections::HashMap;
9
10use alloc::{borrow::Cow, vec, vec::Vec};
11use core::fmt;
12
13use crate::{ctype, tree::*, PosixRegex};
14
15/// Repetition bounds, for example + is (1, None), and ? is (0, Some(1))
16#[derive(Clone, Copy, PartialEq, Eq)]
17pub struct Range(pub u32, pub Option<u32>);
18impl fmt::Debug for Range {
19    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
20        match self {
21            Range(start, None) => write!(f, "{}..", start),
22            Range(start, Some(end)) => write!(f, "{}..{}", start, end),
23        }
24    }
25}
26
27/// An item inside square brackets, like `[abc]` or `[[:digit:]]`
28#[derive(Clone, PartialEq, Eq)]
29pub enum Collation {
30    Char(u8),
31    Class(fn(u8) -> bool),
32}
33impl Collation {
34    /// Compare this collation to a character
35    pub fn matches(&self, other: u8, insensitive: bool) -> bool {
36        match *self {
37            Collation::Char(me) if insensitive => {
38                if ctype::is_alpha(me) && ctype::is_alpha(other) {
39                    me | 32 == other | 32
40                } else {
41                    me == other
42                }
43            }
44            Collation::Char(me) => me == other,
45            Collation::Class(f) => f(other),
46        }
47    }
48}
49impl fmt::Debug for Collation {
50    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
51        match *self {
52            Collation::Char(c) => write!(f, "{:?}", c as char),
53            Collation::Class(c) => write!(f, "{:p}", c),
54        }
55    }
56}
57
58/// A single "compiled" token, such as a `.` or a character literal
59#[derive(Clone, PartialEq, Eq)]
60pub enum Token {
61    /// Internal token used to find matches that might be anywhere in the text
62    InternalStart,
63
64    Alternative,
65    Any,
66    BackRef(u32),
67    Char(u8),
68    End,
69    Group(usize),
70    OneOf {
71        invert: bool,
72        list: Vec<Collation>,
73    },
74    Root,
75    Start,
76    WordEnd,
77    WordStart,
78}
79impl fmt::Debug for Token {
80    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81        match *self {
82            Token::InternalStart => write!(f, "<START>"),
83
84            Token::Alternative => write!(f, "Alternative"),
85            Token::Any => write!(f, "."),
86            Token::BackRef(id) => write!(f, "\\{}", id),
87            Token::Char(c) => write!(f, "{:?}", c as char),
88            Token::End => write!(f, "$"),
89            Token::Group(id) => write!(f, "Group({})", id),
90            Token::OneOf { invert, ref list } => write!(f, "{{invert: {}, {:?}}}", invert, list),
91            Token::Root => write!(f, "Root"),
92            Token::Start => write!(f, "^"),
93            Token::WordEnd => write!(f, ">"),
94            Token::WordStart => write!(f, "<"),
95        }
96    }
97}
98/// An error that occurred while compiling the regex
99#[derive(Clone, Debug, PartialEq, Eq)]
100pub enum Error {
101    EOF,
102    EmptyRepetition,
103    Expected(u8, Option<u8>),
104    IllegalRange,
105    IntegerOverflow,
106    InvalidBackRef(u32),
107    LeadingRepetition,
108    UnclosedRepetition,
109    UnexpectedToken(u8),
110    UnknownClass(Vec<u8>),
111    UnknownCollation,
112}
113
114/// A regex builder struct
115pub struct PosixRegexBuilder<'a> {
116    input: &'a [u8],
117    classes: HashMap<&'a [u8], fn(u8) -> bool>,
118    group_id: usize,
119    builder: TreeBuilder,
120    extended: bool,
121}
122impl<'a> PosixRegexBuilder<'a> {
123    /// Create a new instance that is ready to parse the regex `input`
124    pub fn new(input: &'a [u8]) -> Self {
125        Self {
126            input,
127            classes: HashMap::new(),
128            group_id: 1,
129            builder: TreeBuilder::default(),
130            extended: false,
131        }
132    }
133    /// Add a custom collation class, for use within square brackets (such as `[[:digit:]]`)
134    pub fn with_class(mut self, name: &'a [u8], callback: fn(u8) -> bool) -> Self {
135        self.classes.insert(name, callback);
136        self
137    }
138    /// Add all the default collation classes, like `[[:digit:]]` and `[[:alnum:]]`
139    pub fn with_default_classes(mut self) -> Self {
140        #[cfg(not(feature = "no_std"))]
141        self.classes.reserve(12);
142
143        self.classes.insert(b"alnum", ctype::is_alnum);
144        self.classes.insert(b"alpha", ctype::is_alpha);
145        self.classes.insert(b"blank", ctype::is_blank);
146        self.classes.insert(b"cntrl", ctype::is_cntrl);
147        self.classes.insert(b"digit", ctype::is_digit);
148        self.classes.insert(b"graph", ctype::is_graph);
149        self.classes.insert(b"lower", ctype::is_lower);
150        self.classes.insert(b"print", ctype::is_print);
151        self.classes.insert(b"punct", ctype::is_punct);
152        self.classes.insert(b"space", ctype::is_space);
153        self.classes.insert(b"upper", ctype::is_upper);
154        self.classes.insert(b"xdigit", ctype::is_xdigit);
155
156        self
157    }
158    /// Use POSIX extended regex
159    pub fn extended(mut self, extended: bool) -> Self {
160        self.extended = extended;
161        self
162    }
163    /// "Compile" this regex to a struct ready to match input
164    pub fn compile(self) -> Result<PosixRegex<'static>, Error> {
165        let tree = self.compile_tokens()?;
166        Ok(PosixRegex::new(Cow::Owned(tree)))
167    }
168    pub fn compile_tokens(mut self) -> Result<Tree, Error> {
169        self.builder.start_internal(Token::Root, Range(1, Some(1)));
170        self.parse()?;
171        self.builder.finish_internal();
172        Ok(self.builder.finish())
173    }
174
175    fn consume(&mut self, amount: usize) {
176        self.input = &self.input[amount..];
177    }
178    fn take_int(&mut self) -> Result<Option<u32>, Error> {
179        let mut out: Option<u32> = None;
180        while let Some(&c @ b'0'..=b'9') = self.input.first() {
181            self.consume(1);
182            out = Some(
183                out.unwrap_or(0)
184                    .checked_mul(10)
185                    .and_then(|out| out.checked_add((c - b'0') as u32))
186                    .ok_or(Error::IntegerOverflow)?,
187            );
188        }
189        Ok(out)
190    }
191    fn next(&mut self) -> Result<u8, Error> {
192        self.input
193            .first()
194            .map(|&c| {
195                self.consume(1);
196                c
197            })
198            .ok_or(Error::EOF)
199    }
200    fn expect(&mut self, c: u8) -> Result<(), Error> {
201        if self.input.first() != Some(&c) {
202            return Err(Error::Expected(c, self.input.first().cloned()));
203        }
204        self.consume(1);
205        Ok(())
206    }
207    fn parse_range(&mut self) -> Result<Range, Error> {
208        let mut range = Range(1, Some(1));
209        if let Some(&c) = self.input.first() {
210            let new = match c {
211                b'*' => Some((1, Range(0, None))),
212                b'\\' => match self.input.get(1) {
213                    Some(b'?') if !self.extended => Some((2, Range(0, Some(1)))),
214                    Some(b'+') if !self.extended => Some((2, Range(1, None))),
215                    Some(b'{') if !self.extended => {
216                        self.consume(2);
217                        let first = self.take_int()?.ok_or(Error::EmptyRepetition)?;
218                        let mut second = Some(first);
219                        if let Some(b',') = self.input.first() {
220                            self.consume(1);
221                            second = self.take_int()?;
222                        }
223                        if self.input.first() == Some(&b'}') {
224                            self.consume(1);
225                        } else if self.input.starts_with(br"\}") {
226                            self.consume(2);
227                        } else {
228                            return Err(Error::UnclosedRepetition);
229                        }
230                        if second.map(|second| first > second).unwrap_or(false) {
231                            return Err(Error::IllegalRange);
232                        }
233                        range = Range(first, second);
234                        None
235                    }
236                    _ => None,
237                },
238                b'?' if self.extended => Some((1, Range(0, Some(1)))),
239                b'+' if self.extended => Some((1, Range(1, None))),
240                b'{' if self.extended => {
241                    self.consume(1);
242                    let first = self.take_int()?.ok_or(Error::EmptyRepetition)?;
243                    let mut second = Some(first);
244                    if let Some(b',') = self.input.first() {
245                        self.consume(1);
246                        second = self.take_int()?;
247                    }
248                    if self.input.first() == Some(&b'}') {
249                        self.consume(1);
250                    } else if self.input.starts_with(br"\}") {
251                        self.consume(2);
252                    } else {
253                        return Err(Error::UnclosedRepetition);
254                    }
255                    if second.map(|second| first > second).unwrap_or(false) {
256                        return Err(Error::IllegalRange);
257                    }
258                    range = Range(first, second);
259                    None
260                }
261                _ => None,
262            };
263            if let Some((consume, new)) = new {
264                range = new;
265                self.consume(consume);
266            }
267        }
268        Ok(range)
269    }
270    fn parse(&mut self) -> Result<(), Error> {
271        self.builder
272            .start_internal(Token::Alternative, Range(1, Some(1)));
273        while let Ok(c) = self.next() {
274            let token = match c {
275                b'^' => Token::Start,
276                b'$' => Token::End,
277                b'.' => Token::Any,
278                b'[' => {
279                    let mut list = Vec::new();
280                    let invert = self.input.first() == Some(&b'^');
281
282                    if invert {
283                        self.consume(1);
284                    }
285
286                    loop {
287                        let mut c = self.next()?;
288
289                        let mut push = true;
290
291                        if c == b'[' {
292                            // TODO: Handle collation characters properly,
293                            // because currently idk what they are and only
294                            // have the behavior of `grep` to go on.
295                            match self.next()? {
296                                b'.' => {
297                                    c = self.next()?;
298                                    self.expect(b'.')?;
299                                    self.expect(b']')?;
300                                }
301                                b'=' => {
302                                    c = self.next()?;
303                                    self.expect(b'=')?;
304                                    self.expect(b']')?;
305                                }
306                                b':' => {
307                                    let end = self
308                                        .input
309                                        .iter()
310                                        .position(|&c| c == b':')
311                                        .ok_or(Error::EOF)?;
312                                    let key = &self.input[..end];
313                                    let class = *self
314                                        .classes
315                                        .get(key)
316                                        .ok_or_else(|| Error::UnknownClass(key.to_vec()))?;
317                                    self.consume(end + 1);
318                                    self.expect(b']')?;
319
320                                    list.push(Collation::Class(class));
321                                    push = false;
322                                }
323                                _ => return Err(Error::UnknownCollation),
324                            }
325                        }
326
327                        if push {
328                            list.push(Collation::Char(c));
329
330                            if self.input.first() == Some(&b'-') && self.input.get(1) != Some(&b']')
331                            {
332                                self.consume(1);
333                                let dest = self.next()?;
334                                for c in (c + 1)..=dest {
335                                    list.push(Collation::Char(c));
336                                }
337                            }
338                        }
339
340                        if self.input.first() == Some(&b']') {
341                            self.consume(1);
342                            break;
343                        }
344                    }
345
346                    Token::OneOf { invert, list }
347                }
348                b'\\'
349                    if self
350                        .input
351                        .first()
352                        .map(|&c| c.is_ascii_digit())
353                        .unwrap_or(false) =>
354                {
355                    let id = self.take_int()?.unwrap();
356                    if (id as usize) >= self.group_id {
357                        return Err(Error::InvalidBackRef(id));
358                    }
359                    Token::BackRef(id)
360                }
361                b'\\' => match self.next()? {
362                    b'(' if !self.extended => {
363                        let id = self.group_id;
364                        self.group_id += 1;
365                        let checkpoint = self.builder.checkpoint();
366                        self.parse()?;
367                        let range = self.parse_range()?;
368                        self.builder
369                            .start_internal_at(checkpoint, Token::Group(id), range);
370                        self.builder.finish_internal();
371                        continue;
372                    }
373                    b')' if !self.extended => break,
374                    b'|' if !self.extended => {
375                        self.builder.finish_internal();
376                        self.builder
377                            .start_internal(Token::Alternative, Range(1, Some(1)));
378                        continue;
379                    }
380                    b'<' => Token::WordStart,
381                    b'>' => Token::WordEnd,
382                    b'a' => Token::OneOf {
383                        invert: false,
384                        list: vec![Collation::Class(ctype::is_alnum)],
385                    },
386                    b'd' => Token::OneOf {
387                        invert: false,
388                        list: vec![Collation::Class(ctype::is_digit)],
389                    },
390                    b's' => Token::OneOf {
391                        invert: false,
392                        list: vec![Collation::Class(ctype::is_space)],
393                    },
394                    b'S' => Token::OneOf {
395                        invert: true,
396                        list: vec![Collation::Class(ctype::is_space)],
397                    },
398                    b'n' => Token::Char(b'\n'),
399                    b'r' => Token::Char(b'\r'),
400                    b't' => Token::Char(b'\t'),
401                    c => Token::Char(c),
402                },
403                b'(' if self.extended => {
404                    let id = self.group_id;
405                    self.group_id += 1;
406                    let checkpoint = self.builder.checkpoint();
407                    self.parse()?;
408                    let range = self.parse_range()?;
409                    self.builder
410                        .start_internal_at(checkpoint, Token::Group(id), range);
411                    self.builder.finish_internal();
412                    continue;
413                }
414                b')' if self.extended => break,
415                b'|' if self.extended => {
416                    self.builder.finish_internal();
417                    self.builder
418                        .start_internal(Token::Alternative, Range(1, Some(1)));
419                    continue;
420                }
421                c => Token::Char(c),
422            };
423            let range = self.parse_range()?;
424            self.builder.leaf(token, range);
425        }
426        self.builder.finish_internal();
427        Ok(())
428    }
429}
430
431#[cfg(test)]
432mod tests {
433    use super::*;
434
435    use alloc::{format, string::String};
436
437    fn compile(input: &[u8]) -> String {
438        format!(
439            "{:?}",
440            PosixRegexBuilder::new(input)
441                .with_default_classes()
442                .compile_tokens()
443                .expect("error compiling regex")
444        )
445    }
446
447    #[test]
448    fn basic() {
449        assert_eq!(
450            compile(b"abc"),
451            "\
452Root 1..1
453  Alternative 1..1
454    'a' 1..1
455    'b' 1..1
456    'c' 1..1
457"
458        );
459    }
460    #[test]
461    fn groups() {
462        assert_eq!(
463            compile(br"\(abc\|bcd\|cde\)"),
464            "\
465Root 1..1
466  Alternative 1..1
467    Group(1) 1..1
468      Alternative 1..1
469        'a' 1..1
470        'b' 1..1
471        'c' 1..1
472      Alternative 1..1
473        'b' 1..1
474        'c' 1..1
475        'd' 1..1
476      Alternative 1..1
477        'c' 1..1
478        'd' 1..1
479        'e' 1..1
480"
481        );
482        assert_eq!(
483            compile(br"\(abc\|\(bcd\|cde\)\)"),
484            "\
485Root 1..1
486  Alternative 1..1
487    Group(1) 1..1
488      Alternative 1..1
489        'a' 1..1
490        'b' 1..1
491        'c' 1..1
492      Alternative 1..1
493        Group(2) 1..1
494          Alternative 1..1
495            'b' 1..1
496            'c' 1..1
497            'd' 1..1
498          Alternative 1..1
499            'c' 1..1
500            'd' 1..1
501            'e' 1..1
502"
503        );
504    }
505    #[test]
506    fn words() {
507        assert_eq!(
508            compile(br"\<word\>"),
509            "\
510Root 1..1
511  Alternative 1..1
512    < 1..1
513    'w' 1..1
514    'o' 1..1
515    'r' 1..1
516    'd' 1..1
517    > 1..1
518"
519        );
520    }
521    #[test]
522    fn repetitions() {
523        assert_eq!(
524            compile(br"yeee*"),
525            "\
526Root 1..1
527  Alternative 1..1
528    'y' 1..1
529    'e' 1..1
530    'e' 1..1
531    'e' 0..
532"
533        );
534        assert_eq!(
535            compile(br"yee\?"),
536            "\
537Root 1..1
538  Alternative 1..1
539    'y' 1..1
540    'e' 1..1
541    'e' 0..1
542"
543        );
544        assert_eq!(
545            compile(br"yee\+"),
546            "\
547Root 1..1
548  Alternative 1..1
549    'y' 1..1
550    'e' 1..1
551    'e' 1..
552"
553        );
554        assert_eq!(
555            compile(br"ye\{2}"),
556            "\
557Root 1..1
558  Alternative 1..1
559    'y' 1..1
560    'e' 2..2
561"
562        );
563        assert_eq!(
564            compile(br"ye\{2,}"),
565            "\
566Root 1..1
567  Alternative 1..1
568    'y' 1..1
569    'e' 2..
570"
571        );
572        assert_eq!(
573            compile(br"ye\{2,3}"),
574            "\
575Root 1..1
576  Alternative 1..1
577    'y' 1..1
578    'e' 2..3
579"
580        );
581    }
582    #[test]
583    fn bracket() {
584        assert_eq!(
585            compile(b"[abc]"),
586            "\
587Root 1..1
588  Alternative 1..1
589    {invert: false, ['a', 'b', 'c']} 1..1
590"
591        );
592        assert_eq!(
593            compile(b"[^abc]"),
594            "\
595Root 1..1
596  Alternative 1..1
597    {invert: true, ['a', 'b', 'c']} 1..1
598"
599        );
600        assert_eq!(
601            compile(b"[]] [^]]"),
602            "\
603Root 1..1
604  Alternative 1..1
605    {invert: false, [']']} 1..1
606    ' ' 1..1
607    {invert: true, [']']} 1..1
608"
609        );
610        assert_eq!(
611            compile(b"[0-3] [a-c] [-1] [1-]"),
612            "\
613Root 1..1
614  Alternative 1..1
615    {invert: false, ['0', '1', '2', '3']} 1..1
616    ' ' 1..1
617    {invert: false, ['a', 'b', 'c']} 1..1
618    ' ' 1..1
619    {invert: false, ['-', '1']} 1..1
620    ' ' 1..1
621    {invert: false, ['1', '-']} 1..1
622"
623        );
624        assert_eq!(
625            compile(b"[[.-.]-/]"),
626            "\
627Root 1..1
628  Alternative 1..1
629    {invert: false, ['-', '.', '/']} 1..1
630"
631        );
632        assert_eq!(
633            compile(b"[[:digit:][:upper:]]"),
634            format!(
635                "\
636Root 1..1
637  Alternative 1..1
638    {{invert: false, [{:p}, {:p}]}} 1..1
639",
640                ctype::is_digit as fn(u8) -> bool,
641                ctype::is_upper as fn(u8) -> bool
642            )
643        );
644    }
645    #[test]
646    fn newline() {
647        assert_eq!(
648            compile(br"\r\n"),
649            "\
650Root 1..1
651  Alternative 1..1
652    '\\r' 1..1
653    '\\n' 1..1
654"
655        );
656    }
657    #[test]
658    fn backref() {
659        assert_eq!(
660            compile(br"\([abc]\)\1"),
661            "\
662Root 1..1
663  Alternative 1..1
664    Group(1) 1..1
665      Alternative 1..1
666        {invert: false, ['a', 'b', 'c']} 1..1
667    \\1 1..1
668"
669        )
670    }
671
672    fn compile_extended(input: &[u8]) -> String {
673        format!(
674            "{:?}",
675            PosixRegexBuilder::new(input)
676                .with_default_classes()
677                .extended(true)
678                .compile_tokens()
679                .expect("error compiling regex")
680        )
681    }
682
683    #[test]
684    fn basic_extended() {
685        assert_eq!(
686            compile_extended(b"abc"),
687            "\
688Root 1..1
689  Alternative 1..1
690    'a' 1..1
691    'b' 1..1
692    'c' 1..1
693"
694        );
695    }
696    #[test]
697    fn groups_extended() {
698        assert_eq!(
699            compile_extended(br"(abc|bcd|cde)"),
700            "\
701Root 1..1
702  Alternative 1..1
703    Group(1) 1..1
704      Alternative 1..1
705        'a' 1..1
706        'b' 1..1
707        'c' 1..1
708      Alternative 1..1
709        'b' 1..1
710        'c' 1..1
711        'd' 1..1
712      Alternative 1..1
713        'c' 1..1
714        'd' 1..1
715        'e' 1..1
716"
717        );
718        assert_eq!(
719            compile_extended(br"(abc|(bcd|cde))"),
720            "\
721Root 1..1
722  Alternative 1..1
723    Group(1) 1..1
724      Alternative 1..1
725        'a' 1..1
726        'b' 1..1
727        'c' 1..1
728      Alternative 1..1
729        Group(2) 1..1
730          Alternative 1..1
731            'b' 1..1
732            'c' 1..1
733            'd' 1..1
734          Alternative 1..1
735            'c' 1..1
736            'd' 1..1
737            'e' 1..1
738"
739        );
740    }
741    #[test]
742    fn words_extended() {
743        assert_eq!(
744            compile_extended(br"\<word\>"),
745            "\
746Root 1..1
747  Alternative 1..1
748    < 1..1
749    'w' 1..1
750    'o' 1..1
751    'r' 1..1
752    'd' 1..1
753    > 1..1
754"
755        );
756    }
757
758    #[test]
759    fn repetitions_extended() {
760        assert_eq!(
761            compile_extended(br"yeee*"),
762            "\
763Root 1..1
764  Alternative 1..1
765    'y' 1..1
766    'e' 1..1
767    'e' 1..1
768    'e' 0..
769"
770        );
771        assert_eq!(
772            compile_extended(br"yee?"),
773            "\
774Root 1..1
775  Alternative 1..1
776    'y' 1..1
777    'e' 1..1
778    'e' 0..1
779"
780        );
781        assert_eq!(
782            compile_extended(br"yee+"),
783            "\
784Root 1..1
785  Alternative 1..1
786    'y' 1..1
787    'e' 1..1
788    'e' 1..
789"
790        );
791        assert_eq!(
792            compile_extended(br"ye{2}"),
793            "\
794Root 1..1
795  Alternative 1..1
796    'y' 1..1
797    'e' 2..2
798"
799        );
800        assert_eq!(
801            compile_extended(br"ye{2,}"),
802            "\
803Root 1..1
804  Alternative 1..1
805    'y' 1..1
806    'e' 2..
807"
808        );
809        assert_eq!(
810            compile_extended(br"ye{2,3}"),
811            "\
812Root 1..1
813  Alternative 1..1
814    'y' 1..1
815    'e' 2..3
816"
817        );
818    }
819    #[test]
820    fn bracket_extended() {
821        assert_eq!(
822            compile_extended(b"[abc]"),
823            "\
824Root 1..1
825  Alternative 1..1
826    {invert: false, ['a', 'b', 'c']} 1..1
827"
828        );
829        assert_eq!(
830            compile_extended(b"[^abc]"),
831            "\
832Root 1..1
833  Alternative 1..1
834    {invert: true, ['a', 'b', 'c']} 1..1
835"
836        );
837        assert_eq!(
838            compile_extended(b"[]] [^]]"),
839            "\
840Root 1..1
841  Alternative 1..1
842    {invert: false, [']']} 1..1
843    ' ' 1..1
844    {invert: true, [']']} 1..1
845"
846        );
847        assert_eq!(
848            compile_extended(b"[0-3] [a-c] [-1] [1-]"),
849            "\
850Root 1..1
851  Alternative 1..1
852    {invert: false, ['0', '1', '2', '3']} 1..1
853    ' ' 1..1
854    {invert: false, ['a', 'b', 'c']} 1..1
855    ' ' 1..1
856    {invert: false, ['-', '1']} 1..1
857    ' ' 1..1
858    {invert: false, ['1', '-']} 1..1
859"
860        );
861        assert_eq!(
862            compile_extended(b"[[.-.]-/]"),
863            "\
864Root 1..1
865  Alternative 1..1
866    {invert: false, ['-', '.', '/']} 1..1
867"
868        );
869        assert_eq!(
870            compile_extended(b"[[:digit:][:upper:]]"),
871            format!(
872                "\
873Root 1..1
874  Alternative 1..1
875    {{invert: false, [{:p}, {:p}]}} 1..1
876",
877                ctype::is_digit as fn(u8) -> bool,
878                ctype::is_upper as fn(u8) -> bool
879            )
880        );
881    }
882    #[test]
883    fn newline_extended() {
884        assert_eq!(
885            compile_extended(br"\r\n"),
886            "\
887Root 1..1
888  Alternative 1..1
889    '\\r' 1..1
890    '\\n' 1..1
891"
892        );
893    }
894    #[test]
895    fn backref_extended() {
896        assert_eq!(
897            compile_extended(br"([abc])\1"),
898            "\
899Root 1..1
900  Alternative 1..1
901    Group(1) 1..1
902      Alternative 1..1
903        {invert: false, ['a', 'b', 'c']} 1..1
904    \\1 1..1
905"
906        )
907    }
908}