// oxc_regular_expression 0.52.0
//
// A collection of JavaScript tools written in Rust.
// Documentation
// Private submodules of this module.
mod reader_impl;
mod string_literal_parser;

// Re-export `Reader` from `reader_impl` so it can be named through this module
// (the tests below import it as `crate::parser::reader::Reader`).
pub use reader_impl::Reader;

#[cfg(test)]
mod test {
    use crate::parser::reader::Reader;

    /// `Reader::initialize` must return an error for sources that open a quote
    /// (`"` or `'`) and never close it.
    #[test]
    fn should_fail() {
        let unterminated_sources = [r#""Unterminated"#, "'Unterminated!"];

        for source_text in unterminated_sources {
            // NOTE(review): the second argument appears to select unicode vs. legacy
            // reading (see `should_pass_unicode`); both settings are exercised here.
            for unicode_mode in [true, false] {
                let outcome = Reader::initialize(source_text, unicode_mode, true);
                assert!(outcome.is_err());
            }
        }
    }

    /// Drives a reader over `RegExp!` (bare, double-quoted and single-quoted) and checks
    /// `peek`/`peek2`, `eat`/`eat2`/`eat3`, `advance` and `checkpoint`/`rewind`.
    #[test]
    fn should_pass_basic() {
        // (source text, third `initialize` argument): the flag is `true` only for the
        // quoted sources.
        let cases = [("RegExp!", false), (r#""RegExp!""#, true), ("'RegExp!'", true)];

        for (source_text, quoted) in cases {
            for unicode_mode in [true, false] {
                let mut reader = Reader::initialize(source_text, unicode_mode, quoted).unwrap();

                // Lookahead must not consume anything.
                assert_eq!(reader.peek(), Some(u32::from('R')));
                assert_eq!(reader.peek2(), Some(u32::from('e')));

                // `eat` consumes only when the expected character is next.
                assert!(reader.eat('R'));
                assert!(!reader.eat('R'));
                assert!(reader.eat('e'));
                assert!(reader.eat('g'));
                assert!(reader.eat('E'));

                // 'E' was already consumed, so the three-character form fails
                // and the two-character form succeeds.
                assert!(!reader.eat3('E', 'x', 'p'));
                assert!(reader.eat2('x', 'p'));

                // Save the position in front of '!', move forward twice, then restore it.
                let saved = reader.checkpoint();
                assert_eq!(reader.peek(), Some(u32::from('!')));
                reader.advance();
                reader.advance();

                reader.rewind(saved);
                assert_eq!(reader.peek(), Some(u32::from('!')));

                // Consuming the last character leaves nothing to peek at.
                assert!(reader.eat('!'));
                assert_eq!(reader.peek(), None);
            }
        }
    }

    /// Reads `𠮷野家は👈🏻あっち` with a unicode reader and with a legacy reader.
    ///
    /// `𠮷` and both emoji lie outside the BMP. The unicode reader consumes each of them in
    /// one step, while the legacy reader needs two `advance` calls per character (high and
    /// low surrogate) and can never `eat('𠮷')` as a single unit.
    #[test]
    fn should_pass_unicode() {
        let source_text = "𠮷野家は👈🏻あっち";

        let mut unicode_reader = Reader::initialize(source_text, true, false).unwrap();
        assert!(unicode_reader.eat('𠮷')); // Can eat
        assert!(unicode_reader.eat2('野', '家'));
        let checkpoint = unicode_reader.checkpoint();
        assert!(unicode_reader.eat('は'));
        unicode_reader.advance(); // Emoji
        unicode_reader.advance(); // Skin tone
        assert!(unicode_reader.eat('あ'));
        assert_eq!(unicode_reader.peek(), Some('っ' as u32));
        assert_eq!(unicode_reader.peek2(), Some('ち' as u32));
        // Rewinding puts the reader back in front of 'は'.
        unicode_reader.rewind(checkpoint);
        assert!(unicode_reader.eat('は'));

        let mut legacy_reader = Reader::initialize(source_text, false, false).unwrap();
        assert!(!legacy_reader.eat('𠮷')); // Can not eat
        legacy_reader.advance();
        assert!(!legacy_reader.eat('𠮷')); // Also can not
        legacy_reader.advance();
        assert!(legacy_reader.eat('野'));
        assert!(legacy_reader.eat('家'));
        // Take the checkpoint from the reader that is rewound below; a checkpoint from
        // `unicode_reader` is only valid here if both readers happen to share a position.
        let checkpoint = legacy_reader.checkpoint();
        assert!(legacy_reader.eat('は'));
        legacy_reader.advance(); // Emoji(High surrogate)
        legacy_reader.advance(); // Emoji(Low surrogate)
        legacy_reader.advance(); // Skin tone(High surrogate)
        legacy_reader.advance(); // Skin tone(Low surrogate)
        assert_eq!(legacy_reader.peek(), Some('あ' as u32));
        assert_eq!(legacy_reader.peek2(), Some('っ' as u32));
        assert!(legacy_reader.eat3('あ', 'っ', 'ち'));
        // Rewinding puts the reader back in front of 'は'.
        legacy_reader.rewind(checkpoint);
        assert!(legacy_reader.eat('は'));
    }

    /// Offsets taken around consumed characters must slice the expected text out of the
    /// source via `atom`, for the bare pattern and for the same pattern wrapped in
    /// double quotes.
    #[test]
    fn span_position() {
        let source_text1 = r"^ Catch😎 @@ symbols🇺🇳 $";
        let source_text2 = format!("\"{source_text1}\"");

        let reader1 = Reader::initialize(source_text1, true, false).unwrap();
        let reader2 = Reader::initialize(&source_text2, true, true).unwrap();

        // Each entry is the text to consume; its first character is what the reader
        // seeks before consuming.
        let expected_atoms = ["^", "@@", "$"];

        for mut reader in [reader1, reader2] {
            for expected in expected_atoms {
                let first = expected.chars().next().map(u32::from);

                // Skip forward until the first character of the expected text is next.
                while reader.peek() != first {
                    reader.advance();
                }

                let start = reader.offset();
                for ch in expected.chars() {
                    assert!(reader.eat(ch));
                }
                let end = reader.offset();

                assert_eq!(&reader.atom(start, end), expected);
            }
        }
    }

    /// A string literal written entirely with `\uXXXX` escapes must yield the same
    /// sequence of code points as the equivalent plain text.
    #[test]
    fn handle_escapes() {
        let mut reader1 = Reader::initialize("こんにちWorld2024", true, false).unwrap();
        let mut reader2 = Reader::initialize(
            r#""\u3053\u3093\u306B\u3061\u0057\u006F\u0072\u006C\u0064\u0032\u0030\u0032\u0034""#,
            false,
            true,
        )
        .unwrap();

        // Both readers must agree on the first two characters of the text.
        assert_eq!(reader1.eat('こ'), reader2.eat('こ'));
        assert_eq!(reader1.eat('ん'), reader2.eat('ん'));

        // Walk the remainder in lockstep; the readers must yield equal code points and
        // reach the end at the same time.
        loop {
            match (reader1.peek(), reader2.peek()) {
                (None, None) => {
                    break; // All passed
                }
                (Some(cp1), Some(cp2)) if cp1 == cp2 => {
                    reader1.advance();
                    reader2.advance();
                }
                _ => {
                    panic!("Mismatched characters");
                }
            }
        }
    }
}