browser_tester 1.5.0

use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug, Clone)]
pub(crate) struct Regex {
    ast: Expr,
    capture_count: usize,
    named_captures: HashMap<String, Vec<usize>>,
    named_capture_order: Vec<String>,
    ignore_case: bool,
    multi_line: bool,
    dot_matches_new_line: bool,
    unicode_mode: bool,
}

impl Regex {
    pub(crate) fn new(pattern: &str) -> Result<Self, RegexError> {
        RegexBuilder::new(pattern).build()
    }

    pub(crate) fn is_match(&self, input: &str) -> Result<bool, RegexError> {
        Ok(self.find(input)?.is_some())
    }

    pub(crate) fn captures(&self, input: &str) -> Result<Option<Captures>, RegexError> {
        self.captures_from_pos(input, 0)
    }

    pub(crate) fn captures_from_pos(
        &self,
        input: &str,
        start: usize,
    ) -> Result<Option<Captures>, RegexError> {
        let prepared = PreparedInput::new(input);
        if start > prepared.text_len_bytes() {
            return Ok(None);
        }
        let Some(start_char) = prepared.byte_to_char_index(start) else {
            return Err(RegexError::new(
                "search start is not a valid UTF-8 character boundary",
            ));
        };
        Ok(self
            .find_spans_from_char(&prepared, start_char)
            .map(|spans| self.spans_to_captures(&prepared, spans)))
    }

    pub(crate) fn find(&self, input: &str) -> Result<Option<Match>, RegexError> {
        let captures = self.captures(input)?;
        Ok(captures.and_then(|c| c.get(0).cloned()))
    }

    fn find_spans_from_char(
        &self,
        prepared: &PreparedInput<'_>,
        start_char: usize,
    ) -> Option<Vec<Option<(usize, usize)>>> {
        let opts = MatchOptions {
            ignore_case: self.ignore_case,
            multi_line: self.multi_line,
            dot_matches_new_line: self.dot_matches_new_line,
            unicode_mode: self.unicode_mode,
        };

        let max_start = prepared.text_len_chars();
        for candidate_start in start_char..=max_start {
            let mut caps = vec![None; self.capture_count + 1];
            let initial = MatchState {
                pos: candidate_start,
                captures: caps.split_off(0),
            };
            let states = self.ast.match_states(prepared, &initial, &opts);
            if states.is_empty() {
                continue;
            }
            let mut first = states[0].clone();
            first.captures[0] = Some((candidate_start, first.pos));
            return Some(first.captures);
        }
        None
    }

    fn spans_to_captures(
        &self,
        prepared: &PreparedInput<'_>,
        spans: Vec<Option<(usize, usize)>>,
    ) -> Captures {
        let mut groups = Vec::with_capacity(spans.len());
        for span in spans {
            if let Some((start_char, end_char)) = span {
                let start = prepared.char_to_byte_index(start_char);
                let end = prepared.char_to_byte_index(end_char);
                let text = prepared.slice_bytes_to_string(start, end);
                groups.push(Some(Match { start, end, text }));
            } else {
                groups.push(None);
            }
        }
        Captures {
            groups,
            named_captures: self.named_captures.clone(),
            named_capture_order: self.named_capture_order.clone(),
        }
    }
}

#[derive(Debug, Clone)]
pub(crate) struct RegexBuilder {
    pattern: String,
    case_insensitive: bool,
    multi_line: bool,
    dot_matches_new_line: bool,
    unicode_mode: bool,
    unicode_sets_mode: bool,
}

impl RegexBuilder {
    pub(crate) fn new(pattern: &str) -> Self {
        Self {
            pattern: pattern.to_string(),
            case_insensitive: false,
            multi_line: false,
            dot_matches_new_line: false,
            unicode_mode: false,
            unicode_sets_mode: false,
        }
    }

    pub(crate) fn case_insensitive(&mut self, enabled: bool) -> &mut Self {
        self.case_insensitive = enabled;
        self
    }

    pub(crate) fn multi_line(&mut self, enabled: bool) -> &mut Self {
        self.multi_line = enabled;
        self
    }

    pub(crate) fn dot_matches_new_line(&mut self, enabled: bool) -> &mut Self {
        self.dot_matches_new_line = enabled;
        self
    }

    pub(crate) fn unicode_mode(&mut self, enabled: bool) -> &mut Self {
        self.unicode_mode = enabled;
        self
    }

    pub(crate) fn unicode_sets_mode(&mut self, enabled: bool) -> &mut Self {
        self.unicode_sets_mode = enabled;
        self
    }

    pub(crate) fn build(&self) -> Result<Regex, RegexError> {
        let mut parser = Parser::new(&self.pattern, self.unicode_mode, self.unicode_sets_mode);
        let ast = parser.parse()?;
        Ok(Regex {
            ast,
            capture_count: parser.capture_count,
            named_captures: parser.named_captures,
            named_capture_order: parser.named_capture_order,
            ignore_case: self.case_insensitive,
            multi_line: self.multi_line,
            dot_matches_new_line: self.dot_matches_new_line,
            unicode_mode: self.unicode_mode,
        })
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Captures {
    groups: Vec<Option<Match>>,
    named_captures: HashMap<String, Vec<usize>>,
    named_capture_order: Vec<String>,
}

impl Captures {
    pub(crate) fn len(&self) -> usize {
        self.groups.len()
    }

    pub(crate) fn get(&self, index: usize) -> Option<&Match> {
        self.groups.get(index).and_then(Option::as_ref)
    }

    pub(crate) fn get_named(&self, name: &str) -> Option<&Match> {
        let indices = self.named_captures.get(name)?;
        for index in indices {
            if let Some(found) = self.get(*index) {
                return Some(found);
            }
        }
        None
    }

    pub(crate) fn has_named_groups(&self) -> bool {
        !self.named_captures.is_empty()
    }

    pub(crate) fn named_group_names(&self) -> Vec<String> {
        self.named_capture_order.clone()
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Match {
    start: usize,
    end: usize,
    text: String,
}

impl Match {
    pub(crate) fn as_str(&self) -> &str {
        self.text.as_str()
    }

    pub(crate) fn start(&self) -> usize {
        self.start
    }

    pub(crate) fn end(&self) -> usize {
        self.end
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct RegexError {
    message: String,
}

impl RegexError {
    fn new(message: impl Into<String>) -> Self {
        Self {
            message: message.into(),
        }
    }
}

impl fmt::Display for RegexError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.message)
    }
}

impl std::error::Error for RegexError {}

pub(crate) fn escape(value: &str) -> Cow<'_, str> {
    let mut out = String::with_capacity(value.len());
    let mut changed = false;

    for (idx, ch) in value.chars().enumerate() {
        if idx == 0 && ch.is_ascii_alphanumeric() {
            append_hex_escape(&mut out, ch as u32, 2);
            changed = true;
            continue;
        }

        match ch {
            '\n' => {
                out.push_str("\\n");
                changed = true;
            }
            '\r' => {
                out.push_str("\\r");
                changed = true;
            }
            '\t' => {
                out.push_str("\\t");
                changed = true;
            }
            '\u{000B}' => {
                out.push_str("\\v");
                changed = true;
            }
            '\u{000C}' => {
                out.push_str("\\f");
                changed = true;
            }
            '\u{0020}' => {
                out.push_str("\\x20");
                changed = true;
            }
            '\u{00A0}' => {
                out.push_str("\\xa0");
                changed = true;
            }
            _ if is_regex_meta(ch) => {
                out.push('\\');
                out.push(ch);
                changed = true;
            }
            _ if is_regexp_escape_hex_punctuator(ch) => {
                append_hex_escape(&mut out, ch as u32, 2);
                changed = true;
            }
            _ if is_regexp_escape_unicode_whitespace(ch) => {
                append_hex_escape(&mut out, ch as u32, 4);
                changed = true;
            }
            _ => out.push(ch),
        }
    }

    if changed {
        Cow::Owned(out)
    } else {
        Cow::Borrowed(value)
    }
}

#[derive(Debug, Clone)]
struct PreparedInput<'a> {
    src: &'a str,
    chars: Vec<char>,
    char_to_byte: Vec<usize>,
}

impl<'a> PreparedInput<'a> {
    fn new(src: &'a str) -> Self {
        let mut chars = Vec::new();
        let mut char_to_byte = Vec::new();
        for (idx, ch) in src.char_indices() {
            chars.push(ch);
            char_to_byte.push(idx);
        }
        char_to_byte.push(src.len());
        Self {
            src,
            chars,
            char_to_byte,
        }
    }

    fn text_len_chars(&self) -> usize {
        self.chars.len()
    }

    fn text_len_bytes(&self) -> usize {
        self.src.len()
    }

    fn char_at(&self, idx: usize) -> Option<char> {
        self.chars.get(idx).copied()
    }

    fn char_to_byte_index(&self, idx: usize) -> usize {
        self.char_to_byte
            .get(idx)
            .copied()
            .unwrap_or(self.src.len())
    }

    fn byte_to_char_index(&self, byte_idx: usize) -> Option<usize> {
        self.char_to_byte.binary_search(&byte_idx).ok()
    }

    fn slice_bytes_to_string(&self, start: usize, end: usize) -> String {
        self.src.get(start..end).unwrap_or_default().to_string()
    }

    fn is_word_boundary(&self, pos: usize, ignore_case: bool, unicode_mode: bool) -> bool {
        let prev = pos
            .checked_sub(1)
            .and_then(|idx| self.char_at(idx))
            .is_some_and(|ch| is_word_char_regex(ch, ignore_case, unicode_mode));
        let next = self
            .char_at(pos)
            .is_some_and(|ch| is_word_char_regex(ch, ignore_case, unicode_mode));
        prev != next
    }

    fn unicode_char_at(&self, pos: usize) -> Option<(char, usize)> {
        let first = self.char_at(pos)?;
        let Some(high) = deinternalize_surrogate_marker(first) else {
            return Some((first, 1));
        };
        if !(0xD800..=0xDBFF).contains(&high) {
            return Some((first, 1));
        }
        let Some(second) = self.char_at(pos + 1) else {
            return Some((first, 1));
        };
        let Some(low) = deinternalize_surrogate_marker(second) else {
            return Some((first, 1));
        };
        if !(0xDC00..=0xDFFF).contains(&low) {
            return Some((first, 1));
        }
        let cp = 0x1_0000 + (((high as u32 - 0xD800) << 10) | (low as u32 - 0xDC00));
        let scalar = char::from_u32(cp).expect("valid surrogate pair must decode to scalar");
        Some((scalar, 2))
    }

    fn unicode_char_before(&self, end: usize) -> Option<(char, usize)> {
        let last_pos = end.checked_sub(1)?;
        let last = self.char_at(last_pos)?;
        let Some(low) = deinternalize_surrogate_marker(last) else {
            return Some((last, 1));
        };
        if !(0xDC00..=0xDFFF).contains(&low) {
            return Some((last, 1));
        }
        let Some(high_pos) = end.checked_sub(2) else {
            return Some((last, 1));
        };
        let Some(high_marker) = self.char_at(high_pos) else {
            return Some((last, 1));
        };
        let Some(high) = deinternalize_surrogate_marker(high_marker) else {
            return Some((last, 1));
        };
        if !(0xD800..=0xDBFF).contains(&high) {
            return Some((last, 1));
        }
        let cp = 0x1_0000 + (((high as u32 - 0xD800) << 10) | (low as u32 - 0xDC00));
        let scalar = char::from_u32(cp).expect("valid surrogate pair must decode to scalar");
        Some((scalar, 2))
    }
}

#[derive(Debug, Clone)]
struct MatchState {
    pos: usize,
    captures: Vec<Option<(usize, usize)>>,
}

#[derive(Debug, Clone, Copy)]
struct MatchOptions {
    ignore_case: bool,
    multi_line: bool,
    dot_matches_new_line: bool,
    unicode_mode: bool,
}

#[derive(Debug, Clone)]
enum Expr {
    Empty,
    Sequence(Vec<Expr>),
    Alternation(Vec<Expr>),
    Literal(char),
    Dot,
    StartAnchor,
    EndAnchor,
    WordBoundary(bool),
    CharClass(CharClass),
    UnicodeStringProperty(UnicodeStringProperty),
    ClassSetOperation {
        operands: Vec<CharClass>,
        operators: Vec<ClassSetOperator>,
        negated: bool,
    },
    Group {
        index: Option<usize>,
        expr: Box<Expr>,
    },
    FlagModifierGroup {
        expr: Box<Expr>,
        ignore_case: Option<bool>,
        multi_line: Option<bool>,
        dot_matches_new_line: Option<bool>,
    },
    LookAhead {
        positive: bool,
        expr: Box<Expr>,
    },
    LookBehind {
        positive: bool,
        expr: Box<Expr>,
    },
    NamedBackReference(String),
    DecimalEscape(String),
    BackReference(usize),
    BackReferenceAny(Vec<usize>),
    Quantifier {
        expr: Box<Expr>,
        min: usize,
        max: Option<usize>,
        greedy: bool,
    },
}

impl Expr {
    fn match_states(
        &self,
        prepared: &PreparedInput<'_>,
        state: &MatchState,
        opts: &MatchOptions,
    ) -> Vec<MatchState> {
        match self {
            Self::Empty => vec![state.clone()],
            Self::Sequence(parts) => {
                let mut states = vec![state.clone()];
                for part in parts {
                    let mut next = Vec::new();
                    for candidate in &states {
                        next.extend(part.match_states(prepared, candidate, opts));
                    }
                    if next.is_empty() {
                        return Vec::new();
                    }
                    states = next;
                }
                states
            }
            Self::Alternation(branches) => {
                let mut out = Vec::new();
                for branch in branches {
                    out.extend(branch.match_states(prepared, state, opts));
                }
                out
            }
            Self::Literal(expected) => {
                if opts.unicode_mode {
                    let Some((actual, width)) = prepared.unicode_char_at(state.pos) else {
                        return Vec::new();
                    };
                    if chars_equal(actual, *expected, opts.ignore_case, opts.unicode_mode) {
                        let mut next = state.clone();
                        next.pos += width;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                } else {
                    let Some(actual) = prepared.char_at(state.pos) else {
                        return Vec::new();
                    };
                    if chars_equal(actual, *expected, opts.ignore_case, opts.unicode_mode) {
                        let mut next = state.clone();
                        next.pos += 1;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                }
            }
            Self::Dot => {
                if opts.unicode_mode {
                    let Some((actual, width)) = prepared.unicode_char_at(state.pos) else {
                        return Vec::new();
                    };
                    if opts.dot_matches_new_line || !is_line_terminator(actual) {
                        let mut next = state.clone();
                        next.pos += width;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                } else {
                    let Some(actual) = prepared.char_at(state.pos) else {
                        return Vec::new();
                    };
                    if opts.dot_matches_new_line || !is_line_terminator(actual) {
                        let mut next = state.clone();
                        next.pos += 1;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                }
            }
            Self::StartAnchor => {
                let at_start = state.pos == 0;
                let at_line_start = opts.multi_line
                    && state
                        .pos
                        .checked_sub(1)
                        .and_then(|idx| prepared.char_at(idx))
                        .is_some_and(is_line_terminator);
                if at_start || at_line_start {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::EndAnchor => {
                let at_end = state.pos == prepared.text_len_chars();
                let at_line_end =
                    opts.multi_line && prepared.char_at(state.pos).is_some_and(is_line_terminator);
                if at_end || at_line_end {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::WordBoundary(expected) => {
                if prepared.is_word_boundary(state.pos, opts.ignore_case, opts.unicode_mode)
                    == *expected
                {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::CharClass(class) => {
                let ends =
                    class.match_ends(prepared, state.pos, opts.ignore_case, opts.unicode_mode);
                if ends.is_empty() {
                    Vec::new()
                } else {
                    let mut out = Vec::with_capacity(ends.len());
                    for end in ends {
                        let mut next = state.clone();
                        next.pos = end;
                        out.push(next);
                    }
                    out
                }
            }
            Self::UnicodeStringProperty(property) => {
                let Some(next_pos) = property.match_at(prepared, state.pos) else {
                    return Vec::new();
                };
                let mut next = state.clone();
                next.pos = next_pos;
                vec![next]
            }
            Self::ClassSetOperation {
                operands,
                operators,
                negated,
            } => {
                let ends = class_set_operation_match_ends(
                    prepared,
                    state.pos,
                    operands,
                    operators,
                    *negated,
                    opts.ignore_case,
                    opts.unicode_mode,
                );
                if ends.is_empty() {
                    return Vec::new();
                }
                let mut out = Vec::with_capacity(ends.len());
                for end in ends {
                    let mut next = state.clone();
                    next.pos = end;
                    out.push(next);
                }
                out
            }
            Self::Group { index, expr } => {
                let start = state.pos;
                let states = expr.match_states(prepared, state, opts);
                let mut out = Vec::with_capacity(states.len());
                for mut candidate in states {
                    if let Some(idx) = index {
                        if let Some(slot) = candidate.captures.get_mut(*idx) {
                            *slot = Some((start, candidate.pos));
                        }
                    }
                    out.push(candidate);
                }
                out
            }
            Self::FlagModifierGroup {
                expr,
                ignore_case,
                multi_line,
                dot_matches_new_line,
            } => {
                let mut local_opts = *opts;
                if let Some(value) = ignore_case {
                    local_opts.ignore_case = *value;
                }
                if let Some(value) = multi_line {
                    local_opts.multi_line = *value;
                }
                if let Some(value) = dot_matches_new_line {
                    local_opts.dot_matches_new_line = *value;
                }
                expr.match_states(prepared, state, &local_opts)
            }
            Self::LookAhead { positive, expr } => {
                let inner = expr.match_states(prepared, state, opts);
                if *positive {
                    let Some(mut matched) = inner.into_iter().next() else {
                        return Vec::new();
                    };
                    matched.pos = state.pos;
                    vec![matched]
                } else if inner.is_empty() {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::LookBehind { positive, expr } => {
                let inner = expr.match_states_backward(prepared, state, opts);
                if *positive {
                    let Some(mut matched) = inner.into_iter().next() else {
                        return Vec::new();
                    };
                    matched.pos = state.pos;
                    vec![matched]
                } else if inner.is_empty() {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::BackReference(index) => {
                let Some(group) = state.captures.get(*index).copied().flatten() else {
                    return vec![state.clone()];
                };
                let len = group.1.saturating_sub(group.0);
                if state.pos.saturating_add(len) > prepared.text_len_chars() {
                    return Vec::new();
                }
                for offset in 0..len {
                    let Some(left) = prepared.char_at(group.0 + offset) else {
                        return Vec::new();
                    };
                    let Some(right) = prepared.char_at(state.pos + offset) else {
                        return Vec::new();
                    };
                    if !chars_equal(left, right, opts.ignore_case, opts.unicode_mode) {
                        return Vec::new();
                    }
                }
                let mut next = state.clone();
                next.pos += len;
                vec![next]
            }
            Self::BackReferenceAny(indices) => {
                let Some(group) = indices
                    .iter()
                    .filter_map(|idx| state.captures.get(*idx).copied().flatten())
                    .next()
                else {
                    return vec![state.clone()];
                };
                let len = group.1.saturating_sub(group.0);
                if state.pos.saturating_add(len) > prepared.text_len_chars() {
                    return Vec::new();
                }
                for offset in 0..len {
                    let Some(left) = prepared.char_at(group.0 + offset) else {
                        return Vec::new();
                    };
                    let Some(right) = prepared.char_at(state.pos + offset) else {
                        return Vec::new();
                    };
                    if !chars_equal(left, right, opts.ignore_case, opts.unicode_mode) {
                        return Vec::new();
                    }
                }
                let mut next = state.clone();
                next.pos += len;
                vec![next]
            }
            Self::NamedBackReference(_) => Vec::new(),
            Self::DecimalEscape(_) => Vec::new(),
            Self::Quantifier {
                expr,
                min,
                max,
                greedy,
            } => {
                let capture_slots = capture_slots_in_expr(expr);
                let mut base = state.clone();
                clear_capture_slots(&mut base.captures, &capture_slots);
                let mut levels = vec![vec![base]];
                let limit = max.unwrap_or(usize::MAX);

                while levels.len() - 1 < limit {
                    let prev = levels.last().expect("levels always has one element");
                    let mut next = Vec::new();
                    let mut saw_progress = false;
                    for candidate in prev {
                        let mut seeded = candidate.clone();
                        clear_capture_slots(&mut seeded.captures, &capture_slots);
                        for matched in expr.match_states(prepared, &seeded, opts) {
                            if matched.pos != candidate.pos {
                                saw_progress = true;
                            }
                            next.push(matched);
                        }
                    }
                    if next.is_empty() {
                        break;
                    }

                    if !saw_progress {
                        let repeated = next.clone();
                        levels.push(next);
                        let target = match max {
                            Some(max) if *max == *min => *min,
                            Some(max) => min.saturating_add(1).min(*max),
                            None => min.saturating_add(1),
                        };
                        while levels.len() - 1 < target {
                            levels.push(repeated.clone());
                        }
                        break;
                    }

                    levels.push(next);
                }

                let available_max = levels.len().saturating_sub(1);
                if *min > available_max {
                    return Vec::new();
                }

                let mut out = Vec::new();
                if *greedy {
                    for count in (*min..=available_max).rev() {
                        out.extend(levels[count].iter().cloned());
                    }
                } else {
                    for count in *min..=available_max {
                        out.extend(levels[count].iter().cloned());
                    }
                }
                out
            }
        }
    }

    fn match_states_backward(
        &self,
        prepared: &PreparedInput<'_>,
        state: &MatchState,
        opts: &MatchOptions,
    ) -> Vec<MatchState> {
        match self {
            Self::Empty => vec![state.clone()],
            Self::Sequence(parts) => {
                let mut states = vec![state.clone()];
                for part in parts.iter().rev() {
                    let mut next = Vec::new();
                    for candidate in &states {
                        next.extend(part.match_states_backward(prepared, candidate, opts));
                    }
                    if next.is_empty() {
                        return Vec::new();
                    }
                    states = next;
                }
                states
            }
            Self::Alternation(branches) => {
                let mut out = Vec::new();
                for branch in branches {
                    out.extend(branch.match_states_backward(prepared, state, opts));
                }
                out
            }
            Self::Literal(expected) => {
                if opts.unicode_mode {
                    let Some((actual, width)) = prepared.unicode_char_before(state.pos) else {
                        return Vec::new();
                    };
                    if chars_equal(actual, *expected, opts.ignore_case, opts.unicode_mode) {
                        let Some(prev_pos) = state.pos.checked_sub(width) else {
                            return Vec::new();
                        };
                        let mut next = state.clone();
                        next.pos = prev_pos;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                } else {
                    let Some(prev_pos) = state.pos.checked_sub(1) else {
                        return Vec::new();
                    };
                    let Some(actual) = prepared.char_at(prev_pos) else {
                        return Vec::new();
                    };
                    if chars_equal(actual, *expected, opts.ignore_case, opts.unicode_mode) {
                        let mut next = state.clone();
                        next.pos = prev_pos;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                }
            }
            Self::Dot => {
                if opts.unicode_mode {
                    let Some((actual, width)) = prepared.unicode_char_before(state.pos) else {
                        return Vec::new();
                    };
                    if opts.dot_matches_new_line || !is_line_terminator(actual) {
                        let Some(prev_pos) = state.pos.checked_sub(width) else {
                            return Vec::new();
                        };
                        let mut next = state.clone();
                        next.pos = prev_pos;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                } else {
                    let Some(prev_pos) = state.pos.checked_sub(1) else {
                        return Vec::new();
                    };
                    let Some(actual) = prepared.char_at(prev_pos) else {
                        return Vec::new();
                    };
                    if opts.dot_matches_new_line || !is_line_terminator(actual) {
                        let mut next = state.clone();
                        next.pos = prev_pos;
                        vec![next]
                    } else {
                        Vec::new()
                    }
                }
            }
            Self::StartAnchor => {
                let at_start = state.pos == 0;
                let at_line_start = opts.multi_line
                    && state
                        .pos
                        .checked_sub(1)
                        .and_then(|idx| prepared.char_at(idx))
                        .is_some_and(is_line_terminator);
                if at_start || at_line_start {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::EndAnchor => {
                let at_end = state.pos == prepared.text_len_chars();
                let at_line_end =
                    opts.multi_line && prepared.char_at(state.pos).is_some_and(is_line_terminator);
                if at_end || at_line_end {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::WordBoundary(expected) => {
                if prepared.is_word_boundary(state.pos, opts.ignore_case, opts.unicode_mode)
                    == *expected
                {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::CharClass(class) => {
                let starts =
                    class.match_starts(prepared, state.pos, opts.ignore_case, opts.unicode_mode);
                if starts.is_empty() {
                    Vec::new()
                } else {
                    let mut out = Vec::with_capacity(starts.len());
                    for start in starts {
                        let mut next = state.clone();
                        next.pos = start;
                        out.push(next);
                    }
                    out
                }
            }
            Self::UnicodeStringProperty(property) => {
                let starts = unicode_string_property_match_starts(property, prepared, state.pos);
                if starts.is_empty() {
                    Vec::new()
                } else {
                    let mut out = Vec::with_capacity(starts.len());
                    for start in starts {
                        let mut next = state.clone();
                        next.pos = start;
                        out.push(next);
                    }
                    out
                }
            }
            Self::ClassSetOperation {
                operands,
                operators,
                negated,
            } => {
                let starts = class_set_operation_match_starts(
                    prepared,
                    state.pos,
                    operands,
                    operators,
                    *negated,
                    opts.ignore_case,
                    opts.unicode_mode,
                );
                if starts.is_empty() {
                    Vec::new()
                } else {
                    let mut out = Vec::with_capacity(starts.len());
                    for start in starts {
                        let mut next = state.clone();
                        next.pos = start;
                        out.push(next);
                    }
                    out
                }
            }
            Self::Group { index, expr } => {
                let end = state.pos;
                let states = expr.match_states_backward(prepared, state, opts);
                let mut out = Vec::with_capacity(states.len());
                for mut candidate in states {
                    if let Some(idx) = index {
                        if let Some(slot) = candidate.captures.get_mut(*idx) {
                            *slot = Some((candidate.pos, end));
                        }
                    }
                    out.push(candidate);
                }
                out
            }
            Self::FlagModifierGroup {
                expr,
                ignore_case,
                multi_line,
                dot_matches_new_line,
            } => {
                let mut local_opts = *opts;
                if let Some(value) = ignore_case {
                    local_opts.ignore_case = *value;
                }
                if let Some(value) = multi_line {
                    local_opts.multi_line = *value;
                }
                if let Some(value) = dot_matches_new_line {
                    local_opts.dot_matches_new_line = *value;
                }
                expr.match_states_backward(prepared, state, &local_opts)
            }
            Self::LookAhead { positive, expr } => {
                let inner = expr.match_states(prepared, state, opts);
                if *positive {
                    let Some(mut matched) = inner.into_iter().next() else {
                        return Vec::new();
                    };
                    matched.pos = state.pos;
                    vec![matched]
                } else if inner.is_empty() {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::LookBehind { positive, expr } => {
                let inner = expr.match_states_backward(prepared, state, opts);
                if *positive {
                    let Some(mut matched) = inner.into_iter().next() else {
                        return Vec::new();
                    };
                    matched.pos = state.pos;
                    vec![matched]
                } else if inner.is_empty() {
                    vec![state.clone()]
                } else {
                    Vec::new()
                }
            }
            Self::BackReference(index) => {
                let Some(group) = state.captures.get(*index).copied().flatten() else {
                    return vec![state.clone()];
                };
                let len = group.1.saturating_sub(group.0);
                if state.pos < len {
                    return Vec::new();
                }
                let start = state.pos - len;
                for offset in 0..len {
                    let Some(left) = prepared.char_at(group.0 + offset) else {
                        return Vec::new();
                    };
                    let Some(right) = prepared.char_at(start + offset) else {
                        return Vec::new();
                    };
                    if !chars_equal(left, right, opts.ignore_case, opts.unicode_mode) {
                        return Vec::new();
                    }
                }
                let mut next = state.clone();
                next.pos = start;
                vec![next]
            }
            Self::BackReferenceAny(indices) => {
                let Some(group) = indices
                    .iter()
                    .filter_map(|idx| state.captures.get(*idx).copied().flatten())
                    .next()
                else {
                    return vec![state.clone()];
                };
                let len = group.1.saturating_sub(group.0);
                if state.pos < len {
                    return Vec::new();
                }
                let start = state.pos - len;
                for offset in 0..len {
                    let Some(left) = prepared.char_at(group.0 + offset) else {
                        return Vec::new();
                    };
                    let Some(right) = prepared.char_at(start + offset) else {
                        return Vec::new();
                    };
                    if !chars_equal(left, right, opts.ignore_case, opts.unicode_mode) {
                        return Vec::new();
                    }
                }
                let mut next = state.clone();
                next.pos = start;
                vec![next]
            }
            Self::NamedBackReference(_) => Vec::new(),
            Self::DecimalEscape(_) => Vec::new(),
            Self::Quantifier {
                expr,
                min,
                max,
                greedy,
            } => {
                let capture_slots = capture_slots_in_expr(expr);
                let mut base = state.clone();
                clear_capture_slots(&mut base.captures, &capture_slots);
                let mut levels = vec![vec![base]];
                let limit = max.unwrap_or(usize::MAX);

                while levels.len() - 1 < limit {
                    let prev = levels.last().expect("levels always has one element");
                    let mut next = Vec::new();
                    let mut saw_progress = false;
                    for candidate in prev {
                        let mut seeded = candidate.clone();
                        clear_capture_slots(&mut seeded.captures, &capture_slots);
                        for matched in expr.match_states_backward(prepared, &seeded, opts) {
                            if matched.pos != candidate.pos {
                                saw_progress = true;
                            }
                            next.push(matched);
                        }
                    }
                    if next.is_empty() {
                        break;
                    }

                    if !saw_progress {
                        let repeated = next.clone();
                        levels.push(next);
                        let target = match max {
                            Some(max) if *max == *min => *min,
                            Some(max) => min.saturating_add(1).min(*max),
                            None => min.saturating_add(1),
                        };
                        while levels.len() - 1 < target {
                            levels.push(repeated.clone());
                        }
                        break;
                    }

                    levels.push(next);
                }

                let available_max = levels.len().saturating_sub(1);
                if *min > available_max {
                    return Vec::new();
                }

                let mut out = Vec::new();
                if *greedy {
                    for count in (*min..=available_max).rev() {
                        out.extend(levels[count].iter().cloned());
                    }
                } else {
                    for count in *min..=available_max {
                        out.extend(levels[count].iter().cloned());
                    }
                }
                out
            }
        }
    }
}

#[derive(Debug, Clone)]
struct CharClass {
    negated: bool,
    items: Vec<ClassItem>,
}

impl CharClass {
    fn matches(&self, ch: char, ignore_case: bool, unicode_mode: bool) -> bool {
        let matched = self
            .items
            .iter()
            .any(|item| item.matches(ch, ignore_case, unicode_mode));
        if self.negated { !matched } else { matched }
    }

    fn match_ends(
        &self,
        prepared: &PreparedInput<'_>,
        pos: usize,
        ignore_case: bool,
        unicode_mode: bool,
    ) -> Vec<usize> {
        if self.negated {
            let (actual, width) = if unicode_mode {
                let Some((actual, width)) = prepared.unicode_char_at(pos) else {
                    return Vec::new();
                };
                (actual, width)
            } else {
                let Some(actual) = prepared.char_at(pos) else {
                    return Vec::new();
                };
                (actual, 1)
            };
            let matched = self
                .items
                .iter()
                .any(|item| item.matches(actual, ignore_case, unicode_mode));
            if matched {
                Vec::new()
            } else {
                vec![pos + width]
            }
        } else {
            let mut out = Vec::new();
            for item in &self.items {
                out.extend(item.match_ends(prepared, pos, ignore_case, unicode_mode));
            }
            dedupe_sorted_desc(out)
        }
    }

    fn match_starts(
        &self,
        prepared: &PreparedInput<'_>,
        end: usize,
        ignore_case: bool,
        unicode_mode: bool,
    ) -> Vec<usize> {
        if self.negated {
            let (start, actual) = if unicode_mode {
                let Some((actual, width)) = prepared.unicode_char_before(end) else {
                    return Vec::new();
                };
                let Some(start) = end.checked_sub(width) else {
                    return Vec::new();
                };
                (start, actual)
            } else {
                let Some(start) = end.checked_sub(1) else {
                    return Vec::new();
                };
                let Some(actual) = prepared.char_at(start) else {
                    return Vec::new();
                };
                (start, actual)
            };
            let matched = self
                .items
                .iter()
                .any(|item| item.matches(actual, ignore_case, unicode_mode));
            if matched { Vec::new() } else { vec![start] }
        } else {
            let mut out = Vec::new();
            for item in &self.items {
                out.extend(item.match_starts(prepared, end, ignore_case, unicode_mode));
            }
            dedupe_sorted_asc(out)
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClassSetOperator {
    Intersection,
    Difference,
}

#[derive(Debug, Clone)]
enum ClassItem {
    Char(char),
    Range(char, char),
    NestedExpression(Box<Expr>),
    StringDisjunction(Vec<String>),
    Digit,
    NotDigit,
    Word,
    NotWord,
    Space,
    NotSpace,
    UnicodeProperty {
        property: UnicodeProperty,
        negated: bool,
    },
    UnicodeStringProperty(UnicodeStringProperty),
}

impl ClassItem {
    fn matches(&self, ch: char, ignore_case: bool, unicode_mode: bool) -> bool {
        match self {
            Self::Char(expected) => chars_equal(ch, *expected, ignore_case, unicode_mode),
            Self::Range(start, end) => char_in_range(ch, *start, *end, ignore_case, unicode_mode),
            Self::NestedExpression(expr) => {
                expr_matches_single_char(expr, ch, ignore_case, unicode_mode)
            }
            Self::StringDisjunction(alternatives) => alternatives.iter().any(|value| {
                single_char_string(value)
                    .is_some_and(|expected| chars_equal(ch, expected, ignore_case, unicode_mode))
            }),
            Self::Digit => ch.is_ascii_digit(),
            Self::NotDigit => !ch.is_ascii_digit(),
            Self::Word => is_word_char_regex(ch, ignore_case, unicode_mode),
            Self::NotWord => !is_word_char_regex(ch, ignore_case, unicode_mode),
            Self::Space => is_space_char(ch),
            Self::NotSpace => !is_space_char(ch),
            Self::UnicodeProperty { property, negated } => {
                let matched = property.matches(ch);
                if *negated { !matched } else { matched }
            }
            Self::UnicodeStringProperty(_) => false,
        }
    }

    fn match_ends(
        &self,
        prepared: &PreparedInput<'_>,
        pos: usize,
        ignore_case: bool,
        unicode_mode: bool,
    ) -> Vec<usize> {
        if let Self::NestedExpression(expr) = self {
            return expr_match_ends(expr, prepared, pos, ignore_case, unicode_mode);
        }
        if let Self::StringDisjunction(alternatives) = self {
            return alternatives
                .iter()
                .filter_map(|candidate| {
                    match_literal_string_at(prepared, pos, candidate, ignore_case, unicode_mode)
                })
                .collect::<Vec<_>>();
        }
        if let Self::UnicodeStringProperty(property) = self {
            return property
                .match_at(prepared, pos)
                .into_iter()
                .collect::<Vec<_>>();
        }

        let Some((actual, width)) = (if unicode_mode {
            prepared.unicode_char_at(pos)
        } else {
            prepared.char_at(pos).map(|ch| (ch, 1))
        }) else {
            return Vec::new();
        };

        let matched = match self {
            Self::Char(expected) => chars_equal(actual, *expected, ignore_case, unicode_mode),
            Self::Range(start, end) => {
                char_in_range(actual, *start, *end, ignore_case, unicode_mode)
            }
            Self::Digit => actual.is_ascii_digit(),
            Self::NotDigit => !actual.is_ascii_digit(),
            Self::Word => is_word_char_regex(actual, ignore_case, unicode_mode),
            Self::NotWord => !is_word_char_regex(actual, ignore_case, unicode_mode),
            Self::Space => is_space_char(actual),
            Self::NotSpace => !is_space_char(actual),
            Self::UnicodeProperty { property, negated } => {
                let matched = property.matches(actual);
                if *negated { !matched } else { matched }
            }
            Self::NestedExpression(_)
            | Self::StringDisjunction(_)
            | Self::UnicodeStringProperty(_) => unreachable!("handled above"),
        };

        if matched {
            vec![pos + width]
        } else {
            Vec::new()
        }
    }

    fn match_starts(
        &self,
        prepared: &PreparedInput<'_>,
        end: usize,
        ignore_case: bool,
        unicode_mode: bool,
    ) -> Vec<usize> {
        let mut out = Vec::new();
        for start in 0..=end {
            if self
                .match_ends(prepared, start, ignore_case, unicode_mode)
                .iter()
                .any(|candidate_end| *candidate_end == end)
            {
                out.push(start);
            }
        }
        dedupe_sorted_asc(out)
    }
}

#[derive(Debug, Clone, Copy)]
enum UnicodeProperty {
    Letter,
    Number,
    DecimalNumber,
    UppercaseLetter,
    LowercaseLetter,
    Ascii,
    Any,
    ScriptLatin,
    ScriptGreek,
}

impl UnicodeProperty {
    fn matches(&self, ch: char) -> bool {
        match self {
            Self::Letter => ch.is_alphabetic(),
            Self::Number => ch.is_numeric(),
            Self::DecimalNumber => char_in_ranges(ch, UNICODE_DECIMAL_NUMBER_RANGES),
            Self::UppercaseLetter => ch.is_uppercase(),
            Self::LowercaseLetter => ch.is_lowercase(),
            Self::Ascii => ch.is_ascii(),
            Self::Any => true,
            Self::ScriptLatin => char_in_ranges(ch, UNICODE_SCRIPT_LATIN_RANGES),
            Self::ScriptGreek => char_in_ranges(ch, UNICODE_SCRIPT_GREEK_RANGES),
        }
    }
}

#[derive(Debug, Clone, Copy)]
enum UnicodeStringProperty {
    RgiEmoji,
}

impl UnicodeStringProperty {
    fn match_at(&self, prepared: &PreparedInput<'_>, pos: usize) -> Option<usize> {
        match self {
            Self::RgiEmoji => match_rgi_emoji_at(prepared, pos),
        }
    }
}

#[derive(Debug, Clone, Copy)]
enum UnicodePropertySpec {
    CodePoint(UnicodeProperty),
    String(UnicodeStringProperty),
}

struct Parser {
    chars: Vec<char>,
    i: usize,
    unicode_mode: bool,
    unicode_sets_mode: bool,
    parse_named_backreference_syntax: bool,
    capture_count: usize,
    named_captures: HashMap<String, Vec<usize>>,
    named_capture_order: Vec<String>,
    capture_names: HashMap<usize, String>,
}

impl Parser {
    fn new(pattern: &str, unicode_mode: bool, unicode_sets_mode: bool) -> Self {
        Self {
            chars: pattern.chars().collect(),
            i: 0,
            unicode_mode,
            unicode_sets_mode,
            parse_named_backreference_syntax: true,
            capture_count: 0,
            named_captures: HashMap::new(),
            named_capture_order: Vec::new(),
            capture_names: HashMap::new(),
        }
    }

    fn parse(&mut self) -> Result<Expr, RegexError> {
        let mut expr = self.parse_alternation()?;
        if self.i < self.chars.len() {
            return Err(RegexError::new("invalid regular expression syntax"));
        }
        self.validate_named_capture_duplicates(&expr)?;
        self.resolve_post_parse_escapes(&mut expr)?;
        self.resolve_decimal_escapes(&mut expr)?;
        Ok(expr)
    }

    fn parse_alternation(&mut self) -> Result<Expr, RegexError> {
        let mut branches = vec![self.parse_sequence()?];
        while self.peek() == Some('|') {
            self.i += 1;
            branches.push(self.parse_sequence()?);
        }
        if branches.len() == 1 {
            Ok(branches.pop().unwrap_or(Expr::Empty))
        } else {
            Ok(Expr::Alternation(branches))
        }
    }

    fn parse_sequence(&mut self) -> Result<Expr, RegexError> {
        let mut items = Vec::new();
        while let Some(ch) = self.peek() {
            if ch == ')' || ch == '|' {
                break;
            }
            let atom = self.parse_atom()?;
            let quantified = self.parse_quantifier(atom)?;
            items.push(quantified);
        }

        if items.is_empty() {
            Ok(Expr::Empty)
        } else if items.len() == 1 {
            Ok(items.pop().unwrap_or(Expr::Empty))
        } else {
            Ok(Expr::Sequence(items))
        }
    }

    fn parse_atom(&mut self) -> Result<Expr, RegexError> {
        let Some(ch) = self.next() else {
            return Err(RegexError::new("unexpected end of regular expression"));
        };

        match ch {
            '^' => Ok(Expr::StartAnchor),
            '$' => Ok(Expr::EndAnchor),
            '.' => Ok(Expr::Dot),
            '(' => self.parse_group(),
            '[' => self.parse_char_class(),
            '*' | '+' | '?' => Err(RegexError::new("nothing to repeat")),
            '{' => {
                if self.unicode_mode {
                    Err(RegexError::new("invalid quantifier syntax"))
                } else if self.looks_like_braced_quantifier(self.i - 1) {
                    Err(RegexError::new("nothing to repeat"))
                } else {
                    Ok(Expr::Literal('{'))
                }
            }
            '}' => {
                if self.unicode_mode {
                    Err(RegexError::new("invalid quantifier syntax"))
                } else {
                    Ok(Expr::Literal('}'))
                }
            }
            '\\' => self.parse_escape(false),
            _ => Ok(Expr::Literal(ch)),
        }
    }

    fn parse_group(&mut self) -> Result<Expr, RegexError> {
        if self.peek() == Some('?') {
            self.i += 1;
            let Some(ch) = self.next() else {
                return Err(RegexError::new("unsupported group syntax"));
            };
            match ch {
                ':' => {
                    let expr = self.parse_alternation()?;
                    self.expect(')')?;
                    Ok(Expr::Group {
                        index: None,
                        expr: Box::new(expr),
                    })
                }
                '=' => {
                    let expr = self.parse_alternation()?;
                    self.expect(')')?;
                    Ok(Expr::LookAhead {
                        positive: true,
                        expr: Box::new(expr),
                    })
                }
                '!' => {
                    let expr = self.parse_alternation()?;
                    self.expect(')')?;
                    Ok(Expr::LookAhead {
                        positive: false,
                        expr: Box::new(expr),
                    })
                }
                '<' => {
                    if self.peek() == Some('=') {
                        self.i += 1;
                        let expr = self.parse_alternation()?;
                        self.expect(')')?;
                        Ok(Expr::LookBehind {
                            positive: true,
                            expr: Box::new(expr),
                        })
                    } else if self.peek() == Some('!') {
                        self.i += 1;
                        let expr = self.parse_alternation()?;
                        self.expect(')')?;
                        Ok(Expr::LookBehind {
                            positive: false,
                            expr: Box::new(expr),
                        })
                    } else {
                        let name = self.parse_group_name()?;
                        self.capture_count += 1;
                        let index = self.capture_count;
                        if !self.named_captures.contains_key(&name) {
                            self.named_capture_order.push(name.clone());
                        }
                        self.named_captures
                            .entry(name.clone())
                            .or_default()
                            .push(index);
                        self.capture_names.insert(index, name);
                        let expr = self.parse_alternation()?;
                        self.expect(')')?;
                        Ok(Expr::Group {
                            index: Some(index),
                            expr: Box::new(expr),
                        })
                    }
                }
                'i' | 'm' | 's' | '-' => self.parse_flag_modifier_group(ch),
                _ => Err(RegexError::new("unsupported group syntax")),
            }
        } else {
            self.capture_count += 1;
            let index = self.capture_count;
            let expr = self.parse_alternation()?;
            self.expect(')')?;
            Ok(Expr::Group {
                index: Some(index),
                expr: Box::new(expr),
            })
        }
    }

    fn parse_flag_modifier_group(&mut self, first: char) -> Result<Expr, RegexError> {
        let mut enabling = HashSet::new();
        let mut disabling = HashSet::new();
        let mut in_disabling = false;
        let mut current = first;

        loop {
            match current {
                'i' | 'm' | 's' => {
                    if in_disabling {
                        if !disabling.insert(current) || enabling.contains(&current) {
                            return Err(RegexError::new("duplicate flag in modifier group"));
                        }
                    } else if !enabling.insert(current) || disabling.contains(&current) {
                        return Err(RegexError::new("duplicate flag in modifier group"));
                    }
                }
                '-' => {
                    if in_disabling {
                        return Err(RegexError::new("invalid modifier group flags"));
                    }
                    in_disabling = true;
                }
                ':' => break,
                _ => return Err(RegexError::new("unsupported group syntax")),
            }

            let Some(next) = self.next() else {
                return Err(RegexError::new("unsupported group syntax"));
            };
            current = next;
        }

        if enabling.is_empty() && disabling.is_empty() {
            return Err(RegexError::new("invalid modifier group flags"));
        }

        let expr = self.parse_alternation()?;
        self.expect(')')?;

        Ok(Expr::FlagModifierGroup {
            expr: Box::new(expr),
            ignore_case: flag_modifier_value('i', &enabling, &disabling),
            multi_line: flag_modifier_value('m', &enabling, &disabling),
            dot_matches_new_line: flag_modifier_value('s', &enabling, &disabling),
        })
    }

    fn parse_quantifier(&mut self, atom: Expr) -> Result<Expr, RegexError> {
        let Some(ch) = self.peek() else {
            return Ok(atom);
        };

        let quant = match ch {
            '*' => {
                self.i += 1;
                Some((0usize, None))
            }
            '+' => {
                self.i += 1;
                Some((1usize, None))
            }
            '?' => {
                self.i += 1;
                Some((0usize, Some(1usize)))
            }
            '{' => {
                let checkpoint = self.i;
                match self.parse_braced_quantifier() {
                    Ok(quant) => Some(quant),
                    Err(err) if !self.unicode_mode && err.message != "invalid quantifier range" => {
                        self.i = checkpoint;
                        None
                    }
                    Err(err) => return Err(err),
                }
            }
            _ => None,
        };

        let Some((min, max)) = quant else {
            return Ok(atom);
        };

        if !is_quantifier_target_supported(&atom) {
            return Err(RegexError::new("nothing to repeat"));
        }
        if self.unicode_mode && matches!(atom, Expr::LookAhead { .. }) {
            return Err(RegexError::new("invalid quantifier"));
        }

        let greedy = if self.peek() == Some('?') {
            self.i += 1;
            false
        } else {
            true
        };

        Ok(Expr::Quantifier {
            expr: Box::new(atom),
            min,
            max,
            greedy,
        })
    }

    fn parse_braced_quantifier(&mut self) -> Result<(usize, Option<usize>), RegexError> {
        self.expect('{')?;
        let min = self.parse_usize()?;

        match self.peek() {
            Some('}') => {
                self.i += 1;
                Ok((min, Some(min)))
            }
            Some(',') => {
                self.i += 1;
                if self.peek() == Some('}') {
                    self.i += 1;
                    Ok((min, None))
                } else {
                    let max = self.parse_usize()?;
                    if max < min {
                        return Err(RegexError::new("invalid quantifier range"));
                    }
                    self.expect('}')?;
                    Ok((min, Some(max)))
                }
            }
            _ => Err(RegexError::new("invalid quantifier syntax")),
        }
    }

    fn parse_char_class(&mut self) -> Result<Expr, RegexError> {
        let mut negated = false;
        if self.peek() == Some('^') {
            negated = true;
            self.i += 1;
        }

        let mut operands: Vec<Vec<ClassItem>> = vec![Vec::new()];
        let mut operators: Vec<ClassSetOperator> = Vec::new();

        while let Some(ch) = self.peek() {
            if ch == ']' {
                self.i += 1;
                if operators.is_empty() {
                    let items = operands
                        .pop()
                        .expect("character class always starts with one operand");
                    return self.build_class_expr_from_items(negated, items);
                }

                if operands.len() != operators.len() + 1 || operands.iter().any(Vec::is_empty) {
                    return Err(RegexError::new("invalid set operation"));
                }
                let has_unicode_string = operands
                    .iter()
                    .flatten()
                    .any(class_item_contains_unicode_string_property);
                let has_non_scalar_string = operands
                    .iter()
                    .flatten()
                    .any(class_item_contains_non_scalar_string);
                if negated && (has_unicode_string || has_non_scalar_string) {
                    return Err(RegexError::new("unsupported unicode set syntax"));
                }
                if operators.windows(2).any(|pair| pair[0] != pair[1]) {
                    return Err(RegexError::new("invalid set operation"));
                }
                if operands.iter().any(|operand| {
                    operand.len() != 1
                        || matches!(operand.first(), Some(ClassItem::Range(_, _)) | None)
                }) {
                    return Err(RegexError::new("invalid set operation"));
                }
                let operand_classes = operands
                    .into_iter()
                    .map(|items| CharClass {
                        negated: false,
                        items,
                    })
                    .collect::<Vec<_>>();
                return Ok(Expr::ClassSetOperation {
                    operands: operand_classes,
                    operators,
                    negated,
                });
            }

            if self.unicode_sets_mode {
                if self.peek() == Some('&') && self.peek_next() == Some('&') {
                    if operands.last().map_or(true, Vec::is_empty) {
                        return Err(RegexError::new("invalid set operation"));
                    }
                    self.i += 2;
                    operators.push(ClassSetOperator::Intersection);
                    operands.push(Vec::new());
                    continue;
                }
                if self.peek() == Some('-') && self.peek_next() == Some('-') {
                    if operands.last().map_or(true, Vec::is_empty) {
                        return Err(RegexError::new("invalid set operation"));
                    }
                    self.i += 2;
                    operators.push(ClassSetOperator::Difference);
                    operands.push(Vec::new());
                    continue;
                }
            }

            let item = self.parse_class_item()?;
            if self.peek() == Some('-')
                && !(self.unicode_sets_mode && self.peek_next() == Some('-'))
            {
                let backup = self.i;
                self.i += 1;
                if self.peek() == Some(']') {
                    self.i = backup;
                    operands
                        .last_mut()
                        .expect("character class always has current operand")
                        .push(item);
                    continue;
                }
                let next_item = self.parse_class_item()?;
                match (item, next_item) {
                    (ClassItem::Char(start), ClassItem::Char(end)) => {
                        if (start as u32) > (end as u32) {
                            return Err(RegexError::new("invalid character class range"));
                        }
                        operands
                            .last_mut()
                            .expect("character class always has current operand")
                            .push(ClassItem::Range(start, end));
                    }
                    _ => {
                        return Err(RegexError::new(
                            "character class range bounds must be literal characters",
                        ));
                    }
                }
            } else {
                operands
                    .last_mut()
                    .expect("character class always has current operand")
                    .push(item);
            }
        }

        Err(RegexError::new("unterminated character class"))
    }

    fn parse_class_item(&mut self) -> Result<ClassItem, RegexError> {
        let Some(ch) = self.next() else {
            return Err(RegexError::new("unterminated character class"));
        };

        if ch != '\\' {
            if self.unicode_sets_mode && ch == '[' {
                let nested = self.parse_char_class()?;
                return Ok(ClassItem::NestedExpression(Box::new(nested)));
            }
            return Ok(ClassItem::Char(ch));
        }

        let Some(escaped) = self.next() else {
            return Err(RegexError::new("unterminated escape sequence"));
        };

        match escaped {
            'd' => Ok(ClassItem::Digit),
            'D' => Ok(ClassItem::NotDigit),
            'w' => Ok(ClassItem::Word),
            'W' => Ok(ClassItem::NotWord),
            's' => Ok(ClassItem::Space),
            'S' => Ok(ClassItem::NotSpace),
            'c' => {
                if self.peek().is_some_and(|next| next.is_ascii_alphabetic()) {
                    let next = self.next().expect("peek guaranteed a character");
                    Ok(ClassItem::Char(control_escape_char(next)))
                } else if self
                    .peek()
                    .is_some_and(is_legacy_class_control_escape_suffix)
                {
                    if self.unicode_mode {
                        return Err(RegexError::new("invalid escape sequence"));
                    }
                    let next = self.next().expect("peek guaranteed a character");
                    Ok(ClassItem::Char(control_escape_char_extended(next)))
                } else if self.unicode_mode {
                    Err(RegexError::new("invalid escape sequence"))
                } else {
                    // Legacy (non-unicode) class parsing treats \c without a
                    // valid control suffix as a literal backslash, then re-parses `c`.
                    self.i = self.i.saturating_sub(1);
                    Ok(ClassItem::Char('\\'))
                }
            }
            'n' => Ok(ClassItem::Char('\n')),
            'r' => Ok(ClassItem::Char('\r')),
            't' => Ok(ClassItem::Char('\t')),
            'v' => Ok(ClassItem::Char('\u{000B}')),
            'f' => Ok(ClassItem::Char('\u{000C}')),
            'b' => Ok(ClassItem::Char('\u{0008}')),
            '0' => {
                if self.peek().is_some_and(|next| next.is_ascii_digit()) {
                    if self.unicode_mode {
                        return Err(RegexError::new("invalid numeric backreference"));
                    }
                    if self.peek().is_some_and(|next| matches!(next, '0'..='7')) {
                        Ok(ClassItem::Char(self.parse_legacy_octal_escape_char('0')))
                    } else {
                        Ok(ClassItem::Char('\0'))
                    }
                } else {
                    Ok(ClassItem::Char('\0'))
                }
            }
            '1'..='9' => {
                if self.unicode_mode {
                    return Err(RegexError::new("invalid numeric backreference"));
                }
                if escaped == '8' || escaped == '9' {
                    Ok(ClassItem::Char(escaped))
                } else {
                    Ok(ClassItem::Char(
                        self.parse_legacy_octal_escape_char(escaped),
                    ))
                }
            }
            'x' => {
                if self.peek_is_n_hex_digits(2) {
                    Ok(ClassItem::Char(self.parse_hex_char(2)?))
                } else if self.unicode_mode {
                    Err(RegexError::new("invalid hexadecimal escape sequence"))
                } else {
                    Ok(ClassItem::Char('x'))
                }
            }
            'u' => {
                if self.unicode_mode {
                    Ok(ClassItem::Char(self.parse_unicode_escape()?))
                } else if self.peek_is_n_hex_digits(4) {
                    Ok(ClassItem::Char(self.parse_unicode_escape_fixed4()?))
                } else {
                    Ok(ClassItem::Char('u'))
                }
            }
            'p' | 'P' => {
                if !self.unicode_mode {
                    Ok(ClassItem::Char(escaped))
                } else {
                    match self.parse_unicode_property_escape()? {
                        UnicodePropertySpec::CodePoint(property) => {
                            Ok(ClassItem::UnicodeProperty {
                                property,
                                negated: escaped == 'P',
                            })
                        }
                        UnicodePropertySpec::String(property) => {
                            if escaped == 'P' {
                                return Err(RegexError::new("invalid unicode property escape"));
                            }
                            Ok(ClassItem::UnicodeStringProperty(property))
                        }
                    }
                }
            }
            'q' => {
                if !self.unicode_sets_mode {
                    return Err(RegexError::new("invalid escape sequence"));
                }
                Ok(ClassItem::StringDisjunction(
                    self.parse_class_string_disjunction()?,
                ))
            }
            other => {
                if self.unicode_mode {
                    if is_unicode_identity_escape_in_class(other) {
                        Ok(ClassItem::Char(other))
                    } else {
                        Err(RegexError::new("invalid escape sequence"))
                    }
                } else {
                    Ok(ClassItem::Char(other))
                }
            }
        }
    }

    fn parse_class_string_disjunction(&mut self) -> Result<Vec<String>, RegexError> {
        if self.next() != Some('{') {
            return Err(RegexError::new("invalid escape sequence"));
        }

        let mut alternatives = vec![String::new()];
        loop {
            let Some(ch) = self.peek() else {
                return Err(RegexError::new("unterminated class string disjunction"));
            };

            if ch == '}' {
                self.i += 1;
                return Ok(alternatives);
            }

            if ch == '|' {
                self.i += 1;
                alternatives.push(String::new());
                continue;
            }

            let value = if ch == '\\' {
                self.i += 1;
                self.parse_class_string_escape()?
            } else {
                self.i += 1;
                ch
            };
            alternatives
                .last_mut()
                .expect("class string disjunction always has one alternative")
                .push(value);
        }
    }

    fn parse_class_string_escape(&mut self) -> Result<char, RegexError> {
        let Some(escaped) = self.next() else {
            return Err(RegexError::new("unterminated escape sequence"));
        };

        match escaped {
            'c' => {
                if self.peek().is_some_and(|next| next.is_ascii_alphabetic()) {
                    let next = self.next().expect("peek guaranteed a character");
                    Ok(control_escape_char(next))
                } else {
                    Err(RegexError::new("invalid escape sequence"))
                }
            }
            'n' => Ok('\n'),
            'r' => Ok('\r'),
            't' => Ok('\t'),
            'v' => Ok('\u{000B}'),
            'f' => Ok('\u{000C}'),
            'b' => Ok('\u{0008}'),
            '0' => {
                if self.peek().is_some_and(|next| next.is_ascii_digit()) {
                    Err(RegexError::new("invalid decimal escape"))
                } else {
                    Ok('\0')
                }
            }
            '1'..='9' => Err(RegexError::new("invalid decimal escape")),
            'x' => {
                if self.peek_is_n_hex_digits(2) {
                    self.parse_hex_char(2)
                } else {
                    Err(RegexError::new("invalid hexadecimal escape sequence"))
                }
            }
            'u' => self.parse_unicode_escape(),
            other => {
                if is_class_string_identity_escape(other) {
                    Ok(other)
                } else {
                    Err(RegexError::new("invalid escape sequence"))
                }
            }
        }
    }

    fn parse_escape(&mut self, _in_class: bool) -> Result<Expr, RegexError> {
        let Some(ch) = self.next() else {
            return Err(RegexError::new("unterminated escape sequence"));
        };

        match ch {
            'd' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::Digit],
            })),
            'D' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::NotDigit],
            })),
            'w' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::Word],
            })),
            'W' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::NotWord],
            })),
            's' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::Space],
            })),
            'S' => Ok(Expr::CharClass(CharClass {
                negated: false,
                items: vec![ClassItem::NotSpace],
            })),
            'b' => Ok(Expr::WordBoundary(true)),
            'B' => Ok(Expr::WordBoundary(false)),
            'n' => Ok(Expr::Literal('\n')),
            'r' => Ok(Expr::Literal('\r')),
            't' => Ok(Expr::Literal('\t')),
            'v' => Ok(Expr::Literal('\u{000B}')),
            'f' => Ok(Expr::Literal('\u{000C}')),
            'c' => {
                if self.peek().is_some_and(|next| next.is_ascii_alphabetic()) {
                    let next = self.next().expect("peek guaranteed a character");
                    Ok(Expr::Literal(control_escape_char(next)))
                } else if self.unicode_mode {
                    Err(RegexError::new("invalid escape sequence"))
                } else {
                    // Legacy (non-unicode) syntax treats \c without an ASCII
                    // letter as a literal backslash, then re-parses `c`.
                    self.i = self.i.saturating_sub(1);
                    Ok(Expr::Literal('\\'))
                }
            }
            '0' => {
                if self.peek().is_some_and(|next| next.is_ascii_digit()) {
                    if self.unicode_mode {
                        return Err(RegexError::new("invalid numeric backreference"));
                    }
                    if self.peek().is_some_and(|next| matches!(next, '0'..='7')) {
                        Ok(Expr::Literal(self.parse_legacy_octal_escape_char('0')))
                    } else {
                        Ok(Expr::Literal('\0'))
                    }
                } else {
                    Ok(Expr::Literal('\0'))
                }
            }
            'x' => {
                if self.peek_is_n_hex_digits(2) {
                    Ok(Expr::Literal(self.parse_hex_char(2)?))
                } else if self.unicode_mode {
                    Err(RegexError::new("invalid hexadecimal escape sequence"))
                } else {
                    Ok(Expr::Literal('x'))
                }
            }
            'u' => {
                if self.unicode_mode {
                    Ok(Expr::Literal(self.parse_unicode_escape()?))
                } else if self.peek_is_n_hex_digits(4) {
                    Ok(Expr::Literal(self.parse_unicode_escape_fixed4()?))
                } else {
                    Ok(Expr::Literal('u'))
                }
            }
            'p' | 'P' => {
                if !self.unicode_mode {
                    Ok(Expr::Literal(ch))
                } else {
                    match self.parse_unicode_property_escape()? {
                        UnicodePropertySpec::CodePoint(property) => {
                            Ok(Expr::CharClass(CharClass {
                                negated: false,
                                items: vec![ClassItem::UnicodeProperty {
                                    property,
                                    negated: ch == 'P',
                                }],
                            }))
                        }
                        UnicodePropertySpec::String(property) => {
                            if ch == 'P' {
                                return Err(RegexError::new("invalid unicode property escape"));
                            }
                            Ok(Expr::UnicodeStringProperty(property))
                        }
                    }
                }
            }
            'k' => {
                if !self.parse_named_backreference_syntax {
                    return Ok(Expr::Literal('k'));
                }
                if self.peek() != Some('<') {
                    if self.unicode_mode {
                        return Err(RegexError::new("invalid named backreference syntax"));
                    }
                    return Ok(Expr::Literal('k'));
                }
                self.i += 1; // consume '<'
                let name = match self.parse_raw_group_name_until_gt() {
                    Some(name) => name,
                    None => {
                        if self.unicode_mode {
                            return Err(RegexError::new("invalid named backreference syntax"));
                        }
                        return Ok(Expr::Sequence(vec![Expr::Literal('k'), Expr::Literal('<')]));
                    }
                };
                Ok(Expr::NamedBackReference(name))
            }
            '1'..='9' => self.parse_numeric_backreference(ch),
            other => {
                if self.unicode_mode {
                    if is_unicode_identity_escape_outside_class(other) {
                        Ok(Expr::Literal(other))
                    } else {
                        Err(RegexError::new("invalid escape sequence"))
                    }
                } else {
                    Ok(Expr::Literal(other))
                }
            }
        }
    }

    fn parse_group_name(&mut self) -> Result<String, RegexError> {
        let Some(raw) = self.parse_raw_group_name_until_gt() else {
            return Err(RegexError::new("invalid capture group name"));
        };
        normalize_group_name(&raw).ok_or_else(|| RegexError::new("invalid capture group name"))
    }

    fn parse_raw_group_name_until_gt(&mut self) -> Option<String> {
        let start = self.i;
        let mut paren_depth = 0usize;
        let mut in_class = false;
        let mut escaped = false;
        while let Some(ch) = self.peek() {
            self.i += 1;
            if escaped {
                escaped = false;
                continue;
            }
            if in_class {
                match ch {
                    '\\' => escaped = true,
                    ']' => in_class = false,
                    _ => {}
                }
                continue;
            }
            match ch {
                '\\' => escaped = true,
                '[' => in_class = true,
                '(' => paren_depth = paren_depth.saturating_add(1),
                ')' => paren_depth = paren_depth.saturating_sub(1),
                '>' if paren_depth == 0 => {
                    let name = self.chars[start..self.i - 1].iter().collect::<String>();
                    return Some(name);
                }
                _ => {}
            }
        }
        None
    }

    fn parse_numeric_backreference(&mut self, first_digit: char) -> Result<Expr, RegexError> {
        let mut raw = String::new();
        raw.push(first_digit);
        while self.peek().is_some_and(|ch| ch.is_ascii_digit()) {
            raw.push(self.next().expect("peek guaranteed a digit"));
        }
        Ok(Expr::DecimalEscape(raw))
    }

    fn parse_unicode_escape(&mut self) -> Result<char, RegexError> {
        if self.peek() == Some('{') {
            self.i += 1;
            let start = self.i;
            while self.peek().is_some_and(|ch| ch.is_ascii_hexdigit()) {
                self.i += 1;
            }
            if self.peek() != Some('}') || self.i == start {
                return Err(RegexError::new("invalid Unicode escape sequence"));
            }
            let raw: String = self.chars[start..self.i].iter().collect();
            self.i += 1;
            let value = u32::from_str_radix(&raw, 16)
                .map_err(|_| RegexError::new("invalid Unicode escape sequence"))?;
            if value > 0x10FFFF {
                return Err(RegexError::new("invalid Unicode escape sequence"));
            }
            unicode_escape_char_from_u32(value)
        } else {
            self.parse_unicode_escape_fixed4()
        }
    }

    fn parse_unicode_escape_fixed4(&mut self) -> Result<char, RegexError> {
        let start = self.i;
        let end = start.saturating_add(4);
        if end > self.chars.len() {
            return Err(RegexError::new("invalid Unicode escape sequence"));
        }
        if !self.chars[start..end]
            .iter()
            .all(|ch| ch.is_ascii_hexdigit())
        {
            return Err(RegexError::new("invalid Unicode escape sequence"));
        }
        let raw: String = self.chars[start..end].iter().collect();
        self.i = end;
        let value = u32::from_str_radix(&raw, 16)
            .map_err(|_| RegexError::new("invalid Unicode escape sequence"))?;
        unicode_escape_char_from_u32(value)
    }

    fn parse_unicode_property_escape(&mut self) -> Result<UnicodePropertySpec, RegexError> {
        if self.next() != Some('{') {
            return Err(RegexError::new("invalid unicode property escape"));
        }
        let start = self.i;
        while let Some(ch) = self.peek() {
            if ch == '}' {
                break;
            }
            self.i += 1;
        }
        if self.peek() != Some('}') || self.i == start {
            return Err(RegexError::new("invalid unicode property escape"));
        }
        let raw: String = self.chars[start..self.i].iter().collect();
        self.i += 1; // consume '}'
        parse_unicode_property_name(&raw, self.unicode_sets_mode)
            .ok_or_else(|| RegexError::new("invalid unicode property escape"))
    }

    fn parse_hex_char(&mut self, digits: usize) -> Result<char, RegexError> {
        let start = self.i;
        let end = start.saturating_add(digits);
        if end > self.chars.len() {
            return Err(RegexError::new("invalid hexadecimal escape sequence"));
        }
        if !self.chars[start..end]
            .iter()
            .all(|ch| ch.is_ascii_hexdigit())
        {
            return Err(RegexError::new("invalid hexadecimal escape sequence"));
        }
        let raw: String = self.chars[start..end].iter().collect();
        self.i = end;
        let value = u32::from_str_radix(&raw, 16)
            .map_err(|_| RegexError::new("invalid hexadecimal escape sequence"))?;
        char::from_u32(value).ok_or_else(|| RegexError::new("invalid escape value"))
    }

    fn parse_usize(&mut self) -> Result<usize, RegexError> {
        let start = self.i;
        while self.peek().is_some_and(|ch| ch.is_ascii_digit()) {
            self.i += 1;
        }
        if self.i == start {
            return Err(RegexError::new("expected number in quantifier"));
        }
        let raw: String = self.chars[start..self.i].iter().collect();
        raw.parse::<usize>()
            .map_err(|_| RegexError::new("invalid numeric value in quantifier"))
    }

    fn peek_is_n_hex_digits(&self, digits: usize) -> bool {
        let start = self.i;
        let end = start.saturating_add(digits);
        end <= self.chars.len()
            && self.chars[start..end]
                .iter()
                .all(|ch| ch.is_ascii_hexdigit())
    }

    fn build_class_expr_from_items(
        &self,
        negated: bool,
        items: Vec<ClassItem>,
    ) -> Result<Expr, RegexError> {
        if negated
            && (items
                .iter()
                .any(class_item_contains_unicode_string_property)
                || items.iter().any(class_item_contains_non_scalar_string))
        {
            return Err(RegexError::new("unsupported unicode set syntax"));
        }

        Ok(Expr::CharClass(CharClass { negated, items }))
    }

    fn expect(&mut self, ch: char) -> Result<(), RegexError> {
        if self.next() == Some(ch) {
            Ok(())
        } else {
            Err(RegexError::new("unterminated group or invalid syntax"))
        }
    }

    fn validate_named_capture_duplicates(&self, expr: &Expr) -> Result<(), RegexError> {
        self.named_capture_paths(expr).map(|_| ())
    }

    fn named_capture_paths(&self, expr: &Expr) -> Result<Vec<HashSet<String>>, RegexError> {
        match expr {
            Expr::Empty
            | Expr::Literal(_)
            | Expr::Dot
            | Expr::StartAnchor
            | Expr::EndAnchor
            | Expr::WordBoundary(_)
            | Expr::CharClass(_)
            | Expr::UnicodeStringProperty(_)
            | Expr::ClassSetOperation { .. }
            | Expr::NamedBackReference(_)
            | Expr::DecimalEscape(_)
            | Expr::BackReference(_)
            | Expr::BackReferenceAny(_) => Ok(vec![HashSet::new()]),
            Expr::Sequence(parts) => {
                let mut paths = vec![HashSet::new()];
                for part in parts {
                    let part_paths = self.named_capture_paths(part)?;
                    let mut next = Vec::new();
                    for base in &paths {
                        for extension in &part_paths {
                            if let Some(duplicate) =
                                base.iter().find(|name| extension.contains(*name)).cloned()
                            {
                                return Err(RegexError::new(format!(
                                    "duplicate capture group name: {duplicate}"
                                )));
                            }
                            let mut merged = base.clone();
                            merged.extend(extension.iter().cloned());
                            next.push(merged);
                        }
                    }
                    paths = dedupe_name_sets(next);
                }
                Ok(paths)
            }
            Expr::Alternation(branches) => {
                let mut out = Vec::new();
                for branch in branches {
                    out.extend(self.named_capture_paths(branch)?);
                }
                Ok(dedupe_name_sets(out))
            }
            Expr::Group { index, expr } => {
                let mut paths = self.named_capture_paths(expr)?;
                if let Some(name) = index.and_then(|idx| self.capture_names.get(&idx)) {
                    for path in &mut paths {
                        if path.contains(name) {
                            return Err(RegexError::new(format!(
                                "duplicate capture group name: {name}"
                            )));
                        }
                        path.insert(name.clone());
                    }
                }
                Ok(paths)
            }
            Expr::FlagModifierGroup { expr, .. }
            | Expr::LookAhead { expr, .. }
            | Expr::LookBehind { expr, .. }
            | Expr::Quantifier { expr, .. } => self.named_capture_paths(expr),
        }
    }

    fn resolve_post_parse_escapes(&mut self, expr: &mut Expr) -> Result<(), RegexError> {
        match expr {
            Expr::NamedBackReference(name) => {
                let has_named_captures = !self.named_captures.is_empty();
                if !self.unicode_mode && !has_named_captures {
                    *expr = self.parse_legacy_named_backreference_expr(name)?;
                    return Ok(());
                }

                let Some(normalized_name) = normalize_group_name(name) else {
                    return Err(RegexError::new("invalid capture group name"));
                };

                let Some(indices) = self.named_captures.get(&normalized_name).cloned() else {
                    return Err(RegexError::new(format!(
                        "unknown named backreference: {normalized_name}"
                    )));
                };
                if indices.len() == 1 {
                    *expr = Expr::BackReference(indices[0]);
                } else {
                    *expr = Expr::BackReferenceAny(indices);
                }
                Ok(())
            }
            Expr::DecimalEscape(_) => Ok(()),
            Expr::Sequence(parts) | Expr::Alternation(parts) => {
                for part in parts {
                    self.resolve_post_parse_escapes(part)?;
                }
                Ok(())
            }
            Expr::Group { expr, .. }
            | Expr::FlagModifierGroup { expr, .. }
            | Expr::LookAhead { expr, .. }
            | Expr::LookBehind { expr, .. }
            | Expr::Quantifier { expr, .. } => self.resolve_post_parse_escapes(expr),
            _ => Ok(()),
        }
    }

    fn resolve_decimal_escapes(&self, expr: &mut Expr) -> Result<(), RegexError> {
        match expr {
            Expr::DecimalEscape(raw) => {
                *expr = self.resolve_decimal_escape(raw)?;
                Ok(())
            }
            Expr::Sequence(parts) | Expr::Alternation(parts) => {
                for part in parts {
                    self.resolve_decimal_escapes(part)?;
                }
                Ok(())
            }
            Expr::Group { expr, .. }
            | Expr::FlagModifierGroup { expr, .. }
            | Expr::LookAhead { expr, .. }
            | Expr::LookBehind { expr, .. }
            | Expr::Quantifier { expr, .. } => self.resolve_decimal_escapes(expr),
            _ => Ok(()),
        }
    }

    fn resolve_decimal_escape(&self, raw: &str) -> Result<Expr, RegexError> {
        let index = raw.parse::<usize>().ok();
        if self.unicode_mode {
            let Some(index) = index else {
                return Err(RegexError::new("invalid numeric backreference"));
            };
            if index == 0 || index > self.capture_count {
                return Err(RegexError::new("invalid numeric backreference"));
            }
            return Ok(Expr::BackReference(index));
        }

        if let Some(index) = index {
            if index > 0 && index <= self.capture_count {
                return Ok(Expr::BackReference(index));
            }
        }

        Ok(Self::legacy_decimal_escape_expr(raw))
    }

    fn legacy_decimal_escape_expr(raw: &str) -> Expr {
        let chars = raw.chars().collect::<Vec<_>>();
        let Some(first) = chars.first().copied() else {
            return Expr::Empty;
        };

        if first == '8' || first == '9' {
            return Self::literal_sequence_from_chars(&chars);
        }

        let mut octal = String::new();
        octal.push(first);
        let mut used = 1usize;
        while used < chars.len() && used < 3 && matches!(chars[used], '0'..='7') {
            octal.push(chars[used]);
            used += 1;
        }
        let value = u32::from_str_radix(&octal, 8).expect("legacy octal digits are valid");
        let first_expr = char::from_u32(value)
            .map(Expr::Literal)
            .unwrap_or(Expr::Literal('\0'));
        if used >= chars.len() {
            first_expr
        } else {
            let mut parts = vec![first_expr];
            parts.extend(chars[used..].iter().copied().map(Expr::Literal));
            Expr::Sequence(parts)
        }
    }

    fn literal_sequence_from_chars(chars: &[char]) -> Expr {
        if chars.len() == 1 {
            Expr::Literal(chars[0])
        } else {
            Expr::Sequence(chars.iter().copied().map(Expr::Literal).collect::<Vec<_>>())
        }
    }

    fn parse_legacy_octal_escape_char(&mut self, first: char) -> char {
        let mut raw = String::new();
        raw.push(first);
        for _ in 0..2 {
            if self.peek().is_some_and(|ch| matches!(ch, '0'..='7')) {
                raw.push(self.next().expect("peek guaranteed a digit"));
            } else {
                break;
            }
        }
        let value = u32::from_str_radix(&raw, 8).expect("legacy octal digits are valid");
        char::from_u32(value).unwrap_or('\0')
    }

    fn literal_named_backreference_expr(name: &str) -> Expr {
        let expanded = legacy_named_backreference_name_chars(name);
        let mut chars = Vec::with_capacity(expanded.len() + 3);
        chars.push('k');
        chars.push('<');
        chars.extend(expanded);
        chars.push('>');
        Self::literal_sequence_from_chars(&chars)
    }

    fn parse_legacy_named_backreference_expr(&mut self, name: &str) -> Result<Expr, RegexError> {
        let mut pattern = String::with_capacity(name.len() + 3);
        pattern.push('k');
        pattern.push('<');
        pattern.push_str(name);
        pattern.push('>');

        let mut parser = Parser::new(&pattern, false, false);
        parser.parse_named_backreference_syntax = false;
        match parser.parse() {
            Ok(mut expr) => {
                if !parser.named_captures.is_empty() {
                    return Err(RegexError::new("invalid capture group name"));
                }
                if parser.capture_count > 0 {
                    let offset = self.capture_count;
                    shift_expr_capture_indices(&mut expr, offset);
                    self.capture_count += parser.capture_count;
                }
                Ok(expr)
            }
            Err(_) => Ok(Self::literal_named_backreference_expr(name)),
        }
    }

    fn looks_like_braced_quantifier(&self, brace_index: usize) -> bool {
        if self.chars.get(brace_index) != Some(&'{') {
            return false;
        }

        let mut idx = brace_index + 1;
        let mut saw_digit = false;
        while self.chars.get(idx).is_some_and(|ch| ch.is_ascii_digit()) {
            saw_digit = true;
            idx += 1;
        }
        if !saw_digit {
            return false;
        }

        match self.chars.get(idx).copied() {
            Some('}') => true,
            Some(',') => {
                idx += 1;
                while self.chars.get(idx).is_some_and(|ch| ch.is_ascii_digit()) {
                    idx += 1;
                }
                self.chars.get(idx) == Some(&'}')
            }
            _ => false,
        }
    }

    fn peek(&self) -> Option<char> {
        self.chars.get(self.i).copied()
    }

    fn peek_next(&self) -> Option<char> {
        self.chars.get(self.i + 1).copied()
    }

    fn next(&mut self) -> Option<char> {
        let ch = self.peek()?;
        self.i += 1;
        Some(ch)
    }
}

fn chars_equal(left: char, right: char, ignore_case: bool, unicode_mode: bool) -> bool {
    if !ignore_case {
        return left == right;
    }

    let left = canonicalize_case_insensitive(left, unicode_mode);
    let right = canonicalize_case_insensitive(right, unicode_mode);
    left == right
}

fn canonicalize_case_insensitive(ch: char, unicode_mode: bool) -> char {
    if unicode_mode {
        canonicalize_unicode_char(ch)
    } else {
        canonicalize_non_unicode_char(ch)
    }
}

fn canonicalize_non_unicode_char(ch: char) -> char {
    let mut upper = ch.to_uppercase();
    let Some(first) = upper.next() else {
        return ch;
    };
    if upper.next().is_some() {
        return ch;
    }
    if (ch as u32) >= 0x80 && (first as u32) < 0x80 {
        return ch;
    }
    first
}

fn canonicalize_unicode_char(ch: char) -> char {
    match ch {
        '\u{017F}' => 's',        // LATIN SMALL LETTER LONG S
        '\u{212A}' => 'k',        // KELVIN SIGN
        '\u{03C2}' => '\u{03C3}', // GREEK SMALL LETTER FINAL SIGMA
        _ => {
            let mut lower = ch.to_lowercase();
            let Some(first) = lower.next() else {
                return ch;
            };
            if lower.next().is_some() { ch } else { first }
        }
    }
}

fn class_set_operation_matches_char(
    operands: &[CharClass],
    operators: &[ClassSetOperator],
    negated: bool,
    ch: char,
    ignore_case: bool,
    unicode_mode: bool,
) -> bool {
    let Some(first) = operands.first() else {
        return false;
    };
    let mut matched = first.matches(ch, ignore_case, unicode_mode);
    for (op, operand) in operators.iter().zip(operands.iter().skip(1)) {
        let right = operand.matches(ch, ignore_case, unicode_mode);
        match op {
            ClassSetOperator::Intersection => matched = matched && right,
            ClassSetOperator::Difference => matched = matched && !right,
        }
    }
    if negated { !matched } else { matched }
}

fn class_set_operation_match_ends(
    prepared: &PreparedInput<'_>,
    pos: usize,
    operands: &[CharClass],
    operators: &[ClassSetOperator],
    negated: bool,
    ignore_case: bool,
    unicode_mode: bool,
) -> Vec<usize> {
    let Some(first) = operands.first() else {
        return Vec::new();
    };

    let mut current = first
        .match_ends(prepared, pos, ignore_case, unicode_mode)
        .into_iter()
        .collect::<HashSet<_>>();
    for (op, operand) in operators.iter().zip(operands.iter().skip(1)) {
        let rhs = operand
            .match_ends(prepared, pos, ignore_case, unicode_mode)
            .into_iter()
            .collect::<HashSet<_>>();
        match op {
            ClassSetOperator::Intersection => current.retain(|end| rhs.contains(end)),
            ClassSetOperator::Difference => current.retain(|end| !rhs.contains(end)),
        }
    }

    if negated {
        let scalar_end = if unicode_mode {
            let Some((_, width)) = prepared.unicode_char_at(pos) else {
                return Vec::new();
            };
            pos + width
        } else {
            let Some(_) = prepared.char_at(pos) else {
                return Vec::new();
            };
            pos + 1
        };
        if current.contains(&scalar_end) {
            Vec::new()
        } else {
            vec![scalar_end]
        }
    } else {
        dedupe_sorted_desc(current.into_iter().collect::<Vec<_>>())
    }
}

fn class_set_operation_match_starts(
    prepared: &PreparedInput<'_>,
    end: usize,
    operands: &[CharClass],
    operators: &[ClassSetOperator],
    negated: bool,
    ignore_case: bool,
    unicode_mode: bool,
) -> Vec<usize> {
    let mut out = Vec::new();
    for start in 0..=end {
        if class_set_operation_match_ends(
            prepared,
            start,
            operands,
            operators,
            negated,
            ignore_case,
            unicode_mode,
        )
        .iter()
        .any(|candidate_end| *candidate_end == end)
        {
            out.push(start);
        }
    }
    dedupe_sorted_asc(out)
}

fn expr_match_ends(
    expr: &Expr,
    prepared: &PreparedInput<'_>,
    pos: usize,
    ignore_case: bool,
    unicode_mode: bool,
) -> Vec<usize> {
    match expr {
        Expr::Empty => vec![pos],
        Expr::Literal(expected) => {
            if unicode_mode {
                prepared
                    .unicode_char_at(pos)
                    .filter(|(actual, _)| {
                        chars_equal(*actual, *expected, ignore_case, unicode_mode)
                    })
                    .map(|(_, width)| pos + width)
                    .into_iter()
                    .collect::<Vec<_>>()
            } else {
                prepared
                    .char_at(pos)
                    .filter(|actual| chars_equal(*actual, *expected, ignore_case, unicode_mode))
                    .map(|_| pos + 1)
                    .into_iter()
                    .collect::<Vec<_>>()
            }
        }
        Expr::Sequence(parts) => {
            let mut cursors = vec![pos];
            for part in parts {
                let mut next = Vec::new();
                for cursor in &cursors {
                    next.extend(expr_match_ends(
                        part,
                        prepared,
                        *cursor,
                        ignore_case,
                        unicode_mode,
                    ));
                }
                if next.is_empty() {
                    return Vec::new();
                }
                cursors = dedupe_sorted_desc(next);
            }
            cursors
        }
        Expr::Alternation(branches) => {
            let mut out = Vec::new();
            for branch in branches {
                out.extend(expr_match_ends(
                    branch,
                    prepared,
                    pos,
                    ignore_case,
                    unicode_mode,
                ));
            }
            dedupe_sorted_desc(out)
        }
        Expr::CharClass(class) => class.match_ends(prepared, pos, ignore_case, unicode_mode),
        Expr::ClassSetOperation {
            operands,
            operators,
            negated,
        } => class_set_operation_match_ends(
            prepared,
            pos,
            operands,
            operators,
            *negated,
            ignore_case,
            unicode_mode,
        ),
        Expr::UnicodeStringProperty(property) => property
            .match_at(prepared, pos)
            .into_iter()
            .collect::<Vec<_>>(),
        _ => Vec::new(),
    }
}

fn expr_matches_single_char(expr: &Expr, ch: char, ignore_case: bool, unicode_mode: bool) -> bool {
    match expr {
        Expr::CharClass(class) => class.matches(ch, ignore_case, unicode_mode),
        Expr::ClassSetOperation {
            operands,
            operators,
            negated,
        } => class_set_operation_matches_char(
            operands,
            operators,
            *negated,
            ch,
            ignore_case,
            unicode_mode,
        ),
        Expr::Alternation(branches) => branches
            .iter()
            .any(|branch| expr_matches_single_char(branch, ch, ignore_case, unicode_mode)),
        Expr::Literal(expected) => chars_equal(ch, *expected, ignore_case, unicode_mode),
        Expr::UnicodeStringProperty(_) => false,
        _ => false,
    }
}

fn single_char_string(value: &str) -> Option<char> {
    let mut chars = value.chars();
    let first = chars.next()?;
    if chars.next().is_none() {
        Some(first)
    } else {
        None
    }
}

fn unicode_string_property_match_starts(
    property: &UnicodeStringProperty,
    prepared: &PreparedInput<'_>,
    end: usize,
) -> Vec<usize> {
    let mut out = Vec::new();
    for start in 0..=end {
        if property.match_at(prepared, start) == Some(end) {
            out.push(start);
        }
    }
    dedupe_sorted_asc(out)
}

fn match_literal_string_at(
    prepared: &PreparedInput<'_>,
    pos: usize,
    candidate: &str,
    ignore_case: bool,
    unicode_mode: bool,
) -> Option<usize> {
    let mut index = pos;
    for expected in candidate.chars() {
        let (actual, width) = if unicode_mode {
            prepared.unicode_char_at(index)?
        } else {
            (prepared.char_at(index)?, 1)
        };
        if !chars_equal(actual, expected, ignore_case, unicode_mode) {
            return None;
        }
        index += width;
    }
    Some(index)
}

fn dedupe_sorted_desc(mut values: Vec<usize>) -> Vec<usize> {
    values.sort_unstable();
    values.dedup();
    values.reverse();
    values
}

fn dedupe_sorted_asc(mut values: Vec<usize>) -> Vec<usize> {
    values.sort_unstable();
    values.dedup();
    values
}

fn dedupe_name_sets(values: Vec<HashSet<String>>) -> Vec<HashSet<String>> {
    let mut out: Vec<HashSet<String>> = Vec::new();
    for value in values {
        if !out.iter().any(|existing| existing == &value) {
            out.push(value);
        }
    }
    out
}

fn capture_slots_in_expr(expr: &Expr) -> Vec<usize> {
    let mut out = Vec::new();
    collect_capture_slots(expr, &mut out);
    out.sort_unstable();
    out.dedup();
    out
}

fn collect_capture_slots(expr: &Expr, out: &mut Vec<usize>) {
    match expr {
        Expr::Group { index, expr } => {
            if let Some(index) = index {
                out.push(*index);
            }
            collect_capture_slots(expr, out);
        }
        Expr::Sequence(parts) | Expr::Alternation(parts) => {
            for part in parts {
                collect_capture_slots(part, out);
            }
        }
        Expr::FlagModifierGroup { expr, .. }
        | Expr::LookAhead { expr, .. }
        | Expr::LookBehind { expr, .. }
        | Expr::Quantifier { expr, .. } => collect_capture_slots(expr, out),
        _ => {}
    }
}

fn clear_capture_slots(captures: &mut [Option<(usize, usize)>], slots: &[usize]) {
    for slot in slots {
        if let Some(capture) = captures.get_mut(*slot) {
            *capture = None;
        }
    }
}

fn legacy_named_backreference_name_chars(name: &str) -> Vec<char> {
    let chars = name.chars().collect::<Vec<_>>();
    let mut out = Vec::new();
    let mut i = 0usize;

    while i < chars.len() {
        let ch = chars[i];
        i += 1;

        if ch != '\\' {
            out.push(ch);
            continue;
        }

        if i >= chars.len() {
            break;
        }

        let escaped = chars[i];
        i += 1;
        match escaped {
            'c' => {
                if i < chars.len() && chars[i].is_ascii_alphabetic() {
                    out.push(control_escape_char(chars[i]));
                    i += 1;
                } else {
                    out.push('c');
                }
            }
            'n' => out.push('\n'),
            'r' => out.push('\r'),
            't' => out.push('\t'),
            'v' => out.push('\u{000B}'),
            'f' => out.push('\u{000C}'),
            '0' => {
                if i < chars.len() && chars[i].is_ascii_digit() {
                    if matches!(chars[i], '0'..='7') {
                        let mut raw = String::from("0");
                        let mut consumed = 0usize;
                        while i < chars.len() && consumed < 2 && matches!(chars[i], '0'..='7') {
                            raw.push(chars[i]);
                            i += 1;
                            consumed += 1;
                        }
                        let value =
                            u32::from_str_radix(&raw, 8).expect("legacy octal digits are valid");
                        out.push(char::from_u32(value).unwrap_or('\0'));
                    } else {
                        out.push('\0');
                    }
                } else {
                    out.push('\0');
                }
            }
            '1'..='9' => {
                let mut raw = String::new();
                raw.push(escaped);
                while i < chars.len() && chars[i].is_ascii_digit() {
                    raw.push(chars[i]);
                    i += 1;
                }
                out.extend(legacy_decimal_escape_to_chars(&raw));
            }
            'x' => {
                if i + 1 < chars.len()
                    && chars[i].is_ascii_hexdigit()
                    && chars[i + 1].is_ascii_hexdigit()
                {
                    let raw = [chars[i], chars[i + 1]].iter().collect::<String>();
                    i += 2;
                    let value =
                        u32::from_str_radix(&raw, 16).expect("hexadecimal escape digits are valid");
                    out.push(char::from_u32(value).unwrap_or('\0'));
                } else {
                    out.push('x');
                }
            }
            'u' => {
                if i + 3 < chars.len() && chars[i..i + 4].iter().all(|ch| ch.is_ascii_hexdigit()) {
                    let raw = chars[i..i + 4].iter().collect::<String>();
                    i += 4;
                    let value =
                        u32::from_str_radix(&raw, 16).expect("unicode escape digits are valid");
                    out.push(char::from_u32(value).unwrap_or('\0'));
                } else {
                    out.push('u');
                }
            }
            other => out.push(other),
        }
    }

    out
}

fn legacy_decimal_escape_to_chars(raw: &str) -> Vec<char> {
    let chars = raw.chars().collect::<Vec<_>>();
    let Some(first) = chars.first().copied() else {
        return Vec::new();
    };

    if first == '8' || first == '9' {
        return chars;
    }

    let mut octal = String::new();
    octal.push(first);
    let mut used = 1usize;
    while used < chars.len() && used < 3 && matches!(chars[used], '0'..='7') {
        octal.push(chars[used]);
        used += 1;
    }
    let value = u32::from_str_radix(&octal, 8).expect("legacy octal digits are valid");
    let mut out = vec![char::from_u32(value).unwrap_or('\0')];
    out.extend(chars[used..].iter().copied());
    out
}

fn shift_expr_capture_indices(expr: &mut Expr, offset: usize) {
    if offset == 0 {
        return;
    }
    match expr {
        Expr::Group { index, expr } => {
            if let Some(index) = index {
                *index += offset;
            }
            shift_expr_capture_indices(expr, offset);
        }
        Expr::BackReference(index) => {
            *index += offset;
        }
        Expr::BackReferenceAny(indices) => {
            for index in indices {
                *index += offset;
            }
        }
        Expr::Sequence(parts) | Expr::Alternation(parts) => {
            for part in parts {
                shift_expr_capture_indices(part, offset);
            }
        }
        Expr::FlagModifierGroup { expr, .. }
        | Expr::LookAhead { expr, .. }
        | Expr::LookBehind { expr, .. }
        | Expr::Quantifier { expr, .. } => {
            shift_expr_capture_indices(expr, offset);
        }
        _ => {}
    }
}

fn class_item_contains_non_scalar_string(item: &ClassItem) -> bool {
    match item {
        ClassItem::UnicodeStringProperty(_) => false,
        ClassItem::StringDisjunction(alternatives) => alternatives
            .iter()
            .any(|alternative| single_char_string(alternative).is_none()),
        ClassItem::NestedExpression(expr) => expr_contains_non_scalar_string(expr),
        _ => false,
    }
}

fn expr_contains_non_scalar_string(expr: &Expr) -> bool {
    match expr {
        Expr::Empty => true,
        Expr::Literal(_) => false,
        Expr::UnicodeStringProperty(_) => true,
        Expr::CharClass(class) => class
            .items
            .iter()
            .any(class_item_contains_non_scalar_string),
        Expr::ClassSetOperation { operands, .. } => operands.iter().any(|operand| {
            operand
                .items
                .iter()
                .any(class_item_contains_non_scalar_string)
        }),
        Expr::Sequence(parts) => {
            if parts.len() != 1 {
                true
            } else {
                parts.iter().any(expr_contains_non_scalar_string)
            }
        }
        Expr::Alternation(branches) => branches.iter().any(expr_contains_non_scalar_string),
        Expr::Group { expr, .. }
        | Expr::FlagModifierGroup { expr, .. }
        | Expr::LookAhead { expr, .. }
        | Expr::LookBehind { expr, .. }
        | Expr::Quantifier { expr, .. } => expr_contains_non_scalar_string(expr),
        _ => false,
    }
}

fn class_item_contains_unicode_string_property(item: &ClassItem) -> bool {
    match item {
        ClassItem::UnicodeStringProperty(_) => true,
        ClassItem::NestedExpression(expr) => expr_contains_unicode_string_property(expr),
        _ => false,
    }
}

fn expr_contains_unicode_string_property(expr: &Expr) -> bool {
    match expr {
        Expr::UnicodeStringProperty(_) => true,
        Expr::CharClass(class) => class
            .items
            .iter()
            .any(class_item_contains_unicode_string_property),
        Expr::ClassSetOperation { operands, .. } => operands.iter().any(|operand| {
            operand
                .items
                .iter()
                .any(class_item_contains_unicode_string_property)
        }),
        Expr::Alternation(branches) | Expr::Sequence(branches) => {
            branches.iter().any(expr_contains_unicode_string_property)
        }
        Expr::Group { expr, .. }
        | Expr::FlagModifierGroup { expr, .. }
        | Expr::LookAhead { expr, .. }
        | Expr::LookBehind { expr, .. }
        | Expr::Quantifier { expr, .. } => expr_contains_unicode_string_property(expr),
        _ => false,
    }
}

fn normalize_group_name(raw: &str) -> Option<String> {
    let mut chars = raw.chars().peekable();
    let mut out = String::new();
    while chars.peek().is_some() {
        let ch = next_group_name_char(&mut chars)?;
        if out.is_empty() {
            if !is_group_name_start(ch) {
                return None;
            }
        } else if !is_group_name_continue(ch) {
            return None;
        }
        out.push(ch);
    }
    if out.is_empty() { None } else { Some(out) }
}

fn next_group_name_char(chars: &mut Peekable<Chars<'_>>) -> Option<char> {
    let ch = chars.next()?;
    if ch != '\\' {
        return Some(ch);
    }
    parse_group_name_unicode_escape(chars)
}

fn parse_group_name_unicode_escape(chars: &mut Peekable<Chars<'_>>) -> Option<char> {
    if chars.next()? != 'u' {
        return None;
    }

    if chars.peek() == Some(&'{') {
        chars.next();
        let mut raw = String::new();
        while let Some(&ch) = chars.peek() {
            if ch == '}' {
                break;
            }
            if !ch.is_ascii_hexdigit() {
                return None;
            }
            raw.push(ch);
            chars.next();
        }
        if raw.is_empty() || chars.next() != Some('}') {
            return None;
        }
        let value = u32::from_str_radix(&raw, 16).ok()?;
        char::from_u32(value)
    } else {
        let mut raw = String::new();
        for _ in 0..4 {
            let ch = chars.next()?;
            if !ch.is_ascii_hexdigit() {
                return None;
            }
            raw.push(ch);
        }
        let value = u32::from_str_radix(&raw, 16).ok()?;
        char::from_u32(value)
    }
}

fn is_group_name_start(ch: char) -> bool {
    matches!(ch, '$' | '_') || ch.is_ascii_alphabetic() || (!ch.is_ascii() && ch.is_alphabetic())
}

fn is_group_name_continue(ch: char) -> bool {
    is_group_name_start(ch)
        || ch.is_ascii_digit()
        || matches!(ch, '\u{200C}' | '\u{200D}')
        || is_combining_mark(ch)
}

fn is_combining_mark(ch: char) -> bool {
    char_in_ranges(
        ch,
        &[
            (0x0300, 0x036F),
            (0x1AB0, 0x1AFF),
            (0x1DC0, 0x1DFF),
            (0x20D0, 0x20FF),
            (0xFE20, 0xFE2F),
        ],
    )
}

fn is_class_string_identity_escape(ch: char) -> bool {
    ch.is_ascii_punctuation() && ch != '_'
}

fn flag_modifier_value(
    flag: char,
    enabling: &HashSet<char>,
    disabling: &HashSet<char>,
) -> Option<bool> {
    if enabling.contains(&flag) {
        Some(true)
    } else if disabling.contains(&flag) {
        Some(false)
    } else {
        None
    }
}

fn parse_unicode_property_name(raw: &str, unicode_sets_mode: bool) -> Option<UnicodePropertySpec> {
    if let Some((name, value)) = raw.split_once('=') {
        return parse_unicode_property_pair(name, value);
    }
    match raw {
        "L" | "Letter" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Letter)),
        "N" | "Number" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Number)),
        "Nd" | "Decimal_Number" => Some(UnicodePropertySpec::CodePoint(
            UnicodeProperty::DecimalNumber,
        )),
        "Lu" | "Uppercase_Letter" => Some(UnicodePropertySpec::CodePoint(
            UnicodeProperty::UppercaseLetter,
        )),
        "Ll" | "Lowercase_Letter" => Some(UnicodePropertySpec::CodePoint(
            UnicodeProperty::LowercaseLetter,
        )),
        "ASCII" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Ascii)),
        "Any" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Any)),
        "RGI_Emoji" if unicode_sets_mode => {
            Some(UnicodePropertySpec::String(UnicodeStringProperty::RgiEmoji))
        }
        _ => None,
    }
}

fn parse_unicode_property_pair(name: &str, value: &str) -> Option<UnicodePropertySpec> {
    match name {
        "gc" | "General_Category" => match value {
            "L" | "Letter" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Letter)),
            "N" | "Number" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::Number)),
            "Nd" | "Decimal_Number" => Some(UnicodePropertySpec::CodePoint(
                UnicodeProperty::DecimalNumber,
            )),
            "Lu" | "Uppercase_Letter" => Some(UnicodePropertySpec::CodePoint(
                UnicodeProperty::UppercaseLetter,
            )),
            "Ll" | "Lowercase_Letter" => Some(UnicodePropertySpec::CodePoint(
                UnicodeProperty::LowercaseLetter,
            )),
            _ => None,
        },
        "sc" | "Script" | "scx" | "Script_Extensions" => match value {
            "Latin" | "Latn" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::ScriptLatin)),
            "Greek" | "Grek" => Some(UnicodePropertySpec::CodePoint(UnicodeProperty::ScriptGreek)),
            _ => None,
        },
        _ => None,
    }
}

fn match_rgi_emoji_at(prepared: &PreparedInput<'_>, pos: usize) -> Option<usize> {
    let (ch, ch_width) = prepared.unicode_char_at(pos)?;

    // Keycap emoji sequences, e.g. 0️⃣, #️⃣, *️⃣.
    if is_keycap_base(ch) {
        let mut idx = pos + ch_width;
        if let Some((variation_selector, width)) = prepared.unicode_char_at(idx) {
            if variation_selector == '\u{FE0F}' {
                idx += width;
            }
        }
        if let Some((enclosing_keycap, width)) = prepared.unicode_char_at(idx) {
            if enclosing_keycap == '\u{20E3}' {
                return Some(idx + width);
            }
        }
    }

    // Flag emoji are two regional indicators.
    if is_regional_indicator(ch) {
        let next_pos = pos + ch_width;
        if let Some((next, next_width)) = prepared.unicode_char_at(next_pos) {
            if is_regional_indicator(next) {
                return Some(next_pos + next_width);
            }
        }
    }

    let mut idx = match_emoji_atom(prepared, pos)?;
    while let Some((joiner, joiner_width)) = prepared.unicode_char_at(idx) {
        if joiner != '\u{200D}' {
            break;
        }
        let next = match_emoji_atom(prepared, idx + joiner_width)?;
        idx = next;
    }
    Some(idx)
}

fn match_emoji_atom(prepared: &PreparedInput<'_>, pos: usize) -> Option<usize> {
    let (ch, ch_width) = prepared.unicode_char_at(pos)?;
    if !is_emoji_base(ch) {
        return None;
    }

    let mut idx = pos + ch_width;
    if let Some((variation_selector, width)) = prepared.unicode_char_at(idx) {
        if variation_selector == '\u{FE0F}' {
            idx += width;
        }
    }
    if let Some((modifier, width)) = prepared.unicode_char_at(idx) {
        if (0x1F3FB..=0x1F3FF).contains(&(modifier as u32)) {
            idx += width;
        }
    }
    Some(idx)
}

fn is_keycap_base(ch: char) -> bool {
    ch.is_ascii_digit() || matches!(ch, '#' | '*')
}

fn is_regional_indicator(ch: char) -> bool {
    (0x1F1E6..=0x1F1FF).contains(&(ch as u32))
}

fn is_emoji_base(ch: char) -> bool {
    let code = ch as u32;
    matches!(
        code,
        0x1F300..=0x1FAFF
            | 0x2600..=0x27BF
            | 0x2300..=0x23FF
            | 0x2194..=0x21AA
            | 0x00A9
            | 0x00AE
            | 0x203C
            | 0x2049
            | 0x2122
            | 0x2139
            | 0x2934
            | 0x2935
            | 0x3030
            | 0x303D
            | 0x3297
            | 0x3299
    )
}

fn control_escape_char(letter: char) -> char {
    let upper = letter.to_ascii_uppercase() as u8;
    char::from(upper - b'@')
}

fn is_legacy_class_control_escape_suffix(ch: char) -> bool {
    ch.is_ascii_digit() || ch == '_'
}

fn control_escape_char_extended(ch: char) -> char {
    if ch.is_ascii_alphabetic() {
        return control_escape_char(ch);
    }
    let value = (ch as u8) & 0x1F;
    char::from(value)
}

fn is_unicode_identity_escape_outside_class(ch: char) -> bool {
    matches!(
        ch,
        '$' | '(' | ')' | '*' | '+' | '.' | '/' | '?' | '[' | '\\' | ']' | '^' | '{' | '|' | '}'
    )
}

fn is_unicode_identity_escape_in_class(ch: char) -> bool {
    matches!(
        ch,
        '$' | '('
            | ')'
            | '*'
            | '+'
            | '-'
            | '.'
            | '/'
            | '?'
            | '['
            | '\\'
            | ']'
            | '^'
            | '{'
            | '|'
            | '}'
    )
}

fn char_in_ranges(ch: char, ranges: &[(u32, u32)]) -> bool {
    let code = ch as u32;
    ranges
        .iter()
        .any(|&(start, end)| start <= code && code <= end)
}

fn is_quantifier_target_supported(expr: &Expr) -> bool {
    !matches!(
        expr,
        Expr::StartAnchor | Expr::EndAnchor | Expr::WordBoundary(_) | Expr::LookBehind { .. }
    )
}

fn char_in_range(ch: char, start: char, end: char, ignore_case: bool, unicode_mode: bool) -> bool {
    let (start_code, end_code, code) = if ignore_case {
        if unicode_mode {
            (
                canonicalize_unicode_char(start) as u32,
                canonicalize_unicode_char(end) as u32,
                canonicalize_unicode_char(ch) as u32,
            )
        } else if start.is_ascii() && end.is_ascii() && ch.is_ascii() {
            (
                (start as u8).to_ascii_lowercase() as u32,
                (end as u8).to_ascii_lowercase() as u32,
                (ch as u8).to_ascii_lowercase() as u32,
            )
        } else {
            (start as u32, end as u32, ch as u32)
        }
    } else {
        (start as u32, end as u32, ch as u32)
    };
    if start_code <= end_code {
        (start_code..=end_code).contains(&code)
    } else {
        (end_code..=start_code).contains(&code)
    }
}

fn is_word_char(ch: char) -> bool {
    ch.is_ascii_alphanumeric() || ch == '_'
}

fn is_word_char_regex(ch: char, ignore_case: bool, unicode_mode: bool) -> bool {
    if is_word_char(ch) {
        return true;
    }
    if !(ignore_case && unicode_mode) {
        return false;
    }
    is_word_char(canonicalize_unicode_char(ch))
}

fn is_space_char(ch: char) -> bool {
    matches!(
        ch,
        '\u{0009}'
            | '\u{000A}'
            | '\u{000B}'
            | '\u{000C}'
            | '\u{000D}'
            | '\u{0020}'
            | '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'
            ..='\u{200A}'
                | '\u{2028}'
                | '\u{2029}'
                | '\u{202F}'
                | '\u{205F}'
                | '\u{3000}'
                | '\u{FEFF}'
    )
}

fn is_line_terminator(ch: char) -> bool {
    matches!(ch, '\n' | '\r' | '\u{2028}' | '\u{2029}')
}

fn is_regex_meta(ch: char) -> bool {
    matches!(
        ch,
        '\\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '^' | '$' | '/'
    )
}

const INTERNAL_SURROGATE_BASE: u32 = 0xE000;
const INTERNAL_SURROGATE_LAST: u32 = INTERNAL_SURROGATE_BASE + 0x07FF;

pub(crate) fn internalize_utf16_code_unit(unit: u16) -> char {
    if (0xD800..=0xDFFF).contains(&unit) {
        let mapped = INTERNAL_SURROGATE_BASE + (unit as u32 - 0xD800);
        return char::from_u32(mapped).expect("internal surrogate marker must be valid scalar");
    }
    char::from_u32(unit as u32).expect("BMP non-surrogate code unit must be valid scalar")
}

pub(crate) fn deinternalize_surrogate_marker(ch: char) -> Option<u16> {
    let code = ch as u32;
    if (INTERNAL_SURROGATE_BASE..=INTERNAL_SURROGATE_LAST).contains(&code) {
        Some((0xD800 + code - INTERNAL_SURROGATE_BASE) as u16)
    } else {
        None
    }
}

fn unicode_escape_char_from_u32(value: u32) -> Result<char, RegexError> {
    if (0xD800..=0xDFFF).contains(&value) {
        return Ok(internalize_utf16_code_unit(value as u16));
    }
    char::from_u32(value).ok_or_else(|| RegexError::new("invalid Unicode escape sequence"))
}

fn append_hex_escape(out: &mut String, value: u32, width: usize) {
    match width {
        2 => out.push_str(&format!("\\x{value:02x}")),
        4 => out.push_str(&format!("\\u{value:04x}")),
        _ => out.push_str(&format!("\\u{{{value:x}}}")),
    }
}

fn is_regexp_escape_hex_punctuator(ch: char) -> bool {
    matches!(
        ch,
        '!' | '"'
            | '#'
            | '%'
            | '&'
            | '\''
            | ','
            | '-'
            | ':'
            | ';'
            | '<'
            | '='
            | '>'
            | '@'
            | '`'
            | '~'
    )
}

fn is_regexp_escape_unicode_whitespace(ch: char) -> bool {
    matches!(
        ch,
        '\u{1680}' | '\u{2000}'
            ..='\u{200A}'
                | '\u{2028}'
                | '\u{2029}'
                | '\u{202F}'
                | '\u{205F}'
                | '\u{3000}'
                | '\u{FEFF}'
    )
}

const UNICODE_DECIMAL_NUMBER_RANGES: &[(u32, u32)] = &[
    (0x30, 0x39),
    (0x660, 0x669),
    (0x6F0, 0x6F9),
    (0x7C0, 0x7C9),
    (0x966, 0x96F),
    (0x9E6, 0x9EF),
    (0xA66, 0xA6F),
    (0xAE6, 0xAEF),
    (0xB66, 0xB6F),
    (0xBE6, 0xBEF),
    (0xC66, 0xC6F),
    (0xCE6, 0xCEF),
    (0xD66, 0xD6F),
    (0xDE6, 0xDEF),
    (0xE50, 0xE59),
    (0xED0, 0xED9),
    (0xF20, 0xF29),
    (0x1040, 0x1049),
    (0x1090, 0x1099),
    (0x17E0, 0x17E9),
    (0x1810, 0x1819),
    (0x1946, 0x194F),
    (0x19D0, 0x19D9),
    (0x1A80, 0x1A89),
    (0x1A90, 0x1A99),
    (0x1B50, 0x1B59),
    (0x1BB0, 0x1BB9),
    (0x1C40, 0x1C49),
    (0x1C50, 0x1C59),
    (0xA620, 0xA629),
    (0xA8D0, 0xA8D9),
    (0xA900, 0xA909),
    (0xA9D0, 0xA9D9),
    (0xA9F0, 0xA9F9),
    (0xAA50, 0xAA59),
    (0xABF0, 0xABF9),
    (0xFF10, 0xFF19),
    (0x104A0, 0x104A9),
    (0x10D30, 0x10D39),
    (0x10D40, 0x10D49),
    (0x11066, 0x1106F),
    (0x110F0, 0x110F9),
    (0x11136, 0x1113F),
    (0x111D0, 0x111D9),
    (0x112F0, 0x112F9),
    (0x11450, 0x11459),
    (0x114D0, 0x114D9),
    (0x11650, 0x11659),
    (0x116C0, 0x116C9),
    (0x116D0, 0x116E3),
    (0x11730, 0x11739),
    (0x118E0, 0x118E9),
    (0x11950, 0x11959),
    (0x11BF0, 0x11BF9),
    (0x11C50, 0x11C59),
    (0x11D50, 0x11D59),
    (0x11DA0, 0x11DA9),
    (0x11DE0, 0x11DE9),
    (0x11F50, 0x11F59),
    (0x16130, 0x16139),
    (0x16A60, 0x16A69),
    (0x16AC0, 0x16AC9),
    (0x16B50, 0x16B59),
    (0x16D70, 0x16D79),
    (0x1CCF0, 0x1CCF9),
    (0x1D7CE, 0x1D7FF),
    (0x1E140, 0x1E149),
    (0x1E2F0, 0x1E2F9),
    (0x1E4F0, 0x1E4F9),
    (0x1E5F1, 0x1E5FA),
    (0x1E950, 0x1E959),
    (0x1FBF0, 0x1FBF9),
];

const UNICODE_SCRIPT_LATIN_RANGES: &[(u32, u32)] = &[
    (0x41, 0x5A),
    (0x61, 0x7A),
    (0xAA, 0xAA),
    (0xBA, 0xBA),
    (0xC0, 0xD6),
    (0xD8, 0xF6),
    (0xF8, 0x2B8),
    (0x2E0, 0x2E4),
    (0x1D00, 0x1D25),
    (0x1D2C, 0x1D5C),
    (0x1D62, 0x1D65),
    (0x1D6B, 0x1D77),
    (0x1D79, 0x1DBE),
    (0x1E00, 0x1EFF),
    (0x2071, 0x2071),
    (0x207F, 0x207F),
    (0x2090, 0x209C),
    (0x212A, 0x212B),
    (0x2132, 0x2132),
    (0x214E, 0x214E),
    (0x2160, 0x2188),
    (0x2C60, 0x2C7F),
    (0xA722, 0xA787),
    (0xA78B, 0xA7DC),
    (0xA7F1, 0xA7FF),
    (0xAB30, 0xAB5A),
    (0xAB5C, 0xAB64),
    (0xAB66, 0xAB69),
    (0xFB00, 0xFB06),
    (0xFF21, 0xFF3A),
    (0xFF41, 0xFF5A),
    (0x10780, 0x10785),
    (0x10787, 0x107B0),
    (0x107B2, 0x107BA),
    (0x1DF00, 0x1DF1E),
    (0x1DF25, 0x1DF2A),
];

const UNICODE_SCRIPT_GREEK_RANGES: &[(u32, u32)] = &[
    (0x370, 0x373),
    (0x375, 0x377),
    (0x37A, 0x37D),
    (0x37F, 0x37F),
    (0x384, 0x384),
    (0x386, 0x386),
    (0x388, 0x38A),
    (0x38C, 0x38C),
    (0x38E, 0x3A1),
    (0x3A3, 0x3E1),
    (0x3F0, 0x3FF),
    (0x1D26, 0x1D2A),
    (0x1D5D, 0x1D61),
    (0x1D66, 0x1D6A),
    (0x1DBF, 0x1DBF),
    (0x1F00, 0x1F15),
    (0x1F18, 0x1F1D),
    (0x1F20, 0x1F45),
    (0x1F48, 0x1F4D),
    (0x1F50, 0x1F57),
    (0x1F59, 0x1F59),
    (0x1F5B, 0x1F5B),
    (0x1F5D, 0x1F5D),
    (0x1F5F, 0x1F7D),
    (0x1F80, 0x1FB4),
    (0x1FB6, 0x1FC4),
    (0x1FC6, 0x1FD3),
    (0x1FD6, 0x1FDB),
    (0x1FDD, 0x1FEF),
    (0x1FF2, 0x1FF4),
    (0x1FF6, 0x1FFE),
    (0x2126, 0x2126),
    (0xAB65, 0xAB65),
    (0x10140, 0x1018E),
    (0x101A0, 0x101A0),
    (0x1D200, 0x1D245),
];