perl-regex 0.13.2

Regex parsing and validation helpers for Perl syntax
Documentation
use super::parser::{parse_named_capture_name, parse_named_capture_name_from};

/// A named capture group found while scanning a regex pattern.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptureGroup {
    /// Name of the group as returned by the capture-name parser.
    pub name: String,
    /// 1-based capture number assigned by `extract_named_captures` as it
    /// scans the pattern (the counter is incremented before it is stored).
    pub index: usize,
    /// Text of the group body: everything after the name delimiter up to,
    /// but not including, the group's closing parenthesis.
    pub pattern: String,
}

/// Scans `pattern` and returns every named capture group it contains, in the
/// order their opening parentheses appear.
///
/// Recognised named forms are `(?<name>...)` and `(?'name'...)`. Escaped
/// characters (`\x`) and bracketed character classes (`[...]`) are skipped so
/// that parentheses inside them are not mistaken for groups. Lookbehind
/// assertions (`(?<=` / `(?<!`) and every other `(?...` construct are neither
/// recorded nor counted.
///
/// `index` is the running capture number: it is incremented for every plain
/// `(` group and every named group, including groups nested inside a named
/// group's body. Nested named groups are reported as their own entries after
/// the enclosing group.
pub(crate) fn extract_named_captures(pattern: &str) -> Vec<CaptureGroup> {
    let bytes = pattern.as_bytes();
    let mut result = Vec::new();
    let mut i = 0;
    let mut capture_index = 0;

    while i < bytes.len() {
        match bytes[i] {
            // Escape sequence: skip the backslash and the escaped byte.
            b'\\' => i += 2,
            // Character class: skip to the first unescaped `]`.
            b'[' => {
                i += 1;
                while i < bytes.len() {
                    match bytes[i] {
                        b'\\' => i += 2,
                        b']' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
            }
            b'(' => {
                i += 1;
                if bytes.get(i).copied() != Some(b'?') {
                    // Plain capturing group.
                    capture_index += 1;
                    continue;
                }
                i += 1;

                // Try to read a group name; `None` means this `(?...` is not
                // a (well-formed) named capture and is simply scanned past.
                let parsed = match bytes.get(i).copied() {
                    Some(b'<') => {
                        i += 1;
                        if matches!(bytes.get(i).copied(), Some(b'=' | b'!')) {
                            // Lookbehind assertion, not a capture.
                            i += 1;
                            continue;
                        }
                        parse_named_capture_name_from(bytes, i, b'>')
                    }
                    Some(b'\'') => parse_named_capture_name(bytes, i, b'\'', b'\''),
                    _ => None,
                };

                if let Some((name, body_start)) = parsed {
                    capture_index += 1;
                    let (subpattern, _) = collect_subpattern(bytes, body_start);
                    result.push(CaptureGroup {
                        name,
                        index: capture_index,
                        pattern: subpattern,
                    });
                    // Resume scanning at the start of the body rather than
                    // after its closing parenthesis, so groups nested inside
                    // this one are still counted and recorded.
                    i = body_start;
                }
            }
            _ => i += 1,
        }
    }

    result
}

/// Collects the body of a group whose opening parenthesis (and any name
/// prefix) has already been consumed.
///
/// Starting at byte offset `i`, scans forward until the parenthesis that
/// balances the already-open group is found. Escaped bytes (`\x`) and
/// bracketed character classes (`[...]`) are skipped so parentheses inside
/// them do not affect nesting depth.
///
/// Returns the body text (lossily decoded as UTF-8) and the offset at which
/// scanning stopped: one past the closing `)` when the group is terminated.
/// For an unterminated group the whole remainder of the input is returned as
/// the body, and the offset is at or past the end of `bytes` (it can exceed
/// the length by one when the input ends in a lone backslash).
fn collect_subpattern(bytes: &[u8], mut i: usize) -> (String, usize) {
    let start = i;
    let mut depth = 1usize;

    while i < bytes.len() && depth > 0 {
        match bytes[i] {
            b'\\' => {
                i += 2;
                continue;
            }
            b'[' => {
                i += 1;
                while i < bytes.len() {
                    match bytes[i] {
                        b'\\' => i += 2,
                        b']' => {
                            i += 1;
                            break;
                        }
                        _ => i += 1,
                    }
                }
                continue;
            }
            b'(' => depth += 1,
            b')' => depth -= 1,
            _ => {}
        }
        i += 1;
    }

    // Only drop the trailing byte when it really is the balancing `)`;
    // an unterminated group keeps everything up to the end of the input.
    let end = if depth == 0 { i - 1 } else { i.min(bytes.len()) };

    // `get` yields `None` for an out-of-range or inverted span, which maps
    // to an empty body instead of a panic.
    let subpattern = bytes
        .get(start..end)
        .map(|body| String::from_utf8_lossy(body).into_owned())
        .unwrap_or_default();

    (subpattern, i)
}