lextrail-test 0.1.0

use std::collections::HashMap;

use crate::helpers::{TrailError, consume_lexeme, format_error, get_env, is_escaped, peek};

enum SplitMarker {
    GROUP { index: usize },
    INTERVAL { index: usize },
    CLASS { index: usize },
}

pub fn rex_split(regex: &str) -> Result<Vec<String>, TrailError> {
    let mut lexemes: Vec<String> = Vec::new();
    let mut is_char_class = false;
    let mut is_intv_quant = false;
    let mut accumulate: Vec<char> = Vec::new();
    let mut markers: Vec<SplitMarker> = Vec::new();
    let mut i = 0;

    let chars: Vec<char> = regex.chars().collect();

    while i < chars.len() {
        let curr = chars[i];

        // === CHARACTER CLASSES ===
        if curr == '[' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                let next = peek(&chars, i, 1);

                // Placeholder for empty character classes.
                if next == ']' {
                    lexemes.push(String::new());
                } else {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    lexemes.push(curr.to_string());

                    markers.push(SplitMarker::CLASS { index: i });
                    is_char_class = true;
                }
            }
        } else if curr == ']' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else {
                if is_char_class {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    lexemes.push(curr.to_string());

                    let marker = markers
                        .pop()
                        .expect("Expected a `SplitMarker`, found `None` instead.");
                    assert!(
                        matches!(marker, SplitMarker::CLASS { .. }),
                        "`MarkerKind.CLASS` was not found."
                    );
                    is_char_class = false;
                } else {
                    let context: String = chars[..i].iter().collect();

                    return Err(TrailError(format_error(
                        "Unmatched closing bracket `]`.",
                        &context,
                        "]",
                    )));
                }
            }
        }
        // === QUANTIFIER CLASSES ===
        else if curr == '{' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                let (prev, next) = (peek(&chars, i, -1), peek(&chars, i, 1));

                if matches!(prev, '*' | '+' | '?' | '}' | '(') {
                    let context: String = chars[..i - 1].iter().collect();
                    let source = format!("{}{}", prev, "{");

                    return Err(TrailError(format_error(
                        "Invalid quantifier precedence.",
                        &context,
                        &source,
                    )));
                } else if prev == '\0' {
                    return Err(TrailError(format_error(
                        "Interval quantifiers must be preceded by either an expression or a group.",
                        "",
                        "{",
                    )));
                } else if next == '}' {
                    let context: String = chars[..i].iter().collect();

                    return Err(TrailError(format_error(
                        "Invalid interval quantifier.",
                        &context,
                        "{}",
                    )));
                } else {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    lexemes.push(curr.to_string());

                    markers.push(SplitMarker::INTERVAL { index: i });
                    is_intv_quant = true;
                }
            }
        } else if curr == '}' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                if is_intv_quant {
                    consume_lexeme(&mut lexemes, &mut accumulate);

                    let length = lexemes.len();
                    let (bracket, quantifier) = (lexemes.get(length - 2), lexemes.get(length - 1));

                    if let (Some(bracket), Some(quantifier)) = (bracket, quantifier) {
                        let parts: Vec<&str> =
                            quantifier.split(",").map(|part| part.trim()).collect();

                        if bracket == "{"
                            && parts.len() <= 2
                            && parts
                                .iter()
                                .all(|x| x.is_empty() || x.parse::<u32>().is_ok())
                        {
                            if parts.len() == 2 && parts.iter().all(|x| x.parse::<u32>().is_ok()) {
                                let (low, up) = (
                                    parts[0].parse::<u32>().unwrap(),
                                    parts[1].parse::<u32>().unwrap(),
                                );

                                if low > up {
                                    let context: String =
                                        chars[..i - quantifier.len() - 1].iter().collect();
                                    let source = format!("{}{}{}", "{", quantifier, "}");

                                    return Err(TrailError(format_error(
                                        "Interval quantifier bounds out of order.",
                                        &context,
                                        &source,
                                    )));
                                }
                            }

                            lexemes.push(curr.to_string());
                        } else {
                            let context: String = chars[..i - bracket.len() - quantifier.len()]
                                .iter()
                                .collect();
                            let source = format!("{}{}{}", bracket, quantifier, "}");

                            return Err(TrailError(format_error(
                                "Invalid interval quantifier.",
                                &context,
                                &source,
                            )));
                        }

                        let marker = markers
                            .pop()
                            .expect("Expected a `SplitMarker`, found `None` instead.");
                        assert!(
                            matches!(marker, SplitMarker::INTERVAL { .. }),
                            "`MarkerKind.INTERVAL` was not found."
                        );
                        is_intv_quant = false;
                    } else {
                        let context: String = chars[..i].iter().collect();

                        return Err(TrailError(format_error(
                            "Invalid interval quantifier.",
                            &context,
                            "}",
                        )));
                    }
                } else {
                    let context: String = chars[..i].iter().collect();

                    return Err(TrailError(format_error(
                        "Unmatched closing bracket `}`.",
                        &context,
                        "}",
                    )));
                }
            }
        }
        // === CAPTURE CLASSES ===
        else if curr == '(' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                consume_lexeme(&mut lexemes, &mut accumulate);
                lexemes.push(curr.to_string());

                markers.push(SplitMarker::GROUP { index: i })
            }
        } else if curr == ')' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else if !markers.is_empty() {
                consume_lexeme(&mut lexemes, &mut accumulate);
                lexemes.push(curr.to_string());

                let marker = markers
                    .pop()
                    .expect("Expected a `SplitMarker`, found `None` instead.");
                assert!(
                    matches!(marker, SplitMarker::GROUP { .. }),
                    "`MarkerKind.GROUP` was not found."
                );
            } else {
                let context: String = chars[..i].iter().collect();

                return Err(TrailError(format_error(
                    "Unmatched closing bracket `)`.",
                    &context,
                    ")",
                )));
            }
        }
        // === BACKREFERENCES ===
        else if curr.is_ascii_digit() {
            if is_escaped(&chars, i) {
                let context: String = chars[..i - 1].iter().collect();
                let source = format!("\\{}", curr);

                return Err(TrailError(format_error(
                    "Unsupported backreference format, use `(?<alphanumeric>...)` to capture, and `$<alphanumeric>` to load instead.",
                    &context,
                    &source,
                )));
            } else {
                accumulate.push(curr);
            }
        } else if curr == '?' {
            if is_escaped(&chars, i) {
                lexemes.push(curr.to_string());
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                let (prev, next) = (peek(&chars, i, -1), peek(&chars, i, 1));

                if prev == '\0' {
                    return Err(TrailError(format_error(
                        "Quantifiers must be preceded by either an expression or a group.",
                        "",
                        &curr.to_string(),
                    )));
                } else if matches!(prev, '*' | '+' | '?' | '}') {
                    let context: String = chars[..i - 1].iter().collect();

                    return Err(TrailError(format_error(
                        "Invalid quantifier precedence.",
                        &context,
                        &curr.to_string(),
                    )));
                } else if prev == '(' && !is_escaped(&chars, i) {
                    let skip = peek(&chars, i, 2);

                    if next == '<' && !matches!(skip, '=' | '!') {
                        consume_lexeme(&mut lexemes, &mut accumulate);
                        consume_reference(&mut lexemes, &mut i, &chars)?;
                    } else {
                        let context: String = chars[..i - 1].iter().collect();
                        let source = format!("{}{}{}", prev, curr, next);

                        return Err(TrailError(format_error(
                            "Unsupported question-mark construct.",
                            &context,
                            &source,
                        )));
                    }
                } else {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    lexemes.push(curr.to_string());
                }
            }
        } else if curr == '$' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                let next = peek(&chars, i, 1);

                if matches!(next, '\0' | '|') {
                    // Ignored.
                } else if next == '<' {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    consume_reference(&mut lexemes, &mut i, &chars)?;
                } else {
                    let context: String = chars[..i].iter().collect();

                    return Err(TrailError(format_error(
                        "Misplaced anchor, escape with `\\` for literal dollar sign.",
                        &context,
                        "$",
                    )));
                }
            }
        } else if curr == '^' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else {
                let (prev, next) = (peek(&chars, i, -1), peek(&chars, i, 1));

                if is_char_class {
                    if prev == '[' && !is_escaped(&chars, i - 1) {
                        if next == ']' {
                            lexemes.push(String::new());
                        }

                        assert!(accumulate.is_empty(), "`accumulate` expected to be empty.");

                        lexemes.push(curr.to_string());
                    } else {
                        accumulate.extend(['\\', curr]);
                    }
                } else {
                    if matches!(prev, '\0' | '|') {
                        // Ignore.
                    } else {
                        let context: String = chars[..i].iter().collect();

                        return Err(TrailError(format_error(
                            "Misplaced anchor, escape with `\\` for literal caret.",
                            &context,
                            "^",
                        )));
                    }
                }
            }
        } else if curr == '-' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else {
                if is_char_class {
                    let (prev, next) = (peek(&chars, i, -1), peek(&chars, i, 1));

                    if next == ']' {
                        accumulate.extend(['\\', curr]);
                    } else {
                        if !accumulate.is_empty() {
                            if prev.is_ascii_alphanumeric() && next.is_ascii_alphanumeric() {
                                let (start, end) = (prev as u32, next as u32);

                                if start <= end {
                                    if next == ']' {
                                        accumulate.extend(['\\', curr]);
                                    } else {
                                        accumulate.pop();
                                        consume_lexeme(&mut lexemes, &mut accumulate);
                                        lexemes.extend([
                                            prev.to_string(),
                                            curr.to_string(),
                                            next.to_string(),
                                        ]);
                                        i += 1;
                                    }
                                } else {
                                    let header = format!(
                                        "Character range '{prev}-{next}' is invalid: start must be less than or equal to end."
                                    );
                                    let context: String = chars[..i - 1].iter().collect();
                                    let source = format!("{}{}{}", prev, curr, next);

                                    return Err(TrailError(format_error(
                                        &header, &context, &source,
                                    )));
                                }
                            } else {
                                let context: String = chars[..i - 1].iter().collect();
                                let source = format!("{}-{}", prev, next);

                                return Err(TrailError(format_error(
                                    "Range contains non-alphanumeric characters.",
                                    &context,
                                    &source,
                                )));
                            }
                        } else {
                            accumulate.extend(['\\', curr]);
                        }
                    }
                } else {
                    accumulate.extend(['\\', curr]);
                }
            }
        } else if curr == '|' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                consume_lexeme(&mut lexemes, &mut accumulate);
                lexemes.push(curr.to_string());
            }
        } else if curr == '.' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                consume_lexeme(&mut lexemes, &mut accumulate);
                lexemes.push(curr.to_string());
            }
        }
        // === QUANTIFIERS ===
        // [NOTE] `?` is handled separately, since it could also act as a reference for `?<...>`.
        else if matches!(curr, '+' | '*') {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else if is_char_class {
                accumulate.extend(['\\', curr]);
            } else {
                let prev = peek(&chars, i, -1);

                if matches!(prev, '*' | '+' | '?' | '}' | '(') {
                    let context: String = chars[..i - 1].iter().collect();
                    let source = format!("{}{}", prev, curr);

                    return Err(TrailError(format_error(
                        "Invalid quantifier precedence.",
                        &context,
                        &source,
                    )));
                } else if prev == '\0' {
                    return Err(TrailError(format_error(
                        "Quantifiers must be preceded by either an expression or a group.",
                        "",
                        &curr.to_string(),
                    )));
                } else {
                    consume_lexeme(&mut lexemes, &mut accumulate);
                    lexemes.push(curr.to_string());
                }
            }
        }
        // === PREDEFINED CHARACTER CLASSES ===
        // Supported.
        else if matches!(curr, 'd' | 'D' | 'w' | 'W' | 's' | 'S') {
            if is_escaped(&chars, i) {
                accumulate.pop();
                consume_lexeme(&mut lexemes, &mut accumulate);
                accumulate.extend(['\\', curr]);
            } else {
                accumulate.push(curr);
            }
        }
        // Unsupported.
        else if matches!(curr, 'b' | 'B') {
            if is_escaped(&chars, i) {
                let context: String = chars[..i - 1].iter().collect();
                let source = format!("\\{}", curr);

                return Err(TrailError(format_error(
                    "Unsupported predefined escape character.",
                    &context,
                    &source,
                )));
            }
        }
        // === BACKSLASH ===
        else if curr == '\\' {
            if is_escaped(&chars, i) {
                accumulate.push(curr);
            } else {
                let next = peek(&chars, i, 1);

                if ESCAPABLE.contains(&next) {
                    accumulate.push(curr);
                } else if next.is_ascii_digit() || next == '/' {
                    // Ignore.
                } else {
                    let context: String = chars[..i].iter().collect();
                    let source = format!("\\{}", next);

                    return Err(TrailError(format_error(
                        "Invalid escape character.",
                        &context,
                        &source,
                    )));
                }
            }
        }
        // === RESERVED ===
        else if curr == '\0' {
            let context: String = chars[..i - 1].iter().collect();

            return Err(TrailError(format_error(
                "Reserved null character `\\0`.",
                &context,
                &curr.to_string(),
            )));
        } else {
            accumulate.push(curr);
        }

        i += 1;
    }

    consume_lexeme(&mut lexemes, &mut accumulate);

    if let Some(marker) = markers.pop() {
        match marker {
            SplitMarker::GROUP { index } => {
                let context: String = chars[..index].iter().collect();

                return Err(TrailError(format_error(
                    "Unmatched '(' - expected a closing ')'.",
                    &context,
                    "(",
                )));
            }
            SplitMarker::CLASS { index } => {
                let context: String = chars[..index].iter().collect();

                return Err(TrailError(format_error(
                    "Unmatched '[' - expected a closing ']'.",
                    &context,
                    "[",
                )));
            }
            SplitMarker::INTERVAL { index } => {
                let context: String = chars[..index].iter().collect();

                return Err(TrailError(format_error(
                    "Unmatched '{' - expected a closing '}'.",
                    &context,
                    "{",
                )));
            }
        }
    }

    return Ok(lexemes);
}

pub fn rex_expand(chunks: Vec<String>) -> Vec<String> {
    let digits = expand_ascii('0', '9');
    let non_digits = digits
        .clone()
        .pipe(|s| exclude_from_ascii(&s))
        .pipe(|s| include_escapes(&s));
    let word = format!(
        "{}{}{}_",
        expand_ascii('0', '9'),
        expand_ascii('a', 'z'),
        expand_ascii('A', 'Z')
    );
    let non_word = word
        .clone()
        .pipe(|s| exclude_from_ascii(&s))
        .pipe(|s| include_escapes(&s));
    let space = " \t\n\r\u{000C}\u{000B}".to_string();
    let non_space = String::from(" ")
        .pipe(|s| exclude_from_ascii(&s))
        .pipe(|s| include_escapes(&s));
    let wildcard = String::from("\n")
        .pipe(|s| exclude_from_ascii(&s))
        .pipe(|s| include_escapes(&s));

    let mut lexemes: Vec<String> = Vec::new();
    let mut i = 0;
    let mut is_char_class = false;
    let mut is_complement = false;

    while i < chunks.len() {
        let chunk = chunks[i].clone();

        if chunk == "[" {
            is_char_class = true;

            lexemes.push(chunk);
        } else if chunk == "]" {
            is_char_class = false;

            let mut assembled = Vec::new();
            while let Some(lexeme) = lexemes.pop() {
                if lexeme == "[" {
                    break;
                }
                assembled.push(lexeme);
            }

            assert!(!assembled.is_empty(), "Character class is empty.");

            let mut chunk: String = assembled.into_iter().rev().collect();

            if is_complement {
                chunk = chunk
                    .pipe(|s| exclude_escapes(&s))
                    .pipe(|s| exclude_from_ascii(&s))
                    .pipe(|s| include_escapes(&s));

                is_complement = false;
            }

            lexemes.extend(["[".to_string(), chunk, "]".to_string()]);
        } else if chunk == "^" {
            is_complement = true;
        }
        // === CHARACTER RANGE ===
        else if chunk == "-" {
            let start = lexemes
                .pop()
                .expect("Range `-` must be preceded by a bound.")
                .chars()
                .collect::<Vec<char>>()[0];
            let end: char = chunks
                .get(i + 1)
                .expect("Range `-` must be succeeded by a bound.")
                .chars()
                .collect::<Vec<char>>()[0];

            lexemes.push(expand_ascii(start, end));
            i += 1;
        }
        // === PREDEFINED CHARACTER CLASSES ===
        else if chunk == "\\d" {
            if is_char_class {
                lexemes.push(digits.clone());
            } else {
                lexemes.extend(["[".to_string(), digits.clone(), "]".to_string()]);
            }
        } else if chunk == "\\D" {
            if is_char_class {
                lexemes.push(non_digits.clone());
            } else {
                lexemes.extend(["[".to_string(), non_digits.clone(), "]".to_string()]);
            }
        } else if chunk == "\\w" {
            if is_char_class {
                lexemes.push(word.clone());
            } else {
                lexemes.extend(["[".to_string(), word.clone(), "]".to_string()]);
            }
        } else if chunk == "\\W" {
            if is_char_class {
                lexemes.push(non_word.clone());
            } else {
                lexemes.extend(["[".to_string(), non_word.clone(), "]".to_string()]);
            }
        } else if chunk == "\\s" {
            if is_char_class {
                lexemes.push(space.clone());
            } else {
                lexemes.extend(["[".to_string(), space.clone(), "]".to_string()]);
            }
        } else if chunk == "\\S" {
            if is_char_class {
                lexemes.push(non_space.clone());
            } else {
                lexemes.extend(["[".to_string(), non_space.clone(), "]".to_string()]);
            }
        } else if chunk == "." {
            lexemes.extend(["[".to_string(), wildcard.clone(), "]".to_string()]);
        } else {
            lexemes.push(chunk);
        }

        i += 1;
    }

    return lexemes;
}

pub fn rex_norm(chunks: Vec<String>) -> Vec<String> {
    let quantifiers: HashMap<&str, (Vec<&str>, Vec<&str>)> = HashMap::from([
        ("?", (vec!["["], vec!["]"])),
        ("+", (vec!["{"], vec!["}"])),
        ("*", (vec!["{", "["], vec!["]", "}"])),
    ]);

    let mut lexemes: Vec<String> = Vec::new();
    let mut i = 0;

    while i < chunks.len() {
        let chunk = chunks[i].clone();

        // === CHARACTER CLASSES ===
        if chunk == "[" {
            let (next, skip) = (
                chunks.get(i + 1).expect("Character class is empty."),
                chunks.get(i + 2).expect("Character class is empty."),
            );

            assert_eq!(skip, "]", "Expected assembled character class.");

            if get_env("TEST_MODE", false) {
                lexemes.push("(".to_string());

                let unions = exclude_escapes(next)
                    .chars()
                    .map(|c| format!("\"{}\"", c))
                    .collect::<Vec<_>>()
                    .join("|");
                lexemes.push(unions);

                lexemes.push(")".to_string());
            } else {
                lexemes.push("(".to_string());

                let mut unions: Vec<String> = exclude_escapes(next)
                    .chars()
                    .flat_map(|c| [format!("\"{}\"", c), "|".to_string()])
                    .collect();
                lexemes.extend(unions.drain(..unions.len() - 1));

                lexemes.push(")".to_string());
            }

            i += 2
        }
        // === QUANTIFIERS ===
        else if matches!(&*chunk, "*" | "+" | "?") {
            assert!(
                !lexemes.is_empty(),
                "Quantifiers must be preceded by either a expression or a group."
            );

            let prev = lexemes
                .pop()
                .expect("Quantifiers must be preceded by either a expression or a group.");
            let (open_br, close_br) = &quantifiers[&*chunk];

            if prev == ")" {
                let mut assembled = Vec::new();
                let mut depth = 0;

                while let Some(chunk) = lexemes.pop() {
                    if chunk == "(" && depth == 0 {
                        break;
                    }

                    depth += if chunk == ")" {
                        1
                    } else if chunk == "(" {
                        -1
                    } else {
                        0
                    };
                    assembled.push(chunk);
                }

                assembled = assembled.into_iter().rev().collect();

                lexemes.extend(open_br.iter().map(|&s| s.to_string()));
                lexemes.extend(assembled);
                lexemes.extend(close_br.iter().map(|&s| s.to_string()));
            } else {
                assert!(
                    prev.starts_with("\"") && prev.len() == 3 && prev.ends_with("\""),
                    "Quantifier was not preceded by a terminal character."
                );

                lexemes.extend(open_br.iter().map(|&s| s.to_string()));
                lexemes.push(prev);
                lexemes.extend(close_br.iter().map(|&s| s.to_string()));
            }
        }
        // === INTERVAL QUANTIFIERS ===
        else if chunk == "{" {
            let prev = lexemes
                .pop()
                .expect("Interval quantifiers must be preceded by either a expression or a group.");
            let next: Vec<&str> = chunks
                .get(i + 1)
                .expect("Interval quantifiers must have a content.")
                .split(",")
                .collect();

            let args = match next.len() {
                2 => (next[0], next[1]),
                _ => (next[0], next[0]),
            };
            let (req, max) = (
                args.0.parse::<u32>().unwrap_or(0),
                args.1.parse::<u32>().unwrap_or(0),
            );

            let opt: i32 = (max as i32) - (req as i32);

            if prev == ")" {
                let mut assembled = Vec::new();
                let mut depth = 0;

                while let Some(chunk) = lexemes.pop() {
                    if chunk == "(" && depth == 0 {
                        break;
                    }

                    depth += if chunk == ")" {
                        1
                    } else if chunk == "(" {
                        -1
                    } else {
                        0
                    };
                    assembled.push(chunk);
                }

                assembled = assembled.into_iter().rev().collect();

                for _ in 0..req {
                    lexemes.push("(".to_string());
                    lexemes.extend(assembled.iter().cloned());
                    lexemes.push(")".to_string());
                }

                for _ in 0..opt {
                    lexemes.push("[".to_string());
                    lexemes.extend(assembled.iter().cloned());
                    lexemes.push("]".to_string());
                }

                if opt < 0 {
                    lexemes.extend(["{".to_string(), "[".to_string()]);
                    lexemes.extend(assembled.iter().cloned());
                    lexemes.extend(["]".to_string(), "}".to_string()]);
                }
            } else {
                assert!(
                    prev.starts_with("\"") && prev.len() == 3 && prev.ends_with("\""),
                    "Quantifier was not preceded by a terminal character."
                );

                lexemes.extend(std::iter::repeat(prev.clone()).take(req as usize));

                for _ in 0..opt {
                    lexemes.push("[".to_string());
                    lexemes.push(prev.clone());
                    lexemes.push("]".to_string());
                }

                if opt < 0 {
                    lexemes.extend(["{".to_string(), "[".to_string()]);
                    lexemes.push(prev.clone());
                    lexemes.extend(["]".to_string(), "}".to_string()]);
                }
            }

            i += 2;
        } else {
            assert!(
                !"[]{}.-^$+*?".contains(&chunk),
                "Invalid symbols at normalization pass."
            );

            if matches!(&*chunk, "(" | ")" | "|") || is_reference(&chunk) {
                lexemes.push(chunk);
            } else {
                let chunk = exclude_escapes(&chunk);

                if matches!(chunks.get(i + 1), Some(s) if ["{", "*", "?", "+"].contains(&s.as_str()))
                {
                    let size = chunk.len();

                    if size == 1 {
                        lexemes.push(format!("\"{}\"", chunk));
                    } else {
                        lexemes.extend([
                            format!("\"{}\"", &chunk[..size - 1]),
                            format!("\"{}\"", &chunk[size - 1..]),
                        ]);
                    }
                } else {
                    lexemes.push(format!("\"{}\"", chunk));
                }
            }
        }

        i += 1;
    }

    return lexemes;
}

pub fn rex_parse(regex: &str) -> Result<Vec<String>, TrailError> {
    Ok(rex_norm(rex_expand(rex_split(regex)?)))
}

// ============================ HELPERS ============================

const ESCAPABLE: &[char] = &[
    '(', ')', '[', ']', '{', '}', 'd', 'D', 'w', 'W', 's', 'S', '|', '+', '*', '?', '-', '^', '$',
    '.',
];

trait Pipe: Sized {
    fn pipe<F, R>(self, f: F) -> R
    where
        F: FnOnce(Self) -> R,
    {
        f(self)
    }
}

impl Pipe for String {}

fn consume_reference(
    lexemes: &mut Vec<String>,
    index: &mut usize,
    chars: &Vec<char>,
) -> Result<(), TrailError> {
    let (prefix, bracket) = (chars[*index], chars[*index + 1]);
    let mut reference: Vec<char> = Vec::new();

    reference.extend([prefix, bracket]);
    *index += 2;

    let mut curr = chars[*index];
    while curr != '>' {
        if curr.is_ascii_alphanumeric() || curr == '_' {
            reference.push(curr);
        } else {
            let context: String = chars[..*index].iter().collect();
            let source = curr.to_string();

            return Err(TrailError(format_error(
                "Reference name must be a word.",
                &context,
                &source,
            )));
        }

        *index += 1;
        curr = chars[*index]
    }

    reference.push(chars[*index]);

    consume_lexeme(lexemes, &mut reference);

    return Ok(());
}

const CONTEXT: &str = "()[]{}|.*?+-";

fn include_escapes(exp: &str) -> String {
    exp.chars()
        .flat_map(|c| {
            if CONTEXT.contains(c) {
                vec!['\\', c]
            } else {
                vec![c]
            }
        })
        .collect()
}

fn expand_ascii(start: char, end: char) -> String {
    (start..=end).collect()
}

const PRINTABLE: &str = "0123456789\
     abcdefghijklmnopqrstuvwxyz\
     ABCDEFGHIJKLMNOPQRSTUVWXYZ\
     !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \
     \t\n\r\x0b\x0c";

fn exclude_from_ascii(exp: &str) -> String {
    PRINTABLE.chars().filter(|&c| !exp.contains(c)).collect()
}

fn exclude_escapes(exp: &str) -> String {
    let mut result = Vec::new();

    for c in exp.chars() {
        if CONTEXT.contains(c) && !result.is_empty() {
            result.pop();
        }
        result.push(c);
    }

    result.into_iter().collect()
}

fn is_reference(reference: &String) -> bool {
    (reference.starts_with("?<") || reference.starts_with("$<")) && reference.ends_with(">")
}