zuzu-rust 0.4.0

use crate::error::{Result, ZuzuRustError};
use crate::span::Span;
use crate::token::{TemplatePart, Token, TokenKind};

/// Reserved words of the language.
///
/// `lex` looks every identifier-shaped lexeme up in this table: a hit is
/// emitted as `TokenKind::Keyword`, anything else as `TokenKind::Identifier`.
/// The table mixes statement keywords, literals (`null`, `true`, `false`),
/// word-spelled operators (`and`, `eq`, `union`, ...) and built-in unary
/// words (`abs`, `uc`, `typeof`, ...).
const KEYWORDS: &[&str] = &[
    "let",
    "const",
    "function",
    "async",
    "await",
    "spawn",
    "class",
    "trait",
    "method",
    "static",
    "extends",
    "with",
    "but",
    "from",
    "import",
    "as",
    "try",
    "catch",
    "if",
    "else",
    "unless",
    "while",
    "for",
    "in",
    "switch",
    "case",
    "default",
    "return",
    "next",
    "continue",
    "last",
    "throw",
    "die",
    "do",
    "fn",
    "new",
    "super",
    "null",
    "true",
    "false",
    "and",
    "or",
    "xor",
    "nand",
    "not",
    "mod",
    "eq",
    "ne",
    "gt",
    "ge",
    "lt",
    "le",
    "cmp",
    "eqi",
    "nei",
    "gti",
    "gei",
    "lti",
    "lei",
    "cmpi",
    "say",
    "print",
    "warn",
    "assert",
    "debug",
    "instanceof",
    "does",
    "can",
    "union",
    "divides",
    "intersection",
    "subsetof",
    "supersetof",
    "equivalentof",
    "abs",
    "sqrt",
    "floor",
    "ceil",
    "round",
    "int",
    "uc",
    "lc",
    "length",
    "typeof",
];

/// Second-priority operator spellings, tried by `match_operator` only after
/// every entry of `THREE_CHAR_OPERATORS` has failed to match.
///
/// Despite the name, the table is not limited to two-character operators: it
/// also carries single-character Unicode operators such as "≤" or "∈". The
/// consumed width is computed from the matched entry (in `char`s), so mixed
/// lengths are fine. Entries are tried in order and the first match wins.
const TWO_CHAR_OPERATORS: &[&str] = &[
    ":=", "+=", "-=", "*=", "/=", "_=", "~=", "**", "==", "!=", "<=", ">=", "++", "--", "->", "@?",
    "@@", "?:", "|>", "<|", "×=", "÷=", "≤", "≥", "≠", "≡", "≢", "≶", "≷", "⋀", "⋁", "⊻", "⊼", "∈",
    "∉", "⋃", "⋂", "∖", "¬", "√", "⊂", "⊃", ">>", "<<", "⌊", "⌋", "⌈", "⌉", "«", "»", "→", "∣",
    "∤",
];

/// First-priority operator spellings: `match_operator` tries these before
/// `TWO_CHAR_OPERATORS`, so e.g. "**=" wins over "**" and "<=>" over "<=".
/// Despite the name, two-character spellings (".(" and "⊂⊃") live here too.
const THREE_CHAR_OPERATORS: &[&str] = &["**=", "?:=", "<=>", ".(", "⊂⊃", "<<<", ">>>", "..."];

pub fn lex(source: &str) -> Result<Vec<Token>> {
    let chars: Vec<char> = source.chars().collect();
    let mut tokens = Vec::new();
    let mut i = 0usize;
    let mut line = 1usize;
    let mut column = 1usize;

    while i < chars.len() {
        let ch = chars[i];

        if i == 0 && ch == '#' && i + 1 < chars.len() && chars[i + 1] == '!' {
            i += 2;
            column += 2;
            while i < chars.len() && chars[i] != '\n' {
                i += 1;
                column += 1;
            }
            continue;
        }

        if column == 1 && ch == '=' && starts_pod_command(&chars, i) {
            let (next_i, next_line, next_column) = skip_pod_block(&chars, i, line, column);
            i = next_i;
            line = next_line;
            column = next_column;
            continue;
        }

        if ch == ' ' || ch == '\t' || ch == '\r' {
            i += 1;
            column += 1;
            continue;
        }
        if ch == '\n' {
            i += 1;
            line += 1;
            column = 1;
            continue;
        }
        if ch == '/' && i + 1 < chars.len() && chars[i + 1] == '/' {
            i += 2;
            column += 2;
            while i < chars.len() && chars[i] != '\n' {
                i += 1;
                column += 1;
            }
            continue;
        }
        if ch == '/' && i + 1 < chars.len() && chars[i + 1] == '*' {
            i += 2;
            column += 2;
            while i + 1 < chars.len() {
                if chars[i] == '*' && chars[i + 1] == '/' {
                    i += 2;
                    column += 2;
                    break;
                }
                if chars[i] == '\n' {
                    i += 1;
                    line += 1;
                    column = 1;
                } else {
                    i += 1;
                    column += 1;
                }
            }
            continue;
        }

        let start = i;
        let start_line = line;
        let start_column = column;

        if ch == '/' && can_start_regex(&tokens) {
            let (pattern, parts, flags, end, new_line, new_column) =
                lex_regex(&chars, i, line, column)?;
            tokens.push(Token::new(
                TokenKind::Regex {
                    pattern,
                    parts,
                    flags,
                },
                Span::new(start, end, start_line, start_column),
            ));
            i = end;
            line = new_line;
            column = new_column;
            continue;
        }

        if ch == '^' && i + 1 < chars.len() && chars[i + 1] == '^' {
            tokens.push(Token::new(
                TokenKind::Identifier("^^".to_owned()),
                Span::new(start, start + 2, start_line, start_column),
            ));
            i += 2;
            column += 2;
            continue;
        }

        if let Some(operator) = match_operator(&chars, i) {
            let width = operator.chars().count();
            let operator = if operator == "→" { "->" } else { operator };
            tokens.push(Token::new(
                TokenKind::Operator(operator.to_owned()),
                Span::new(start, start + width, start_line, start_column),
            ));
            i += width;
            column += width;
            continue;
        }

        if ch == '_' && i + 1 < chars.len() && is_identifier_continue(chars[i + 1]) {
            let (value, end, end_column) = lex_identifier(&chars, i, column);
            tokens.push(Token::new(
                TokenKind::Identifier(value),
                Span::new(start, end, start_line, start_column),
            ));
            i = end;
            column = end_column;
            continue;
        }

        match ch {
            '(' | ')' | '{' | '}' | '[' | ']' | ',' | ';' => {
                tokens.push(Token::new(
                    TokenKind::Punct(ch),
                    Span::new(start, start + 1, start_line, start_column),
                ));
                i += 1;
                column += 1;
            }
            '.' | ':' | '?' | '+' | '-' | '*' | '/' | '_' | '=' | '<' | '>' | '!' | '&' | '|'
            | '^' | '~' | '@' | '\\' | '×' | '÷' | '≤' | '≥' | '≠' | '≡' | '≢' | '≶' | '≷'
            | '⋀' | '⋁' | '⊻' | '⊼' | '∈' | '∉' | '⋃' | '⋂' | '∖' | '¬' | '√' | '⊂' | '⊃' | '⌊'
            | '⌋' | '⌈' | '⌉' | '«' | '»' | '▷' | '◁' | '∣' | '∤' => {
                tokens.push(Token::new(
                    TokenKind::Operator(ch.to_string()),
                    Span::new(start, start + 1, start_line, start_column),
                ));
                i += 1;
                column += 1;
            }
            '"' => {
                let (value, end, new_line, new_column) =
                    if i + 2 < chars.len() && chars[i + 1] == '"' && chars[i + 2] == '"' {
                        lex_triple_quoted_string(&chars, i, line, column)?
                    } else {
                        lex_string(&chars, i, line, column)?
                    };
                tokens.push(Token::new(
                    TokenKind::String(value),
                    Span::new(start, end, start_line, start_column),
                ));
                i = end;
                line = new_line;
                column = new_column;
            }
            '\'' => {
                let (value, end, new_line, new_column) =
                    if i + 2 < chars.len() && chars[i + 1] == '\'' && chars[i + 2] == '\'' {
                        lex_triple_single_quoted_binary(&chars, i, line, column)?
                    } else {
                        lex_single_quoted_binary(&chars, i, line, column)?
                    };
                tokens.push(Token::new(
                    TokenKind::BinaryString(value),
                    Span::new(start, end, start_line, start_column),
                ));
                i = end;
                line = new_line;
                column = new_column;
            }
            '`' => {
                let (value, end, new_line, new_column) = lex_template(&chars, i, line, column)?;
                tokens.push(Token::new(
                    TokenKind::Template(value),
                    Span::new(start, end, start_line, start_column),
                ));
                i = end;
                line = new_line;
                column = new_column;
            }
            '0'..='9' => {
                let (value, end, end_column) = lex_number(&chars, i, column);
                tokens.push(Token::new(
                    TokenKind::Number(value),
                    Span::new(start, end, start_line, start_column),
                ));
                i = end;
                column = end_column;
            }
            '⊤' => {
                tokens.push(Token::new(
                    TokenKind::Keyword("true"),
                    Span::new(start, start + 1, start_line, start_column),
                ));
                i += 1;
                column += 1;
            }
            '⊥' => {
                tokens.push(Token::new(
                    TokenKind::Keyword("false"),
                    Span::new(start, start + 1, start_line, start_column),
                ));
                i += 1;
                column += 1;
            }
            _ if is_identifier_start(ch) => {
                let (value, end, end_column) = lex_identifier(&chars, i, column);
                let kind = if KEYWORDS.contains(&value.as_str()) {
                    TokenKind::Keyword(Box::leak(value.into_boxed_str()))
                } else {
                    TokenKind::Identifier(value)
                };
                tokens.push(Token::new(
                    kind,
                    Span::new(start, end, start_line, start_column),
                ));
                i = end;
                column = end_column;
            }
            _ => {
                return Err(ZuzuRustError::lex(
                    format!("unexpected character '{}'", ch),
                    line,
                    column,
                ));
            }
        }
    }

    tokens.push(Token::new(
        TokenKind::Eof,
        Span::new(source.len(), source.len(), line, column),
    ));
    Ok(tokens)
}

/// Returns the operator spelled at `chars[index..]`, if any.
///
/// Candidates from `THREE_CHAR_OPERATORS` are tried first, then those from
/// `TWO_CHAR_OPERATORS`; within each table the first matching entry wins.
fn match_operator(chars: &[char], index: usize) -> Option<&'static str> {
    THREE_CHAR_OPERATORS
        .iter()
        .chain(TWO_CHAR_OPERATORS)
        .copied()
        .find(|candidate| matches_text(chars, index, candidate))
}

/// Reports whether the characters of `text` appear in `chars` starting at
/// `index`. Returns `false` when `index` lies beyond the end of `chars` or
/// when fewer characters remain than `text` contains.
fn matches_text(chars: &[char], index: usize, text: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    text.chars()
        .all(|expected| remaining.next() == Some(&expected))
}

/// Numeric value of an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`), or
/// `None` for any other character.
fn hex_value(ch: char) -> Option<u32> {
    ch.to_digit(16)
}

/// Reads exactly `digits` hexadecimal digits starting at `chars[index]` and
/// returns their combined value.
///
/// `name` is the escape introducer (for example `\x`) and is only used in the
/// error message; `line`/`column` are the position reported on failure.
///
/// # Errors
///
/// Fails with "invalid {name} escape" when fewer than `digits` characters
/// remain or when any of them is not a hexadecimal digit.
fn read_hex_escape(
    chars: &[char],
    index: usize,
    digits: usize,
    line: usize,
    column: usize,
    name: &str,
) -> Result<u32> {
    let invalid = || ZuzuRustError::lex(format!("invalid {name} escape"), line, column);

    let window = chars.get(index..index + digits).ok_or_else(invalid)?;
    window
        .iter()
        .try_fold(0u32, |acc, &ch| hex_value(ch).map(|digit| acc * 16 + digit))
        .ok_or_else(invalid)
}

/// Decodes one escape sequence of a text literal.
///
/// On entry `*index` points at the character right after the backslash. On
/// success `*index` and `*column` are advanced past the whole escape and the
/// decoded character is returned; on failure neither is modified.
///
/// Supported escapes: `\n`, `\r`, `\t`, the self-quoting characters
/// `\\ \' \" \` \/ \$`, `\xHH`, and (only when `allow_unicode` is true)
/// `\uHHHH`. `literal_name` is interpolated into the error messages.
///
/// # Errors
///
/// Fails when the input ends right after the backslash, when the hex digits
/// are missing or malformed, when `\uHHHH` is not a valid Unicode scalar
/// value, when `\u` is used with `allow_unicode == false`, or when the escape
/// character is not recognised.
fn decode_text_escape(
    chars: &[char],
    index: &mut usize,
    column: &mut usize,
    line: usize,
    allow_unicode: bool,
    literal_name: &str,
) -> Result<char> {
    let Some(&marker) = chars.get(*index) else {
        return Err(ZuzuRustError::lex(
            format!("unterminated {literal_name} escape"),
            line,
            *column,
        ));
    };
    let at_column = *column;

    // Each arm yields the decoded character and how many source characters
    // (marker included) the escape occupies.
    let (decoded, width) = match marker {
        'n' => ('\n', 1),
        'r' => ('\r', 1),
        't' => ('\t', 1),
        '\\' | '\'' | '"' | '`' | '/' | '$' => (marker, 1),
        'x' => {
            let code = read_hex_escape(chars, *index + 1, 2, line, at_column, "\\x")?;
            let scalar = char::from_u32(code).expect("\\xHH is always a valid Unicode scalar");
            (scalar, 3)
        }
        'u' if allow_unicode => {
            let code = read_hex_escape(chars, *index + 1, 4, line, at_column, "\\u")?;
            match char::from_u32(code) {
                Some(scalar) => (scalar, 5),
                None => {
                    return Err(ZuzuRustError::lex(
                        "invalid Unicode escape",
                        line,
                        at_column,
                    ));
                }
            }
        }
        'u' => {
            return Err(ZuzuRustError::lex(
                "Unicode escapes are not supported in binary strings",
                line,
                at_column,
            ));
        }
        _ => {
            return Err(ZuzuRustError::lex(
                format!("invalid {literal_name} escape"),
                line,
                at_column,
            ));
        }
    };

    *index += width;
    *column += width;
    Ok(decoded)
}

/// Decodes one escape sequence of a binary string literal into its byte(s).
///
/// On entry `*index` points at the character right after the backslash. On
/// success `*index` and `*column` are advanced past the escape; on failure
/// neither is modified. Every supported escape currently yields one byte.
///
/// # Errors
///
/// Fails when the input ends right after the backslash, when `\x` is not
/// followed by two hex digits, for `\u` (not supported in binary strings),
/// and for any unrecognised escape character.
fn decode_binary_escape(
    chars: &[char],
    index: &mut usize,
    column: &mut usize,
    line: usize,
) -> Result<Vec<u8>> {
    let Some(&marker) = chars.get(*index) else {
        return Err(ZuzuRustError::lex(
            "unterminated binary string escape",
            line,
            *column,
        ));
    };
    let at_column = *column;

    // Each arm yields the decoded byte and the escape's width in source
    // characters (marker included).
    let (byte, width) = match marker {
        'n' => (b'\n', 1),
        'r' => (b'\r', 1),
        't' => (b'\t', 1),
        '\\' => (b'\\', 1),
        '\'' => (b'\'', 1),
        '"' => (b'"', 1),
        '`' => (b'`', 1),
        '/' => (b'/', 1),
        '$' => (b'$', 1),
        'x' => {
            let code = read_hex_escape(chars, *index + 1, 2, line, at_column, "\\x")?;
            // Two hex digits never exceed 0xFF, so the cast is lossless.
            (code as u8, 3)
        }
        'u' => {
            return Err(ZuzuRustError::lex(
                "Unicode escapes are not supported in binary strings",
                line,
                at_column,
            ));
        }
        _ => {
            return Err(ZuzuRustError::lex(
                "invalid binary string escape",
                line,
                at_column,
            ));
        }
    };

    *index += width;
    *column += width;
    Ok(vec![byte])
}

/// Lexes a single-line `"..."` string whose opening quote is at
/// `chars[start]`.
///
/// Returns the decoded text, the index just past the closing quote, and the
/// line/column at that point.
///
/// # Errors
///
/// Fails on a raw newline or end of input before the closing quote, and on
/// any invalid escape sequence.
fn lex_string(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(String, usize, usize, usize)> {
    let mut text = String::new();
    let mut pos = start + 1;
    let mut col = column + 1;

    while let Some(&ch) = chars.get(pos) {
        if ch == '"' {
            return Ok((text, pos + 1, line, col + 1));
        }
        if ch == '\n' {
            return Err(ZuzuRustError::lex(
                "unterminated string literal",
                line,
                col,
            ));
        }
        if ch == '\\' {
            // Step over the backslash; the decoder consumes the rest.
            pos += 1;
            col += 1;
            text.push(decode_text_escape(
                chars, &mut pos, &mut col, line, true, "string",
            )?);
        } else {
            text.push(ch);
            pos += 1;
            col += 1;
        }
    }

    // End of input: report the position of the opening quote.
    Err(ZuzuRustError::lex(
        "unterminated string literal",
        line,
        column,
    ))
}

/// Lexes a single-line `'...'` binary string whose opening quote is at
/// `chars[start]`.
///
/// Unescaped characters contribute their UTF-8 encoding; escapes are decoded
/// by `decode_binary_escape`. Returns the bytes, the index just past the
/// closing quote, and the line/column at that point.
///
/// # Errors
///
/// Fails on a raw newline or end of input before the closing quote, and on
/// any invalid escape sequence.
fn lex_single_quoted_binary(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(Vec<u8>, usize, usize, usize)> {
    let mut bytes = Vec::new();
    let mut pos = start + 1;
    let mut col = column + 1;

    while let Some(&ch) = chars.get(pos) {
        if ch == '\'' {
            return Ok((bytes, pos + 1, line, col + 1));
        }
        if ch == '\n' {
            return Err(ZuzuRustError::lex(
                "unterminated binary string literal",
                line,
                col,
            ));
        }
        if ch == '\\' {
            // Step over the backslash; the decoder consumes the rest.
            pos += 1;
            col += 1;
            let decoded = decode_binary_escape(chars, &mut pos, &mut col, line)?;
            bytes.extend(decoded);
        } else {
            let mut scratch = [0u8; 4];
            bytes.extend_from_slice(ch.encode_utf8(&mut scratch).as_bytes());
            pos += 1;
            col += 1;
        }
    }

    // End of input: report the position of the opening quote.
    Err(ZuzuRustError::lex(
        "unterminated binary string literal",
        line,
        column,
    ))
}

/// Lexes a `'''...'''` binary string whose opening delimiter starts at
/// `chars[start]`.
///
/// The body is taken verbatim (no escape processing, newlines allowed) and
/// stored as UTF-8 bytes. Returns the bytes, the index just past the closing
/// delimiter, and the line/column at that point.
///
/// # Errors
///
/// Fails when the input ends before a closing `'''`.
fn lex_triple_single_quoted_binary(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(Vec<u8>, usize, usize, usize)> {
    const CLOSER: [char; 3] = ['\'', '\'', '\''];

    let mut bytes = Vec::new();
    let mut pos = start + 3;
    let (mut ln, mut col) = (line, column + 3);

    while let Some(&ch) = chars.get(pos) {
        if chars[pos..].starts_with(&CLOSER) {
            return Ok((bytes, pos + 3, ln, col + 3));
        }
        let mut scratch = [0u8; 4];
        bytes.extend_from_slice(ch.encode_utf8(&mut scratch).as_bytes());
        pos += 1;
        match ch {
            '\n' => {
                ln += 1;
                col = 1;
            }
            _ => col += 1,
        }
    }

    Err(ZuzuRustError::lex(
        "unterminated triple-quoted binary string literal",
        line,
        column,
    ))
}

/// Lexes a `"""..."""` string whose opening delimiter starts at
/// `chars[start]`.
///
/// The body is taken verbatim: no escape processing, and newlines are
/// allowed. Returns the text, the index just past the closing delimiter, and
/// the line/column at that point.
///
/// # Errors
///
/// Fails when the input ends before a closing `"""`.
fn lex_triple_quoted_string(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(String, usize, usize, usize)> {
    const CLOSER: [char; 3] = ['"', '"', '"'];

    let mut text = String::new();
    let mut pos = start + 3;
    let (mut ln, mut col) = (line, column + 3);

    while let Some(&ch) = chars.get(pos) {
        if chars[pos..].starts_with(&CLOSER) {
            return Ok((text, pos + 3, ln, col + 3));
        }
        text.push(ch);
        pos += 1;
        match ch {
            '\n' => {
                ln += 1;
                col = 1;
            }
            _ => col += 1,
        }
    }

    Err(ZuzuRustError::lex(
        "unterminated triple-quoted string literal",
        line,
        column,
    ))
}

/// Lexes a backtick template literal whose opening backtick is at
/// `chars[start]`.
///
/// A literal opening with three backticks is delegated to
/// `lex_triple_backtick_template`. Otherwise the literal is single-line: text
/// runs (with escapes decoded) become `TemplatePart::Text`, and each
/// `${ ... }` becomes a `TemplatePart::Expr` holding the raw expression
/// source. Empty text runs are not emitted.
///
/// Returns the parts, the index just past the closing backtick, and the
/// line/column at that point.
///
/// # Errors
///
/// Fails on a raw newline or end of input before the closing backtick, on an
/// invalid escape, and on any error from the interpolation scanner.
fn lex_template(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(Vec<TemplatePart>, usize, usize, usize)> {
    let opens_fence = chars.get(start + 1) == Some(&'`') && chars.get(start + 2) == Some(&'`');
    if opens_fence {
        return lex_triple_backtick_template(chars, start, line, column);
    }

    let mut parts = Vec::new();
    let mut pending = String::new();
    let mut pending_line = line;
    let mut pos = start + 1;
    let mut ln = line;
    let mut col = column + 1;

    while let Some(&ch) = chars.get(pos) {
        match ch {
            '`' => {
                if !pending.is_empty() {
                    parts.push(TemplatePart::Text {
                        line: pending_line,
                        value: pending,
                    });
                }
                return Ok((parts, pos + 1, ln, col + 1));
            }
            '\\' => {
                // Step over the backslash; the decoder consumes the rest.
                pos += 1;
                col += 1;
                let decoded =
                    decode_text_escape(chars, &mut pos, &mut col, ln, true, "template literal")?;
                pending.push(decoded);
            }
            '$' if chars.get(pos + 1) == Some(&'{') => {
                // Flush the text collected so far before the expression part.
                if !pending.is_empty() {
                    parts.push(TemplatePart::Text {
                        line: pending_line,
                        value: std::mem::take(&mut pending),
                    });
                }
                let (source, after, next_line, next_col) =
                    lex_template_interpolation(chars, pos + 2, ln, col + 2)?;
                // `ln` is still the line on which the `${` appeared.
                parts.push(TemplatePart::Expr { line: ln, source });
                pos = after;
                ln = next_line;
                col = next_col;
                pending_line = ln;
            }
            '\n' => {
                return Err(ZuzuRustError::lex(
                    "unterminated template literal",
                    ln,
                    col,
                ));
            }
            _ => {
                pending.push(ch);
                pos += 1;
                col += 1;
            }
        }
    }

    // End of input: report the position of the opening backtick.
    Err(ZuzuRustError::lex(
        "unterminated template literal",
        line,
        column,
    ))
}

/// Lexes a triple-backtick template literal whose opening delimiter starts at
/// `chars[start]`.
///
/// The body may span lines and is taken verbatim (no escape processing)
/// except for `${ ... }` interpolations, which become `TemplatePart::Expr`.
/// Each text part records the line on which its first character appeared.
///
/// Returns the parts, the index just past the closing delimiter, and the
/// line/column at that point.
///
/// # Errors
///
/// Fails when the input ends before the closing delimiter, and on any error
/// from the interpolation scanner.
fn lex_triple_backtick_template(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(Vec<TemplatePart>, usize, usize, usize)> {
    const FENCE: [char; 3] = ['`', '`', '`'];

    let mut parts = Vec::new();
    let mut pending = String::new();
    let mut pending_line = line;
    let mut pos = start + 3;
    let mut ln = line;
    let mut col = column + 3;

    while let Some(&ch) = chars.get(pos) {
        if chars[pos..].starts_with(&FENCE) {
            if !pending.is_empty() {
                parts.push(TemplatePart::Text {
                    line: pending_line,
                    value: pending,
                });
            }
            return Ok((parts, pos + 3, ln, col + 3));
        }

        if ch == '$' && chars.get(pos + 1) == Some(&'{') {
            // Flush the text collected so far before the expression part.
            if !pending.is_empty() {
                parts.push(TemplatePart::Text {
                    line: pending_line,
                    value: std::mem::take(&mut pending),
                });
            }
            let (source, after, next_line, next_col) =
                lex_template_interpolation(chars, pos + 2, ln, col + 2)?;
            // `ln` is still the line on which the `${` appeared.
            parts.push(TemplatePart::Expr { line: ln, source });
            pos = after;
            ln = next_line;
            col = next_col;
            pending_line = ln;
            continue;
        }

        // A fresh text run starts on the current line.
        if pending.is_empty() {
            pending_line = ln;
        }
        pending.push(ch);
        pos += 1;
        match ch {
            '\n' => {
                ln += 1;
                col = 1;
            }
            _ => col += 1,
        }
    }

    Err(ZuzuRustError::lex(
        "unterminated triple-backtick template literal",
        line,
        column,
    ))
}

/// Scans the body of a `${ ... }` interpolation. `start` is the index of the
/// first character after the opening `${`.
///
/// Braces are balanced while quoted strings, nested template literals and
/// `//` / `/* */` comments are skipped, so that braces inside them do not
/// count. Returns the raw expression source (without the closing `}`), the
/// index just past that `}`, and the line/column at that point.
///
/// # Errors
///
/// Fails when the input ends before the matching `}`, and on any error from
/// the nested quote/template scanners.
fn lex_template_interpolation(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(String, usize, usize, usize)> {
    let mut pos = start;
    let mut ln = line;
    let mut col = column;
    let mut open_braces = 1usize;

    while let Some(&ch) = chars.get(pos) {
        let following = chars.get(pos + 1).copied();
        match (ch, following) {
            ('"' | '\'', _) => {
                let (after, next_line, next_col) = skip_quoted(chars, pos, ln, col, ch)?;
                pos = after;
                ln = next_line;
                col = next_col;
            }
            ('`', _) => {
                // Nested template: only its extent matters here.
                let (_, after, next_line, next_col) = lex_template(chars, pos, ln, col)?;
                pos = after;
                ln = next_line;
                col = next_col;
            }
            ('/', Some('/')) => {
                // Line comment: runs up to, but not including, the newline.
                pos += 2;
                col += 2;
                let run = chars[pos..].iter().take_while(|&&c| c != '\n').count();
                pos += run;
                col += run;
            }
            ('/', Some('*')) => {
                pos += 2;
                col += 2;
                while pos + 1 < chars.len() {
                    if chars[pos] == '*' && chars[pos + 1] == '/' {
                        pos += 2;
                        col += 2;
                        break;
                    }
                    if chars[pos] == '\n' {
                        ln += 1;
                        col = 1;
                    } else {
                        col += 1;
                    }
                    pos += 1;
                }
            }
            ('{', _) => {
                open_braces += 1;
                pos += 1;
                col += 1;
            }
            ('}', _) => {
                open_braces -= 1;
                if open_braces == 0 {
                    let source: String = chars[start..pos].iter().collect();
                    return Ok((source, pos + 1, ln, col + 1));
                }
                pos += 1;
                col += 1;
            }
            ('\n', _) => {
                pos += 1;
                ln += 1;
                col = 1;
            }
            _ => {
                pos += 1;
                col += 1;
            }
        }
    }

    Err(ZuzuRustError::lex(
        "unterminated template interpolation",
        line,
        column,
    ))
}

/// Skips a quoted section inside a template interpolation. `chars[start]` is
/// the opening quote and `quote` is the character that closes it.
///
/// A backslash makes the following character inert (so an escaped quote does
/// not close the section); newlines are allowed and tracked. Returns the
/// index just past the closing quote and the line/column at that point.
///
/// # Errors
///
/// Fails when the input ends before the closing quote.
fn skip_quoted(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
    quote: char,
) -> Result<(usize, usize, usize)> {
    let mut pos = start + 1;
    let mut ln = line;
    let mut col = column + 1;

    while let Some(&ch) = chars.get(pos) {
        if ch == '\\' {
            // Consume the backslash, then whatever character it escapes.
            pos += 1;
            col += 1;
            match chars.get(pos) {
                Some('\n') => {
                    ln += 1;
                    col = 1;
                    pos += 1;
                }
                Some(_) => {
                    col += 1;
                    pos += 1;
                }
                None => {}
            }
            continue;
        }
        if ch == quote {
            return Ok((pos + 1, ln, col + 1));
        }
        pos += 1;
        if ch == '\n' {
            ln += 1;
            col = 1;
        } else {
            col += 1;
        }
    }

    Err(ZuzuRustError::lex(
        "unterminated quoted section in template literal",
        line,
        column,
    ))
}

/// Lexes a numeric literal starting at `chars[start]` (which must be an
/// ASCII digit).
///
/// Two forms are recognised:
///
/// * Radix-prefixed integers: `0x…` hex, `0b…` binary, `0o…` octal. Only
///   lowercase prefixes are part of the language, and at least one valid
///   digit must follow the prefix. The token value is normalised to decimal
///   because later stages parse it as f64.
/// * Decimal numbers with an optional fraction (`.` must be followed by a
///   digit) and an optional exponent introduced by uppercase `E` with an
///   optional sign.
///
/// Returns the token text, the index just past the literal, and the column
/// at that point.
fn lex_number(chars: &[char], start: usize, column: usize) -> (String, usize, usize) {
    if chars[start] == '0' && start + 2 < chars.len() {
        let radix = match chars[start + 1] {
            'x' => Some(16u32),
            'b' => Some(2),
            'o' => Some(8),
            _ => None,
        };
        if let Some(radix) = radix {
            let digit_count = chars[start + 2..]
                .iter()
                .take_while(|ch| ch.is_digit(radix))
                .count();
            // A bare prefix such as `0x` is not a radix literal; fall through
            // so the leading `0` is lexed as a plain decimal number.
            if digit_count > 0 {
                let end = start + 2 + digit_count;
                let value = radix_digits_to_decimal(&chars[start + 2..end], radix);
                return (value, end, column + (end - start));
            }
        }
    }

    let mut end = start + 1;
    let mut end_column = column + 1;
    let mut seen_dot = false;
    while end < chars.len() {
        if chars[end].is_ascii_digit() {
            end += 1;
            end_column += 1;
            continue;
        }
        // A dot belongs to the number only once, and only when a digit
        // follows it (so `1.foo` lexes as `1`, `.`, `foo`).
        if !seen_dot
            && chars[end] == '.'
            && end + 1 < chars.len()
            && chars[end + 1].is_ascii_digit()
        {
            seen_dot = true;
            end += 1;
            end_column += 1;
            continue;
        }
        break;
    }
    // Exponent: uppercase E only (lowercase e is not part of the language).
    // It is consumed only when at least one digit follows the optional sign.
    if end < chars.len() && chars[end] == 'E' {
        let mut exp_end = end + 1;
        if exp_end < chars.len() && (chars[exp_end] == '+' || chars[exp_end] == '-') {
            exp_end += 1;
        }
        if exp_end < chars.len() && chars[exp_end].is_ascii_digit() {
            while exp_end < chars.len() && chars[exp_end].is_ascii_digit() {
                exp_end += 1;
            }
            end_column += exp_end - end;
            end = exp_end;
        }
    }
    (chars[start..end].iter().collect(), end, end_column)
}

/// Renders the integer spelled by `digits` in base `radix` as decimal text.
///
/// Values that fit in `u128` are rendered exactly. Larger values used to
/// collapse silently to `0`; they are now accumulated as `f64` (the type the
/// token is eventually parsed into) and rendered from that approximation.
fn radix_digits_to_decimal(digits: &[char], radix: u32) -> String {
    let mut exact: Option<u128> = Some(0);
    let mut approx = 0f64;
    for digit in digits.iter().filter_map(|ch| ch.to_digit(radix)) {
        exact = exact
            .and_then(|acc| acc.checked_mul(u128::from(radix)))
            .and_then(|acc| acc.checked_add(u128::from(digit)));
        approx = approx * f64::from(radix) + f64::from(digit);
    }
    match exact {
        Some(value) => value.to_string(),
        None => approx.to_string(),
    }
}

/// Decides whether a `/` at the current position opens a regex literal,
/// judging only by the most recently emitted token.
///
/// A regex may start at the beginning of input, after a keyword, an operator
/// or another regex literal, and after the punctuation `(`, `[`, `{`, `,` or
/// `;`. After any other token the `/` is not treated as a regex delimiter.
fn can_start_regex(tokens: &[Token]) -> bool {
    match tokens.last().map(|token| &token.kind) {
        None => true,
        Some(TokenKind::Keyword(_) | TokenKind::Operator(_) | TokenKind::Regex { .. }) => true,
        Some(TokenKind::Punct(ch)) => matches!(ch, '(' | '[' | '{' | ',' | ';'),
        Some(
            TokenKind::Identifier(_)
            | TokenKind::Number(_)
            | TokenKind::String(_)
            | TokenKind::BinaryString(_)
            | TokenKind::Template(_)
            | TokenKind::Eof,
        ) => false,
    }
}

/// Lexes a `/.../flags` regex literal whose opening slash is at
/// `chars[start]`.
///
/// Returns, in order: the raw pattern text (interpolations kept in their
/// `${...}` source form), the pattern split into template parts, the flag
/// letters, the index just past the literal, and the line/column there.
///
/// `parts` is only populated when the pattern contains at least one
/// `${ ... }` interpolation; for a plain pattern it is returned empty.
///
/// # Errors
///
/// Fails when the input ends before the closing `/`, and on any error from
/// the interpolation scanner.
fn lex_regex(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> Result<(String, Vec<TemplatePart>, String, usize, usize, usize)> {
    // `value` accumulates the whole pattern source; `text_part` accumulates
    // the literal run since the last interpolation.
    let mut value = String::new();
    let mut parts = Vec::new();
    let mut text_part = String::new();
    let mut text_line = line;
    let mut i = start + 1;
    let mut current_line = line;
    let mut current_column = column + 1;
    let mut in_class = false;
    let mut saw_interpolation = false;

    while i < chars.len() {
        let ch = chars[i];
        if ch == '\\' {
            // Keep the backslash and the escaped character verbatim; this
            // also stops an escaped `/`, `[` or `]` from being interpreted.
            value.push(ch);
            text_part.push(ch);
            i += 1;
            current_column += 1;
            if i >= chars.len() {
                // Dangling backslash at end of input: fall through to the
                // "unterminated" error below.
                break;
            }
            let escaped = chars[i];
            value.push(escaped);
            text_part.push(escaped);
            if escaped == '\n' {
                current_line += 1;
                current_column = 1;
            } else {
                current_column += 1;
            }
            i += 1;
            continue;
        }
        if ch == '$' && i + 1 < chars.len() && chars[i + 1] == '{' {
            // Flush the literal run collected so far before the expression.
            if !text_part.is_empty() {
                parts.push(TemplatePart::Text {
                    line: text_line,
                    value: std::mem::take(&mut text_part),
                });
            }
            let expr_line = current_line;
            let (expr, end, new_line, new_column) =
                lex_template_interpolation(chars, i + 2, current_line, current_column + 2)?;
            parts.push(TemplatePart::Expr {
                line: expr_line,
                source: expr,
            });
            saw_interpolation = true;
            // Re-emit the interpolation in source form into the raw pattern;
            // `end - 1` is the index of the closing `}`.
            value.push('$');
            value.push('{');
            let expr_source: String = chars[i + 2..end - 1].iter().collect();
            value.push_str(&expr_source);
            value.push('}');
            i = end;
            current_line = new_line;
            current_column = new_column;
            text_line = current_line;
            continue;
        }
        // Inside a `[...]` character class a `/` does not end the literal.
        if ch == '[' {
            in_class = true;
        } else if ch == ']' {
            in_class = false;
        } else if ch == '/' && !in_class {
            // Closing delimiter: consume it plus any trailing ASCII letters,
            // which form the flags.
            i += 1;
            current_column += 1;
            let flags_start = i;
            while i < chars.len() && chars[i].is_ascii_alphabetic() {
                i += 1;
                current_column += 1;
            }
            let flags: String = chars[flags_start..i].iter().collect();
            // The trailing literal run is recorded only for interpolated
            // patterns, so a plain pattern yields an empty `parts`.
            if saw_interpolation && !text_part.is_empty() {
                parts.push(TemplatePart::Text {
                    line: text_line,
                    value: text_part,
                });
            }
            return Ok((value, parts, flags, i, current_line, current_column));
        }
        value.push(ch);
        // A fresh literal run starts on the current line.
        if text_part.is_empty() {
            text_line = current_line;
        }
        text_part.push(ch);
        if ch == '\n' {
            current_line += 1;
            current_column = 1;
        } else {
            current_column += 1;
        }
        i += 1;
    }

    // End of input: report the position of the opening slash.
    Err(ZuzuRustError::lex(
        "unterminated regex literal",
        line,
        column,
    ))
}

/// Lexes an identifier starting at `chars[start]`. The first character is
/// accepted unconditionally; the identifier then extends over every
/// following character accepted by `is_identifier_continue`.
///
/// Returns the identifier text, the index just past it, and the column at
/// that point.
fn lex_identifier(chars: &[char], start: usize, column: usize) -> (String, usize, usize) {
    let tail_len = chars[start + 1..]
        .iter()
        .take_while(|&&ch| is_identifier_continue(ch))
        .count();
    let end = start + 1 + tail_len;
    let text: String = chars[start..end].iter().collect();
    (text, end, column + 1 + tail_len)
}

/// Reports whether the character after `chars[index]` exists and is an ASCII
/// letter — i.e. whether a `=` at `index` introduces a POD command such as
/// `=head1` or `=cut`. The character at `index` itself is not inspected.
fn starts_pod_command(chars: &[char], index: usize) -> bool {
    matches!(chars.get(index + 1), Some(next) if next.is_ascii_alphabetic())
}

/// Skips a POD documentation block that begins at `chars[start]`.
///
/// Lines are consumed until one that starts with `=cut` has been consumed
/// (together with its newline, if present) or the input ends. Returns the
/// index, line and column at which normal lexing resumes.
fn skip_pod_block(
    chars: &[char],
    start: usize,
    line: usize,
    column: usize,
) -> (usize, usize, usize) {
    const CUT: [char; 4] = ['=', 'c', 'u', 't'];

    let mut pos = start;
    let mut ln = line;
    let mut col = column;

    loop {
        // Run to the end of the line `pos` is currently on.
        let run = chars[pos..].iter().take_while(|&&ch| ch != '\n').count();
        pos += run;
        col += run;

        // If any input remains here, `chars[pos]` is the newline itself.
        let at_newline = pos < chars.len();

        let line_begin = start_of_line(chars, pos);
        if chars[line_begin..pos].starts_with(&CUT) {
            if at_newline {
                pos += 1;
                ln += 1;
                col = 1;
            }
            break;
        }

        if !at_newline {
            break;
        }

        // Step onto the next line.
        pos += 1;
        ln += 1;
        col = 1;

        if pos >= chars.len() {
            break;
        }

        // A line that is not a POD command cannot be `=cut`, so it is
        // swallowed whole here; command lines are examined by the next pass.
        let is_command = chars[pos] == '=' && starts_pod_command(chars, pos);
        if !is_command {
            let skipped = chars[pos..].iter().take_while(|&&ch| ch != '\n').count();
            pos += skipped;
            col += skipped;
            if pos < chars.len() {
                pos += 1;
                ln += 1;
                col = 1;
            }
        }
    }

    (pos, ln, col)
}

/// Returns the index of the first character of the line containing position
/// `index`: one past the nearest newline strictly before `index`, or `0` when
/// there is none.
fn start_of_line(chars: &[char], index: usize) -> usize {
    chars[..index]
        .iter()
        .rposition(|&ch| ch == '\n')
        .map_or(0, |newline_at| newline_at + 1)
}

/// True when `ch` may begin an identifier: an underscore or any alphabetic
/// character (Unicode-aware, per `char::is_alphabetic`).
fn is_identifier_start(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphabetic(),
    }
}

/// True when `ch` may appear after the first character of an identifier: an
/// underscore or any alphanumeric character (Unicode-aware, per
/// `char::is_alphanumeric`).
fn is_identifier_continue(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}