/// A single lexical token together with the position where it starts.
///
/// Tokens can be compared directly against string slices (`tok == "("`), which
/// compares only [`Token::text`] and ignores the position.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token text exactly as it was captured.
    pub text: String,
    /// Line on which the token starts (zero-based as produced by `tokenize`).
    pub line: usize,
    /// Column at which the token starts, counted in `char`s from zero.
    pub column: usize,
}

impl Token {
    /// Builds a token from its text and starting position.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Allows `token == "literal"`; only the text takes part in the comparison.
impl PartialEq<&str> for Token {
    fn eq(&self, other: &&str) -> bool {
        self.text.as_str() == *other
    }
}

/// Allows comparing against an unsized `str` (e.g. `token == *slice`); only the
/// text takes part in the comparison.
impl PartialEq<str> for Token {
    fn eq(&self, other: &str) -> bool {
        self.text.as_str() == other
    }
}
/// Prefixes an error message with the line it refers to.
///
/// A message that already begins with `"at line "` is returned untouched, so
/// the prefix is never stacked. Otherwise the result is `"at line N: <msg>"`,
/// where `N` is the token's `line` plus one (the stored line is shown
/// one-based). When no token is supplied the message is attributed to line 1.
pub(super) fn annotate_error_with_line(msg: String, tok: Option<&Token>) -> String {
    if msg.starts_with("at line ") {
        msg
    } else {
        let stored_line = tok.map_or(0, |t| t.line);
        format!("at line {}: {}", stored_line + 1, msg)
    }
}
/// Reports whether `token` looks like a floating-point literal.
///
/// One leading `-` is ignored; the remainder counts as a float when it
/// contains a decimal point or an exponent marker (`e` / `E`). This is a purely
/// textual check: it does not verify that the token is a well-formed number,
/// so any text containing one of those characters is reported as a float. An
/// empty token, or a lone `-`, is not a float.
pub(super) fn is_float_literal(token: &str) -> bool {
    let unsigned = match token.strip_prefix('-') {
        Some(rest) => rest,
        None => token,
    };
    // `any` on an empty string is already false, covering "" and "-".
    unsigned.chars().any(|c| matches!(c, '.' | 'e' | 'E'))
}
/// Decodes the backslash escape sequences in `s`.
///
/// Supported escapes are `\"`, `\\`, `\n`, `\r`, `\t` and `\xNN`, where `NN`
/// is exactly two hexadecimal digits (either case). A `\xNN` escape produces
/// the `char` whose code point is `NN`, so `\x80`–`\xFF` become
/// U+0080–U+00FF rather than raw bytes. Every character that is not part of an
/// escape is copied through unchanged.
///
/// # Errors
///
/// Returns a human-readable message when:
/// - the input ends right after a backslash,
/// - `\x` is followed by fewer than two characters,
/// - either character after `\x` is not a hexadecimal digit (this includes
///   sign characters such as `+`),
/// - the character after a backslash is not one of the supported escapes.
pub(super) fn unescape_string(s: &str) -> Result<String, String> {
    // The decoded text is never longer (in bytes) than twice the input and is
    // usually shorter; the input length is a good starting capacity.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                let hi = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let lo = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hi
                    )
                })?;

                // Validate each digit on its own: integer parsing helpers such
                // as `u8::from_str_radix` also accept a leading '+', which
                // would let `\x+f` slip through as a valid escape.
                let decoded = match (hi.to_digit(16), lo.to_digit(16)) {
                    // Two hex digits give at most 0xFF, always a valid `char`.
                    (Some(h), Some(l)) => char::from_u32(h * 16 + l),
                    _ => None,
                };
                let decoded = decoded.ok_or_else(|| {
                    format!(
                        "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                        hi, lo
                    )
                })?;
                result.push(decoded);
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
/// Splits `source` into positioned [`Token`]s.
///
/// The input is scanned one `char` at a time:
/// - A `"` opens a string literal, which runs up to the next `"` that is not
///   escaped by a backslash. The token text keeps both quotes and the escapes
///   exactly as written, and the literal may span several lines.
/// - Each of `( ) : ; [ ] { } ,` becomes its own one-character token.
/// - A newline outside a string becomes a `"\n"` token; any other whitespace
///   only separates tokens.
/// - Every other run of characters is collected into a single word token.
///
/// Lines and columns start at zero, columns count `char`s, and every token
/// records the position of its first character. If the input ends inside a
/// string literal, the partial text is discarded and a
/// `<<<UNCLOSED_STRING>>>` token is emitted at the opening quote's position.
pub(super) fn tokenize(source: &str) -> Vec<Token> {
    /// Emits the pending word, if any, leaving the buffer empty.
    fn flush_word(out: &mut Vec<Token>, word: &mut String, line: usize, col: usize) {
        if !word.is_empty() {
            out.push(Token::new(std::mem::take(word), line, col));
        }
    }

    let mut out = Vec::new();
    // Text of the word or string literal currently being collected.
    let mut buf = String::new();
    // Where the contents of `buf` started.
    let (mut start_line, mut start_col) = (0, 0);
    // Position of the character currently being examined.
    let (mut line, mut col) = (0, 0);
    let mut inside_string = false;
    // True when the previous character inside a string was an unescaped `\`.
    let mut escaped = false;

    for ch in source.chars() {
        if inside_string {
            buf.push(ch);
            let closes = ch == '"' && !escaped;
            // A backslash escapes the next character only if it is not itself
            // escaped; any other character ends a pending escape.
            escaped = ch == '\\' && !escaped;
            if closes {
                inside_string = false;
                out.push(Token::new(std::mem::take(&mut buf), start_line, start_col));
            }
        } else {
            match ch {
                '"' => {
                    flush_word(&mut out, &mut buf, start_line, start_col);
                    inside_string = true;
                    escaped = false;
                    start_line = line;
                    start_col = col;
                    buf.push(ch);
                }
                ws if ws.is_whitespace() => {
                    flush_word(&mut out, &mut buf, start_line, start_col);
                    if ws == '\n' {
                        out.push(Token::new("\n".to_string(), line, col));
                    }
                }
                '(' | ')' | ':' | ';' | '[' | ']' | '{' | '}' | ',' => {
                    flush_word(&mut out, &mut buf, start_line, start_col);
                    out.push(Token::new(ch.to_string(), line, col));
                }
                _ => {
                    if buf.is_empty() {
                        start_line = line;
                        start_col = col;
                    }
                    buf.push(ch);
                }
            }
        }

        // Advance the cursor past `ch`; every token above was stamped with the
        // position before this step.
        if ch == '\n' {
            line += 1;
            col = 0;
        } else {
            col += 1;
        }
    }

    if inside_string {
        out.push(Token::new(
            "<<<UNCLOSED_STRING>>>".to_string(),
            start_line,
            start_col,
        ));
    } else if !buf.is_empty() {
        out.push(Token::new(buf, start_line, start_col));
    }
    out
}