use crate::diag::Diagnostic;
use crate::span::Span;
/// The kind of a lexed token, together with any payload it carries.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // --- Tokens that carry a payload ---
    /// A bare identifier; the payload is its text.
    Ident(String),
    /// An integer literal.
    Int(i64),
    /// A floating-point literal.
    Float(f64),
    /// A string literal; the payload is its value with escapes decoded.
    Str(String),
    /// A theme token such as `@accent`; the payload omits the leading `@`.
    AtIdent(String),
    /// A color literal such as `#rrggbb`; the payload omits the leading `#`.
    HexColor(String),
    /// A grid size such as `2x4`; the payload is the two numbers in source order.
    Dimension(u32, u32),
    /// A `//` line comment; the payload is the trimmed text after the slashes.
    Comment(String),

    // --- Delimiters ---
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,

    // --- Punctuation and operators ---
    /// `:`
    Colon,
    /// `;`
    Semi,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `*`
    Star,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `=`
    Eq,

    // --- Edge arrows ---
    /// `->`
    Arrow,
    /// `<->`
    BidiArrow,
    /// `<-`
    BackArrow,

    // --- Structural markers ---
    /// End of a line.
    Newline,
    /// End of the input.
    Eof,
}
impl TokenKind {
pub fn describe(&self) -> String {
match self {
TokenKind::Ident(name) => format!("identifier `{name}`"),
TokenKind::Int(v) => format!("number `{v}`"),
TokenKind::Float(v) => format!("number `{v}`"),
TokenKind::Str(_) => "string literal".into(),
TokenKind::AtIdent(name) => format!("theme token `@{name}`"),
TokenKind::HexColor(c) => format!("color `#{c}`"),
TokenKind::Dimension(a, b) => format!("grid size `{a}x{b}`"),
TokenKind::Comment(_) => "comment".into(),
TokenKind::LBrace => "`{`".into(),
TokenKind::RBrace => "`}`".into(),
TokenKind::LParen => "`(`".into(),
TokenKind::RParen => "`)`".into(),
TokenKind::LBracket => "`[`".into(),
TokenKind::RBracket => "`]`".into(),
TokenKind::Colon => "`:`".into(),
TokenKind::Semi => "`;`".into(),
TokenKind::Comma => "`,`".into(),
TokenKind::Dot => "`.`".into(),
TokenKind::DotDot => "`..`".into(),
TokenKind::Star => "`*`".into(),
TokenKind::Plus => "`+`".into(),
TokenKind::Minus => "`-`".into(),
TokenKind::Slash => "`/`".into(),
TokenKind::Percent => "`%`".into(),
TokenKind::Eq => "`=`".into(),
TokenKind::Arrow => "`->`".into(),
TokenKind::BidiArrow => "`<->`".into(),
TokenKind::BackArrow => "`<-`".into(),
TokenKind::Newline => "end of line".into(),
TokenKind::Eof => "end of file".into(),
}
}
}
/// A single lexed token: what it is and where it was found.
#[derive(Debug, Clone)]
pub struct Token {
    /// What kind of token this is, including any payload.
    pub kind: TokenKind,
    /// The source range the token was read from.
    pub span: Span,
}
/// Everything produced by one call to [`lex`].
pub struct LexOutput {
    /// The tokens, in source order.
    pub tokens: Vec<Token>,
    /// The diagnostics reported while lexing, in the order they were raised.
    pub diagnostics: Vec<Diagnostic>,
}
/// Lexes `text` from its first byte and returns the resulting tokens and
/// diagnostics.
pub fn lex(text: &str) -> LexOutput {
    // A fresh lexer starts at offset 0 with nothing collected yet.
    let lexer = Lexer {
        text,
        bytes: text.as_bytes(),
        pos: 0,
        tokens: Vec::new(),
        diags: Vec::new(),
    };
    lexer.run()
}
/// Cursor-based lexer state over a borrowed source string.
struct Lexer<'a> {
    /// The source being lexed, used when slicing out text.
    text: &'a str,
    /// The same source viewed as bytes, used for single-byte lookahead.
    bytes: &'a [u8],
    /// Byte offset of the next unread byte.
    pos: usize,
    /// Tokens produced so far.
    tokens: Vec<Token>,
    /// Diagnostics reported so far.
    diags: Vec<Diagnostic>,
}
impl<'a> Lexer<'a> {
fn run(mut self) -> LexOutput {
while self.pos < self.bytes.len() {
let start = self.pos;
let b = self.bytes[self.pos];
match b {
b' ' | b'\t' | b'\r' => self.pos += 1,
b'\n' => {
self.pos += 1;
self.push(TokenKind::Newline, start);
}
b'/' if self.peek(1) == Some(b'/') => {
let mut end = self.pos;
while end < self.bytes.len() && self.bytes[end] != b'\n' {
end += 1;
}
let content = self.text[self.pos + 2..end].trim().to_string();
self.pos = end;
self.push(TokenKind::Comment(content), start);
}
b'"' => self.lex_string(start),
b'0'..=b'9' => self.lex_number(start),
b'A'..=b'Z' | b'a'..=b'z' | b'_' => self.lex_ident(start),
b'@' => {
self.pos += 1;
if self.cur_is_ident_start() {
let name = self.take_ident_text();
self.push(TokenKind::AtIdent(name), start);
} else {
self.error_char(
start,
"`@` must be followed by a theme token name, like `@accent`",
);
}
}
b'#' => {
self.pos += 1;
let hex_start = self.pos;
while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_hexdigit() {
self.pos += 1;
}
let hex = &self.text[hex_start..self.pos];
if hex.len() == 3 || hex.len() == 6 || hex.len() == 8 {
self.push(TokenKind::HexColor(hex.to_string()), start);
} else {
self.diags.push(
Diagnostic::error("E0105", format!("invalid color literal `#{hex}`"))
.with_label(
Span::new(start, self.pos),
"expected 3, 6, or 8 hex digits",
)
.with_help("write colors as `#rgb`, `#rrggbb`, or `#rrggbbaa`"),
);
}
}
b'{' => self.single(TokenKind::LBrace),
b'}' => self.single(TokenKind::RBrace),
b'(' => self.single(TokenKind::LParen),
b')' => self.single(TokenKind::RParen),
b'[' => self.single(TokenKind::LBracket),
b']' => self.single(TokenKind::RBracket),
b':' => self.single(TokenKind::Colon),
b';' => self.single(TokenKind::Semi),
b',' => self.single(TokenKind::Comma),
b'*' => self.single(TokenKind::Star),
b'+' => self.single(TokenKind::Plus),
b'%' => self.single(TokenKind::Percent),
b'=' => self.single(TokenKind::Eq),
b'/' => self.single(TokenKind::Slash),
b'.' => {
if self.peek(1) == Some(b'.') {
self.pos += 2;
self.push(TokenKind::DotDot, start);
} else {
self.single(TokenKind::Dot);
}
}
b'-' => {
if self.peek(1) == Some(b'>') {
self.pos += 2;
self.push(TokenKind::Arrow, start);
} else {
self.single(TokenKind::Minus);
}
}
b'<' => {
if self.peek(1) == Some(b'-') && self.peek(2) == Some(b'>') {
self.pos += 3;
self.push(TokenKind::BidiArrow, start);
} else if self.peek(1) == Some(b'-') {
self.pos += 2;
self.push(TokenKind::BackArrow, start);
} else {
self.error_char(
start,
"`<` is only used in the `<->` and `<-` edge arrows",
);
}
}
_ => {
let ch_len = self.text[self.pos..]
.chars()
.next()
.map(|c| c.len_utf8())
.unwrap_or(1);
self.pos += ch_len;
let ch = &self.text[start..self.pos];
self.diags.push(
Diagnostic::error("E0101", format!("unexpected character `{ch}`"))
.with_label(
Span::new(start, self.pos),
"not valid drawlang syntax here",
),
);
}
}
}
let end = self.bytes.len();
if !matches!(
self.tokens.last().map(|t| &t.kind),
Some(TokenKind::Newline) | None
) {
self.tokens.push(Token {
kind: TokenKind::Newline,
span: Span::new(end, end),
});
}
self.tokens.push(Token {
kind: TokenKind::Eof,
span: Span::new(end, end),
});
LexOutput {
tokens: self.tokens,
diagnostics: self.diags,
}
}
fn peek(&self, ahead: usize) -> Option<u8> {
self.bytes.get(self.pos + ahead).copied()
}
fn push(&mut self, kind: TokenKind, start: usize) {
self.tokens.push(Token {
kind,
span: Span::new(start, self.pos),
});
}
fn single(&mut self, kind: TokenKind) {
let start = self.pos;
self.pos += 1;
self.push(kind, start);
}
fn error_char(&mut self, start: usize, help: &str) {
let ch = &self.text[start..self.pos.max(start + 1).min(self.text.len())];
self.diags.push(
Diagnostic::error("E0101", format!("unexpected character `{ch}`"))
.with_label(Span::new(start, self.pos.max(start + 1)), "not valid here")
.with_help(help),
);
}
fn cur_is_ident_start(&self) -> bool {
matches!(
self.bytes.get(self.pos),
Some(b'A'..=b'Z' | b'a'..=b'z' | b'_')
)
}
fn take_ident_text(&mut self) -> String {
let start = self.pos;
while matches!(
self.bytes.get(self.pos),
Some(b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_')
) {
self.pos += 1;
}
self.text[start..self.pos].to_string()
}
fn lex_ident(&mut self, start: usize) {
let name = self.take_ident_text();
self.push(TokenKind::Ident(name), start);
}
fn lex_number(&mut self, start: usize) {
while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
self.pos += 1;
}
if self.bytes.get(self.pos) == Some(&b'x')
&& matches!(self.bytes.get(self.pos + 1), Some(b'0'..=b'9'))
{
let cols_str = &self.text[start..self.pos];
self.pos += 1; let rows_start = self.pos;
while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
self.pos += 1;
}
let rows_str = &self.text[rows_start..self.pos];
if self.cur_is_ident_start() {
self.take_ident_text();
self.diags.push(
Diagnostic::error("E0105", "malformed grid size")
.with_label(Span::new(start, self.pos), "expected something like `2x4`"),
);
return;
}
let a: u32 = cols_str.parse().unwrap_or(0);
let b: u32 = rows_str.parse().unwrap_or(0);
self.push(TokenKind::Dimension(a, b), start);
return;
}
if self.bytes.get(self.pos) == Some(&b'.')
&& matches!(self.bytes.get(self.pos + 1), Some(b'0'..=b'9'))
{
self.pos += 1;
while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
self.pos += 1;
}
let v: f64 = self.text[start..self.pos].parse().unwrap();
self.push(TokenKind::Float(v), start);
return;
}
if self.cur_is_ident_start() {
let unit_start = self.pos;
let unit = self.take_ident_text();
self.diags.push(
Diagnostic::error(
"E0105",
format!("numbers take no unit suffix; found `{unit}`"),
)
.with_label(Span::new(unit_start, self.pos), "remove this suffix")
.with_help("drawlang lengths are always in pixels; write the bare number"),
);
return;
}
let v: i64 = self.text[start..self.pos].parse().unwrap_or(0);
self.push(TokenKind::Int(v), start);
}
fn lex_string(&mut self, start: usize) {
self.pos += 1; let mut value = String::new();
let mut brace_depth = 0usize;
loop {
match self.bytes.get(self.pos) {
None | Some(b'\n') => {
self.diags.push(
Diagnostic::error("E0102", "unterminated string literal")
.with_label(
Span::new(start, self.pos),
"string starts here and never closes",
)
.with_help("add a closing `\"` before the end of the line"),
);
self.push(TokenKind::Str(value), start);
return;
}
Some(b'"') if brace_depth == 0 => {
self.pos += 1;
break;
}
Some(b'\\') => {
self.pos += 1;
match self.bytes.get(self.pos) {
Some(b'n') => value.push('\n'),
Some(b't') => value.push('\t'),
Some(b'"') => value.push('"'),
Some(b'\\') => value.push('\\'),
Some(b'{') => value.push('\u{1}'),
Some(b'}') => value.push('\u{2}'),
other => {
let ch = other.map(|&b| b as char).unwrap_or('?');
self.diags.push(
Diagnostic::error("E0102", format!("unknown escape `\\{ch}`"))
.with_label(
Span::new(self.pos - 1, self.pos + 1),
"not a valid escape sequence",
)
.with_help(r#"valid escapes are \" \\ \n \t \{ \}"#),
);
}
}
self.pos += 1;
}
Some(&b'{') => {
brace_depth += 1;
value.push('{');
self.pos += 1;
}
Some(&b'}') => {
brace_depth = brace_depth.saturating_sub(1);
value.push('}');
self.pos += 1;
}
Some(_) => {
let ch = self.text[self.pos..].chars().next().unwrap();
value.push(ch);
self.pos += ch.len_utf8();
}
}
}
if brace_depth > 0 {
self.diags.push(
Diagnostic::error("E0104", "unbalanced `{` in string interpolation")
.with_label(
Span::new(start, self.pos),
"this string has an unclosed `{`",
)
.with_help(
r#"close the interpolation (`"GPU {i}"`) or escape the brace as `\{`"#,
),
);
}
self.push(TokenKind::Str(value), start);
}
}