use crate::tokens::{LexerError, SpannedToken};
use logos::{Lexer, Logos, internal::LexerInternal};
use microcad_lang_base::Span;
use std::borrow::Cow;
/// Either a regular token of the main lexing mode or a lexer error.
#[derive(Debug, PartialEq, Clone)]
#[allow(missing_docs)]
pub enum LogosToken<'a> {
/// Wraps a [`NormalToken`].
Normal(NormalToken<'a>),
/// Wraps a [`LexerError`].
Error(LexerError),
}
/// Tokens of the main (non-string) lexing mode, generated by the `logos` derive.
///
/// Variants carrying a `Cow<'a, str>` hold text produced by their callback; for
/// `token_cow` that is the matched slice borrowed from the source.
/// Lexing failures are reported as [`LexerError`].
#[derive(Logos, Debug, PartialEq, Clone)]
#[allow(missing_docs)]
#[logos(error(LexerError))]
pub enum NormalToken<'a> {
    // --- Whitespace and comments ---
    /// A run of ASCII whitespace.
    ///
    /// The regex only matches the first character of the run; `whitespace_callback`
    /// then extends the token over every following ASCII whitespace character.
    /// `\r` is part of the start set because the callback also consumes it, so a
    /// carriage return is accepted at the start of a run and not only inside one.
    #[regex(r"[ \t\n\f\r]", callback = whitespace_callback)]
    Whitespace(Cow<'a, str>),
    /// A `///` comment up to (not including) the end of the line; holds the whole match.
    #[regex(r#"\/\/\/[^\n]*"#, allow_greedy = true, callback = token_cow)]
    DocComment(Cow<'a, str>),
    /// A `//!` comment up to (not including) the end of the line; holds the whole match.
    #[regex(r#"\/\/![^\n]*"#, allow_greedy = true, callback = token_cow)]
    InnerDocComment(Cow<'a, str>),
    /// A `//` comment up to the end of the line; holds the match with surrounding
    /// whitespace trimmed (the `//` prefix is kept).
    #[regex(r#"\/\/[^\n]*"#, allow_greedy = true, callback = single_line_comment_callback)]
    SingleLineComment(Cow<'a, str>),
    /// A `/* ... */` comment. Only the opener is matched here;
    /// `multi_line_comment_callback` locates the closing `*/` and produces the text.
    #[token("/*", callback = multi_line_comment_callback)]
    MultiLineComment(Cow<'a, str>),

    // --- Keywords ---
    // The two-letter keywords carry an explicit priority of 5 so that they outrank the
    // `Identifier` (priority 4) and `Unit` (priority 3) regexes, which match the same text.
    #[token("mod")]
    KeywordMod,
    #[token("part")]
    KeywordPart,
    #[token("sketch")]
    KeywordSketch,
    #[token("op", priority = 5)]
    KeywordOp,
    #[token("fn", priority = 5)]
    KeywordFn,
    #[token("if", priority = 5)]
    KeywordIf,
    #[token("else")]
    KeywordElse,
    #[token("use")]
    KeywordUse,
    #[token("as", priority = 5)]
    KeywordAs,
    #[token("return")]
    KeywordReturn,
    #[token("pub")]
    KeywordPub,
    #[token("const")]
    KeywordConst,
    #[token("prop")]
    KeywordProp,
    #[token("init")]
    KeywordInit,
    #[token("__plugin")]
    KeywordPlugin,
    #[token("assembly")]
    KeywordAssembly,
    #[token("material")]
    KeywordMaterial,
    #[token("unit")]
    KeywordUnit,
    #[token("enum")]
    KeywordEnum,
    #[token("struct")]
    KeywordStruct,
    #[token("match")]
    KeywordMatch,
    #[token("type")]
    KeywordType,
    #[token("extern")]
    KeywordExtern,

    // --- Identifiers, units and literals ---
    /// An identifier: `_` or an ASCII letter, followed by any number of `_`, ASCII
    /// letters, digits or `'`.
    #[regex("(_|[a-zA-Z])[_a-zA-Z0-9']*", callback = token_cow, priority = 4)]
    Identifier(Cow<'a, str>),
    /// A unit: lowercase letters with an optional `²`/`³`/`2`/`3` suffix, optionally
    /// followed by `/` and a second such part; or one of `°`, `%`, `'`.
    #[regex(r#"([a-z]+[²³23]?(/[a-z]+[²³23]?)?)|°|%|'"#, callback = token_cow, priority = 3)]
    Unit(Cow<'a, str>),
    /// An unsigned decimal integer without leading zeros.
    #[regex(r#"(0|[1-9]\d*)"#, callback = token_cow)]
    LiteralInt(Cow<'a, str>),
    /// An integer immediately followed by `..`; the slice includes the two dots.
    // NOTE(review): presumably exists so `1..` is not lexed as the float `1.` — confirm.
    #[regex(r#"(0|[1-9]\d*)\.\."#, callback = token_cow)]
    LiteralIntWithRange(Cow<'a, str>),
    /// A floating point literal (decimal point and/or exponent).
    #[regex(r#"((0|[1-9]\d*)\.(\d+)?((e|E)(-|\+)?(\d+))?)|(-?(0|[1-9]\d*)?\.(\d+)((e|E)(-|\+)?(\d+))?)|(-?(0|[1-9]\d*)\.?((e|E)(-|\+)?(\d+)))"#, callback = token_cow)]
    LiteralFloat(Cow<'a, str>),
    /// Started by a `"` character; `string_token_callback` decides whether it is a
    /// complete string literal or a standalone quote (see [`QuoteVariant`]).
    #[regex(r#"""#, string_token_callback)]
    Quote(QuoteVariant<'a>),
    #[token("true")]
    LiteralBoolTrue,
    #[token("false")]
    LiteralBoolFalse,

    // --- Sigils ---
    #[token(":")]
    SigilColon,
    #[token(";")]
    SigilSemiColon,
    #[token("::")]
    SigilDoubleColon,
    #[token("(")]
    SigilOpenBracket,
    #[token(")")]
    SigilCloseBracket,
    #[token("[")]
    SigilOpenSquareBracket,
    #[token("]")]
    SigilCloseSquareBracket,
    #[token("{")]
    SigilOpenCurlyBracket,
    #[token("}")]
    SigilCloseCurlyBracket,
    #[token("#")]
    SigilHash,
    #[token(".")]
    SigilDot,
    #[token(",")]
    SigilComma,
    #[token("..")]
    SigilDoubleDot,
    #[token("@")]
    SigilAt,
    #[token("->")]
    SigilSingleArrow,

    // --- Operators ---
    #[token("+")]
    OperatorAdd,
    #[token("-")]
    OperatorSubtract,
    #[token("*")]
    OperatorMultiply,
    #[token("/")]
    OperatorDivide,
    #[token("|")]
    OperatorUnion,
    #[token("&")]
    OperatorIntersect,
    #[token("^")]
    OperatorPowerXor,
    #[token(">")]
    OperatorGreaterThan,
    #[token("<")]
    OperatorLessThan,
    #[token(">=")]
    OperatorGreaterEqual,
    #[token("<=")]
    OperatorLessEqual,
    #[token("~")]
    OperatorNear,
    #[token("==")]
    OperatorEqual,
    #[token("!=")]
    OperatorNotEqual,
    #[token("and")]
    OperatorAnd,
    #[token("or", priority = 5)]
    OperatorOr,
    #[token("xor")]
    OperatorXor,
    #[token("!")]
    OperatorNot,
    #[token("=")]
    OperatorAssignment,
}
/// Payload of [`NormalToken::Quote`], as produced by `string_token_callback`.
#[derive(Debug, PartialEq, Clone)]
pub enum QuoteVariant<'a> {
/// A string literal that was lexed up to its closing quote.
String {
/// Range from the start of the opening quote to the end of the closing quote.
span: Span,
/// The string tokens found between the quotes (the closing quote is not included).
contents: Vec<SpannedToken<StringToken<'a>>>,
},
/// A standalone `"`: returned when the byte before the quote is `]`, `.` or an
/// ASCII digit, in which case no string is lexed.
Unit,
}
/// Logos callback for [`NormalToken::MultiLineComment`], invoked once `/*` has matched.
///
/// Looks for the closing `*/` *after* the opening delimiter, extends the token so it
/// ends right behind that `*/`, and returns the comment text with leading `*`
/// characters and surrounding whitespace removed.
///
/// Returns `None` when no closing `*/` exists in the remaining source.
fn multi_line_comment_callback<'a>(lex: &mut Lexer<'a, NormalToken<'a>>) -> Option<Cow<'a, str>> {
    // Everything following the already matched `/*`. Searching from here instead of from
    // the token start prevents the `*` of the opener from pairing with a directly
    // following `/` (as in `/*/`), which would yield an inverted slice range and panic.
    let body = &lex.source()[lex.span().end..];
    let close = body.find("*/")?;
    let comment = body[..close].trim_start_matches('*').trim();
    // Consume the comment body plus the two bytes of the closing `*/`.
    lex.bump(close + 2);
    Some(comment.into())
}
/// Logos callback for [`NormalToken::Whitespace`].
///
/// The token regex matches a single whitespace character; this callback widens the
/// token to the whole run of ASCII whitespace beginning at the token start and
/// returns that run.
fn whitespace_callback<'a>(lex: &mut Lexer<'a, NormalToken<'a>>) -> Option<Cow<'a, str>> {
    let rest = &lex.source()[lex.span().start..];
    // ASCII whitespace is always one byte wide, so the byte count is the run length.
    let run_len = rest
        .bytes()
        .take_while(u8::is_ascii_whitespace)
        .count();
    // The first character is already part of the token, hence one less to bump.
    lex.bump(run_len - 1);
    Some(Cow::Borrowed(&rest[..run_len]))
}
/// Logos callback for [`NormalToken::SingleLineComment`].
///
/// Returns the matched slice with leading and trailing whitespace removed.
fn single_line_comment_callback<'a>(lex: &mut Lexer<'a, NormalToken<'a>>) -> Option<Cow<'a, str>> {
    let trimmed: &'a str = lex.slice().trim();
    Some(Cow::Borrowed(trimmed))
}
/// Logos callback for [`NormalToken::Quote`], invoked once a `"` has matched.
///
/// If the byte in front of the quote is `]`, `.` or an ASCII digit the quote is
/// reported as [`QuoteVariant::Unit`] and nothing else is consumed. Otherwise the
/// input is lexed with the [`StringToken`] lexer until the closing quote.
///
/// # Errors
///
/// * Any error from the string lexer; for `UnclosedStringFormat` the span start is
///   moved to the opening quote.
/// * `UnclosedString` (spanning the opening quote) when the input ends before a
///   closing quote is found; `lex` is left untouched in that case.
fn string_token_callback<'a>(
    lex: &mut Lexer<'a, NormalToken<'a>>,
) -> Result<QuoteVariant<'a>, LexerError> {
    let quote_start = lex.span().start;

    // At offset 0 the saturating subtraction makes this look at the quote itself,
    // which never satisfies the check below.
    let preceding = lex
        .source()
        .as_bytes()
        .get(quote_start.saturating_sub(1))
        .copied()
        .unwrap_or_default();
    if matches!(preceding, b']' | b'.' | b'0'..=b'9') {
        return Ok(QuoteVariant::Unit);
    }

    // Lex on a clone so that `lex` only advances once the outcome is known.
    let mut string_lexer = lex.clone().morph::<StringToken>();
    let mut contents = Vec::new();
    loop {
        let Some(token) = string_lexer.next() else {
            return Err(LexerError::UnclosedString(lex.span()));
        };
        match token {
            Ok(StringToken::Quote) => {
                // Closing quote: continue normal lexing right behind it.
                *lex = string_lexer.morph();
                return Ok(QuoteVariant::String {
                    span: quote_start..lex.span().end,
                    contents,
                });
            }
            Ok(part) => contents.push(SpannedToken {
                span: string_lexer.span(),
                token: part,
            }),
            Err(error) => {
                *lex = string_lexer.morph();
                return Err(match error {
                    LexerError::UnclosedStringFormat(span) => {
                        LexerError::UnclosedStringFormat(quote_start..span.end)
                    }
                    other => other,
                });
            }
        }
    }
}
/// Tokens of the string lexing mode, used by `string_token_callback` for the input
/// between the quotes of a string literal.
///
/// Note that a single `}` matches none of the patterns below (only `}}` does).
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error(LexerError))]
#[allow(missing_docs)]
pub enum StringToken<'a> {
/// A run of characters other than `"`, `{`, `}` and `\`.
#[regex(r#"[^"{}\\]+"#, callback = token_cow)]
Content(Cow<'a, str>),
/// A backslash followed by one of `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`.
/// Holds the character after the backslash as written in the source (not decoded).
#[regex(r#"\\["\\/bfnrt]"#, callback = escaped_car)]
Escaped(char),
/// A single backslash.
#[token(r#"\"#)]
BackSlash,
/// The two-character sequence `{{`.
#[token(r#"{{"#)]
EscapedCurlyOpen,
/// The two-character sequence `}}`.
#[token(r#"}}"#)]
EscapedCurlyClose,
/// A `{` together with what `format_token_callback` lexed after it:
/// the expression tokens and the format specification tokens.
#[token("{", format_token_callback)]
FormatStart(
(
Vec<SpannedToken<NormalToken<'a>>>,
Vec<SpannedToken<StringFormatToken<'a>>>,
),
),
/// A `"` character.
#[token(r#"""#)]
Quote,
}
/// Builds the plain text value of a string from its tokens.
///
/// Content is copied verbatim, `{{` and `}}` become `{` and `}`, a lone backslash is
/// kept, and escape sequences are decoded: `\b`, `\f`, `\n`, `\r` and `\t` yield the
/// corresponding control character, while `\"`, `\\` and `\/` yield the escaped
/// character itself.
///
/// Returns `None` if the tokens contain anything that is not literal text
/// (a format expression or a quote).
pub fn get_literal_string(string_tokens: &[SpannedToken<StringToken>]) -> Option<String> {
    let mut result = String::new();
    for token in string_tokens {
        match &token.token {
            StringToken::Content(s) => result.push_str(s.as_ref()),
            // `Escaped` stores the raw character following the backslash, so it has to
            // be translated here; pushing it unchanged would turn `\n` into the letter `n`.
            StringToken::Escaped(c) => result.push(match *c {
                'b' => '\u{0008}',
                'f' => '\u{000C}',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            }),
            StringToken::BackSlash => result.push('\\'),
            StringToken::EscapedCurlyOpen => result.push('{'),
            StringToken::EscapedCurlyClose => result.push('}'),
            // Listed explicitly so that adding a variant forces a decision here.
            StringToken::FormatStart(_) | StringToken::Quote => return None,
        }
    }
    Some(result)
}
/// Logos callback for [`StringToken::FormatStart`], invoked once `{` has matched
/// inside a string.
///
/// Works in two phases on morphed copies of the lexer:
/// 1. Lexes [`NormalToken`]s (the expression) until a `}` or a `:` is found.
/// 2. Only if a `:` was found, lexes [`StringFormatToken`]s until a `}` is found.
///
/// On success `lex` is replaced by the lexer of the last phase, so the string lexer
/// continues behind the consumed input, and the collected expression and format
/// tokens are returned. If the input ends before the terminating `}` is seen, the
/// loops simply end and the tokens collected so far are returned as a success.
///
/// # Errors
///
/// * `UnclosedStringFormat` when a string literal or an unclosed string is found in
///   the expression, or a `"` is found in the format specification.
/// * Any other lexer error is passed through unchanged.
#[allow(clippy::type_complexity)]
fn format_token_callback<'a>(
lex: &mut Lexer<'a, StringToken<'a>>,
) -> Result<
(
Vec<SpannedToken<NormalToken<'a>>>,
Vec<SpannedToken<StringFormatToken<'a>>>,
),
LexerError,
> {
// Phase 1: lex the expression on a clone; `lex` itself is only updated explicitly.
let mut expression_lexer = lex.clone().morph::<NormalToken>();
let mut expression_tokens = Vec::new();
let mut format_tokens = Vec::new();
// Set when the expression was terminated by `:` rather than `}`.
let mut with_format = false;
while let Some(token) = expression_lexer.next() {
match token {
Ok(NormalToken::SigilCloseCurlyBracket) => break,
Ok(NormalToken::SigilColon) => {
with_format = true;
break;
}
// A complete string literal inside the expression is reported as an unclosed
// format. NOTE(review): presumably its opening quote is really the closing
// quote of the surrounding string — confirm.
Ok(NormalToken::Quote(QuoteVariant::String { contents, .. })) => {
let start = lex.span().start;
// End the error span at the first token inside that literal, or at the
// literal's own start if it is empty.
let end = contents
.first()
.map(|t| t.span.start)
.unwrap_or_else(|| expression_lexer.span().start);
// `end` comes from logos' `LexerInternal`; presumably it moves the end of
// the current token to the given offset.
lex.end(end);
return Err(LexerError::UnclosedStringFormat(start..end));
}
// A quote that never closes: the error span ends one byte after the start of
// the reported span.
Err(LexerError::UnclosedString(span)) => {
let start = lex.span().start;
let end = span.start + 1;
lex.end(end);
return Err(LexerError::UnclosedStringFormat(start..end));
}
// Other errors are returned as-is; `lex` is not advanced in this case.
Err(e) => return Err(e),
Ok(token) => expression_tokens.push(SpannedToken {
span: expression_lexer.span(),
token,
}),
}
}
// Phase 2: the format specification. The morph happens unconditionally so that the
// final `*lex = format_lexer.morph()` works for both cases.
let mut format_lexer = expression_lexer.morph::<StringFormatToken>();
if with_format {
while let Some(token) = format_lexer.next() {
match token {
Ok(StringFormatToken::FormatEnd) => {
break;
}
Err(e) => {
// Continue string lexing behind the offending input.
*lex = format_lexer.morph();
return Err(e);
}
// A `"` before the closing `}`: the format was never closed.
Ok(StringFormatToken::StringEnd) => {
let start = lex.span().start;
let end = format_lexer.span().end;
lex.end(end);
return Err(LexerError::UnclosedStringFormat(start..end));
}
Ok(token) => format_tokens.push(SpannedToken {
span: format_lexer.span(),
token,
}),
}
}
}
// Hand the advanced position back to the string lexer.
*lex = format_lexer.morph();
Ok((expression_tokens, format_tokens))
}
/// Tokens of the format specification mode, used by `format_token_callback` for the
/// input after the `:` of a format expression.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error(LexerError))]
#[allow(missing_docs)]
pub enum StringFormatToken<'a> {
/// The `}` that ends the format expression.
#[token("}")]
FormatEnd,
/// A `.` followed by one or more digits; holds the whole match including the dot.
#[regex(r#"\.[\d]+"#, callback = token_cow)]
FormatPrecision(Cow<'a, str>),
/// A `0` followed by one or more digits; holds the whole match including the `0`.
#[regex(r#"0[\d]+"#, callback = token_cow)]
FormatWidth(Cow<'a, str>),
/// A `"` character; `format_token_callback` treats it as an unclosed format.
#[token("\"")]
StringEnd,
}
/// Generic logos callback returning the matched slice as a borrowed [`Cow`].
fn token_cow<'a, Token: Logos<'a, Source = str>>(lex: &mut Lexer<'a, Token>) -> Cow<'a, str> {
    let matched: &'a str = lex.slice();
    Cow::from(matched)
}
/// Generic logos callback returning the second character of the matched slice,
/// i.e. the character following the backslash of an escape sequence.
///
/// # Panics
///
/// Panics if the matched slice has fewer than two characters.
fn escaped_car<'a, Token: Logos<'a, Source = str>>(lex: &mut Lexer<'a, Token>) -> char {
    let mut chars = lex.slice().chars();
    // Skip the leading character (the backslash for the patterns using this callback).
    let _leading = chars.next();
    chars
        .next()
        .expect("escaped token always has 2 chars")
}